from fastcore.test import test_eq, test_fail
from nbdev import show_doc

源代码

generate_daily_series

 generate_daily_series (n_series:int, min_length:int=50,
                        max_length:int=500, n_static_features:int=0,
                        equal_ends:bool=False,
                        static_as_categorical:bool=True,
                        with_trend:bool=False, seed:int=0,
                        engine:str='pandas')

生成合成面板序列。

| | 类型 | 默认值 | 详情 |
|---|---|---|---|
| n_series | int | | 合成面板的序列数量。 |
| min_length | int | 50 | 合成面板序列的最小长度。 |
| max_length | int | 500 | 合成面板序列的最大长度。 |
| n_static_features | int | 0 | 合成面板序列的静态外部变量数量。 |
| equal_ends | bool | False | 序列应在相同的日期戳 `ds` 结束。 |
| static_as_categorical | bool | True | 静态特征应具有分类数据类型。 |
| with_trend | bool | False | 序列应具有(正向)趋势。 |
| seed | int | 0 | 用于生成数据的随机种子。 |
| engine | str | pandas | 输出 Dataframe 类型。 |
| 返回值 | Union | | 包含列 [`unique_id`, `ds`, `y`] 和外部特征的合成面板。 |

生成 20 个序列,长度介于 100 到 1,000 之间。

n_series = 20
min_length = 100
max_length = 1000

series = generate_daily_series(n_series, min_length, max_length)
series
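| | unique_id | ds | y |
|---|---|---|---|
| 0 | id_00 | 2000-01-01 | 0.395863 |
| 1 | id_00 | 2000-01-02 | 1.264447 |
| 2 | id_00 | 2000-01-03 | 2.284022 |
| 3 | id_00 | 2000-01-04 | 3.462798 |
| 4 | id_00 | 2000-01-05 | 4.035518 |
| ... | ... | ... | ... |
| 12446 | id_19 | 2002-03-11 | 0.309275 |
| 12447 | id_19 | 2002-03-12 | 1.189464 |
| 12448 | id_19 | 2002-03-13 | 2.325032 |
| 12449 | id_19 | 2002-03-14 | 3.333198 |
| 12450 | id_19 | 2002-03-15 | 4.306117 |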

我们还可以为每个序列添加静态特征(例如 product_id 或 store_id)。只有第一个静态特征 (`static_0`) 与目标变量相关。

n_static_features = 2

series_with_statics = generate_daily_series(n_series, min_length, max_length, n_static_features)
series_with_statics
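| | unique_id | ds | y | static_0 | static_1 |
|---|---|---|---|---|---|
| 0 | id_00 | 2000-01-01 | 7.521388 | 18 | 10 |
| 1 | id_00 | 2000-01-02 | 24.024502 | 18 | 10 |
| 2 | id_00 | 2000-01-03 | 43.396423 | 18 | 10 |
| 3 | id_00 | 2000-01-04 | 65.793168 | 18 | 10 |
| 4 | id_00 | 2000-01-05 | 76.674843 | 18 | 10 |
| ... | ... | ... | ... | ... | ... |
| 12446 | id_19 | 2002-03-11 | 27.834771 | 89 | 42 |
| 12447 | id_19 | 2002-03-12 | 107.051746 | 89 | 42 |
| 12448 | id_19 | 2002-03-13 | 209.252845 | 89 | 42 |
| 12449 | id_19 | 2002-03-14 | 299.987801 | 89 | 42 |
| 12450 | id_19 | 2002-03-15 | 387.550536 | 89 | 42 |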
for i in range(n_static_features):
    assert all(series_with_statics.groupby('unique_id')[f'static_{i}'].nunique() == 1)

如果 `equal_ends=False`(默认值),则每个序列有不同的结束日期。

assert series_with_statics.groupby('unique_id')['ds'].max().nunique() > 1

通过指定 `equal_ends=True`,我们可以让所有序列在同一日期结束。

series_equal_ends = generate_daily_series(n_series, min_length, max_length, equal_ends=True)

assert series_equal_ends.groupby('unique_id')['ds'].max().nunique() == 1

源代码

generate_prices_for_series

 generate_prices_for_series (series:pandas.core.frame.DataFrame,
                             horizon:int=7, seed:int=0)
series_for_prices = generate_daily_series(20, n_static_features=2, equal_ends=True)
series_for_prices.rename(columns={'static_1': 'product_id'}, inplace=True)
prices_catalog = generate_prices_for_series(series_for_prices, horizon=7)
prices_catalog
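| | ds | unique_id | price |
|---|---|---|---|
| 0 | 2000-10-05 | id_00 | 0.548814 |
| 1 | 2000-10-06 | id_00 | 0.715189 |
| 2 | 2000-10-07 | id_00 | 0.602763 |
| 3 | 2000-10-08 | id_00 | 0.544883 |
| 4 | 2000-10-09 | id_00 | 0.423655 |
| ... | ... | ... | ... |
| 5009 | 2001-05-17 | id_19 | 0.288027 |
| 5010 | 2001-05-18 | id_19 | 0.846305 |
| 5011 | 2001-05-19 | id_19 | 0.791284 |
| 5012 | 2001-05-20 | id_19 | 0.578636 |
| 5013 | 2001-05-21 | id_19 | 0.288589 |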
test_eq(set(prices_catalog['unique_id']), set(series_for_prices['unique_id']))
test_fail(lambda: generate_prices_for_series(series), contains='equal ends')

源代码

PredictionIntervals

 PredictionIntervals (n_windows:int=2, h:int=1,
                      method:str='conformal_distribution')

用于存储预测区间元数据信息的类。