本地
优化
源文件
mlforecast_objective
mlforecast_objective (df:Union[pandas.core.frame.DataFrame,polars.dataframe.frame.DataFrame], config_fn:Callable[[optuna.trial._trial.Trial],Dict[str,Any]], loss:Callable, model:sklearn.base.BaseEstimator, freq:Union[int,str], n_windows:int, h:int, step_size:Optional[int]=None, input_size:Optional[int]=None, refit:Union[bool,int]=False, id_col:str='unique_id', time_col:str='ds', target_col:str='y')
MLForecast 类的 optuna 目标函数
| 名称 | 类型 | 默认值 | 详情 |
|---|---|---|---|
df | 联合 | ||
config_fn | 可调用对象 | 接受一个 optuna trial 并生成包含以下键的配置的函数 - model_params - mlf_init_params - mlf_fit_params | |
loss | 可调用对象 | 接受验证和训练数据帧并生成一个浮点数的函数。 | |
model | BaseEstimator | 用于训练的 scikit-learn 兼容模型 | |
freq | 联合 | pandas 或 polars 的 offset 别名或表示序列频率的整数。 | |
n_windows | 整数 | 评估的窗口数量。 | |
h | 整数 | 预测范围。 | |
step_size | 可选 | None | 每个交叉验证窗口之间的步长。如果为 None,则等于 h 。 |
input_size | 可选 | None | 每个窗口中每个序列的最大训练样本数。如果为 None,将使用 expanding window(扩展窗口)。 |
refit | 联合 | False | 为每个交叉验证窗口重新训练模型。 如果为 False,模型在开始时训练一次,然后用于预测每个窗口。 如果为正整数,模型每 refit 个窗口重新训练一次。 |
id_col | 字符串 | unique_id | 标识每个序列的列。 |
time_col | 字符串 | ds | 标识每个时间步的列,其值可以是时间戳或整数。 |
target_col | 字符串 | y | 包含目标值的列。 |
返回值 | 可调用对象 |
import lightgbm as lgb
from datasetsforecast.m4 import M4, M4Evaluation, M4Info
from utilsforecast.losses import smape
from mlforecast.lag_transforms import ExpandingMean, RollingMean
from mlforecast.target_transforms import Differences, LocalBoxCox, LocalStandardScaler
def train_valid_split(group):
    """Load an M4 group and split it into train/valid parts.

    The last ``horizon`` observations of every series (per ``unique_id``)
    become the validation set; everything before them is the training set.
    Returns ``(train, valid)`` dataframes.
    """
    full, *_ = M4.load(directory='data', group=group)
    # M4 timestamps are plain step indices; keep them as integers.
    full['ds'] = full['ds'].astype('int')
    n_valid = M4Info[group].horizon
    valid_part = full.groupby('unique_id').tail(n_valid)
    train_part = full.drop(valid_part.index)
    return train_part, valid_part
# Forecast horizon for the M4 Weekly group.
h = M4Info['Weekly'].horizon
# Download/load the Weekly data and hold out the last `h` points of each series.
weekly_train, weekly_valid = train_valid_split('Weekly')
# Categorical id lets `unique_id` be passed as a static feature to the model.
weekly_train['unique_id'] = weekly_train['unique_id'].astype('category')
# Reuse the train dtype so both splits share the same category codes.
weekly_valid['unique_id'] = weekly_valid['unique_id'].astype(weekly_train['unique_id'].dtype)
def config_fn(trial):
    """Sample a full mlforecast configuration from an optuna trial.

    Returns a dict with three keys:
    - ``model_params``: LightGBM hyperparameters,
    - ``mlf_init_params``: lags / lag_transforms / target_transforms,
    - ``mlf_fit_params``: arguments forwarded to ``MLForecast.fit``.
    """
    # Candidate feature sets; the trial picks one index into each list.
    lag_options = [
        [1],
        [13],
        [1, 13],
        range(1, 33),
    ]
    lag_tfm_options = [
        {1: [RollingMean(window_size=13)]},
        {
            1: [RollingMean(window_size=13)],
            13: [RollingMean(window_size=13)],
        },
        {13: [RollingMean(window_size=13)]},
        {
            4: [ExpandingMean(), RollingMean(window_size=4)],
            8: [ExpandingMean(), RollingMean(window_size=4)],
        },
    ]
    targ_tfm_options = [
        [Differences([1])],
        [LocalBoxCox()],
        [LocalStandardScaler()],
        [LocalBoxCox(), Differences([1])],
        [LocalBoxCox(), LocalStandardScaler()],
        [LocalBoxCox(), Differences([1]), LocalStandardScaler()],
    ]
    # Suggest the categorical indices in the same order as the model params
    # below so the sampler sees a stable parameter sequence.
    chosen_lags = lag_options[
        trial.suggest_categorical('lag_idx', range(len(lag_options)))
    ]
    chosen_lag_tfms = lag_tfm_options[
        trial.suggest_categorical('lag_tfms_idx', range(len(lag_tfm_options)))
    ]
    chosen_targ_tfms = targ_tfm_options[
        trial.suggest_categorical('targ_tfms_idx', range(len(targ_tfm_options)))
    ]
    model_params = {
        'learning_rate': 0.05,
        'objective': 'l1',
        'bagging_freq': 1,
        'num_threads': 2,
        'verbose': -1,
        'force_col_wise': True,
        'n_estimators': trial.suggest_int('n_estimators', 10, 1000, log=True),
        'num_leaves': trial.suggest_int('num_leaves', 31, 1024, log=True),
        'lambda_l1': trial.suggest_float('lambda_l1', 0.01, 10, log=True),
        'lambda_l2': trial.suggest_float('lambda_l2', 0.01, 10, log=True),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.75, 1.0),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.75, 1.0),
    }
    return {
        'model_params': model_params,
        'mlf_init_params': {
            'lags': chosen_lags,
            'lag_transforms': chosen_lag_tfms,
            'target_transforms': chosen_targ_tfms,
        },
        'mlf_fit_params': {
            'static_features': ['unique_id'],
        },
    }
def loss(df, train_df):
    """Mean sMAPE of the 'model' column across all series.

    ``train_df`` is unused here but is part of the loss signature that
    ``mlforecast_objective`` calls with (valid, train) dataframes.
    """
    per_series = smape(df, models=['model'])
    return per_series['model'].mean()
# Keep optuna quiet during the search.
optuna.logging.set_verbosity(optuna.logging.WARNING)
objective = mlforecast_objective(
    df=weekly_train,
    config_fn=config_fn,
    loss=loss,
    model=lgb.LGBMRegressor(),
    freq=1,  # integer frequency: `ds` was cast to int during the split
    n_windows=2,
    h=h,
)
# Seeded sampler so the (tiny) two-trial search is reproducible.
study = optuna.create_study(
    direction='minimize', sampler=optuna.samplers.TPESampler(seed=0)
)
study.optimize(objective, n_trials=2)
# The objective stores the sampled configuration in the trial's user attrs.
best_cfg = study.best_trial.user_attrs['config']
# Retrain on the full training set with the best configuration.
final_model = MLForecast(
    models=[lgb.LGBMRegressor(**best_cfg['model_params'])],
    freq=1,
    **best_cfg['mlf_init_params'],
)
final_model.fit(weekly_train, **best_cfg['mlf_fit_params'])
preds = final_model.predict(h)
# Evaluate on the official M4 holdout: one row per series, `h` columns each.
# (Was a hard-coded 13, which only holds for the Weekly group.)
M4Evaluation.evaluate('data', 'Weekly', preds['LGBMRegressor'].values.reshape(-1, h))
| | SMAPE | MASE | OWA |
|---|---|---|---|
每周 | 9.261538 | 2.614473 | 0.976158 |
此页面有帮助吗?
助手
回复由 AI 生成,可能包含错误。