Holt-Winters time series forecasting with statsmodels

后端 未结 2 1732
一向
一向 2020-12-24 15:12

I tried forecasting with holt-winters model as shown below but I keep getting a prediction that is not consistent with what I expect. I also showed a visualizat

2条回答
  •  猫巷女王i
    2020-12-24 15:42

    This is improvisation of above answer https://stackoverflow.com/users/2285236/ayhan

    import pandas as pd
    import numpy as np
    import matplotlib.pyplot as plt
    from statsmodels.tsa.holtwinters import ExponentialSmoothing
    from sklearn.metrics import mean_squared_error
    from math import sqrt
    
    from matplotlib.pylab import rcParams
    rcParams['figure.figsize'] = 15, 7
    
    df = pd.read_csv('D:/WORK/international-airline-passengers.csv', 
                     parse_dates=['Month'], 
                     index_col='Month'
    )
    df.index.freq = 'MS'
    train, test = df.iloc[:132, 0], df.iloc[132:, 0]
    # model = ExponentialSmoothing(train, seasonal='mul', seasonal_periods=12).fit()
    model = ExponentialSmoothing(train, trend='add', seasonal='add', seasonal_periods=12, damped=True)
    hw_model = model.fit(optimized=True, use_boxcox=False, remove_bias=False)
    pred = hw_model.predict(start=test.index[0], end=test.index[-1])
    
    plt.plot(train.index, train, label='Train')
    plt.plot(test.index, test, label='Test')
    plt.plot(pred.index, pred, label='Holt-Winters')
    plt.legend(loc='best');
    

    Here is how I got the best parameters

    def exp_smoothing_configs(seasonal=[None]):
        models = list()
        # define config lists
        t_params = ['add', 'mul', None]
        d_params = [True, False]
        s_params = ['add', 'mul', None]
        p_params = seasonal
        b_params = [True, False]
        r_params = [True, False]
        # create config instances
        for t in t_params:
            for d in d_params:
                for s in s_params:
                    for p in p_params:
                        for b in b_params:
                            for r in r_params:
                                cfg = [t,d,s,p,b,r]
                                models.append(cfg)
        return models
    
    cfg_list = exp_smoothing_configs(seasonal=[12]) #[0,6,12]
    

    edf = df['Passengers']
    ts = edf[:'1959-12-01'].copy()
    ts_v = edf['1960-01-01':].copy()
    ind = edf.index[-12:]  # this will select last 12 months' indexes
    
    print("Holt's Winter Model")
    best_RMSE = np.inf
    best_config = []
    t1 = d1 = s1 = p1 = b1 = r1 = ''
    for j in range(len(cfg_list)):
        print(j)
        try:
            cg = cfg_list[j]
            print(cg)
            t,d,s,p,b,r = cg
            train = edf[:'1959'].copy()
            test = edf['1960-01-01':'1960-12-01'].copy()
            # define model
            if (t == None):
                model = ExponentialSmoothing(ts, trend=t, seasonal=s, seasonal_periods=p)
            else:
                model = ExponentialSmoothing(ts, trend=t, damped=d, seasonal=s, seasonal_periods=p)
            # fit model
            model_fit = model.fit(optimized=True, use_boxcox=b, remove_bias=r)
            # make one step forecast
            y_forecast = model_fit.forecast(12)
            rmse = np.sqrt(mean_squared_error(ts_v,y_forecast))
            print(rmse)
            if rmse < best_RMSE:
                best_RMSE = rmse
                best_config = cfg_list[j]
        except:
           continue
    

    Function to evaluate model

    def model_eval(y, predictions):
    
        # Import library for metrics
        from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
    
        # Mean absolute error (MAE)
        mae = mean_absolute_error(y, predictions)
    
        # Mean squared error (MSE)
        mse = mean_squared_error(y, predictions)
    
    
        # SMAPE is an alternative for MAPE when there are zeros in the testing data. It
        # scales the absolute percentage by the sum of forecast and observed values
        SMAPE = np.mean(np.abs((y - predictions) / ((y + predictions)/2))) * 100
    
    
        # Calculate the Root Mean Squared Error
        rmse = np.sqrt(mean_squared_error(y, predictions))
    
        # Calculate the Mean Absolute Percentage Error
        # y, predictions = check_array(y, predictions)
        MAPE = np.mean(np.abs((y - predictions) / y)) * 100
    
        # mean_forecast_error
        mfe = np.mean(y - predictions)
    
        # NMSE normalizes the obtained MSE after dividing it by the test variance. It
        # is a balanced error measure and is very effective in judging forecast
        # accuracy of a model.
    
        # normalised_mean_squared_error
        NMSE = mse / (np.sum((y - np.mean(y)) ** 2)/(len(y)-1))
    
    
        # theil_u_statistic
        # It is a normalized measure of total forecast error.
        error = y - predictions
        mfe = np.sqrt(np.mean(predictions**2))
        mse = np.sqrt(np.mean(y**2))
        rmse = np.sqrt(np.mean(error**2))
        theil_u_statistic =  rmse / (mfe*mse)
    
    
        # mean_absolute_scaled_error
        # This evaluation metric is used to over come some of the problems of MAPE and
        # is used to measure if the forecasting model is better than the naive model or
        # not.
    
    
        # Print metrics
        print('Mean Absolute Error:', round(mae, 3))
        print('Mean Squared Error:', round(mse, 3))
        print('Root Mean Squared Error:', round(rmse, 3))
        print('Mean absolute percentage error:', round(MAPE, 3))
        print('Scaled Mean absolute percentage error:', round(SMAPE, 3))
        print('Mean forecast error:', round(mfe, 3))
        print('Normalised mean squared error:', round(NMSE, 3))
        print('Theil_u_statistic:', round(theil_u_statistic, 3))
    

    print(best_RMSE, best_config)
    
    t1,d1,s1,p1,b1,r1 = best_config
    
    if t1 == None:
        hw_model1 = ExponentialSmoothing(ts, trend=t1, seasonal=s1, seasonal_periods=p1)
    else:
        hw_model1 = ExponentialSmoothing(ts, trend=t1, seasonal=s1, seasonal_periods=p1, damped=d1)
    
    fit2 = hw_model1.fit(optimized=True, use_boxcox=b1, remove_bias=r1)
    
    pred_HW = fit2.predict(start=pd.to_datetime('1960-01-01'), end = pd.to_datetime('1960-12-01'))
    # pred_HW = fit2.forecast(12)
    
    pred_HW = pd.Series(data=pred_HW, index=ind)
    df_pass_pred = pd.concat([df, pred_HW.rename('pred_HW')], axis=1)
    
    print(model_eval(ts_v, pred_HW))
    print('-*-'*20)
    
    # 15.570830579664698 ['add', True, 'add', 12, False, False]
    # Mean Absolute Error: 10.456
    # Mean Squared Error: 481.948
    # Root Mean Squared Error: 15.571
    # Mean absolute percentage error: 2.317
    # Scaled Mean absolute percentage error: 2.273
    # Mean forecast error: 483.689
    # Normalised mean squared error: 0.04
    # Theil_u_statistic: 0.0
    # None
    # -*--*--*--*--*--*--*--*--*--*--*--*--*--*--*--*--*--*--*--*-
    

    Summary:

    New Model results:

    Mean Absolute Error: 10.456
    Mean Squared Error: 481.948
    Root Mean Squared Error: 15.571
    Mean absolute percentage error: 2.317
    Scaled Mean absolute percentage error: 2.273
    Mean forecast error: 483.689
    Normalised mean squared error: 0.04
    Theil_u_statistic: 0.0
    

    Old Model Results:

    Mean Absolute Error: 20.682
    Mean Squared Error: 481.948
    Root Mean Squared Error: 23.719
    Mean absolute percentage error: 4.468
    Scaled Mean absolute percentage error: 4.56
    Mean forecast error: 466.704
    Normalised mean squared error: 0.093
    Theil_u_statistic: 0.0
    

    Bonus:

    You will get this nice dataframe where you can compare the original values with predicted values.

    df_pass_pred['1960':]
    

    output

                Passengers     pred_HW
    Month                             
    1960-01-01         417  417.826543
    1960-02-01         391  400.452916
    1960-03-01         419  461.804259
    1960-04-01         461  450.787208
    1960-05-01         472  472.695903
    1960-06-01         535  528.560823
    1960-07-01         622  601.265794
    1960-08-01         606  608.370401
    1960-09-01         508  508.869849
    1960-10-01         461  452.958727
    1960-11-01         390  407.634391
    1960-12-01         432  437.385058
    

提交回复
热议问题