Time Series and Forecasting Introduction

Author

Mwangi George

# a bit of time series modeling with matt dancho

# load Essential Packages
pacman::p_load(tidyverse, lubridate, tidymodels, timetk, modeltime)

# data
head(bike_sharing_daily)
# A tibble: 6 × 16
  instant dteday     season    yr  mnth holiday weekday workingday weath…¹  temp
    <dbl> <date>      <dbl> <dbl> <dbl>   <dbl>   <dbl>      <dbl>   <dbl> <dbl>
1       1 2011-01-01      1     0     1       0       6          0       2 0.344
2       2 2011-01-02      1     0     1       0       0          0       2 0.363
3       3 2011-01-03      1     0     1       0       1          1       1 0.196
4       4 2011-01-04      1     0     1       0       2          1       1 0.2  
5       5 2011-01-05      1     0     1       0       3          1       1 0.227
6       6 2011-01-06      1     0     1       0       4          1       1 0.204
# … with 6 more variables: atemp <dbl>, hum <dbl>, windspeed <dbl>,
#   casual <dbl>, registered <dbl>, cnt <dbl>, and abbreviated variable name
#   ¹​weathersit
# bike subset
bike_subset <- bike_sharing_daily %>% select(dteday, cnt)

# plot dtedate against cnt
bike_subset %>% plot_time_series(.date_var = dteday, .value = cnt, .title = "Count of People Sharing bikes in DC over time")
Jan 2011Apr 2011Jul 2011Oct 2011Jan 2012Apr 2012Jul 2012Oct 20120100020003000400050006000700080009000
Count of People Sharing bikes in DC over time
# split data into training and testing sets------------

splits <- time_series_split(data = bike_subset, date_var = dteday, assess = "3 months", cumulative = TRUE)

# visualize splits----------------
splits %>% 
  tk_time_series_cv_plan() %>% # extract time series cross validation plan 
  plot_time_series_cv_plan(.date_var = dteday, .value = cnt) # plot cv plan
Jan 2011Apr 2011Jul 2011Oct 2011Jan 2012Apr 2012Jul 2012Oct 20120100020003000400050006000700080009000
LegendtrainingtestingTime Series Cross Validation PlanSlice1
# Forecast-------------------

# arima model
# use modeltime's arima_reg() to fit an autoregressive arima model
# set_engine connects the model to the auto.reg() from forecast library
# fit trains the model to learn from the training data set.
model_arima <- arima_reg() %>% 
  set_engine("auto_arima") %>% 
  fit(cnt ~ dteday, training(splits))
frequency = 7 observations per 1 week
# prophet model
# seasonality = TRUE tells model to check the yearly seasonality in the data
model_prophet <- prophet_reg(seasonality_yearly = TRUE) %>% 
  set_engine("prophet") %>% 
    fit(cnt ~ dteday, training(splits))
Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# machine learning---GLM--
# use a penalized regression model
# expand fit to include weekday, month and a numeric trend
model_glmnet <- linear_reg(penalty = 0.01) %>% 
  set_engine("glmnet") %>% 
  fit(
    cnt ~ lubridate::wday(dteday, label = TRUE)
        + lubridate::month(dteday, label = TRUE)
        + as.numeric(dteday),
    training(splits)
  )



# Organize models with modeltime workflow-------------
  # create modeltime table,
  # calibrate(forecast test set & test accuracy)
  # refit model with orginal data

# modeltime table
model_tbl <- modeltime_table(model_arima, model_prophet, model_glmnet)


# calibration
# this calculates the residuals(errors) for our testing test that was unseen during training
calib_tbl <- model_tbl %>% 
  modeltime_calibrate(new_data = testing(splits))

# measure accuracy from our test set predictions
calib_tbl %>% modeltime_accuracy()
# A tibble: 3 × 9
  .model_id .model_desc             .type   mae  mape  mase smape  rmse   rsq
      <int> <chr>                   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1         1 ARIMA(0,1,3) WITH DRIFT Test  2540.  475.  2.74  46.0 3188. 0.390
2         2 PROPHET                 Test  1220.  365.  1.32  28.7 1763. 0.437
3         3 GLMNET                  Test  1289.  373.  1.39  29.7 1835. 0.247
# Visualize test set predictions 
calib_tbl %>% 
  modeltime_forecast(
    new_data = testing(splits),
    actual_data = bike_subset
  ) %>% 
  plot_modeltime_forecast()
Jan 2011Jul 2011Jan 2012Jul 201202k4k6k8k10k12k14k
LegendACTUAL1_ARIMA(0,1,3) WITH DRIFT2_PROPHET3_GLMNETForecast Plot
# forecast future
calib_tbl %>% 
  modeltime_refit(bike_subset) %>% 
  modeltime_forecast(h = "3 months", actual_data = bike_subset) %>% 
  plot_modeltime_forecast()
frequency = 7 observations per 1 week
Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
Jan 2011Jul 2011Jan 2012Jul 2012Jan 2013−4k−2k02k4k6k8k10k
LegendACTUAL1_UPDATE: ARIMA(1,1,1)(1,0,2)[7]2_PROPHET3_GLMNETForecast Plot