# A bit of time series modeling, following Matt Dancho

# Load essential packages ----
pacman::p_load(tidyverse, lubridate, tidymodels, timetk, modeltime)

# Data: peek at the bundled daily bike-sharing dataset (ships with timetk)
head(bike_sharing_daily)
# Bike subset: keep only the date and total ride count ----
bike_subset <- bike_sharing_daily %>%
  select(dteday, cnt)

# Plot dteday against cnt
bike_subset %>%
  plot_time_series(
    .date_var = dteday,
    .value    = cnt,
    .title    = "Count of People Sharing bikes in DC over time"
  )
# Split data into training and testing sets ----
splits <- time_series_split(
  data       = bike_subset,
  date_var   = dteday,
  assess     = "3 months",  # hold out the final 3 months for testing
  cumulative = TRUE         # use all earlier observations for training
)

# Visualize the split ----
splits %>%
  tk_time_series_cv_plan() %>%  # extract the time-series cross-validation plan
  plot_time_series_cv_plan(.date_var = dteday, .value = cnt)  # plot the CV plan
# Forecast ----

# ARIMA model:
# modeltime's arima_reg() specifies an autoregressive ARIMA model;
# set_engine("auto_arima") connects it to auto.arima() from the forecast
# package, and fit() trains the model on the training split.
model_arima <- arima_reg() %>%
  set_engine("auto_arima") %>%
  fit(cnt ~ dteday, training(splits))
# Console message from auto_arima (not code):
# frequency = 7 observations per 1 week
# Prophet model:
# seasonality_yearly = TRUE tells the model to fit yearly seasonality
# in the data.
model_prophet <- prophet_reg(seasonality_yearly = TRUE) %>%
  set_engine("prophet") %>%
  fit(cnt ~ dteday, training(splits))
# Console message from prophet (not code):
# Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
# Machine learning: GLM ----
# Use a penalized (elastic-net) regression model via glmnet; expand the
# formula to include weekday, month, and a numeric trend.
model_glmnet <- linear_reg(penalty = 0.01) %>%
  set_engine("glmnet") %>%
  fit(
    cnt ~ lubridate::wday(dteday, label = TRUE)
      + lubridate::month(dteday, label = TRUE)
      + as.numeric(dteday),
    training(splits)
  )

# Organize models with the modeltime workflow ----
# 1. Create a modeltime table.
# 2. Calibrate (forecast the test set & measure accuracy).
# 3. Refit the models on the original data.

# Modeltime table
model_tbl <- modeltime_table(model_arima, model_prophet, model_glmnet)

# Calibration: computes the residuals (errors) on the testing set,
# which was unseen during training.
calib_tbl <- model_tbl %>%
  modeltime_calibrate(new_data = testing(splits))

# Measure accuracy from the test-set predictions
calib_tbl %>%
  modeltime_accuracy()
# Example console output from modeltime_accuracy() (not code):
# # A tibble: 3 × 9
#   .model_id .model_desc             .type   mae  mape  mase smape  rmse   rsq
#       <int> <chr>                   <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1         1 ARIMA(0,1,3) WITH DRIFT Test  2540.  475.  2.74  46.0 3188. 0.390
# 2         2 PROPHET                 Test  1220.  365.  1.32  28.7 1763. 0.437
# 3         3 GLMNET                  Test  1289.  373.  1.39  29.7 1835. 0.247
# Visualize test-set predictions ----
calib_tbl %>%
  modeltime_forecast(
    new_data    = testing(splits),
    actual_data = bike_subset
  ) %>%
  plot_modeltime_forecast()