From 20f3190d2fc26846f55328a7481de70e9fe3f84b Mon Sep 17 00:00:00 2001 From: rey-esp Date: Fri, 27 Dec 2024 11:14:42 -0600 Subject: [PATCH] docs: add python snippet for "Create the time series model" section of the Forecast a single time series with a univariate model tutorial (#1227) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * merge main * merge main * comments added - draft * fix test --------- Co-authored-by: Tim Sweña (Swast) --- ...ingle_timeseries_forecasting_model_test.py | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/samples/snippets/create_single_timeseries_forecasting_model_test.py b/samples/snippets/create_single_timeseries_forecasting_model_test.py index 0c694de2dc..0e69eba3dd 100644 --- a/samples/snippets/create_single_timeseries_forecasting_model_test.py +++ b/samples/snippets/create_single_timeseries_forecasting_model_test.py @@ -22,18 +22,48 @@ def test_create_single_timeseries() -> None: # Read and visualize the time series you want to forecast. 
df = bpd.read_gbq("bigquery-public-data.google_analytics_sample.ga_sessions_*") parsed_date = bpd.to_datetime(df.date, format="%Y%m%d", utc=True) + parsed_date.name = "parsed_date" visits = df["totals"].struct.field("visits") + visits.name = "total_visits" total_visits = visits.groupby(parsed_date).sum() # Expected output: total_visits.head() - # date + # parsed_date # 2016-08-01 00:00:00+00:00 1711 # 2016-08-02 00:00:00+00:00 2140 # 2016-08-03 00:00:00+00:00 2890 # 2016-08-04 00:00:00+00:00 3161 # 2016-08-05 00:00:00+00:00 2702 - # Name: visits, dtype: Int64 + # Name: total_visits, dtype: Int64 total_visits.plot.line() # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial] + + # [START bigquery_dataframes_single_timeseries_forecasting_model_tutorial_create] + from bigframes.ml import forecasting + import bigframes.pandas as bpd + + # Create a time series model to forecast total site visits: + # The auto_arima option defaults to True, so the auto.ARIMA algorithm automatically + # tunes the hyperparameters in the model. + # The data_frequency option defaults to 'auto_frequency', so the training + # process automatically infers the data frequency of the input time series. + # The decompose_time_series option defaults to True, so that information about + # the time series data is returned when you evaluate the model in the next step. + model = forecasting.ARIMAPlus() + model.auto_arima = True + model.data_frequency = "auto_frequency" + model.decompose_time_series = True + + # Use the data loaded in the previous step to fit the model + training_data = total_visits.to_frame().reset_index(drop=False) + + X = training_data[["parsed_date"]] + y = training_data[["total_visits"]] + + model.fit(X, y) + # [END bigquery_dataframes_single_timeseries_forecasting_model_tutorial_create] + assert model is not None + assert parsed_date is not None + assert total_visits is not None