Setup¶

In [ ]:
import warnings
warnings.filterwarnings("ignore")
In [ ]:
from datetime import datetime, timedelta
from openstef_dbc.log import logging
import numpy as np
import plotly.graph_objects as go

from app_settings import AppSettings

Settings = AppSettings()

logging.configure_logging(loglevel=Settings.loglevel, runtime_env=Settings.env)

Forecasting in practice with OpenSTEF¶

Create config object¶

In [ ]:
from openstef.data_classes.prediction_job import PredictionJobDataClass

prediction_job = PredictionJobDataClass(
    id=1337,
    model='xgb',
    quantiles=[0.1,0.3,0.5,0.7,0.9],
    forecast_type="demand",
    lat=52.0,
    lon=5.0,
    horizon_minutes=47*60,
    resolution_minutes=15,
    name="Example",
    default_modelspecs=None,
)

Load data¶

In [ ]:
import pandas as pd

data = pd.read_csv('data/example_input.csv', index_col='index', parse_dates=True)
In [ ]:
split = 200

data_train = data.iloc[:-split,:]
data_test = data.iloc[-split:,:]

data_forecast = data.copy()
data_forecast["load"].iloc[-split:] = np.nan

Visualize data¶

In [ ]:
data_train.head(5)
Out[ ]:
load APX clouds radiation temp winddeg windspeed windspeed_100m pressure humidity ... sjv_E1A sjv_E1B sjv_E1C sjv_E2A sjv_E2B sjv_E3A sjv_E3B sjv_E3C sjv_E3D sjv_E4A
index
2020-10-02 10:00:00+00:00 2.620000 34.0 99.758911 1.552899e+06 16.449036 154.711456 3.527778 9.349441 99453.476562 0.686240 ... 0.000031 0.000030 0.000029 0.000033 0.000032 0.000061 0.000048 0.000048 0.000031 0.0
2020-10-02 10:15:00+00:00 0.796667 34.0 99.819193 1.575618e+06 16.400948 157.491554 3.557639 9.232026 99416.363281 0.683780 ... 0.000032 0.000030 0.000029 0.000033 0.000032 0.000060 0.000048 0.000048 0.000031 0.0
2020-10-02 10:30:00+00:00 0.300000 34.0 99.879475 1.598338e+06 16.352859 160.271652 3.587500 9.114612 99379.250000 0.681319 ... 0.000032 0.000031 0.000029 0.000033 0.000031 0.000058 0.000048 0.000048 0.000031 0.0
2020-10-02 10:45:00+00:00 1.773333 34.0 99.939756 1.594736e+06 16.304771 163.051750 3.617361 8.997197 99342.136719 0.678859 ... 0.000032 0.000030 0.000029 0.000032 0.000031 0.000057 0.000048 0.000048 0.000031 0.0
2020-10-02 11:00:00+00:00 1.740000 28.8 100.000038 1.591135e+06 16.256683 165.831848 3.647222 8.879783 99305.023438 0.676398 ... 0.000031 0.000029 0.000027 0.000031 0.000030 0.000057 0.000048 0.000048 0.000031 0.0

5 rows × 25 columns

In [ ]:
import plotly.graph_objects as go

figure = go.Figure()

figure.add_scatter(x=data_train.index, y=data_train["load"], name="Measured")
figure.update_layout(title="Historic load")

figure.show()

Train model¶

In [ ]:
from openstef.pipeline.train_model import train_model_pipeline

train_model_pipeline(
    prediction_job,
    data_train,
    check_old_model_age=False,
    mlflow_tracking_uri=Settings.paths_mlflow_tracking_uri,
    artifact_folder=Settings.paths_artifact_folder,
)
2023-02-04 12:30:09 [info     ] Proloaf not available, setting constructor to None [openstef.model.model_creator] 
2023-02-04 12:30:09 [warning  ] feature_modules not an attribute of the old model, using None  [MLflowSerializer] experiment_name=1337
2023-02-04 12:30:09 [info     ] Model successfully loaded with MLflow [MLflowSerializer] 
2023-02-04 12:30:09 [info     ] Found 2 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.00017972681524083394 num_values=2 pj_id=1337
2023-02-04 12:30:09 [info     ] Removed 2 NaN values           [openstef.validation.validation] num_removed_values=2
2023-02-04 12:30:15 [info     ] Fitted a new model, not yet stored [root] 
2023-02-04 12:30:17 [info     ] New model is better than old model, continuing with training procces [openstef.pipeline.train_model] 
2023-02-04 12:30:20 [info     ] Model saved with MLflow        [MLflowSerializer] experiment_name=1337
2023-02-04 12:30:22 [info     ] Logged figures to MLflow.      [MLflowSerializer] 
2023-02-04 12:30:22 [info     ] Writing reports to ./output/artifacts/1337 [openstef.metrics.reporter] 

Create forecast¶

In [ ]:
from openstef.pipeline.create_forecast import create_forecast_pipeline

forecast = create_forecast_pipeline(
    prediction_job,
    data_forecast,
    mlflow_tracking_uri=Settings.paths_mlflow_tracking_uri,
)
2023-02-04 12:30:24 [warning  ] feature_modules not an attribute of the old model, using None  [MLflowSerializer] experiment_name=1337
2023-02-04 12:30:24 [info     ] Model successfully loaded with MLflow [MLflowSerializer] 
2023-02-04 12:30:24 [info     ] Model successfully loaded with MLflow [MLflowSerializer] 
2023-02-04 12:30:24 [info     ] Found 202 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.0178319209039548 num_values=202 pj_id=1337
2023-02-04 12:30:26 [info     ] Postproces in preparation of storing [openstef.postprocessing.postprocessing] 

Visualize forecast¶

In [ ]:
import plotly.graph_objects as go

figure = go.Figure()

figure.add_scatter(x=data_test.index, y=data_test["load"], name="Measured")
figure.add_scatter(x=forecast.index, y=forecast["forecast"], name="Forecasted")

figure.update_layout(title="Forecast 48h")
figure.show()

OpenSTEF in an operational setting¶

Connect to databases¶

In [ ]:
from openstef_dbc.database import DataBase

database = DataBase(Settings)

Retrieve config object¶

In [ ]:
pid = 321

database.get_prediction_job(pid)
2023-02-04 12:30:27 [info     ] package: mysql.connector.plugins [mysql.connector.authentication] 
2023-02-04 12:30:27 [info     ] plugin_name: mysql_native_password [mysql.connector.authentication] 
2023-02-04 12:30:27 [info     ] AUTHENTICATION_PLUGIN_CLASS: MySQLNativePasswordAuthPlugin [mysql.connector.authentication] 
Out[ ]:
PredictionJobDataClass(id=321, model='xgb', forecast_type='demand', horizon_minutes=2880, resolution_minutes=15, lat=52.067, lon=5.894, name='Location_B', train_components=True, description='Location_B_System_1+Location_B_System_2', quantiles=[0.05, 0.1, 0.3, 0.5, 0.7, 0.9, 0.95], train_split_func=None, backtest_split_func=None, train_horizons_minutes=None, default_modelspecs=None, save_train_forecasts=False, completeness_treshold=0.5, minimal_table_length=100, flatliner_treshold=24, depends_on=None, sid=None, turbine_type=None, n_turbines=None, hub_height=None, pipelines_to_run=[<PipelineType.TRAIN: 'train'>, <PipelineType.HYPER_PARMATERS: 'hyper_parameters'>, <PipelineType.FORECAST: 'forecast'>], alternative_forecast_model_pid=None)

Retrieve data¶

In [ ]:
now = datetime.utcnow()

data = database.get_model_input(
    pid=pid,
    datetime_start=now - timedelta(days=120),
    datetime_end=now + timedelta(days=1),
    location=(prediction_job.lat, prediction_job.lon),
).dropna()

data.head(5)
2023-02-04 12:30:28 [info     ] Combining sources into single dataframe [Weather] 
Out[ ]:
load radiation temp windspeed pressure
2022-10-23 13:30:00+00:00 -11.766667 315037.0 8.530 4.0600 69406.0
2022-10-23 13:45:00+00:00 -7.733333 235796.0 7.715 4.2075 77971.0
2022-10-23 14:00:00+00:00 -3.186667 156555.0 6.900 4.3550 86536.0
2022-10-23 14:15:00+00:00 1.106667 118011.5 6.085 4.5025 95101.0
2022-10-23 14:30:00+00:00 5.276667 79468.0 5.270 4.6500 103666.0
In [ ]:
from openstef.tasks import train_model

train_model.main(config=Settings, database=database)
2023-02-04 12:30:29 [info     ] Task started                   [openstef.tasks.utils.taskcontext] task=train_model
2023-02-04 12:30:29 [info     ] Querying prediction jobs from database [openstef.tasks.utils.taskcontext] model_type=['xgb', 'xgb_quantile', 'lgb', 'linear', 'proloaf'] task=train_model
2023-02-04 12:30:29 [info     ] Pre-loop completed             [openstef.tasks.utils.taskcontext] ktp_checkpoint=pre-loop ktp_runtime=0.1 task=train_model
2023-02-04 12:30:29 [info     ] Iteration started              [openstef.tasks.utils.taskcontext] datetime_end=datetime.datetime(2023, 2, 4, 12, 30, 29, 832705) iteration=0 num_jobs=4 pid=317 task=train_model
2023-02-04 12:30:29 [info     ] Added metadata to predictionjob completed [openstef.tasks.utils.taskcontext] iteration=0 ktp_checkpoint=Added metadata to PredictionJob ktp_runtime=0.0 task=train_model
2023-02-04 12:30:31 [info     ] Combining sources into single dataframe [Weather] 
2023-02-04 12:30:31 [info     ] Retrieved timeseries input completed [openstef.tasks.utils.taskcontext] iteration=0 ktp_checkpoint=Retrieved timeseries input ktp_runtime=1.315 task=train_model
2023-02-04 12:30:31 [warning  ] No old model found, training new model [openstef.pipeline.train_model] pid=317
2023-02-04 12:30:31 [info     ] Found 1545 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.13410294245291207 num_values=1545 pj_id=317
2023-02-04 12:30:31 [info     ] Removed 1545 NaN values        [openstef.validation.validation] num_removed_values=1545
2023-02-04 12:30:36 [info     ] Fitted a new model, not yet stored [root] 
2023/02/04 13:30:38 INFO mlflow.tracking.fluent: Experiment with name '317' does not exist. Creating a new experiment.
2023-02-04 12:30:38 [info     ] No previous model found in MLflow [MLflowSerializer] experiment_name=317
2023-02-04 12:30:40 [info     ] Model saved with MLflow        [MLflowSerializer] experiment_name=317
2023-02-04 12:30:42 [info     ] Logged figures to MLflow.      [MLflowSerializer] 
2023-02-04 12:30:42 [info     ] Writing reports to ./output/artifacts/317 [openstef.metrics.reporter] 
2023-02-04 12:30:43 [info     ] Model trained completed        [openstef.tasks.utils.taskcontext] iteration=0 ktp_checkpoint=Model trained ktp_runtime=12.166 task=train_model
2023-02-04 12:30:43 [info     ] Iteration completed            [openstef.tasks.utils.taskcontext] iteration=0 ktp_runtime=13.485 ktp_successful=1 task=train_model
2023-02-04 12:30:43 [info     ] Iteration started              [openstef.tasks.utils.taskcontext] datetime_end=datetime.datetime(2023, 2, 4, 12, 30, 29, 832705) iteration=1 num_jobs=4 pid=313 task=train_model
2023-02-04 12:30:43 [info     ] Added metadata to predictionjob completed [openstef.tasks.utils.taskcontext] iteration=1 ktp_checkpoint=Added metadata to PredictionJob ktp_runtime=0.0 task=train_model
2023-02-04 12:30:44 [info     ] Combining sources into single dataframe [Weather] 
2023-02-04 12:30:44 [info     ] Retrieved timeseries input completed [openstef.tasks.utils.taskcontext] iteration=1 ktp_checkpoint=Retrieved timeseries input ktp_runtime=1.31 task=train_model
2023-02-04 12:30:44 [warning  ] No old model found, training new model [openstef.pipeline.train_model] pid=313
2023-02-04 12:30:44 [info     ] Found 1545 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.13410294245291207 num_values=1545 pj_id=313
2023-02-04 12:30:44 [info     ] Removed 1545 NaN values        [openstef.validation.validation] num_removed_values=1545
2023-02-04 12:30:49 [info     ] Fitted a new model, not yet stored [root] 
2023/02/04 13:30:50 INFO mlflow.tracking.fluent: Experiment with name '313' does not exist. Creating a new experiment.
2023-02-04 12:30:50 [info     ] No previous model found in MLflow [MLflowSerializer] experiment_name=313
2023-02-04 12:30:53 [info     ] Model saved with MLflow        [MLflowSerializer] experiment_name=313
2023-02-04 12:30:55 [info     ] Logged figures to MLflow.      [MLflowSerializer] 
2023-02-04 12:30:55 [info     ] Writing reports to ./output/artifacts/313 [openstef.metrics.reporter] 
2023-02-04 12:30:56 [info     ] Model trained completed        [openstef.tasks.utils.taskcontext] iteration=1 ktp_checkpoint=Model trained ktp_runtime=11.813 task=train_model
2023-02-04 12:30:56 [info     ] Iteration completed            [openstef.tasks.utils.taskcontext] iteration=1 ktp_runtime=13.126 ktp_successful=1 task=train_model
2023-02-04 12:30:56 [info     ] Iteration started              [openstef.tasks.utils.taskcontext] datetime_end=datetime.datetime(2023, 2, 4, 12, 30, 29, 832705) iteration=2 num_jobs=4 pid=459 task=train_model
2023-02-04 12:30:56 [info     ] Added metadata to predictionjob completed [openstef.tasks.utils.taskcontext] iteration=2 ktp_checkpoint=Added metadata to PredictionJob ktp_runtime=0.0 task=train_model
2023-02-04 12:30:57 [info     ] Combining sources into single dataframe [Weather] 
2023-02-04 12:30:57 [info     ] Retrieved timeseries input completed [openstef.tasks.utils.taskcontext] iteration=2 ktp_checkpoint=Retrieved timeseries input ktp_runtime=1.291 task=train_model
2023-02-04 12:30:57 [warning  ] No old model found, training new model [openstef.pipeline.train_model] pid=459
2023-02-04 12:30:57 [info     ] Found 1545 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.13410294245291207 num_values=1545 pj_id=459
2023-02-04 12:30:57 [info     ] Removed 1545 NaN values        [openstef.validation.validation] num_removed_values=1545
2023-02-04 12:31:49 [info     ] Fitted a new model, not yet stored [root] 
2023/02/04 13:31:51 INFO mlflow.tracking.fluent: Experiment with name '459' does not exist. Creating a new experiment.
2023-02-04 12:31:51 [info     ] No previous model found in MLflow [MLflowSerializer] experiment_name=459
2023-02-04 12:31:53 [info     ] Model saved with MLflow        [MLflowSerializer] experiment_name=459
2023-02-04 12:31:55 [info     ] Logged figures to MLflow.      [MLflowSerializer] 
2023-02-04 12:31:55 [info     ] Writing reports to ./output/artifacts/459 [openstef.metrics.reporter] 
2023-02-04 12:31:56 [info     ] Model trained completed        [openstef.tasks.utils.taskcontext] iteration=2 ktp_checkpoint=Model trained ktp_runtime=59.099 task=train_model
2023-02-04 12:31:56 [info     ] Iteration completed            [openstef.tasks.utils.taskcontext] iteration=2 ktp_runtime=60.396 ktp_successful=1 task=train_model
2023-02-04 12:31:56 [info     ] Iteration started              [openstef.tasks.utils.taskcontext] datetime_end=datetime.datetime(2023, 2, 4, 12, 30, 29, 832705) iteration=3 num_jobs=4 pid=321 task=train_model
2023-02-04 12:31:56 [info     ] Added metadata to predictionjob completed [openstef.tasks.utils.taskcontext] iteration=3 ktp_checkpoint=Added metadata to PredictionJob ktp_runtime=0.0 task=train_model
2023-02-04 12:31:57 [info     ] Combining sources into single dataframe [Weather] 
2023-02-04 12:31:57 [info     ] Retrieved timeseries input completed [openstef.tasks.utils.taskcontext] iteration=3 ktp_checkpoint=Retrieved timeseries input ktp_runtime=0.955 task=train_model
2023-02-04 12:31:57 [warning  ] No old model found, training new model [openstef.pipeline.train_model] pid=321
2023-02-04 12:31:57 [info     ] Found 1545 values of constant load (repeated values), converted to NaN value. [openstef.validation.validation] cleansing_step=repeated_values frac_values=0.13410294245291207 num_values=1545 pj_id=321
2023-02-04 12:31:57 [info     ] Removed 1545 NaN values        [openstef.validation.validation] num_removed_values=1545
2023-02-04 12:32:03 [info     ] Fitted a new model, not yet stored [root] 
2023/02/04 13:32:04 INFO mlflow.tracking.fluent: Experiment with name '321' does not exist. Creating a new experiment.
2023-02-04 12:32:04 [info     ] No previous model found in MLflow [MLflowSerializer] experiment_name=321
2023-02-04 12:32:06 [info     ] Model saved with MLflow        [MLflowSerializer] experiment_name=321
2023-02-04 12:32:08 [info     ] Logged figures to MLflow.      [MLflowSerializer] 
2023-02-04 12:32:08 [info     ] Writing reports to ./output/artifacts/321 [openstef.metrics.reporter] 
2023-02-04 12:32:10 [info     ] Model trained completed        [openstef.tasks.utils.taskcontext] iteration=3 ktp_checkpoint=Model trained ktp_runtime=12.173 task=train_model
2023-02-04 12:32:10 [info     ] Iteration completed            [openstef.tasks.utils.taskcontext] iteration=3 ktp_runtime=13.133 ktp_successful=1 task=train_model
2023-02-04 12:32:10 [info     ] Loop completed                 [openstef.tasks.utils.taskcontext] jobs_started=4 jobs_successful=4 jobs_unsuccessful=0 ktp_checkpoint=loop ktp_runtime=100.148 num_jobs=4 successful=0 task=train_model
2023-02-04 12:32:10 [info     ] Task completed                 [openstef.tasks.utils.taskcontext] ktp_runtime=100.25 ktp_successful=1 task=train_model