---
title: EPF dataset
keywords: fastai
sidebar: home_sidebar
summary: "Download the EPF dataset."
description: "Download the EPF dataset."
nb_path: "nbs/data_datasets__epf.ipynb"
---
# Load one EPF market and report the size of its train / validation split.
args = pd.Series({'dataset': 'NP'})
Y_df, Xt_df, _ = EPF.load(directory='data', group=args.dataset)

# train_outsample_mask: 1 keeps the row for training, 0 masks it out.
# The trailing `offset` rows (2 * 365 * 24) are the held-out part.
offset = 365 * 24 * 2
train_outsample_mask = np.ones(len(Y_df))
train_outsample_mask[-offset:] = 0

# Ones in the mask are training rows; zeros are the held-out rows.
n_train_hours = np.sum(train_outsample_mask)
n_validation_hours = np.sum(1 - train_outsample_mask)
hours_per_year = 24 * 365

print(f'Dataset: {args.dataset}')
print(f'Train mask percentage: {np.round(n_train_hours/len(train_outsample_mask),2)}')
print('X: time series features, of shape (#hours, #times,#features): \t' + str(Xt_df.shape))
print('Y: target series (in X), of shape (#hours, #times): \t \t' + str(Y_df.shape))
print(f'Last ds {Y_df.ds.max()}')
# "Train" must report the kept (mask == 1) rows and "Validation" the masked
# (mask == 0) rows, matching the mask convention stated above.
print(f'Train {n_train_hours} hours = {np.round(n_train_hours/hours_per_year,2)} years')
print(f'Validation {n_validation_hours} hours = {np.round(n_validation_hours/hours_per_year,2)} years')
print('\n')
# Plotting setup shared by the figure cells that follow: imports, global
# Matplotlib style, font family and the base font size.
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
# NOTE: this re-binds `plt` to pylab; every later `plt.*` call goes through it.
import pylab as plt
from pylab import rcParams
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases no longer accept the old name. Try the historical name first
# (older installs) and fall back to the renamed style when it is not found.
try:
    plt.style.use('seaborn-whitegrid')
except OSError:
    plt.style.use('seaborn-v0_8-whitegrid')
# Re-binds the name `rcParams` (already imported from pylab above).
from matplotlib import rcParams
plt.rcParams['font.family'] = 'serif'
# Base font size for axis labels; tick labels use FONTSIZE - 2.
FONTSIZE = 22
from nixtlats.data.datasets.epf import EPF, EPFInfo
# Plot the price series of every EPF market on one figure (3x2 grid, five
# panels) and save it to ./results/market_plots.pdf.

# exist_ok=True replaces the check-then-create pattern, which can fail if the
# directory appears between the existence check and the makedirs call.
os.makedirs('./results/', exist_ok=True)

dataset = ['NP', 'PJM', 'BE', 'FR', 'DE']
Y_df, X_df, S_df = EPF.load_groups(directory='data', groups=dataset)

fig = plt.figure(figsize=(20, 15))
# Do not call plt.ylim() / fig.tight_layout() before the grid is built:
# with no axes on the figure, plt.ylim() implicitly creates a full-figure
# axes that ends up behind the subplots, and tight_layout() has nothing to
# lay out. Limits are set per panel inside the loop instead.

# Only changes the default size of figures created after this point.
rcParams['figure.figsize'] = 15, 15

ax0 = plt.subplot2grid((3, 2), (0, 0))
ax1 = plt.subplot2grid((3, 2), (0, 1))
ax2 = plt.subplot2grid((3, 2), (1, 0))
ax3 = plt.subplot2grid((3, 2), (1, 1))
ax4 = plt.subplot2grid((3, 2), (2, 0))
axs = [ax0, ax1, ax2, ax3, ax4]

for idx, market in enumerate(dataset):
    currency = 'USD' if market == 'PJM' else 'EUR'
    title_str = '' if market in ['PJM', 'NP'] else 'EPEX-'
    title_str += f'{market} market'
    y_axis_str = f'Price [{currency}/MWh]'

    # Filter the long-format frame once per market instead of once per column.
    market_df = Y_df[Y_df.unique_id == market]
    x_plot = market_df.ds.values
    y_plot = market_df.y.values
    x_axis_str = f'Hours [{str(x_plot.min())[:10]} to {str(x_plot.max())[:10]}]'

    axs[idx].plot(x_plot, y_plot, color='#628793', linewidth=0.4)
    axs[idx].tick_params(labelsize=FONTSIZE-2)
    axs[idx].set_xlabel(x_axis_str, fontsize=FONTSIZE)
    # Dashed marker 728*24 samples before the end of the series
    # (presumably the start of the held-out test period -- TODO confirm).
    axs[idx].vlines(x_plot[-728*24], -200, 800, linestyle=(0, (5, 10)),
                    color='black', linewidth=1.)
    axs[idx].set_ylabel(y_axis_str, fontsize=FONTSIZE)
    axs[idx].set_title(title_str)
    axs[idx].set_ylim(-200, 800)

plt.subplots_adjust(left=0.125, bottom=0.1, right=0.9, top=1.2, wspace=0.2, hspace=0.2)
plt.savefig('./results/market_plots.pdf', bbox_inches = 'tight')
plt.show()
from nixtlats.data.datasets.epf import EPF, EPFInfo
# Plot the price and the two exogenous series of a single market as three
# stacked panels and save the figure to ./results/NP.pdf.

# Axis labels for the two exogenous columns (Exogenous1, Exogenous2) of each
# market, in that order.
EXOGENOUS_NAMES = {'NP': ['Load [GW]', 'Wind Generation [GW]'],
                   'PJM': ['Load [GW]', 'COMED Load [GW]'],
                   'BE': ['Load [GW]', 'Total France Generation [GW]'],
                   'FR': ['Load [GW]', 'Total France Generation [GW]'],
                   'DE': ['TSO Zonal Load [GW]', 'DE Wind Generation [GW]']}

# Only the first entry is plotted. The vertical-marker y-ranges and the output
# file name below are hard-coded for 'NP'; adjust them when switching market.
dataset = ['NP']
Y_df, X_df, S_df = EPF.load_groups(directory='data', groups=dataset)

fig = plt.figure(figsize=(34, 12))
# Do not call plt.ylim() / fig.tight_layout() before the grid is built:
# with no axes on the figure, plt.ylim() implicitly creates a full-figure
# axes that ends up behind the subplots, and tight_layout() has nothing to
# lay out.
ax0 = plt.subplot2grid((3, 2), (0, 0))
ax1 = plt.subplot2grid((3, 2), (1, 0))
ax2 = plt.subplot2grid((3, 2), (2, 0))
axs = [ax0, ax1, ax2]

market = dataset[0]
currency = 'USD' if market == 'PJM' else 'EUR'
title_str = '' if market in ['PJM', 'NP'] else 'EPEX-'
title_str += f'{market} market'
y_axis_str = f'Price [{currency}/MWh]'

x_plot = Y_df.ds.values
x_plot_min = pd.to_datetime(x_plot.min()).strftime('%B %d, %Y')
x_plot_max = pd.to_datetime(x_plot.max()).strftime('%B %d, %Y')
x_axis_str = f'Hours [{x_plot_min} to {x_plot_max}]'

y_plot = Y_df.y.values
x1_plot = X_df.Exogenous1.values
# astype(float) returns a fresh float array, so blanking samples below
# neither mutates X_df nor fails on a read-only / integer-typed column.
x2_plot = X_df.Exogenous2.values.astype(float)

# Panel 0: price.
axs[0].plot(x_plot, y_plot, color='#628793', linewidth=0.4, alpha=1.)
# Dashed marker 728*24 samples before the end of the series
# (presumably the start of the held-out test period -- TODO confirm).
axs[0].vlines(x_plot[-728*24], 0, 200, linestyle=(0, (5, 10)),
              color='black', linewidth=1.9)
axs[0].tick_params(labelsize=FONTSIZE-2)
axs[0].set_xlabel(x_axis_str, fontsize=FONTSIZE)
axs[0].set_ylabel(y_axis_str, fontsize=FONTSIZE)

# Panel 1: first exogenous variable, divided by 1000 to match the axis label.
axs[1].plot(x_plot, x1_plot/1000, color='#628793', linewidth=0.37, alpha=0.8)
axs[1].vlines(x_plot[-728*24], 25, 72, linestyle=(0, (5, 10)),
              color='black', linewidth=1.9)
axs[1].tick_params(labelsize=FONTSIZE-2)
axs[1].set_xlabel(x_axis_str, fontsize=FONTSIZE)
axs[1].set_ylabel(EXOGENOUS_NAMES[market][0], fontsize=FONTSIZE)

# HACK: blank 60 samples on each side of the marker so the plotted line has
# a gap there and does not hide the dashed vertical marker.
x2_plot[-728*24-60:-728*24+60] = np.nan

# Panel 2: second exogenous variable, divided by 1000 to match the axis label.
axs[2].plot(x_plot, x2_plot/1000, color='#628793', linewidth=0.37, alpha=0.8)
axs[2].vlines(x_plot[-728*24], -.2, 5.2, linestyle=(0, (5, 10)),
              color='black', linewidth=1.9)
axs[2].tick_params(labelsize=FONTSIZE-2)
axs[2].set_xlabel(x_axis_str, fontsize=FONTSIZE)
axs[2].set_ylabel(EXOGENOUS_NAMES[market][1], fontsize=FONTSIZE)

plt.subplots_adjust(left=0.125, bottom=0.1, right=0.9, top=1.5, wspace=0.2, hspace=0.2)
plt.savefig('./results/NP.pdf', bbox_inches = 'tight')
plt.show()
import pandas as pd
# Plot the price series of a single market with two dashed markers near the
# end of the series and save it to ./results/train_methodology.pdf.

# Only the first entry is plotted; the marker y-range (0..200) is hard-coded
# for 'NP'.
dataset = ['NP']
Y_df, X_df, S_df = EPF.load_groups(directory='data', groups=dataset)

fig = plt.figure(figsize=(15.5, 5))
# Do not call plt.ylim() / fig.tight_layout() before the axes is created:
# with no axes on the figure, plt.ylim() implicitly creates a full-figure
# axes of its own, and tight_layout() has nothing to lay out.
ax0 = plt.subplot2grid((1, 1), (0, 0))
axs = [ax0]

market = dataset[0]
currency = 'USD' if market == 'PJM' else 'EUR'
title_str = '' if market in ['PJM', 'NP'] else 'EPEX-'
title_str += f'{market} market'
y_axis_str = f'Price [{currency}/MWh]'

x_plot = Y_df.ds.values
x_plot_min = pd.to_datetime(x_plot.min()).strftime('%B %d, %Y')
x_plot_max = pd.to_datetime(x_plot.max()).strftime('%B %d, %Y')
x_axis_str = f'Hours [{x_plot_min} to {x_plot_max}]'

y_plot = Y_df.y.values
# Not drawn in this figure; kept as notebook-level variables.
x1_plot = X_df.Exogenous1.values
x2_plot = X_df.Exogenous2.values

axs[0].plot(x_plot, y_plot, color='#628793', linewidth=0.4)
axs[0].tick_params(labelsize=FONTSIZE-2)
axs[0].set_xlabel(x_axis_str, fontsize=FONTSIZE)
axs[0].set_ylabel(y_axis_str, fontsize=FONTSIZE)
# Two dashed markers: one 728*24 samples before the end of the series and
# one a further 42*7*24 samples earlier (presumably the test and validation
# boundaries respectively -- TODO confirm).
axs[0].vlines(x_plot[-(42*7*24)-(728*24)], 0, 200, linestyle=(0, (5, 10)),
              color='black', linewidth=1.)
axs[0].vlines(x_plot[-728*24], 0, 200, linestyle=(0, (5, 10)),
              color='black', linewidth=1.)

plt.savefig('./results/train_methodology.pdf', bbox_inches = 'tight')
plt.show()
# Compare the observed series with the naive forecast over the first
# 24 * 7 * 3 rows of the forecast frame.
Y_hat_df = epf_naive_forecast(Y_df)

fig = plt.figure(figsize=(15.5, 5))
n_rows_shown = 24 * 7 * 3
for column, legend_label in (('y', 'true'), ('y_hat', 'naive')):
    plt.plot(Y_hat_df[column][:n_rows_shown], label=legend_label)
plt.legend()
plt.show()