from mmm_functions import *

dataset = 'MMM_ts_manga5.csv'
df = pd.read_csv(
    dataset,
    parse_dates=['week'],
    index_col='week',
)

apply_effects_features = []

# 三角関数特徴量を追加したデータフレームを表示して確認
df = add_fourier_terms(
    df, 
    num=5, 
    seasonal=52.25
)

print(df)

                 sales  traditional   internet  promotional    t     sin_1  \
week                                                                         
2015-12-27   649315000    656100000          0            0    0  0.000000   
2016-01-03   958266000            0  178050000    188587500    1  0.119963   
2016-01-10   755973000            0  150450000    162037500    2  0.238193   
2016-01-17   551603000            0          0    126900000    3  0.352983   
2016-01-24   520183000            0  175500000            0    4  0.462674   
...                ...          ...        ...          ...  ...       ...   
2023-11-26   839617000            0  154950000            0  413 -0.565683   
2023-12-03   829314000            0          0    249187500  414 -0.462674   
2023-12-10   852512000            0  148200000            0  415 -0.352983   
2023-12-17   963276000    656100000          0            0  416 -0.238193   
2023-12-24  1297220000            0  178050000    188587500  417 -0.119963   

               cos_1     sin_2     cos_2     sin_3     cos_3     sin_4  \
week                                                                     
2015-12-27  1.000000  0.000000  1.000000  0.000000  1.000000  0.000000   
2016-01-03  0.992778  0.238193  0.971218  0.352983  0.935630  0.462674   
2016-01-10  0.971218  0.462674  0.886528  0.660522  0.750806  0.820348   
2016-01-17  0.935630  0.660522  0.750806  0.883026  0.469324  0.991849   
2016-01-24  0.886528  0.820348  0.571865  0.991849  0.127421  0.938256   
...              ...       ...       ...       ...       ...       ...   
2023-11-26  0.824623 -0.932951  0.360005 -0.972981 -0.230887 -0.671733   
2023-12-03  0.886528 -0.820348  0.571865 -0.991849  0.127421 -0.938256   
2023-12-10  0.935630 -0.660522  0.750806 -0.883026  0.469324 -0.991849   
2023-12-17  0.971218 -0.462674  0.886528 -0.660522  0.750806 -0.820348   
2023-12-24  0.992778 -0.238193  0.971218 -0.352983  0.935630 -0.462674   

               cos_4     sin_5     cos_5  
week                                      
2015-12-27  1.000000  0.000000  1.000000  
2016-01-03  0.886528  0.565683  0.824623  
2016-01-10  0.571865  0.932951  0.360005  
2016-01-17  0.127421  0.972981 -0.230887  
2016-01-24 -0.345941  0.671733 -0.740793  
...              ...       ...       ...  
2023-11-26 -0.740793 -0.134872 -0.990863  
2023-12-03 -0.345941 -0.671733 -0.740793  
2023-12-10  0.127421 -0.972981 -0.230887  
2023-12-17  0.571865 -0.932951  0.360005  
2023-12-24  0.886528 -0.565683  0.824623  

[418 rows x 15 columns]

# 直近5年間（52.25週×5年）のデータに絞り
# 目的変数yと説明変数Xに分解

# 対象データ期間の設定
data_term = int(52.25 * 5)

# 説明変数Xと目的変数yに分解
X = df.drop(columns=['sales'])[-data_term:]
y = df['sales'][-data_term:]

y.plot()
plt.show()

X.plot(subplots=True,figsize=(12,X.shape[1]*3))
plt.show()

study = run_optimization(
    regarima_objective, 
    X, y, 
    apply_effects_features, 
    n_trials=1000
)

Best trial: 807. Best value: 2.69083e+08: 100%|██████████| 1000/1000 [2:18:11<00:00,  8.29s/it]Best trial:
Value: 269083260.37553835
Params: 
alpha: 0.5527984492087785

trained_model,model_params,pred = create_model_from_trial_regarima(
    study.best_trial, 
    X, y, 
    apply_effects_features
)

RMSE: 219276071.56580266
MAE: 163652742.0577427
MAPE: 0.13104365126782422
R2: 0.9136130421675566

# 媒体特徴量のリストを作成
apply_effects_features = ['traditional', 'internet', 'promotional']

# 貢献度の算出
contribution = calculate_and_plot_contribution(y, X, trained_model, (0, np.max(y)*1.1), apply_effects_features)

# 数値を表示
print(contribution)

                    Base   traditional      internet   promotional
week                                                              
2018-12-30  1.940607e+08  0.000000e+00  2.894843e+08  0.000000e+00
2019-01-06  1.599915e+08  0.000000e+00  2.922605e+08  0.000000e+00
2019-01-13  1.850764e+08  0.000000e+00  2.744746e+08  0.000000e+00
2019-01-20  2.582967e+08  0.000000e+00  3.186712e+08  2.253031e+08
2019-01-27  3.410246e+08  0.000000e+00  3.122123e+08  3.348200e+08
...                  ...           ...           ...           ...
2023-11-26  4.961964e+08  0.000000e+00  3.434206e+08  0.000000e+00
2023-12-03  4.751738e+08  0.000000e+00  0.000000e+00  3.541402e+08
2023-12-10  5.136874e+08  0.000000e+00  3.388246e+08  0.000000e+00
2023-12-17  4.789076e+08  4.843684e+08  0.000000e+00  0.000000e+00
2023-12-24  4.946604e+08  0.000000e+00  4.643471e+08  3.382125e+08

[261 rows x 4 columns]

# 貢献度構成比の算出
contribution_results = summarize_and_plot_contribution(contribution)

# 数値を表示
print(contribution_results)

             contribution     ratio
Base         2.180671e+11  0.623925
traditional  2.679816e+10  0.076674
internet     6.963195e+10  0.199228
promotional  3.501109e+10  0.100172

# マーケティングROIの算出
ROI = calculate_marketing_roi(
    X[apply_effects_features], 
    contribution[apply_effects_features]
)

# 数値を表示
print(ROI)

traditional   -0.262161
internet       1.313511
promotional    0.616772
dtype: float64

# 散布図作成（売上貢献度×マーケティングROI）
data_to_plot = plot_scatter_of_contribution_and_roi(
    X[apply_effects_features], 
    contribution[apply_effects_features]
)

# 散布図の数値を表示
print(data_to_plot)

             contribution_percentage       ROI         cost
traditional                 0.203879 -0.262161  36319800000
internet                    0.529757  1.313511  30097950000
promotional                 0.266363  0.616772  21654937500

import matplotlib.pyplot as plt
import japanize_matplotlib
import pandas as pd
import numpy as np

# 月別アイスクリーム支出金額と海浜事故人数データの読み込み
df_icecream = pd.read_csv(
    'icecream2.csv', 
    index_col='month'
).sort_index()

print(df_icecream)

       ice_cream_sales  deaths_drowning
month                                  
1                  550               85
2                  500               92
3                  665              103
4                  797              131
5                 1063              128
6                 1142              132
7                 1688              228
8                 1710              272
9                 1234              155
10                 823              137
11                 682               93
12                 724              100

# グラフの描画

fig, ax1 = plt.subplots(figsize=(8, 5))

# グラフの色を設定
color_icecream = 'blue'  # アイスクリーム支出金額の線の色
color_drowning = 'red'   # 海浜事故人数の線の色

# アイスクリーム支出金額をプロット
ax1.set_xlabel('月')
ax1.set_ylabel(
    'アイスクリーム支出金額（1世帯あたり）', 
    color=color_icecream
)
line1 = ax1.plot(
    df_icecream.index, 
    df_icecream['ice_cream_sales'], 
    color=color_icecream, 
    label='アイスクリーム支出金額（1世帯あたり）'
)
ax1.tick_params(
    axis='y', 
    labelcolor=color_icecream
)
ax1.set_xticks(df_icecream.index)
ax1.set_ylim(
    bottom=0, 
    top=df_icecream['ice_cream_sales'].max() * 1.25
)

# 海浜事故人数に対応する第2のy軸を追加
ax2 = ax1.twinx()  # 第2y軸を共有する新しいAxesオブジェクトを作成
ax2.set_ylabel(
    '海浜事故人数', 
    color=color_drowning
)
line2 = ax2.plot(
    df_icecream.index, 
    df_icecream['deaths_drowning'], 
    color=color_drowning, 
    linestyle='--', 
    label='海浜事故人数'
)
ax2.tick_params(
    axis='y', 
    labelcolor=color_drowning
)
ax2.set_ylim(
    bottom=0, 
    top=df_icecream['deaths_drowning'].max() * 1.25
)

# 凡例を追加
lines = line1 + line2  # 凡例用の全線オブジェクトをリスト化
labels = [l.get_label() for l in lines]  # ラベルをリスト化
ax1.legend(lines, labels, loc="upper left")  # 凡例を左上に配置

# グラフのタイトルを設定
plt.title('アイスクリーム支出金額と海浜事故人数')

# グラフを表示
plt.show()

from statsmodels.tsa.stattools import adfuller

# 定常性検定の実行
# 最初の3つの説明変数について拡張ディッキー・フラー検定（ADFテスト）を実施し、結果を出力します

# 最初の3列だけ選択して検定を実行
for column in X.columns[:3]:
    # 各列のデータに対してADF検定を実施
    result = adfuller(X[column])

    # 結果をコンソールに出力
    print(column)  # 列名（変数名）を出力
    print('- Test Statistic:', result[0])  # テスト統計量を出力
    print('- p-value:', result[1])  # p値を出力
    print('- Critical Values:', result[4])  # 信頼水準における臨界値を出力

traditional
- Test Statistic: -10.159352311769545
- p-value: 7.579888911588827e-18
- Critical Values: {'1%': -3.4558530692911504, '5%': -2.872764881778665, '10%': -2.572751643088207}
internet
- Test Statistic: -15.681906957312055
- p-value: 1.4691906894548407e-28
- Critical Values: {'1%': -3.4557539868570775, '5%': -2.8727214497041422, '10%': -2.572728476331361}
promotional
- Test Statistic: -5.29778728915373
- p-value: 5.533738398557567e-06
- Critical Values: {'1%': -3.457105309726321, '5%': -2.873313676101283, '10%': -2.5730443824681606}

MMMによる振返り分析¶

準備¶

モジュールの読み込み¶

データセットの読み込み¶

アドストックを考慮する特徴量のリストを作成¶

季節性を表現する三角関数特徴量を追加¶

説明変数と目的変数の設定¶

視覚化¶

MMM構築¶

ハイパーパラメータ自動調整¶

MMMの構築¶

MMM結果¶

売上貢献度¶

マーケティングROI¶

売上貢献度×マーケティングROI¶

見せかけの回帰¶

説明変数（特徴量）に対するADF検定¶