Add regression script
This commit is contained in:
parent
387b976f81
commit
06c68c5167
99
regression.py
Normal file
99
regression.py
Normal file
@ -0,0 +1,99 @@
|
||||
import datetime
|
||||
from pathlib import Path
|
||||
|
||||
import fire
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
import scipy
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
from wsgi import create_fig, create_plot_df, plot
|
||||
|
||||
import matplotlib.dates as mdates
|
||||
import matplotlib.pyplot as plt
|
||||
import matplotlib.ticker as mtick
|
||||
|
||||
|
||||
def create_dfs():
    """Load the most recent survey sheets and build the plotting frame.

    The entry of ``data/`` that sorts last determines the key: the first
    ten characters of its file name (presumably a date prefix -- confirm
    against the naming scheme of the data directory).  For that key the
    "digital" sheet is read from both ``<key>_data.ods`` and
    ``<key>_state_data.ods`` with the first column as index, and the
    "Digitale Befragung" column is cast to the nullable ``Int32`` dtype.

    Returns:
        A tuple ``(df, df_state, plot_df)`` where ``plot_df`` is whatever
        ``create_plot_df(None, None)`` returns.

    Raises:
        IndexError: if the ``data`` directory contains no entries.
    """
    data_dir = Path("data")
    newest_entry = sorted(data_dir.iterdir())[-1]
    key = newest_entry.name[:10]

    frames = []
    for file_name in (f"{key}_data.ods", f"{key}_state_data.ods"):
        with (data_dir / file_name).open("rb") as handle:
            sheet = pd.read_excel(handle, sheet_name="digital", index_col=0)
        # Nullable integer dtype, applied identically to both sheets.
        frames.append(sheet.astype({"Digitale Befragung": "Int32"}))
    df, df_state = frames

    plot_df = create_plot_df(None, None)

    return df, df_state, plot_df
|
||||
|
||||
|
||||
def main():
    """Plot a linear projection of survey participation and save it as PNG.

    Steps:
      1. Draw the base figure via ``plot`` using the frame from
         ``create_dfs``.
      2. Sum ``plot_df`` row-wise (rows containing NaN are dropped, the
         first three remaining rows are skipped) and fit a linear
         regression against the index cast to int64 (presumably
         nanosecond timestamps of a DatetimeIndex -- confirm against
         ``create_plot_df``).
      3. Print R^2, the projected value at the target date and at the
         current time, and the dates at which the regression line reaches
         1500, 2500 and 3500.
      4. Overlay the regression line and a straight "target line" from the
         latest observed value to 3500 at the target date, then write the
         figure to ``plots/regression.png``.

    The function returns nothing; its results are the printed report and
    the saved figure.
    """
    goal = 3500
    num_skipped_days = 2
    target_time = pd.Timestamp("2023-10-01")

    # Only the combined plotting frame is needed; the raw sheets are unused.
    _, _, plot_df = create_dfs()

    plot(plot_df, landesbez_str=[None], max_shading_date="2023-10-02")

    plt.gcf().set_size_inches(10, 5)

    # Keep the left limit chosen by plot(), extend the right one past the
    # target date, and leave 2.5 % head room above the goal.
    left_limit = plt.xlim()[0]
    plt.xlim(left_limit, pd.Timestamp("2023-10-02"))
    plt.ylim(0, goal * 1.025)

    data = plot_df.dropna().sum(1)
    data = data.iloc[3:]
    casted_timepoints = data.index.to_numpy().astype(np.int64)
    reg = scipy.stats.linregress(casted_timepoints, data)
    r_squared = reg.rvalue**2

    print(f"Regression R^2: {r_squared:.6f}")

    # Daily grid starting at 10:00 on 2023-08-21, with the exact target
    # time appended as the final point.
    date_range = pd.date_range(start="2023-08-21 10:00:00", end=target_time)
    date_range = date_range.to_series(index=np.arange(len(date_range)))
    date_range.loc[len(date_range)] = target_time

    def regression_curve(timepoints):
        """Evaluate the fitted line at an array of datetime64 values."""
        return reg.intercept + reg.slope * timepoints.astype(np.int64)

    vals = regression_curve(date_range.to_numpy())
    print(f"Projizierte Teilnahme am {target_time}: {vals[-1]:.2f}")
    now = pd.Timestamp.now()
    print(f"Projizierte Teilnahme jetzt: {regression_curve(pd.Series([now]).to_numpy()).item():.2f}")

    print()

    for target in [1500, 2500, goal]:
        # Invert the regression line to get the time at which it hits `target`.
        target_reached_date = (target - reg.intercept) / reg.slope
        print(f"Ziel {target} erreicht am {pd.Timestamp(target_reached_date).strftime('%Y-%m-%d %X')}")

    x = date_range.to_numpy().astype(np.int64)
    # Position on the date grid where the target line starts: position of
    # the latest observation in `data`, shifted by the skipped days.
    start_pos = data.index.argmax() + num_skipped_days
    curr_time = x[start_pos]

    # Use .iloc for positional access: integer keys on a Series that is not
    # integer-indexed are deprecated as positions in pandas >= 2.1 and will
    # be treated as labels in a future version.
    latest_value = data.iloc[-1]
    delta = goal - latest_value

    target_line = latest_value + delta / (x[-1] - curr_time) * (x[start_pos:] - curr_time)

    plt.plot(date_range, vals, label=f"Lineare Regression ($R^2={r_squared:.3f}$)", color="tab:blue", zorder=1)
    plt.plot(date_range.iloc[start_pos:], target_line, label="Ziellinie", color="tab:orange", linestyle=":", zorder=1)
    plt.xlabel("Zeit in Tagen ab dem 15.08.")
    plt.axvline(x=target_time, color="tab:red", linestyle="--")
    plt.legend()

    axis = plt.gca()
    axis.xaxis.set_major_locator(matplotlib.ticker.NullLocator())
    axis.xaxis.set_major_formatter(mdates.DateFormatter("%d.%m."))

    # The only tick shown on the x axis is the target date.
    axis.set_xticks([target_time])
    plt.title("Projektion Teilnahme an Digitaler Beschäftigtenbefragung")

    # savefig does not create missing directories.
    Path("plots").mkdir(parents=True, exist_ok=True)
    plt.savefig("plots/regression.png")
|
||||
|
||||
|
||||
# Script entry point: hand main() to python-fire, which runs it as the
# command-line interface when this file is executed directly.
if __name__ == "__main__":
    fire.Fire(main)
|
||||
Loading…
x
Reference in New Issue
Block a user