Compare commits
No commits in common. "76d890980ced0224bc61546381a894aace9da8fd" and "387b976f810d412d870eb4bad087b80c0d4e3fe6" have entirely different histories.
76d890980c
...
387b976f81
115
regression.py
115
regression.py
@ -1,115 +0,0 @@
|
|||||||
import datetime
|
|
||||||
from pathlib import Path
|
|
||||||
|
|
||||||
import fire
|
|
||||||
import matplotlib
|
|
||||||
import matplotlib.dates as mdates
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import matplotlib.ticker as mtick
|
|
||||||
import numpy as np
|
|
||||||
import pandas as pd
|
|
||||||
import scipy
|
|
||||||
|
|
||||||
from wsgi import create_fig, create_plot_df, plot
|
|
||||||
|
|
||||||
|
|
||||||
def create_dfs():
    """Load the most recent survey spreadsheets and build the plotting frame.

    The lexicographically last entry of the ``data`` directory supplies a
    10-character key prefix. ``<key>_data.ods`` and ``<key>_state_data.ods``
    are then read from their ``digital`` sheet, using the first column as the
    index and casting the ``Digitale Befragung`` column to nullable ``Int32``.

    Returns:
        A ``(df, df_state, plot_df)`` tuple, where ``plot_df`` is whatever
        ``create_plot_df(None, None)`` returns.
    """
    data_dir = Path("data")

    # The newest file is taken to be the last one in sorted name order.
    # NOTE(review): the 10-character prefix is presumably an ISO date
    # (YYYY-MM-DD) -- confirm against the actual file names.
    newest_entry = sorted(data_dir.iterdir())[-1]
    key = newest_entry.name[:10]

    def read_digital_sheet(filename):
        # Both spreadsheets share the same layout, so they are read the same way.
        with (data_dir / filename).open("rb") as handle:
            sheet = pd.read_excel(handle, sheet_name="digital", index_col=0)
            return sheet.astype({"Digitale Befragung": "Int32"})

    df = read_digital_sheet(f"{key}_data.ods")
    df_state = read_digital_sheet(f"{key}_state_data.ods")

    plot_df = create_plot_df(None, None)

    return df, df_state, plot_df
|
|
||||||
|
|
||||||
|
|
||||||
def main():
    """Fit a linear trend to the participation data and save a projection plot.

    Steps:
      1. Draw the base figure through ``plot`` and fix its size and axis limits.
      2. Sum the columns of the plotting frame per row, drop the first three
         rows, and fit a linear regression against the index cast to int64.
      3. Print the R^2, the projected participation at the target date and now,
         and the dates at which the targets 1500, 2500 and 3500 are reached
         according to the fit.
      4. Overlay the regression line and a straight "target line" towards 3500
         and write the figure to ``plots/regression.png``.
    """
    # Only the plotting frame is used here; the two raw sheets are discarded.
    _, _, plot_df = create_dfs()

    plot(plot_df, landesbez_str=[None], max_shading_date="2023-10-02")

    plt.gcf().set_size_inches(10, 5)

    target_time = pd.Timestamp("2023-10-01")
    left_limit, _ = plt.xlim()
    plt.xlim(left_limit, pd.Timestamp("2023-10-02"))

    plt.ylim(0, 3500 * 1.025)

    # Row-wise total over all columns of the rows without missing values.
    data = plot_df.dropna().sum(axis=1)
    # NOTE(review): the first three rows are excluded from the fit; the reason
    # is not visible here -- presumably an irregular start-up phase, confirm.
    data = data.iloc[3:]
    # NOTE(review): assumes the index is datetime-like so the int64 cast yields
    # the same time unit as the int64 cast of `date_range` below -- confirm.
    casted_timepoints = data.index.to_numpy().astype(np.int64)
    reg = scipy.stats.linregress(casted_timepoints, data)

    print(f"Regression R^2: {reg.rvalue**2:.6f}")

    # Daily time points from the start date, with the exact target time
    # appended as the final element.
    date_range = pd.date_range(start="2023-08-21 10:00:00", end=target_time)
    date_range = date_range.to_series(index=np.arange(len(date_range)))
    date_range.loc[len(date_range)] = target_time

    def regression_curve(timestamps):
        # Evaluate the fitted line at datetime values (cast to int64 first).
        return reg.intercept + reg.slope * timestamps.astype(np.int64)

    vals = regression_curve(date_range.to_numpy())
    print(f"Projizierte Teilnahme am {target_time}: {vals[-1]:.2f}")
    now = pd.Timestamp.now()
    print(
        f"Projizierte Teilnahme jetzt: {regression_curve(pd.Series([now]).to_numpy()).item():.2f}"
    )

    print()

    for target in [1500, 2500, 3500]:
        # Invert the fitted line to get the time at which `target` is reached.
        target_reached_date = (target - reg.intercept) / reg.slope
        print(
            f"Ziel {target} erreicht am {pd.Timestamp(target_reached_date).strftime('%Y-%m-%d %X')}"
        )

    # NOTE(review): offset between positions in `data` and in `date_range`;
    # the value 2 is taken over unchanged -- confirm it still matches the data.
    num_skipped_days = 2

    x = date_range.to_numpy().astype(np.int64)
    start_pos = data.index.argmax() + num_skipped_days
    curr_time = x[start_pos]

    # Use positional access: `data[-1]` on a non-integer index relies on a
    # deprecated fallback of Series.__getitem__.
    last_value = data.iloc[-1]
    delta = 3500 - last_value

    # Straight line from the latest observed value up to 3500 at the last
    # time point.
    target_line = last_value + delta / (x[-1] - curr_time) * (
        x[start_pos:] - curr_time
    )

    plt.plot(
        date_range,
        vals,
        label=f"Lineare Regression ($R^2={reg.rvalue**2:.3f}$)",
        color="tab:blue",
        zorder=1,
    )
    plt.plot(
        date_range[start_pos:],
        target_line,
        label="Ziellinie",
        color="tab:orange",
        linestyle=":",
        zorder=1,
    )
    plt.xlabel("Zeit in Tagen ab dem 15.08.")
    plt.axvline(x=target_time, color="tab:red", linestyle="--")
    plt.legend()

    axes = plt.gca()
    # Clear the automatic ticks; the only tick shown is the target date.
    axes.xaxis.set_major_locator(matplotlib.ticker.NullLocator())
    axes.xaxis.set_major_formatter(mdates.DateFormatter("%d.%m."))
    axes.set_xticks([target_time])

    plt.title("Projektion Teilnahme an Digitaler Beschäftigtenbefragung")

    # savefig does not create missing directories, so make sure it exists.
    Path("plots").mkdir(parents=True, exist_ok=True)
    plt.savefig("plots/regression.png")
|
|
||||||
|
|
||||||
|
|
||||||
# Script entry point: expose `main` as a command-line interface via python-fire.
if __name__ == "__main__":
    fire.Fire(main)
|
|
||||||
Loading…
x
Reference in New Issue
Block a user