"""Project participation in the digital employee survey via linear regression.

Loads the most recent survey export from ``data/``, fits a straight line to
the summed participation counts, prints the projected values and writes the
resulting figure to ``plots/regression.png``.
"""

import datetime
from pathlib import Path

import fire
import matplotlib
import matplotlib.dates as mdates
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np
import pandas as pd
import scipy.stats

from wsgi import create_fig, create_plot_df, plot

DATA_DIR = Path("data")
PLOT_PATH = Path("plots") / "regression.png"
SHEET_NAME = "digital"
SURVEY_COLUMN = "Digitale Befragung"

# Participation goal and the intermediate milestones reported on stdout.
TARGET_PARTICIPATION = 3500
MILESTONES = (1500, 2500, 3500)

# The projection is evaluated up to TARGET_TIME; the x-axis (and the shading
# drawn by ``plot``) extends to PLOT_END.
TARGET_TIME = pd.Timestamp("2023-10-01")
PLOT_END = "2023-10-02"
REGRESSION_START = "2023-08-21 10:00:00"

# Leading rows of the summed series that are excluded from the fit.
SKIPPED_LEADING_ROWS = 3


def _read_digital_sheet(path):
    """Read the "digital" sheet of *path* with the survey column as Int32."""
    with path.open("rb") as handle:
        frame = pd.read_excel(handle, sheet_name=SHEET_NAME, index_col=0)
    return frame.astype({SURVEY_COLUMN: "Int32"})


def _epoch_ns(values):
    """Return *values* (datetime-like array/Index/Series) as int64 epoch nanoseconds.

    The resolution is pinned to nanoseconds so that the fitted slope and
    intercept stay compatible with ``pd.Timestamp(<number>)``, which
    interprets plain numbers as nanoseconds.
    """
    return np.asarray(values).astype("datetime64[ns]").astype(np.int64)


def create_dfs():
    """Load the newest survey export and the plotting frame.

    The lexicographically last entry in ``data/`` determines the file key
    (the first 10 characters of its name -- presumably a date prefix such as
    ``YYYY-MM-DD``; confirm against the export naming scheme).

    Returns:
        A tuple ``(df, df_state, plot_df)`` where ``df`` comes from
        ``<key>_data.ods``, ``df_state`` from ``<key>_state_data.ods`` and
        ``plot_df`` from ``create_plot_df(None, None)``.

    Raises:
        IndexError: If ``data/`` contains no entries.
    """
    last_file = sorted(DATA_DIR.iterdir())[-1]
    key = last_file.name[:10]
    df = _read_digital_sheet(DATA_DIR / f"{key}_data.ods")
    df_state = _read_digital_sheet(DATA_DIR / f"{key}_state_data.ods")
    plot_df = create_plot_df(None, None)
    return df, df_state, plot_df


def main():
    """Fit the regression, print the projections and save the figure."""
    # The raw tables are still loaded (so a missing export fails early), but
    # only the plotting frame is needed below.
    _, _, plot_df = create_dfs()

    plot(plot_df, landesbez_str=[None], max_shading_date=PLOT_END)
    plt.gcf().set_size_inches(10, 5)
    left, _ = plt.xlim()
    plt.xlim(left, pd.Timestamp(PLOT_END))
    plt.ylim(0, TARGET_PARTICIPATION * 1.025)

    # Total participation per time point, without the first few rows.
    data = plot_df.dropna().sum(axis=1).iloc[SKIPPED_LEADING_ROWS:]
    reg = scipy.stats.linregress(_epoch_ns(data.index), data)
    print(f"Regression R^2: {reg.rvalue**2:.6f}")

    def project(epoch_ns):
        """Evaluate the fitted line at the given epoch-nanosecond values."""
        return reg.intercept + reg.slope * epoch_ns

    # Daily grid from the regression start, with the exact target time
    # appended as the final point.
    days = pd.date_range(start=REGRESSION_START, end=TARGET_TIME)
    date_range = days.to_series(index=np.arange(len(days)))
    date_range.loc[len(date_range)] = TARGET_TIME

    x = _epoch_ns(date_range)
    vals = project(x)
    print(f"Projizierte Teilnahme am {TARGET_TIME}: {vals[-1]:.2f}")
    now = pd.Timestamp.now()
    print(f"Projizierte Teilnahme jetzt: {project(_epoch_ns(pd.Series([now]))).item():.2f}")
    print()
    for target in MILESTONES:
        # Invert the fitted line: the time (in ns) at which it reaches `target`.
        target_reached_date = (target - reg.intercept) / reg.slope
        print(f"Ziel {target} erreicht am {pd.Timestamp(target_reached_date).strftime('%Y-%m-%d %X')}")

    num_skipped_days = 2
    # NOTE(review): the position of the newest observation in `data` is reused
    # as a position in `date_range`; this assumes both are aligned up to
    # `num_skipped_days` -- confirm against the data layout.
    start_idx = data.index.argmax() + num_skipped_days
    curr_time = x[start_idx]
    # Positional access: `data` is indexed by timestamps, so `data[-1]` would
    # be a label lookup on current pandas versions.
    last_value = data.iloc[-1]
    delta = TARGET_PARTICIPATION - last_value
    # Straight line from the latest value to the goal at the end of the grid.
    target_line = last_value + delta / (x[-1] - curr_time) * (x[start_idx:] - curr_time)

    plt.plot(
        date_range,
        vals,
        label=f"Lineare Regression ($R^2={reg.rvalue**2:.3f}$)",
        color="tab:blue",
        zorder=1,
    )
    plt.plot(
        date_range.iloc[start_idx:],
        target_line,
        label="Ziellinie",
        color="tab:orange",
        linestyle=":",
        zorder=1,
    )
    plt.xlabel("Zeit in Tagen ab dem 15.08.")
    plt.axvline(x=TARGET_TIME, color="tab:red", linestyle="--")
    plt.legend()

    axes = plt.gca()
    # NOTE(review): the original issued this call twice; the second one may
    # have been intended as `set_minor_locator` -- confirm.
    axes.xaxis.set_major_locator(mtick.NullLocator())
    axes.xaxis.set_major_formatter(mdates.DateFormatter("%d.%m."))
    # Show a single tick at the target date.
    axes.set_xticks([TARGET_TIME])
    plt.title("Projektion Teilnahme an Digitaler Beschäftigtenbefragung")

    # savefig does not create missing directories.
    PLOT_PATH.parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(PLOT_PATH)


if __name__ == "__main__":
    fire.Fire(main)