Compare commits
11 Commits
3bb4f432e4
...
092a1d7417
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
092a1d7417 | ||
|
|
014217604c | ||
|
|
c5f6067e8b | ||
|
|
45647def39 | ||
|
|
11a4cf4248 | ||
|
|
dc80671295 | ||
|
|
046fce6bb0 | ||
|
|
204195ac06 | ||
|
|
6f29bdc6da | ||
|
|
7aca691596 | ||
|
|
ae6beafa3d |
@ -71,6 +71,24 @@ bundesland_dict = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
landesbezirk_dict = {
|
||||||
|
"100": "Nord",
|
||||||
|
"200": "Niedersachsen-Bremen",
|
||||||
|
"300": "Berlin-Brandenburg",
|
||||||
|
"400": "Nordrhein-Westfalen",
|
||||||
|
"500": "Rheinland-Pfalz-Saarland",
|
||||||
|
"600": "Hessen",
|
||||||
|
"700": "Sachsen, Sachsen-Anhalt, Thüringen",
|
||||||
|
"800": "Bayern",
|
||||||
|
"900": "Baden-Württemberg",
|
||||||
|
"1000": "Hamburg",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_landesbezirk(id: str):
|
||||||
|
return landesbezirk_dict[str((int(id) // 100) * 100)]
|
||||||
|
|
||||||
|
|
||||||
def get_bez_data(
|
def get_bez_data(
|
||||||
tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/"
|
tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/"
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
@ -90,22 +108,22 @@ def get_bez_data(
|
|||||||
|
|
||||||
def construct_dataframe(
|
def construct_dataframe(
|
||||||
bez_data: dict[str, dict],
|
bez_data: dict[str, dict],
|
||||||
grouped: bool = False,
|
|
||||||
special_tag: str | None = None,
|
special_tag: str | None = None,
|
||||||
no_processing: bool = False,
|
|
||||||
):
|
):
|
||||||
data = {}
|
data = {}
|
||||||
if not no_processing:
|
|
||||||
|
first_key = next(iter(bez_data.keys()))
|
||||||
|
if first_key in landesbezirk_dict:
|
||||||
|
data["Landesbezirk"] = pd.Series(
|
||||||
|
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||||
|
)
|
||||||
|
else:
|
||||||
data["Bundesland"] = pd.Series(
|
data["Bundesland"] = pd.Series(
|
||||||
[bundesland_dict[k] for k in bez_data], index=list(bez_data.keys())
|
[bundesland_dict[k] for k in bez_data], index=list(bez_data.keys())
|
||||||
)
|
)
|
||||||
data["Bezirk"] = pd.Series(
|
data["Bezirk"] = pd.Series(
|
||||||
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||||
)
|
)
|
||||||
else:
|
|
||||||
data["Landesbezirk"] = pd.Series(
|
|
||||||
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
|
||||||
)
|
|
||||||
|
|
||||||
tot_col_data = []
|
tot_col_data = []
|
||||||
tot_col_index = []
|
tot_col_index = []
|
||||||
@ -121,15 +139,7 @@ def construct_dataframe(
|
|||||||
tot_col_index.append(k)
|
tot_col_index.append(k)
|
||||||
|
|
||||||
data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index)
|
data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index)
|
||||||
df = pd.DataFrame(data=data)
|
return pd.DataFrame(data=data).astype({"Digitale Befragung": "Int32"})
|
||||||
df = df.astype({"Digitale Befragung": "Int32"})
|
|
||||||
|
|
||||||
if grouped and no_processing:
|
|
||||||
raise ValueError
|
|
||||||
elif grouped:
|
|
||||||
df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
|
|
||||||
|
|
||||||
return df
|
|
||||||
|
|
||||||
|
|
||||||
def main(
|
def main(
|
||||||
@ -138,7 +148,6 @@ def main(
|
|||||||
dry_run: bool = False,
|
dry_run: bool = False,
|
||||||
grouped: bool = False,
|
grouped: bool = False,
|
||||||
special_tag: str | None = None,
|
special_tag: str | None = None,
|
||||||
no_processing: bool = False,
|
|
||||||
folder: str = "data",
|
folder: str = "data",
|
||||||
name: str = "data",
|
name: str = "data",
|
||||||
sheet_name: str = "digital",
|
sheet_name: str = "digital",
|
||||||
@ -146,11 +155,12 @@ def main(
|
|||||||
bez_data = get_bez_data([tag], url)[0]
|
bez_data = get_bez_data([tag], url)[0]
|
||||||
df = construct_dataframe(
|
df = construct_dataframe(
|
||||||
bez_data=bez_data,
|
bez_data=bez_data,
|
||||||
grouped=grouped,
|
|
||||||
special_tag=special_tag,
|
special_tag=special_tag,
|
||||||
no_processing=no_processing,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if grouped:
|
||||||
|
df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
print(df)
|
print(df)
|
||||||
else:
|
else:
|
||||||
|
|||||||
168
wsgi.py
168
wsgi.py
@ -1,5 +1,8 @@
|
|||||||
import datetime
|
import datetime
|
||||||
import io
|
import io
|
||||||
|
import locale
|
||||||
|
import os
|
||||||
|
import time
|
||||||
from itertools import chain
|
from itertools import chain
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@ -11,7 +14,7 @@ import pandas as pd
|
|||||||
from flask import Flask, Markup, render_template, request
|
from flask import Flask, Markup, render_template, request
|
||||||
from flask_caching import Cache
|
from flask_caching import Cache
|
||||||
|
|
||||||
from download_digital import construct_dataframe, get_bez_data
|
from download_digital import construct_dataframe, get_bez_data, get_landesbezirk
|
||||||
|
|
||||||
config = {
|
config = {
|
||||||
"CACHE_TYPE": "FileSystemCache",
|
"CACHE_TYPE": "FileSystemCache",
|
||||||
@ -19,7 +22,9 @@ config = {
|
|||||||
"CACHE_THRESHOLD": 50,
|
"CACHE_THRESHOLD": 50,
|
||||||
"CACHE_DIR": "cache",
|
"CACHE_DIR": "cache",
|
||||||
}
|
}
|
||||||
import locale
|
|
||||||
|
os.environ["TZ"] = "Europe/Berlin"
|
||||||
|
time.tzset()
|
||||||
|
|
||||||
locale.setlocale(locale.LC_ALL, "de_DE.UTF-8")
|
locale.setlocale(locale.LC_ALL, "de_DE.UTF-8")
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
@ -30,28 +35,18 @@ cache = Cache(app)
|
|||||||
def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]:
|
def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]:
|
||||||
bez_data = get_bez_data(["bez_data_0", "bez_data_2"], url)
|
bez_data = get_bez_data(["bez_data_0", "bez_data_2"], url)
|
||||||
|
|
||||||
df = construct_dataframe(
|
df = construct_dataframe(bez_data=bez_data[0], special_tag="stud")
|
||||||
bez_data=bez_data[0],
|
df_state = construct_dataframe(bez_data=bez_data[1])
|
||||||
grouped=False,
|
|
||||||
special_tag="stud",
|
|
||||||
)
|
|
||||||
|
|
||||||
df_state = construct_dataframe(
|
|
||||||
bez_data=bez_data[1], grouped=False, no_processing=True
|
|
||||||
)
|
|
||||||
|
|
||||||
return df, df_state
|
return df, df_state
|
||||||
|
|
||||||
|
|
||||||
def plot(
|
def create_plot_df(
|
||||||
current_df: pd.DataFrame | None = None,
|
current_df: pd.DataFrame | None,
|
||||||
data_folder: str = "data",
|
data_folder: str,
|
||||||
sheet_name: str = "digital",
|
sheet_name: str,
|
||||||
total_target: int = 1500,
|
curr_datetime,
|
||||||
alpha: float | None = None,
|
) -> pd.DataFrame:
|
||||||
) -> str:
|
|
||||||
curr_datetime = datetime.datetime.now()
|
|
||||||
|
|
||||||
data_dict = {}
|
data_dict = {}
|
||||||
|
|
||||||
## Important: If multiple results are stored for the same date
|
## Important: If multiple results are stored for the same date
|
||||||
@ -61,31 +56,55 @@ def plot(
|
|||||||
|
|
||||||
for f in sorted(Path(data_folder).iterdir()):
|
for f in sorted(Path(data_folder).iterdir()):
|
||||||
with f.open("rb") as ff:
|
with f.open("rb") as ff:
|
||||||
df = pd.read_excel(ff, sheet_name=sheet_name)
|
df = pd.read_excel(ff, sheet_name=sheet_name, index_col=0)
|
||||||
|
|
||||||
|
if "Landesbezirk" not in df.columns:
|
||||||
|
df["Landesbezirk"] = df.index.map(get_landesbezirk)
|
||||||
|
|
||||||
df = df.astype({"Digitale Befragung": "Int32"})
|
df = df.astype({"Digitale Befragung": "Int32"})
|
||||||
sum_val = df[["Digitale Befragung"]].sum().iloc[0]
|
df = df.groupby("Landesbezirk")[["Digitale Befragung"]].sum()
|
||||||
|
|
||||||
key = f.name[:10]
|
key = f.name[:10]
|
||||||
data_dict[key] = sum_val
|
data_dict[key] = df["Digitale Befragung"]
|
||||||
|
|
||||||
data_dict["2023-08-15"] = 275
|
df = pd.DataFrame(data=data_dict).T
|
||||||
|
|
||||||
series = pd.Series(data_dict.values(), index=data_dict)
|
df.index = df.index.astype("datetime64[ns]") + pd.DateOffset(hours=10)
|
||||||
series.index = series.index.astype("datetime64[ns]") + pd.DateOffset(hours=10)
|
|
||||||
|
|
||||||
df = series.to_frame("Digitale Befragung")
|
|
||||||
df = df.reindex(
|
df = df.reindex(
|
||||||
pd.date_range(start="2023-08-15", end=curr_datetime)
|
pd.date_range(start="2023-08-15", end=curr_datetime) + pd.DateOffset(hours=10)
|
||||||
+ pd.DateOffset(hours=10)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
if current_df is not None:
|
if current_df is not None:
|
||||||
|
if "Landesbezirk" not in current_df.columns:
|
||||||
|
current_df["Landesbezirk"] = current_df.index.map(get_landesbezirk)
|
||||||
current_df = current_df.astype({"Digitale Befragung": "Int32"})
|
current_df = current_df.astype({"Digitale Befragung": "Int32"})
|
||||||
sum_val = current_df[["Digitale Befragung"]].sum().iloc[0]
|
current_df = current_df.groupby("Landesbezirk")[["Digitale Befragung"]].sum()
|
||||||
df.loc[curr_datetime] = sum_val
|
|
||||||
|
df.loc[curr_datetime] = current_df["Digitale Befragung"]
|
||||||
|
|
||||||
if pd.isna(df.loc[df.index.max()][0]):
|
if pd.isna(df.loc[df.index.max()][0]):
|
||||||
df = df.drop([df.index.max()])
|
df = df.drop([df.index.max()])
|
||||||
|
|
||||||
|
return df
|
||||||
|
|
||||||
|
|
||||||
|
def plot(
|
||||||
|
current_df: pd.DataFrame | None = None,
|
||||||
|
data_folder: str = "data",
|
||||||
|
sheet_name: str = "digital",
|
||||||
|
total_targets: tuple[int, ...] = (1500,),
|
||||||
|
alpha: float | None = None,
|
||||||
|
landesbez_str: str | None = None,
|
||||||
|
) -> str:
|
||||||
|
curr_datetime = datetime.datetime.now()
|
||||||
|
df = create_plot_df(
|
||||||
|
current_df=current_df,
|
||||||
|
data_folder=data_folder,
|
||||||
|
sheet_name=sheet_name,
|
||||||
|
curr_datetime=curr_datetime,
|
||||||
|
)
|
||||||
|
|
||||||
fig = plt.figure(dpi=300)
|
fig = plt.figure(dpi=300)
|
||||||
|
|
||||||
# fill weekends
|
# fill weekends
|
||||||
@ -95,39 +114,51 @@ def plot(
|
|||||||
if day.weekday() >= 5:
|
if day.weekday() >= 5:
|
||||||
plt.gca().axvspan(days[idx], days[idx + 1], alpha=0.2, color="gray")
|
plt.gca().axvspan(days[idx], days[idx + 1], alpha=0.2, color="gray")
|
||||||
|
|
||||||
if alpha is not None:
|
series = df.sum(axis=1) if landesbez_str is None else df[landesbez_str]
|
||||||
plt.fill_between(
|
plot_df = series.to_frame("Digitale Befragung").replace(0, np.nan)
|
||||||
df.dropna().index,
|
plot_df = plot_df.astype({"Digitale Befragung": "float32"})
|
||||||
df.dropna()["Digitale Befragung"],
|
if not pd.isna(plot_df).all().item():
|
||||||
|
if alpha is not None:
|
||||||
|
plt.fill_between(
|
||||||
|
plot_df.dropna().index,
|
||||||
|
plot_df.dropna()["Digitale Befragung"],
|
||||||
|
color="#e4004e",
|
||||||
|
alpha=alpha,
|
||||||
|
)
|
||||||
|
|
||||||
|
plt.plot(
|
||||||
|
plot_df.dropna().index,
|
||||||
|
plot_df.dropna()["Digitale Befragung"],
|
||||||
|
ls="--",
|
||||||
|
marker="o",
|
||||||
|
lw=1,
|
||||||
color="#e4004e",
|
color="#e4004e",
|
||||||
alpha=alpha,
|
markersize=4,
|
||||||
|
label=landesbez_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
plt.plot(
|
if current_df is not None:
|
||||||
df.dropna().index,
|
plt.annotate(
|
||||||
df.dropna()["Digitale Befragung"],
|
"Jetzt",
|
||||||
ls="--",
|
(
|
||||||
marker="o",
|
plot_df.dropna().index[-1],
|
||||||
lw=1,
|
plot_df.dropna()["Digitale Befragung"][-1] * 1.03,
|
||||||
color="#e4004e",
|
),
|
||||||
markersize=4,
|
fontsize=8,
|
||||||
)
|
ha="center",
|
||||||
|
)
|
||||||
|
|
||||||
if current_df is not None:
|
plt.plot(
|
||||||
plt.annotate(
|
plot_df.index,
|
||||||
"Jetzt",
|
plot_df["Digitale Befragung"],
|
||||||
(df.dropna().index[-1], df.dropna()["Digitale Befragung"][-1] * 1.03),
|
lw=1.5,
|
||||||
fontsize=8,
|
color="#e4004e",
|
||||||
ha="center",
|
label=landesbez_str,
|
||||||
)
|
)
|
||||||
|
|
||||||
plt.plot(df.index, df["Digitale Befragung"], lw=1.5, color="#e4004e")
|
|
||||||
|
|
||||||
plt.title("Teilnahme an Digitaler Beschäftigtenbefragung")
|
plt.title("Teilnahme an Digitaler Beschäftigtenbefragung")
|
||||||
plt.ylabel("# Teilnahmen")
|
plt.ylabel("# Teilnahmen")
|
||||||
plt.ylim(0, total_target + 100)
|
plt.ylim(0, total_targets[0] + 100)
|
||||||
|
|
||||||
# plt.gcf().autofmt_xdate()
|
|
||||||
|
|
||||||
# use timezone offset to center tick labels
|
# use timezone offset to center tick labels
|
||||||
plt.gca().xaxis.set_major_locator(
|
plt.gca().xaxis.set_major_locator(
|
||||||
@ -142,18 +173,24 @@ def plot(
|
|||||||
plt.gca().tick_params("x", length=0, which="major")
|
plt.gca().tick_params("x", length=0, which="major")
|
||||||
|
|
||||||
def val_to_perc(val):
|
def val_to_perc(val):
|
||||||
return 100 * val / total_target
|
return 100 * val / total_targets[0]
|
||||||
|
|
||||||
def perc_to_val(perc):
|
def perc_to_val(perc):
|
||||||
return perc * total_target / 100
|
return perc * total_targets[0] / 100
|
||||||
|
|
||||||
sec_ax = plt.gca().secondary_yaxis("right", functions=(val_to_perc, perc_to_val))
|
sec_ax = plt.gca().secondary_yaxis("right", functions=(val_to_perc, perc_to_val))
|
||||||
sec_ax.set_ylabel("# Teilnahmen [% Erfolg]")
|
sec_ax.set_ylabel("# Teilnahmen [% Erfolg]")
|
||||||
sec_ax.yaxis.set_major_formatter(mtick.PercentFormatter())
|
sec_ax.yaxis.set_major_formatter(mtick.PercentFormatter())
|
||||||
|
|
||||||
plt.axhline(y=total_target, color="#48a9be", linestyle="--")
|
for total_target in total_targets:
|
||||||
|
plt.axhline(y=total_target, color="#48a9be", linestyle="--")
|
||||||
|
|
||||||
plt.tight_layout()
|
plt.tight_layout()
|
||||||
|
|
||||||
|
return fig
|
||||||
|
|
||||||
|
|
||||||
|
def convert_fig_to_svg(fig: plt.Figure) -> str:
|
||||||
# Convert plot to SVG image
|
# Convert plot to SVG image
|
||||||
imgdata = io.StringIO()
|
imgdata = io.StringIO()
|
||||||
fig.savefig(imgdata, format="svg")
|
fig.savefig(imgdata, format="svg")
|
||||||
@ -209,23 +246,24 @@ def tables(
|
|||||||
|
|
||||||
df_state = df_state.sort_values("Landesbezirk")
|
df_state = df_state.sort_values("Landesbezirk")
|
||||||
|
|
||||||
image = plot(df_state)
|
fig = plot(df_state)
|
||||||
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
|
||||||
|
|
||||||
except Exception:
|
except Exception as e:
|
||||||
|
print(e)
|
||||||
last_file = sorted(Path("data").iterdir())[-1]
|
last_file = sorted(Path("data").iterdir())[-1]
|
||||||
key = last_file.name[:10]
|
key = last_file.name[:10]
|
||||||
|
|
||||||
with (Path("data") / f"{key}_data.ods").open("rb") as ff:
|
with (Path("data") / f"{key}_data.ods").open("rb") as ff:
|
||||||
df = pd.read_excel(ff, sheet_name="digital").astype(
|
df = pd.read_excel(ff, sheet_name="digital", index_col=0).astype(
|
||||||
{"Digitale Befragung": "Int32"}
|
{"Digitale Befragung": "Int32"}
|
||||||
)
|
)
|
||||||
with (Path("data") / f"{key}_state_data.ods").open("rb") as ff:
|
with (Path("data") / f"{key}_state_data.ods").open("rb") as ff:
|
||||||
df_state = pd.read_excel(ff, sheet_name="digital").astype(
|
df_state = pd.read_excel(ff, sheet_name="digital", index_col=0).astype(
|
||||||
{"Digitale Befragung": "Int32"}
|
{"Digitale Befragung": "Int32"}
|
||||||
)
|
)
|
||||||
|
|
||||||
image = plot()
|
fig = plot()
|
||||||
timestamp = Markup(f'<font color="red">{key} 10:00:00</font>')
|
timestamp = Markup(f'<font color="red">{key} 10:00:00</font>')
|
||||||
|
|
||||||
_print_as_html(df_state)
|
_print_as_html(df_state)
|
||||||
@ -235,7 +273,7 @@ def tables(
|
|||||||
"base.html",
|
"base.html",
|
||||||
tables="\n".join(output_str),
|
tables="\n".join(output_str),
|
||||||
timestamp=timestamp,
|
timestamp=timestamp,
|
||||||
image=image,
|
image=convert_fig_to_svg(fig),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user