Generalize inference of missing bezirk data

This commit is contained in:
Felix Blanke 2023-09-14 11:34:27 +02:00
parent 13d47be9c1
commit 2cbf2af0de

32
wsgi.py
View File

@ -287,17 +287,26 @@ def convert_fig_to_svg(fig: plt.Figure) -> str:
def _print_as_html(
df: pd.DataFrame,
output_str: list[str],
total: int | None = None,
df_state: pd.DataFrame | None = None,
dropna: bool = True,
) -> list[str]:
df = df.astype({"Digitale Befragung": "Int32"})
num_missing = (df[df.isna().any(axis=1)]["Landesbezirk"] != "Hessen").sum()
if num_missing == 1:
# infer value from total
df_tmp = df.loc[df["Landesbezirk"] != "Hessen"]
idx = df_tmp.loc[df_tmp.isna().any(axis=1)].iloc[0].name
df["Digitale Befragung"].loc[idx] = (
total - df.dropna()["Digitale Befragung"].sum()
missing_df = df[["Digitale Befragung"]].isna().join(df[["Landesbezirk"]]).groupby("Landesbezirk").sum()
total = df_state["Digitale Befragung"].sum() if df_state is not None else None
if df_state is not None:
for idx, row in missing_df.loc[missing_df["Digitale Befragung"] == 1].iterrows():
df_tmp = df.loc[df["Landesbezirk"] == idx]
df_state_tmp = df_state.loc[df_state["Landesbezirk"] == idx]
missing_idx = df_tmp.loc[df_tmp.isna().any(axis=1)].iloc[0].name
df["Digitale Befragung"].loc[missing_idx] = (
df_state_tmp["Digitale Befragung"].sum()
- df_tmp["Digitale Befragung"].sum()
)
df = df.sort_values(
["Digitale Befragung", "Landesbezirk", "Bezirk"],
ascending=[False, True, True],
)
if dropna:
df = df.dropna()
@ -325,6 +334,7 @@ def _print_as_html(
)
if total and (diff := total - df["Digitale Befragung"].sum()):
tfoot.append(" <tr>")
num_missing = missing_df['Digitale Befragung'].sum()
tfoot.append(
f" <td>Weitere Bezirke ({num_missing})</td>"
if num_missing
@ -376,7 +386,7 @@ def state_dashboard(state: str):
output_str = []
output_str = _print_as_html(df_state, output_str, dropna=False)
output_str = _print_as_html(
df, output_str, total=df_state["Digitale Befragung"].sum(), dropna=False
df, output_str, df_state=df_state, dropna=False
)
return render_template(
@ -405,9 +415,7 @@ def dashboard():
output_str = []
output_str = _print_as_html(df_state, output_str, dropna=False)
output_str = _print_as_html(
df, output_str, total=df_state["Digitale Befragung"].sum()
)
output_str = _print_as_html(df, output_str, df_state)
return render_template(
"base.html",