Generalize inference of missing Bezirk data

This commit is contained in:
Felix Blanke 2023-09-14 11:34:27 +02:00
parent 13d47be9c1
commit 2cbf2af0de

32
wsgi.py
View File

@ -287,17 +287,26 @@ def convert_fig_to_svg(fig: plt.Figure) -> str:
def _print_as_html( def _print_as_html(
df: pd.DataFrame, df: pd.DataFrame,
output_str: list[str], output_str: list[str],
total: int | None = None, df_state: pd.DataFrame | None = None,
dropna: bool = True, dropna: bool = True,
) -> list[str]: ) -> list[str]:
df = df.astype({"Digitale Befragung": "Int32"}) df = df.astype({"Digitale Befragung": "Int32"})
num_missing = (df[df.isna().any(axis=1)]["Landesbezirk"] != "Hessen").sum() missing_df = df[["Digitale Befragung"]].isna().join(df[["Landesbezirk"]]).groupby("Landesbezirk").sum()
if num_missing == 1: total = df_state["Digitale Befragung"].sum() if df_state is not None else None
# infer value from total
df_tmp = df.loc[df["Landesbezirk"] != "Hessen"] if df_state is not None:
idx = df_tmp.loc[df_tmp.isna().any(axis=1)].iloc[0].name for idx, row in missing_df.loc[missing_df["Digitale Befragung"] == 1].iterrows():
df["Digitale Befragung"].loc[idx] = ( df_tmp = df.loc[df["Landesbezirk"] == idx]
total - df.dropna()["Digitale Befragung"].sum() df_state_tmp = df_state.loc[df_state["Landesbezirk"] == idx]
missing_idx = df_tmp.loc[df_tmp.isna().any(axis=1)].iloc[0].name
df["Digitale Befragung"].loc[missing_idx] = (
df_state_tmp["Digitale Befragung"].sum()
- df_tmp["Digitale Befragung"].sum()
)
df = df.sort_values(
["Digitale Befragung", "Landesbezirk", "Bezirk"],
ascending=[False, True, True],
) )
if dropna: if dropna:
df = df.dropna() df = df.dropna()
@ -325,6 +334,7 @@ def _print_as_html(
) )
if total and (diff := total - df["Digitale Befragung"].sum()): if total and (diff := total - df["Digitale Befragung"].sum()):
tfoot.append(" <tr>") tfoot.append(" <tr>")
num_missing = missing_df['Digitale Befragung'].sum()
tfoot.append( tfoot.append(
f" <td>Weitere Bezirke ({num_missing})</td>" f" <td>Weitere Bezirke ({num_missing})</td>"
if num_missing if num_missing
@ -376,7 +386,7 @@ def state_dashboard(state: str):
output_str = [] output_str = []
output_str = _print_as_html(df_state, output_str, dropna=False) output_str = _print_as_html(df_state, output_str, dropna=False)
output_str = _print_as_html( output_str = _print_as_html(
df, output_str, total=df_state["Digitale Befragung"].sum(), dropna=False df, output_str, df_state=df_state, dropna=False
) )
return render_template( return render_template(
@ -405,9 +415,7 @@ def dashboard():
output_str = [] output_str = []
output_str = _print_as_html(df_state, output_str, dropna=False) output_str = _print_as_html(df_state, output_str, dropna=False)
output_str = _print_as_html( output_str = _print_as_html(df, output_str, df_state)
df, output_str, total=df_state["Digitale Befragung"].sum()
)
return render_template( return render_template(
"base.html", "base.html",