diff --git a/download_digital.py b/download_digital.py index 4a563c5..e762f09 100644 --- a/download_digital.py +++ b/download_digital.py @@ -76,6 +76,7 @@ def construct_dataframe( tag: str = "bez_data_2", grouped: bool = False, special_tag: str | None = None, + no_processing: bool = False, ): r = requests.get(url) soup = BeautifulSoup(r.text, "html.parser") @@ -89,9 +90,10 @@ def construct_dataframe( bez_data = json.loads(substring[: substring.find("\n") - 1]) data = {} - data["Bundesland"] = pd.Series( - [bundesland_dict[k] for k in bez_data], index=list(bez_data.keys()) - ) + if not no_processing: + data["Bundesland"] = pd.Series( + [bundesland_dict[k] for k in bez_data], index=list(bez_data.keys()) + ) data["Bezirk"] = pd.Series( [v["name"] for v in bez_data.values()], index=list(bez_data.keys()) ) @@ -112,7 +114,10 @@ def construct_dataframe( data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index) df = pd.DataFrame(data=data) df = df.astype({"Digitale Befragung": "Int32"}) - if grouped: + + if grouped and no_processing: + raise ValueError + elif grouped: df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum() return df @@ -124,8 +129,15 @@ def main( dry_run: bool = False, grouped: bool = False, special_tag: str | None = None, + no_processing: bool = False, ) -> None: - df = construct_dataframe(url=url, tag=tag, grouped=grouped, special_tag=special_tag) + df = construct_dataframe( + url=url, + tag=tag, + grouped=grouped, + special_tag=special_tag, + no_processing=no_processing, + ) if dry_run: print(df)