diff --git a/download_digital.py b/download_digital.py index 6b85dc4..b6fa0d8 100644 --- a/download_digital.py +++ b/download_digital.py @@ -74,7 +74,10 @@ bundesland_dict = { def main( - url: str = "https://beschaeftigtenbefragung.verdi.de/", tag: str = "bez_data_2" + url: str = "https://beschaeftigtenbefragung.verdi.de/", + tag: str = "bez_data_2", + dry_run: bool = False, + grouped: bool = False, ) -> pd.DataFrame: options = Options() options.add_argument("--headless") @@ -107,10 +110,16 @@ def main( data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index) df = pd.DataFrame(data=data) filename = f"data/{datetime.today().strftime('%Y-%m-%d')}_data.ods" - if Path(filename).exists(): - print("File already exists!") + if grouped: + df = df.groupby("Bundesland")[["Digitale Befragung"]].sum() + if dry_run: + df.loc["Total"] = df.sum(numeric_only=True) + print(df) else: - df.to_excel(filename, sheet_name="digital") + if Path(filename).exists(): + print("File already exists!") + else: + df.to_excel(filename, sheet_name="digital") if __name__ == "__main__":