Simplify download script
This commit is contained in:
parent
3bb4f432e4
commit
ae6beafa3d
@@ -71,6 +71,20 @@ bundesland_dict = {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
landesbezirk_dict = {
|
||||||
|
"100": "Nord",
|
||||||
|
"200": "Niedersachsen-Bremen",
|
||||||
|
"300": "Berlin-Brandenburg",
|
||||||
|
"400": "Nordrhein-Westfalen",
|
||||||
|
"500": "Rheinland-Pfalz-Saarland",
|
||||||
|
"600": "Hessen",
|
||||||
|
"700": "Sachsen, Sachsen-Anhalt, Thüringen",
|
||||||
|
"800": "Bayern",
|
||||||
|
"900": "Baden-Württemberg",
|
||||||
|
"1000": "Hamburg",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def get_bez_data(
|
def get_bez_data(
|
||||||
tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/"
|
tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/"
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
@@ -90,22 +104,22 @@ def get_bez_data(
|
|||||||
|
|
||||||
def construct_dataframe(
|
def construct_dataframe(
|
||||||
bez_data: dict[str, dict],
|
bez_data: dict[str, dict],
|
||||||
grouped: bool = False,
|
|
||||||
special_tag: str | None = None,
|
special_tag: str | None = None,
|
||||||
no_processing: bool = False,
|
|
||||||
):
|
):
|
||||||
data = {}
|
data = {}
|
||||||
if not no_processing:
|
|
||||||
|
first_key = next(iter(bez_data.keys()))
|
||||||
|
if first_key in landesbezirk_dict:
|
||||||
|
data["Landesbezirk"] = pd.Series(
|
||||||
|
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||||
|
)
|
||||||
|
else:
|
||||||
data["Bundesland"] = pd.Series(
|
data["Bundesland"] = pd.Series(
|
||||||
[bundesland_dict[k] for k in bez_data], index=list(bez_data.keys())
|
[bundesland_dict[k] for k in bez_data], index=list(bez_data.keys())
|
||||||
)
|
)
|
||||||
data["Bezirk"] = pd.Series(
|
data["Bezirk"] = pd.Series(
|
||||||
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
||||||
)
|
)
|
||||||
else:
|
|
||||||
data["Landesbezirk"] = pd.Series(
|
|
||||||
[v["name"] for v in bez_data.values()], index=list(bez_data.keys())
|
|
||||||
)
|
|
||||||
|
|
||||||
tot_col_data = []
|
tot_col_data = []
|
||||||
tot_col_index = []
|
tot_col_index = []
|
||||||
@@ -121,15 +135,7 @@ def construct_dataframe(
|
|||||||
tot_col_index.append(k)
|
tot_col_index.append(k)
|
||||||
|
|
||||||
data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index)
|
data["Digitale Befragung"] = pd.Series(tot_col_data, index=tot_col_index)
|
||||||
df = pd.DataFrame(data=data)
|
return pd.DataFrame(data=data).astype({"Digitale Befragung": "Int32"})
|
||||||
df = df.astype({"Digitale Befragung": "Int32"})
|
|
||||||
|
|
||||||
if grouped and no_processing:
|
|
||||||
raise ValueError
|
|
||||||
elif grouped:
|
|
||||||
df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
|
|
||||||
|
|
||||||
return df
|
|
||||||
|
|
||||||
|
|
||||||
def main(
|
def main(
|
||||||
@@ -138,7 +144,6 @@ def main(
|
|||||||
dry_run: bool = False,
|
dry_run: bool = False,
|
||||||
grouped: bool = False,
|
grouped: bool = False,
|
||||||
special_tag: str | None = None,
|
special_tag: str | None = None,
|
||||||
no_processing: bool = False,
|
|
||||||
folder: str = "data",
|
folder: str = "data",
|
||||||
name: str = "data",
|
name: str = "data",
|
||||||
sheet_name: str = "digital",
|
sheet_name: str = "digital",
|
||||||
@@ -146,11 +151,12 @@ def main(
|
|||||||
bez_data = get_bez_data([tag], url)[0]
|
bez_data = get_bez_data([tag], url)[0]
|
||||||
df = construct_dataframe(
|
df = construct_dataframe(
|
||||||
bez_data=bez_data,
|
bez_data=bez_data,
|
||||||
grouped=grouped,
|
|
||||||
special_tag=special_tag,
|
special_tag=special_tag,
|
||||||
no_processing=no_processing,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if grouped:
|
||||||
|
df = df.groupby("Bundesland", as_index=False)[["Digitale Befragung"]].sum()
|
||||||
|
|
||||||
if dry_run:
|
if dry_run:
|
||||||
print(df)
|
print(df)
|
||||||
else:
|
else:
|
||||||
|
|||||||
11
wsgi.py
11
wsgi.py
@@ -30,15 +30,8 @@ cache = Cache(app)
|
|||||||
def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]:
|
def get_tables(url: str) -> tuple[pd.DataFrame, pd.DataFrame]:
|
||||||
bez_data = get_bez_data(["bez_data_0", "bez_data_2"], url)
|
bez_data = get_bez_data(["bez_data_0", "bez_data_2"], url)
|
||||||
|
|
||||||
df = construct_dataframe(
|
df = construct_dataframe(bez_data=bez_data[0], special_tag="stud")
|
||||||
bez_data=bez_data[0],
|
df_state = construct_dataframe(bez_data=bez_data[1])
|
||||||
grouped=False,
|
|
||||||
special_tag="stud",
|
|
||||||
)
|
|
||||||
|
|
||||||
df_state = construct_dataframe(
|
|
||||||
bez_data=bez_data[1], grouped=False, no_processing=True
|
|
||||||
)
|
|
||||||
|
|
||||||
return df, df_state
|
return df, df_state
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user