Make downloaded site reusable
This commit is contained in:
parent
6bc8c909d5
commit
195338aa89
@ -70,25 +70,27 @@ bundesland_dict = {
|
||||
"1001": "Hamburg",
|
||||
}
|
||||
|
||||
def _extract_tagged_json(script_texts: list[str], tag: str):
    """Return the JSON value that follows ``tag`` in the first script mentioning it.

    Args:
        script_texts: Decoded contents of the page's ``<script>`` elements,
            in document order.
        tag: Marker text to search for (e.g. a JavaScript variable name).

    Returns:
        The value parsed by ``json.loads`` from the text after the tag.

    Raises:
        ValueError: If no script contains ``tag``. ``json.JSONDecodeError``
            (itself a ``ValueError``) is raised if the text after the tag is
            not valid JSON.
    """
    for text in script_texts:
        start = text.find(tag)
        if start < 0:
            continue
        # Skip the tag plus the three characters separating it from the JSON
        # literal (presumably " = " -- confirm against the page source).
        remainder = text[start + len(tag) + 3 :]
        line_end = remainder.find("\n")
        if line_end < 0:
            # No newline after the literal: the statement runs to the end of
            # the script, so treat the end of the text as the end of the line.
            line_end = len(remainder)
        # Drop the single character before the line end (presumably the
        # terminating ";") before parsing.
        return json.loads(remainder[: line_end - 1])
    # Previously a missing tag fell through and parsed an arbitrary slice of
    # the last script (or hit an unbound local when the page had no scripts).
    raise ValueError(f"no <script> element contains {tag!r}")


def get_bez_data(tags: list[str], url: str = "https://beschaeftigtenbefragung.verdi.de/") -> list[dict]:
    """Download ``url`` once and extract the JSON payload for each tag.

    The page is fetched a single time; every ``<script>`` element is then
    searched for each tag, and the JSON text that follows the tag on the same
    line is parsed.

    Args:
        tags: Marker strings to look up, one result is produced per tag.
        url: Address of the page to download.

    Returns:
        The parsed JSON values, in the same order as ``tags``.

    Raises:
        ValueError: If a tag does not occur in any ``<script>`` element, or
            if the text following a tag is not valid JSON.
    """
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    # Decode every script once up front instead of once per requested tag.
    script_texts = [script.decode_contents() for script in soup.find_all("script")]
    return [_extract_tagged_json(script_texts, tag) for tag in tags]
|
||||
|
||||
|
||||
def construct_dataframe(
|
||||
url: str = "https://beschaeftigtenbefragung.verdi.de/",
|
||||
tag: str = "bez_data_2",
|
||||
bez_data: dict[str, dict],
|
||||
grouped: bool = False,
|
||||
special_tag: str | None = None,
|
||||
no_processing: bool = False,
|
||||
):
|
||||
r = requests.get(url)
|
||||
soup = BeautifulSoup(r.text, "html.parser")
|
||||
|
||||
for a in soup.find_all("script"):
|
||||
script_contents = a.decode_contents()
|
||||
if script_contents.find(tag) >= 0:
|
||||
break
|
||||
|
||||
substring = script_contents[script_contents.find(tag) + len(tag) + 3 :]
|
||||
bez_data = json.loads(substring[: substring.find("\n") - 1])
|
||||
|
||||
data = {}
|
||||
if not no_processing:
|
||||
data["Bundesland"] = pd.Series(
|
||||
@ -135,9 +137,9 @@ def main(
|
||||
special_tag: str | None = None,
|
||||
no_processing: bool = False,
|
||||
) -> None:
|
||||
bez_data = get_bez_data([tag], url)[0]
|
||||
df = construct_dataframe(
|
||||
url=url,
|
||||
tag=tag,
|
||||
bez_data=bez_data,
|
||||
grouped=grouped,
|
||||
special_tag=special_tag,
|
||||
no_processing=no_processing,
|
||||
|
||||
10
wsgi.py
10
wsgi.py
@ -1,7 +1,7 @@
|
||||
import pandas as pd
|
||||
from flask import Flask, render_template, request
|
||||
|
||||
from download_digital import construct_dataframe
|
||||
from download_digital import construct_dataframe, get_bez_data
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@ -10,16 +10,16 @@ app = Flask(__name__)
|
||||
def tables(
|
||||
url: str = "https://beschaeftigtenbefragung.verdi.de/",
|
||||
):
|
||||
bez_data = get_bez_data(["bez_data_0", "bez_data_2"], url)
|
||||
|
||||
df = construct_dataframe(
|
||||
url=url,
|
||||
tag="bez_data_0",
|
||||
bez_data=bez_data[0],
|
||||
grouped=False,
|
||||
special_tag="stud",
|
||||
)
|
||||
|
||||
df_state = construct_dataframe(
|
||||
url=url,
|
||||
tag="bez_data_2",
|
||||
bez_data=bez_data[1],
|
||||
grouped=False,
|
||||
no_processing=True
|
||||
)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user