Move from selenium to requests
This commit is contained in:
parent
11d885e786
commit
677810c213
@ -4,9 +4,8 @@ from pathlib import Path
|
|||||||
|
|
||||||
import fire
|
import fire
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from selenium import webdriver
|
import requests
|
||||||
from selenium.webdriver.common.by import By
|
from bs4 import BeautifulSoup
|
||||||
from selenium.webdriver.firefox.options import Options
|
|
||||||
|
|
||||||
bundesland_dict = {
|
bundesland_dict = {
|
||||||
"110": "Schleswig-Holstein",
|
"110": "Schleswig-Holstein",
|
||||||
@ -78,13 +77,11 @@ def main(
|
|||||||
dry_run: bool = False,
|
dry_run: bool = False,
|
||||||
grouped: bool = False,
|
grouped: bool = False,
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
options = Options()
|
r = requests.get(url)
|
||||||
options.add_argument("--headless")
|
soup = BeautifulSoup(r.text, "html.parser")
|
||||||
driver = webdriver.Firefox(options=options)
|
|
||||||
driver.get(url)
|
|
||||||
|
|
||||||
for a in driver.find_elements(By.XPATH, "//script"):
|
for a in soup.find_all("script"):
|
||||||
script_contents = a.get_attribute("innerHTML")
|
script_contents = a.decode_contents()
|
||||||
if script_contents.find(tag) >= 0:
|
if script_contents.find(tag) >= 0:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user