Scraper for searching for feeds in HTML sites

This commit is contained in:
Martin Rotter 2021-03-16 09:02:21 +01:00
parent 24df33dd89
commit a96e83fb10

View file

@ -16,8 +16,11 @@ regexp_href = re.compile("href=\"([^\"]+)\"")
for url in urls_lines:
# Download HTML data.
try:
url_response = urllib.request.urlopen(url)
html = url_response.read().decode("utf-8")
except:
continue
# Search for XML feeds with regexps.
for link_tag in re.findall(regexp_link, html):