Scraper for searching for feeds in HTML sites

This commit is contained in:
Martin Rotter 2021-03-16 09:02:21 +01:00
parent 24df33dd89
commit a96e83fb10

View file

@@ -16,9 +16,12 @@ regexp_href = re.compile("href=\"([^\"]+)\"")
for url in urls_lines:
# Download HTML data.
url_response = urllib.request.urlopen(url)
html = url_response.read().decode("utf-8")
try:
url_response = urllib.request.urlopen(url)
html = url_response.read().decode("utf-8")
except:
continue
# Search for XML feeds with regexps.
for link_tag in re.findall(regexp_link, html):
for link_xml_feed in re.findall(regexp_href, link_tag):