# Provides filtering of entries provided via Metacritic RSS feeds.
#
# Example input feed is: https://www.metacritic.com/rss/tv
#
# This script expects raw RSS 2.0 feed data as input and can be called like
# this on cli:
#   curl 'https://www.metacritic.com/rss/tv' | python ./metacritic.py "<MINIMAL_SCORE>"
#
# Replace <MINIMAL_SCORE> with minimal numerical score your articles must have
# or pass nothing (or an empty string) to keep all articles.

import json
import re
import ssl
import sys
import urllib.request
import xml.etree.ElementTree as ET

# Seconds to wait for a single article page before giving up on it, so one
# stalled connection cannot block the whole feed forever.
REQUEST_TIMEOUT = 30

# Captures the numeric Metascore of a TV show page. The CSS class suffix after
# "tvshow" is left open so the score is found no matter which rating class the
# element carries, and up to three digits are accepted so that a score of 100
# is not truncated to 10.
# NOTE(review): the set of class suffixes Metacritic emits is assumed, not
# verified against the live markup.
SCORE_PATTERN = re.compile(r'metascore_w larger tvshow [^"]*">(\d{1,3})')


def extract_score(html):
    """Return the Metascore embedded in the HTML text of an article page.

    Args:
        html: Decoded HTML of a Metacritic article page.

    Returns:
        The score as an integer.

    Raises:
        ValueError: If the page contains no recognizable Metascore.
    """
    match = SCORE_PATTERN.search(html)

    if match is None:
        raise ValueError("no Metascore found in article page")

    return int(match.group(1))


def get_score_of_url(article_url):
    """Download the article page at *article_url* and return its Metascore.

    Args:
        article_url: URL of the Metacritic article page.

    Returns:
        The score as an integer.

    Raises:
        ValueError: If the page contains no recognizable Metascore.
        OSError: If the page cannot be downloaded (includes
            urllib.error.URLError, TLS failures and timeouts).
    """
    req = urllib.request.Request(article_url)
    req.add_header("Accept", "*/*")
    req.add_header("User-Agent", "curl/7.55.1")

    # A default context verifies the server certificate and host name; a bare
    # ssl.SSLContext() performs no verification at all.
    context = ssl.create_default_context()

    # The "with" block closes the connection even if reading fails.
    with urllib.request.urlopen(req, context=context,
                                timeout=REQUEST_TIMEOUT) as response:
        # Undecodable bytes must not prevent the score from being found.
        text = response.read().decode("utf-8", errors="replace")

    return extract_score(text)


def parse_minimal_score(argv):
    """Return the minimal score requested on the command line.

    Args:
        argv: Command-line arguments including the program name.

    Returns:
        The minimal score as an integer, or None when no score was passed or
        when it is an empty/blank string (meaning "keep all articles").

    Raises:
        ValueError: If the argument is not blank and is not an integer.
    """
    if len(argv) < 2 or not argv[1].strip():
        return None

    return int(argv[1])


def filter_feed(feed_document, minimal_score=None,
                score_lookup=get_score_of_url):
    """Filter the articles of a parsed RSS 2.0 feed by Metascore, in place.

    The score of each article that is kept is appended to its title. Looking
    up a score is best effort: any failure simply leaves the score unknown.
    Articles with an unknown score are kept (title untouched) when no minimal
    score is requested and removed otherwise.

    Args:
        feed_document: Root element of the parsed feed.
        minimal_score: Lowest score an article must have to be kept, or None
            to keep every article.
        score_lookup: Callable taking the article URL and returning its score;
            it may raise to signal that the score is unavailable.

    Returns:
        The same *feed_document*, for convenience.

    Raises:
        ValueError: If the document has no <channel> element.
    """
    feed_channel = feed_document.find(".//channel")

    if feed_channel is None:
        raise ValueError("input is not an RSS 2.0 feed: no <channel> element")

    # findall() returns a separate list, so removing items while looping over
    # it is safe.
    for article in feed_channel.findall("item"):
        article_url = (article.findtext("link") or "").strip()
        article_score = None

        if article_url:
            try:
                article_score = score_lookup(article_url)
            except Exception:
                # Deliberate best effort: network problems or unexpected page
                # markup leave the score unknown instead of aborting the feed.
                article_score = None

        if article_score is None:
            if minimal_score is not None:
                feed_channel.remove(article)
            continue

        if minimal_score is not None and article_score < minimal_score:
            feed_channel.remove(article)
            continue

        article_title = article.find("title")

        if article_title is not None:
            article_title.text = "{} - {}".format(article_title.text or "",
                                                  article_score)

    return feed_document


def main():
    """Read an RSS 2.0 feed from stdin, filter it and print it to stdout."""
    minimal_score = parse_minimal_score(sys.argv)

    # Read RSS 2.0 feed data from input.
    sys.stdin.reconfigure(encoding="utf-8")
    feed_document = ET.fromstring(sys.stdin.read())

    # Process articles one by one.
    filter_feed(feed_document, minimal_score)

    # tostring() emits ASCII with character references by default, which is
    # safe to print regardless of the encoding of stdout.
    print(ET.tostring(feed_document).decode())


if __name__ == '__main__':
    main()