diff --git a/resources/desktop/com.github.rssguard.appdata.xml b/resources/desktop/com.github.rssguard.appdata.xml
index de4a88404..091df44fe 100644
--- a/resources/desktop/com.github.rssguard.appdata.xml
+++ b/resources/desktop/com.github.rssguard.appdata.xml
@@ -26,7 +26,7 @@
https://github.com/sponsors/martinrotter
-
+
none
diff --git a/resources/scripts/scrapers/scrape-as-rss2.py b/resources/scripts/scrapers/scrape-as-rss2.py
deleted file mode 100644
index 8defd7659..000000000
--- a/resources/scripts/scrapers/scrape-as-rss2.py
+++ /dev/null
@@ -1,55 +0,0 @@
-# Downloads full articles for RSS 2.0 feed and replaces original articles.
-#
-# Make sure to have all dependencies installed:
-# pip3 install asyncio (if using parallel version of the script)
-#
-# You must provide raw RSS 2.0 UTF-8 feed XML data as input, for example with curl:
-# curl 'http://rss.cnn.com/rss/edition.rss' | python ./scrape-rss2.py "4"
-#
-# You must provide three command line arguments:
-# scrape-rss2.py [NUMBER-OF-PARALLEL-THREADS]
-
-import json
-import re
-import sys
-import time
-import html
-import urllib.request
-import distutils.util
-import xml.etree.ElementTree as ET
-
-no_threads = int(sys.argv[1])
-
-if no_threads > 1:
- import asyncio
- from concurrent.futures import ThreadPoolExecutor
-
-sys.stdin.reconfigure(encoding='utf-8')
-rss_data = sys.stdin.read()
-rss_document = ET.fromstring(rss_data)
-
-def process_article(article):
- try:
- link = "https://us-central1-technews-251304.cloudfunctions.net/article-parser?url=" + article.find("link").text
- response = urllib.request.urlopen(link)
- text = response.read().decode("utf-8")
- js = json.loads(text)
-
- if int(js["error"]) == 0:
- article.find("description").text = js["data"]["content"]
- except:
- pass
-
-# Scrape articles.
-if no_threads > 1:
- with ThreadPoolExecutor(max_workers = no_threads) as executor:
- futures = []
- for article in rss_document.findall(".//item"):
- futures.append(executor.submit(process_article, article))
- for future in futures:
- future.result()
-else:
- for article in rss_document.findall(".//item"):
- process_article(article)
-
-print(ET.tostring(rss_document, encoding = "unicode"))
\ No newline at end of file
diff --git a/resources/scripts/scrapers/scrape-full-articles.py b/resources/scripts/scrapers/scrape-full-articles.py
new file mode 100644
index 000000000..a40b69f24
--- /dev/null
+++ b/resources/scripts/scrapers/scrape-full-articles.py
@@ -0,0 +1,96 @@
+# Downloads full (HTML) articles for an ATOM or RSS 2.0 feed and replaces the original articles.
+#
+# The script only uses the Python standard library, so there are no extra
+# dependencies to install.
+#
+# You must provide raw ATOM or RSS 2.0 UTF-8 feed XML data as input, for example with curl:
+# curl 'http://rss.cnn.com/rss/edition.rss' | python ./scrape-full-articles.py "4"
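+#
+# An ATOM feed works the same way, for example with GitHub's release feed:
+# curl 'https://github.com/martinrotter/rssguard/releases.atom' | python ./scrape-full-articles.py "4"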
+#
+# The script accepts one optional command line argument, the number of
+# parallel threads (defaults to 1 when omitted):
+# scrape-full-articles.py [NUMBER-OF-PARALLEL-THREADS]
+
+import json
+import sys
+import urllib.request
+import xml.etree.ElementTree as ET
+
+# Globals.
+atom_ns = {"atom": "http://www.w3.org/2005/Atom"}
+article_parser_url = "https://us-central1-technews-251304.cloudfunctions.net/article-parser?url="
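+
+# The article-parser endpoint is assumed to return JSON shaped like
+# {"error": 0, "data": {"content": "<full article HTML>"}}, which is how the
+# response is consumed below.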
+
+
+# Methods.
+def process_article(article, is_rss, is_atom):
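+    """Replace one feed item's content with the full scraped article.
+
+    Operates in place on a single <item> (RSS 2.0) or <entry> (ATOM) element.
+    """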
+ try:
+        scraped_article = ""
+
+        # Extract the article link.
+        if is_rss:
+            article_link = article.find("link").text
+        elif is_atom:
+            article_link = article.find("atom:link", atom_ns).attrib["href"]
+
+ # Scrape with article-parser.
+ link = article_parser_url + article_link
+
+ response = urllib.request.urlopen(link)
+ text = response.read().decode("utf-8")
+ js = json.loads(text)
+
+ if int(js["error"]) == 0:
+ scraped_article = js["data"]["content"]
+
+ # Save scraped data.
+ if scraped_article:
+ if is_rss:
+ article.find("description").text = scraped_article
+ elif is_atom:
+ article.find("atom:content", atom_ns).text = scraped_article
+    except Exception:
+        # On any failure (network error, missing element, malformed JSON),
+        # keep the original article untouched.
+        pass
+
+
+def main():
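+    # Number of parallel download threads, taken from the first CLI argument;
+    # defaults to 1 (sequential) when omitted.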
+ no_threads = int(sys.argv[1]) if len(sys.argv) >= 2 else 1
+
+ if no_threads > 1:
+ from concurrent.futures import ThreadPoolExecutor
+
+ sys.stdin.reconfigure(encoding="utf-8")
+
+ feed_data = sys.stdin.read()
+ feed_document = ET.fromstring(feed_data)
+
+ # Determine feed type.
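+    # ElementTree reports namespaced tags in Clark notation ("{uri}tag"),
+    # hence the fully qualified ATOM root tag below.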
+ is_rss = feed_document.tag == "rss"
+ is_atom = feed_document.tag == "{http://www.w3.org/2005/Atom}feed"
+
+ if not is_rss and not is_atom:
+        sys.exit("Input is neither an ATOM nor an RSS 2.0 feed.")
+
+ # Extract articles.
+ if is_rss:
+ feed_articles = feed_document.findall(".//item")
+ elif is_atom:
+ feed_articles = feed_document.findall(".//atom:entry", atom_ns)
+
+ # Scrape articles.
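+    # Scraping is network-bound, so a simple thread pool is enough to
+    # parallelize the downloads effectively.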
+ if no_threads > 1:
+ with ThreadPoolExecutor(max_workers=no_threads) as executor:
+ futures = []
+ for article in feed_articles:
+ futures.append(
+ executor.submit(process_article, article, is_rss, is_atom))
+ for future in futures:
+ future.result()
+ else:
+ for article in feed_articles:
+ process_article(article, is_rss, is_atom)
+
+ print(ET.tostring(feed_document, encoding="unicode"))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/resources/scripts/scrapers/scrape-rss2.py b/resources/scripts/scrapers/scrape-rss2.py
deleted file mode 100644
index 63ab40eb7..000000000
--- a/resources/scripts/scrapers/scrape-rss2.py
+++ /dev/null
@@ -1,56 +0,0 @@
-# Downloads full articles for RSS 2.0 feed and replaces original articles.
-#
-# Make sure to have all dependencies installed:
-# pip3 install newspaper3k
-# pip3 install asyncio (if using parallel version of the script)
-#
-# You must provide raw RSS 2.0 UTF-8 feed XML data as input, for example with curl:
-# curl 'http://rss.cnn.com/rss/edition.rss' | python ./scrape-rss2.py "4"
-#
-# You must provide three command line arguments:
-# scrape-rss2.py [NUMBER-OF-PARALLEL-THREADS]
-
-import json
-import re
-import sys
-import time
-import html
-import requests
-import distutils.util
-import xml.etree.ElementTree as ET
-from newspaper import Article
-
-no_threads = int(sys.argv[1])
-
-if no_threads > 1:
- import asyncio
- from concurrent.futures import ThreadPoolExecutor
-
-sys.stdin.reconfigure(encoding='utf-8')
-rss_data = sys.stdin.read()
-rss_document = ET.fromstring(rss_data)
-
-def process_article(article):
- try:
- link = article.find("link").text
-
- f = Article(link, keep_article_html = True)
- f.download()
- f.parse()
- article.find("description").text = f.article_html
- except:
- pass
-
-# Scrape articles.
-if no_threads > 1:
- with ThreadPoolExecutor(max_workers = no_threads) as executor:
- futures = []
- for article in rss_document.findall(".//item"):
- futures.append(executor.submit(process_article, article))
- for future in futures:
- future.result()
-else:
- for article in rss_document.findall(".//item"):
- process_article(article)
-
-print(ET.tostring(rss_document, encoding = "unicode"))
\ No newline at end of file