Use resolved/redirected URLs when discovering feeds; this reduces the number of duplicate discovered feeds.

This commit is contained in:
Martin Rotter 2024-11-12 14:18:17 +01:00
parent b230267c22
commit 793cbcbf58
20 changed files with 81 additions and 84 deletions

View file

@ -174,7 +174,14 @@ void FormDiscoverFeeds::discoverFeeds() {
std::function<QList<StandardFeed*>(QList<StandardFeed*>&, const QList<StandardFeed*>&)> reducer =
[=](QList<StandardFeed*>& res, const QList<StandardFeed*>& interm) -> QList<StandardFeed*> {
res.append(interm);
for (StandardFeed* new_fd : interm) {
if (!std::any_of(res.cbegin(), res.cend(), [=](const StandardFeed* fd) {
return fd->source() == new_fd->source();
})) {
res.append(new_fd);
}
}
return res;
};

View file

@ -65,9 +65,7 @@ QList<StandardFeed*> AtomParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
// 1.
auto guessed_feed = guessFeed(data, res.m_contentType);
guessed_feed.first->setSource(my_url);
auto guessed_feed = guessFeed(data, res);
return {guessed_feed.first};
}
@ -113,9 +111,8 @@ QList<StandardFeed*> AtomParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(feed_link);
feeds.append(guessed_feed.first);
}
catch (const ApplicationException& ex) {
@ -141,9 +138,8 @@ QList<StandardFeed*> AtomParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(my_url);
feeds.append(guessed_feed.first);
}
catch (...) {
@ -166,9 +162,8 @@ QList<StandardFeed*> AtomParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(my_url);
feeds.append(guessed_feed.first);
}
catch (...) {
@ -201,9 +196,8 @@ QList<StandardFeed*> AtomParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(my_url);
feeds.append(guessed_feed.first);
}
catch (...) {
@ -241,9 +235,8 @@ QList<StandardFeed*> AtomParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(my_url);
feeds.append(guessed_feed.first);
}
catch (...) {
@ -271,9 +264,8 @@ QList<StandardFeed*> AtomParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(my_url);
feeds.append(guessed_feed.first);
}
catch (...) {
@ -286,9 +278,7 @@ QList<StandardFeed*> AtomParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
}
QPair<StandardFeed*, QList<IconLocation>> AtomParser::guessFeed(const QByteArray& content,
const QString& content_type) const {
Q_UNUSED(content_type)
const NetworkResult& network_res) const {
QString xml_schema_encoding = QSL(DEFAULT_FEED_ENCODING);
QString xml_contents_encoded;
QString enc =
@ -336,6 +326,7 @@ QPair<StandardFeed*, QList<IconLocation>> AtomParser::guessFeed(const QByteArray
feed->setType(StandardFeed::Type::Atom10);
feed->setTitle(root_element.namedItem(QSL("title")).toElement().text());
feed->setDescription(root_element.namedItem(QSL("subtitle")).toElement().text());
feed->setSource(network_res.m_url.toString());
QString icon_link = root_element.namedItem(QSL("icon")).toElement().text();

View file

@ -18,7 +18,7 @@ class AtomParser : public FeedParser {
virtual QList<StandardFeed*> discoverFeeds(ServiceRoot* root, const QUrl& url, bool greedy) const;
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
const QString& content_type) const;
const NetworkResult& network_res) const;
protected:
virtual QDomNodeList xmlMessageElements();

View file

@ -57,7 +57,7 @@ QList<StandardFeed*> FeedParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
if (QFile::exists(file_path)) {
try {
auto guessed_feed = guessFeed(IOFactory::readFile(file_path), {});
auto guessed_feed = guessFeed(IOFactory::readFile(file_path));
guessed_feed.first->setSourceType(StandardFeed::SourceType::LocalFile);
guessed_feed.first->setSource(file_path);
@ -74,7 +74,7 @@ QList<StandardFeed*> FeedParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
}
QPair<StandardFeed*, QList<IconLocation>> FeedParser::guessFeed(const QByteArray& content,
const QString& content_type) const {
const NetworkResult& network_res) const {
return {};
}

View file

@ -32,7 +32,8 @@ class FeedParser {
// Guesses feed.
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
const QString& content_type) const;
const NetworkResult& network_res =
NetworkResult()) const;
// Returns list of all messages from the feed.
virtual QList<Message> messages();

View file

@ -43,9 +43,7 @@ QList<StandardFeed*> IcalParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
// 1.
auto guessed_feed = guessFeed(data, res.m_contentType);
guessed_feed.first->setSource(my_url);
auto guessed_feed = guessFeed(data, res);
return {guessed_feed.first};
}
@ -58,8 +56,8 @@ QList<StandardFeed*> IcalParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
}
QPair<StandardFeed*, QList<IconLocation>> IcalParser::guessFeed(const QByteArray& content,
const QString& content_type) const {
if (content_type.contains(QSL("text/calendar")) || content.startsWith(QSL("BEGIN").toLocal8Bit())) {
const NetworkResult& network_res) const {
if (network_res.m_contentType.contains(QSL("text/calendar")) || content.startsWith(QSL("BEGIN").toLocal8Bit())) {
Icalendar calendar;
try {
@ -75,6 +73,7 @@ QPair<StandardFeed*, QList<IconLocation>> IcalParser::guessFeed(const QByteArray
feed->setEncoding(QSL(DEFAULT_FEED_ENCODING));
feed->setType(StandardFeed::Type::iCalendar);
feed->setTitle(calendar.title());
feed->setSource(network_res.m_url.toString());
return QPair<StandardFeed*, QList<IconLocation>>(feed, icon_possible_locations);
}

View file

@ -78,7 +78,7 @@ class IcalParser : public FeedParser {
virtual QList<StandardFeed*> discoverFeeds(ServiceRoot* root, const QUrl& url, bool greedy) const;
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
const QString& content_type) const;
const NetworkResult& network_res) const;
virtual QVariantList objMessageElements();
virtual QString objMessageTitle(const QVariant& msg_element) const;

View file

@ -50,9 +50,7 @@ QList<StandardFeed*> JsonParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
// 1.
auto guessed_feed = guessFeed(data, res.m_contentType);
guessed_feed.first->setSource(my_url);
auto guessed_feed = guessFeed(data, res);
return {guessed_feed.first};
}
@ -97,9 +95,8 @@ QList<StandardFeed*> JsonParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(feed_link);
feeds.append(guessed_feed.first);
}
catch (const ApplicationException& ex) {
@ -114,8 +111,8 @@ QList<StandardFeed*> JsonParser::discoverFeeds(ServiceRoot* root, const QUrl& ur
}
QPair<StandardFeed*, QList<IconLocation>> JsonParser::guessFeed(const QByteArray& content,
const QString& content_type) const {
if (content_type.contains(QSL("json"), Qt::CaseSensitivity::CaseInsensitive) ||
const NetworkResult& network_res) const {
if (network_res.m_contentType.contains(QSL("json"), Qt::CaseSensitivity::CaseInsensitive) ||
content.simplified().startsWith('{')) {
QJsonParseError json_err;
QJsonDocument json = QJsonDocument::fromJson(content, &json_err);
@ -135,6 +132,7 @@ QPair<StandardFeed*, QList<IconLocation>> JsonParser::guessFeed(const QByteArray
feed->setType(StandardFeed::Type::Json);
feed->setTitle(json.object()[QSL("title")].toString());
feed->setDescription(json.object()[QSL("description")].toString());
feed->setSource(network_res.m_url.toString());
auto home_page = json.object()[QSL("home_page_url")].toString();

View file

@ -15,7 +15,7 @@ class JsonParser : public FeedParser {
virtual QList<StandardFeed*> discoverFeeds(ServiceRoot* root, const QUrl& url, bool greedy) const;
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
const QString& content_type) const;
const NetworkResult& network_res) const;
protected:
virtual QString feedAuthor() const;
@ -25,7 +25,7 @@ class JsonParser : public FeedParser {
virtual QString jsonMessageUrl(const QJsonObject& msg_element) const;
virtual QString jsonMessageDescription(const QJsonObject& msg_element) const;
virtual QString jsonMessageAuthor(const QJsonObject& msg_element) const;
virtual QDateTime jsonMessageDateCreated(const QJsonObject& msg_element) ;
virtual QDateTime jsonMessageDateCreated(const QJsonObject& msg_element);
virtual QString jsonMessageId(const QJsonObject& msg_element) const;
virtual QList<Enclosure> jsonMessageEnclosures(const QJsonObject& msg_element) const;
virtual QString jsonMessageRawContents(const QJsonObject& msg_element) const;

View file

@ -51,9 +51,7 @@ QList<StandardFeed*> RdfParser::discoverFeeds(ServiceRoot* root, const QUrl& url
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
// 1.
auto guessed_feed = guessFeed(data, res.m_contentType);
guessed_feed.first->setSource(my_url);
auto guessed_feed = guessFeed(data, res);
return {guessed_feed.first};
}
@ -98,9 +96,8 @@ QList<StandardFeed*> RdfParser::discoverFeeds(ServiceRoot* root, const QUrl& url
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(feed_link);
feeds.append(guessed_feed.first);
}
catch (const ApplicationException& ex) {
@ -126,9 +123,8 @@ QList<StandardFeed*> RdfParser::discoverFeeds(ServiceRoot* root, const QUrl& url
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(my_url);
feeds.append(guessed_feed.first);
}
catch (...) {
@ -151,9 +147,8 @@ QList<StandardFeed*> RdfParser::discoverFeeds(ServiceRoot* root, const QUrl& url
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(my_url);
feeds.append(guessed_feed.first);
}
catch (...) {
@ -165,7 +160,7 @@ QList<StandardFeed*> RdfParser::discoverFeeds(ServiceRoot* root, const QUrl& url
}
QPair<StandardFeed*, QList<IconLocation>> RdfParser::guessFeed(const QByteArray& content,
const QString& content_type) const {
const NetworkResult& network_res) const {
QString xml_schema_encoding = QSL(DEFAULT_FEED_ENCODING);
QString xml_contents_encoded;
QString enc =
@ -211,6 +206,7 @@ QPair<StandardFeed*, QList<IconLocation>> RdfParser::guessFeed(const QByteArray&
feed->setEncoding(xml_schema_encoding);
feed->setType(StandardFeed::Type::Rdf);
feed->setSource(network_res.m_url.toString());
QDomElement channel_element = root_element.elementsByTagNameNS(rssNamespace(), QSL("channel")).at(0).toElement();

View file

@ -17,7 +17,7 @@ class RdfParser : public FeedParser {
virtual QList<StandardFeed*> discoverFeeds(ServiceRoot* root, const QUrl& url, bool greedy) const;
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
const QString& content_type) const;
const NetworkResult& network_res) const;
protected:
virtual QString xmlMessageTitle(const QDomElement& msg_element) const;

View file

@ -51,9 +51,7 @@ QList<StandardFeed*> RssParser::discoverFeeds(ServiceRoot* root, const QUrl& url
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
// 1.
auto guessed_feed = guessFeed(data, res.m_contentType);
guessed_feed.first->setSource(my_url);
auto guessed_feed = guessFeed(data, res);
return {guessed_feed.first};
}
@ -98,9 +96,8 @@ QList<StandardFeed*> RssParser::discoverFeeds(ServiceRoot* root, const QUrl& url
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(feed_link);
feeds.append(guessed_feed.first);
}
catch (const ApplicationException& ex) {
@ -126,9 +123,8 @@ QList<StandardFeed*> RssParser::discoverFeeds(ServiceRoot* root, const QUrl& url
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(my_url);
feeds.append(guessed_feed.first);
}
catch (...) {
@ -151,9 +147,8 @@ QList<StandardFeed*> RssParser::discoverFeeds(ServiceRoot* root, const QUrl& url
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
auto guessed_feed = guessFeed(data, res);
guessed_feed.first->setSource(my_url);
feeds.append(guessed_feed.first);
}
catch (...) {
@ -165,7 +160,7 @@ QList<StandardFeed*> RssParser::discoverFeeds(ServiceRoot* root, const QUrl& url
}
QPair<StandardFeed*, QList<IconLocation>> RssParser::guessFeed(const QByteArray& content,
const QString& content_type) const {
const NetworkResult& network_res) const {
QString xml_schema_encoding = QSL(DEFAULT_FEED_ENCODING);
QString xml_contents_encoded;
QString enc =
@ -210,6 +205,7 @@ QPair<StandardFeed*, QList<IconLocation>> RssParser::guessFeed(const QByteArray&
QList<IconLocation> icon_possible_locations;
feed->setEncoding(xml_schema_encoding);
feed->setSource(network_res.m_url.toString());
QString rss_type = root_element.attribute(QSL("version"), QSL("2.0"));

View file

@ -17,7 +17,7 @@ class RssParser : public FeedParser {
virtual QList<StandardFeed*> discoverFeeds(ServiceRoot* root, const QUrl& url, bool greedy) const;
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
const QString& content_type) const;
const NetworkResult& network_res) const;
protected:
virtual QDomNodeList xmlMessageElements();

View file

@ -122,10 +122,7 @@ QList<StandardFeed*> SitemapParser::discoverFeeds(ServiceRoot* root, const QUrl&
if (res.m_networkError == QNetworkReply::NetworkError::NoError) {
try {
auto guessed_feed = guessFeed(data, res.m_contentType);
guessed_feed.first->setSource(my_url);
guessed_feed.first->setTitle(my_url);
auto guessed_feed = guessFeed(data, res);
feeds.insert(my_url, guessed_feed.first);
@ -149,7 +146,7 @@ QList<StandardFeed*> SitemapParser::discoverFeeds(ServiceRoot* root, const QUrl&
}
QPair<StandardFeed*, QList<IconLocation>> SitemapParser::guessFeed(const QByteArray& content,
const QString& content_type) const {
const NetworkResult& network_res) const {
QByteArray uncompressed_content;
if (isGzip(content)) {
@ -216,7 +213,8 @@ QPair<StandardFeed*, QList<IconLocation>> SitemapParser::guessFeed(const QByteAr
feed->setEncoding(xml_schema_encoding);
feed->setType(StandardFeed::Type::Sitemap);
feed->setTitle(StandardFeed::typeToString(StandardFeed::Type::Sitemap));
feed->setTitle(network_res.m_url.toString());
feed->setSource(network_res.m_url.toString());
return {feed, icon_possible_locations};
}

View file

@ -14,7 +14,7 @@ class SitemapParser : public FeedParser {
virtual QList<StandardFeed*> discoverFeeds(ServiceRoot* root, const QUrl& url, bool greedy) const;
virtual QPair<StandardFeed*, QList<IconLocation>> guessFeed(const QByteArray& content,
const QString& content_type) const;
const NetworkResult& network_res) const;
static bool isGzip(const QByteArray& content);

View file

@ -290,25 +290,22 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
const QNetworkProxy& custom_proxy) {
auto timeout = qApp->settings()->value(GROUP(Feeds), SETTING(Feeds::UpdateTimeout)).toInt();
QByteArray feed_contents;
QString content_type;
NetworkResult network_result;
if (source_type == StandardFeed::SourceType::Url) {
QList<QPair<QByteArray, QByteArray>> headers = http_headers;
headers << NetworkFactory::generateBasicAuthHeader(protection, username, password);
NetworkResult network_result =
NetworkFactory::performNetworkOperation(source,
timeout,
QByteArray(),
feed_contents,
QNetworkAccessManager::Operation::GetOperation,
headers,
false,
{},
{},
custom_proxy);
content_type = network_result.m_contentType;
network_result = NetworkFactory::performNetworkOperation(source,
timeout,
QByteArray(),
feed_contents,
QNetworkAccessManager::Operation::GetOperation,
headers,
false,
{},
{},
custom_proxy);
if (network_result.m_networkError != QNetworkReply::NetworkError::NoError) {
throw NetworkException(network_result.m_networkError);
@ -369,7 +366,7 @@ StandardFeed* StandardFeed::guessFeed(StandardFeed::SourceType source_type,
for (const QSharedPointer<FeedParser>& parser : parsers) {
try {
QPair<StandardFeed*, QList<IconLocation>> res = parser->guessFeed(feed_contents, content_type);
QPair<StandardFeed*, QList<IconLocation>> res = parser->guessFeed(feed_contents, network_result);
feed = res.first;
icon_possible_locations = res.second;

View file

@ -233,6 +233,7 @@ void Downloader::finished() {
m_lastCookies = {};
}
m_lastUrl = reply->url();
m_lastContentType = reply->header(QNetworkRequest::KnownHeaders::ContentTypeHeader).toString();
m_lastOutputError = reply->error();
m_lastHttpStatusCode = reply->attribute(QNetworkRequest::Attribute::HttpStatusCodeAttribute).toInt();
@ -372,6 +373,10 @@ void Downloader::runGetRequest(const QNetworkRequest& request) {
connect(m_activeReply, &QNetworkReply::finished, this, &Downloader::finished);
}
// Returns the URL remembered from the last finished reply; finished() stores
// reply->url() into m_lastUrl. NOTE(review): presumably this is the final URL
// after any redirects were followed - confirm against the redirect policy in use.
QUrl Downloader::lastUrl() const {
return m_lastUrl;
}
// Returns a copy of the header map held in m_lastHeaders.
// NOTE(review): presumably these are the response headers of the last
// finished reply - the code that fills m_lastHeaders is not visible here.
QMap<QString, QString> Downloader::lastHeaders() const {
return m_lastHeaders;
}

View file

@ -30,6 +30,7 @@ class Downloader : public QObject {
QList<QNetworkCookie> lastCookies() const;
int lastHttpStatusCode() const;
QMap<QString, QString> lastHeaders() const;
QUrl lastUrl() const;
void setProxy(const QNetworkProxy& proxy);
@ -114,6 +115,7 @@ class Downloader : public QObject {
QNetworkReply::NetworkError m_lastOutputError;
int m_lastHttpStatusCode;
QString m_lastContentType;
QUrl m_lastUrl;
QList<QNetworkCookie> m_lastCookies;
QMap<QString, QString> m_lastHeaders;
};

View file

@ -299,6 +299,9 @@ NetworkResult NetworkFactory::performNetworkOperation(const QString& url,
result.m_cookies = downloader.lastCookies();
result.m_httpCode = downloader.lastHttpStatusCode();
result.m_headers = downloader.lastHeaders();
result.m_url = downloader.lastUrl();
qDebugNN << LOGSEC_NETWORK << "URLS\n" << url << "\n" << result.m_url.toString();
return result;
}
@ -340,16 +343,19 @@ NetworkResult NetworkFactory::performNetworkOperation(const QString& url,
result.m_cookies = downloader.lastCookies();
result.m_httpCode = downloader.lastHttpStatusCode();
result.m_headers = downloader.lastHeaders();
result.m_url = downloader.lastUrl();
qDebugNN << LOGSEC_NETWORK << "URLS\n" << url << "\n" << result.m_url.toString();
return result;
}
NetworkResult::NetworkResult()
: m_networkError(QNetworkReply::NetworkError::NoError), m_httpCode(0), m_contentType(QString()), m_cookies({}),
m_headers({}) {}
m_headers({}), m_url({}) {}
NetworkResult::NetworkResult(QNetworkReply::NetworkError err,
int http_code,
const QString& ct,
const QList<QNetworkCookie>& cook)
: m_networkError(err), m_httpCode(http_code), m_contentType(ct), m_cookies(cook) {}
: m_networkError(err), m_httpCode(http_code), m_contentType(ct), m_cookies(cook), m_url({}) {}

View file

@ -21,6 +21,7 @@ struct RSSGUARD_DLLSPEC NetworkResult {
QString m_contentType;
QList<QNetworkCookie> m_cookies;
QMap<QString, QString> m_headers;
QUrl m_url;
explicit NetworkResult();
explicit NetworkResult(QNetworkReply::NetworkError err,