own implementation of tld
This commit is contained in:
parent
df6368d3d5
commit
c13cf31654
4 changed files with 13777 additions and 4 deletions
|
@ -5,6 +5,8 @@
|
||||||
<file>text/COPYING_MIT</file>
|
<file>text/COPYING_MIT</file>
|
||||||
<file>text/COPYING_GNU_GPL</file>
|
<file>text/COPYING_GNU_GPL</file>
|
||||||
<file>text/COPYING_GNU_GPL_HTML</file>
|
<file>text/COPYING_GNU_GPL_HTML</file>
|
||||||
|
|
||||||
|
<file>scripts/public_suffix_list.dat</file>
|
||||||
|
|
||||||
<file>graphics/rssguard.ico</file>
|
<file>graphics/rssguard.ico</file>
|
||||||
|
|
||||||
|
|
13680
resources/scripts/public_suffix_list.dat
Executable file
13680
resources/scripts/public_suffix_list.dat
Executable file
File diff suppressed because it is too large
Load diff
|
@ -52,6 +52,8 @@
|
||||||
#include "network-web/adblock/adblockrequestinfo.h"
|
#include "network-web/adblock/adblockrequestinfo.h"
|
||||||
#include "network-web/adblock/adblocksubscription.h"
|
#include "network-web/adblock/adblocksubscription.h"
|
||||||
|
|
||||||
|
#include "network-web/urltld.cpp"
|
||||||
|
|
||||||
#include <QRegularExpression>
|
#include <QRegularExpression>
|
||||||
#include <QString>
|
#include <QString>
|
||||||
#include <QStringList>
|
#include <QStringList>
|
||||||
|
@ -59,14 +61,14 @@
|
||||||
#include <QWebEnginePage>
|
#include <QWebEnginePage>
|
||||||
|
|
||||||
static QString toSecondLevelDomain(const QUrl& url) {
|
static QString toSecondLevelDomain(const QUrl& url) {
|
||||||
const QString topLevelDomain = url.topLevelDomain();
|
const QString tld = topLevelDomain(url);
|
||||||
const QString urlHost = url.host();
|
const QString urlHost = url.host();
|
||||||
|
|
||||||
if (topLevelDomain.isEmpty() || urlHost.isEmpty()) {
|
if (tld.isEmpty() || urlHost.isEmpty()) {
|
||||||
return QString();
|
return QString();
|
||||||
}
|
}
|
||||||
|
|
||||||
QString domain = urlHost.left(urlHost.size() - topLevelDomain.size());
|
QString domain = urlHost.left(urlHost.size() - tld.size());
|
||||||
|
|
||||||
if (domain.count(QL1C('.')) == 0) {
|
if (domain.count(QL1C('.')) == 0) {
|
||||||
return urlHost;
|
return urlHost;
|
||||||
|
@ -76,7 +78,7 @@ static QString toSecondLevelDomain(const QUrl& url) {
|
||||||
domain = domain.mid(domain.indexOf(QL1C('.')) + 1);
|
domain = domain.mid(domain.indexOf(QL1C('.')) + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return domain + topLevelDomain;
|
return domain + tld;
|
||||||
}
|
}
|
||||||
|
|
||||||
AdBlockRule::AdBlockRule(const QString& filter, AdBlockSubscription* subscription)
|
AdBlockRule::AdBlockRule(const QString& filter, AdBlockSubscription* subscription)
|
||||||
|
|
89
src/librssguard/network-web/urltld.cpp
Executable file
89
src/librssguard/network-web/urltld.cpp
Executable file
|
@ -0,0 +1,89 @@
|
||||||
|
// For license of this file, see <project-root-folder>/LICENSE.md.
|
||||||
|
|
||||||
|
#include <QHash>
|
||||||
|
#include <QUrl>
|
||||||
|
#include <QFile>
|
||||||
|
#include <QStringList>
|
||||||
|
#include <QStringLiteral>
|
||||||
|
#include <QRegularExpression>
|
||||||
|
|
||||||
|
static QStringList s_tlds = {};
|
||||||
|
|
||||||
|
static void loadTlds() {
|
||||||
|
QFile fl(QStringLiteral(":/scripts/public_suffix_list.dat"));
|
||||||
|
|
||||||
|
QByteArray data;
|
||||||
|
|
||||||
|
if (fl.open(QIODevice::OpenModeFlag::Text | QIODevice::OpenModeFlag::Unbuffered | QIODevice::OpenModeFlag::ReadOnly)) {
|
||||||
|
data = fl.readAll();
|
||||||
|
fl.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
QString str_data = QString::fromUtf8(data);
|
||||||
|
|
||||||
|
s_tlds << str_data.split(QStringLiteral("\n"), Qt::SplitBehaviorFlags::SkipEmptyParts).filter(QRegularExpression("^[^/].+$"));
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool containsTldEntry(const QString& entry) {
|
||||||
|
if (s_tlds.isEmpty()) {
|
||||||
|
loadTlds();
|
||||||
|
|
||||||
|
std::sort(s_tlds.begin(), s_tlds.end(), [=](const QString& lhs, const QString& rhs) {
|
||||||
|
return lhs.compare(rhs) < 0;
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return std::binary_search(s_tlds.begin(), s_tlds.end(), entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool isEffectiveTld(const QString& domain) {
|
||||||
|
// for domain 'foo.bar.com':
|
||||||
|
// 1. return if TLD table contains 'foo.bar.com'
|
||||||
|
if (containsTldEntry(domain)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (domain.contains(QLatin1Char('.'))) {
|
||||||
|
int count = domain.size() - domain.indexOf(QLatin1Char('.'));
|
||||||
|
QString wild_card_domain;
|
||||||
|
|
||||||
|
wild_card_domain.reserve(count + 1);
|
||||||
|
wild_card_domain.append(QLatin1Char('*'));
|
||||||
|
wild_card_domain.append(domain.rightRef(count));
|
||||||
|
|
||||||
|
// 2. if table contains '*.bar.com',
|
||||||
|
// test if table contains '!foo.bar.com'
|
||||||
|
if (containsTldEntry(wild_card_domain)) {
|
||||||
|
QString exception_domain;
|
||||||
|
|
||||||
|
exception_domain.reserve(domain.size() + 1);
|
||||||
|
exception_domain.append(QLatin1Char('!'));
|
||||||
|
exception_domain.append(domain);
|
||||||
|
|
||||||
|
return !containsTldEntry(exception_domain);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns the effective top-level domain of the host of `url`, including the
// leading dot (e.g. '.co.uk'), or an empty string when the URL has no host or
// no suffix of the host is recognised by isEffectiveTld().
//
// Only the host component is examined. Splitting the whole URL text on '.'
// would mix the scheme, path and query into the labels
// ('https://www', 'com/path/file', 'html', ...), producing a result that is
// not a suffix of url.host(), which is what callers subtract it from.
static QString topLevelDomain(const QUrl& url) {
  const QString host = url.host().toLower();
  const QStringList sections = host.split(QLatin1Char('.'), Qt::SplitBehaviorFlags::SkipEmptyParts);

  if (sections.isEmpty()) {
    return QString();
  }

  QString level, tld;

  // Grow the candidate suffix label by label from the right
  // ('.uk', '.co.uk', '.example.co.uk', ...) and remember the longest
  // candidate that is an effective TLD.
  for (int j = sections.count() - 1; j >= 0; j--) {
    level.prepend(QLatin1Char('.') + sections.at(j));

    // The table stores entries without the leading dot.
    if (isEffectiveTld(level.mid(1))) {
      tld = level;
    }
  }

  return tld;
}
|
Loading…
Add table
Reference in a new issue