Some fixes to author determination in atom feeds.

This commit is contained in:
Martin Rotter 2017-07-26 11:16:18 +02:00
parent b6b10919fc
commit 28fe0aee64
2 changed files with 78 additions and 69 deletions

View file

@ -2,7 +2,12 @@
————— —————
Added: Added:
▪ Some minor tweaks/fixes.
▪ New Ad-Block feature for internal web browser. Based on Qt examples and QupZilla. Original license GNU GPLv3 is retained. (#123) ▪ New Ad-Block feature for internal web browser. Based on Qt examples and QupZilla. Original license GNU GPLv3 is retained. (#123)
▪ New Qt 5.9.1 release.
▪ New SSL libraries.
▪ Binaries are now compiled on x64-only mode for Windows!!!
3.4.1 3.4.1
————— —————

View file

@ -30,101 +30,105 @@ AtomParser::~AtomParser() {
} }
QString AtomParser::feedAuthor() const { QString AtomParser::feedAuthor() const {
QDomNodeList authors = m_xml.documentElement().elementsByTagNameNS(m_atomNamespace, QSL("author")); QDomNodeList authors = m_xml.documentElement().elementsByTagNameNS(m_atomNamespace, QSL("author"));
QStringList author_str; QStringList author_str;
for (int i = 0; i < authors.size(); i++) { for (int i = 0; i < authors.size(); i++) {
QDomNodeList names = authors.at(i).toElement().elementsByTagNameNS(m_atomNamespace, QSL("name")); QDomNodeList names = authors.at(i).toElement().elementsByTagNameNS(m_atomNamespace, QSL("name"));
if (!names.isEmpty()) { if (!names.isEmpty()) {
author_str.append(names.at(0).toElement().text()); const QString name = names.at(0).toElement().text();
}
}
return author_str.join(", "); if (!name.isEmpty() && !author_str.contains(name)) {
author_str.append(name);
}
}
}
return author_str.join(", ");
} }
Message AtomParser::extractMessage(const QDomElement& msg_element, QDateTime current_time) const { Message AtomParser::extractMessage(const QDomElement& msg_element, QDateTime current_time) const {
Message new_message; Message new_message;
QString title = textsFromPath(msg_element, m_atomNamespace, QSL("title"), true).join(QSL(", ")); QString title = textsFromPath(msg_element, m_atomNamespace, QSL("title"), true).join(QSL(", "));
QString summary = textsFromPath(msg_element, m_atomNamespace, QSL("content"), true).join(QSL(", ")); QString summary = textsFromPath(msg_element, m_atomNamespace, QSL("content"), true).join(QSL(", "));
if (summary.isEmpty()) { if (summary.isEmpty()) {
summary = textsFromPath(msg_element, m_atomNamespace, QSL("summary"), true).join(QSL(", ")); summary = textsFromPath(msg_element, m_atomNamespace, QSL("summary"), true).join(QSL(", "));
} }
// Now we obtained maximum of information for title & description. // Now we obtained maximum of information for title & description.
if (title.isEmpty() && summary.isEmpty()) { if (title.isEmpty() && summary.isEmpty()) {
// BOTH title and description are empty, skip this message. // BOTH title and description are empty, skip this message.
throw new ApplicationException(QSL("Not enough data for the message.")); throw new ApplicationException(QSL("Not enough data for the message."));
} }
// Title is not empty, description does not matter. // Title is not empty, description does not matter.
new_message.m_title = WebFactory::instance()->stripTags(title); new_message.m_title = WebFactory::instance()->stripTags(title);
new_message.m_contents = summary; new_message.m_contents = summary;
new_message.m_author = WebFactory::instance()->escapeHtml(messageAuthor(msg_element)); new_message.m_author = WebFactory::instance()->escapeHtml(messageAuthor(msg_element));
QString updated = textsFromPath(msg_element, m_atomNamespace, QSL("updated"), true).join(QSL(", ")); QString updated = textsFromPath(msg_element, m_atomNamespace, QSL("updated"), true).join(QSL(", "));
// Deal with creation date. // Deal with creation date.
new_message.m_created = TextFactory::parseDateTime(updated); new_message.m_created = TextFactory::parseDateTime(updated);
new_message.m_createdFromFeed = !new_message.m_created.isNull(); new_message.m_createdFromFeed = !new_message.m_created.isNull();
if (!new_message.m_createdFromFeed) { if (!new_message.m_createdFromFeed) {
// Date was NOT obtained from the feed, set current date as creation date for the message. // Date was NOT obtained from the feed, set current date as creation date for the message.
new_message.m_created = current_time; new_message.m_created = current_time;
} }
// Deal with links // Deal with links
QDomNodeList elem_links = msg_element.toElement().elementsByTagNameNS(m_atomNamespace, QSL("link")); QDomNodeList elem_links = msg_element.toElement().elementsByTagNameNS(m_atomNamespace, QSL("link"));
QString last_link_alternate, last_link_other; QString last_link_alternate, last_link_other;
for (int i = 0; i < elem_links.size(); i++) { for (int i = 0; i < elem_links.size(); i++) {
QDomElement link = elem_links.at(i).toElement(); QDomElement link = elem_links.at(i).toElement();
QString attribute = link.attribute(QSL("rel")); QString attribute = link.attribute(QSL("rel"));
if (attribute == QSL("enclosure")) { if (attribute == QSL("enclosure")) {
new_message.m_enclosures.append(Enclosure(link.attribute(QSL("href")), link.attribute(QSL("type")))); new_message.m_enclosures.append(Enclosure(link.attribute(QSL("href")), link.attribute(QSL("type"))));
qDebug("Adding enclosure '%s' for the message.", qPrintable(new_message.m_enclosures.last().m_url)); qDebug("Adding enclosure '%s' for the message.", qPrintable(new_message.m_enclosures.last().m_url));
} }
else if (attribute.isEmpty() || attribute == QSL("alternate")) { else if (attribute.isEmpty() || attribute == QSL("alternate")) {
last_link_alternate = link.attribute(QSL("href")); last_link_alternate = link.attribute(QSL("href"));
} }
else { else {
last_link_other = link.attribute(QSL("href")); last_link_other = link.attribute(QSL("href"));
} }
} }
if (!last_link_alternate.isEmpty()) { if (!last_link_alternate.isEmpty()) {
new_message.m_url = last_link_alternate; new_message.m_url = last_link_alternate;
} }
else if (!last_link_other.isEmpty()) { else if (!last_link_other.isEmpty()) {
new_message.m_url = last_link_other; new_message.m_url = last_link_other;
} }
else if (!new_message.m_enclosures.isEmpty()) { else if (!new_message.m_enclosures.isEmpty()) {
new_message.m_url = new_message.m_enclosures.first().m_url; new_message.m_url = new_message.m_enclosures.first().m_url;
} }
return new_message; return new_message;
} }
QString AtomParser::messageAuthor(const QDomElement& msg_element) const { QString AtomParser::messageAuthor(const QDomElement& msg_element) const {
QDomNodeList authors = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("author")); QDomNodeList authors = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("author"));
QStringList author_str; QStringList author_str;
for (int i = 0; i < authors.size(); i++) { for (int i = 0; i < authors.size(); i++) {
QDomNodeList names = authors.at(i).toElement().elementsByTagNameNS(m_atomNamespace, QSL("name")); QDomNodeList names = authors.at(i).toElement().elementsByTagNameNS(m_atomNamespace, QSL("name"));
if (!names.isEmpty()) { if (!names.isEmpty()) {
author_str.append(names.at(0).toElement().text()); author_str.append(names.at(0).toElement().text());
} }
} }
return author_str.join(", "); return author_str.join(", ");
} }
QDomNodeList AtomParser::messageElements() { QDomNodeList AtomParser::messageElements() {
return m_xml.elementsByTagNameNS(m_atomNamespace, QSL("entry")); return m_xml.elementsByTagNameNS(m_atomNamespace, QSL("entry"));
} }