rssguard/src/librssguard/services/standard/atomparser.cpp
2021-07-25 09:08:43 +02:00

156 lines
5.2 KiB
C++
Executable file

// For license of this file, see <project-root-folder>/LICENSE.md.
#include "services/standard/atomparser.h"
#include "miscellaneous/application.h"
#include "miscellaneous/textfactory.h"
#include "network-web/webfactory.h"
#include "services/standard/definitions.h"
#include "exceptions/applicationexception.h"
// Builds the parser on top of FeedParser and selects the Atom namespace URI
// used by all subsequent namespace-aware lookups.
//
// Documents whose root element declares version="0.3" use the older
// "purl.org" namespace; everything else is treated as Atom 2005.
AtomParser::AtomParser(const QString& data) : FeedParser(data) {
  const bool is_version_03 = m_xml.documentElement().attribute(QSL("version")) == QSL("0.3");

  m_atomNamespace = is_version_03
                    ? QSL("http://purl.org/atom/ns#")
                    : QSL("http://www.w3.org/2005/Atom");
}
// Collects the names of all feed-level authors.
//
// Only direct children of the document element are inspected, so authors
// nested inside other elements are not picked up here. For each matching
// <author> the text of its first <name> is taken; empty names and names
// already collected are skipped. The result is joined with ", ".
QString AtomParser::feedAuthor() const {
  const QDomNodeList root_children = m_xml.documentElement().childNodes();
  QStringList unique_names;

  for (int idx = 0; idx < root_children.size(); ++idx) {
    const QDomElement candidate = root_children.at(idx).toElement();
    const bool is_atom_author = candidate.localName() == QSL("author") &&
                                candidate.namespaceURI() == m_atomNamespace;

    if (is_atom_author) {
      const QDomNodeList name_nodes = candidate.elementsByTagNameNS(m_atomNamespace, QSL("name"));

      if (name_nodes.isEmpty()) {
        continue;
      }

      const QString author_name = name_nodes.at(0).toElement().text();

      if (author_name.isEmpty() || unique_names.contains(author_name)) {
        continue;
      }

      unique_names.append(author_name);
    }
  }

  return unique_names.join(", ");
}
// Converts one Atom entry element into a Message.
//
// msg_element  - the entry element to read.
// current_time - used as creation date when the entry carries no parsable date.
//
// Returns the filled Message. Throws ApplicationException when both the
// title and the body of the entry are empty.
Message AtomParser::extractMessage(const QDomElement& msg_element, QDateTime current_time) const {
  // Raw XML of the first descendant with the given namespace and tag name.
  const auto raw_xml_of = [this, &msg_element](const QString& ns, const QString& tag) {
    return rawXmlChild(msg_element.elementsByTagNameNS(ns, tag).at(0).toElement());
  };

  // All texts found for the given Atom tag, joined into a single string.
  const auto atom_text = [this, &msg_element](const QString& tag) {
    return textsFromPath(msg_element, m_atomNamespace, tag, true).join(QSL(", "));
  };

  Message new_message;
  QString title = atom_text(QSL("title"));

  // Body preference: Atom <content>, then Atom <summary>, then MRSS <description>.
  QString body = raw_xml_of(m_atomNamespace, QSL("content"));

  if (body.isEmpty()) {
    body = raw_xml_of(m_atomNamespace, QSL("summary"));
  }

  if (body.isEmpty()) {
    body = raw_xml_of(m_mrssNamespace, QSL("description"));
  }

  // An entry with neither title nor body carries nothing worth storing.
  if (title.isEmpty() && body.isEmpty()) {
    throw ApplicationException(QSL("Not enough data for the message."));
  }

  // At least one of title/body is present from here on.
  new_message.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(title));
  new_message.m_contents = body;
  new_message.m_author = qApp->web()->unescapeHtml(messageAuthor(msg_element));
  new_message.m_customId = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("id")).at(0).toElement().text();

  // Keep the serialized entry alongside the parsed fields.
  QString serialized_entry;
  QTextStream entry_stream(&serialized_entry);

  entry_stream.setCodec(DEFAULT_FEED_ENCODING);
  msg_element.save(entry_stream, 0, QDomNode::EncodingPolicy::EncodingFromTextStream);
  new_message.m_rawContents = serialized_entry;

  // Creation date: <updated> first, <modified> as the fallback tag.
  QString date_text = atom_text(QSL("updated"));

  if (date_text.isEmpty()) {
    date_text = atom_text(QSL("modified"));
  }

  new_message.m_created = TextFactory::parseDateTime(date_text);
  new_message.m_createdFromFeed = !new_message.m_created.isNull();

  if (!new_message.m_createdFromFeed) {
    // No usable date in the feed, fall back to the time supplied by the caller.
    new_message.m_created = current_time;
  }

  // Walk all <link> elements: enclosures are collected, for the other two
  // categories only the last seen href is remembered.
  const QDomNodeList link_nodes = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("link"));
  QString alternate_href;
  QString other_href;

  for (int idx = 0; idx < link_nodes.size(); ++idx) {
    const QDomElement link = link_nodes.at(idx).toElement();
    const QString rel = link.attribute(QSL("rel"));

    if (rel == QSL("enclosure")) {
      new_message.m_enclosures.append(Enclosure(link.attribute(QSL("href")), link.attribute(QSL("type"))));
      qDebugNN << LOGSEC_CORE
               << "Found enclosure"
               << QUOTE_W_SPACE(new_message.m_enclosures.last().m_url)
               << "for the message.";
      continue;
    }

    // A missing "rel" attribute is handled the same way as rel="alternate".
    if (rel.isEmpty() || rel == QSL("alternate")) {
      alternate_href = link.attribute(QSL("href"));
    }
    else {
      other_href = link.attribute(QSL("href"));
    }
  }

  // MRSS enclosures go after the Atom ones.
  new_message.m_enclosures.append(mrssGetEnclosures(msg_element));

  // Message URL priority: alternate link, any other link, first enclosure.
  if (!alternate_href.isEmpty()) {
    new_message.m_url = alternate_href;
  }
  else if (!other_href.isEmpty()) {
    new_message.m_url = other_href;
  }
  else if (!new_message.m_enclosures.isEmpty()) {
    new_message.m_url = new_message.m_enclosures.first().m_url;
  }

  return new_message;
}
// Collects the author names of a single entry.
//
// msg_element - the entry element to inspect.
//
// Every <author> element found beneath the entry (in the Atom namespace)
// contributes the text of its first <name>. Empty names and names already
// collected are skipped, matching what feedAuthor() does, so the result
// never contains stray separators or repeated authors.
//
// Returns the names joined with ", ", or an empty string if none were found.
QString AtomParser::messageAuthor(const QDomElement& msg_element) const {
  const QDomNodeList authors = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("author"));
  QStringList author_str;

  for (int i = 0; i < authors.size(); i++) {
    const QDomNodeList names = authors.at(i).toElement().elementsByTagNameNS(m_atomNamespace, QSL("name"));

    if (names.isEmpty()) {
      continue;
    }

    const QString name = names.at(0).toElement().text();

    // An empty <name/> would otherwise yield output such as "John, ".
    if (!name.isEmpty() && !author_str.contains(name)) {
      author_str.append(name);
    }
  }

  return author_str.join(", ");
}
// Returns the Atom namespace URI this parser uses for its element lookups.
QString AtomParser::atomNamespace() const {
return m_atomNamespace;
}
// Returns every <entry> element of the document that lives in the Atom
// namespace; each of them represents one message of the feed.
QDomNodeList AtomParser::messageElements() {
  QDomNodeList entries = m_xml.elementsByTagNameNS(m_atomNamespace, QSL("entry"));

  return entries;
}