156 lines
5.2 KiB
C++
Executable file
156 lines
5.2 KiB
C++
Executable file
// For license of this file, see <project-root-folder>/LICENSE.md.
|
|
|
|
#include "services/standard/atomparser.h"
|
|
|
|
#include "miscellaneous/application.h"
|
|
#include "miscellaneous/textfactory.h"
|
|
#include "network-web/webfactory.h"
|
|
#include "services/standard/definitions.h"
|
|
|
|
#include "exceptions/applicationexception.h"
|
|
|
|
// Constructs the parser by forwarding the feed data to the FeedParser base,
// then picks the Atom namespace URI from the "version" attribute of the
// document's root element.
AtomParser::AtomParser(const QString& data) : FeedParser(data) {
  const bool is_version_03 = m_xml.documentElement().attribute(QSL("version")) == QSL("0.3");

  // Version "0.3" documents use the purl.org namespace; every other value
  // (including a missing attribute) selects the W3C 2005 namespace.
  m_atomNamespace = is_version_03 ? QSL("http://purl.org/atom/ns#") : QSL("http://www.w3.org/2005/Atom");
}
|
|
|
|
// Returns the feed-level author names joined with ", ".
//
// Only <author> elements that are direct children of the document root and
// belong to the active Atom namespace are considered. For each of them the
// text of the first nested <name> element is taken; empty names and names
// already collected are skipped.
QString AtomParser::feedAuthor() const {
  const QDomNodeList root_children = m_xml.documentElement().childNodes();
  QStringList collected;

  for (int idx = 0; idx < root_children.size(); idx++) {
    const QDomElement candidate = root_children.at(idx).toElement();
    const bool is_atom_author =
      candidate.localName() == QSL("author") && candidate.namespaceURI() == m_atomNamespace;

    if (is_atom_author) {
      const QDomNodeList name_nodes = candidate.elementsByTagNameNS(m_atomNamespace, QSL("name"));

      if (!name_nodes.isEmpty()) {
        const QString author_name = name_nodes.at(0).toElement().text();
        const bool worth_adding = !author_name.isEmpty() && !collected.contains(author_name);

        if (worth_adding) {
          collected.append(author_name);
        }
      }
    }
  }

  return collected.join(QSL(", "));
}
|
|
|
|
// Builds a Message from a single Atom entry element.
//
// msg_element  - the entry element to read.
// current_time - used as the creation time when no date could be parsed
//                from the entry.
//
// Throws ApplicationException when both the title and the body are empty.
Message AtomParser::extractMessage(const QDomElement& msg_element, QDateTime current_time) const {
  // Raw XML of the first descendant element with the given namespace/tag.
  const auto first_raw_xml = [this, &msg_element](const QString& ns, const QString& tag) {
    return rawXmlChild(msg_element.elementsByTagNameNS(ns, tag).at(0).toElement());
  };

  // Texts found for the given Atom tag, joined with ", ".
  const auto joined_atom_texts = [this, &msg_element](const QString& tag) {
    return textsFromPath(msg_element, m_atomNamespace, tag, true).join(QSL(", "));
  };

  Message parsed;
  const QString entry_title = joined_atom_texts(QSL("title"));

  // Body fallback chain: Atom <content>, then Atom <summary>, then MRSS <description>.
  QString entry_body = first_raw_xml(m_atomNamespace, QSL("content"));

  if (entry_body.isEmpty()) {
    entry_body = first_raw_xml(m_atomNamespace, QSL("summary"));
  }

  if (entry_body.isEmpty()) {
    entry_body = first_raw_xml(m_mrssNamespace, QSL("description"));
  }

  // Nothing more can be learned about title or body; reject entries that have neither.
  if (entry_title.isEmpty() && entry_body.isEmpty()) {
    throw ApplicationException(QSL("Not enough data for the message."));
  }

  // At least one of title/body is present from here on.
  parsed.m_title = qApp->web()->unescapeHtml(qApp->web()->stripTags(entry_title));
  parsed.m_contents = entry_body;
  parsed.m_author = qApp->web()->unescapeHtml(messageAuthor(msg_element));
  parsed.m_customId = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("id")).at(0).toElement().text();

  // Keep a serialized copy of the whole entry element.
  QString serialized_entry;
  QTextStream serializer(&serialized_entry);

  serializer.setCodec(DEFAULT_FEED_ENCODING);
  msg_element.save(serializer, 0, QDomNode::EncodingPolicy::EncodingFromTextStream);
  parsed.m_rawContents = serialized_entry;

  // Creation date: prefer <updated>, fall back to <modified>.
  QString date_text = joined_atom_texts(QSL("updated"));

  if (date_text.isEmpty()) {
    date_text = joined_atom_texts(QSL("modified"));
  }

  parsed.m_created = TextFactory::parseDateTime(date_text);
  parsed.m_createdFromFeed = !parsed.m_created.isNull();

  if (!parsed.m_createdFromFeed) {
    // No usable date in the entry, so stamp the message with the supplied time.
    parsed.m_created = current_time;
  }

  // Links: rel="enclosure" become enclosures; for the remaining links the last
  // "alternate" (or rel-less) href and the last href with any other rel are remembered.
  const QDomNodeList link_nodes = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("link"));
  QString alternate_href;
  QString other_href;

  for (int idx = 0; idx < link_nodes.size(); idx++) {
    const QDomElement link_elem = link_nodes.at(idx).toElement();
    const QString rel = link_elem.attribute(QSL("rel"));
    const QString href = link_elem.attribute(QSL("href"));

    if (rel == QSL("enclosure")) {
      parsed.m_enclosures.append(Enclosure(href, link_elem.attribute(QSL("type"))));
      qDebugNN << LOGSEC_CORE
               << "Found enclosure"
               << QUOTE_W_SPACE(parsed.m_enclosures.last().m_url)
               << "for the message.";
    }
    else if (rel.isEmpty() || rel == QSL("alternate")) {
      alternate_href = href;
    }
    else {
      other_href = href;
    }
  }

  // MRSS enclosures are appended after the Atom ones.
  parsed.m_enclosures.append(mrssGetEnclosures(msg_element));

  // URL priority: alternate link, any other link, first enclosure.
  // If none is available, m_url is left untouched.
  if (!alternate_href.isEmpty()) {
    parsed.m_url = alternate_href;
  }
  else if (!other_href.isEmpty()) {
    parsed.m_url = other_href;
  }
  else if (!parsed.m_enclosures.isEmpty()) {
    parsed.m_url = parsed.m_enclosures.first().m_url;
  }

  return parsed;
}
|
|
|
|
// Returns the author names of one entry joined with ", ".
//
// msg_element - the entry element to inspect.
//
// Every <author> descendant in the active Atom namespace contributes the text
// of its first nested <name> element. Empty names and names that were already
// collected are skipped, which matches the filtering done by feedAuthor() and
// avoids results such as ", Bob" or "Bob, Bob".
QString AtomParser::messageAuthor(const QDomElement& msg_element) const {
  const QDomNodeList authors = msg_element.elementsByTagNameNS(m_atomNamespace, QSL("author"));
  QStringList author_str;

  for (int i = 0; i < authors.size(); i++) {
    const QDomNodeList names = authors.at(i).toElement().elementsByTagNameNS(m_atomNamespace, QSL("name"));

    if (names.isEmpty()) {
      continue;
    }

    const QString name = names.at(0).toElement().text();

    // elementsByTagNameNS() searches all descendants, so the same author can
    // show up more than once; keep only the first occurrence of each name.
    if (!name.isEmpty() && !author_str.contains(name)) {
      author_str.append(name);
    }
  }

  return author_str.join(QSL(", "));
}
|
|
|
|
// Returns the Atom namespace URI chosen in the constructor.
QString AtomParser::atomNamespace() const {
  return m_atomNamespace;
}
|
|
|
|
// Returns every <entry> element of the document that belongs to the active
// Atom namespace.
QDomNodeList AtomParser::messageElements() {
  const QString entry_tag = QSL("entry");

  return m_xml.elementsByTagNameNS(m_atomNamespace, entry_tag);
}
|