From 7161156a2eded2240cd379bbc4572e53340f8e6f Mon Sep 17 00:00:00 2001 From: Penelope Gomez / Pogmommy Date: Thu, 2 Mar 2023 02:29:47 -0700 Subject: [PATCH] attempt at overhauling xpath selectors attempted to integrate more-static attributes will likely break next week --- ref/classes/xpaths.js | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/ref/classes/xpaths.js b/ref/classes/xpaths.js index 5473cfd..8a317d0 100644 --- a/ref/classes/xpaths.js +++ b/ref/classes/xpaths.js @@ -1,36 +1,37 @@ class TweetPaths { constructor(orig,i) { if (orig == 'home') { - this.timeLine = "//*[@id='react-root']/div/div/div[2]/main/div/div/div/div/div/div[3]/div/div/section/div/div"; //the immediate parent div of all tweets + this.timeLine = "/html//section/div/div"; //FIXED //the immediate parent div of all tweets } else if (orig == 'thread') { - this.timeLine = "/html/body/div[1]/div/div/div[2]/main/div/div/div/div[1]/div/section/div/div" //thread tweet xpath + this.timeLine = "/html//section/div/div" //FIXED //thread tweet xpath } this.tweet = (`${this.timeLine}/div`); //the div containing individual tweet content: (tweetXpath + '[1]') - this.containsDivs = (`${this.timeLine}[count(div) > 1]`) //timeline conntaining divs - this.path = `${this.tweet}[${orig == 'home' ? 1 : i}]`; + this.containsDivs = (`${this.timeLine}[count(div[@data-testid='cellInnerDiv']) > 1]`) //timeline conntaining divs + this.path = `${this.tweet}[${orig == 'home' ? 1 : i}][@data-testid='cellInnerDiv']`; //FIXED //the following xpaths follow an individual tweet xpath: (tweetXpath + '[1]' + variableXPath) - this.urlCard = `${this.path}/div/div/div/article/div/div/div/div[*]/div[*]/div[*]/div[*]/div/div[2]/a` - this.tweeterHandle = `${this.path}/div/div/div/article/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div/div/div[2]/div/div[1]/a/div/span[contains(text(),'@')]` /*text label containing tweeter's handle*/ - this.tweeterName = `${this.path}/div/div/div/article/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div/div/div[1]/div/a/div/div[1]/span` /*text label containing tweeter's name*/ - this.quoteTweetHandle = `${this.path}/div/div/div/article/div/div/div/div[2]/div[2]/div[2]/div[2]/div[*]/div[2]/div/div[1]/div/div/div/div/div/div[2]/div[1]/div/div/div/span`; //xpath to text label that reveals if a tweet is a quote tweet (leads to the quote tweeted user's handle) - this.quoteTweetContent = `${this.path}/div/div/div/article/div/div/div/div[2]/div[2]/div[2]/div[2]/div[*]/div[2][div/div[1]/div/div/div/div/div/div[2]/div[1]/div/div/div/span]` /*xpath to locate entirety of Quote Tweeted Content*/ + this.tweeterHandle = `${this.path}//article//div[@data-testid='User-Names']//a[not(contains(@href,'status'))]/div/span[starts-with(text(),'@')]` //FIXED /*text label containing tweeter's handle*/ //FIXED + this.tweeterName = `${this.path}//article//div[@data-testid='User-Names']/div[1]//a[not(contains(@href,'status'))]//div[1]/span` //FIXED /*text label containing tweeter's name*/ //FIXED + //this.quoteTweetHandle = `${this.path}//article/div/div/div/div[2]/div[2]/div[2]/div[2]/div[*]/div[2]/div/div[1]/div/div/div/div/div/div[2]/div[1]/div/div/div/span`; //xpath to text label that reveals if a tweet is a quote tweet (leads to the quote tweeted user's handle) + this.quoteTweetContent = `${this.path}//article//div[div/span[.="Quote Tweet"]]/div[2]` //FIXED /*xpath to locate entirety of Quote Tweeted Content*/ - this.ageRestricted = `${this.path}/div/div/div/article//span/span[1]/span[contains(text(),'Age-restricted')]`; //xpath that reveals if tweet is age-restricted (& therefore not visible) - this.pinnedTweet = `${this.path}/div/div/div/article/div/div/div/div[1]/div/div/div/div/div[2]/div/div/div/span[contains(text(),'Pinned')]` /*//xpath that reveals if tweet is pinned*/ + this.ageRestricted = `${this.path}//article//span[span[span[contains(text(),'Age-restricted')]]]/a[contains(@href,'notices')]`; //FIXED //xpath that reveals if tweet is age-restricted (& therefore not visible) + this.pinnedTweet = `${this.path}//article//div[@data-testid='socialContext']/span[contains(text(),'Pinned')]`; //FIXED /*//xpath that reveals if tweet is pinned*/ - this.tweetText = `${this.path}//div[@data-testid='tweetText']`; //xpath that leads to div containing all tweet text + this.tweetText = `${this.path}//article//div[@data-testid='tweetText']`; //xpath that leads to div containing all tweet text //FIXED + this.urlCard = `${this.path}//article//a[div[contains(@data-testid,'card')]]` //FIXED //this.emoji = this.path + "//img"; //xpath that leads to div containing all tweet text - this.tweetURL = `${this.path}//div[3]/a[contains(@href, 'status')]`; //xpath to tweet url - this.video = `${this.path}/div/div/div/article/div/div/div/div[*]/div[*]/div[*]/div[*]/div[1]/div[1]//video`; //xpath that leads to videos that are not parts of quoted content - this.singleImage = `${this.path}//div[1]/div/div/div/div/a/div/div[2]/div/img[@alt='Image']`; //xpath to image that reveals if a tweet has one image - this.multiImage = `${this.path}//div[2]/div[2]/div[2]/div[2]/div/div/div/div/div[2]/div/div[1]/div[1]//a/div/div/img[@alt='Image']`; //xpath to image that reveals if a tweet has more than one image + this.tweetURL = `${this.path}//article//div[@data-testid='User-Names']//div[3]//a[contains(@href,'status')]`; //xpath to tweet url //FIXED + this.video = `${this.path}/html//section/div/div/div[*][@data-testid='cellInnerDiv']//article//div[2]/div[3]/div[1]/div[1]//video`; //FIXED //xpath that leads to videos that are not parts of quoted content + this.singleImage = `${this.path}//article//div[2]/div[3]/div[1]/div[1]/div/div[1]//a//div[2]/div[@data-testid='tweetPhoto']/img`; //FIXED //xpath to image that reveals if a tweet has one image + //this.singleImage = `${this.path}//div[1]/div/div/div/div/a/div/div[2]/div/img[@alt='Image']`; //xpath to image that reveals if a tweet has one image + this.multiImage = `${this.path}//article//div[3]/div[1]//div[1]/div[2]/div/div[2]/div[1]//a//div[@data-testid='tweetPhoto']/img`; //FIXED //xpath to image that reveals if a tweet has more than one image if (orig == 'home') { //home timeline only - this.detectThread = `${this.path}/div/div/div/article/div/a/div/div[2]/div/span`; //xpath to text label that reveals if a tweet is a part of a thread from home timeline - this.detectRT = `${this.path}/div/div/div/article/div/div/div/div[1]/div/div/div/div/div[2]/div/div/div/a/span`; //xpath to text label that reveals if a tweet is a retweet + this.detectThread = `${this.path}//article//a[contains(@href,'status')]//span[contains(text(),'Show this thread')]`; //xpath to text label that reveals if a tweet is a part of a thread from home timeline //FIXED + this.detectRT = `${this.path}//article//a[span[@data-testid='socialContext']]/span[contains(., 'Retweeted')]`; //xpath to text label that reveals if a tweet is a retweet //FIXED } else if (orig == 'thread'){ //thread timeline only - this.entryTweet = `${this.path}/div/div/div/article/div/div/div/div[3]/div[*]/div[@role='group']` /*xpath that reveals if tweet is open in thread //openThreadTweetTSXPath*/ + this.notEntryTweet = `${this.path}//article//div[@data-testid='User-Names']//time` //FIXED /*xpath that reveals if tweet is open in thread //openThreadTweetTSXPath*/ //fixed } //the following xpaths follow an individual tweet xpath and are used to find all images in a tweet with multiple images: (tweetXpath + '[1]' + multiImage1XPath + x + multiImage2XPath + y + multiImage3XPath) @@ -39,9 +40,9 @@ class TweetPaths { // 2,1 = second image // 2,2 = third image // 1,2 = fourth image - this.multiImage1 = "//div[2]/div[2]/div[2]/div[2]/div/div/div/div/div[2]/div/div["; + this.multiImage1 = "//article//div[3]/div[1]//div[1]/div[2]/div/div["; this.multiImage2 = "]/div["; - this.multiImage3 = "]//a/div/div/img[@alt='Image']"; + this.multiImage3 = "]//a//div[@data-testid='tweetPhoto']/img"; } tweetElement(i, pathFromTweet) { let xPath = (this.path + pathFromTweet);