This repository has been archived on 2025-03-19. You can view files and clone it, but cannot push or open issues or pull requests.
TwitToMast/TwitToMast.js
Penelope Gomez / Pogmommy 78f84dc2d0 fix tweet text xpath
now catches primary tweet text and not quote tweet text
2023-03-02 16:55:50 -07:00

273 lines
No EOL
13 KiB
JavaScript

//REQUIREMENTS
const webdriver = require('selenium-webdriver');
const chrome = require('selenium-webdriver/chrome');
const fs = require('fs');
//LOCAL REQUIREMENTS
const support = require('./ref/functions/support.js');
const debuglog = support.debuglog;
const elements = require('./ref/functions/elements.js');
const csv = require('./ref/functions/csv.js');
const mastodon = require('./ref/functions/mastodon.js');
const Args = require('./ref/classes/arguments.js');
const args = new Args();
const Formats = require('./ref/classes/formats.js');
const format = new Formats();
const Tweets = require('./ref/classes/tweets.js');
//LOG ARGUMENTS
support.validateArgs();
support.logArguments();
//SETUP SAVE DIRECTORY VARIABLES
const localDir = './';
const imgSavePath = (`${localDir}imgs/${args.userName}/`);
if (!fs.existsSync(imgSavePath)){
fs.mkdirSync(imgSavePath);
}
const csvSaveDir = (`${localDir}csv/`);
const csvFileName = (`${csvSaveDir + args.userName}.csv`);
if (!fs.existsSync(csvSaveDir)){
fs.mkdirSync(csvSaveDir);
}
var csvOutput = "_";
debuglog(`csv file name: ${csvFileName}`,2);
debuglog(`user image save path${imgSavePath}`,2);
//SETUP HEADLESS WEBDRIVER
const screen = {
width: 1920,
height: 1080
};
let chromeOptions = new chrome.Options().addArguments(['user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36']);
if (!args.displayBrowser) {chromeOptions.headless().windowSize(screen);}
var driver = new webdriver.Builder()
.forBrowser('chrome')
.setChromeOptions(chromeOptions)
.build();
//START WEBDRIVER AND ZOOM OUT
debuglog("starting webdriver...",2);
driver.get(`https://mobile.twitter.com/${args.userName}/`);
debuglog("started webdriver!",2);
driver.executeScript("document.body.style.zoom='35%'");
(async function(){
//OPEN CSV FILE, CREATE IF NEEDED
debuglog("opening csv",2);
fs.readFile(csvFileName, "utf-8", (err, data) => {
if (err) {
debuglog("Could not get CSV Data!", 2);
debuglog(err, 2);
csv.initCSV(csvFileName);
} else {
debuglog(`CSV OUTPUT IS:\n${data}`, 2);
csvOutput = data;
}
});
debuglog("opened csv",2);
var processedTweets = [];//DEFINE ARRAY THAT WILL BE POPULATED WITH TWEETS PROCESSED DURING THIS SESSION
var allTweetsArray = [];//INITIALIZE THE POST QUEUE
for (var t = 1; t < (parseInt(args.tweetCount) + 1); t++) {//LOOP THE NUMBER OF TIMES SPECIFIED IN ARGS
debuglog(format.notice(`Processing tweet #${t} of ${args.tweetCount}...`),1);
var homeTweet = new Tweets("home",t); //RESET HOME TWEET FOR PROCESSING
var threadTweet = new Tweets("thread",1); //RESET HOME TWEET FOR PROCESSING
var threadTweetArray = []; //ARRAY OF THREAD TWEET OBJECTS
await elements.waitFor(driver,homeTweet.x.containsDivs,args.timeOut); //WAIT FOR TIMELINE TO POPULATE ITSELF WITH TWEETS
//REMOVE NON-PRIMARY TWEETS
debuglog("Filtering out disabled tweets...",2)
while (!homeTweet.keep) {
await elements.waitFor(driver,homeTweet.x.tweetURL,args.timeOut); //WAIT FOR TIMELINE TO POPULATE ITSELF WITH TWEETS
var stupidPromptExists = await elements.doesExist(driver,"/html//div[@data-testid='sheetDialog']//div[@data-testid='app-bar-close']"); //CHECK IF NOTIFICATION SCREEN APPEARS
if (stupidPromptExists) {
var stupidPrompt = await elements.getElement(driver,"/html//div[@data-testid='sheetDialog']//div[@data-testid='app-bar-close']");
await driver.executeScript("arguments[0].click();", stupidPrompt); //DISMISS NOTIFICATION SCREEN
}
debuglog(`xpath: ${homeTweet.x.path}`,2) //PRINT XPATH OF CURRENT TWEET
await elements.waitFor(driver, homeTweet.x.path,args.timeOut); //WAIT UNTIL CURRENT TWEET IS LOADED
await homeTweet.identifyElements(driver); //IDENTIFY WHAT ELEMENTS EXIST WITHIN TWEET
debuglog(`tweet properties: isRT: ${homeTweet.isAR}, isAR: ${homeTweet.isAR}, isPin: ${homeTweet.isPin}, isQT: ${homeTweet.isQT}, isThread: ${homeTweet.isThread}`)
debuglog(homeTweet.isRT || homeTweet.isAR);
debuglog(homeTweet.isPin);
debuglog(!args.enableQuotes && homeTweet.isQT);
debuglog((!args.enableThreads && homeTweet.isThread));
if ((((homeTweet.isRT || homeTweet.isAR) || homeTweet.isPin) || (!args.enableQuotes && homeTweet.isQT)) || (!args.enableThreads && homeTweet.isThread)) {//IF TWEET IS DISABLED, MARK FOR REMOVAL
debuglog(`removing tweet ${homeTweet.url}`,2);
homeTweet.keep = false; //INDICATE THAT WE ARE NOT READY TO EXIT, CURRENT TWEET IS NOT ELIGIBLE FOR REPOST
await driver.executeScript(`var element = document.evaluate(\`${homeTweet.x.path}\`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();`); //REMOVE TWEET FROM DOM TO PROCESS NEXT
homeTweet = new Tweets("home",1); //RESET HOME TWEET OBJECT TO MAKE NEW TWEET READY FOR CHECKING
} else {
debuglog("keeping tweet! It is eligible for processing");
homeTweet.keep = true; //INDICATE THAT WE ARE READY TO EXIT, CURRENT TWEET IS ELIGIBLE FOR REPOST
}
}
processedTweets.forEach(function(u, uindex) { //CHECK IF TWEET HAS BEEN PROCESSED IN THIS SESSION
debuglog(`${u.url} exists at index ${uindex} ${(u.url == homeTweet.url)}`);
if (u.url == homeTweet.url) {homeTweet.processed = true;}
})
debuglog(`tweet has been proccessed: ${homeTweet.processed}`);
if (!homeTweet.processed && !csvOutput.includes(homeTweet.url)) { //IF CSV DOES NOT CONTAIN THE TWEET URL
debuglog(`Tweet #${homeTweet.no} has not been processed.`, 1);
await homeTweet.getElementProperties(driver);
homeTweet.compileText();//COMPILE TEXT FOR CROSS-POST
homeTweet.printPreview();//PRINT TWEET PREVIEW
await homeTweet.downloadImages(driver,imgSavePath);//DOWNLOAD IMAGES FROM TWITTER
await homeTweet.uploadImages(imgSavePath);//UPLOAD IMAGES TO MASTODON
if (homeTweet.isThread){ //IF TWEET IS A THREAD, RUN TWEET THREAD STUFF
var threadTweet = new Tweets("thread",1); //CREATE NEW THREAD TWEET OBJECT
var threadTweetArray = []; //ARRAY OF THREAD TWEET OBJECTS
debuglog(`THREAD TIMELINE: ${threadTweet.x.timeLine}`,2); //XPATH OF THREAD TIMELINE
driver.executeScript(`window.open("${homeTweet.url}");`); //OPEN THREAD IN NEW TAB
var parent = await driver.getWindowHandle();
var windows = await driver.getAllWindowHandles();
await driver.switchTo().window(windows[1]); //SWITCH TO NEW TAB WITH THREAD
await elements.waitFor(driver,threadTweet.x.containsDivs,args.timeOut);
await driver.executeScript("document.body.style.zoom='20%'");
await driver.executeScript("window.scrollTo(0, 0)");
await elements.waitFor(driver,threadTweet.x.containsDivs,args.timeOut); //WAIT UNTIL THREAD IS POPULATED WITH DIVS
for (var r = 1; !threadTweet.entryNotOpen; r++) {//LOOP UNTIL INDICATED THAT WE'VE REACHED THE ENTRY TWEET
threadTweet = new Tweets("thread", r); //RESETS ALL THREAD TWEET VARIABLES TO START FRESH
debuglog(threadTweet.x.path,2); //PRINTS XPATH TO CURRENT ITERATE DIV
threadTweet.entryNotOpen = await elements.doesExist(driver,threadTweet.x.notEntryTweet) // CHECKS IF THE CURRENT ITERATE DIV IS THE ONE USED TO OPEN THE THREAD
if (threadTweet.entryNotOpen){ //CURRENT ITERATE DIV DOES NOT CONTAIN THE TWEET USED TO OPEN THE THREAD
debuglog(`current tweet #${threadTweet.no} is not entry to thread`,2);
await threadTweet.identifyElements(driver); //IDENTIFIES WHAT THE TWEET CONTAINS
processedTweets.forEach(function(u, uindex) { //CHECK IF THREAD TWEET HAS BEEN PROCESSED IN THIS SESSION
debuglog(`${u.url} exists at index ${uindex} ${(u.url == threadTweet.url)}`);
if (u.url == threadTweet.url) { //IF TWEET HAS BEEN PROCESSED AS PART OF ANOTHER THREAD, PASS ON THE IMAGE ID ARRAY AND FLAG
threadTweet.processed = true;
threadTweet.imgArray = u.imgArray;
debuglog(`thread tweet (${threadTweet.url}) adopted image array from matching tweet at url (${u.url})`,2)
}
})
debuglog(csvOutput);
if (!csvOutput.includes(threadTweet.url)) {//CODE TO RUN IF TWEET IS NOT IN CSV
debuglog(`Thread tweet #${threadTweet.no} is not in CSV.`, 1);
await threadTweet.getElementProperties(driver); //COMPILE HEADER, BODY, AND FOOTER
threadTweet.compileText();//COMPILE TEXT FOR CROSS-POST
threadTweet.printPreview()//PRINT TWEET PREVIEW
if (!threadTweet.processed) {//IF IMAGES HAVE ALREADY BEEN TRANSFERRED FROM TWITTER TO MASTODON, SKIP TO PREVENT SLOWDOWN & AVOID RATE LIMIT
await threadTweet.downloadImages(driver,imgSavePath);
await threadTweet.uploadImages(imgSavePath);
}
}
threadTweetArray.push(threadTweet);//SENDS THREAD TWEET TO LIST OF CURRENT THREAD'S TWEETS
processedTweets.push(threadTweet);//SENDS THREAD TWEET TO LIST OF ALL PROCESSED TWEETS WITHOUT REGARD TO ORDER OR DUPLICATES
}
}
for (var a = 0;a < threadTweetArray.length; a++) { //FOR EVERY THREAD TWEET, LINK TO PARENT IF NEEDED & MARK AS PARENT
if (a != 0) {
threadTweetArray[a].prompturl = threadTweetArray[a - 1].url
threadTweetArray[a].isReply = true;
}
}
homeTweet.prompturl = threadTweetArray[threadTweetArray.length-1].url; //SET HOME (ENTRY) TWEET'S PARENT URL TO THAT OF LAST TWEET IN THREAD
homeTweet.isReply = true; //MARK HOME (ENTRY) TWEET AS PARENT
allTweetsArray.push(homeTweet); //HOME TWEET NEEDS TO BE SENT TO POST QUEUE BEFORE THREAD TWEETS
for (var aa = threadTweetArray.length;aa > 0; aa--) {
allTweetsArray.push(threadTweetArray[aa-1]); //SEND THREAD TWEETS TO POST QUEUE
}
await driver.close(); //CLOSE THREAD TAB
await driver.switchTo().window(parent); //MAKE SURE MAIN TIMELINE TAB IS OPEN
} else {
allTweetsArray.push(homeTweet); //SEND HOME TWEET TO POST QUEUE EVEN IF THERE ARE NO THREAD TWEETS
}
processedTweets.push(homeTweet); //SEND HOME TWEET TO LIST OF ALL PROCESSED TWEETS WITHOUT REGARD TO ORDER OR DUPLICATES
} else { //HOME TWEET EXISTS IN CSV
debuglog(`Tweet #${homeTweet.no} has already been processed.`,1); //HOME TWEET EXISTS IN CSV
}
if (homeTweet.no < args.tweetCount) {driver.executeScript(`var element = document.evaluate(\`${homeTweet.x.path}\`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();`);}//REMOVE TWEET FROM DOM TO PROCESS NEXT TWEET
}
var csvArray = csvOutput.split(/[\r\n]+/);
for (var b = 0;b < (allTweetsArray.length); b++) {//SCANS CSV FOR TWEET CORSSPOST DATA TO ADOPT
allTweetsArray[b].posted = csvArray.some((row,rowindex) => { //LOOKS THROUGH CSV FOR POST'S ID IN CASE IT HAS BEEN PROCESSED
rowArr = row.split(",");
if (rowArr[0] == allTweetsArray[b].url){
debuglog(`URL Exists at index ${rowindex} of csv`,2);
allTweetsArray[b].id = rowArr[1];
return true;
}
})
csvArray.some((row,rowindex) => {//LOOKS THROUGH CSV FOR TWEET'S PARENT
rowArr = row.split(",");
if (rowArr[0] === allTweetsArray[b].prompturl){
console.log(rowArr[0]);
console.log(allTweetsArray[b].prompturl);
debuglog(`URL Exists at index ${rowindex} of csv`,2);
allTweetsArray[b].prompt = rowArr[1];
return true;
}
})
}
allTweetsArray = allTweetsArray.filter((v,i,a)=>a.findLastIndex(v2=>(v2.url === v.url))===i) //FILTERS OUR DUPLICATE TWEETS WITH PREFERENCE FOR MOST RECENTLY ADDED
allTweetsArray = [... allTweetsArray.reverse()] //REVERSE TWEET ARRAY ORDER SO THEY CAN BE NEATLY PROCESSED IN REVERSE ORDER
for (t = 0;t < allTweetsArray.length;t++) { //ITERATES THROUGH LIST OF PROCESSED TWEETS, SENDS UNPOSTED TWEETS TO MASTODON, AND REMEMBERS ITS ID FOR FUTURE REPLIES
twt = allTweetsArray[t]
if (!twt.posted) {
if (twt.isReply){
if ((twt.prompt == 0) && (twt.prompturl != 0)){ //ONLY LOOKS FOR PARENT ID IF PROMPT IS UNFULFILLED BUT TWEET EXPECTS A PROMPT
allTweetsArray.some((cue,cueindex) =>{
if (twt.prompturl == cue.url){
twt.prompt = cue.id;
return true;
}
})
}
}
twt.id = await mastodon.postStatus(twt,csvFileName,csvOutput);
}
}
/*allTweetsArray.forEach((twt,index) => {//DEBUGGING, PRINTS INFO ON TWEETS
debuglog(`${String(index).padStart(2,0)}: ${twt.body.substring(0,20)}..., ${twt.url}, ${twt.prompturl}, ${twt.id}, ${twt.prompt}, is reply: ${twt.isReply}, posted: ${twt.posted}`,2)
debuglog(`${String(index).padStart(2,0)} ${twt.handle}: ${twt.body.substring(0,40).concat("...").padEnd(45," ").substring(0,43)}, ${twt.imgCount}, ${twt.imgArray}`,2);
});*/
debuglog("Cleaning up...",1); //REMOVE SAVED IMAGES
fs.rm(imgSavePath, { recursive: true, force: true }, (error) => {
debuglog(error,2);
});
debuglog(format.bold(`Finished scraping @${args.userName}'s tweets`),1) //CLOSE WEBDRIVER
setTimeout(() => {
driver.quit();
}, 100);
}());