This commit is contained in:
Penelope Gomez / Pogmommy 2023-02-14 14:35:57 -07:00
parent 97cc140175
commit ed9d5c68fd
16 changed files with 899 additions and 3626 deletions

View file

@ -1,210 +1,41 @@
//REQUIREMENTS
const webdriver = require('selenium-webdriver');
const chrome = require('selenium-webdriver/chrome');
const By = webdriver.By;
const until = webdriver.until;
const fs = require('fs');
const csvWriter = require('csv-write-stream');
const Masto = require('mastodon');
const client = require('https');
const request = require("request");
const Q = require("q");
//VALIDATE INPUT
//LOCAL REQUIREMENTS
const support = require('./ref/functions/support.js');
const debuglog = support.debuglog;
const elements = require('./ref/functions/elements.js');
const csv = require('./ref/functions/csv.js');
const mastodon = require('./ref/functions/mastodon.js');
const args = process.argv;
// Show the usage text and stop successfully when help is requested.
if (args[2] == "-h") {
  const usageLines = [
    "usage: $node ./TwitToMast.js [username] [tweet count] [debug level] [disable posts] [print header]",
    " username: (string) -username of account to scrape - required",
    " tweet count: (integer) -number of tweets to scrape - required",
    " debug level: (0-2) -amount of information to print to console - 0 by default",
    " disable posts: ('write','noWrite') -enable/disable posting to Mastodon - disabled by default",
    " print header: ('printHeader') -enable attaching a header with the user's name, twitter",
    " handle, and link to tweet - disabled by default",
    " config.txt:",
    " API_KEY",
    " API_URL",
    " ENABLE_QUOTE_TWEETS",
    " ENABLE_THREAD_TWEETS",
    " ",
  ];
  for (const usageLine of usageLines) {
    console.log(usageLine);
  }
  process.exit(0);
}

// Validate the positional arguments; the bare block keeps the helper names local.
{
  // Print the complaint, point the user at the help flag, and exit with status 1.
  const rejectArgument = (complaint) => {
    console.log(complaint);
    console.log("for help: $TwitToMast.js -h");
    process.exit(1);
  };

  // args[2]: username, must be present and non-empty.
  if (typeof args[2] == 'undefined' || args[2].length < 1) {
    rejectArgument("Expected String with length greater than 1, got '" + args[2] + "' instead");
  }

  // args[3]: tweet count, must start with an integer.
  if (isNaN(parseInt(args[3]))) {
    rejectArgument("Expected Integer, got '" + args[3] + "' instead");
  }

  // args[4]: optional debug level, 0 through 2 when supplied.
  const requestedLevel = parseInt(args[4]);
  const levelInRange = (requestedLevel >= 0) && (requestedLevel <= 2);
  if (typeof args[4] != 'undefined' && !levelInRange) {
    rejectArgument("Expected [0-2], got '" + args[4] + "' instead");
  }

  // args[5]: optional posting switch, 'write' or 'noWrite' when supplied.
  const postingSwitchKnown = !(args[5] != 'noWrite' && args[5] != 'write');
  if (typeof args[5] != 'undefined' && !postingSwitchKnown) {
    rejectArgument("Expected 'noWrite', 'write', or undefined, got '" + args[5] + "' instead");
  }
}
const Args = require('./ref/classes/arguments.js');
const args = new Args();
const Formats = require('./ref/classes/formats.js');
const format = new Formats();
const Tweets = require('./ref/classes/tweets.js');
//PROCESS CONFIG
//LOG ARGUMENTS
const config = fs.readFileSync("./config.txt").toString().split(/[\r\n]+/);
var M = new Masto({
access_token: config[0],
api_url: config[1]
})
// Parse the two on/off switches stored on lines 3 and 4 of config.txt.
// modulesToEnable[0] is the quote-tweet switch, modulesToEnable[1] the thread-tweet switch.
var modulesToEnable = [false, false];
for (var c = 2; c < 4; c++) {
  // Strict comparison is required here: the previous `config[c] = "true"` was an
  // assignment, which overwrote the setting and made every switch evaluate to true.
  if (config[c] === "true") {
    modulesToEnable[c - 2] = true;
  } else if (config[c] === "false") {
    modulesToEnable[c - 2] = false;
  } else {
    // Anything other than the literal strings "true"/"false" is a configuration error.
    console.log("config.txt line " + (c + 1) + ": Expected [true/false], got '" + config[c] + "' instead");
    console.log("for help: $TwitToMast.js -h");
    process.exit(1);
  }
}
support.validateArgs();
support.logArguments();
//SETUP SAVE DIRECTORY VARIABLES
//PROCESS ARGUMENTS
// Positional arguments: [2] username, [3] tweet count, [4] debug level,
// [5] posting switch, [6] header switch.
const userName = args[2];
const maxTweetScan = parseInt(args[3], 10);
// Fall back to level 0 when no debug level was given. The fallback is folded into the
// declaration because reassigning a `const` afterwards throws a TypeError.
const debug = (typeof args[4] === 'undefined') ? 0 : args[4];
// Posting is only disabled by the literal 'noWrite'; anything else (or nothing) leaves it on.
var disablePosts = (args[5] === 'noWrite');
// The header is only attached when the literal 'printHeader' is supplied.
var printHeader = (args[6] === 'printHeader');
debuglog(args,2);
debuglog("userName: " + userName,2);
debuglog("maxTweetScan: " + maxTweetScan,2);
debuglog("debug: " + debug,2);
debuglog("disable posts: " + disablePosts,2);
//FUNCTIONS
/**
 * Download `url` over HTTPS and stream the response body into `filepath`.
 *
 * @param {string} url - Address of the resource to fetch.
 * @param {string} filepath - Destination file; created or overwritten.
 * @returns {Promise<string>} Resolves with `filepath` once the file stream has closed.
 *   Rejects when the response status is not 200, when the request or response
 *   stream fails, or when writing the file fails.
 */
function downloadImage(url, filepath) {
  return new Promise((resolve, reject) => {
    client.get(url, (res) => {
      if (res.statusCode !== 200) {
        res.resume(); // discard the body so the socket is released
        reject(new Error(`Request Failed With a Status Code: ${res.statusCode}`));
        return;
      }
      const file = fs.createWriteStream(filepath);
      // pipe() does not forward source errors to the destination, so listen on both ends.
      res.on('error', reject);
      file.on('error', reject);
      file.once('close', () => resolve(filepath));
      res.pipe(file);
    }).on('error', reject); // connection-level failures (DNS, refused, TLS) never reach the callback
  });
}
/**
 * Print a message when its level does not exceed the module-level `debug` setting.
 *
 * @param {*} debugString - Text (or value) to print.
 * @param {number} logLevel - 0, 1 or 2; level 1 is prefixed with "-", level 2 with "!".
 *   Any other value gets no prefix, and a missing level is never printed because
 *   `undefined <= debug` is false.
 */
function debuglog(debugString, logLevel) {
  // Declared locally; the bare `prefix = ""` used before leaked a global variable.
  let prefix = "";
  switch (logLevel) {
    case 1:
      prefix = "-";
      break;
    case 2:
      prefix = "!";
      break;
    default:
      break;
  }
  if (logLevel <= debug) {
    console.log(prefix + " " + debugString);
  }
}
/**
 * Resolve a shortened URL to its final destination.
 *
 * Issues a HEAD request that follows all redirects and reports the address of
 * the request that finally answered.
 *
 * @param {string} shortUrl - The shortened address to expand.
 * @returns {Promise<string>} Q promise for the final URL; rejected with an Error
 *   wrapping whatever the request reported on failure.
 */
function expandUrl(shortUrl) {
  const pending = Q.defer();
  const headRequest = { method: "HEAD", url: shortUrl, followAllRedirects: true };

  request(headRequest, (failure, reply) => {
    if (failure) {
      pending.reject(new Error(failure));
      return;
    }
    pending.resolve(reply.request.href);
  });

  return pending.promise;
}
debuglog("Setting up...",1);
debuglog("userName: " + userName,1);
debuglog("maxTweetScan: " + maxTweetScan,1);
debuglog("debug: " + debug,1);
debuglog("API_URL: " + config[1],1);
debuglog("Enable Quote Tweets: " + modulesToEnable[0],1);
debuglog("Enable Thread Tweets: " + modulesToEnable[1],1);
debuglog("Disable posting to Mastodon: " + disablePosts,1);
debuglog("running from loop: " + printHeader,1);
//SETUP REMAINDER OF VARIABLES
const csvFilename = "./URLList.csv";
const localDir = './';
const imgSavePath = (localDir + userName + '/');
const imgSavePath = (`${localDir}imgs/${args.userName}/`);
if (!fs.existsSync(imgSavePath)){
fs.mkdirSync(imgSavePath);
}
//XPATH CONSTANTS
const timeLineXPath = `//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div/div/div[3]/div/div/section/div/div`; //the immediate parent div of all tweets
const tweetXPath = (timeLineXPath + `/div`); //the div containing individual tweet content: (tweetXpath + '[1]')
//the following xpaths follow an individual tweet xpath: (tweetXpath + '[1]' + variableXPath)
const urlCardXPath = `/div/div/div/article/div/div/div/div[*]/div[*]/div[*]/div[*]/div/div[2]/a`
const tweeterHandle = `/div/div/div/article/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div/div/div[2]/div/div[1]/a/div/span[contains(text(),"@")]` //text label containing tweeter's handle
const tweeterName = `/div/div/div/article/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div/div/div[1]/div/a/div/div[1]/span/span` //text label containing tweeter's name
const quoteTweetHandleXPath = `/div/div/div/article/div/div/div/div[2]/div[2]/div[2]/div[2]/div[*]/div[2]/div/div[1]/div/div/div/div/div/div[2]/div[1]/div/div/div/span`; //xpath to text label that reveals if a tweet is a quote tweet (leads to the quote tweeted user's handle)
const quoteTweetContentXPath= `/div/div/div/article/div/div/div/div[2]/div[2]/div[2]/div[2]/div[*]/div[2][div/div[1]/div/div/div/div/div/div[2]/div[1]/div/div/div/span]` //xpath to locate entirety of Quote Tweeted Content
const retweetIndicatorXPath = `/div/div/div/article/div/div/div/div[1]/div/div/div/div/div[2]/div/div/div/a/span`; //xpath to text label that reveals if a tweet is a retweet
const threadIndicatorXPath = `/div/div/div/article/div/a/div/div[2]/div/span`; //xpath to text label that reveals if a tweet is a part of a thread
const tweetTextXPath = `//div[@data-testid="tweetText"]`; //xpath that leads to div containing all tweet text
const tweetURLXPath = `//div[3]/a[contains(@href, 'status')]`; //xpath to tweet url
const singleImageXPath = `//div[2]/div/img[@alt="Image"]`; //xpath to image that reveals if a tweet has one image
const multiImageXPath = `//div[2]/div[2]/div[2]/div[2]/div/div/div/div/div[2]/div/div[1]/div[1]//a/div/div/img[@alt="Image"]`; //xpath to image that reveals if a tweet has more than one image
//the following xpaths follow and individual tweet xpath and are used to find all images in a tweet with multiple images: (tweetXpath + '[1]' + multiImage1XPath + x + multiImage2XPath + y + multiImage3XPath)
// the following combinations of x,y variables point to the corresponding image
// 1,1 = first image
// 2,1 = second image
// 2,2 = third image
// 1,2 = fourth image
const multiImage1XPath = `//div[2]/div[2]/div[2]/div[2]/div/div/div/div/div[2]/div/div[`;
const multiImage2XPath = `]/div[`;
const multiImage3XPath = `]//a/div/div/img[@alt="Image"]`;
const csvSaveDir = (`${localDir}csv/`);
const csvFileName = (`${csvSaveDir + args.userName}.csv`);
if (!fs.existsSync(csvSaveDir)){
fs.mkdirSync(csvSaveDir);
}
var csvOutput = "_";
debuglog(`csv file name: ${csvFileName}`,2);
debuglog(`user image save path${imgSavePath}`,2);
//SETUP HEADLESS WEBDRIVER
@ -212,314 +43,187 @@ const screen = {
width: 1920,
height: 1080
};
let chromeOptions = new chrome.Options().addArguments(['user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36']);
if (!args.displayBrowser) {chromeOptions.headless().windowSize(screen);}
var driver = new webdriver.Builder()
.forBrowser('chrome')
.setChromeOptions(new chrome.Options().headless().windowSize(screen).addArguments(['user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36']))
.setChromeOptions(chromeOptions)
.build();
//START WEBDRIVER AND ZOOM OUT
driver.get('https://mobile.twitter.com/' + userName + '/');
debuglog("starting webdriver...",2);
driver.get(`https://mobile.twitter.com/${args.userName}/`);
debuglog("started webdriver!",2);
driver.executeScript("document.body.style.zoom='35%'");
(async function(){
//WAIT UNTIL TIMELINE RENDERS
await driver.wait(until.elementLocated(By.xpath(timeLineXPath + `[count(div) > 1]`)), 30000);
//OPEN CSV FILE, CREATE IF NEEDED
var csvOutput = " ";
await fs.readFile(csvFilename, "utf-8", (err, data) => {
if (err) {
debuglog("Could not get CSV Data!",2)
debuglog(err,1);
writer = csvWriter({sendHeaders: false});
writer.pipe(fs.createWriteStream(csvFilename));
writer.write({
header1: 'URLs'
});
writer.end();
} else {
csvOutput = data;
}
debuglog("opening csv",2);
fs.readFile(csvFileName, "utf-8", (err, data) => {
if (err) {
debuglog("Could not get CSV Data!", 2);
debuglog(err, 2);
csv.initCSV(csvFileName);
} else {
debuglog(`CSV OUTPUT IS:\n${data}`, 2);
csvOutput = data;
}
});
for (var i = 1; i < (maxTweetScan+1); i++) {
//RUN THIS CODE FOR EVERY TWEET SCANNED
debuglog("Processing tweet " + i + " of " + maxTweetScan + "...",1);
//PER-TWEET VARIABLES
var thisTweetXPath = tweetXPath + `[1]`;
var keepTweet = false;
var quotedContent = "";
debuglog("opened csv",2);
var processedTweets = [];//DEFINE ARRAY THAT WILL BE POPULATED WITH TWEETS PROCESSED DURING THIS SESSION
for (var t = 1; t < (parseInt(args.tweetCount) + 1); t++) {//LOOP THE NUMBER OF TIMES SPECIFIED IN ARGS
debuglog(format.notice(`Processing tweet #${t} of ${args.tweetCount}...`),1);
var homeTweet = new Tweets("home",t); //RESET HOME TWEET FOR PROCESSING
var threadTweet = new Tweets("thread",1); //RESET HOME TWEET FOR PROCESSING
var threadTweetArray = []; //ARRAY OF THREAD TWEET OBJECTS
await elements.waitFor(driver,homeTweet.x.containsDivs,args.timeOut); //WAIT FOR TIMELINE TO POPULATE ITSELF WITH TWEETS
//REMOVE NON-PRIMARY TWEETS
debuglog("Filtering out disabled tweets...",2)
while (!keepTweet) {
await driver.wait(until.elementLocated(By.xpath(thisTweetXPath)), 30000);
if (!modulesToEnable[0]) {
//CHECK FOR QUOTE TWEETS
isQT = await driver.findElement(webdriver.By.xpath(thisTweetXPath + quoteTweetContentXPath)).then(function() {
return true; // It existed
}, function(err) {
if (err instanceof webdriver.error.NoSuchElementError) {
return false; // It was not found
} else {
//webdriver.promise.rejected(err);
}
});
}
if (!modulesToEnable[1]) {
//CHECK FOR THREAD TWEET
isThread = await driver.findElement(webdriver.By.xpath(thisTweetXPath + threadIndicatorXPath)).then(function() {
return true; // It existed
}, function(err) {
if (err instanceof webdriver.error.NoSuchElementError) {
return false; // It was not found
} else {
//webdriver.promise.rejected(err);
}
});
}
//CHECK FOR RETWEETS
isRT = await driver.findElement(webdriver.By.xpath(thisTweetXPath + retweetIndicatorXPath)).then(function() {
return true; // It existed
}, function(err) {
if (err instanceof webdriver.error.NoSuchElementError) {
return false; // It was not found
} else {
//webdriver.promise.rejected(err);
}
});
while (!homeTweet.keep) {
debuglog(`xpath: ${homeTweet.x.path}`,2) //PRINT XPATH OF CURRENT TWEET
await elements.waitFor(driver, homeTweet.x.path,args.timeOut); //WAIT UNTIL CURRENT TWEET IS LOADED
//IF TWEET IS DISABLED, MARK FOR REMOVAL
if (isRT || ((!modulesToEnable[0] && isQT) || (!modulesToEnable[1] && isThread)) ) {
//TWEET IS QT, RT, OR THREAD
keepTweet = false;
driver.executeScript('var element = document.evaluate(`' + thisTweetXPath + '`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();');
await homeTweet.identifyElements(driver); //IDENTIFY WHAT ELEMENTS EXIST WITHIN TWEET
if ((((homeTweet.isRT || homeTweet.isAR) || homeTweet.isPin) || (!args.enableQuotes && homeTweet.isQT)) || (!args.enableThreads && homeTweet.isThread) ) {//IF TWEET IS DISABLED, MARK FOR REMOVAL
debuglog("removing tweet",2);
homeTweet.keep = false; //INDICATE THAT WE ARE NOT READY TO EXIT, CURRENT TWEET IS NOT ELIGIBLE FOR REPOST
await driver.executeScript(`var element = document.evaluate(\`${homeTweet.x.path}\`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();`); //REMOVE TWEET FROM DOM TO PROCESS NEXT
homeTweet = new Tweets("home",1); //RESET HOME TWEET OBJECT TO MAKE NEW TWEET READY FOR CHECKING
} else {
keepTweet = true;
debuglog("keeping tweet! It is eligible for processing");
homeTweet.keep = true; //INDICATE THAT WE ARE READY TO EXIT, CURRENT TWEET IS ELIGIBLE FOR REPOST
}
}
//GET TWEET URL
await driver.wait(until.elementLocated(By.xpath(thisTweetXPath + tweetURLXPath)), 1000);
mobileTweetURL = await driver.findElement(By.xpath(thisTweetXPath + tweetURLXPath)).getAttribute('href');
tweetURL = await mobileTweetURL.replace('mobile.','');
debuglog(tweetURL,2);
processedTweets.forEach(function(u, uindex) { //CHECK IF TWEET HAS BEEN PROCESSED IN THIS SESSION
debuglog(`${u.url} exists at index ${uindex} ${(u.url == homeTweet.url)}`);
if (u.url == homeTweet.url) {homeTweet.processed = true;}
})
debuglog(`tweet has been proccessed: ${homeTweet.processed}`);
if (!csvOutput.includes(tweetURL)) {
if (!homeTweet.processed && !csvOutput.includes(homeTweet.url)) { //IF CSV DOES NOT CONTAIN THE TWEET URL
debuglog(`Tweet #${homeTweet.no} has not been processed.`, 1);
//SETUP TEXT FOR TWEET STATUS
var tweetHasText = false;
await driver.wait(until.elementLocated(By.xpath(timeLineXPath + tweetTextXPath)), 1000);
tweetText = ""
if (homeTweet.isThread){ //IF TWEET IS A THREAD, RUN TWEET THREAD STUFF
var threadTweet = new Tweets("thread",1); //CREATE NEW THREAD TWEET OBJECT
var threadTweetArray = []; //ARRAY OF THREAD TWEET OBJECTS
debuglog(`THREAD TIMELINE: ${threadTweet.x.timeLine}`,2); //XPATH OF THREAD TIMELINE
//IS TWEET PART OF MULTISCRAPER, IF SO ADD HEADER
if (printHeader) {
tweeterHandleText = await driver.findElement(By.xpath(thisTweetXPath + tweeterHandle)).getText();
tweeterNameText = await driver.findElement(By.xpath(thisTweetXPath + tweeterName)).getText();
tweetText = (tweeterNameText + " (" + tweeterHandleText + ")\r\n" + tweetURL + "\r\n\r\n")
}
//DOES TWEET HAVE TEXT
tweetHasText = await driver.findElement(webdriver.By.xpath(thisTweetXPath + tweetTextXPath)).then(function() {
return true; // It existed
}, function(err) {
if (err instanceof webdriver.error.NoSuchElementError) {
return false; // It was not found
} else {
webdriver.promise.rejected(err);
}
});
//IF SO, ADD BODY TEXT TO TWEET TEXT
if (tweetHasText){
tweetText = tweetText + await driver.findElement(By.xpath(thisTweetXPath + tweetTextXPath)).getText();
}
//DOES TWEET HAVE A URL CARD
tweetHasURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + urlCardXPath)).then(function() {
return true; // It existed
}, function(err) {
if (err instanceof webdriver.error.NoSuchElementError) {
return false; // It was not found
} else {
webdriver.promise.rejected(err);
}
});
//IF SO, ADD URL TO TWEET TEXT
if (tweetHasURL){
tweetCardURL = await driver.findElement(By.xpath(thisTweetXPath + urlCardXPath)).getAttribute('href');
await expandUrl(tweetCardURL)
.then(function (longUrl) {
debuglog("Long URL:" + longUrl,2);
tweetText = tweetText + "\r\n\r\n" + longUrl;
});
}
//IS TWEET A QUOTE TWEET
isQT = await driver.findElement(webdriver.By.xpath(thisTweetXPath + quoteTweetContentXPath)).then(function() {
return true; // It existed
}, function(err) {
if (err instanceof webdriver.error.NoSuchElementError) {
return false; // It was not found
} else {
//webdriver.promise.rejected(err);
}
});
//IF SO, ADD QUOTE TWEET LINK TO TWEET TEXT
if (isQT){
await driver.sleep(1 * 1000)
quotedContent = await driver.findElement(webdriver.By.xpath(thisTweetXPath + quoteTweetContentXPath));
await driver.findElement(webdriver.By.xpath(thisTweetXPath + quoteTweetContentXPath)).sendKeys(webdriver.Key.CONTROL, webdriver.Key.ENTER);
var parent = await driver.getWindowHandle();
var windows = await driver.getAllWindowHandles();
await driver.switchTo().window(windows[1]).then(() => {
driver.getCurrentUrl().then(url => {
debuglog('current url: "' + url + '"',2);
tweetText = tweetText + "\r\n\r\n" + "Quote tweeting: " + url;
});
driver.switchTo().window(parent);
});
await driver.switchTo().window(windows[1]);
await driver.close();
await driver.switchTo().window(parent);
}
debuglog(tweetText,1)
//CODE TO RUN IF TWEET IS NOT IN CSV
debuglog("Tweet #" + i + " has not been processed.", 1);
//HANDLE SAVING SINGLE IMAGES
var singleImageExisted = await driver.findElement(webdriver.By.xpath(thisTweetXPath + singleImageXPath)).then(function() {
return true; // It existed
}, function(err) {
if (err instanceof webdriver.error.NoSuchElementError) {
return false; // It was not found
} else {
webdriver.promise.rejected(err);
}
});
if (singleImageExisted) {
debuglog("Tweet #" + i + " contains a single image.", 2)
imageCount = 1;
imageURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + singleImageXPath)).getAttribute("src");
await downloadImage(imageURL, imgSavePath + i + "." + 1 +'.jpg')
.then(/*console.log*/)
.catch(console.error);
debuglog("Downloaded " + imageCount + "image from tweet #" + i + ".", 2)
}
//HANDLE SAVING MULTTIPLE IMAGES
var multiImageExisted = await driver.findElement(webdriver.By.xpath(thisTweetXPath + multiImageXPath)).then(function() {
return true; // It existed
}, function(err) {
if (err instanceof webdriver.error.NoSuchElementError) {
return false; // It was not found
} else {
webdriver.promise.rejected(err);
}
});
if (multiImageExisted) {
debuglog("Tweet #" + i + " contains multiple images.", 2)
imageCount = 0;
for (var x = 1; x < 3; x++) {
for (var y = 1; y < 3; y++) {
thisIteratExists = await driver.findElement(webdriver.By.xpath(thisTweetXPath + multiImage1XPath + x + multiImage2XPath + y + multiImage3XPath)).then(function() {
return true; // It existed
}, function(err) {
if (err instanceof webdriver.error.NoSuchElementError) {
return false; // It was not found
} else {
debuglog('I hope this doesnt break');
//webdriver.promise.rejected(err);
}
});
if (thisIteratExists) {
debuglog(x + "," + y + " Exists!")
iteratImgURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + multiImage1XPath + x + multiImage2XPath + y + multiImage3XPath)).getAttribute("src");
imageCount++;
await downloadImage(iteratImgURL, imgSavePath + i + "." + imageCount +'.jpg')
.then(/*console.log*/)
.catch(console.error);
}
}
}
debuglog("Downloaded " + imageCount + "images from tweet #" + i + ".", 2)
}
//HANDLE POSTING TWEETS TO MASTODON
if (!disablePosts){
if (singleImageExisted || multiImageExisted) {var imageExisted = true} else {var imageExisted = false}
if (imageExisted) {
driver.executeScript(`window.open("${homeTweet.url}");`); //OPEN THREAD IN NEW TAB
var parent = await driver.getWindowHandle();
var windows = await driver.getAllWindowHandles();
await driver.switchTo().window(windows[1]); //SWITCH TO NEW TAB WITH THREAD
//MAKE MASTODON POST WITH IMAGES
debuglog("Uploading images to Mastodon...",1);
var imageArray = [];
for (var f = 1; f < (imageCount+1); f++) {
await M.post('media', { file: fs.createReadStream(imgSavePath + i + '.' + f + '.jpg') }).then(resp => {
imageArray.push(resp.data.id);
}, function(err) {
if (err) {
debuglog(err,1);
}
})
await elements.waitFor(driver,threadTweet.x.containsDivs,args.timeOut);
await driver.executeScript("document.body.style.zoom='20%'");
await driver.executeScript("window.scrollTo(0, 0)");
//await driver.executeScript("window.scrollTo(0, -document.body.scrollHeight)");
await driver.sleep(1*5000) //WAIT 5 SECONDS TO GIVE BROWSER TIME TO SET ITSELF UP
await elements.waitFor(driver,threadTweet.x.containsDivs,args.timeOut); //WAIT UNTIL THREAD IS POPULATED WITH DIVS
for (var r = 1; !threadTweet.entryIsOpen; r++) {//LOOP UNTIL INDICATED THAT WE'VE REACHED THE ENTRY TWEET
threadTweet = new Tweets("thread", r); //RESETS ALL THREAD TWEET VARIABLES TO START FRESH
debuglog(threadTweet.x.path,2); //PRINTS XPATH TO CURRENT ITERATE DIV
threadTweet.entryIsOpen = await elements.doesExist(driver,threadTweet.x.entryTweet) // CHECKS IF THE CURRENT ITERATE DIV IS THE ONE USED TO OPEN THE THREAD
if (!threadTweet.entryIsOpen){ //CURRENT ITERATE DIV DOES NOT CONTAIN THE TWEET USED TO OPEN THE THREAD
await threadTweet.identifyElements(driver); //IDENTIFIES WHAT THE TWEET CONTAINS
debuglog(`current tweet #${threadTweet.no} is not entry to thread`,2);
debuglog(csvOutput);
if (processedTweets.some(e => e.url == processedTweets.url)) {
debuglog("TWEET EXISTS IN PROCESSED ARRAY!!",2);
}
if (!csvOutput.includes(threadTweet.url)) {//CODE TO RUN IF TWEET IS NOT IN CSV
debuglog(`Thread tweet #${threadTweet.no} has not been processed.`, 1);
await threadTweet.getElementProperties(driver); //COMPILE HEADER, BODY, AND FOOTER
threadTweet.compileText();//COMPILE TEXT FOR CROSS-POST
threadTweet.printPreview()//PRINT TWEET PREVIEW
await threadTweet.downloadImages(driver,imgSavePath);
await threadTweet.uploadImages(imgSavePath);
}
threadTweetArray.push(threadTweet);
processedTweets.push(threadTweet);
}
}
imageArray.length = 4
debuglog("Publishing post to Mastodon...",1);
await M.post('statuses', { status: tweetText, media_ids: imageArray }, (err, data) => {
if (err) {
debuglog("Post to Mastodon failed with error: " + err, 1);
} else {
//ADD TWEET TO CSV TO PREVENT FUTURE INDEXING
debuglog("Posting tweet #" + i + " to Mastodon succeeded!", 1);
writer = csvWriter({sendHeaders: false});
writer.pipe(fs.createWriteStream(csvFilename, {flags: 'a'}));
writer.write({
header1: tweetURL
});
writer.end();
}
var csvArray = csvOutput.split(/[\r\n]+/);
for (var a = 0;a < threadTweetArray.length; a++) {//SET TWEET OBJECT ID TO SAVED ID IF IT EXISTS IN CSV
debuglog(`CSV ARRAY: ${csvArray}`,2);
csvArray.forEach(function(row, csvIndex) {
debuglog(`csv row: ${row}`);
rowArr = row.split(",");
debuglog(`searching for '${threadTweetArray[a].url}' in '${row}'`,2)
if (row.includes(threadTweetArray[a].url)){
debuglog(`URL Exists at index ${csvIndex} of csv`,2);
threadTweetArray[a].id = rowArr[1];
threadTweetArray[a].posted = true;
}
})
}
threadTweetArray.forEach(twt =>{//LIST IDs THAT WERE DERIVED FROM CSV
debuglog(`${twt.no} id: ${twt.id}`,2)
})
} else {
//MAKE MASTODON POST WITHOUT IMAGES
debuglog("Publishing post to Mastodon...",1);
await M.post('statuses', { status: tweetText}, (err, data) => {
if (err) {
debuglog("Post to Mastodon failed with error: " + err, 1);
} else {
//ADD TWEET TO CSV TO PREVENT FUTURE PROCESSING
debuglog("Posting tweet #" + i + " to Mastodon succeeded!", 1);
writer = csvWriter({sendHeaders: false});
writer.pipe(fs.createWriteStream(csvFilename, {flags: 'a'}));
writer.write({
header1: tweetURL
});
writer.end();
for (var a = 0;a < threadTweetArray.length; a++) {//POST TO MASTODON REFERENCING ID OF PRIOR OBJECT AS PROMPT
if (a != 0) {threadTweetArray[a].prompt = threadTweetArray[a - 1].id}
if (!threadTweetArray[a].posted){
debuglog(`posting tweet: ${threadTweetArray[a].no} to mastodon in reply to id: ${threadTweetArray[a].prompt}`, 2);
threadTweetArray[a].id = await mastodon.postStatus(threadTweetArray[a],csvFileName,csvOutput)
debuglog(`POSTED TO MASTODON AND GOT BACK AN ID OF: ${threadTweetArray[a].id}`,2)
}
})
}
await driver.close();
await driver.switchTo().window(parent);
}
}
} else {
//CODE TO RUN IF TWEET IS IN CSV
debuglog("Tweet #" + i + " has already been processed.",1);
}
await homeTweet.getElementProperties(driver);
if (i < maxTweetScan) {driver.executeScript('var element = document.evaluate(`' + thisTweetXPath + '`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();');}
homeTweet.compileText();//COMPILE TEXT FOR CROSS-POST
homeTweet.printPreview();//PRINT TWEET PREVIEW
await homeTweet.downloadImages(driver,imgSavePath);//DOWNLOAD IMAGES FROM TWITTER
await homeTweet.uploadImages(imgSavePath);//UPLOAD IMAGES TO MASTODON
if (threadTweetArray.length>0) {homeTweet.prompt = threadTweetArray[threadTweetArray.length-1].id;}
debuglog(`Publishing post ${homeTweet.no} to Mastodon...`,2);
homeTweet.id = await mastodon.postStatus(homeTweet,csvFileName,csvOutput);
processedTweets.push(homeTweet);
} else { //HOME TWEET EXISTS IN CSV
debuglog(`Tweet #${homeTweet.no} has already been processed.`,1); //HOME TWEET EXISTS IN CSV
}
if (homeTweet.no < args.tweetCount) {driver.executeScript(`var element = document.evaluate(\`${homeTweet.x.path}\`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();`);}//REMOVE TWEET FROM DOM TO PROCESS NEXT TWEET
}
//REMOVE SAVED IMAGE FILES
debuglog("Cleaning up...",1);
fs.rm(imgSavePath, { recursive: true, force: true }, (error) => {
//you can handle the error here
});
debuglog("Finished scraping " + userName + "'s tweets",1)
//EXIT WEBDRIVER
driver.quit();
debuglog("Cleaning up...",1); //REMOVE SAVED IMAGES
fs.rm(imgSavePath, { recursive: true, force: true }, (error) => {
debuglog(error,2);
});
debuglog(format.bold(`Finished scraping @${args.userName}'s tweets`),1) //CLOSE WEBDRIVER
setTimeout(() => {
driver.quit();
}, 100);
}());

View file

@ -1 +0,0 @@
URLs
1 URLs

View file

@ -1,29 +1,39 @@
//REQUIREMENTS
const childProcess = require('child_process')
const path = require('path');
const support = require('./ref/functions/support.js');
//FUNCTIONS
/**
 * Run a script in a child Node process and wait for it to finish.
 *
 * The child is started with its working directory set to the folder that
 * contains the script.
 *
 * @param {string} scriptPath - Path of the script to run.
 * @param {string[]} [args=[]] - Arguments handed to the script.
 * @returns {Promise<number|null>} Resolves with the child's exit code; rejects
 *   if the child process emits an 'error' event.
 */
async function fork(scriptPath, args = []) {
  const workingDir = path.dirname(scriptPath);
  return new Promise((resolve, reject) => {
    const child = childProcess.fork(scriptPath, args, { cwd: workingDir });
    child.on('error', reject);
    child.on('exit', resolve);
  });
}
//RUNTIME
(async function(){
const args = process.argv;
const args = [...process.argv];
const defArgs = ["node","path","name","tweetCount","0","write","fromLoop"]
for (var i = 0; i < 2; i++) {args.shift();}
const config = require('fs').readFileSync("./usernameslist.txt").toString().split(/[\r\n]+/);
for (var i = 0; i < 2; i++) {args.shift();} //REMOVES `node ./TwitToMaster` from args
const config = require('fs').readFileSync("./usernameslist.txt").toString().split(/[\r\n]+/);//GET USERNAME LIST AS ARRAY
const customIndex = args.indexOf("-u");
console.log(args);
console.log(customIndex);
args.splice(customIndex,2);
console.log(args)
for (let name of config) {
var pArgs = [...args];
pArgs.splice(0, 0, name);
for (var i = 3; i < 7; i++) {
if (typeof pArgs[i-2] == 'undefined') {
pArgs.push(defArgs[i]);
}
}
console.log("pArgs: " + pArgs);
await fork('./TwitToMast.js', pArgs);
var fArgs = [...args];
fArgs.push("-u");
fArgs.push(name);
console.log("args: " + fArgs);
await fork('./TwitToMast.js', fArgs);
}
}());

BIN
null

Binary file not shown.

Before

Width:  |  Height:  |  Size: 168 KiB

3140
package-lock.json generated

File diff suppressed because it is too large Load diff

48
ref/classes/arguments.js Normal file
View file

@ -0,0 +1,48 @@
// Array helper: returns the string elements of the array that match `match`
// (a RegExp or pattern accepted by String.prototype.match); non-string elements are skipped.
// Defined as a non-enumerable property so it does not appear in `for...in`
// loops over arrays anywhere in the process.
Object.defineProperty(Array.prototype, 'findReg', {
  value: function findReg(match) {
    return this.filter((item) => typeof item == 'string' && item.match(match));
  },
  writable: true,
  configurable: true,
  enumerable: false,
});
/**
 * Command-line options read from `process.argv` at construction time.
 *
 * Boolean switches are single letters behind one dash and may be clustered
 * (`-bp`); valued options are separate tokens followed by their value (`-n 10`).
 */
class Args {
  constructor() {
    this.help = this.getFlag("h"); //show help screen
    this.displayBrowser = this.getFlag("b"); //show browser running (disable headless)
    this.enablePosts = this.getFlag("p"); //enable posting images or statuses to Mastodon
    this.forceCSV = this.getFlag("c"); //force logging tweets to CSV, even if not posted to Mastodon (by request or failure)
    this.printMeta = this.getFlag("m"); //include Display Name, handle, and URL in Mastodon post
    this.enableQuotes = this.getFlag("q"); //enable cross-posting quote tweets
    this.enableThreads = this.getFlag("t"); //enable cross-posting thread tweets
    this.reQuotes = this.getFlag("r"); //put links to quote tweets at top of mastodon posts

    const userNamePreFormat = this.getArgument("-u", "Twitter", false);
    this.userName = userNamePreFormat.replace('@', ''); //strip the leading @ from a handle
    this.tweetCount = this.getArgument("-n", 5);
    this.debug = this.getArgument("-d", 1);
    this.timeOut = this.getArgument("-w", 30000);
  }

  /**
   * Report whether a single-letter switch was passed.
   *
   * @param {string} char - Letter identifying the switch.
   * @returns {boolean} True when some argument is a dash followed only by
   *   letters, one of which is `char`.
   */
  getFlag(char) {
    // Anchored so that only whole "-abc" clusters count; an unanchored pattern also
    // matched dashes inside paths and option values.
    const cluster = new RegExp(`^-[A-Za-z]*[${char}][A-Za-z]*$`);
    // Skip argv[0] (node binary) and argv[1] (script path): they are not user input.
    return process.argv
      .slice(2)
      .some((arg) => typeof arg === 'string' && cluster.test(arg));
  }

  /**
   * Read the value that follows a valued option.
   *
   * @param {string} flag - Exact option token, e.g. "-n".
   * @param {*} def - Value returned when the option is absent or unusable.
   * @param {boolean} [isInt=true] - Parse the value as a base-10 integer.
   * @returns {*} The parsed integer (zero included), the raw string when
   *   `isInt` is false, or `def`.
   */
  getArgument(flag, def, isInt = true) {
    const args = process.argv;
    const flagIndex = args.indexOf(flag);
    const rawValue = flagIndex > -1 ? args[flagIndex + 1] : undefined;
    if (!isInt) {
      return rawValue || def;
    }
    const parsed = Number.parseInt(rawValue, 10);
    // Test for NaN explicitly: `parsed || def` would throw away a legitimate 0.
    return Number.isNaN(parsed) ? def : parsed;
  }
}
module.exports = Args

15
ref/classes/formats.js Normal file
View file

@ -0,0 +1,15 @@
const colors = require('cli-color');
/**
 * Named console text styles, each taken from the `colors` module so callers
 * can write e.g. `format.notice(text)`.
 */
class Formats {
  constructor() {
    const { green, red, yellow, blue, bold, underline, italic } = colors;
    Object.assign(this, {
      success: green.bold, // bold green
      error: red.bold, // bold red
      warn: yellow,
      notice: blue.bold, // bold blue
      bold,
      underline,
      italic,
    });
  }
}
module.exports = Formats

245
ref/classes/tweets.js Normal file
View file

@ -0,0 +1,245 @@
const webdriver = require('selenium-webdriver');
const By = webdriver.By;
//const { format } = require('fast-csv');
const elements = require('../functions/elements.js'); //link support.js
const XPathObjects = require('../classes/xpaths.js'); //link xpaths.js
const Args = require('../classes/arguments.js');
const args = new Args();
const support = require('../functions/support.js');
const debuglog = support.debuglog;
const funcs = require('../functions/functions.js'); //link functions.js
const mastodon = require('../functions/mastodon.js'); //link mastodon.js
const Formats = require('../classes/formats.js');
const format = new Formats();
//const homeX = new XPathObjects.TweetPaths("home"); //import xpath class object for home timeline
//const threadX = new XPathObjects.TweetPaths("thread"); //import xpath class object for thread timeline
/**
 * State and scraping logic for one tweet on a timeline.
 *
 * An instance records what was detected on the page (retweet, pinned, quote,
 * media, ...), the text sections that will make up the Mastodon post, and the
 * XPaths (`this.x`) used to find the tweet's elements.
 */
class Tweets {
  /**
   * @param {string} orig - timeline the tweet belongs to ("home" or "thread")
   * @param {number} i - 1-based position of the tweet
   */
  constructor(orig,i) {
    //parameters
    this.orig = orig;
    this.index = i-1;
    this.no = i;
    this.processed = false;
    //detect to filter out
    this.isRT = false;
    this.isAR = false;
    this.isPin = false;
    this.keep = false;
    //detect to move into thread
    this.isThread = false;
    //processed text for posting
    this.text = "";
    //header
    this.name = "";
    this.handle = "";
    this.url = "";
    this.header = "";
    //body
    this.hasBody = false;
    this.body = "";
    this.hasLinks = false;
    this.links = "";
    //footer
    this.hasVideo = false;
    this.isQT = false;
    this.quoteLink = "";
    this.footer = "";
    //media
    this.hasSingleImage = false;
    this.hasMultiImage = false;
    this.hasImages = false;
    this.imgArray = [];
    this.imgCount = 0;
    this.imgUrl = "";
    this.iterateExists = false;
    this.iteratePath = "";
    //mastodon
    this.id = 0;
    this.prompt = 0;
    this.posted = false;
    if (orig == "thread") {
      this.threadLength = 0;
      this.entryIsOpen = false;
    }
    //xpaths of tweet & elements
    this.x = new XPathObjects.TweetPaths(orig,i);
  }

  /** Join the non-empty header/body/footer sections into `this.text`, separated by a blank line. */
  compileText(){
    this.text = [this.header, this.body, this.footer]
      .filter(section => section !== '')
      .join('\r\n\r\n');
  }

  /**
   * Append text to one of the post sections, on a new line if the section already has content.
   * @param {string} txt - text to append
   * @param {string} section - 'header', 'body' or 'footer'
   * @throws {Error} when `section` is not one of the three known sections
   */
  appendSection(txt, section) {
    if (!['header', 'body', 'footer'].includes(section)) {
      throw new Error(`Invalid section: ${section}`);
    }
    this[section] += this[section] ? `\r\n${txt}` : txt;
  }

  /**
   * Wait for the tweet to be present, then record which elements it contains.
   * Stops early (only `isAR` set) when the tweet is age-restricted.
   * @param {object} driver - selenium WebDriver instance
   */
  async identifyElements(driver){
    await elements.waitFor(driver,this.x.tweet,args.timeOut); //WAIT FOR TWEET OF CURRENT ITERATION
    this.isAR = await elements.doesExist(driver,this.x.ageRestricted);//IS TWEET AGE-RESTRICTED?
    if (this.isAR){
      return;
    }
    const mTweetURL = await elements.getAttribute(driver,this.x.tweetURL,'href'); //GET URL OF TWEET
    this.url = mTweetURL.replace('://mobile.','://'); //SAVE TWEET URL WITHOUT THE MOBILE SUBDOMAIN
    this.hasBody = await elements.doesExist(driver,this.x.tweetText);//DOES TWEET HAVE BODY TEXT?
    this.hasLinks = await elements.doesExist(driver,this.x.urlCard);//DOES TWEET HAVE URL CARDS
    this.hasVideo = await elements.doesExist(driver,this.x.video);//DOES TWEET HAVE VIDEO MEDIA?
    this.isQT = await elements.doesExist(driver, this.x.quoteTweetContent);//IS TWEET A QUOTE TWEET
    this.isThread = await elements.doesExist(driver,this.x.detectThread);//IS TWEET A PART OF THREAD
    this.isRT = await elements.doesExist(driver,this.x.detectRT);//CHECK FOR RETWEETS
    this.isPin = await elements.doesExist(driver,this.x.pinnedTweet);//IS TWEET PINNED
    this.hasSingleImage = await elements.doesExist(driver, this.x.singleImage);//DOES TWEET HAVE SINGLE IMAGE?
    this.hasMultiImage = await elements.doesExist(driver,this.x.multiImage);//DOES TWEET HAVE MULTIPLE IMAGES?
    this.hasImages = this.hasSingleImage || this.hasMultiImage;//DOES TWEET HAVE ANY IMAGES?
  }

  /**
   * Read the text/link content flagged by identifyElements() and fill the
   * header, body and footer sections.
   * @param {object} driver - selenium WebDriver instance
   */
  async getElementProperties(driver){
    if (args.printMeta) { //IF TWEET HEADER IS ENABLED
      debuglog("running header stuff",2);
      this.handle = await elements.getText(driver,this.x.tweeterHandle);//GET TEXT OF TWEETER HANDLE (@)
      this.name = await elements.getText(driver,this.x.tweeterName);//GET TEXT OF TWEETER NAME (DISPLAY NAME)
      this.appendSection(`${this.name} (${this.handle})\r\n${this.url}`,'header');//COMBINE HEADER COMPONENTS WITH URL
      debuglog(`Tweet Header:\r\n${this.header}`);
    }
    if (this.hasBody){//IF TWEET HAS BODY TEXT
      debuglog("running body text stuff",2);
      const bodyText = await elements.getText(driver,this.x.tweetText);//SET TWEET BODY TO TEXT OF TWEET
      this.appendSection(bodyText,'body');
      debuglog(`Tweet Body:\r\n${this.body}`);
    }
    if (this.hasLinks){//IF TWEET HAS URL CARD
      debuglog("running url card stuff",2);
      const tweetCardURL = await elements.getAttribute(driver,this.x.urlCard,"href");//GET URL OF URL CARD
      this.links = await funcs.expandUrl(tweetCardURL);
      this.appendSection(this.links,'body');
      debuglog(`Tweet link: ${this.links}`);
    }
    if (this.isQT){ //IF TWEET IS A QUOTE TWEET, GET URL AND ADD TO EITHER HEADER OR FOOTER
      debuglog("running quote tweet stuff",2);
      await driver.findElement(By.xpath(this.x.quoteTweetContent)).sendKeys(webdriver.Key.CONTROL, webdriver.Key.ENTER);//OPEN QUOTE TWEET IN NEW TAB
      this.parent = await driver.getWindowHandle();
      const windows = await driver.getAllWindowHandles();
      // Every step is awaited in order: switch to the new tab, read its URL,
      // close it, return to the original tab. Un-awaited commands here would
      // race with the window switches.
      await driver.switchTo().window(windows[windows.length-1]);//SWITCH TO NEW TAB WITH QUOTED TWEET
      try {
        const quotedUrl = await driver.getCurrentUrl();
        this.quoteLink = quotedUrl.replace('://mobile.','://'); //MAKE MOBILE TWEET NON-MOBILE
      } finally {
        await driver.close();//CLOSE NEW TAB
        await driver.switchTo().window(this.parent);//SWITCH BACK TO ORIGINAL TAB
      }
      const text = args.reQuotes //DETERMINE HOW TO FORMAT QUOTE LINK DEPENDING ON RELEVANT ARGUMENT
        ? `Re: ${this.quoteLink}`
        : `« Quoting ${this.quoteLink} »`;
      this.appendSection(text, args.reQuotes ? 'header' : 'footer'); //PLACE QUOTE LINK AT HEADER OR FOOTER OF TWEET
      debuglog(`Tweet Header: ${this.header}`);
      debuglog(`Tweet Footer: ${this.footer}`);
    }
    if (this.hasVideo) {//IF TWEET HAS NON-POSTABLE MEDIA, APPEND FOOTER DETAILING SO
      debuglog("running video stuff",2);
      // Uses this tweet's own URL; the previous `homeTweet` identifier is not defined in this module.
      this.appendSection(`⚠ Original tweet had attachment(s) that couldn't be cross-posted. View it at ${this.url}`,'footer');
      debuglog(`Tweet Footer: ${this.footer}`);
    }
  }

  /**
   * Build the on-disk path of one of this tweet's images.
   * Non-home tweets get an 'r' prefix before the tweet number.
   * @param {string} imgSavePath - path prefix the file name is appended to
   * @param {number} n - 1-based image number within the tweet
   * @returns {string}
   */
  imagePath(imgSavePath, n) {
    return `${imgSavePath}${this.orig == 'home' ? '' : 'r'}${this.no}.${n}.jpg`;
  }

  /**
   * Download the tweet's image(s) and record how many were found in `imgCount`.
   * A failed download is reported on stderr and does not abort the remaining images.
   * @param {object} driver - selenium WebDriver instance
   * @param {string} imgSavePath - path prefix for the saved files
   */
  async downloadImages(driver,imgSavePath) {
    const fetchImage = async (xpath) => {
      this.imgUrl = await elements.getAttribute(driver,xpath,'src');
      this.imgCount++;
      try {
        debuglog(await funcs.downloadImage(this.imgUrl, this.imagePath(imgSavePath, this.imgCount)));
      } catch (err) {
        console.error(err);
      }
    };
    if (this.hasSingleImage) {
      debuglog(`${this.orig} Tweet #${this.no} contains a single image.`, 2);
      this.imgCount = 0;
      await fetchImage(this.x.singleImage);
      debuglog(`Downloaded ${this.imgCount} image from tweet #${this.no}.`, 2);
    } else if (this.hasMultiImage) {
      debuglog(`${this.orig} Tweet #${this.no} contains multiple images.`, 2);
      this.imgCount = 0;
      // Probe each position of the 2x2 image grid.
      for (let x = 1; x < 3; x++) {
        for (let y = 1; y < 3; y++) {
          this.iterateExists = await elements.doesExist(driver,this.x.multiImages(x,y));
          if (this.iterateExists) {
            debuglog(`${x},${y} Exists!`);
            await fetchImage(this.x.multiImages(x,y));
            debuglog(this.imgUrl,2);
          }
        }
      }
      debuglog(`Downloaded ${this.imgCount} images from tweet #${this.no}.`,1);
    }
  }

  /**
   * Upload the downloaded images via mastodon.postMedia and collect the returned ids in `imgArray`.
   * @param {string} imgSavePath - same path prefix that was given to downloadImages()
   */
  async uploadImages(imgSavePath) {
    if (this.hasImages) {debuglog("Uploading images to Mastodon...",1);}
    for (let f = 1; f <= this.imgCount; f++) {
      const jpgPath = this.imagePath(imgSavePath, f);
      debuglog(`uploading image to mastodon: ${jpgPath}`);
      const imgid = await mastodon.postMedia(jpgPath);
      debuglog(`mastodon image id: ${imgid}`);
      this.imgArray.push(imgid);
    }
  }

  /** Log the compiled post text framed by two horizontal rules. */
  async printPreview(){
    // process.stdout.columns is undefined when stdout is not a TTY; fall back to
    // 80 columns and never pass a negative count to repeat().
    const rule = '═'.repeat(Math.max((process.stdout.columns || 80) - 2, 0));
    const postPreviewMessage = [
      format.success('Mastodon Post Preview:'),
      rule,
      this.text,
      rule,
    ].join('\n');
    debuglog(postPreviewMessage, 1);
  }
}
module.exports = Tweets

60
ref/classes/xpaths.js Normal file
View file

@ -0,0 +1,60 @@
/**
 * XPath catalogue for one tweet on a timeline.
 *
 * Every property is an XPath string built from the timeline container path
 * for the given origin and the position of the tweet inside it.
 */
class TweetPaths {
  /**
   * @param {string} orig - 'home' or 'thread'; selects the timeline container path
   * @param {number} i - tweet position, used as the index only for non-home timelines
   */
  constructor(orig,i) {
    const isHome = orig == 'home';
    const isThread = orig == 'thread';

    // Immediate parent div of all tweets (left unset for any other origin).
    if (isHome) {
      this.timeLine = "//*[@id='react-root']/div/div/div[2]/main/div/div/div/div/div/div[3]/div/div/section/div/div";
    } else if (isThread) {
      this.timeLine = "/html/body/div[1]/div/div/div[2]/main/div/div/div/div[1]/div/section/div/div";
    }
    this.tweet = `${this.timeLine}/div`; // divs holding the individual tweets
    this.containsDivs = `${this.timeLine}[count(div) > 1]`; // timeline once it holds more than one div
    this.path = `${this.tweet}[${isHome ? 1 : i}]`; // this tweet: always the first on home, the i-th otherwise

    // Paths below are relative to this tweet; most go through its <article>.
    const article = `${this.path}/div/div/div/article`;
    this.urlCard = `${article}/div/div/div/div[*]/div[*]/div[*]/div[*]/div/div[2]/a`;
    this.tweeterHandle = `${article}/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div/div/div[2]/div/div[1]/a/div/span[contains(text(),'@')]`; // label with the author's handle
    this.tweeterName = `${article}/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div/div/div[1]/div/a/div/div[1]/span`; // label with the author's display name
    this.quoteTweetHandle = `${article}/div/div/div/div[2]/div[2]/div[2]/div[2]/div[*]/div[2]/div/div[1]/div/div/div/div/div/div[2]/div[1]/div/div/div/span`; // handle of the quoted user; present only on quote tweets
    this.quoteTweetContent = `${article}/div/div/div/div[2]/div[2]/div[2]/div[2]/div[*]/div[2][div/div[1]/div/div/div/div/div/div[2]/div[1]/div/div/div/span]`; // whole quoted-tweet container
    this.ageRestricted = `${article}//span/span[1]/span[contains(text(),'Age-restricted')]`; // present when the tweet is age-restricted
    this.pinnedTweet = `${article}/div/div/div/div[1]/div/div/div/div/div[2]/div/div/div/span[contains(text(),'Pinned')]`; // present when the tweet is pinned
    this.tweetText = `${this.path}//div[@data-testid='tweetText']`; // div containing all tweet text
    this.tweetURL = `${this.path}//div[3]/a[contains(@href, 'status')]`; // link to the tweet itself
    this.video = `${this.path}//div[1]//video`;
    this.singleImage = `${this.path}//div[1]/div/div/div/div/a/div/div[2]/div/img[@alt='Image']`; // present when the tweet has exactly one image
    this.multiImage = `${this.path}//div[2]/div[2]/div[2]/div[2]/div/div/div/div/div[2]/div/div[1]/div[1]//a/div/div/img[@alt='Image']`; // present when the tweet has several images

    if (isHome) { // home timeline only
      this.detectThread = `${article}/div/a/div/div[2]/div/span`; // label shown when the tweet is part of a thread
      this.detectRT = `${article}/div/div/div/div[1]/div/div/div/div/div[2]/div/div/div/a/span`; // label shown when the tweet is a retweet
    } else if (isThread) { // thread timeline only
      this.entryTweet = `${article}/div/div/div/div[3]/div[5]/div/div[1]/div/a`; // present when the tweet is the one opened in the thread view
    }

    // Fragments combined by multiImages(x, y) to address one image of a
    // multi-image tweet. Per the original notes:
    //   1,1 = first image   2,1 = second image
    //   2,2 = third image   1,2 = fourth image
    this.multiImage1 = "//div[2]/div[2]/div[2]/div[2]/div/div/div/div/div[2]/div/div[";
    this.multiImage2 = "]/div[";
    this.multiImage3 = "]//a/div/div/img[@alt='Image']";
  }

  /**
   * XPath of an arbitrary element below this tweet.
   * @param {*} i - unused; kept for call compatibility
   * @param {string} pathFromTweet - XPath suffix appended to the tweet path
   * @returns {string}
   */
  tweetElement(i, pathFromTweet) {
    return `${this.path}${pathFromTweet}`;
  }

  /**
   * XPath of the image at grid position (x, y) of a multi-image tweet.
   * @returns {string}
   */
  multiImages(x,y) {
    return `${this.path}${this.multiImage1}${x}${this.multiImage2}${y}${this.multiImage3}`;
  }
}
module.exports = { TweetPaths }

45
ref/functions/csv.js Normal file
View file

@ -0,0 +1,45 @@
const csvWriter = require('csv-write-stream');
const fs = require('fs');
const support = require('../functions/support.js');
const debuglog = support.debuglog;
/**
 * Create (or truncate) the CSV file and write one row holding the column
 * titles 'URLs', 'IDs' and 'Origin'.
 *
 * The returned promise settles only after the file stream has finished, so a
 * caller that awaits it can read the file straight away.
 *
 * @param {string} csvFN - path of the CSV file
 * @returns {Promise<void>} rejects if the file stream reports an error
 */
async function initCSV(csvFN){
  // Local binding: the previous bare assignment created an implicit global.
  const writer = csvWriter({sendHeaders: false});
  const fileStream = writer.pipe(fs.createWriteStream(csvFN));
  const flushed = new Promise((resolve, reject) => {
    fileStream.on('finish', () => resolve());
    fileStream.on('error', reject);
  });
  writer.write({
    header1: 'URLs',
    header2: 'IDs',
    header3: 'Origin'
  });
  writer.end();
  await flushed;
}
/**
 * Read the whole CSV file as UTF-8 text.
 *
 * The previous version used the callback form of fs.readFile (whose callback
 * return value is discarded) and then returned an undeclared variable, so it
 * could never hand the contents back.
 *
 * @param {string} csvFN - path of the CSV file
 * @returns {Promise<string>} file contents; rejects with the fs error if the file cannot be read
 */
async function openCSV(csvFN){
  return fs.promises.readFile(csvFN, "utf-8");
}
/**
 * Append one row (url, id, origin) to the CSV file unless the URL already
 * appears in the previously read file contents.
 *
 * The returned promise settles only after the appended row has been flushed.
 *
 * @param {string} url - tweet URL to record
 * @param {*} id - id stored next to the URL
 * @param {string} orig - origin label stored in the third column
 * @param {string} csvFN - path of the CSV file
 * @param {string} fc - current contents of the CSV file, used for the duplicate check
 * @returns {Promise<void>} rejects if the file stream reports an error
 */
async function appendToCSV(url,id,orig,csvFN,fc){
  debuglog(`writing '${url}' to CSV!!`,2)
  debuglog(`file contents: ${fc}`);
  // Non-string contents (e.g. undefined) are treated as "nothing recorded yet"
  // instead of throwing on .includes().
  if (typeof fc === 'string' && fc.includes(url)) {
    return; // already recorded; nothing to write
  }
  // Local binding: the previous bare assignment created an implicit global.
  const writer = csvWriter({sendHeaders: false});
  const fileStream = writer.pipe(fs.createWriteStream(csvFN, {flags: 'a'}));
  const flushed = new Promise((resolve, reject) => {
    fileStream.on('finish', () => resolve());
    fileStream.on('error', reject);
  });
  writer.write({
    header1: url,
    header2: id,
    header3: orig
  });
  writer.end();
  await flushed;
}
module.exports = { initCSV,appendToCSV,openCSV };

37
ref/functions/elements.js Normal file
View file

@ -0,0 +1,37 @@
const webdriver = require('selenium-webdriver');
const By = webdriver.By;
const until = webdriver.until;
/**
 * Report whether an element matching the XPath can currently be found.
 *
 * Always resolves to a strict boolean. A NoSuchElementError means "not
 * found". Any other lookup error is written to stderr and also reported as
 * `false`; previously such errors resolved to `undefined` without a trace.
 *
 * @param {object} drvr - selenium WebDriver instance
 * @param {string} path - XPath expression to look up
 * @returns {Promise<boolean>}
 */
async function doesExist(drvr,path){
  try {
    await drvr.findElement(By.xpath(path));
    return true; // It existed
  } catch (err) {
    if (!(err instanceof webdriver.error.NoSuchElementError)) {
      console.error(`doesExist: unexpected error while locating '${path}':`, err);
    }
    return false; // It was not found (or could not be looked up)
  }
}
/**
 * Block until an element matching the XPath has been located, or the driver's
 * wait gives up after `ms`.
 * @param {object} drvr - selenium WebDriver instance
 * @param {string} xpath - XPath expression to wait for
 * @param {number} ms - timeout handed to drvr.wait
 * @returns {Promise<void>}
 */
async function waitFor(drvr,xpath,ms){
  const locator = By.xpath(xpath);
  const located = until.elementLocated(locator);
  await drvr.wait(located, ms);
}
/**
 * Look up the element at `xpath` and read one of its attributes.
 * @param {object} drvr - selenium WebDriver instance
 * @param {string} xpath - XPath expression of the element
 * @param {string} attribute - attribute name, e.g. 'href' or 'src'
 * @returns {Promise<*>} whatever the element's getAttribute() yields
 */
async function getAttribute(drvr,xpath,attribute){
  const locator = By.xpath(xpath);
  const element = drvr.findElement(locator);
  return element.getAttribute(attribute);
}
/**
 * Look up the element at `xpath` and read its text.
 * @param {object} drvr - selenium WebDriver instance
 * @param {string} xpath - XPath expression of the element
 * @returns {Promise<*>} whatever the element's getText() yields
 */
async function getText(drvr,xpath){
  const locator = By.xpath(xpath);
  const element = drvr.findElement(locator);
  return element.getText();
}
/**
 * Look up the element at `xpath`.
 * @param {object} drvr - selenium WebDriver instance
 * @param {string} xpath - XPath expression of the element
 * @returns {Promise<*>} the result of drvr.findElement for that XPath
 */
async function getElement(drvr,xpath){
  const locator = By.xpath(xpath);
  return drvr.findElement(locator);
}
module.exports = { doesExist,waitFor,getAttribute,getText,getElement };

View file

@ -0,0 +1,41 @@
const fs = require('fs');
const client = require('https');
var { tall } = require('tall')
/**
 * Download `url` over HTTPS and save the response body to `filepath`.
 *
 * @param {string} url - HTTPS URL of the image
 * @param {string} filepath - destination file path
 * @returns {Promise<string>} resolves with `filepath` once the file stream has closed;
 *   rejects on a non-200 status, a request/response error, or a file write error
 */
function downloadImage(url, filepath) {
  return new Promise((resolve, reject) => {
    const request = client.get(url, (res) => {
      if (res.statusCode !== 200) {
        res.resume(); // drain the response so the socket is released
        reject(new Error(`Request Failed With a Status Code: ${res.statusCode}`));
        return;
      }
      res.on('error', reject);
      res.pipe(fs.createWriteStream(filepath))
        .on('error', reject)
        .once('close', () => resolve(filepath));
    });
    // Without this handler a connection failure (DNS, refused, reset) is an
    // unhandled 'error' event and the promise never settles.
    request.on('error', reject);
  });
}
/**
 * Resolve a shortened URL to its destination using `tall`.
 * @param {string} shortUrl - URL to expand
 * @returns {Promise<string>} the expanded URL, or "" when expansion fails
 *   (the failure is written to stderr)
 */
async function expandUrl(shortUrl) {
  try {
    return await tall(shortUrl);
  } catch (failure) {
    console.error('Error unshortening url: ', failure)
    return "";
  }
}
/**
 * Pseudo-random integer between `min` and `max`, both inclusive.
 * @param {number} min - lower bound
 * @param {number} max - upper bound
 * @returns {number}
 */
function rand(min, max) {
  const span = max - min + 1;
  return Math.floor(Math.random() * span + min);
}
module.exports = { downloadImage,expandUrl,rand };

80
ref/functions/mastodon.js Normal file
View file

@ -0,0 +1,80 @@
const fs = require('fs');
const Masto = require('mastodon');
const support = require('../functions/support.js');
const csv = require('../functions/csv.js');
const debuglog = support.debuglog;
const funcs = require('../functions/functions.js');
const Args = require('../classes/arguments.js');
const args = new Args();
const Formats = require('../classes/formats.js');
const format = new Formats();
/**
 * Build a Mastodon client from ./config.txt.
 * The first line of the file is used as the access token and the second as the API URL.
 * @returns {object} a new Masto client
 */
function setupMastodon(){
  const [accessToken, apiUrl] = fs.readFileSync("./config.txt").toString().split(/[\r\n]+/);
  return new Masto({
    access_token: accessToken,
    api_url: apiUrl
  });
}
/**
 * Upload one media file to Mastodon.
 *
 * - With args.enablePosts: uploads the file and resolves to the media id from
 *   the response. A failed upload is logged at level 0 and resolves to 0.
 * - Otherwise, with args.forceCSV: resolves to a random placeholder id (1-100).
 * - Otherwise: resolves to 0.
 *
 * @param {string} path - path of the file to upload
 * @returns {Promise<*>} media id, placeholder id, or 0
 */
async function postMedia(path){
  if (args.enablePosts){
    const M = setupMastodon();
    let resp;
    try {
      resp = await M.post('media', { file: fs.createReadStream(path) });
    } catch (err) {
      if (err) {
        debuglog(err,0);
      }
      // Callers have always received 0 here: the old `return "err"` sat in a
      // promise callback and never reached them.
      return 0;
    }
    return resp.data.id;
  }
  if (args.forceCSV) {
    return funcs.rand(1,100);
  }
  // Local result only; the old version kept the id in an implicit global
  // shared by every call.
  return 0;
}
/**
 * Post a compiled tweet to Mastodon and record it in the CSV log.
 *
 * - With args.enablePosts: posts tweet.text, attaching tweet.imgArray when the
 *   tweet has images and replying to tweet.prompt when it is non-zero. On
 *   success the tweet URL and new status id are appended to the CSV.
 * - Otherwise, with args.forceCSV: nothing is posted; the URL is appended to
 *   the CSV with a random placeholder id (1-100) and a "forced" origin.
 *
 * @param {object} tweet - tweet object (reads text, no, prompt, hasImages, imgArray, url, orig)
 * @param {string} file - path of the CSV log
 * @param {string} csvc - current contents of the CSV log, forwarded to csv.appendToCSV
 * @returns {Promise<*>} the new status id, the placeholder id, or 0 when nothing was posted
 */
async function postStatus(tweet,file,csvc){
  let id = 0;
  if (args.enablePosts){
    const M = setupMastodon();
    // Local binding: the previous bare assignment created an implicit global
    // shared between calls.
    const params = { status: tweet.text };
    debuglog(`${tweet.no} is a reply to ${tweet.prompt}`);
    if (tweet.hasImages) {//POST HAS IMAGES
      debuglog("post has images!!",2)
      debuglog(`images array: ${tweet.imgArray}`,2)
      params.media_ids = tweet.imgArray;
    }
    if (tweet.prompt != 0) {//POST IS A REPLY
      debuglog(`reply to: ${tweet.prompt}`,2)
      params.in_reply_to_id = tweet.prompt;
    }
    await M.post('statuses', params, (err, data) => {
      if (err) {
        debuglog(format.error(`Post to Mastodon failed with error: ${err}`), 1);
        return; // id stays 0
      }
      //ADD TWEET TO CSV TO PREVENT FUTURE PROCESSING
      csv.appendToCSV(tweet.url,data.id,tweet.orig,file,csvc);
      debuglog(`posted to mastodon and got back id: ${data.id}`);
      debuglog(format.bold(`Successfully posted ${tweet.url} to Mastodon!`),1);
      id = data.id;
    })
  } else if (args.forceCSV) {
    const fakeID = funcs.rand(1,100);
    csv.appendToCSV(tweet.url,fakeID,(`forced ${tweet.orig}`),file,csvc);
    id = fakeID;
  }
  return id;
}
module.exports = { postMedia,postStatus };

87
ref/functions/support.js Normal file
View file

@ -0,0 +1,87 @@
const fs = require('fs');
const Args = require('../classes/arguments.js');
const args = new Args();
const Formats = require('../classes/formats.js');
const format = new Formats();
/**
 * Print ./usage.txt to the console at log level 1.
 *
 * Markup in the file is converted first: {text} becomes bold, ~text~
 * underlined and <text> italic. Every line break is then replaced by a
 * newline followed by "░ ".
 */
function printHelp() {
  const rawUsage = fs.readFileSync('./usage.txt', 'utf-8');
  // Applied in order; each entry is [pattern, replacement].
  const substitutions = [
    [/{([^{}]+)}/g, format.bold('$1')],
    [/~([^~]+)~/g, format.underline('$1')],
    [/<([^<>]+)>/g, format.italic('$1')],
    [/(\r\n|\r|\n)/g, '\n░ '],
  ];
  let rendered = rawUsage;
  for (const [pattern, replacement] of substitutions) {
    rendered = rendered.replace(pattern, replacement);
  }
  debuglog(rendered,1);
}
/**
 * Log the parsed command-line settings: one line per option at level 2,
 * then a plain-language summary of the run at level 1.
 */
function logArguments() {
  // [flag letter, label, property on args]
  const options = [
    ['h', 'help', 'help'],
    ['q', 'quotes', 'enableQuotes'],
    ['t', 'threads', 'enableThreads'],
    ['b', 'displayBrowser', 'displayBrowser'],
    ['p', 'enablePosts', 'enablePosts'],
    ['c', 'forceCSV', 'forceCSV'],
    ['m', 'printMeta', 'printMeta'],
    ['u', 'userName', 'userName'],
    ['n', 'tweetCount', 'tweetCount'],
    ['d', 'debug', 'debug'],
    ['w', 'timeout', 'timeOut'],
  ];
  const notUnless = (enabled) => (enabled ? "" : " not");

  debuglog("Settings: ", 2);
  for (const [flag, label, key] of options) {
    debuglog(`-${flag} ${label}: ${args[key]}`, 2);
  }

  const count = format.bold(args.tweetCount);
  const account = format.bold(`@${args.userName}`);
  debuglog(`Scraping ${count} tweet(s) from ${account}...`, 1);
  debuglog(`Browser is${notUnless(args.displayBrowser)} visible`, 1);
  const quotePart = args.enableQuotes ? ", Quote" : "";
  const threadPart = args.enableThreads ? ", Thread" : "";
  debuglog(`Tweets${quotePart}${threadPart} Tweets will${notUnless(args.enablePosts)} be posted to Mastodon`, 1);
  debuglog(`Tweet URLs will${notUnless(args.forceCSV)} be forcibly added to CSV file`, 1);
  debuglog(`Name, handle, and URL will${notUnless(args.printMeta)} be added to the body text`, 1);
}
/**
 * console.log wrapper filtered by the user-selected debug level.
 * The message is printed only when `logLevel` is at or below args.debug, and
 * is prefixed with a marker that depends on the level.
 * @param {*} debugString - message to print
 * @param {number} [logLevel=2] - level of this message (0, 1 or 2)
 */
function debuglog(debugString,logLevel = 2) {
  if (!(logLevel <= args.debug)) {
    return; // more verbose than the user asked for
  }
  const marker = {
    0: " ",
    1: "░",
    2: "█",
  }[logLevel];
  console.log(`${marker} ${debugString}`);
}
/**
 * Validate the parsed command-line arguments and terminate on bad input.
 *
 * - args.help: prints the usage text and exits with code 0.
 * - args.userName must be 1-15 word characters, optionally prefixed with '@'.
 * - args.tweetCount must be a number of at least 1.
 * - args.debug must be a number from 0 to 2.
 *
 * Each failure logs the error and a help hint at level 0, then exits with
 * code 1. Non-numeric values (NaN) are rejected for tweetCount and debug;
 * a plain `< 1` / `> 2` comparison lets NaN through.
 */
function validateArgs() {
  if (args.help) {
    printHelp();
    process.exit(0);
  }
  const helpText = format.notice("For help: $node ./TwitToMast.js -h");
  const fail = (message) => {
    debuglog(message, 0);
    debuglog(helpText, 0);
    process.exit(1);
  };

  // No `g` flag: a global regex keeps state between .test() calls and gains nothing here.
  const userNameRegex = /^@?(\w){1,15}$/;
  if (!userNameRegex.test(args.userName)) {
    fail(format.error("Uh-oh! It seems like the username doesn't work! Make sure you're entering the user's handle as it appears on-screen."));
  }

  const tweetCount = Number(args.tweetCount);
  if (!(tweetCount >= 1)) { // negated form also catches NaN
    fail(format.error(`Expected Integer greater than 0, got '${args.tweetCount}' instead`));
  }

  const debugLevel = Number(args.debug);
  if (!(debugLevel >= 0 && debugLevel <= 2)) { // negated form also catches NaN
    fail(format.error(`Expected 0-2, got '${args.debug}' instead`));
  }
}
module.exports = { printHelp,logArguments,debuglog,validateArgs };

42
usage.txt Normal file
View file

@ -0,0 +1,42 @@
{Usage}
{node ./TwitToMast.js} [{-htqrpmbc}] [{-u} ~username~] [{-n} ~tweetcount~] [{-d} ~debuglevel~] [{-w} ~timeout~]
{node ./multi.js} [{-htqrpmbc}] [{-n} ~tweetcount~] [{-d} ~debuglevel~] [{-w} ~timeout~]
{Arguments}
{-h:} - show help screen (you made it here!)
{-u:} ~username~
- the twitter handle of the user whose account will be scraped
<- defaults to 'Twitter' (@twitter)>
{-n:} ~tweetcount~
- the number of enabled tweets that will be scraped from the targeted account
<- defaults to 5>
{-t:} - tweets that are part of threads will be included in the scan
{-q:} - quote tweets will be included in the scan
{-r:} - Link to quoted tweet will appear in the header, preceded by "Re: "
- default behavior posts link at bottom of Mastodon post preceded by "Quoting "
{-p:} - enable posting to Mastodon (nothing is posted unless this flag is set)
{-m:} - include user's name, handle, and link to tweet
{-b:} - display browser (disable headless mode)
{-c:} - force URL to be logged to file if posts are disabled
{-d:} ~debuglevel~
- amount of information to print to console
<0: only errors>
<1: current task + tweet Text (default)>
<2: pretty much everything>
{-w:} ~timeout~
- length of time (in ms) to wait for page elements to load
<- defaults to 30000 (30 seconds)>
{config.txt}
{Line 1: API_KEY}
- Your Access Token obtained from Mastodon > Preferences > Development > Application
{Line 2: API_URL}
- https://~your mastodon server url~/api/v1/
{Examples}
{Scrape 10 most recent tweets, quote tweets, and thread tweets from @twitter account, and post to Mastodon}
$node ./TwitToMast.js -qtp -u twitter -n 10
{Scrape 10 most recent tweets, quote tweets, and thread tweets from accounts listed in usernameslist.txt, and post to Mastodon}
$node ./multi.js -qtp -n 10

View file

@ -9,4 +9,4 @@ TwitterBlue
TwitterDesign
TwitterEng
Policy
TwitterDev
TwitterDev