Update TwitToMast.js

update documentation
rename fromLoop to printHeader so the option applies more universally
set up an individual folder for each scraped user's tweet images
This commit is contained in:
Penelope Gomez / Pogmommy 2023-02-07 20:26:52 -07:00
parent 79f46b803a
commit d9576322c1

View file

@ -15,12 +15,13 @@ const Q = require("q");
const args = process.argv; const args = process.argv;
if (args[2] == "-h"){ if (args[2] == "-h"){
console.log("usage: $node ./TwitToMast.js [username] [tweet count] [debug level] [disable posts]"); console.log("usage: $node ./TwitToMast.js [username] [tweet count] [debug level] [disable posts] [print header]");
console.log(" username: (string) username of account to scrape - required"); console.log(" username: (string) -username of account to scrape - required");
console.log(" tweet count: (integer) number of tweets to scrape - required"); console.log(" tweet count: (integer) -number of tweets to scrape - required");
console.log(" debug level: (0-2) amount of information to print to console - defaults to 0"); console.log(" debug level: (0-2) -amount of information to print to console - 0 by default");
console.log(" disable posts: ('write','noWrite') enable/disable posting to Mastodon - defaults to enable"); console.log(" disable posts: ('write','noWrite') -enable/disable posting to Mastodon - disabled by default");
console.log(" "); console.log(" print header: ('printHeader') -enable attaching a header with the user's name, twitter");
console.log(" handle, and link to tweet - disabled by default");
console.log(" config.txt:"); console.log(" config.txt:");
console.log(" API_KEY"); console.log(" API_KEY");
console.log(" API_URL"); console.log(" API_URL");
@ -87,11 +88,11 @@ if (typeof args[5] == 'undefined') {
} else if (args[5] == 'noWrite') { } else if (args[5] == 'noWrite') {
disablePosts = true; disablePosts = true;
} }
var fromLoop = false; var printHeader = false;
if (args[6] == 'fromLoop'){ if (args[6] == 'printHeader'){
fromLoop = true; printHeader = true;
} else { } else {
fromLoop = false; printHeader = false;
} }
debuglog(args,2); debuglog(args,2);
debuglog("userName: " + userName,2); debuglog("userName: " + userName,2);
@ -154,11 +155,16 @@ debuglog("API_URL: " + config[1],1);
debuglog("Enable Quote Tweets: " + modulesToEnable[0],1); debuglog("Enable Quote Tweets: " + modulesToEnable[0],1);
debuglog("Enable Thread Tweets: " + modulesToEnable[1],1); debuglog("Enable Thread Tweets: " + modulesToEnable[1],1);
debuglog("Disable posting to Mastodon: " + disablePosts,1); debuglog("Disable posting to Mastodon: " + disablePosts,1);
debuglog("running from loop: " + fromLoop,1); debuglog("running from loop: " + printHeader,1);
//SETUP REMAINDER OF VARIABLES //SETUP REMAINDER OF VARIABLES
const csvFilename = "./URLList.csv"; const csvFilename = "./URLList.csv";
const localDir = './';
const imgSavePath = (localDir + userName + '/');
if (!fs.existsSync(imgSavePath)){
fs.mkdirSync(imgSavePath);
}
//XPATH CONSTANTS //XPATH CONSTANTS
@ -312,7 +318,7 @@ driver.executeScript("document.body.style.zoom='35%'");
tweetText = "" tweetText = ""
//IS TWEET PART OF MULTISCRAPER, IF SO ADD HEADER //IS TWEET PART OF MULTISCRAPER, IF SO ADD HEADER
if (fromLoop) { if (printHeader) {
tweeterHandleText = await driver.findElement(By.xpath(thisTweetXPath + tweeterHandle)).getText(); tweeterHandleText = await driver.findElement(By.xpath(thisTweetXPath + tweeterHandle)).getText();
tweeterNameText = await driver.findElement(By.xpath(thisTweetXPath + tweeterName)).getText(); tweeterNameText = await driver.findElement(By.xpath(thisTweetXPath + tweeterName)).getText();
tweetText = (tweeterNameText + " (" + tweeterHandleText + ")\r\n" + tweetURL + "\r\n\r\n") tweetText = (tweeterNameText + " (" + tweeterHandleText + ")\r\n" + tweetURL + "\r\n\r\n")
@ -401,7 +407,7 @@ driver.executeScript("document.body.style.zoom='35%'");
debuglog("Tweet #" + i + " contains a single image.", 2) debuglog("Tweet #" + i + " contains a single image.", 2)
imageCount = 1; imageCount = 1;
imageURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + singleImageXPath)).getAttribute("src"); imageURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + singleImageXPath)).getAttribute("src");
await downloadImage(imageURL, './' + i + "." + 1 +'.jpg') await downloadImage(imageURL, imgSavePath + i + "." + 1 +'.jpg')
.then(/*console.log*/) .then(/*console.log*/)
.catch(console.error); .catch(console.error);
debuglog("Downloaded " + imageCount + "image from tweet #" + i + ".", 2) debuglog("Downloaded " + imageCount + "image from tweet #" + i + ".", 2)
@ -436,7 +442,7 @@ driver.executeScript("document.body.style.zoom='35%'");
debuglog(x + "," + y + " Exists!") debuglog(x + "," + y + " Exists!")
iteratImgURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + multiImage1XPath + x + multiImage2XPath + y + multiImage3XPath)).getAttribute("src"); iteratImgURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + multiImage1XPath + x + multiImage2XPath + y + multiImage3XPath)).getAttribute("src");
imageCount++; imageCount++;
await downloadImage(iteratImgURL, './' + i + "." + imageCount +'.jpg') await downloadImage(iteratImgURL, imgSavePath + i + "." + imageCount +'.jpg')
.then(/*console.log*/) .then(/*console.log*/)
.catch(console.error); .catch(console.error);
} }
@ -454,7 +460,7 @@ driver.executeScript("document.body.style.zoom='35%'");
debuglog("Uploading images to Mastodon...",1); debuglog("Uploading images to Mastodon...",1);
var imageArray = []; var imageArray = [];
for (var f = 1; f < (imageCount+1); f++) { for (var f = 1; f < (imageCount+1); f++) {
await M.post('media', { file: fs.createReadStream('./' + i + '.' + f + '.jpg') }).then(resp => { await M.post('media', { file: fs.createReadStream(imgSavePath + i + '.' + f + '.jpg') }).then(resp => {
imageArray.push(resp.data.id); imageArray.push(resp.data.id);
}, function(err) { }, function(err) {
if (err) { if (err) {
@ -498,21 +504,6 @@ driver.executeScript("document.body.style.zoom='35%'");
}) })
} }
} }
//REMOVE SAVED IMAGE FILES
debuglog("Cleaning up...",1);
for (var j = 1; j < 5; j++) {
path = ("./" + i + "." + j + ".jpg");
try {
if (fs.existsSync(path)) {
fs.unlinkSync(path);
} else {
debuglog(path + " not found!",2);
}
} catch(err) {
console.error(err)
}
}
} else { } else {
//CODE TO RUN IF TWEET IS IN CSV //CODE TO RUN IF TWEET IS IN CSV
@ -522,6 +513,11 @@ driver.executeScript("document.body.style.zoom='35%'");
if (i < maxTweetScan) {driver.executeScript('var element = document.evaluate(`' + thisTweetXPath + '`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();');} if (i < maxTweetScan) {driver.executeScript('var element = document.evaluate(`' + thisTweetXPath + '`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();');}
} }
//REMOVE SAVED IMAGE FILES
debuglog("Cleaning up...",1);
fs.rm(imgSavePath, { recursive: true, force: true }, (error) => {
//you can handle the error here
});
debuglog("Finished scraping " + userName + "'s tweets",1) debuglog("Finished scraping " + userName + "'s tweets",1)
//EXIT WEBDRIVER //EXIT WEBDRIVER