Update TwitToMast.js
Update documentation; change fromLoop to printHeader for more universal application; set up an individual folder for tweet images from different users.
This commit is contained in:
parent
79f46b803a
commit
d9576322c1
1 changed file with 26 additions and 30 deletions
|
@ -15,12 +15,13 @@ const Q = require("q");
|
||||||
|
|
||||||
const args = process.argv;
|
const args = process.argv;
|
||||||
if (args[2] == "-h"){
|
if (args[2] == "-h"){
|
||||||
console.log("usage: $node ./TwitToMast.js [username] [tweet count] [debug level] [disable posts]");
|
console.log("usage: $node ./TwitToMast.js [username] [tweet count] [debug level] [disable posts] [print header]");
|
||||||
console.log(" username: (string) username of account to scrape - required");
|
console.log(" username: (string) -username of account to scrape - required");
|
||||||
console.log(" tweet count: (integer) number of tweets to scrape - required");
|
console.log(" tweet count: (integer) -number of tweets to scrape - required");
|
||||||
console.log(" debug level: (0-2) amount of information to print to console - defaults to 0");
|
console.log(" debug level: (0-2) -amount of information to print to console - 0 by default");
|
||||||
console.log(" disable posts: ('write','noWrite') enable/disable posting to Mastodon - defaults to enable");
|
console.log(" disable posts: ('write','noWrite') -enable/disable posting to Mastodon - disabled by default");
|
||||||
console.log(" ");
|
console.log(" print header: ('printHeader') -enable attaching a header with the user's name, twitter");
|
||||||
|
console.log(" handle, and link to tweet - disabled by default");
|
||||||
console.log(" config.txt:");
|
console.log(" config.txt:");
|
||||||
console.log(" API_KEY");
|
console.log(" API_KEY");
|
||||||
console.log(" API_URL");
|
console.log(" API_URL");
|
||||||
|
@ -87,11 +88,11 @@ if (typeof args[5] == 'undefined') {
|
||||||
} else if (args[5] == 'noWrite') {
|
} else if (args[5] == 'noWrite') {
|
||||||
disablePosts = true;
|
disablePosts = true;
|
||||||
}
|
}
|
||||||
var fromLoop = false;
|
var printHeader = false;
|
||||||
if (args[6] == 'fromLoop'){
|
if (args[6] == 'printHeader'){
|
||||||
fromLoop = true;
|
printHeader = true;
|
||||||
} else {
|
} else {
|
||||||
fromLoop = false;
|
printHeader = false;
|
||||||
}
|
}
|
||||||
debuglog(args,2);
|
debuglog(args,2);
|
||||||
debuglog("userName: " + userName,2);
|
debuglog("userName: " + userName,2);
|
||||||
|
@ -154,11 +155,16 @@ debuglog("API_URL: " + config[1],1);
|
||||||
debuglog("Enable Quote Tweets: " + modulesToEnable[0],1);
|
debuglog("Enable Quote Tweets: " + modulesToEnable[0],1);
|
||||||
debuglog("Enable Thread Tweets: " + modulesToEnable[1],1);
|
debuglog("Enable Thread Tweets: " + modulesToEnable[1],1);
|
||||||
debuglog("Disable posting to Mastodon: " + disablePosts,1);
|
debuglog("Disable posting to Mastodon: " + disablePosts,1);
|
||||||
debuglog("running from loop: " + fromLoop,1);
|
debuglog("running from loop: " + printHeader,1);
|
||||||
|
|
||||||
//SETUP REMAINDER OF VARIABLES
|
//SETUP REMAINDER OF VARIABLES
|
||||||
|
|
||||||
const csvFilename = "./URLList.csv";
|
const csvFilename = "./URLList.csv";
|
||||||
|
const localDir = './';
|
||||||
|
const imgSavePath = (localDir + userName + '/');
|
||||||
|
if (!fs.existsSync(imgSavePath)){
|
||||||
|
fs.mkdirSync(imgSavePath);
|
||||||
|
}
|
||||||
|
|
||||||
//XPATH CONSTANTS
|
//XPATH CONSTANTS
|
||||||
|
|
||||||
|
@ -312,7 +318,7 @@ driver.executeScript("document.body.style.zoom='35%'");
|
||||||
tweetText = ""
|
tweetText = ""
|
||||||
|
|
||||||
//IS TWEET PART OF MULTISCRAPER, IF SO ADD HEADER
|
//IS TWEET PART OF MULTISCRAPER, IF SO ADD HEADER
|
||||||
if (fromLoop) {
|
if (printHeader) {
|
||||||
tweeterHandleText = await driver.findElement(By.xpath(thisTweetXPath + tweeterHandle)).getText();
|
tweeterHandleText = await driver.findElement(By.xpath(thisTweetXPath + tweeterHandle)).getText();
|
||||||
tweeterNameText = await driver.findElement(By.xpath(thisTweetXPath + tweeterName)).getText();
|
tweeterNameText = await driver.findElement(By.xpath(thisTweetXPath + tweeterName)).getText();
|
||||||
tweetText = (tweeterNameText + " (" + tweeterHandleText + ")\r\n" + tweetURL + "\r\n\r\n")
|
tweetText = (tweeterNameText + " (" + tweeterHandleText + ")\r\n" + tweetURL + "\r\n\r\n")
|
||||||
|
@ -401,7 +407,7 @@ driver.executeScript("document.body.style.zoom='35%'");
|
||||||
debuglog("Tweet #" + i + " contains a single image.", 2)
|
debuglog("Tweet #" + i + " contains a single image.", 2)
|
||||||
imageCount = 1;
|
imageCount = 1;
|
||||||
imageURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + singleImageXPath)).getAttribute("src");
|
imageURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + singleImageXPath)).getAttribute("src");
|
||||||
await downloadImage(imageURL, './' + i + "." + 1 +'.jpg')
|
await downloadImage(imageURL, imgSavePath + i + "." + 1 +'.jpg')
|
||||||
.then(/*console.log*/)
|
.then(/*console.log*/)
|
||||||
.catch(console.error);
|
.catch(console.error);
|
||||||
debuglog("Downloaded " + imageCount + "image from tweet #" + i + ".", 2)
|
debuglog("Downloaded " + imageCount + "image from tweet #" + i + ".", 2)
|
||||||
|
@ -436,7 +442,7 @@ driver.executeScript("document.body.style.zoom='35%'");
|
||||||
debuglog(x + "," + y + " Exists!")
|
debuglog(x + "," + y + " Exists!")
|
||||||
iteratImgURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + multiImage1XPath + x + multiImage2XPath + y + multiImage3XPath)).getAttribute("src");
|
iteratImgURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + multiImage1XPath + x + multiImage2XPath + y + multiImage3XPath)).getAttribute("src");
|
||||||
imageCount++;
|
imageCount++;
|
||||||
await downloadImage(iteratImgURL, './' + i + "." + imageCount +'.jpg')
|
await downloadImage(iteratImgURL, imgSavePath + i + "." + imageCount +'.jpg')
|
||||||
.then(/*console.log*/)
|
.then(/*console.log*/)
|
||||||
.catch(console.error);
|
.catch(console.error);
|
||||||
}
|
}
|
||||||
|
@ -454,7 +460,7 @@ driver.executeScript("document.body.style.zoom='35%'");
|
||||||
debuglog("Uploading images to Mastodon...",1);
|
debuglog("Uploading images to Mastodon...",1);
|
||||||
var imageArray = [];
|
var imageArray = [];
|
||||||
for (var f = 1; f < (imageCount+1); f++) {
|
for (var f = 1; f < (imageCount+1); f++) {
|
||||||
await M.post('media', { file: fs.createReadStream('./' + i + '.' + f + '.jpg') }).then(resp => {
|
await M.post('media', { file: fs.createReadStream(imgSavePath + i + '.' + f + '.jpg') }).then(resp => {
|
||||||
imageArray.push(resp.data.id);
|
imageArray.push(resp.data.id);
|
||||||
}, function(err) {
|
}, function(err) {
|
||||||
if (err) {
|
if (err) {
|
||||||
|
@ -499,21 +505,6 @@ driver.executeScript("document.body.style.zoom='35%'");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//REMOVE SAVED IMAGE FILES
|
|
||||||
debuglog("Cleaning up...",1);
|
|
||||||
for (var j = 1; j < 5; j++) {
|
|
||||||
path = ("./" + i + "." + j + ".jpg");
|
|
||||||
try {
|
|
||||||
if (fs.existsSync(path)) {
|
|
||||||
fs.unlinkSync(path);
|
|
||||||
} else {
|
|
||||||
debuglog(path + " not found!",2);
|
|
||||||
}
|
|
||||||
} catch(err) {
|
|
||||||
console.error(err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
//CODE TO RUN IF TWEET IS IN CSV
|
//CODE TO RUN IF TWEET IS IN CSV
|
||||||
debuglog("Tweet #" + i + " has already been processed.",1);
|
debuglog("Tweet #" + i + " has already been processed.",1);
|
||||||
|
@ -522,6 +513,11 @@ driver.executeScript("document.body.style.zoom='35%'");
|
||||||
if (i < maxTweetScan) {driver.executeScript('var element = document.evaluate(`' + thisTweetXPath + '`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();');}
|
if (i < maxTweetScan) {driver.executeScript('var element = document.evaluate(`' + thisTweetXPath + '`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();');}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
//REMOVE SAVED IMAGE FILES
|
||||||
|
debuglog("Cleaning up...",1);
|
||||||
|
fs.rm(imgSavePath, { recursive: true, force: true }, (error) => {
|
||||||
|
//you can handle the error here
|
||||||
|
});
|
||||||
|
|
||||||
debuglog("Finished scraping " + userName + "'s tweets",1)
|
debuglog("Finished scraping " + userName + "'s tweets",1)
|
||||||
//EXIT WEBDRIVER
|
//EXIT WEBDRIVER
|
||||||
|
|
Reference in a new issue