2.0.0
This commit is contained in:
parent
97cc140175
commit
ed9d5c68fd
16 changed files with 899 additions and 3626 deletions
646
TwitToMast.js
646
TwitToMast.js
|
@ -1,210 +1,41 @@
|
|||
//REQUIREMENTS
|
||||
|
||||
const webdriver = require('selenium-webdriver');
|
||||
const chrome = require('selenium-webdriver/chrome');
|
||||
const By = webdriver.By;
|
||||
const until = webdriver.until;
|
||||
const fs = require('fs');
|
||||
const csvWriter = require('csv-write-stream');
|
||||
const Masto = require('mastodon');
|
||||
const client = require('https');
|
||||
const request = require("request");
|
||||
const Q = require("q");
|
||||
|
||||
//VALIDATE INPUT
|
||||
//LOCAL REQUIREMENTS
|
||||
const support = require('./ref/functions/support.js');
|
||||
const debuglog = support.debuglog;
|
||||
const elements = require('./ref/functions/elements.js');
|
||||
const csv = require('./ref/functions/csv.js');
|
||||
const mastodon = require('./ref/functions/mastodon.js');
|
||||
|
||||
const args = process.argv;
|
||||
if (args[2] == "-h"){
|
||||
console.log("usage: $node ./TwitToMast.js [username] [tweet count] [debug level] [disable posts] [print header]");
|
||||
console.log(" username: (string) -username of account to scrape - required");
|
||||
console.log(" tweet count: (integer) -number of tweets to scrape - required");
|
||||
console.log(" debug level: (0-2) -amount of information to print to console - 0 by default");
|
||||
console.log(" disable posts: ('write','noWrite') -enable/disable posting to Mastodon - disabled by default");
|
||||
console.log(" print header: ('printHeader') -enable attaching a header with the user's name, twitter");
|
||||
console.log(" handle, and link to tweet - disabled by default");
|
||||
console.log(" config.txt:");
|
||||
console.log(" API_KEY");
|
||||
console.log(" API_URL");
|
||||
console.log(" ENABLE_QUOTE_TWEETS");
|
||||
console.log(" ENABLE_THREAD_TWEETS");
|
||||
console.log(" ");
|
||||
process.exit(0);
|
||||
}
|
||||
if (typeof args[2] == 'undefined') {
|
||||
console.log("Expected String with length greater than 1, got '" + args[2] + "' instead");
|
||||
console.log("for help: $TwitToMast.js -h");
|
||||
process.exit(1);
|
||||
} else if (args[2].length < 1) {
|
||||
console.log("Expected String with length greater than 1, got '" + args[2] + "' instead");
|
||||
console.log("for help: $TwitToMast.js -h");
|
||||
process.exit(1);
|
||||
}
|
||||
if (isNaN(parseInt(args[3]))){
|
||||
console.log("Expected Integer, got '" + args[3] + "' instead");
|
||||
console.log("for help: $TwitToMast.js -h");
|
||||
process.exit(1);
|
||||
}
|
||||
if (!((parseInt(args[4]) >= 0) && (parseInt(args[4]) <= 2)) && (typeof args[4] != 'undefined')){
|
||||
console.log("Expected [0-2], got '" + args[4] + "' instead");
|
||||
console.log("for help: $TwitToMast.js -h");
|
||||
process.exit(1);
|
||||
}
|
||||
if ((args[5] != 'noWrite' && args[5] != 'write') && typeof args[5] != 'undefined') {
|
||||
console.log("Expected 'noWrite', 'write', or undefined, got '" + args[5] + "' instead");
|
||||
console.log("for help: $TwitToMast.js -h");
|
||||
process.exit(1);
|
||||
}
|
||||
const Args = require('./ref/classes/arguments.js');
|
||||
const args = new Args();
|
||||
const Formats = require('./ref/classes/formats.js');
|
||||
const format = new Formats();
|
||||
const Tweets = require('./ref/classes/tweets.js');
|
||||
|
||||
//PROCESS CONFIG
|
||||
//LOG ARGUMENTS
|
||||
|
||||
const config = fs.readFileSync("./config.txt").toString().split(/[\r\n]+/);
|
||||
var M = new Masto({
|
||||
access_token: config[0],
|
||||
api_url: config[1]
|
||||
})
|
||||
// Parse the two boolean switches stored at config[2] and config[3]
// (ENABLE_QUOTE_TWEETS and ENABLE_THREAD_TWEETS in config.txt).
// modulesToEnable[0] -> quote tweets, modulesToEnable[1] -> thread tweets.
const modulesToEnable = [false, false];
for (let c = 2; c < 4; c++) {
    // Compare with ===; a single "=" would assign and make every entry truthy,
    // silently enabling both modules and skipping the validation branch below.
    if (config[c] === "true") {
        modulesToEnable[c-2] = true;
    } else if (config[c] === "false") {
        modulesToEnable[c-2] = false;
    } else {
        // Anything other than the literal strings "true"/"false" is a config error.
        console.log("config.txt line " + (c+1) + ": Expected [true/false], got '" + config[c] + "' instead");
        console.log("for help: $TwitToMast.js -h");
        process.exit(1);
    }
}
|
||||
support.validateArgs();
|
||||
support.logArguments();
|
||||
|
||||
//SETUP SAVE DIRECTORY VARIABLES
|
||||
|
||||
//PROCESS ARGUMENTS
|
||||
|
||||
// Positional command-line arguments (see the -h usage text):
//   args[2] username, args[3] tweet count, args[4] debug level,
//   args[5] 'write' | 'noWrite', args[6] 'printHeader'.
const userName = args[2];
const maxTweetScan = parseInt(args[3], 10);
// Default the debug level to 0 when it is omitted. The value is computed in a
// single expression because reassigning a `const` afterwards throws a TypeError.
const debug = (typeof args[4] == 'undefined') ? 0 : args[4];
// Posting is only disabled when 'noWrite' is passed explicitly.
const disablePosts = (args[5] == 'noWrite');
// The header is only attached when 'printHeader' is passed explicitly.
const printHeader = (args[6] == 'printHeader');
|
||||
debuglog(args,2);
|
||||
debuglog("userName: " + userName,2);
|
||||
debuglog("maxTweetScan: " + maxTweetScan,2);
|
||||
debuglog("debug: " + debug,2);
|
||||
debuglog("disable posts: " + disablePosts,2);
|
||||
|
||||
//FUNCTIONS
|
||||
|
||||
/**
 * Fetch `url` with the `https` module (bound to `client`) and write the
 * response body to `filepath`.
 *
 * @param {string} url - URL to request.
 * @param {string} filepath - Destination path for the downloaded body.
 * @returns {Promise<string>} Resolves with `filepath` once the write stream
 *   has closed. Rejects on a non-200 status code, on a request or response
 *   error, or on a write-stream error.
 */
function downloadImage(url, filepath) {
    return new Promise((resolve, reject) => {
        client.get(url, (res) => {
            if (res.statusCode !== 200) {
                res.resume(); // consume the body so the response is discarded
                reject(new Error(`Request Failed With a Status Code: ${res.statusCode}`));
                return;
            }
            // A response that fails mid-transfer must settle the promise too.
            res.on('error', reject);
            res.pipe(fs.createWriteStream(filepath))
                .on('error', reject)
                .once('close', () => resolve(filepath));
        }).on('error', reject); // connection-level failures (DNS, refused, TLS)
    });
}
|
||||
|
||||
/**
 * Print `debugString` to the console when `logLevel` is less than or equal to
 * the `debug` verbosity read from the enclosing scope.
 *
 * The line is prefixed by level: 0 -> "", 1 -> "-", 2 -> "!", followed by a
 * single space. Any other `logLevel` gets an empty prefix; if `logLevel` is
 * omitted the comparison with `debug` is false and nothing is printed.
 *
 * @param {*} debugString - Value to print (string-concatenated onto the prefix).
 * @param {number} [logLevel] - Verbosity level of this message.
 */
function debuglog(debugString, logLevel) {
    // Keyed by number so that only exact numeric levels match, like a switch would.
    const prefixByLevel = new Map([[0, ""], [1, "-"], [2, "!"]]);
    // Declared locally: an undeclared assignment would create a global (and
    // throws a ReferenceError in strict mode).
    const prefix = prefixByLevel.has(logLevel) ? prefixByLevel.get(logLevel) : "";
    if (logLevel <= debug) {console.log(prefix + " " + debugString);}
}
|
||||
|
||||
/**
 * Issue a HEAD request for `shortUrl` with `followAllRedirects` enabled and
 * report the `href` of the request attached to the response.
 * NOTE(review): presumably that href is the final URL after redirects -
 * confirm against the `request` library documentation.
 *
 * @param {string} shortUrl - URL to request.
 * @returns {Promise<string>} Q promise resolved with `response.request.href`,
 *   or rejected with the error reported by `request`.
 */
function expandUrl(shortUrl) {
    var deferred = Q.defer();
    request( { method: "HEAD", url: shortUrl, followAllRedirects: true },
        function (error, response) {
            if (error) {
                // Pass an existing Error through untouched so its stack and
                // fields survive; only wrap values that are not Errors.
                deferred.reject(error instanceof Error ? error : new Error(error));
            } else {
                deferred.resolve(response.request.href);
            }
        });
    return deferred.promise;
}
|
||||
|
||||
debuglog("Setting up...",1);
|
||||
debuglog("userName: " + userName,1);
|
||||
debuglog("maxTweetScan: " + maxTweetScan,1);
|
||||
debuglog("debug: " + debug,1);
|
||||
debuglog("API_URL: " + config[1],1);
|
||||
debuglog("Enable Quote Tweets: " + modulesToEnable[0],1);
|
||||
debuglog("Enable Thread Tweets: " + modulesToEnable[1],1);
|
||||
debuglog("Disable posting to Mastodon: " + disablePosts,1);
|
||||
debuglog("running from loop: " + printHeader,1);
|
||||
|
||||
//SETUP REMAINDER OF VARIABLES
|
||||
|
||||
const csvFilename = "./URLList.csv";
|
||||
const localDir = './';
|
||||
const imgSavePath = (localDir + userName + '/');
|
||||
const imgSavePath = (`${localDir}imgs/${args.userName}/`);
|
||||
if (!fs.existsSync(imgSavePath)){
|
||||
fs.mkdirSync(imgSavePath);
|
||||
}
|
||||
|
||||
//XPATH CONSTANTS
|
||||
|
||||
const timeLineXPath = `//*[@id="react-root"]/div/div/div[2]/main/div/div/div/div/div/div[3]/div/div/section/div/div`; //the immediate parent div of all tweets
|
||||
|
||||
const tweetXPath = (timeLineXPath + `/div`); //the div containing individual tweet content: (tweetXpath + '[1]')
|
||||
|
||||
//the following xpaths follow an individual tweet xpath: (tweetXpath + '[1]' + variableXPath)
|
||||
|
||||
const urlCardXPath = `/div/div/div/article/div/div/div/div[*]/div[*]/div[*]/div[*]/div/div[2]/a`
|
||||
|
||||
const tweeterHandle = `/div/div/div/article/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div/div/div[2]/div/div[1]/a/div/span[contains(text(),"@")]` //text label containing tweeter's handle
|
||||
|
||||
const tweeterName = `/div/div/div/article/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div/div/div[1]/div/a/div/div[1]/span/span` //text label containing tweeter's name
|
||||
|
||||
const quoteTweetHandleXPath = `/div/div/div/article/div/div/div/div[2]/div[2]/div[2]/div[2]/div[*]/div[2]/div/div[1]/div/div/div/div/div/div[2]/div[1]/div/div/div/span`; //xpath to text label that reveals if a tweet is a quote tweet (leads to the quote tweeted user's handle)
|
||||
|
||||
const quoteTweetContentXPath= `/div/div/div/article/div/div/div/div[2]/div[2]/div[2]/div[2]/div[*]/div[2][div/div[1]/div/div/div/div/div/div[2]/div[1]/div/div/div/span]` //xpath to locate entirety of Quote Tweeted Content
|
||||
|
||||
const retweetIndicatorXPath = `/div/div/div/article/div/div/div/div[1]/div/div/div/div/div[2]/div/div/div/a/span`; //xpath to text label that reveals if a tweet is a retweet
|
||||
|
||||
const threadIndicatorXPath = `/div/div/div/article/div/a/div/div[2]/div/span`; //xpath to text label that reveals if a tweet is a part of a thread
|
||||
|
||||
const tweetTextXPath = `//div[@data-testid="tweetText"]`; //xpath that leads to div containing all tweet text
|
||||
|
||||
const tweetURLXPath = `//div[3]/a[contains(@href, 'status')]`; //xpath to tweet url
|
||||
|
||||
const singleImageXPath = `//div[2]/div/img[@alt="Image"]`; //xpath to image that reveals if a tweet has one image
|
||||
|
||||
const multiImageXPath = `//div[2]/div[2]/div[2]/div[2]/div/div/div/div/div[2]/div/div[1]/div[1]//a/div/div/img[@alt="Image"]`; //xpath to image that reveals if a tweet has more than one image
|
||||
|
||||
//the following xpaths follow an individual tweet xpath and are used to find all images in a tweet with multiple images: (tweetXpath + '[1]' + multiImage1XPath + x + multiImage2XPath + y + multiImage3XPath)
|
||||
// the following combinations of x,y variables point to the corresponding image
|
||||
// 1,1 = first image
|
||||
// 2,1 = second image
|
||||
// 2,2 = third image
|
||||
// 1,2 = fourth image
|
||||
const multiImage1XPath = `//div[2]/div[2]/div[2]/div[2]/div/div/div/div/div[2]/div/div[`;
|
||||
const multiImage2XPath = `]/div[`;
|
||||
const multiImage3XPath = `]//a/div/div/img[@alt="Image"]`;
|
||||
const csvSaveDir = (`${localDir}csv/`);
|
||||
const csvFileName = (`${csvSaveDir + args.userName}.csv`);
|
||||
if (!fs.existsSync(csvSaveDir)){
|
||||
fs.mkdirSync(csvSaveDir);
|
||||
}
|
||||
var csvOutput = "_";
|
||||
debuglog(`csv file name: ${csvFileName}`,2);
|
||||
debuglog(`user image save path${imgSavePath}`,2);
|
||||
|
||||
//SETUP HEADLESS WEBDRIVER
|
||||
|
||||
|
@ -212,314 +43,187 @@ const screen = {
|
|||
width: 1920,
|
||||
height: 1080
|
||||
};
|
||||
let chromeOptions = new chrome.Options().addArguments(['user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36']);
|
||||
if (!args.displayBrowser) {chromeOptions.headless().windowSize(screen);}
|
||||
var driver = new webdriver.Builder()
|
||||
.forBrowser('chrome')
|
||||
.setChromeOptions(new chrome.Options().headless().windowSize(screen).addArguments(['user-agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.50 Safari/537.36']))
|
||||
.setChromeOptions(chromeOptions)
|
||||
.build();
|
||||
|
||||
//START WEBDRIVER AND ZOOM OUT
|
||||
|
||||
driver.get('https://mobile.twitter.com/' + userName + '/');
|
||||
debuglog("starting webdriver...",2);
|
||||
driver.get(`https://mobile.twitter.com/${args.userName}/`);
|
||||
debuglog("started webdriver!",2);
|
||||
driver.executeScript("document.body.style.zoom='35%'");
|
||||
|
||||
(async function(){
|
||||
//WAIT UNTIL TIMELINE RENDERS
|
||||
await driver.wait(until.elementLocated(By.xpath(timeLineXPath + `[count(div) > 1]`)), 30000);
|
||||
|
||||
//OPEN CSV FILE, CREATE IF NEEDED
|
||||
|
||||
var csvOutput = " ";
|
||||
await fs.readFile(csvFilename, "utf-8", (err, data) => {
|
||||
if (err) {
|
||||
debuglog("Could not get CSV Data!",2)
|
||||
debuglog(err,1);
|
||||
writer = csvWriter({sendHeaders: false});
|
||||
writer.pipe(fs.createWriteStream(csvFilename));
|
||||
writer.write({
|
||||
header1: 'URLs'
|
||||
});
|
||||
writer.end();
|
||||
} else {
|
||||
csvOutput = data;
|
||||
}
|
||||
debuglog("opening csv",2);
|
||||
fs.readFile(csvFileName, "utf-8", (err, data) => {
|
||||
if (err) {
|
||||
debuglog("Could not get CSV Data!", 2);
|
||||
debuglog(err, 2);
|
||||
csv.initCSV(csvFileName);
|
||||
} else {
|
||||
debuglog(`CSV OUTPUT IS:\n${data}`, 2);
|
||||
csvOutput = data;
|
||||
}
|
||||
});
|
||||
|
||||
for (var i = 1; i < (maxTweetScan+1); i++) {
|
||||
//RUN THIS CODE FOR EVERY TWEET SCANNED
|
||||
debuglog("Processing tweet " + i + " of " + maxTweetScan + "...",1);
|
||||
//PER-TWEET VARIABLES
|
||||
var thisTweetXPath = tweetXPath + `[1]`;
|
||||
var keepTweet = false;
|
||||
var quotedContent = "";
|
||||
|
||||
debuglog("opened csv",2);
|
||||
var processedTweets = [];//DEFINE ARRAY THAT WILL BE POPULATED WITH TWEETS PROCESSED DURING THIS SESSION
|
||||
for (var t = 1; t < (parseInt(args.tweetCount) + 1); t++) {//LOOP THE NUMBER OF TIMES SPECIFIED IN ARGS
|
||||
|
||||
debuglog(format.notice(`Processing tweet #${t} of ${args.tweetCount}...`),1);
|
||||
var homeTweet = new Tweets("home",t); //RESET HOME TWEET FOR PROCESSING
|
||||
var threadTweet = new Tweets("thread",1); //RESET HOME TWEET FOR PROCESSING
|
||||
var threadTweetArray = []; //ARRAY OF THREAD TWEET OBJECTS
|
||||
|
||||
await elements.waitFor(driver,homeTweet.x.containsDivs,args.timeOut); //WAIT FOR TIMELINE TO POPULATE ITSELF WITH TWEETS
|
||||
|
||||
//REMOVE NON-PRIMARY TWEETS
|
||||
debuglog("Filtering out disabled tweets...",2)
|
||||
while (!keepTweet) {
|
||||
await driver.wait(until.elementLocated(By.xpath(thisTweetXPath)), 30000);
|
||||
|
||||
if (!modulesToEnable[0]) {
|
||||
//CHECK FOR QUOTE TWEETS
|
||||
isQT = await driver.findElement(webdriver.By.xpath(thisTweetXPath + quoteTweetContentXPath)).then(function() {
|
||||
return true; // It existed
|
||||
}, function(err) {
|
||||
if (err instanceof webdriver.error.NoSuchElementError) {
|
||||
return false; // It was not found
|
||||
} else {
|
||||
//webdriver.promise.rejected(err);
|
||||
}
|
||||
});
|
||||
}
|
||||
if (!modulesToEnable[1]) {
|
||||
//CHECK FOR THREAD TWEET
|
||||
isThread = await driver.findElement(webdriver.By.xpath(thisTweetXPath + threadIndicatorXPath)).then(function() {
|
||||
return true; // It existed
|
||||
}, function(err) {
|
||||
if (err instanceof webdriver.error.NoSuchElementError) {
|
||||
return false; // It was not found
|
||||
} else {
|
||||
//webdriver.promise.rejected(err);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
//CHECK FOR RETWEETS
|
||||
isRT = await driver.findElement(webdriver.By.xpath(thisTweetXPath + retweetIndicatorXPath)).then(function() {
|
||||
return true; // It existed
|
||||
}, function(err) {
|
||||
if (err instanceof webdriver.error.NoSuchElementError) {
|
||||
return false; // It was not found
|
||||
} else {
|
||||
//webdriver.promise.rejected(err);
|
||||
}
|
||||
});
|
||||
while (!homeTweet.keep) {
|
||||
debuglog(`xpath: ${homeTweet.x.path}`,2) //PRINT XPATH OF CURRENT TWEET
|
||||
await elements.waitFor(driver, homeTweet.x.path,args.timeOut); //WAIT UNTIL CURRENT TWEET IS LOADED
|
||||
|
||||
//IF TWEET IS DISABLED, MARK FOR REMOVAL
|
||||
if (isRT || ((!modulesToEnable[0] && isQT) || (!modulesToEnable[1] && isThread)) ) {
|
||||
//TWEET IS QT, RT, OR THREAD
|
||||
keepTweet = false;
|
||||
driver.executeScript('var element = document.evaluate(`' + thisTweetXPath + '`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();');
|
||||
await homeTweet.identifyElements(driver); //IDENTIFY WHAT ELEMENTS EXIST WITHIN TWEET
|
||||
|
||||
if ((((homeTweet.isRT || homeTweet.isAR) || homeTweet.isPin) || (!args.enableQuotes && homeTweet.isQT)) || (!args.enableThreads && homeTweet.isThread) ) {//IF TWEET IS DISABLED, MARK FOR REMOVAL
|
||||
debuglog("removing tweet",2);
|
||||
homeTweet.keep = false; //INDICATE THAT WE ARE NOT READY TO EXIT, CURRENT TWEET IS NOT ELIGIBLE FOR REPOST
|
||||
await driver.executeScript(`var element = document.evaluate(\`${homeTweet.x.path}\`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();`); //REMOVE TWEET FROM DOM TO PROCESS NEXT
|
||||
homeTweet = new Tweets("home",1); //RESET HOME TWEET OBJECT TO MAKE NEW TWEET READY FOR CHECKING
|
||||
} else {
|
||||
keepTweet = true;
|
||||
debuglog("keeping tweet! It is eligible for processing");
|
||||
homeTweet.keep = true; //INDICATE THAT WE ARE READY TO EXIT, CURRENT TWEET IS ELIGIBLE FOR REPOST
|
||||
}
|
||||
}
|
||||
|
||||
//GET TWEET URL
|
||||
await driver.wait(until.elementLocated(By.xpath(thisTweetXPath + tweetURLXPath)), 1000);
|
||||
mobileTweetURL = await driver.findElement(By.xpath(thisTweetXPath + tweetURLXPath)).getAttribute('href');
|
||||
tweetURL = await mobileTweetURL.replace('mobile.','');
|
||||
debuglog(tweetURL,2);
|
||||
processedTweets.forEach(function(u, uindex) { //CHECK IF TWEET HAS BEEN PROCESSED IN THIS SESSION
|
||||
debuglog(`${u.url} exists at index ${uindex} ${(u.url == homeTweet.url)}`);
|
||||
if (u.url == homeTweet.url) {homeTweet.processed = true;}
|
||||
})
|
||||
debuglog(`tweet has been proccessed: ${homeTweet.processed}`);
|
||||
|
||||
if (!csvOutput.includes(tweetURL)) {
|
||||
if (!homeTweet.processed && !csvOutput.includes(homeTweet.url)) { //IF CSV DOES NOT CONTAIN THE TWEET URL
|
||||
debuglog(`Tweet #${homeTweet.no} has not been processed.`, 1);
|
||||
|
||||
//SETUP TEXT FOR TWEET STATUS
|
||||
var tweetHasText = false;
|
||||
await driver.wait(until.elementLocated(By.xpath(timeLineXPath + tweetTextXPath)), 1000);
|
||||
tweetText = ""
|
||||
if (homeTweet.isThread){ //IF TWEET IS A THREAD, RUN TWEET THREAD STUFF
|
||||
var threadTweet = new Tweets("thread",1); //CREATE NEW THREAD TWEET OBJECT
|
||||
var threadTweetArray = []; //ARRAY OF THREAD TWEET OBJECTS
|
||||
debuglog(`THREAD TIMELINE: ${threadTweet.x.timeLine}`,2); //XPATH OF THREAD TIMELINE
|
||||
|
||||
//IS TWEET PART OF MULTISCRAPER, IF SO ADD HEADER
|
||||
if (printHeader) {
|
||||
tweeterHandleText = await driver.findElement(By.xpath(thisTweetXPath + tweeterHandle)).getText();
|
||||
tweeterNameText = await driver.findElement(By.xpath(thisTweetXPath + tweeterName)).getText();
|
||||
tweetText = (tweeterNameText + " (" + tweeterHandleText + ")\r\n" + tweetURL + "\r\n\r\n")
|
||||
}
|
||||
|
||||
//DOES TWEET HAVE TEXT
|
||||
tweetHasText = await driver.findElement(webdriver.By.xpath(thisTweetXPath + tweetTextXPath)).then(function() {
|
||||
return true; // It existed
|
||||
}, function(err) {
|
||||
if (err instanceof webdriver.error.NoSuchElementError) {
|
||||
return false; // It was not found
|
||||
} else {
|
||||
webdriver.promise.rejected(err);
|
||||
}
|
||||
});
|
||||
//IF SO, ADD BODY TEXT TO TWEET TEXT
|
||||
if (tweetHasText){
|
||||
tweetText = tweetText + await driver.findElement(By.xpath(thisTweetXPath + tweetTextXPath)).getText();
|
||||
}
|
||||
|
||||
//DOES TWEET HAVE A URL CARD
|
||||
tweetHasURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + urlCardXPath)).then(function() {
|
||||
return true; // It existed
|
||||
}, function(err) {
|
||||
if (err instanceof webdriver.error.NoSuchElementError) {
|
||||
return false; // It was not found
|
||||
} else {
|
||||
webdriver.promise.rejected(err);
|
||||
}
|
||||
});
|
||||
//IF SO, ADD URL TO TWEET TEXT
|
||||
if (tweetHasURL){
|
||||
tweetCardURL = await driver.findElement(By.xpath(thisTweetXPath + urlCardXPath)).getAttribute('href');
|
||||
await expandUrl(tweetCardURL)
|
||||
.then(function (longUrl) {
|
||||
debuglog("Long URL:" + longUrl,2);
|
||||
tweetText = tweetText + "\r\n\r\n" + longUrl;
|
||||
});
|
||||
}
|
||||
|
||||
//IS TWEET A QUOTE TWEET
|
||||
isQT = await driver.findElement(webdriver.By.xpath(thisTweetXPath + quoteTweetContentXPath)).then(function() {
|
||||
return true; // It existed
|
||||
}, function(err) {
|
||||
if (err instanceof webdriver.error.NoSuchElementError) {
|
||||
return false; // It was not found
|
||||
} else {
|
||||
//webdriver.promise.rejected(err);
|
||||
}
|
||||
});
|
||||
//IF SO, ADD QUOTE TWEET LINK TO TWEET TEXT
|
||||
if (isQT){
|
||||
await driver.sleep(1 * 1000)
|
||||
quotedContent = await driver.findElement(webdriver.By.xpath(thisTweetXPath + quoteTweetContentXPath));
|
||||
await driver.findElement(webdriver.By.xpath(thisTweetXPath + quoteTweetContentXPath)).sendKeys(webdriver.Key.CONTROL, webdriver.Key.ENTER);
|
||||
var parent = await driver.getWindowHandle();
|
||||
var windows = await driver.getAllWindowHandles();
|
||||
await driver.switchTo().window(windows[1]).then(() => {
|
||||
driver.getCurrentUrl().then(url => {
|
||||
debuglog('current url: "' + url + '"',2);
|
||||
tweetText = tweetText + "\r\n\r\n" + "Quote tweeting: " + url;
|
||||
});
|
||||
driver.switchTo().window(parent);
|
||||
});
|
||||
await driver.switchTo().window(windows[1]);
|
||||
await driver.close();
|
||||
await driver.switchTo().window(parent);
|
||||
}
|
||||
|
||||
debuglog(tweetText,1)
|
||||
|
||||
//CODE TO RUN IF TWEET IS NOT IN CSV
|
||||
debuglog("Tweet #" + i + " has not been processed.", 1);
|
||||
|
||||
//HANDLE SAVING SINGLE IMAGES
|
||||
var singleImageExisted = await driver.findElement(webdriver.By.xpath(thisTweetXPath + singleImageXPath)).then(function() {
|
||||
return true; // It existed
|
||||
}, function(err) {
|
||||
if (err instanceof webdriver.error.NoSuchElementError) {
|
||||
return false; // It was not found
|
||||
} else {
|
||||
webdriver.promise.rejected(err);
|
||||
}
|
||||
});
|
||||
if (singleImageExisted) {
|
||||
debuglog("Tweet #" + i + " contains a single image.", 2)
|
||||
imageCount = 1;
|
||||
imageURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + singleImageXPath)).getAttribute("src");
|
||||
await downloadImage(imageURL, imgSavePath + i + "." + 1 +'.jpg')
|
||||
.then(/*console.log*/)
|
||||
.catch(console.error);
|
||||
debuglog("Downloaded " + imageCount + "image from tweet #" + i + ".", 2)
|
||||
}
|
||||
|
||||
//HANDLE SAVING MULTTIPLE IMAGES
|
||||
var multiImageExisted = await driver.findElement(webdriver.By.xpath(thisTweetXPath + multiImageXPath)).then(function() {
|
||||
return true; // It existed
|
||||
}, function(err) {
|
||||
if (err instanceof webdriver.error.NoSuchElementError) {
|
||||
return false; // It was not found
|
||||
} else {
|
||||
webdriver.promise.rejected(err);
|
||||
}
|
||||
});
|
||||
if (multiImageExisted) {
|
||||
debuglog("Tweet #" + i + " contains multiple images.", 2)
|
||||
imageCount = 0;
|
||||
for (var x = 1; x < 3; x++) {
|
||||
for (var y = 1; y < 3; y++) {
|
||||
thisIteratExists = await driver.findElement(webdriver.By.xpath(thisTweetXPath + multiImage1XPath + x + multiImage2XPath + y + multiImage3XPath)).then(function() {
|
||||
return true; // It existed
|
||||
}, function(err) {
|
||||
if (err instanceof webdriver.error.NoSuchElementError) {
|
||||
return false; // It was not found
|
||||
} else {
|
||||
debuglog('I hope this doesnt break');
|
||||
//webdriver.promise.rejected(err);
|
||||
}
|
||||
});
|
||||
if (thisIteratExists) {
|
||||
debuglog(x + "," + y + " Exists!")
|
||||
iteratImgURL = await driver.findElement(webdriver.By.xpath(thisTweetXPath + multiImage1XPath + x + multiImage2XPath + y + multiImage3XPath)).getAttribute("src");
|
||||
imageCount++;
|
||||
await downloadImage(iteratImgURL, imgSavePath + i + "." + imageCount +'.jpg')
|
||||
.then(/*console.log*/)
|
||||
.catch(console.error);
|
||||
}
|
||||
}
|
||||
}
|
||||
debuglog("Downloaded " + imageCount + "images from tweet #" + i + ".", 2)
|
||||
}
|
||||
|
||||
//HANDLE POSTING TWEETS TO MASTODON
|
||||
if (!disablePosts){
|
||||
if (singleImageExisted || multiImageExisted) {var imageExisted = true} else {var imageExisted = false}
|
||||
if (imageExisted) {
|
||||
driver.executeScript(`window.open("${homeTweet.url}");`); //OPEN THREAD IN NEW TAB
|
||||
var parent = await driver.getWindowHandle();
|
||||
var windows = await driver.getAllWindowHandles();
|
||||
await driver.switchTo().window(windows[1]); //SWITCH TO NEW TAB WITH THREAD
|
||||
|
||||
//MAKE MASTODON POST WITH IMAGES
|
||||
debuglog("Uploading images to Mastodon...",1);
|
||||
var imageArray = [];
|
||||
for (var f = 1; f < (imageCount+1); f++) {
|
||||
await M.post('media', { file: fs.createReadStream(imgSavePath + i + '.' + f + '.jpg') }).then(resp => {
|
||||
imageArray.push(resp.data.id);
|
||||
}, function(err) {
|
||||
if (err) {
|
||||
debuglog(err,1);
|
||||
}
|
||||
})
|
||||
await elements.waitFor(driver,threadTweet.x.containsDivs,args.timeOut);
|
||||
await driver.executeScript("document.body.style.zoom='20%'");
|
||||
await driver.executeScript("window.scrollTo(0, 0)");
|
||||
//await driver.executeScript("window.scrollTo(0, -document.body.scrollHeight)");
|
||||
await driver.sleep(1*5000) //WAIT 5 SECONDS TO GIVE BROWSER TIME TO SET ITSELF UP
|
||||
|
||||
await elements.waitFor(driver,threadTweet.x.containsDivs,args.timeOut); //WAIT UNTIL THREAD IS POPULATED WITH DIVS
|
||||
|
||||
for (var r = 1; !threadTweet.entryIsOpen; r++) {//LOOP UNTIL INDICATED THAT WE'VE REACHED THE ENTRY TWEET
|
||||
threadTweet = new Tweets("thread", r); //RESETS ALL THREAD TWEET VARIABLES TO START FRESH
|
||||
|
||||
debuglog(threadTweet.x.path,2); //PRINTS XPATH TO CURRENT ITERATE DIV
|
||||
threadTweet.entryIsOpen = await elements.doesExist(driver,threadTweet.x.entryTweet) // CHECKS IF THE CURRENT ITERATE DIV IS THE ONE USED TO OPEN THE THREAD
|
||||
if (!threadTweet.entryIsOpen){ //CURRENT ITERATE DIV DOES NOT CONTAIN THE TWEET USED TO OPEN THE THREAD
|
||||
|
||||
await threadTweet.identifyElements(driver); //IDENTIFIES WHAT THE TWEET CONTAINS
|
||||
|
||||
debuglog(`current tweet #${threadTweet.no} is not entry to thread`,2);
|
||||
|
||||
debuglog(csvOutput);
|
||||
|
||||
if (processedTweets.some(e => e.url == processedTweets.url)) {
|
||||
debuglog("TWEET EXISTS IN PROCESSED ARRAY!!",2);
|
||||
}
|
||||
|
||||
if (!csvOutput.includes(threadTweet.url)) {//CODE TO RUN IF TWEET IS NOT IN CSV
|
||||
debuglog(`Thread tweet #${threadTweet.no} has not been processed.`, 1);
|
||||
|
||||
await threadTweet.getElementProperties(driver); //COMPILE HEADER, BODY, AND FOOTER
|
||||
|
||||
threadTweet.compileText();//COMPILE TEXT FOR CROSS-POST
|
||||
|
||||
threadTweet.printPreview()//PRINT TWEET PREVIEW
|
||||
|
||||
await threadTweet.downloadImages(driver,imgSavePath);
|
||||
|
||||
await threadTweet.uploadImages(imgSavePath);
|
||||
}
|
||||
threadTweetArray.push(threadTweet);
|
||||
processedTweets.push(threadTweet);
|
||||
}
|
||||
|
||||
}
|
||||
imageArray.length = 4
|
||||
debuglog("Publishing post to Mastodon...",1);
|
||||
await M.post('statuses', { status: tweetText, media_ids: imageArray }, (err, data) => {
|
||||
if (err) {
|
||||
debuglog("Post to Mastodon failed with error: " + err, 1);
|
||||
} else {
|
||||
//ADD TWEET TO CSV TO PREVENT FUTURE INDEXING
|
||||
debuglog("Posting tweet #" + i + " to Mastodon succeeded!", 1);
|
||||
writer = csvWriter({sendHeaders: false});
|
||||
writer.pipe(fs.createWriteStream(csvFilename, {flags: 'a'}));
|
||||
writer.write({
|
||||
header1: tweetURL
|
||||
});
|
||||
writer.end();
|
||||
}
|
||||
|
||||
var csvArray = csvOutput.split(/[\r\n]+/);
|
||||
for (var a = 0;a < threadTweetArray.length; a++) {//SET TWEET OBJECT ID TO SAVED ID IF IT EXISTS IN CSV
|
||||
debuglog(`CSV ARRAY: ${csvArray}`,2);
|
||||
csvArray.forEach(function(row, csvIndex) {
|
||||
debuglog(`csv row: ${row}`);
|
||||
rowArr = row.split(",");
|
||||
debuglog(`searching for '${threadTweetArray[a].url}' in '${row}'`,2)
|
||||
if (row.includes(threadTweetArray[a].url)){
|
||||
debuglog(`URL Exists at index ${csvIndex} of csv`,2);
|
||||
threadTweetArray[a].id = rowArr[1];
|
||||
threadTweetArray[a].posted = true;
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
threadTweetArray.forEach(twt =>{//LIST IDs THAT WERE DERIVED FROM CSV
|
||||
debuglog(`${twt.no} id: ${twt.id}`,2)
|
||||
})
|
||||
} else {
|
||||
//MAKE MASTODON POST WITHOUT IMAGES
|
||||
debuglog("Publishing post to Mastodon...",1);
|
||||
|
||||
await M.post('statuses', { status: tweetText}, (err, data) => {
|
||||
if (err) {
|
||||
debuglog("Post to Mastodon failed with error: " + err, 1);
|
||||
} else {
|
||||
//ADD TWEET TO CSV TO PREVENT FUTURE PROCESSING
|
||||
debuglog("Posting tweet #" + i + " to Mastodon succeeded!", 1);
|
||||
writer = csvWriter({sendHeaders: false});
|
||||
writer.pipe(fs.createWriteStream(csvFilename, {flags: 'a'}));
|
||||
writer.write({
|
||||
header1: tweetURL
|
||||
});
|
||||
writer.end();
|
||||
|
||||
for (var a = 0;a < threadTweetArray.length; a++) {//POST TO MASTODON REFERENCING ID OF PRIOR OBJECT AS PROMPT
|
||||
if (a != 0) {threadTweetArray[a].prompt = threadTweetArray[a - 1].id}
|
||||
if (!threadTweetArray[a].posted){
|
||||
debuglog(`posting tweet: ${threadTweetArray[a].no} to mastodon in reply to id: ${threadTweetArray[a].prompt}`, 2);
|
||||
threadTweetArray[a].id = await mastodon.postStatus(threadTweetArray[a],csvFileName,csvOutput)
|
||||
debuglog(`POSTED TO MASTODON AND GOT BACK AN ID OF: ${threadTweetArray[a].id}`,2)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
await driver.close();
|
||||
await driver.switchTo().window(parent);
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
//CODE TO RUN IF TWEET IS IN CSV
|
||||
debuglog("Tweet #" + i + " has already been processed.",1);
|
||||
}
|
||||
await homeTweet.getElementProperties(driver);
|
||||
|
||||
if (i < maxTweetScan) {driver.executeScript('var element = document.evaluate(`' + thisTweetXPath + '`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();');}
|
||||
homeTweet.compileText();//COMPILE TEXT FOR CROSS-POST
|
||||
|
||||
homeTweet.printPreview();//PRINT TWEET PREVIEW
|
||||
|
||||
await homeTweet.downloadImages(driver,imgSavePath);//DOWNLOAD IMAGES FROM TWITTER
|
||||
|
||||
await homeTweet.uploadImages(imgSavePath);//UPLOAD IMAGES TO MASTODON
|
||||
|
||||
if (threadTweetArray.length>0) {homeTweet.prompt = threadTweetArray[threadTweetArray.length-1].id;}
|
||||
debuglog(`Publishing post ${homeTweet.no} to Mastodon...`,2);
|
||||
homeTweet.id = await mastodon.postStatus(homeTweet,csvFileName,csvOutput);
|
||||
|
||||
processedTweets.push(homeTweet);
|
||||
} else { //HOME TWEET EXISTS IN CSV
|
||||
debuglog(`Tweet #${homeTweet.no} has already been processed.`,1); //HOME TWEET EXISTS IN CSV
|
||||
}
|
||||
|
||||
if (homeTweet.no < args.tweetCount) {driver.executeScript(`var element = document.evaluate(\`${homeTweet.x.path}\`,document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null ).singleNodeValue.remove();`);}//REMOVE TWEET FROM DOM TO PROCESS NEXT TWEET
|
||||
|
||||
}
|
||||
//REMOVE SAVED IMAGE FILES
|
||||
debuglog("Cleaning up...",1);
|
||||
fs.rm(imgSavePath, { recursive: true, force: true }, (error) => {
|
||||
//you can handle the error here
|
||||
});
|
||||
|
||||
debuglog("Finished scraping " + userName + "'s tweets",1)
|
||||
//EXIT WEBDRIVER
|
||||
driver.quit();
|
||||
debuglog("Cleaning up...",1); //REMOVE SAVED IMAGES
|
||||
fs.rm(imgSavePath, { recursive: true, force: true }, (error) => {
|
||||
debuglog(error,2);
|
||||
});
|
||||
debuglog(format.bold(`Finished scraping @${args.userName}'s tweets`),1) //CLOSE WEBDRIVER
|
||||
setTimeout(() => {
|
||||
driver.quit();
|
||||
}, 100);
|
||||
}());
|
|
@ -1 +0,0 @@
|
|||
URLs
|
|
36
multi.js
36
multi.js
|
@ -1,29 +1,39 @@
|
|||
//REQUIREMENTS
|
||||
const childProcess = require('child_process')
|
||||
const path = require('path');
|
||||
const support = require('./ref/functions/support.js');
|
||||
|
||||
//FUNCTIONS
|
||||
|
||||
/**
 * Run `scriptPath` in a forked Node child process and wait for it to exit.
 * The child's working directory is set to the directory containing the script.
 *
 * @param {string} scriptPath - Path of the script to fork.
 * @param {string[]} [args=[]] - Arguments passed to the child.
 * @returns {Promise<number|null>} Resolves with the code delivered by the
 *   child's 'exit' event; rejects with the error from its 'error' event.
 */
async function fork(scriptPath, args = []) {
    return new Promise((resolve, reject) => {
        // Named `child` so the Node global `process` is not shadowed in here.
        const child = childProcess.fork(scriptPath, args, {
            cwd: path.dirname(scriptPath)
        });

        child.on('exit', code => resolve(code));
        child.on('error', err => reject(err));
    });
}
|
||||
|
||||
//RUNTIME
|
||||
|
||||
(async function(){
|
||||
const args = process.argv;
|
||||
const args = [...process.argv];
|
||||
const defArgs = ["node","path","name","tweetCount","0","write","fromLoop"]
|
||||
for (var i = 0; i < 2; i++) {args.shift();}
|
||||
const config = require('fs').readFileSync("./usernameslist.txt").toString().split(/[\r\n]+/);
|
||||
for (var i = 0; i < 2; i++) {args.shift();} //REMOVES `node ./TwitToMaster` from args
|
||||
const config = require('fs').readFileSync("./usernameslist.txt").toString().split(/[\r\n]+/);//GET USERNAME LIST AS ARRAY
|
||||
|
||||
const customIndex = args.indexOf("-u");
|
||||
console.log(args);
|
||||
console.log(customIndex);
|
||||
args.splice(customIndex,2);
|
||||
console.log(args)
|
||||
|
||||
for (let name of config) {
|
||||
var pArgs = [...args];
|
||||
pArgs.splice(0, 0, name);
|
||||
for (var i = 3; i < 7; i++) {
|
||||
if (typeof pArgs[i-2] == 'undefined') {
|
||||
pArgs.push(defArgs[i]);
|
||||
}
|
||||
}
|
||||
console.log("pArgs: " + pArgs);
|
||||
await fork('./TwitToMast.js', pArgs);
|
||||
var fArgs = [...args];
|
||||
fArgs.push("-u");
|
||||
fArgs.push(name);
|
||||
console.log("args: " + fArgs);
|
||||
await fork('./TwitToMast.js', fArgs);
|
||||
}
|
||||
}());
|
||||
|
|
BIN
null
BIN
null
Binary file not shown.
Before Width: | Height: | Size: 168 KiB |
3140
package-lock.json
generated
3140
package-lock.json
generated
File diff suppressed because it is too large
Load diff
48
ref/classes/arguments.js
Normal file
48
ref/classes/arguments.js
Normal file
|
@ -0,0 +1,48 @@
|
|||
//Adds Array#findReg(match): returns a new array holding only the string items
//that match the given regular expression (non-string items are dropped).
//Defined through Object.defineProperty so the helper is NOT enumerable: a plain
//`Array.prototype.findReg = ...` assignment would make "findReg" show up in
//every `for...in` loop over any array in the process.
Object.defineProperty(Array.prototype, 'findReg', {
    value: function findReg(match) {
        return this.filter((item) => typeof item == 'string' && item.match(match));
    },
    writable: true,
    configurable: true,
    enumerable: false
});
|
||||
|
||||
/**
 * Parsed command-line settings, read from process.argv when constructed.
 *
 * Boolean switches may be clustered (e.g. `-qtp`); valued options are given as
 * `-u name`, `-n count`, `-d level`, `-w milliseconds`.
 */
class Args {
    constructor() {
        this.help = this.getFlag("h");              //show help screen
        this.displayBrowser = this.getFlag("b");    //show browser running (disable headless)
        this.enablePosts = this.getFlag("p");       //enable posting images or statuses to Mastodon
        this.forceCSV = this.getFlag("c");          //force logging tweets to CSV, even if not posted to Mastodon (by request or failure)
        this.printMeta = this.getFlag("m");         //include Display Name, handle, and URL in Mastodon post
        this.enableQuotes = this.getFlag("q");      //enable cross-posting quote tweets
        this.enableThreads = this.getFlag("t");     //enable cross-posting thread tweets
        this.reQuotes = this.getFlag("r");          //put links to quote tweets at top of mastodon posts

        const userNamePreFormat = this.getArgument("-u","Twitter",false);
        this.userName = userNamePreFormat.replace('@','')
        this.tweetCount = this.getArgument("-n",5);
        this.debug = this.getArgument("-d",1);
        this.timeOut = this.getArgument("-w",30000);
    }

    /**
     * Reports whether a single-letter switch was passed.
     *
     * Only the user-supplied arguments are inspected (the node binary and the
     * script path are skipped) and only tokens that start with '-' count, so a
     * hyphen inside the install path can no longer switch options on.
     *
     * @param {string} char - the switch letter, e.g. "q"
     * @returns {boolean} true when some '-...' token contains the letter
     */
    getFlag(char){
        const userArgs = process.argv.slice(2);
        return userArgs.some((token) => typeof token == 'string' && token.startsWith('-') && token.includes(char));
    }

    /**
     * Reads the value that follows `flag` on the command line.
     *
     * @param {string} flag - option name including the dash, e.g. "-n"
     * @param {*} def - value returned when the option is absent, empty, or (for integers) not a number
     * @param {boolean} [isInt=true] - parse the value as a base-10 integer
     * @returns {*} the parsed value or `def`; an explicit 0 is kept (so `-d 0` works)
     */
    getArgument(flag, def, isInt = true) {
        const args = [...process.argv];
        const customIndex = args.indexOf(flag);
        const customValue = (customIndex > -1) ? args[customIndex + 1] : undefined;
        if (customValue === undefined || customValue === '') {
            return def;
        }
        if (!isInt) {
            return customValue;
        }
        const parsed = Number.parseInt(customValue, 10);
        return Number.isNaN(parsed) ? def : parsed; //NaN check instead of `|| def`, which discarded a legitimate 0
    }
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
module.exports = Args
|
15
ref/classes/formats.js
Normal file
15
ref/classes/formats.js
Normal file
|
@ -0,0 +1,15 @@
|
|||
const colors = require('cli-color');
|
||||
|
||||
/**
 * Named console text styles, each taken from the `colors` (cli-color) module.
 */
class Formats {
    constructor() {
        //property order matches the palette: status styles first, then plain emphasis styles
        Object.assign(this, {
            success: colors.green.bold,
            error: colors.red.bold,
            warn: colors.yellow,
            notice: colors.blue.bold,
            bold: colors.bold,
            underline: colors.underline,
            italic: colors.italic
        });
    }
}
|
||||
|
||||
module.exports = Formats
|
245
ref/classes/tweets.js
Normal file
245
ref/classes/tweets.js
Normal file
|
@ -0,0 +1,245 @@
|
|||
const webdriver = require('selenium-webdriver');
|
||||
const By = webdriver.By;
|
||||
//const { format } = require('fast-csv');
|
||||
|
||||
const elements = require('../functions/elements.js'); //link support.js
|
||||
const XPathObjects = require('../classes/xpaths.js'); //link xpaths.js
|
||||
const Args = require('../classes/arguments.js');
|
||||
const args = new Args();
|
||||
const support = require('../functions/support.js');
|
||||
const debuglog = support.debuglog;
|
||||
const funcs = require('../functions/functions.js'); //link functions.js
|
||||
const mastodon = require('../functions/mastodon.js'); //link mastodon.js
|
||||
const Formats = require('../classes/formats.js');
|
||||
const format = new Formats();
|
||||
|
||||
//const homeX = new XPathObjects.TweetPaths("home"); //import xpath class object for home timeline
|
||||
//const threadX = new XPathObjects.TweetPaths("thread"); //import xpath class object for thread timeline
|
||||
|
||||
/**
 * One scraped tweet: detection flags, the header/body/footer sections that are
 * joined into the Mastodon post text, image bookkeeping, and the XPaths
 * (this.x) used to read the tweet out of the page.
 */
class Tweets {
    /**
     * @param {string} orig - timeline the tweet is read from; the code distinguishes 'home' and 'thread'
     * @param {number} i - 1-based number of the tweet (index is stored as i-1)
     */
    constructor(orig,i) {
        //parameters
        this.orig = orig;
        this.index = i-1;
        this.no = i;
        this.processed = false;

        //detect to filter out
        this.isRT = false;
        this.isAR = false;
        this.isPin = false;
        this.keep = false;

        //detect to move into thread
        this.isThread = false;

        //processed text for posting
        this.text = "";

        //header
        this.name = "";
        this.handle = "";
        this.url = "";
        this.header = "";

        //body
        this.hasBody = false;
        this.body = "";
        this.hasLinks = false;
        this.links = "";

        //footer
        this.hasVideo = false;
        this.isQT = false;
        this.quoteLink = "";
        this.footer = "";

        //media
        this.hasSingleImage = false;
        this.hasMultiImage = false;
        this.hasImages = false;
        this.imgArray = [];
        this.imgCount = 0;
        this.imgUrl = "";
        this.iterateExists = false;
        this.iteratePath = "";

        //mastodon
        this.id = 0;
        this.prompt = 0; //sent as in_reply_to_id by mastodon.postStatus when not 0
        this.posted = false;

        if (orig == "thread") {
            this.threadLength = 0;
            this.entryIsOpen = false;
        }
        //xpaths of tweet & elements
        this.x = new XPathObjects.TweetPaths(orig,i);
    }

    /** Joins the non-empty header, body and footer into this.text, separated by a blank line. */
    compileText(){
        const sectionArray = [this.header, this.body, this.footer];
        const nonEmptySections = sectionArray.filter(section => section !== '');
        this.text = nonEmptySections.join('\r\n\r\n');
    }

    /**
     * Appends a line to one of the text sections, adding a line break first
     * when the section already has content.
     *
     * @param {string} txt - text to append
     * @param {string} section - 'header', 'body' or 'footer'
     * @throws {Error} when `section` is not one of the three section names
     */
    appendSection(txt, section) {
        if (!['header', 'body', 'footer'].includes(section)) {
            throw new Error(`Invalid section: ${section}`);
        }
        this[section] += this[section] ? `\r\n${txt}` : txt;
    }

    /**
     * Waits for the tweet to be present, then records which elements it
     * contains. Stops early (leaving the other flags untouched) when the tweet
     * is age-restricted.
     *
     * @param {object} driver - selenium WebDriver instance
     */
    async identifyElements(driver){
        await elements.waitFor(driver,this.x.tweet,args.timeOut); //WAIT FOR TWEET URL OF CURRENT ITERATE TWEET

        this.isAR = await elements.doesExist(driver,this.x.ageRestricted);//IS TWEET AGE-RESTRICTED?
        if (this.isAR){
            return;
        }

        const mTweetURL = await elements.getAttribute(driver,this.x.tweetURL,'href') //GET URL OF TWEET
        this.url = mTweetURL.replace('://mobile.','://'); //SAVE TWEET URL TO TWEET OBJECT WITHOUT MOBILE

        this.hasBody = await elements.doesExist(driver,this.x.tweetText);//DOES TWEET HAVE BODY TEXT?
        this.hasLinks = await elements.doesExist(driver,this.x.urlCard);//DOES TWEET HAVE URL CARDS
        this.hasVideo = await elements.doesExist(driver,this.x.video);//DOES TWEET HAVE VIDEO MEDIA?

        this.isQT = await elements.doesExist(driver, this.x.quoteTweetContent);//IS TWEET A QUOTE TWEET
        this.isThread = await elements.doesExist(driver,this.x.detectThread);//IS TWEET A PART OF THREAD

        this.isRT = await elements.doesExist(driver,this.x.detectRT);//CHECK FOR RETWEETS
        this.isPin = await elements.doesExist(driver,this.x.pinnedTweet);//IS TWEET PINNED

        this.hasSingleImage = await elements.doesExist(driver, this.x.singleImage);//DOES TWEET HAVE SINGLE IMAGE?
        this.hasMultiImage = await elements.doesExist(driver,this.x.multiImage);//DOES TWEET HAVE MULTIPLE IMAGES?
        this.hasImages = this.hasSingleImage || this.hasMultiImage;//DOES TWEET HAVE ANY MEDIA?
    }

    /**
     * Reads the content of the elements found by identifyElements and fills
     * the header, body and footer sections.
     *
     * @param {object} driver - selenium WebDriver instance
     */
    async getElementProperties(driver){
        if (args.printMeta) { //IF TWEET HEADER IS ENABLED
            debuglog("running header stuff",2);
            this.handle = await elements.getText(driver,this.x.tweeterHandle);//GET TEXT OF TWEETER HANDLE (@)
            this.name = await elements.getText(driver,this.x.tweeterName);//GET TEXT OF TWEETER NAME (DISPLAY NAME)
            this.appendSection(`${this.name} (${this.handle})\r\n${this.url}`,'header');//COMBINE HEADER COMPONENTS WITH URL
            debuglog(`Tweet Header:\r\n${this.header}`);
        }

        if (this.hasBody){//IF TWEET HAS BODY TEXT
            debuglog("running body text stuff",2);
            //TODO: emoji are rendered as <img alt="..."> and are not part of getText(); see
            //https://stackoverflow.com/questions/65328118/convert-img-with-alt-attribute-to-text-with-selenium-webdriver
            const bodyText = await elements.getText(driver,this.x.tweetText);//SET TWEET BODY TO TEXT OF TWEET
            this.appendSection(bodyText,'body');
            debuglog(`Tweet Body:\r\n${this.body}`);
        }

        if (this.hasLinks){//IF TWEET HAS URL CARD
            debuglog("running url card stuff",2);
            const tweetCardURL = await elements.getAttribute(driver,this.x.urlCard,"href");//GET URL OF URL CARD
            this.links = await funcs.expandUrl(tweetCardURL);
            this.appendSection(this.links,'body');
            debuglog(`Tweet link: ${this.links}`);
        }

        if (this.isQT){ //IF TWEET IS A QUOTE TWEET, GET URL AND ADD TO EITHER HEADER OR FOOTER
            debuglog("running quote tweet stuff",2);
            await driver.findElement(By.xpath(this.x.quoteTweetContent)).sendKeys(webdriver.Key.CONTROL, webdriver.Key.ENTER);//OPEN QUOTE TWEET IN NEW TAB
            this.parent = await driver.getWindowHandle();
            const windows = await driver.getAllWindowHandles();
            const quoteTab = windows[windows.length-1];
            if (quoteTab === this.parent) {
                //no new tab appeared; closing the "last" window would close the timeline itself
                debuglog(format.warn("Quoted tweet did not open in a new tab; quote link skipped"),1);
            } else {
                //every step is awaited in order, so the URL is read from the quote tab
                //before that tab is closed and focus returns to the original tab
                try {
                    await driver.switchTo().window(quoteTab);//SWITCH TO NEW TAB WITH QUOTED TWEET
                    const quotedUrl = await driver.getCurrentUrl();
                    this.quoteLink = quotedUrl.replace('://mobile.','://'); //MAKE MOBILE TWEET NON-MOBILE
                    const text = args.reQuotes //DETERMINE HOW TO FORMAT QUOTE LINK DEPENDING ON RELEVANT ARGUMENT
                        ? `Re: ${this.quoteLink}`
                        : `« Quoting ${this.quoteLink} »`;
                    this.appendSection(text, args.reQuotes ? 'header' : 'footer'); //PLACE QUOTE LINK AT HEADER OR FOOTER OF TWEET
                    await driver.close();//CLOSE NEW TAB
                } finally {
                    await driver.switchTo().window(this.parent);//SWITCH BACK TO ORIGINAL TAB
                }
            }
            debuglog(`Tweet Header: ${this.header}`)
            debuglog(`Tweet Footer: ${this.footer}`);
        }

        if (this.hasVideo) {//IF TWEET HAS NON-POSTABLE MEDIA, APPEND FOOTER DETAILING SO
            debuglog("running video stuff",2);
            //uses this tweet's own URL (the previous `homeTweet` name does not exist in this scope)
            this.appendSection(`⚠ Original tweet had attachment(s) that couldn't be cross-posted. View it at ${this.url}`,'footer');
            debuglog(`Tweet Footer: ${this.footer}`);
        }
    }

    /**
     * Builds the local file name for image number `n` of this tweet.
     * Thread tweets get an 'r' prefix so they do not collide with home tweets.
     *
     * @param {string} imgSavePath - path prefix the file name is appended to
     * @param {number} n - 1-based image number
     * @returns {string}
     */
    imageFilePath(imgSavePath, n) {
        return `${imgSavePath}${this.orig == 'home' ? '' : 'r'}${this.no}.${n}.jpg`;
    }

    /**
     * Downloads the tweet's image(s) to disk and records how many were found
     * in this.imgCount. A failed download is reported on stderr and does not
     * stop the remaining downloads.
     *
     * @param {object} driver - selenium WebDriver instance
     * @param {string} imgSavePath - path prefix for the saved files
     */
    async downloadImages(driver,imgSavePath) {
        const save = async (url, target) => {
            try {
                debuglog(await funcs.downloadImage(url, target));
            } catch (err) {
                console.error(err);
            }
        };

        if (this.hasSingleImage) {
            debuglog(`${this.orig} Tweet #${this.no} contains a single image.`, 2)
            this.imgCount = 1;
            this.imgUrl = await elements.getAttribute(driver,this.x.singleImage,"src")
            await save(this.imgUrl, this.imageFilePath(imgSavePath, this.imgCount));
            debuglog(`Downloaded ${this.imgCount} image from tweet #${this.no}.`, 2)
        } else if (this.hasMultiImage) {
            debuglog(`${this.orig} Tweet #${this.no} contains multiple images.`, 2)
            this.imgCount = 0;
            //images sit in a 2x2 grid addressed by (x,y); probe every cell
            for (let x = 1; x < 3; x++) {
                for (let y = 1; y < 3; y++) {
                    this.iterateExists = await elements.doesExist(driver,this.x.multiImages(x,y));
                    if (this.iterateExists) {
                        debuglog(`${x},${y} Exists!`);
                        this.imgUrl = await elements.getAttribute(driver,this.x.multiImages(x,y),'src')
                        debuglog(this.imgUrl,2);
                        this.imgCount++
                        await save(this.imgUrl, this.imageFilePath(imgSavePath, this.imgCount));
                    }
                }
            }
            debuglog(`Downloaded ${this.imgCount} images from tweet #${this.no}.`,1)
        }
    }

    /**
     * Uploads the previously downloaded images to Mastodon and collects the
     * returned media ids in this.imgArray.
     *
     * @param {string} imgSavePath - same path prefix that was passed to downloadImages
     */
    async uploadImages(imgSavePath) {
        if (this.hasImages) {debuglog("Uploading images to Mastodon...",1);}
        for (let f = 1; f < (this.imgCount+1); f++) {
            const jpgPath = this.imageFilePath(imgSavePath, f);
            debuglog(`uploading image to mastodon: ${jpgPath}`);
            const imgid = await mastodon.postMedia(jpgPath)
            debuglog(`mastodon image id: ${imgid}`);
            this.imgArray.push(imgid);
        }
    }

    /** Logs a boxed preview of this.text at debug level 1. */
    async printPreview(){
        //process.stdout.columns is undefined when output is not a terminal; fall back to 80
        const columns = process.stdout.columns || 80;
        const rule = '═'.repeat(Math.max(columns - 2, 0));
        const postPreviewMessage = [
            `${format.success('Mastodon Post Preview:')}`,
            `╔${rule}╗`,
            this.text,
            `╚${rule}╝`
        ].join('\n');
        debuglog(postPreviewMessage, 1);
    }
}
|
||||
|
||||
module.exports = Tweets
|
||||
|
||||
|
||||
|
||||
|
60
ref/classes/xpaths.js
Normal file
60
ref/classes/xpaths.js
Normal file
|
@ -0,0 +1,60 @@
|
|||
/**
 * XPath strings for one tweet and the elements inside it.
 *
 * On the 'home' timeline the tweet path always targets the first child of the
 * timeline; on a 'thread' timeline it targets child number `i`.
 */
class TweetPaths {
    /**
     * @param {string} orig - 'home' or 'thread'
     * @param {number} i - 1-based position of the tweet (used for 'thread' only)
     */
    constructor(orig,i) {
        const fromHome = orig == 'home';
        const fromThread = orig == 'thread';

        //the immediate parent div of all tweets
        if (fromHome) {
            this.timeLine = "//*[@id='react-root']/div/div/div[2]/main/div/div/div/div/div/div[3]/div/div/section/div/div";
        } else if (fromThread) {
            this.timeLine = "/html/body/div[1]/div/div/div[2]/main/div/div/div/div[1]/div/section/div/div";
        }
        this.tweet = `${this.timeLine}/div`;                        //the div containing individual tweet content
        this.containsDivs = `${this.timeLine}[count(div) > 1]`;     //timeline containing divs
        this.path = `${this.tweet}[${fromHome ? 1 : i}]`;

        //most element paths start at the tweet's <article>
        const article = `${this.path}/div/div/div/article`;

        this.urlCard = `${article}/div/div/div/div[*]/div[*]/div[*]/div[*]/div/div[2]/a`;
        this.tweeterHandle = `${article}/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div/div/div[2]/div/div[1]/a/div/span[contains(text(),'@')]`; //text label containing tweeter's handle
        this.tweeterName = `${article}/div/div/div/div[2]/div[2]/div[1]/div/div/div[1]/div/div/div[1]/div/a/div/div[1]/span`; //text label containing tweeter's name
        this.quoteTweetHandle = `${article}/div/div/div/div[2]/div[2]/div[2]/div[2]/div[*]/div[2]/div/div[1]/div/div/div/div/div/div[2]/div[1]/div/div/div/span`; //text label that reveals a quote tweet (the quoted user's handle)
        this.quoteTweetContent = `${article}/div/div/div/div[2]/div[2]/div[2]/div[2]/div[*]/div[2][div/div[1]/div/div/div/div/div/div[2]/div[1]/div/div/div/span]`; //entirety of the quoted content

        this.ageRestricted = `${article}//span/span[1]/span[contains(text(),'Age-restricted')]`; //reveals if tweet is age-restricted (& therefore not visible)
        this.pinnedTweet = `${article}/div/div/div/div[1]/div/div/div/div/div[2]/div/div/div/span[contains(text(),'Pinned')]`; //reveals if tweet is pinned

        this.tweetText = `${this.path}//div[@data-testid='tweetText']`; //div containing all tweet text
        this.tweetURL = `${this.path}//div[3]/a[contains(@href, 'status')]`; //tweet url
        this.video = `${this.path}//div[1]//video`; //video element
        this.singleImage = `${this.path}//div[1]/div/div/div/div/a/div/div[2]/div/img[@alt='Image']`; //reveals if a tweet has one image
        this.multiImage = `${this.path}//div[2]/div[2]/div[2]/div[2]/div/div/div/div/div[2]/div/div[1]/div[1]//a/div/div/img[@alt='Image']`; //reveals if a tweet has more than one image

        if (fromHome) { //home timeline only
            this.detectThread = `${article}/div/a/div/div[2]/div/span`; //text label that reveals a tweet is part of a thread
            this.detectRT = `${article}/div/div/div/div[1]/div/div/div/div/div[2]/div/div/div/a/span`; //text label that reveals a tweet is a retweet
        } else if (fromThread) { //thread timeline only
            this.entryTweet = `${article}/div/div/div/div[3]/div[5]/div/div[1]/div/a`; //reveals if tweet is open in thread
        }

        //pieces joined by multiImages(x,y) to address one image of a multi-image tweet
        // 1,1 = first image
        // 2,1 = second image
        // 2,2 = third image
        // 1,2 = fourth image
        this.multiImage1 = "//div[2]/div[2]/div[2]/div[2]/div/div/div/div/div[2]/div/div[";
        this.multiImage2 = "]/div[";
        this.multiImage3 = "]//a/div/div/img[@alt='Image']";
    }

    /**
     * @param {*} i - not used
     * @param {string} pathFromTweet - XPath suffix relative to the tweet
     * @returns {string} the tweet path followed by `pathFromTweet`
     */
    tweetElement(i, pathFromTweet) {
        return this.path + pathFromTweet;
    }

    /**
     * @param {number} x - first grid index
     * @param {number} y - second grid index
     * @returns {string} XPath of the image at grid position (x,y)
     */
    multiImages(x,y) {
        const head = this.path + this.multiImage1;
        return head + x + this.multiImage2 + y + this.multiImage3;
    }
}
|
||||
|
||||
module.exports = { TweetPaths }
|
||||
|
||||
|
||||
|
||||
|
45
ref/functions/csv.js
Normal file
45
ref/functions/csv.js
Normal file
|
@ -0,0 +1,45 @@
|
|||
const csvWriter = require('csv-write-stream');
|
||||
const fs = require('fs');
|
||||
const support = require('../functions/support.js');
|
||||
const debuglog = support.debuglog;
|
||||
|
||||
/**
 * Creates (or truncates) the CSV file and writes the first row:
 * URLs, IDs, Origin.
 *
 * The returned promise settles only after the file stream has finished, so a
 * caller that awaits it can read the file straight away.
 *
 * @param {string} csvFN - path of the CSV file
 * @returns {Promise<void>} rejects if the file stream reports an error
 */
async function initCSV(csvFN){
    const writer = csvWriter({sendHeaders: false}); //local: an undeclared `writer` becomes a shared global
    const file = fs.createWriteStream(csvFN);
    const flushed = new Promise((resolve, reject) => {
        file.once('finish', resolve);
        file.once('error', reject);
    });
    writer.pipe(file);
    writer.write({
        header1: 'URLs',
        header2: 'IDs',
        header3: 'Origin'
    });
    writer.end();
    await flushed;
}
|
||||
|
||||
/**
 * Reads the whole CSV file as UTF-8 text.
 *
 * Uses the promise API of fs: the callback form of fs.readFile cannot be
 * awaited and its callback's return value is discarded, which is why the
 * earlier version never produced the file contents.
 *
 * @param {string} csvFN - path of the CSV file
 * @returns {Promise<string>} the file contents; rejects if the file cannot be read
 */
async function openCSV(csvFN){
    return fs.promises.readFile(csvFN, "utf-8");
}
|
||||
/**
 * Appends one row (url, id, origin) to the CSV file unless the URL already
 * appears in the supplied file contents.
 *
 * @param {string} url - tweet URL (first column)
 * @param {*} id - Mastodon status id (second column)
 * @param {string} orig - origin label (third column)
 * @param {string} csvFN - path of the CSV file
 * @param {string} [fc] - contents of the CSV as previously read; a missing value is treated as empty
 */
async function appendToCSV(url,id,orig,csvFN,fc){
    debuglog(`writing '${url}' to CSV!!`,2)
    debuglog(`file contents: ${fc}`);
    const known = fc == null ? "" : String(fc);
    if (known.includes(url)) {
        return; //already logged: do not open the file at all
    }
    const writer = csvWriter({sendHeaders: false}); //local: an undeclared `writer` becomes a shared global
    writer.pipe(fs.createWriteStream(csvFN, {flags: 'a'}));
    writer.write({
        header1: url,
        header2: id,
        header3: orig
    });
    writer.end();
}
|
||||
|
||||
module.exports = { initCSV,appendToCSV,openCSV };
|
||||
|
||||
|
||||
|
||||
|
||||
|
37
ref/functions/elements.js
Normal file
37
ref/functions/elements.js
Normal file
|
@ -0,0 +1,37 @@
|
|||
const webdriver = require('selenium-webdriver');
|
||||
const By = webdriver.By;
|
||||
const until = webdriver.until;
|
||||
|
||||
/**
 * Reports whether an element matching the XPath can currently be found.
 *
 * Always resolves to a strict boolean. A lookup that fails for any reason is
 * reported as "not present": NoSuchElementError is the normal answer for a
 * missing element, and other lookup failures were already treated as falsy by
 * callers (they resolved to undefined before).
 *
 * @param {object} drvr - selenium WebDriver instance
 * @param {string} path - XPath of the element
 * @returns {Promise<boolean>}
 */
async function doesExist(drvr,path){
    try {
        await drvr.findElement(By.xpath(path));
        return true; // It existed
    } catch (err) {
        return false; // It was not found (or could not be looked up)
    }
}
|
||||
|
||||
/**
 * Waits until an element matching the XPath has been located.
 *
 * @param {object} drvr - selenium WebDriver instance
 * @param {string} xpath - XPath of the element to wait for
 * @param {number} ms - timeout handed to drvr.wait
 * @returns {Promise<void>}
 */
async function waitFor(drvr,xpath,ms){
    const locator = By.xpath(xpath);
    const located = until.elementLocated(locator);
    await drvr.wait(located, ms);
}
|
||||
|
||||
/**
 * Looks up an element by XPath and reads one of its attributes.
 *
 * @param {object} drvr - selenium WebDriver instance
 * @param {string} xpath - XPath of the element
 * @param {string} attribute - attribute name, e.g. 'href'
 * @returns {Promise<*>} whatever the element's getAttribute resolves to
 */
async function getAttribute(drvr,xpath,attribute){
    const locator = By.xpath(xpath);
    const element = drvr.findElement(locator);
    return element.getAttribute(attribute);
}
|
||||
|
||||
/**
 * Looks up an element by XPath and reads its text.
 *
 * @param {object} drvr - selenium WebDriver instance
 * @param {string} xpath - XPath of the element
 * @returns {Promise<*>} whatever the element's getText resolves to
 */
async function getText(drvr,xpath){
    const locator = By.xpath(xpath);
    const element = drvr.findElement(locator);
    return element.getText();
}
|
||||
|
||||
/**
 * Looks up an element by XPath.
 *
 * @param {object} drvr - selenium WebDriver instance
 * @param {string} xpath - XPath of the element
 * @returns {Promise<*>} the result of drvr.findElement for that XPath
 */
async function getElement(drvr,xpath){
    const locator = By.xpath(xpath);
    return drvr.findElement(locator);
}
|
||||
|
||||
module.exports = { doesExist,waitFor,getAttribute,getText,getElement };
|
||||
|
||||
|
||||
|
||||
|
||||
|
41
ref/functions/functions.js
Normal file
41
ref/functions/functions.js
Normal file
|
@ -0,0 +1,41 @@
|
|||
const fs = require('fs');
|
||||
const client = require('https');
|
||||
var { tall } = require('tall')
|
||||
|
||||
/**
 * Downloads a URL over HTTPS and saves the response body to a file.
 *
 * @param {string} url - https URL of the image
 * @param {string} filepath - where to save the downloaded bytes
 * @returns {Promise<string>} resolves with `filepath` once the file is closed;
 *     rejects on a non-200 status, on a request/response error (DNS failure,
 *     refused or dropped connection) or on a file write error
 */
function downloadImage(url, filepath) {
    return new Promise((resolve, reject) => {
        const request = client.get(url, (res) => {
            if (res.statusCode !== 200) {
                res.resume(); //drain the response so the socket is released
                reject(new Error(`Request Failed With a Status Code: ${res.statusCode}`));
                return;
            }
            res.on('error', reject);
            res.pipe(fs.createWriteStream(filepath))
                .on('error', reject)
                .once('close', () => resolve(filepath));
        });
        //without this listener a connection failure is an unhandled 'error'
        //event (process crash) and the promise would never settle
        request.on('error', reject);
    });
}
|
||||
|
||||
/**
 * Resolves a shortened URL to its destination using `tall`.
 *
 * @param {string} shortUrl - the shortened URL
 * @returns {Promise<string>} the expanded URL, or "" when expansion fails
 *     (the failure is reported on stderr)
 */
async function expandUrl(shortUrl) {
    let expanded = "";
    try {
        expanded = await tall(shortUrl);
    } catch (failure) {
        console.error('Error unshortening url: ', failure)
    }
    return expanded;
}
|
||||
|
||||
/**
 * Picks a random integer between `min` and `max`, both included.
 *
 * @param {number} min - lowest value that can be returned
 * @param {number} max - highest value that can be returned
 * @returns {number}
 */
function rand(min, max) {
    const span = max - min + 1; //number of distinct values in [min, max]
    const scaled = Math.random() * span + min;
    return Math.floor(scaled);
}
|
||||
|
||||
module.exports = { downloadImage,expandUrl,rand };
|
||||
|
||||
|
||||
|
||||
|
||||
|
80
ref/functions/mastodon.js
Normal file
80
ref/functions/mastodon.js
Normal file
|
@ -0,0 +1,80 @@
|
|||
const fs = require('fs');
|
||||
const Masto = require('mastodon');
|
||||
|
||||
const support = require('../functions/support.js');
|
||||
const csv = require('../functions/csv.js');
|
||||
const debuglog = support.debuglog;
|
||||
const funcs = require('../functions/functions.js');
|
||||
const Args = require('../classes/arguments.js');
|
||||
const args = new Args();
|
||||
const Formats = require('../classes/formats.js');
|
||||
const format = new Formats();
|
||||
|
||||
/**
 * Builds a Mastodon client from ./config.txt (resolved against the current
 * working directory): line 1 is the access token, line 2 the API URL.
 *
 * @returns {object} a new Masto instance
 */
function setupMastodon(){
    const configText = fs.readFileSync("./config.txt").toString();
    const [accessToken, apiUrl] = configText.split(/[\r\n]+/);
    return new Masto({
        access_token: accessToken,
        api_url: apiUrl
    });
}
|
||||
|
||||
/**
 * Uploads one media file to Mastodon.
 *
 * @param {string} path - path of the file to upload
 * @returns {Promise<*>} the media id returned by Mastodon; 0 when the upload
 *     fails (the error is logged at level 0) or when posting is disabled;
 *     a random placeholder id (1-100) when posting is disabled but CSV
 *     logging is forced
 */
async function postMedia(path){
    if (!args.enablePosts) {
        return args.forceCSV ? funcs.rand(1,100) : 0;
    }
    const M = setupMastodon();
    try {
        const resp = await M.post('media', { file: fs.createReadStream(path) });
        return resp.data.id; //kept in a local value: an undeclared `id` is a global shared by every call
    } catch (err) {
        debuglog(err,0);
        return 0;
    }
}
|
||||
|
||||
/**
 * Publishes a tweet as a Mastodon status and logs it to the CSV file.
 *
 * @param {object} tweet - tweet to post; reads text, no, prompt, hasImages, imgArray, url and orig
 * @param {string} file - path of the CSV file
 * @param {string} csvc - current CSV contents, forwarded to csv.appendToCSV for its duplicate check
 * @returns {Promise<*>} the id of the new status; a random placeholder id
 *     (1-100) when posting is disabled but CSV logging is forced; otherwise 0
 */
async function postStatus(tweet,file,csvc){
    let id = 0;
    if (args.enablePosts){
        const M = setupMastodon();
        const params = { status: tweet.text }; //local: an undeclared `params` is a global shared by every call
        debuglog(`${tweet.no} is a reply to ${tweet.prompt}`);
        if (tweet.hasImages) {//POST HAS IMAGES
            debuglog("post has images!!",2)
            debuglog(`images array: ${tweet.imgArray}`,2)
            params.media_ids = tweet.imgArray;
        }
        if (tweet.prompt != 0) {//POST IS A REPLY
            debuglog("reply to: " + tweet.prompt,2)
            params.in_reply_to_id = tweet.prompt;
        }
        //NOTE(review): relies on M.post invoking the callback before the awaited
        //promise continues, so `id` is set by the time it is returned - confirm
        //against the mastodon client library
        await M.post('statuses', params, (err, data) => {
            if (err) {
                debuglog(format.error(`Post to Mastodon failed with error: ${err}`), 1);
                return;
            }
            //ADD TWEET TO CSV TO PREVENT FUTURE PROCESSING
            csv.appendToCSV(tweet.url,data.id,tweet.orig,file,csvc);
            debuglog(`posted to mastodon and got back id: ${data.id}`);
            debuglog(format.bold(`Successfully posted ${tweet.url} to Mastodon!`),1);
            id = data.id;
        })
    } else if (args.forceCSV) {
        const fakeID = funcs.rand(1,100);
        csv.appendToCSV(tweet.url,fakeID,(`forced ${tweet.orig}`),file,csvc);
        id = fakeID;
    }
    return id;
}
|
||||
|
||||
module.exports = { postMedia,postStatus };
|
||||
|
||||
|
||||
|
||||
|
||||
|
87
ref/functions/support.js
Normal file
87
ref/functions/support.js
Normal file
|
@ -0,0 +1,87 @@
|
|||
const fs = require('fs');
|
||||
const Args = require('../classes/arguments.js');
|
||||
const args = new Args();
|
||||
|
||||
const Formats = require('../classes/formats.js');
|
||||
const format = new Formats();
|
||||
|
||||
/**
 * Prints ./usage.txt (resolved against the current working directory) at
 * debug level 1, turning its markup into console styles:
 * {text} is bold, ~text~ is underlined, <text> is italic. Every line break is
 * followed by the level-1 log prefix so continuation lines line up.
 */
function printHelp() { //PRINT USAGE TO CONSOLE
    let rendered = fs.readFileSync('./usage.txt', 'utf-8');
    //applied in this order; '$1' inside the styled string is filled in by String.replace
    const substitutions = [
        [/{([^{}]+)}/g, format.bold('$1')],
        [/~([^~]+)~/g, format.underline('$1')],
        [/<([^<>]+)>/g, format.italic('$1')],
        [/(\r\n|\r|\n)/g, '\n░ ']
    ];
    for (const [pattern, replacement] of substitutions) {
        rendered = rendered.replace(pattern, replacement);
    }
    debuglog(rendered,1);
}
|
||||
|
||||
/**
 * Logs the active settings: one line per option at debug level 2, followed by
 * a plain-language summary at debug level 1.
 */
function logArguments() {//PRINT ARGUMENTS TO CONSOLE
    const settings = [
        ["-h help", args.help],
        ["-q quotes", args.enableQuotes],
        ["-t threads", args.enableThreads],
        ["-b displayBrowser", args.displayBrowser],
        ["-p enablePosts", args.enablePosts],
        ["-c forceCSV", args.forceCSV],
        ["-m printMeta", args.printMeta],
        ["-u userName", args.userName],
        ["-n tweetCount", args.tweetCount],
        ["-d debug", args.debug],
        ["-w timeout", args.timeOut]
    ];
    debuglog("Settings: ", 2);
    for (const [label, value] of settings) {
        debuglog(`${label}: ${value}`, 2);
    }

    const negate = (enabled) => (enabled ? "" : " not"); //"will" vs "will not"
    const quotePart = args.enableQuotes ? ", Quote" : "";
    const threadPart = args.enableThreads ? ", Thread" : "";
    const summary = [
        `Scraping ${format.bold(args.tweetCount)} tweet(s) from ${format.bold(`@${args.userName}`)}...`,
        `Browser is${negate(args.displayBrowser)} visible`,
        `Tweets${quotePart}${threadPart} Tweets will${negate(args.enablePosts)} be posted to Mastodon`,
        `Tweet URLs will${negate(args.forceCSV)} be forcibly added to CSV file`,
        `Name, handle, and URL will${negate(args.printMeta)} be added to the body text`
    ];
    for (const line of summary) {
        debuglog(line, 1);
    }
}
|
||||
|
||||
|
||||
/**
 * console.log wrapper with user-selectable verbosity: the message is printed
 * only when its level does not exceed args.debug, prefixed with a marker for
 * that level.
 *
 * @param {*} debugString - message to print
 * @param {number} [logLevel=2] - level of the message (0, 1 or 2 have a marker)
 */
function debuglog(debugString,logLevel = 2) {//CUSTOM CONSOLE LOG THAT ALLOWS USER-SET DEBUG OUTPUT LEVELS
    if (!(logLevel <= args.debug)) {
        return; //more detailed than the user asked for
    }
    const marker = { 0: " ", 1: "░", 2: "█" }[logLevel];
    console.log(`${marker} ${debugString}`);
}
|
||||
|
||||
/**
 * Checks the parsed command-line settings.
 *
 * Prints the help screen and exits with code 0 when help was requested.
 * Otherwise exits with code 1, after logging the reason and a pointer to the
 * help screen, when the username, the tweet count or the debug level is not
 * acceptable.
 */
function validateArgs() {
    if (args.help) {
        printHelp();
        process.exit(0);
    }

    const messages = {
        userName: format.error("Uh-oh! It seems like the username doesn't work! Make sure you're entering the user's handle as it appears on-screen."),
        help: format.notice("For help: $node ./TwitToMast.js -h"),
        tweetCount: format.error(`Expected Integer greater than 0, got '${args.tweetCount}' instead`),
        debug: format.error(`Expected 0-2, got '${args.debug}' instead`)
    };

    //log the reason plus the help hint, then stop the process
    const abort = (reason) => {
        debuglog(reason, 0);
        debuglog(messages.help, 0);
        process.exit(1);
    };

    const handleLooksValid = /^@?(\w){1,15}$/g.test(args.userName);
    if (!handleLooksValid) {
        abort(messages.userName);
    }

    if (args.tweetCount < 1) {
        abort(messages.tweetCount);
    }

    const debugOutOfRange = args.debug < 0 || args.debug > 2;
    if (debugOutOfRange) {
        abort(messages.debug);
    }
}
|
||||
|
||||
module.exports = { printHelp,logArguments,debuglog,validateArgs };
|
||||
|
||||
|
||||
|
||||
|
||||
|
42
usage.txt
Normal file
42
usage.txt
Normal file
|
@ -0,0 +1,42 @@
|
|||
|
||||
{Usage}
|
||||
{node ./TwitToMast.js} [{-htqrpmbc}] [{-u} ~username~] [{-n} ~tweetcount~] [{-d} ~debuglevel~] [{-w} ~timeout~]
|
||||
{node ./multi.js} [{-htqrpmbc}] [{-n} ~tweetcount~] [{-d} ~debuglevel~] [{-w} ~timeout~]
|
||||
|
||||
{Arguments}
|
||||
{-h:} - show help screen (you made it here!)
|
||||
{-u:} ~username~
|
||||
- the twitter handle of the user whose account will be scraped
|
||||
<- defaults to 'Twitter' (@twitter)>
|
||||
{-n:} ~tweetcount~
|
||||
- the number of enabled tweets that will be scraped from the targeted account
|
||||
<- defaults to 5>
|
||||
{-t:} - tweets that are part of threads will be included in the scan
|
||||
{-q:} - quote tweets will be included in the scan
|
||||
{-r:} - link to quoted tweet will appear in the header, preceded by "Re: "
|
||||
- default behavior posts link at bottom of Mastodon post preceded by "Quoting "
|
||||
{-p:} - enable posting to Mastodon (nothing is posted unless this flag is set)
|
||||
{-m:} - include user's name, handle, and link to tweet
|
||||
{-b:} - display browser (disable headless mode)
|
||||
{-c:} - force URL to be logged to file if posts are disabled
|
||||
{-d:} ~debuglevel~
|
||||
- amount of information to print to console
|
||||
<0: only errors>
|
||||
<1: current task + tweet Text (default)>
|
||||
<2: pretty much everything>
|
||||
{-w:} ~timeout~
|
||||
- length of time (in ms) to wait for page elements to load
|
||||
<- defaults to 30000 (30 seconds)>
|
||||
|
||||
{config.txt}
|
||||
{Line 1: API_KEY}
|
||||
- Your Access Token obtained from Mastodon > Preferences > Development > Application
|
||||
{Line 2: API_URL}
|
||||
- https://~your mastodon server url~/api/v1/
|
||||
|
||||
{Examples}
|
||||
{Scrape 10 most recent tweets, quote tweets, and thread tweets from @twitter account, and post to Mastodon}
|
||||
$node ./TwitToMast.js -qtp -u twitter -n 10
|
||||
{Scrape 10 most recent tweets, quote tweets, and thread tweets from accounts listed in usernameslist.txt, and post to Mastodon}
|
||||
$node ./multi.js -qtp -n 10
|
||||
|
|
@ -9,4 +9,4 @@ TwitterBlue
|
|||
TwitterDesign
|
||||
TwitterEng
|
||||
Policy
|
||||
TwitterDev
|
||||
TwitterDev
|
Reference in a new issue