I have the code down below which I use to fetch a dom with puppeteer. sometimes the DOM can be buggy or i need to fetch the DOM again for some other reason, I do a check and depending on that I create a recursive call which also creates another puppeteer page and connects to a proxy and does the same stuff.
On the recursion, (not the first time the function running) the proxies are bugging out. they dont work correctly and drop. I know it is about the line useProxy(page, 'http://' + prx); because if i dont use a proxy for recursive calls, the function seems to be running smooth.
var fs = require('fs')
const { Worker, isMainThread, parentPort, workerData } = require('worker_threads');
var { parse } = require('node-html-parser');
const puppeteer = require('puppeteer');
const useProxy = require('@lem0-packages/puppeteer-page-proxy');
//var useProxy = require('puppeteer-page-proxy')
//npm i @lem0-packages/puppeteer-page-proxy => https://github.com/Cuadrix/puppeteer-page-proxy/issues/57 change the line
const proxArray = fs.readFileSync('../ProxyModules/proxies.txt', { encoding: 'utf8', flag: 'r' }).split(/r?n/)
const proxArray2 = fs.readFileSync('../ProxyModules/proxies2.txt', { encoding: 'utf8', flag: 'r' }).split(/r?n/)
async function main(){
let browser;
browser = await puppeteer.launch({ headless:true,
// executablePath: '/home/node_modules/chromium/lib/chromium/chrome-linux/chrome',
defaultViewport: null,
args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage', '--disable-accelerated-2d-canvas', '--no-first-run', '--no-zygote', '--single-process', '--disable-gpu'] // `--window-size=1920,1080` // executablePath: chromium.path
});
var promisesArray = [];
var wholeARR = [];
for(let z = 50; z < 101; z++){
promisesArray.push(
new Promise(async (resolve, reject) => {
//looping the page.. 3 times until a request is found
async function fetchOnPage(times, z){
try{
let page = await browser.newPage();
await page.setViewport({width: 1920, height: 1200, deviceScaleFactor: 1 }); await page.setDefaultNavigationTimeout(0); page.on("pageerror", function(err) { reject(-1);}); // page.on("error", function (err) { console.log("Error: " + err.toString())});
let indexOfProxyArray = (z % proxArray.length) + times;
let prx = proxArray[indexOfProxyArray];
useProxy(page, 'http://' + prx); //if(times == 0) useProxy(page, 'http://' + prx); this just shows, on 2nd time if i dont use the proxy it work...
console.log(`starting ${times} time on page ${z} with prx : ${prx}`);
await page.goto("https://www.google.com/", {timeout: 0, waitUntil: 'domcontentloaded'});
const DOMAray = await page.evaluate(() => {});
if(times == 2 && DOMAray.length == 0 ){
reject(-1); return;
}else if(times < 2 && DOMAray.length == 0 ){
await page.close();
fetchOnPage(times + 1, z)
}else{
Array.prototype.push.apply(wholeARR, DOMAray);
resolve(1);
return;
}
}catch(error){console.log(error);}
}
fetchOnPage(1, z);
})
)
}
return Promise.all(promisesArray)
.then(async function() {
console.log('finsihed PROMSES');
}).catch((e) => {console.log(`error ${e}`); })
}
main();
the console output shows that all the firs time requests are made well, but for the 2nd time it is called(recursion) the logs doesnt even appear and the program just hangs. i made a small line //if(times == 0) useProxy(page, 'http://' + prx); which just shows that if i use this line th eprogram runs perfect. but otherwise the program just hangs on the first recursive call to the function.
maybe i shouldn’t be using @lem0-packages/puppeteer-page-proxy, because it is weird that from 50 to 101 (50 times looping) all the pages run perfectly, but just the time i start recursive call, many requests just hang definitely the puppeteer proxy issue. I tried to avoid using a proxy address that is already used on a second recursive call by adding a line if(times == 2) prx = proxArray2[indexOfProxyArray]; yet the console still hangs.. no error no show up anything… I just want to be able to use the puppeteer pages recursively with proxies.. how to fix this hang on console ( no error hshown > )