I am trying to scrape some data from a website using the Tampermonkey browser add-on and JavaScript. The goal is to 'export' parts of the info into a .csv/.txt file, download it, and then import it into a Google Spreadsheet to do my calculations later.
My script works when I run it on a page with roughly 270 results/lines/rows.
The problem is the page with more than 2000 results/lines/rows. For that page I added one more variable to pick up a 'rarity link' class. BUT while scraping, the script/console gets stuck around value/cycle ~1065 results/lines/rows, keeps looping on that value (1065) with no data, and never finishes.
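To see what is actually there at that index, a quick console check like this might help (just a debugging sketch; the class name is from the big page, and 1065 is where it stalls):

// Debug check: does the element at the stall index actually exist?
var cells = document.getElementsByClassName('collection-product-name');
console.log('elements found: ' + cells.length); // fewer than 2500 would explain the stall
if (cells[1065] === undefined) {
    console.log('index 1065 does not exist (reading .innerHTML on it would throw)');
} else {
    console.log('index 1065: ' + cells[1065].innerHTML);
}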
I think it may be some problem with a variable size limit?
I have spent some days thinking about and working on a workaround for this, like replacing a full HTTP link with a single word whenever the link includes a specific substring, but without success. I am new to JavaScript, so maybe the 'logic' in my script is wrong.
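What I was attempting looks roughly like this (only a sketch; the lv_N substrings are from my page, and the helper name labelFromLink is mine):

// Replace a long image URL with a short word based on a substring it contains.
var RARITY_BY_SUBSTRING = {
    'lv_2': 'Uncommon',
    'lv_3': 'Rare',
    'lv_4': 'Epic',
    'lv_5': 'Legendary',
    'lv_6': 'mythic',
    'lv_7': 'SpecialOnes'
};

function labelFromLink(link) {
    for (var key in RARITY_BY_SUBSTRING) {
        if (link.includes(key)) { return RARITY_BY_SUBSTRING[key]; }
    }
    return 'Common'; // nothing matched, fall back to the default
}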
I have tried different browsers (Firefox and Brave) with the latest stable version of Tampermonkey, and I have increased the disk.cache size and enabled the disk.smart options in the browsers' about:config menus.
How the script works:
1- After the page has loaded, detect whether it contains the expected quantity of pieces (see the sketch after this list).
2- When the 'for' cycle reaches all pieces (the end of the page / the last piece), prepare the variable (allDump) as a file and download it as .csv.
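For step 1, the idea is roughly this (a minimal sketch; 'collection-product-name' is the class on the +2000 page, and the 500 ms poll interval is arbitrary):

// Poll until the page has rendered the expected number of rows, then run a callback.
function waitForPieces(className, expectedCount, onReady) {
    var poller = setInterval(function () {
        var found = document.getElementsByClassName(className).length;
        console.log('rows rendered so far: ' + found);
        if (found >= expectedCount) {
            clearInterval(poller);
            onReady();
        }
    }, 500);
}
// Usage: waitForPieces('collection-product-name', 2500, function () { piecedumper(2500); });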
The scraping code and download zone:
(function() {
'use strict';
// Set BOTH together: 0 & 2500 || 1 & 270
let piecetabsel = 0; // 0 = zone with 2000+ rows (status: PROBLEM!) || 1 = zone with ~280 rows (status: OK)
let totalpieces = 2500; // 2500 rows/lines || 270 rows/lines
let piecenamea = '';
let piecebonusa = '';
let piecepowera = '';
let themnrimage = '';
let pieceDumpFile = '';
let piecerarity = '';
let pieceraritya = ''; // must be declared: assigning an undeclared variable throws under 'use strict'
var piecedumper = function(totalpieces){
var countpieces = totalpieces - 1;
var intra3 = setInterval(function(){ // give the page 9 s to render, then scrape once
for (let koko = 0; koko <= countpieces; koko++) { // koko must be declared, or strict mode throws
console.log("ID:" + koko);
// >280 Pieces zone
if (piecetabsel == 1){
piecenamea = document.getElementsByClassName('piece-name')[koko].innerHTML;
piecebonusa = document.getElementsByClassName('extra-value')[koko].innerHTML;
piecepowera = document.getElementsByClassName('the-power')[koko].innerHTML;
themnrimage = document.getElementsByClassName('photo-link')[koko].currentSrc;
// single append (the old if/if pair wrote the first row twice); note the real '\n' escape
pieceDumpFile += 'Common;' + piecenamea + ';' + piecebonusa + ';' + piecepowera + ';' + themnrimage + '\n';
}
// +2000 pieces zone - STUCK ZONE around 1065
if (piecetabsel == 0){
piecenamea = document.getElementsByClassName('collection-product-name')[koko].innerHTML;
piecebonusa = document.getElementsByClassName('collection-bonus-amount')[koko].innerHTML.replace(/[+ ]/g, '').replace(".", ",");
piecepowera = document.getElementsByClassName('power-value')[koko].innerHTML.replace(".", "");
piecerarity = document.getElementsByClassName('collection-product-level-img-size-2')[koko].currentSrc;
//
// Same as the 270-pieces zone, BUT with my workaround to shrink the variable size. NO SUCCESS
if (piecerarity.includes('lv_2')) { pieceraritya = 'Uncommon'; }
else if (piecerarity.includes('lv_3')) { pieceraritya = 'Rare'; }
else if (piecerarity.includes('lv_4')) { pieceraritya = 'Epic'; }
else if (piecerarity.includes('lv_5')) { pieceraritya = 'Legendary'; }
else if (piecerarity.includes('lv_6')) { pieceraritya = 'mythic'; }
else if (piecerarity.includes('lv_7')) { pieceraritya = 'SpecialOnes'; }
else { pieceraritya = 'Common'; }
// single append, same fix as the other zone, again ending with a real '\n'
pieceDumpFile += pieceraritya + ';' + piecenamea + ';' + piecebonusa + ';' + piecepowera + '\n';
}
}
clearInterval(intra3);
var allDump = 'Rarity;Name;Extra;Power;Photo\n' + pieceDumpFile; // header order matches the row fields; rows in the +2000 zone simply have no photo column
downloadZone(allDump);
}, 9000);
}
var downloadZone = function(allDump){
var data=allDump;
var a = document.createElement("a");
a.href = 'data:application/csv;charset=utf-8,' + encodeURIComponent(data);
a.download = 'MyInfoScrapper.csv';
console.log("Generating CSV file and downloading");
document.body.appendChild(a); // Firefox needs the anchor attached to the document before click()
a.click();
a.remove();
}
piecedumper(totalpieces);
})();
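While searching I read that data: URLs can hit a length limit in some browsers, which might matter with 2000+ rows; a Blob URL is supposed to avoid that. A sketch of the alternative download (my assumption, untested on the big page):

// Download via a Blob URL instead of a data: URL, which some browsers cap in length.
function downloadAsBlob(text, filename) {
    var blob = new Blob([text], { type: 'text/csv;charset=utf-8' });
    var url = URL.createObjectURL(blob);
    var a = document.createElement('a');
    a.href = url;
    a.download = filename;
    document.body.appendChild(a); // Firefox wants the anchor in the document before click()
    a.click();
    a.remove();
    URL.revokeObjectURL(url); // free the object URL after the click
}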
I am hoping for some ideas/workarounds/solutions for my situation, as I am running out of ideas.
Maybe something that produces 'File-Part1.csv', 'File-Part2.csv'?! A rough sketch of what I mean is below.
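(rowsPerFile is arbitrary, and downloadAsBlob is the helper sketched above):

// Split the accumulated rows into several CSV files of rowsPerFile lines each.
function downloadInParts(header, rows, rowsPerFile) {
    for (var part = 0; part * rowsPerFile < rows.length; part++) {
        var chunk = rows.slice(part * rowsPerFile, (part + 1) * rowsPerFile);
        downloadAsBlob(header + chunk.join('\n') + '\n', 'File-Part' + (part + 1) + '.csv');
    }
}
// Usage: downloadInParts('Rarity;Name;Extra;Power;Photo\n', pieceDumpFile.split('\n').filter(Boolean), 1000);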