I’m learning about scraping and chrome extensions at the moment. So I want to build one that scrape s the UberEats websites. It grabs the links to restaurants menu’s and accesses their html file to scrape the name of restaurants.
This is just proof of concept to know if I can even scrape the html in a url in the first place. My main goal is to do the scraping when the user searches of a dish on UberEats website where it loads a bunch of restaurant, and my extensions scrapes the html of the url of those restaurants to find matching dishes.
I don’t want to use puppeteer or selenium since they require a separate backend, where my extension need to access using an api. I don’t have a server nor do I want to host one. So I want to do all of this on the extension.
So far, I got the part where it extract the URLs, and now I need to access those URLs. I learned about background scripts, and fetch(). I tried using fetch to get a response from the URLs I scraped and then return the html to be scraped later on.
But it didn’t work and my catch error keeps getting returning these for every link I try to scape. It doesn’t show up on the console but on the chrome://extensions.
Error scraping data for URL: https://www.ubereats.com/ca/store/woking-dragon-chop-suey-house-langley/zzIphhS4XhaTOx7uDFoHsQ ReferenceError: error is not defined
Context
background.js
Stack Trace
background.js:55 (anonymous function)
}
} catch (error) {
55 console.error('Error scraping data for URL:', fullURL, error);
}
}
return storeNames;
}
If there is better approach then what i’ve done, please let me know thank you.
The rest of my code:
Manifest.json
{
"manifest_version": 3,
"name": "Food Delivery Scraper",
"description": "Scrape & Summarize Food Delivery Data",
"version": "1.0",
"background": {
"service_worker": "background.js"
},
"action": {
"default_popup": "popup.html"
},
"permissions": [
"activeTab",
"scripting"
],
"host_permissions": [
"https://www.ubereats.com/*"
],
"content_scripts": [
{
"matches": ["https://www.ubereats.com/*"],
"js": ["popup.js"],
"run_at": "document_idle"
}
]
}
background.js
function parseLink(URL) {
const baseURL = "https://www.ubereats.com";
return baseURL + URL;
}
async function fetchHTML(fullURL) {
fetch(fullURL)
.then(response => {
// Checking if the response is successful
if (!response.ok) {
throw new Error('Network response was not ok');
}
// Parsing the response as text
console.log(response.text());
})
.then(html => {
// Once HTML is retrieved, you can scrape the data from it
return html;
})
.catch(error => {
console.error('There was a problem with the fetch operation:', error);
}); throw error; // Rethrow to handle outside
}
function scrapeMenuData(html) {
const tempDiv = document.createElement('div');
tempDiv.innerHTML = html;
const storeTitleElement = tempDiv.querySelector('h1[data-testid="store-title-summary"]');
if (storeTitleElement) {
const storeTitle = storeTitleElement.textContent.trim();
console.log('Store Title:', storeTitle);
return storeTitle;
} else {
console.log('Store Title element not found');
return null;
}
}
async function scrapeLinks(cards) {
const storeNames = [];
for (const card of cards) {
const fullURL = parseLink(card.url);
try {
console.log("await fetchHTML");
const html = await fetchHTML(fullURL);
console.log("scrapeMenuData");
const storeTitle = scrapeMenuData(html);
if (storeTitle) {
storeNames.push(storeTitle);
}
} catch (error) {
55:** console.error('Error scraping data for URL:', fullURL, error);**
}
}
return storeNames;
}
chrome.runtime.onMessage.addListener(
function (request, sender, sendResponse) {
if (request.action === "scrapeData") {
scrapeLinks(request.cards).then(storeNames => {
sendResponse({storeNames: storeNames});
});
return true; // Indicates response will be sent asynchronously
}
}
);
popup.js
// popup.js
document.getElementById('scrapeApp').addEventListener('click', async () => {
let [tab] = await chrome.tabs.query({active: true, currentWindow: true});
chrome.scripting.executeScript({
target: {tabId: tab.id},
function: getCardsData,
}, (injectionResults) => {
for (const frameResult of injectionResults) {
chrome.runtime.sendMessage({
action: "scrapeData",
cards: frameResult.result
}, response => {
console.log('Scraped Names:', response.storeNames);
updateUI(response.storeNames);
});
}
});
});
function getCardsData() {
const cards = Array.from(document.querySelectorAll('[data-testid="store-card"][href]')).map(card => ({
url: card.getAttribute('href')
}));
return cards;
}
function updateUI(storeNames) {
const main = document.querySelector('main');
main.innerHTML = ''; // Clear previous results
storeNames.forEach(name => {
const div = document.createElement('div');
div.textContent = name;
main.appendChild(div);
});
}