Is there any way to access elements inside a closed shadow DOM using Puppeteer?
I understand that the point of a closed shadow DOM is to keep its contents inaccessible, but since Puppeteer is used to scrape webpages, I think it would be reasonable to be able to access them with Puppeteer.
I tried accessing elements inside a closed shadow DOM using Puppeteer but I couldn’t do it.
Example:
A simple webpage I’m trying to extract content from:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Document</title>
</head>
<body>
<script>
/**
 * Custom element that projects its light-DOM children through a <slot>,
 * followed by a fixed <div>, inside a shadow root attached in "closed" mode.
 */
class ContainerComponent extends HTMLElement {
  constructor() {
    super();

    // Build the shadow content once via a <template>, then clone it in.
    const template = document.createElement("template");
    template.innerHTML = `
<slot></slot>
<div>This is what I want</div>
`;

    // mode "closed": the root is only reachable through this local reference.
    const root = this.attachShadow({ mode: "closed" });
    const fragment = template.content.cloneNode(true);
    root.appendChild(fragment);
  }
}

// Register the element under the tag name used in the page body.
customElements.define("layout-container", ContainerComponent);
</script>
<layout-container>
</layout-container>
</body>
</html>
import puppeteer from "puppeteer";
/**
 * Launches headless Chromium, opens the test page and logs the markup found
 * in the shadow root of the <layout-container> element.
 *
 * The browser is closed in a `finally` block so that a failure while
 * navigating, querying or evaluating does not leave the browser running.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 * @throws Re-raises any error from Puppeteer after the browser is closed.
 */
async function start() {
  const browser = await puppeteer.launch({
    headless: true,
  });
  try {
    const page = await browser.newPage();
    await page.goto("http://localhost:8080/iframe-test-nested.html");
    const result = await page.$("body layout-container");
    if (!result) {
      console.log("Element not found");
    } else {
      // `Element.shadowRoot` is null when the root was attached with
      // mode "closed", so the optional chain yields undefined in that case.
      const innerHTML = await page.evaluate(
        (el) => el.shadowRoot?.innerHTML,
        result
      );
      console.log(innerHTML);
    }
  } finally {
    await browser.close();
  }
}
// Handle rejection explicitly instead of leaving the returned promise floating.
start().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Or even using the `>>>` deep combinator selector:
import puppeteer from "puppeteer";
/**
 * Launches headless Chromium, opens the test page and logs the inner HTML of
 * the first element matched by the `>>>` deep-combinator selector
 * `body layout-container >>> div`.
 *
 * The browser is closed in a `finally` block so that a failure while
 * navigating, querying or evaluating does not leave the browser running.
 *
 * @returns {Promise<void>} Resolves once the browser has been closed.
 * @throws Re-raises any error from Puppeteer after the browser is closed.
 */
async function start() {
  const browser = await puppeteer.launch({
    headless: true,
  });
  try {
    const page = await browser.newPage();
    await page.goto("http://localhost:8080/iframe-test-nested.html");
    // NOTE(review): the post reports that `>>>` does not find the <div> under
    // the closed root — confirm against the Puppeteer selector documentation.
    const result = await page.$("body layout-container >>> div");
    if (!result) {
      console.log("Element not found");
    } else {
      const innerHTML = await page.evaluate((el) => el.innerHTML, result);
      console.log(innerHTML);
    }
  } finally {
    await browser.close();
  }
}
// Handle rejection explicitly instead of leaving the returned promise floating.
start().catch((error) => {
  console.error(error);
  process.exitCode = 1;
});
Thanks in advance.