I am using OpenCV & tesseract to read the text from the image. But something weird is happening. I have a CTA with text on it with a background color:
[![enter image description here][1]][1]
When I run the code it’s read the text. Now if the CTA is a part of a UI like this:
[![enter image description here][2]][2]
Then the code didn’t read the CTA text.
here is the output:[![enter image description here][3]][3]
I didn’t now what to do. Here is the code snippet:
document.getElementById('process').addEventListener('click', async () => {
if (!imageFile) {
alert("Please upload an image first.");
return;
}
const img = new Image();
img.src = URL.createObjectURL(imageFile);
img.onload = function() {
// Set canvas dimensions to match the uploaded image
canvas.width = img.width;
canvas.height = img.height;
// Draw the uploaded image onto the canvas
ctx.drawImage(img, 0, 0);
// Get image data from canvas for OpenCV processing
const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
// Convert to BGR using OpenCV
const src = cv.matFromImageData(imageData);
const gray = new cv.Mat();
const clahe = new cv.Mat();
const thresh = new cv.Mat();
// Convert to grayscale
cv.cvtColor(src, gray, cv.COLOR_RGBA2GRAY, 0);
// Apply CLAHE to improve contrast
const claheObj = new cv.CLAHE(2.0, new cv.Size(8, 8));
claheObj.apply(gray, clahe);
// Adjust thresholding to better handle colored backgrounds
cv.threshold(clahe, thresh, 150, 255, cv.THRESH_BINARY); // Adjust threshold value as needed
// Convert thresholded image to RGBA for canvas
const rgbaImageData = new Uint8ClampedArray(thresh.cols * thresh.rows * 4);
for (let i = 0; i < thresh.rows; i++) {
for (let j = 0; j < thresh.cols; j++) {
const binaryValue = thresh.ucharPtr(i, j)[0]; // Binary value
const index = (i * thresh.cols + j) * 4;
rgbaImageData[index] = binaryValue; // R
rgbaImageData[index + 1] = binaryValue; // G
rgbaImageData[index + 2] = binaryValue; // B
rgbaImageData[index + 3] = 255; // A (Fully opaque)
}
}
// Create ImageData object with the RGBA data and put it back onto the canvas
const processedImageData = new ImageData(rgbaImageData, thresh.cols, thresh.rows);
ctx.putImageData(processedImageData, 0, 0);
// Release memory used by OpenCV
src.delete();
gray.delete();
clahe.delete();
thresh.delete();
claheObj.delete();
// Convert the processed canvas image to a Blob URL and pass it to Tesseract
canvas.toBlob((blob) => {
const processedImageURL = URL.createObjectURL(blob);
Tesseract.recognize(processedImageURL, 'eng', {
logger: (m) => console.log(m),
// Add the Tesseract options here
tessedit_pageseg_mode: 6, // --psm 6
oem: 3 // --oem 3
}).then(({ data: { text } }) => {
// Send the extracted text back to the Figma plugin
parent.postMessage({ pluginMessage: { type: 'text-extracted', text } }, '*');
}).catch((error) => {
console.error("Error extracting text:", error);
alert("Failed to extract text from the image.");
});
}, 'image/png');
};
});
I am didn’t know python that’s I used there CDN links
[1]: https://i.sstatic.net/BfLMehzu.png
[2]: https://i.sstatic.net/TLMRswJj.png
[3]: https://i.sstatic.net/8MMlKCtT.png