When I try to highlight words in chosen image it fails saying there’s no words for the text(Tessereact.js) and I tried many things to make it mark words in the image after the ocr but still fails to, this is the last code edit after failing many times, I want it to be like tesseract.js website demo where it can mark where is words after performing ocr
script.js
const langSelect = document.getElementById('langSelect');
const fileInput = document.getElementById('fileInput');
const ocrButton = document.getElementById('ocrButton');
const extractSelect = document.getElementById('extractSelect');
const extractButton = document.getElementById('extractButton');
const imageContainer = document.getElementById('imageContainer');
const textOutput = document.getElementById('textOutput');
var input_overlay = document.getElementById('highlightCanvas')
var ioctx = input_overlay.getContext('2d')
let extractedText = '';
fileInput.addEventListener('change', function () {
const file = fileInput.files[0];
imageContainer.src = URL.createObjectURL(file);
});
async function performOCR() {
const lang = langSelect.value;
if (lang && fileInput.files.length > 0) {
const file = fileInput.files[0];
ocrButton.disabled = true;
ocrButton.classList.add('disabled');
textOutput.innerHTML = '<div class="loading"><span class="loader animate" aria-label="Processing your request"></span><span>Loading...</span></div>';
try {
const { createWorker } = Tesseract;
let tessdata;
if (lang === "eng") {
tessdata = 'eng.traineddata';
}
const worker = await createWorker(lang, 1,{
workerPath: "./dist/worker.min.js",
logger: m => console.log(m),
tessdata: tessdata
});
const { data: { text } } = await worker.recognize(file);
if (text) {
result(text);
await worker.terminate();
} else {
textOutput.innerText = 'No text found.';
extractedText = '';
await worker.terminate();
}
imageContainer.innerHTML = `<img src="${URL.createObjectURL(file)}" alt="Uploaded Image">`;
} catch (error) {
textOutput.innerText = 'Error: OCR failed.';
textOutput.classList.add('error');
console.log(error);
extractedText = '';
}
ocrButton.disabled = false;
ocrButton.classList.remove('disabled');
} else {
textOutput.innerText = 'Please select a language and an image file.';
}
}
function extractText() {
const fileType = extractSelect.value;
if (fileType && extractedText) {
const blob = new Blob([extractedText], {
type: `text/${fileType}`
});
const url = URL.createObjectURL(blob);
const link = document.createElement('a');
link.href = url;
link.download = `extracted_text.${fileType}`;
link.click();
} else {
textOutput.innerText = 'Please select a file type and perform OCR first.';
}
}
function result(res){
// octx.clearRect(0, 0, output.width, output.height)
// octx.textAlign = 'left'
console.log('result was:', res)
// output_overlay.style.display = 'none'
// output_text.innerHTML = res.text
textOutput.innerText = res.text;
extractedText = res.text;
res.words.forEach(function(w){
var b = w.bbox;
ioctx.strokeWidth = 2
ioctx.strokeStyle = 'red'
ioctx.strokeRect(b.x0, b.y0, b.x1-b.x0, b.y1-b.y0)
ioctx.beginPath()
ioctx.moveTo(w.baseline.x0, w.baseline.y0)
ioctx.lineTo(w.baseline.x1, w.baseline.y1)
ioctx.strokeStyle = 'green'
ioctx.stroke()
// octx.font = '20px Times';
// octx.font = 20 * (b.x1 - b.x0) / octx.measureText(w.text).width + "px Times";
// octx.fillText(w.text, b.x0, w.baseline.y0);
})
}
index.html:
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>OCR Reader</title>
<script src="./dist/dist-tesseract.min.js"></script>
<link rel="stylesheet" href="./7.css">
<link rel="stylesheet" href="./styles.css">
</head>
<body>
<div class="center">
<select id="langSelect">
<option value="eng">English</option>
</select>
<input type="file" accept="image/png, image/jpeg" id="fileInput">
<button onclick="performOCR()" id="ocrButton">Perform OCR</button>
<div id="content">
<div>
<canvas id="highlightCanvas"></canvas>
<img id="imageContainer"><div id="textOutput" class="output-box"></div>
</div>
<select id="extractSelect">
<option value="">Select File Type</option>
<option value="txt">Text File (.txt)</option>
<option value="docx">Word Document (.docx)</option>
</select>
<button onclick="extractText()" id="extractButton">Extract Text</button>
</div>
<script src="./dist/worker.min.js"></script>
<script src="./script.js"></script>
</body>
</html>
tried many things and even libraries to make it mark the words using ocr in the chosen image and still doesn’t work