I’m currently working on running my custom ONNX segmentation model directly in the browser using ONNX Runtime for Web with HTML and JavaScript. The model is loading and running inference successfully. However, I’m stuck on the post-processing logic, specifically when it comes to filtering and visualizing the segmentation mask.
I would appreciate any guidance or example implementations for:
Converting the model output into a usable mask
Resizing or transforming the output to match the original image
Filtering/thresholding the segmentation mask correctly in JavaScript
My use case is similar to how YOLOv8-seg outputs masks, but adapted to ONNX Runtime Web. If anyone has experience with post-processing segmentation outputs in the browser using ONNX models, your input would be very helpful.
The Output segmenation :
Actual bit map :
enter image description here
HTML CODE :
<!DOCTYPE html>
<html>
<head>
<title>YOLOv8 Segmentation Viewer</title>
<script src="https://cdn.jsdelivr.net/npm/onnxruntime-web/dist/ort.min.js"></script>
<style>
canvas {
border: 1px solid black;
display: block;
margin-top: 10px;
}
</style>
</head>
<body>
<h3>YOLOv8 Segmentation Mask Demo</h3>
<input type="file" id="upload" accept="image/*" />
<canvas id="canvas"></canvas>
<script>
const inputSize = 640;
document.getElementById("upload").addEventListener("change", async (e) => {
const file = e.target.files[0];
const img = new Image();
img.src = URL.createObjectURL(file);
img.onload = async () => {
const canvas = document.getElementById("canvas");
const ctx = canvas.getContext("2d");
canvas.width = img.width;
canvas.height = img.height;
ctx.drawImage(img, 0, 0);
const offscreen = new OffscreenCanvas(inputSize, inputSize);
const offCtx = offscreen.getContext("2d");
offCtx.drawImage(img, 0, 0, inputSize, inputSize);
const imageData = offCtx.getImageData(0, 0, inputSize, inputSize);
const inputTensor = preprocess(imageData);
const session = await ort.InferenceSession.create("best_on.onnx");
const feeds = { images: inputTensor };
const results = await session.run(feeds);
const output0 = results[Object.keys(results)[0]].data;
const output1 = results[Object.keys(results)[1]].data;
const [numProposals, numChannels] = [8400, 37];
const prototypeH = 160;
const prototypeW = 160;
for (let i = 0; i < numProposals; i++) {
const conf = output0[i * numChannels + 4];
if (conf < 0.5) continue;
const maskCoeffs = output0.slice(
i * numChannels + 5,
i * numChannels + 5 + 32
);
const mask = new Float32Array(prototypeH * prototypeW).fill(0);
for (let m = 0; m < 32; m++) {
for (let j = 0; j < prototypeH * prototypeW; j++) {
mask[j] += maskCoeffs[m] * output1[m * prototypeH * prototypeW + j];
}
}
for (let j = 0; j < mask.length; j++) {
mask[j] = 1 / (1 + Math.exp(-mask[j]));
}
drawMaskOnCanvas(
mask,
prototypeH,
prototypeW,
canvas,
ctx,
img.width,
img.height
);
break;
}
};
});
function preprocess(imageData) {
const [r, g, b] = [[], [], []];
for (let i = 0; i < imageData.data.length; i += 4) {
r.push(imageData.data[i] / 255);
g.push(imageData.data[i + 1] / 255);
b.push(imageData.data[i + 2] / 255);
}
return new ort.Tensor(
"float32",
new Float32Array([...r, ...g, ...b]),
[1, 3, 640, 640]
);
}
function drawMaskOnCanvas(mask, maskH, maskW, canvas, ctx, imgW, imgH) {
const imageData = ctx.getImageData(0, 0, imgW, imgH);
const maskResized = resizeMask(mask, maskW, maskH, imgW, imgH);
for (let y = 0; y < imgH; y++) {
for (let x = 0; x < imgW; x++) {
const m = maskResized[y * imgW + x];
if (m > 1) {
const idx = (y * imgW + x) * 4;
imageData.data[idx] = 255;
imageData.data[idx + 1] = 0;
imageData.data[idx + 2] = 0;
imageData.data[idx + 3] = 100;
}
}
}
ctx.putImageData(imageData, 0, 0);
}
function resizeMask(src, srcW, srcH, dstW, dstH) {
const dst = new Float32Array(dstW * dstH);
for (let y = 0; y < dstH; y++) {
const sy = ((y + 0.5) * srcH) / dstH - 0.5;
const y0 = Math.max(Math.floor(sy), 0);
const y1 = Math.min(y0 + 1, srcH - 1);
const yLerp = sy - y0;
for (let x = 0; x < dstW; x++) {
const sx = ((x + 0.5) * srcW) / dstW - 0.5;
const x0 = Math.max(Math.floor(sx), 0);
const x1 = Math.min(x0 + 1, srcW - 1);
const xLerp = sx - x0;
const top =
(1 - xLerp) * src[y0 * srcW + x0] + xLerp * src[y0 * srcW + x1];
const bottom =
(1 - xLerp) * src[y1 * srcW + x0] + xLerp * src[y1 * srcW + x1];
dst[y * dstW + x] = (1 - yLerp) * top + yLerp * bottom;
}
}
return dst;
}
</script>
</body>
</html>
any one suggest me the i am doing wrong any where ?
My goal is to detect and process the segmentation mask on an input image using a custom ONNX model running in the browser with ONNX Runtime Web.
Like the image below:



