Myers diff algorithm treats insertion and removal equally at the same cost and treats equality at zero cost (free path).
I tried to alter this behavior with supplying a custom cost function which to begin with favours removal (with a cost of -1), then favours equality (with a cost of 0) and then favours insertion (with a cost of 1).
On running the following modified version and doing a diff on PQRSTUZ v/s PQTUARS, I get TUA as inserted and TUZ as removed.
class Keep {
constructor(line, coordinate, cost) {
this.type = 'Keep';
this.line = line;
this.coordinate = coordinate;
this.cost = cost;
}
}
class Insert {
constructor(line, coordinate, cost) {
this.type = 'Insert';
this.line = line;
this.coordinate = coordinate;
this.cost = cost;
}
}
class Remove {
constructor(line, coordinate, cost) {
this.type = 'Remove';
this.line = line;
this.coordinate = coordinate;
this.cost = cost;
}
}
// Represents a point on the frontier with its history AND cost
class Frontier {
constructor(x, history, cost) {
this.x = x; // x-coordinate in edit graph
this.history = history; // sequence of operations to reach this point
this.cost = cost; // Total cost to reach this point
}
}
function visualizePath(history, oldSequence, newSequence) {
const matrix = [];
const costMatrix = [];
const rows = newSequence.length + 1;
const cols = oldSequence.length + 1;
const firstRow = Array.from(` ${oldSequence.map(item => item.hashVal).join('')}`);
matrix[0] = firstRow;
costMatrix[0] = firstRow;
for(let r = 1, c = 0; r < rows; r++) {
matrix[r] = Array.from("·".repeat(cols))
matrix[r][c] = newSequence[r-1].hashVal;
costMatrix[r] = Array.from("·".repeat(cols))
costMatrix[r][c] = newSequence[r-1].hashVal;
}
for(const item of history) {
const {type, coordinate, cost} = item;
let sign = null;
switch(type) {
case 'Keep':
sign = '↘';
break;
case 'Insert':
sign = '↓';
break;
case 'Remove':
sign = '→';
break;
}
if(coordinate[0] === 0) {
coordinate[0] = 1;
}
matrix[coordinate[1]][coordinate[0]] = sign;
costMatrix[coordinate[1]][coordinate[0]] = cost;
}
for(const row of matrix) {
console.log(row.join(" "));
}
console.log("n")
for(const row of costMatrix) {
console.log(row.join(" "));
}
}
function myersDiff(old, current, costFunction) {
// Initialize the frontier with starting point. Start with cost 0
const frontier = { 1: new Frontier(0, [], 0) };
// Convert from 1-based to 0-based indexing
// Algorithm uses 1-based indexing, but JavaScript uses 0-based
function one(idx) {
return idx - 1;
}
const aMax = old.length; // horizontal size of edit graph
const bMax = current.length; // vertical size of edit graph
// Main loop: try increasing numbers of edits
for (let d = 0; d <= aMax + bMax; d++) { // Removed the + 1 because it was causing issues
// For each value of D, try all possible diagonals
for (let k = -d; k <= d; k += 2) {
// Determine whether to go down or right BASED ON COST
const goDown = (k === -d || // at left edge, must go down
(k !== d && frontier[k - 1] && frontier[k + 1] && frontier[k - 1].cost < frontier[k + 1].cost)); // compare COSTS. Added null checks
// Find starting point for this iteration
let oldX, history, currentCost;
if (goDown) {
// Copy from k+1 diagonal (going down)
({ x: oldX, history, cost: currentCost } = frontier[k + 1]);
var x = oldX;
} else {
// Copy from k-1 diagonal and move right
({ x: oldX, history, cost: currentCost } = frontier[k - 1]);
var x = oldX + 1;
}
// Clone history to avoid modifying previous paths
history = [...history];
let y = x - k; // Calculate y from x and k
let newCost = currentCost; //Start newCost as the cost of where we came from
// Record the edit we made to get here AND UPDATE THE COST
if (y >= 1 && y <= bMax && goDown) {
// Insert from new sequence when going down
const insertCost = costFunction(null, current[one(y)]); //Cost of inserting
history.push(new Insert(current[one(y)], [x,y], insertCost));
newCost += insertCost; //Update the cost
} else if (x >= 1 && x <= aMax) {
// Remove from old sequence when going right
const removeCost = costFunction(old[one(x)], null); //Cost of removing
history.push(new Remove(old[one(x)], [x,y], removeCost));
newCost += removeCost; //Update the cost
}
// Follow the snake: match diagonal moves as far as possible
// This is key optimization - extend path along matches
while (x < aMax && y < bMax && costFunction(old[one(x + 1)], current[one(y + 1)]) === 0) {
x += 1;
y += 1;
history.push(new Keep(current[one(y)], [x,y], 0));
}
// Check if we've reached the end
if (x >= aMax && y >= bMax) {
visualizePath(history, old, current);
return history; // Found solution
} else {
// Save this point in the frontier. Save the COST TOO.
frontier[k] = new Frontier(x, history, newCost);
}
}
}
throw new Error('Could not find edit script');
}
// Example Usage:
let text1 = "PQRSTUZ".split("").map(item => ({ hashVal: item, content: item }));
let text2 = "PQTUARS".split("").map(item => ({ hashVal: item, content: item }));
// Define a custom cost function
function myCostFunction(oldElement, newElement) {
if (oldElement === null) {
// Cost for insertion
return 1;
}
if (newElement === null) {
// Cost for deletion
return -1;
}
if (oldElement.hashVal === newElement.hashVal) {
return 0; // No cost
}
return 1; // Default cost
}
const editScript = myersDiff(text1, text2, myCostFunction);
let diff2 = editScript.map(operation => {
const element = operation.line;
if (operation instanceof Keep) {
element.status = 'unchanged';
} else if (operation instanceof Insert) {
element.status = 'inserted';
} else if (operation instanceof Remove) {
element.status = 'removed';
}
return element;
});
console.log(diff2);
console.log(diff2.length);
Here is also the path matrix for the path it chooses:
P Q R S T U Z
P ↘ · · · · · ·
Q · ↘ · · · · ·
T · ↓ · · · · ·
U · ↓ · · · · ·
A · ↓ · · · · ·
R · · ↘ · · · ·
S · · · ↘ → → →
and the cost matrix:
P Q R S T U Z
P 0 · · · · · ·
Q · 0 · · · · ·
T · 1 · · · · ·
U · 1 · · · · ·
A · 1 · · · · ·
R · · 0 · · · ·
S · · · 0 -1 -1 -1
My question is:
- Is this the correct way to approach the cost function with Myers? In my case, I do not want to treat each insertion, removal, and matching equally. So a custom cost function will be helpful. How can this be improved?
- The algorithm still prefers to minimize D by maximizing X. Should it still hold valid with the custom cost function in place?









