// Demo script: encode a handful of consonants as feature vectors, print them,
// then print how similar each one is to `b`.
const features = getFeaturesList()

const b = makeData(features, { bilabial: true, voiced: true })
// bh (b with aspiration, written as h) is common in Indian languages
const bh = makeData(features, { bilabial: true, voiced: true, aspiration: true })
const d = makeData(features, { dental: true, voiced: true })
const dh = makeData(features, { dental: true, voiced: true, aspiration: true })
const p = makeData(features, { bilabial: true })
const ph = makeData(features, { bilabial: true, aspiration: true })
const s = makeData(features, { fricative: true })
const z = makeData(features, { fricative: true, voiced: true })

// Print every sample's vector and feature map, in declaration order.
for (const [label, data] of Object.entries({ b, bh, d, dh, p, ph, s, z })) {
  logData(label, data)
}

// Compare `b` against each of the other samples, in declaration order.
for (const [label, data] of Object.entries({ bh, d, dh, p, ph, s, z })) {
  logSimilarity(`b-${label}`, b, data)
}
/**
 * Returns the table of known features and the vector slice each of their
 * values encodes to.
 *
 * Every feature has a `false` entry (its all-zero slice). All features except
 * `explosivity` are on/off and occupy a single vector slot; `explosivity`
 * takes `in` / `out` / `false` and occupies two slots.
 *
 * A fresh object with fresh arrays is built on every call.
 *
 * @returns {Object<string, Object<string, number[]>>} feature name -> value -> slice
 */
function getFeaturesList() {
  // Encoding shared by all on/off features; called once per feature so that
  // no two features share the same array instances.
  const binary = () => ({ true: [1], false: [0] })

  return {
    bilabial: binary(),
    labiodental: binary(),
    dental: binary(),
    alveolar: binary(),
    retroflex: binary(),
    palatal: binary(),
    velar: binary(),
    labiovelar: binary(),
    glottal: binary(),
    nasal: binary(),
    fricative: binary(),
    approximant: binary(),
    flap: binary(), // r
    lateral: binary(),
    aspiration: binary(),
    click: binary(),
    dentalization: binary(),
    explosivity: {
      in: [1, 0],
      out: [0, 1],
      false: [0, 0],
    },
    plosive: binary(),
    labialization: binary(),
    nasalization: binary(),
    palatalization: binary(),
    pharyngealization: binary(),
    stop: binary(),
    tense: binary(),
    velarization: binary(),
    voiced: binary(),
    sibilance: binary(),
    length: binary(),
  }
}
/**
 * Computes the cosine similarity of two numeric vectors.
 *
 * The loop runs over the indices of `v1`, so `v2` is expected to have the
 * same length as `v1`.
 *
 * @param {number[]} v1 - First vector.
 * @param {number[]} v2 - Second vector.
 * @returns {number} dot(v1, v2) / (|v1| * |v2|), or 0 when either vector has
 *   zero magnitude (including empty input).
 */
function cosineSimilarity(v1, v2) {
  let dotProduct = 0
  let normA = 0
  let normB = 0
  for (let i = 0; i < v1.length; i++) {
    dotProduct += v1[i] * v2[i]
    normA += v1[i] ** 2
    normB += v2[i] ** 2
  }
  const magnitude = Math.sqrt(normA) * Math.sqrt(normB)
  // An all-zero vector has no direction; dividing here would be 0 / 0 = NaN,
  // so report "no similarity" instead.
  if (magnitude === 0) {
    return 0
  }
  return dotProduct / magnitude
}
/**
 * Encodes one sound as a flat numeric vector.
 *
 * Features are visited in the order returned by `getSortedFeatureNames()`.
 * For each one, the slice registered in `features` for the value given in
 * `mappings` is appended to the vector; features that are absent from
 * `mappings` (or mapped to a falsy value) contribute their `false` slice.
 *
 * @param {Object<string, Object<string, number[]>>} features - Feature table:
 *   feature name -> value -> vector slice. Every feature needs a `false` entry.
 * @param {Object<string, (string|boolean)>} mappings - Feature name -> selected value.
 * @returns {{ map: Object, vector: number[] }} `vector` is the concatenated
 *   encoding; `map` holds only the features whose selected slice is not the
 *   `false` fallback.
 * @throws {Error} If `mappings` names a feature that is not in the sorted
 *   feature list, or selects a value the feature does not define.
 */
function makeData(features, mappings) {
  const hasOwn = (object, key) => Object.prototype.hasOwnProperty.call(object, key)
  const featureNames = getSortedFeatureNames()

  // Only names from the sorted list are ever read from `mappings`, so a
  // misspelled key would otherwise be dropped silently and yield a wrong vector.
  const knownNames = new Set(featureNames)
  for (const name of Object.keys(mappings)) {
    if (!knownNames.has(name)) {
      throw new Error(`Unknown feature ${name}.`)
    }
  }

  const vector = []
  const map = {}
  featureNames.forEach(name => {
    const encodings = features[name]
    const value = mappings[name]
    // Own-property lookup only: inherited members such as `constructor` or
    // `toString` must not be mistaken for a defined feature value.
    const provided = hasOwn(encodings, value) ? encodings[value] : undefined
    if (value && !provided) {
      throw new Error(`Missing feature ${name} value ${value}.`)
    }
    const fallback = encodings.false
    const slice = provided ?? fallback
    vector.push(...slice)
    if (provided && provided !== fallback) {
      map[name] = value
    }
  })
  return { map, vector }
}
/**
 * Returns the feature names in alphabetical order.
 *
 * This order fixes the position of each feature inside the vectors built by
 * `makeData`. A new array is returned on every call.
 *
 * @returns {string[]} Alphabetically sorted feature names.
 */
function getSortedFeatureNames() {
  // Grouped for readability only; the final order comes from the sort below.
  const places = [
    'bilabial', 'labiodental', 'dental', 'alveolar', 'retroflex',
    'palatal', 'velar', 'labiovelar', 'glottal',
  ]
  const manners = [
    'nasal', 'fricative', 'approximant', 'flap',
    'lateral', 'plosive', 'stop', 'click',
  ]
  const modifiers = [
    'aspiration', 'dentalization', 'explosivity', 'labialization',
    'nasalization', 'palatalization', 'pharyngealization', 'velarization',
    'voiced', 'tense', 'sibilance', 'length',
  ]
  // All names are lowercase ASCII, so the default sort is plain alphabetical.
  return [...places, ...manners, ...modifiers].sort()
}
/**
 * Prints the cosine similarity between the vectors of two encoded sounds.
 *
 * @param {string} key - Label printed before the score.
 * @param {{ vector: number[] }} a - First encoded sound.
 * @param {{ vector: number[] }} b - Second encoded sound.
 */
function logSimilarity(key, a, b) {
  const { vector: left } = a
  const { vector: right } = b
  const score = cosineSimilarity(left, right)
  console.log(key, score)
}
/**
 * Prints an encoded sound on two lines: its label (padded to four columns)
 * followed by the vector digits joined together, then its feature map as JSON.
 *
 * @param {string} key - Label for the sound.
 * @param {{ vector: number[], map: Object }} data - Encoded sound to print.
 */
function logData(key, data) {
  const label = key.padEnd(4, ' ')
  const digits = data.vector.join('')
  console.log(label, digits)

  // Serialized only after the first line is printed, as the two lines are
  // emitted independently.
  const summary = JSON.stringify(data.map)
  console.log(` ${summary}`)
}