So I started off with this as a base How to get this PRNG to generate numbers within the range? Now I modified it like so (sorry for the messy code):
// https://primes.utm.edu/curios/index.php?start=5&stop=5
const fs = require('fs')
const startParts = `mi
ma
mo
ne
nu
di
da
do
be
bu
ti
te
ta
to
tu
ki
ke
ka
ko
ku
si
sa
so
ze
zu
fi
fa
fo
ve
vu
xe
xu`.split(/n+/)
const endParts = `le
ru
mu
ne
du
be
te
tu
ke
ku
su
ze
fu
ve
xe
xu`.split(/n+/)
const parts = startParts.concat(endParts)
const pattern = new RegExp(parts.map(x => `${x}${x}${x}`).join('|'))
const fetch = (x, o) => {
if (x >= o) {
return x
} else {
const v = (x * x) % o
return (x <= (o / 2n)) ? v : o - v
}
}
const SIZES = {
64: {
fetch: 41223334444555556666667777777888888889999999997n,
xor: 2030507011013017019023n,
j: 272261127249452727280272961627319532734291n,
},
32: {
fetch: 3132343537383103113163n,
modulo: BigInt(Math.pow(32, 15)),
xor: 541613713n, // possibly half the size or less, and prime.
j: 975319753197531975319n // almost as big as the other.
},
// total: 68,719,476,736
// total: (32 * 16) * (32 * 16) * (32 * 16) * 32 = 4,294,967,296
14: {
fetch: 3778888999n,
modulo: BigInt(Math.pow(32 * 16, 3) * 32),
xor: 54121n,
j: 1012639687n,
max: 14
},
12: {
// 134,217,728
fetch: 134095867n,
modulo: BigInt(Math.pow(32 * 16, 3)),
xor: 7333n,
j: 118818811n,
max: 12
}
}
const SIZE = SIZES[12]
const fetchLarge = (x) => fetch(x, SIZE.fetch)
// the last number can be anything.
// MODIFIED THIS
const buildLarge = (x, o) => fetchLarge((fetchLarge(x) + o) % SIZE.modulo ^ SIZE.xor)
// )
function createArray(n, fn) {
if (!n) return [0]
let arr = []
let i = 0
while (n) {
let mod = fn(i++)
arr.push(Number(n % mod))
n /= mod
}
return arr
}
let i = 1n
let matched = {}
let matchedI = 1
const write = () => {
let maxSize = SIZE.max
const x = buildLarge(i++, SIZE.j)
let chunks = createArray(x, i => i % 2 === 0 ? 32n : 16n).map((x, i) => {
if (i % 2 === 0) {
return startParts[x]
} else {
return endParts[x]
}
}).join('')
if (chunks.length < (maxSize - 2)) {
chunks = `mimi${chunks}`
} else if (chunks.length < (maxSize)) {
chunks = `mi${chunks}`
}
if (chunks.match(pattern)) return write()
if (matched[chunks]) throw new Error(chunks + ' ' + Object.keys(matched).length + ' ' + matched[chunks])
matched[chunks] = matchedI++
const chunked = chunk(chunks.split(''), 4).map(x => x.join('')).join(':')
return chunked
}
const map = fs.readFileSync('tmp/taxon.txt', 'utf-8')
.trim()
.split(/n+/)
.map(x => x.split(/t+/)[3])
.reduce((m, x) => {
let p = x.trim().split(/s+/).join(' ')
if (p.match(/d/)) return m
m[p.toLowerCase()] = true
return m
}, {})
const list = chunk(Object.keys(map).map(key => `${write()},${key}`).sort(), 5000)
list.forEach((l, i) => {
fs.writeFileSync(`tmp/taxon/${i + 1}.csv`, l.join('n'))
})
fs.writeFileSync('tmp/code.csv', String(i))
function chunk (arr, len) {
var chunks = [],
i = 0,
n = arr.length;
while (i < n) {
chunks.push(arr.slice(i, i += len));
}
return chunks;
}
The taxon.txt
looks like this (I greatly truncated the taxon file).
It is throwing this duplicate error:
$ node tmp/taxon2
/Users/me/tmp/taxon2.js:135
if (matched[chunks]) throw new Error(chunks + ' ' + Object.keys(matched).length + ' ' + matched[chunks])
^
Error: mikukenekeku 56542 7490
at write (/Users/me/tmp/taxon2.js:135:30)
at /Users/me/tmp/taxon2.js:152:51
at Array.map (<anonymous>)
at Object.<anonymous> (/Users/me/tmp/taxon2.js:152:37)
at Module._compile (node:internal/modules/cjs/loader:1095:14)
at Object.Module._extensions..js (node:internal/modules/cjs/loader:1124:10)
at Module.load (node:internal/modules/cjs/loader:975:32)
at Function.Module._load (node:internal/modules/cjs/loader:816:12)
at Function.executeUserEntryPoint [as runMain] (node:internal/modules/run_main:79:12)
at node:internal/main/run_main_module:17:47
Why is it running into duplicates?
If I comment out these lines, it passes however:
if (chunks.length < (maxSize - 2)) {
chunks = `mimi${chunks}`
} else if (chunks.length < (maxSize)) {
chunks = `mi${chunks}`
}
But, there is an error in that case too: the text of some of the lines/strings is not long enough. I basically tried padding it with the zero element, but for some reason it’s erroring now, any ideas?
Inside the write
function I fetch the next pseudo-random number from the fancy mathematical sequencer at const x = buildLarge(i++, SIZE.j)
. Then I do createArray(x, i => i % 2 === 0 ? 32n : 16n)
which either divides by 32 on even index or 16 on odd index, so the value can be fetched from the startParts
or endParts
arrays respectively.
I tried doing .reverse()
after createArray
, but that doesn’t seem to help.
I think it just has something to do with the padding but not sure yet.