I have been experimenting with TensorFlow.js in a node.js application. I have a dataset of 8000 images I am using for training, and I ran into some memory limitations. I am trying to set up my program such so that it saves the model after X number of iterations, so if (when) it runs out of memory at least I have saved the results of some of the run.
If I define 6 epochs, it should run 3, save where it’s at, and then continue training. It will run for the first 3, save the model, but then when it passes over ‘history’ something isn’t right and it errors out. The error is in the validateModel function, where it’s trying to access the .acc property of the history object but it’s not there so it stops. Logging the model before and after, it drops the properties even though it just loaded in the model it just saved.
Initially I was compiling the model during every iteration of the for loop, but I added an if statement that should only compile it once. I get the same error afterwards, and from the log I can see that it is just not retaining the learning from the previous epochs. I’ve messed with scope of the history variable, removing layers, reducing batch size and different ones of those parameters, have not been able to get it to continue after a save point. I am using the rsmprop optimizer. Thanks in advance, I’m at my wits end trying to get this to work.
This is the log output:
Checking for model at: C:Usersaldasnode.js projectsaved_modelmodel.json
Loading existing model
Layer name: conv2d_Conv2D1
Input shape: undefined
Output shape: [null,148,148,32]
Layer name: max_pooling2d_MaxPooling2D1
Input shape: undefined
Output shape: [null,74,74,32]
Layer name: conv2d_Conv2D2
Input shape: undefined
Output shape: [null,72,72,64]
Layer name: max_pooling2d_MaxPooling2D2
Input shape: undefined
Output shape: [null,36,36,64]
Layer name: dropout_Dropout1
Input shape: undefined
Output shape: [null,36,36,64]
Layer name: flatten_Flatten1
Input shape: undefined
Output shape: [null,82944]
Layer name: dense_Dense1
Input shape: undefined
Output shape: [null,125]
Layer name: dense_Dense2
Input shape: undefined
Output shape: [null,97]
Epoch 1 / 3
eta=0.0 ========================================================================>
347004ms 174286us/step - acc=0.0545 loss=4.41
Epoch 2 / 3
eta=0.0 ========================================================================>
394713ms 198249us/step - acc=0.0545 loss=4.38
Epoch 3 / 3
eta=0.0 ========================================================================>
412389ms 207126us/step - acc=0.0545 loss=4.35
Layer name after: conv2d_Conv2D1
Input shape after: undefined
Output shape after: [null,148,148,32]
Layer name after: max_pooling2d_MaxPooling2D1
Input shape after: undefined
Output shape after: [null,74,74,32]
Layer name after: conv2d_Conv2D2
Input shape after: undefined
Output shape after: [null,72,72,64]
Layer name after: max_pooling2d_MaxPooling2D2
Input shape after: undefined
Output shape after: [null,36,36,64]
Layer name after: dropout_Dropout1
Input shape after: undefined
Output shape after: [null,36,36,64]
Layer name after: flatten_Flatten1
Input shape after: undefined
Output shape after: [null,82944]
Layer name after: dense_Dense1
Input shape after: undefined
Output shape after: [null,125]
Layer name after: dense_Dense2
Input shape after: undefined
Output shape after: [null,97]
{
"validationData": null,
"params": {
"epochs": 3,
"initialEpoch": null,
"samples": null,
"steps": null,
"batchSize": null,
"verbose": 1,
"doValidation": false,
"metrics": [
"loss",
"acc"
]
},
"epoch": [
0,
1,
2
],
"history": {
"loss": [
4.406312465667725,
4.3756232261657715,
4.34819221496582
],
"acc": [
0.05450892075896263,
0.05450892075896263,
0.05450892075896263
]
}
}
Model improved. Saving model at epoch 3
Checking for model at: C:Usersaldasnode.js projectsaved_modelmodel.json
Loading existing model
[nodemon] restarting due to changes...
Layer name: conv2d_Conv2D1
Input shape: undefined
Output shape: [null,148,148,32]
Layer name: max_pooling2d_MaxPooling2D1
Input shape: undefined
Output shape: [null,74,74,32]
Layer name: conv2d_Conv2D2
Input shape: undefined
Output shape: [null,72,72,64]
Layer name: max_pooling2d_MaxPooling2D2
Input shape: undefined
Output shape: [null,36,36,64]
Layer name: dropout_Dropout1
Input shape: undefined
Output shape: [null,36,36,64]
Layer name: flatten_Flatten1
Input shape: undefined
Output shape: [null,82944]
Layer name: dense_Dense1
Input shape: undefined
Output shape: [null,125]
Layer name: dense_Dense2
Input shape: undefined
Output shape: [null,97]
Layer name after: conv2d_Conv2D1
Input shape after: undefined
Output shape after: [null,148,148,32]
Layer name after: max_pooling2d_MaxPooling2D1
Input shape after: undefined
Output shape after: [null,74,74,32]
Layer name after: conv2d_Conv2D2
Input shape after: undefined
Output shape after: [null,72,72,64]
Layer name after: max_pooling2d_MaxPooling2D2
Input shape after: undefined
Output shape after: [null,36,36,64]
Layer name after: dropout_Dropout1
Input shape after: undefined
Output shape after: [null,36,36,64]
Layer name after: flatten_Flatten1
Input shape after: undefined
Output shape after: [null,82944]
Layer name after: dense_Dense1
Input shape after: undefined
Output shape after: [null,125]
Layer name after: dense_Dense2
Input shape after: undefined
Output shape after: [null,97]
{
"validationData": null,
"params": {
"epochs": 3,
"initialEpoch": null,
"samples": null,
"steps": null,
"batchSize": null,
"verbose": 1,
"doValidation": false,
"metrics": [
"loss",
"acc"
]
},
"epoch": [],
"history": {}
}
An error occurred validateModel: TypeError: Cannot read properties of undefined (reading 'length')
[nodemon] restarting due to changes...
[nodemon] starting `node server.js`
2023-11-01 18:05:46.521366: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
Server running on http://localhost:3000
Here is my training endpoint and variable declarations:
const IMAGE_WIDTH = 150;
const IMAGE_HEIGHT = 150;
const BATCH_SIZE = 4; // Number of images? used per session/layer?
const chunkSize = 3; // Number of epochs to run before saving a checkpoint
const totalEpochs = 6; // Total number of iterations to run
const LEARNING_RATE = 0.001; // Original/default value is .0001
// const optimizer = tf.train.adam(LEARNING_RATE); Best results: .06 accuracy
//const optimizer = tf.train.sgd(LEARNING_RATE); not used yet
const optimizer = tf.train.rmsprop(LEARNING_RATE);
const SAVE_PATH = 'file://C:/Users/aldas/node.js project/saved_model';
const trainingDir = 'C:\Users\aldas\node.js project\image_dump';
const { images, labels } = readAndLabelImages(trainingDir);
let NUM_CLASSES, model, speciesLabels, labelIndex, lastEpochAcc, history;
app.post('/train', async (req, res) => {
try {
let bestValidationMetric = 0;
const stepsPerEpoch = Math.ceil(images.length / BATCH_SIZE);
for (let startEpoch = 1; startEpoch <= totalEpochs; startEpoch += chunkSize) {
// console.log('Memory info at start of chunk:', tf.memory());
const localSavePath = SAVE_PATH.replace('file://', '');
const checkPath = path.join(localSavePath, 'model.json');
console.log(`Checking for model at: ${checkPath}`);
if (fs.existsSync(checkPath)) {
console.log('Loading existing model');
model = await tf.loadLayersModel(`${SAVE_PATH}/model.json`);
if (!model.optimizer){ // If optimizer attached, good sign model was compiled already. Only compile once
await compileModel();
}
} else {
console.log('No existing model found, using a new one');
await initializeModel();
}
try {
ds = tf.data.generator(() => imageBatchGenerator(images, labels));
model.layers.forEach(layer => {
console.log(`Layer name: ${layer.name}`);
console.log(`Input shape: ${JSON.stringify(layer.inputShape)}`);
console.log(`Output shape: ${JSON.stringify(layer.outputShape)}`);
});
history = await model.fitDataset(ds, {
epochs: chunkSize,
initialEpoch: startEpoch - 1,
stepsPerEpoch: stepsPerEpoch
});
model.layers.forEach(layer => {
console.log(`Layer name after: ${layer.name}`);
console.log(`Input shape after: ${JSON.stringify(layer.inputShape)}`);
console.log(`Output shape after: ${JSON.stringify(layer.outputShape)}`);
});
// Validate
console.log(JSON.stringify(history, null, 2))
const currentValidationMetric = await validateModel(history);
// Check if we should save this model
if (currentValidationMetric > bestValidationMetric) {
bestValidationMetric = currentValidationMetric;
await model.save(SAVE_PATH);
console.log(`Model improved. Saving model at epoch ${startEpoch + chunkSize - 1}`);
}
} catch (err) {
console.error(`An error occurred history: ${err}`);
}
}
// Save the labelIndex to a JSON file
fs.writeFileSync(path.join(__dirname, '/saved_model/speciesLabels.json'), JSON.stringify(labelIndex));
res.json({ status: 'Training complete', code: 0 });
} catch (err) {
console.error(`An error occurred: ${err}`);
res.json({ status: 'Training failed', code: 1, error: err.message });
}
});
async function validateModel(history) {
// Assume the last epoch's accuracy is the final element in the history object
try {
lastEpochAcc = history.history.acc[history.history.acc.length - 1];
return lastEpochAcc;
} catch (err) {
console.error(`An error occurred validateModel: ${err}`);
}
}
async function* imageBatchGenerator(imagePaths, labels) {
let index = 0;
// console.log('imagePaths length: ', imagePaths.length)
// console.log('labels length: ', labels.length)
while (index < imagePaths.length) {
const batchImagePaths = imagePaths.slice(index, index + BATCH_SIZE);
const batchLabels = labels.slice(index, index + BATCH_SIZE);
const imageTensors = (await Promise.all(batchImagePaths.map(async (imgPath, i) => {
const parsedResult = await parseImage(imgPath, batchLabels[i]);
return parsedResult ? parsedResult.xs : null;
}))).filter(tensor => tensor !== null);
const xsBatch = tf.stack(imageTensors);
imageTensors.forEach(tensor => tensor.dispose());
const validLabelIndices = batchLabels.slice(0, imageTensors.length).map(label => labelIndex[label]);
const ysBatch = tf.oneHot(validLabelIndices, NUM_CLASSES);
yield { xs: xsBatch, ys: ysBatch };
//ysBatch.dispose();
index += BATCH_SIZE;
}
}


