I am currently trying to get the page count of a Word document in Open XML (.docx) format. I have gotten to the point where I have the XML structure of the document in a readable format, but I can't seem to find where the page count property is stored. Any guidance would be appreciated.
const fs = require("fs");
const path = require("path");

const axios = require("axios");
const unzipper = require("unzipper");
// URLs of the Word documents to download and inspect.
// NOTE(review): this placeholder has no URL scheme; the HTTP client presumably
// needs an absolute URL (e.g. "https://...") — confirm before running.
const noRepeatDocs = ["somewebsite.com/somedocument.docx"];
/**
 * Writes the given document bytes to disk.
 *
 * @param {string|Buffer|TypedArray|DataView} data - Content to write.
 * @param {string} [filePath="read_word_doc"] - Destination path. Defaults to
 *   the file name this script has always used, so existing callers are
 *   unaffected.
 * @returns {Promise<void>} Resolves once the write has completed; rejects if
 *   the file cannot be written.
 */
const writeTheFile = async (data, filePath = "read_word_doc") => {
  // Use the promise-based API so the write does not block the event loop
  // inside an async function.
  await fs.promises.writeFile(filePath, data);
};
/**
 * Extracts a single entry from a zip archive (a .docx file is a zip archive)
 * and writes it to disk.
 *
 * The returned promise settles only after the archive has been fully parsed
 * and the extracted entry has been flushed to `outputPath`, so callers can
 * safely `await` it before reading the output.
 *
 * NOTE(review): the page count is most likely not in word/document.xml; in
 * Open XML packages it is typically stored in the `<Pages>` element of
 * docProps/app.xml — confirm against the Open XML documentation. Pass
 * "docProps/app.xml" as `entryPath` to extract that part instead.
 *
 * @param {string} data - Path of the zip/.docx file to read.
 * @param {string} [entryPath="word/document.xml"] - Archive entry to extract.
 * @param {string} [outputPath="./output"] - Where the extracted entry is written.
 * @returns {Promise<void>} Resolves when extraction is complete; rejects on a
 *   read, parse, or write error.
 */
const unzipTheFile = async (
  data,
  entryPath = "word/document.xml",
  outputPath = "./output"
) => {
  const pendingWrites = [];

  await new Promise((resolve, reject) => {
    const source = fs.createReadStream(data);
    // pipe() does not forward errors from the source stream, so listen here too.
    source.on("error", reject);

    source
      .pipe(unzipper.Parse())
      .on("entry", (entry) => {
        if (entry.path !== entryPath) {
          // Every entry must be consumed or the parser stalls.
          entry.autodrain();
          return;
        }
        // Track write completion separately: the parser may finish before the
        // output file has been flushed. Write errors reject the outer promise
        // directly so they can never become unhandled rejections.
        pendingWrites.push(
          new Promise((done) => {
            entry
              .pipe(fs.createWriteStream(outputPath))
              .on("error", reject)
              .on("finish", done);
          })
        );
      })
      .on("error", reject)
      .on("finish", resolve);
  });

  await Promise.all(pendingWrites);
};
/**
 * Processes a list of Word document URLs one at a time: downloads each
 * document as binary data, hands the bytes to writeTheFile, and then runs
 * unzipTheFile on "./read_word_doc".
 *
 * Documents are handled sequentially because every iteration goes through the
 * same on-disk file.
 *
 * @param {Iterable<string>} arr - URLs of the documents to fetch.
 * @returns {Promise<void>} Resolves after every URL has been processed.
 */
const getWordBuffer = async (arr) => {
  for (const link of arr) {
    const request = {
      url: link,
      method: "GET",
      responseType: "arraybuffer",
    };
    const { data: docBytes } = await axios(request);

    await writeTheFile(docBytes);
    await unzipTheFile("./read_word_doc");
  }
};
// Script entry point: process every configured document. The rejection
// handler keeps the promise from floating and reports failures explicitly.
getWordBuffer(noRepeatDocs).catch((err) => {
  console.error(err);
  process.exitCode = 1;
});