With the code below I am able to retrieve a site's sitemap URLs and their last-modification dates. The problem is that more than 600 URLs are retrieved. How can I filter inside the script so that only URLs whose last-modification date is today are shown, removing the URLs with older dates?
// Handles to the active spreadsheet and its "Sheet1" tab, resolved once when
// the script is loaded.
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheet = ss.getSheetByName('Sheet1');
/**
 * Menu handler: downloads the blog-listing sitemap and hands its entries to
 * appendRows().
 *
 * Two document shapes are handled:
 *   - root <urlset>: its children are passed to appendRows() exactly once;
 *   - any other root (treated as a sitemap index): every <loc> found under
 *     each child is fetched and parsed, and that document's root children
 *     are passed to appendRows().
 *
 * Empty child lists are never forwarded to appendRows().
 */
function getLinks() {
  var response = UrlFetchApp.fetch('https://example.com/blog_listing-sitemap.xml');
  var root = XmlService.parse(response.getContentText()).getRootElement();

  // A plain URL sitemap: append the whole list once instead of once per
  // child element of every entry.
  if (root.getName() === 'urlset') {
    var entries = root.getChildren();
    if (entries.length > 0) {
      appendRows(entries);
    }
    return;
  }

  // Otherwise follow each <loc> to the sitemap it points at.
  var sitemaps = root.getChildren();
  for (var i = 0; i < sitemaps.length; i++) {
    var parts = sitemaps[i].getChildren();
    for (var a = 0; a < parts.length; a++) {
      var element = parts[a];
      if (element.getName() !== 'loc') {
        continue;
      }
      var childXml = UrlFetchApp.fetch(element.getText()).getContentText();
      var childEntries = XmlService.parse(childXml).getRootElement().getChildren();
      if (childEntries.length > 0) {
        appendRows(childEntries);
      }
    }
  }
}
/**
 * Appends one row per sitemap entry below the last used row of the active
 * sheet.
 *
 * A row holds the text of the entry's leaf children (children that have no
 * child elements of their own), in document order. Entries without any leaf
 * children are skipped. Shorter rows are padded with empty strings so the
 * block written to the sheet is rectangular. Nothing is written when no rows
 * remain.
 *
 * @param {Array} items Elements exposing getChildren(); every child must
 *     expose getName(), getText() and getChildren().
 * @param {string} [lastModPrefix] Optional filter. When a non-empty string is
 *     given, only entries having a leaf child named "lastmod" whose trimmed
 *     text starts with this prefix are written. Passing a "yyyy-MM-dd" date
 *     keeps only entries modified on that day, as spelled in the sitemap.
 *     NOTE(review): the date is compared textually, so it is interpreted in
 *     whatever time zone the sitemap used - confirm this matches "today".
 */
function appendRows(items, lastModPrefix) {
  var filtering = typeof lastModPrefix === 'string' && lastModPrefix !== '';
  var rows = [];
  var width = 0;

  for (var i = 0; i < items.length; i++) {
    var children = items[i].getChildren();
    var row = [];
    var keep = !filtering;

    for (var a = 0; a < children.length; a++) {
      var child = children[a];
      if (child.getChildren().length !== 0) {
        continue; // nested elements are not flattened into the row
      }
      var text = child.getText();
      row.push(text);
      if (filtering && child.getName() === 'lastmod' &&
          text.trim().indexOf(lastModPrefix) === 0) {
        keep = true;
      }
    }

    if (keep && row.length > 0) {
      rows.push(row);
      if (row.length > width) {
        width = row.length;
      }
    }
  }

  // getRange() cannot take an empty block, so stop when nothing is left.
  if (rows.length === 0) {
    return;
  }

  // setValues() needs every row to have the same number of columns.
  for (var r = 0; r < rows.length; r++) {
    while (rows[r].length < width) {
      rows[r].push('');
    }
  }

  var sheet = SpreadsheetApp.getActiveSpreadsheet().getActiveSheet();
  sheet.getRange(sheet.getLastRow() + 1, 1, rows.length, width).setValues(rows);
}