Error: Unexpected Close Tag
asontha opened this issue · comments
Hey! I have a script that loads sitemaps, and then uses xml2js to convert them into JSON for use. Previously it worked just fine, but now I seem to always be getting an unexpected close tag. Did something change? I recently tried upgrading to 0.5.0 to see if that would fix it but still running into the same issue.
Sitemap url: https://www.zurichna.com/sitemap.xml
Version ^0.4.23
Error: Unexpected close tag
Line: 6
Column: 14
Char: >
at error (<SCRIPT_PATH>/node_modules/sax/lib/sax.js:667:10)
at strictFail (<SCRIPT_PATH>/node_modules/sax/lib/sax.js:693:7)
at closeTag (<SCRIPT_PATH>/node_modules/sax/lib/sax.js:887:9)
at SAXParser.write (<SCRIPT_PATH>/node_modules/sax/lib/sax.js:1449:13)
at exports.Parser.Parser.parseString (<SCRIPT_PATH>/node_modules/xml2js/lib/parser.js:323:31)
at Parser.parseString (<SCRIPT_PATH>/node_modules/xml2js/lib/parser.js:5:59)
at <SCRIPT_PATH>/node_modules/xml2js/lib/parser.js:338:24
at new Promise (<anonymous>)
at exports.Parser.Parser.parseStringPromise (<SCRIPT_PATH>/node_modules/xml2js/lib/parser.js:336:14)
at Parser.parseStringPromise (<SCRIPT_PATH>/node_modules/xml2js/lib/parser.js:5:59)
<SCRIPT_PATH>/scraping.js:173
site_urls = site_map_json.urlset.url.map(u => u.loc[0])
^
TypeError: Cannot read properties of undefined (reading 'url')
at main (<SCRIPT_PATH>/scraping.js:173:40)
at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
Version ^0.5.0
Error: Unexpected close tag
Line: 6
Column: 14
Char: >
at error (<SCRIPT_PATH>/node_modules/sax/lib/sax.js:667:10)
at strictFail (<SCRIPT_PATH>/node_modules/sax/lib/sax.js:693:7)
at closeTag (<SCRIPT_PATH>/node_modules/sax/lib/sax.js:887:9)
at SAXParser.write (<SCRIPT_PATH>/node_modules/sax/lib/sax.js:1449:13)
at exports.Parser.Parser.parseString (<SCRIPT_PATH>/node_modules/xml2js/lib/parser.js:327:31)
at Parser.parseString (<SCRIPT_PATH>/node_modules/xml2js/lib/parser.js:5:59)
at <SCRIPT_PATH>/node_modules/xml2js/lib/parser.js:342:24
at new Promise (<anonymous>)
at exports.Parser.Parser.parseStringPromise (<SCRIPT_PATH>/node_modules/xml2js/lib/parser.js:340:14)
at Parser.parseStringPromise (<SCRIPT_PATH>/node_modules/xml2js/lib/parser.js:5:59)
<SCRIPT_PATH>/scraping.js:173
site_urls = site_map_json.urlset.url.map(u => u.loc[0])
^
TypeError: Cannot read properties of undefined (reading 'url')
at main (<SCRIPT_PATH>/scraping.js:173:40)
at process.processTicksAndRejections (node:internal/process/task_queues:95:5)
Here's how I'm using xml2js:
/**
 * Fetch the sitemap XML from the given URL.
 * @param {string} sitemap_url - Absolute URL of the sitemap.
 * @returns {Promise<object>} Resolves with the axios HTTP response.
 * @throws Rethrows the axios error so callers can distinguish failure from success.
 */
async function fetchSiteMapXML(sitemap_url) {
  try {
    // The original .then(response => response) was a no-op; await directly.
    return await axios.get(sitemap_url);
  } catch (error) {
    console.log('fetchSiteMapXML Request Failed:', error);
    // Rethrow instead of returning the error object: returning it made the
    // caller treat a failed request as a success and read `.data` off an Error.
    throw error;
  }
}
/**
 * Parse an XML string into a plain JavaScript object.
 * @param {string} xml - Raw XML text (e.g. a sitemap body).
 * @returns {Promise<object>} Resolves with the parsed document.
 * @throws Rethrows the parse error (e.g. sax "Unexpected close tag") instead
 *   of returning it — the original returned the Error as the resolved value,
 *   which is why downstream code crashed reading `.urlset` off an Error.
 */
async function convertXMLToJSON(xml) {
  const parser = new xml2js.Parser();
  try {
    const result = await parser.parseStringPromise(xml);
    console.log("JSON conversion complete")
    return result;
  } catch (error) {
    console.log("JSON conversion failed");
    throw error;
  }
}
The output of `fetchSiteMapXML` is fed directly into `convertXMLToJSON`.
Here's how these are being called in the script
// Fetch the sitemap, parse it, and extract the URL list.
const site_map_response = await fetchSiteMapXML(site_map_url);
const site_map_json = await convertXMLToJSON(site_map_response.data);
console.log("Sitemap JSON: ", site_map_json);
// `urlset.url` is absent when the server returns something other than a valid
// sitemap (e.g. an HTML bot-blocker page), so guard instead of crashing with
// "Cannot read properties of undefined". Also: declare with const — the
// original assigned an implicit global.
const site_urls = (site_map_json?.urlset?.url ?? []).map((u) => u.loc[0]);
Update: it turns out the website had a bot blocker on its sitemap, so the request was returning an HTML block page instead of XML — which is why the parser reported "Unexpected close tag". Not an xml2js bug.