From fc2420c244b682ad514363fbdb7cac3d3ae574bf Mon Sep 17 00:00:00 2001
From: Richard Hansen
Date: Sun, 24 Jan 2021 18:50:50 -0500
Subject: [PATCH] contentcollector: Fix iteration over child Nodes

In the DOM, `.children` only includes children that are Element
objects. In cheerio 0.22.0, `.children` includes all child Nodes, not
just Elements. Use `dom.numChildNodes()` and `dom.childNode()` so that
browsers behave the same as cheerio.
---
 src/static/js/contentcollector.js | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/static/js/contentcollector.js b/src/static/js/contentcollector.js
index 86043baf8..f940f257b 100644
--- a/src/static/js/contentcollector.js
+++ b/src/static/js/contentcollector.js
@@ -496,16 +496,8 @@ const makeContentCollector = (collectStyles, abrowser, apool, className2Author)
             // lists do not need to have a type, so before we make a wrong guess
             // check if we find a better hint within the node's children
             if (!rr && !type) {
-              // If `node` is from the DOM (not cheerio) then it implements the ParentNode interface
-              // and `node.children` is a HTMLCollection. The DOM + Web IDL specs guarantee that
-              // HTMLCollection implements the iterable protocol, so for..of iteration should always
-              // work. See: https://stackoverflow.com/a/41759532. Cheerio behaves the same with
-              // regard to iteration.
-              //
-              // TODO: The set of Nodes included in node.children differs between the DOM and
-              // cheerio 0.22.0: cheerio includes all child Nodes (including non-Element Nodes)
-              // whereas the DOM only includes Nodes that are Elements.
-              for (const child of node.children) {
+              for (let i = 0; i < dom.numChildNodes(node); i++) {
+                const child = dom.childNode(node, i);
                 if (child && child.name === 'ul') {
                   type = dom.getAttribute(child, 'class');
                   if (type) {