嵌套的 async.eachSeries 导致迭代次数不断累加

Nested async.eachSeries stacks iterations

本文关键字:堆栈 迭代 系列 each 异步 嵌套      更新时间:2023-09-26

以下函数接收链接数组并扫描远程网站,每页获取 10 篇博客文章,然后使用 async.waterfall 获取博客文章的每条评论。

/**
 * Scans each page in `url` one at a time: builds one post object per
 * `.blogpost` element found on the page, then requests each post's link and
 * appends one comment object per `.comment` element to that post.
 *
 * NOTE(review): `arrayOfPosts` is shared by all pages AND is the value the
 * first waterfall step passes to the second, so the inner loop walks the
 * posts of every page collected so far, not only the current page's posts.
 *
 * @param {Array} url - page links, processed sequentially
 * @param {Function} cb - called with the accumulated array of posts
 */
export default function getData(url, cb) {
  // Accumulates the posts of ALL pages; never reset between pages.
  const arrayOfPosts = [];
  // Outer loop: one iteration per page link, in series
  async.eachSeries(url, (link, topLVLcb) => {
    // Waterfall: step 1 collects posts, step 2 collects their comments
    async.waterfall([
      // Collects links to posts
      callback => {
        // NOTE(review): `err` is never checked; on failure `body` is
        // presumably undefined when handed to cheerio — confirm.
        request(link, (err, response, body) => {
          console.log(`working on ${link}`);
          const $ = cheerio.load(body);
          // OVERALL 10 LINKS PER ONE BLOGPOST
          $('.blogpost').each((i, element) => {
            // build post object
            // NOTE(review): `content` is not defined in this snippet
            // (presumably extracted from `element`), and `link` is the
            // page link from the outer loop, not a per-post link.
            const post =  {
              content,
              link,
              comments: []
            }
            arrayOfPosts.push(post);
          });
          // Hands the shared, ever-growing array to the next step.
          callback(null, arrayOfPosts);
        });
      },
      // Looks for details in given post
      (arrOfPosts, postDetailsCallback) => {
        // Restarts at 1 for every page; only used for the progress log.
        let counter = 1;
        // Inner loop through 10 links
        // (in practice: through every post in `arrOfPosts`, which is the
        // shared array and therefore includes earlier pages' posts)
        async.eachSeries(arrOfPosts, (post, eachSeriesCallback) => {
          request(post.link, (err, response, body) => {
            console.log(counter++);
            const $ = cheerio.load(body);
            $('.comment').each((i, element) => {
              // build comment
              // NOTE(review): `author` and `content` are not defined in
              // this snippet — presumably extracted from `element`.
              const comment = {
                author,
                content
              };
              post.comments.push(comment);
            });
            // Request errors are not forwarded; always signals success.
            eachSeriesCallback(null);
          });
        }, postDetailsCallback);
      }
    ], err => {
      // Runs once per page, whether or not the waterfall failed.
      console.log('DONE PAGE');
      console.log('*************************');
      topLVLcb(err);
    });
    // NOTE(review): async documents a single `err` argument for this final
    // callback, so `result` would receive the error and `err` would stay
    // undefined — confirm against the async version in use.
  }, (result, err) => {
    if (err) {
      throw err;
    } else {
      console.log('DONE ALL');
      cb(arrayOfPosts);
    }
  });
}

它提供如下输出:

working on www.mywebsite.com/
1
2
3
4
5
6
7
8
9
10
DONE PAGE
**************************************************************
working on www.mywebsite.com/posts/1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
DONE PAGE
**************************************************************

每处理一个页面,内层循环的迭代次数就增加 10,并且每次都从头重新开始;而实际上每个页面应该只扫描 10 次。我想我是把某些回调搞错了,但折腾了好几个小时也没能找出具体问题所在。这是我第一次写 Node.js 异步代码,实在让人不知所措。

看起来您把所有帖子都存储在顶层的 arrayOfPosts 中,这意味着在瀑布(waterfall)的第二个函数中,您会从头开始处理所有帖子,因为传给回调的正是这个数组。因此,在收集帖子链接的第一个函数中,您应该使用一个局部的帖子数组,并把这个局部数组传递给下一个函数。

arrayOfPosts 对每一次 async.waterfall 调用来说都是全局共享的……对于 url 中的每个元素,您都应该创建一个新的临时数组,如下所示:

/**
 * Scans each page in `url` one at a time: builds one post object per
 * `.blogpost` element found on the page, then requests each of that page's
 * posts and appends one comment object per `.comment` element to the post.
 *
 * Only the posts found on the current page are handed to the comment step,
 * so the inner loop runs once per post of that page instead of re-visiting
 * the posts of earlier pages.
 *
 * @param {Array} url - page links, processed sequentially
 * @param {Function} cb - called with the array of all collected posts once
 *   every page has been processed successfully
 * @throws the first request/iteration error, rethrown from the final callback
 */
export default function getData(url, cb) {
  // Posts of all pages; this is what the caller finally receives.
  const arrayOfPosts = [];

  // Outer loop: one iteration per page link, in series
  async.eachSeries(url, (link, topLVLcb) => {
    // Posts found on THIS page only; passing this (rather than the shared
    // array) to the next waterfall step keeps the inner loop per-page.
    const pagePosts = [];

    async.waterfall([
      // Collects the posts of the current page
      callback => {
        request(link, (err, response, body) => {
          if (err) {
            // Abort this page instead of parsing a missing body.
            callback(err);
            return;
          }
          console.log(`working on ${link}`);
          const $ = cheerio.load(body);
          $('.blogpost').each((i, element) => {
            // NOTE(review): `content` is not defined in this snippet
            // (presumably extracted from `element`), and `link` is the page
            // link from the outer loop — confirm against the full file.
            const post = {
              content,
              link,
              comments: [],
            };
            pagePosts.push(post);
            arrayOfPosts.push(post);
          });
          callback(null, pagePosts);
        });
      },
      // Fetches the comments of each post of the current page
      (arrOfPosts, postDetailsCallback) => {
        // Progress counter for the log; restarts at 1 on every page.
        let counter = 1;
        async.eachSeries(arrOfPosts, (post, eachSeriesCallback) => {
          request(post.link, (err, response, body) => {
            if (err) {
              eachSeriesCallback(err);
              return;
            }
            console.log(counter++);
            const $ = cheerio.load(body);
            $('.comment').each((i, element) => {
              // NOTE(review): `author` and `content` are not defined in this
              // snippet — presumably extracted from `element`.
              const comment = {
                author,
                content,
              };
              post.comments.push(comment);
            });
            eachSeriesCallback(null);
          });
        }, postDetailsCallback);
      },
    ], err => {
      // Logged once per page, whether or not the waterfall failed.
      console.log('DONE PAGE');
      console.log('*************************');
      topLVLcb(err);
    });
  }, err => {
    // async passes only `err` to the final callback; declaring it as the
    // second parameter would leave it undefined and hide every failure.
    if (err) {
      throw err;
    }
    console.log('DONE ALL');
    cb(arrayOfPosts);
  });
}