为什么这是未处理的承诺拒绝

Why is this an Unhandled Promise Rejection?

本文关键字:承诺 拒绝 未处理 为什么      更新时间:2023-09-26

我正在尝试重写我的代码以正确利用承诺。

完整的程序应该从T恤站点抓取数据。第一个代码块应该进入网站的首页,获取立即可用的产品页面,然后将URL存储在数组中。剩余的 URL 将存储在"剩余"中,以便稍后执行第二次抓取。

目前手动对每个部分进行单元测试:

//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
    //Save the scraped data in a spreadsheet (CSV format).

//Modules being used:
var cheerio = require('cheerio');
var request = require('request');
//harcoded url
var url = 'http://shirts4mike.com/';
//url for tshirt pages
var urlSet = new Set();
var remainder;
   const requestPromise = function(url) {
    return new Promise(function(resolve, reject) {
        request(url, function(error, response, html) {
            if(error) return reject(error);
            if(!error && response.statusCode == 200){
                return resolve(html);   
            }       
        });
    });
}

function firstScrape (url) {
    return requestPromise(url)
        .then(function(html) {
            var $ = cheerio.load(html);
            var links = [];
            //get all the links
            $('a[href*=shirt]').each(function(){
                var a = $(this).attr('href');
                //add into link array
                links.push(url + a);
            });
            return links;
            // return this array you've made
        });
}

function nextStep (arrayOfLinks) { 
    var promiseArray = [];
    for(var link in arrayOfLinks){
        promiseArray.push(requestPromise(link));
        return Promise.all(promiseArray);
    }                   
}

function lastStep (arrayOfHTMLresults){ 
    for(var html in arrayOfHTMLresults){
        var $ = cheerio.load(html);
        //if page has a submit it must be a product page
        if($('[type=submit]').length !== 0){
            //add page to set
            urlSet.add(scrapeLink);
        } else if(remainder == undefined) {
            //if not a product page, add it to remainder so it another scrape can be performed.
            remainder = scrapeLink;                         
        }
    }
    console.log(urlSet);
    console.log(remainder);
}

firstScrape(url)
    .then(nextStep)
    .then(lastStep);

我目前收到以下错误:

(node:71094) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 3): Error: Invalid URI "0"

这是我试图承诺的代码:

// Load front page of shirts4mike
function firstScrape(){
    request(url, function(error, response, html) {
        if(!error && response.statusCode == 200){
            var $ = cheerio.load(html);
        //iterate over links with 'shirt'
            $('a[href*=shirt]').each(function(){
                var a = $(this).attr('href');
                //create new link
                var scrapeLink = url + a;
                //for each new link, go in and find out if there is a submit button. 
                //If there, add it to the set
                request(scrapeLink, function(error,response, html){
                    if(!error && response.statusCode == 200) {
                        var $ = cheerio.load(html);
                        //if page has a submit it must be a product page
                        if($('[type=submit]').length !== 0){
                            //add page to set
                            urlSet.add(scrapeLink);
                        } else if(remainder == undefined) {
                            //if not a product page, add it to remainder so it another scrape can be performed.
                            remainder = scrapeLink;                         
                        }
                    }
                });
            });     
        }
    });
}

我无法解决的是,当lastStep()不知道scrapeLink是什么时,如何在中使用urlSet.add(scrapeLink);

知道为什么吗?谢谢

.add()

是一个Array.prototype方法,你也可以return promiseArray for循环,而不是将Promise推送到promiseArray并使用Promise.all()

function nextStep (arrayOfLinks) { 
    var promiseArray = [];
    for(var i = 0; i < arrayOfLinks.length; i++) {
        var link = requestPromise(arrayOfLinks[i]);
        promiseArray.push(link);
    }   
    return Promise.all(promiseArray)                
}

由于问题更改而更新:

因此,从firstScrape()中,您可以返回结果对象,而不仅仅是链接数组:

return { scrapeLink: link, result: links }

然后,由于承诺,您可以再次返回具有相同形状的东西,从而在nextStep()中得到它:

return { scrapeLink: firstStepResult.scrapLink, result: Promise.all(promiseArray) }

然后,在lastStep()而不是arrayOfHTMLresults传入,您将拥有一个对象,如下所示:

{ scrapeLink: "http://someurl.com", result: arrayOfHTMLresults }

以前的答案:

您需要在 for...循环中。例如,使用 constvarlet,具体取决于您的用例和 JS 版本。

for(var link in arrayOfLinks){
    promiseArray.add(requestPromise(link));
    return promiseArray;
}