使用 Node.js 的 http.get 从 aspx 页面下载文件

Node JS Http.get file from aspx

本文关键字:Http get 文件 JS 节点 aspx      更新时间:2023-09-26

我正在尝试使用Node的Http模块,从该网站上任意一个"下载"按钮对应的链接下载PDF文档。出于某种原因,我的代码下载到的却是一个aspx文件,内容是"错误消息-文件不存在或您没有查看此文件的权限",而同一个文件在web浏览器中可以很容易地下载。我该怎样才能下载到真正的PDF文档,而不是这个aspx文件?这是我的代码:

var pdf_text = require("pdf-text");
var request = require("request");
var http = require("http");
var fs = require("fs");
var cheerio = require("cheerio");
var urllib = require("url");
// NOTE(review): `path` is not used by the download logic below, which names
// the output file after the last segment of the link's pathname.
var path = "final.pdf";
var url = "http://www2.nationalgrid.com/UK/Industry-information/System-charges/Electricity-transmission/Assistance-for-areas-with-high-distribution-costs/";
var links = [];

// Load the index page, collect the href of every ".txtLnk" element into
// `links`, then download the second link to a local file.
request(url, function(error, response, html) {
        if (error || response.statusCode !== 200) {
                console.error("Could not load index page:", error || ("HTTP " + response.statusCode));
                return;
        }

        var $ = cheerio.load(html);
        $(".txtLnk").each(function() {
                links.push("http://www2.nationalgrid.com" + $(this).attr("href"));
        });
        console.log(links);

        // The download below uses links[1]; bail out instead of parsing `undefined`.
        if (links.length < 2) {
                console.error("Expected at least two .txtLnk links, found " + links.length);
                return;
        }

        var target = urllib.parse(links[1]);
        var options = {
                host: target.hostname,
                port: target.port || 80,
                // `path` is pathname + query string. Sending only `pathname` drops the
                // query string, which is presumably why the server answered with its
                // "file does not exist" page instead of the document.
                path: target.path,
                headers: {
                        "User-Agent": "Mozilla/5.0 (X11; Linux i686; rv:43.0) Gecko/201001101 Firefox/43.0"
                }
        };

        http.get(options, function(res) {
                if (res.statusCode !== 200) {
                        console.error("Download failed: HTTP " + res.statusCode);
                        // Drain the body so the socket is released.
                        res.resume();
                        return;
                }
                // Create the file only once a successful response has arrived, so a
                // failed request does not leave an empty file behind.
                var file = fs.createWriteStream(target.pathname.split('/').pop());
                file.on('error', function(err) {
                        console.error("Could not write file:", err.message);
                });
                res.on('error', function(err) {
                        console.error("Download stream failed:", err.message);
                        file.end();
                });
                // pipe() handles backpressure and ends the file when the response ends.
                res.pipe(file);
        }).on('error', function(err) {
                console.error("Download request failed:", err.message);
        });
});
/**
 * Extract the date and rate that follow the "(p/kWh)" marker in a PDF.
 *
 * Reads "pdf/" + pdf through pdf_text, joins the returned text chunks, takes
 * the 15 characters after the first "(p/kWh)" and splits them two characters
 * past the first "/" into a date part and a rate part.
 *
 * pdf_text reports its result through a callback, so the summary cannot be
 * returned from this function; it is handed to `done` instead.
 *
 * @param {string} pdf - File name, resolved relative to the "pdf/" directory.
 * @param {function(?Error, string=)} [done] - Optional Node-style callback that
 *     receives either an error or the formatted summary string. When omitted,
 *     errors are written to console.error and the summary is discarded.
 * @returns {undefined}
 */
function data_from_pdf(pdf, done) {
        var finish = typeof done === "function" ? done : function(err) {
                if (err) {
                        console.error(err);
                }
        };
        var marker = "(p/kWh)";
        var source = "pdf/" + pdf;

        pdf_text(source, function(err, chunks) {
                if (err) {
                        finish(err);
                        return;
                }

                var text = chunks.join("");
                // Literal search: String.prototype.search would treat the
                // parentheses in the marker as a regular-expression group.
                var at = text.indexOf(marker);
                if (at === -1) {
                        finish(new Error("Marker " + marker + " not found in " + source));
                        return;
                }

                var start = at + marker.length;
                var data = text.substring(start, start + 15);
                var split = data.indexOf("/") + 3;
                var date = data.substring(0, split);
                var rate = data.substring(split);
                // NOTE(review): the name is hard-coded rather than taken from `pdf` — confirm intent.
                var json_data = "{" + "\n\tname: " + "final.pdf" + ",\n\tdate: " + date + ",\n\trate: " + rate + "\n}";
                finish(null, json_data);
        });
}

事实证明,只要把传给http.get的"options"直接替换为基本URL,就可以正常工作了。这个奇怪的问题已经解决。:)

试试这个:

var request = require("request");
var fs = require("fs");
var cheerio = require("cheerio");
var path = "./final.pdf";
var url = "http://www2.nationalgrid.com/UK/Industry-information/System-charges/Electricity-transmission/Assistance-for-areas-with-high-distribution-costs/";
var links = [];

// Fetch the index page, gather the href of every ".txtLnk" element into
// `links`, then stream the first link's response body into `path`.
request(url, function onIndexPage(err, resp, body) {
    // Nothing to do unless the page loaded cleanly with status 200.
    if (err || resp.statusCode != 200) {
        return;
    }

    var page = cheerio.load(body);
    page(".txtLnk").each(function collectLink() {
        var href = page(this).attr("href");
        links.push("http://www2.nationalgrid.com" + href);
    });

    var download = request(links[0]);
    download.on('response', function saveBody(incoming) {
        console.log(incoming.headers);
        var target = fs.createWriteStream(path);
        incoming.pipe(target);
    });
});