CasperJS-内存耗尽

CasperJS - Memory Exhausted

本文关键字:内存 CasperJS-      更新时间:2024-03-17

当我通过命令行运行下面的 CasperJS 脚本时,它会正常运行一两个小时,然后命令行显示"Memory Exhausted"(内存耗尽)。我搞不清楚是什么原因造成的。

此外,也欢迎提供一些让这段代码更易读、更易修改的一般性建议,因为我将在一个月后完成该项目。

// File-system module; used below via fs.exists / fs.open / fs.write.
var fs = require('fs');
// Scratch array holding the CSV fields of the physician being written.
var currentPhysician = [];
// Only its length is echoed when the run ends; nothing visible here fills it.
var physicianData = [];
// Highest permit number that will be searched before the run stops.
var permitMax = 99999;
// Permit numbers read back from an existing CSV file when resuming a run.
var alreadyParsed = [];
// CSV file that scraped rows are appended to.
var targetFile = "CMQphysicians.csv";
// Timestamp (ms since epoch) captured when the script starts.
var startTime = new Date().getTime();
// Current permit number; incremented before each search is submitted.
var permitNumber = -1;
// Permit number the run started from (set when an existing CSV is read).
var firstLicense = 0;
// CasperJS utils module (not referenced by the visible code).
var utils = require('utils');
/**
 * Reports whether this string includes the given substring.
 * @param {string} s - Substring to look for.
 * @returns {boolean} true when `s` occurs anywhere in this string.
 */
String.prototype.contains = function (s) {
    var position = this.indexOf(s);
    return position !== -1;
};

// Create the CasperJS instance: verbose logging at "info" level, with image
// and plugin loading switched off in the page settings.
var casper = require('casper').create({
    verbose: true,
    logLevel: "info",
    pageSettings: {
        // Skip image downloads.
        loadImages: false,
        // Skip NPAPI plugins (Flash, Silverlight, ...).
        loadPlugins: false
    }
});

/**
 * Formats the global `permitNumber` as a string, left-padded with zeros to a
 * minimum width of five characters. Longer values are returned unpadded.
 * @returns {string} The zero-padded permit number.
 */
function getPermitNumberString() {
    var padded = permitNumber.toString();
    var missing = 5 - padded.length;
    while (missing > 0) {
        padded = '0' + padded;
        missing--;
    }
    return padded;
}
/**
 * Collects the text of every cell in the details table. Runs inside the
 * remote page (it is handed to casper.evaluate), so it may only use `document`.
 * @returns {string[]} The innerText of each matching <td>, in document order.
 */
function getDetailsData() {
    var cells = document.querySelectorAll('#content-html > table.griddetails > tbody > tr > td');
    var texts = [];
    for (var idx = 0; idx < cells.length; idx++) {
        texts.push(cells[idx].innerText);
    }
    return texts;
}
/**
 * Counts the links found in the first cell of the second row of the result
 * grid. Runs inside the remote page (it is handed to casper.evaluate).
 * @returns {number} Number of matching anchor elements.
 */
function getPhysicianCount() {
    var resultLinks = document.querySelectorAll("#GViewList > tbody > tr:nth-child(2) > td:nth-child(1) > a");
    return resultLinks.length;
}
/**
 * Reports failed resource requests.
 *
 * Errors whose URL mentions "google" are not printed. When the error string
 * contains 'undefined' the run is aborted.
 *
 * @param {Object} resourceError - Error descriptor; `url`, `errorCode`,
 *     `errorString` and `id` are read from it.
 */
casper.on("resource.error", function (resourceError) {
    if (!resourceError.url.contains('google')) {
        this.echo("Resource error: " + "Error code: " + resourceError.errorCode + " ErrorString: " + resourceError.errorString + " url: " + resourceError.url + " id: " + resourceError.id, "ERROR");
    }
    // Abort with a message instead of spinning in an empty loop: a busy-wait
    // here blocks the single script thread forever at full CPU and gives no
    // hint about why the run stopped making progress.
    if (resourceError.errorString.contains('undefined')) {
        this.die("Aborting: resource error string contains 'undefined' (url: " + resourceError.url + ")", 1);
    }
});
// No-op hook for the 'load.started' event; the debug echo inside is disabled.
casper.on('load.started', function () {
    //casper.echo('load started');
});
// No-op hook for the 'navigation.requested' event; none of the four arguments
// is used and the debug echoes inside are disabled.
casper.on('navigation.requested', function (url, navigationType, navigationLocked, isMainFrame) {
    //casper.echo('navigation requested');
    //casper.echo(navigationType);
});
// Echo every message received through the 'remote.message' event, prefixed so
// it can be told apart from the script's own output.
casper.on('remote.message', function (msg) {
    var relayed = 'from within remote page DOM' + msg;
    this.echo(relayed);
});
// Begin the run by loading the start page, then announce that it has begun.
casper.start('https://www.google.ca/?gws_rd=ssl', function announceStart() {
    casper.echo('Starting!');
});
// Selector every CMQ stage waits for before acting on the page.
// NOTE(review): presumably the embedded Google custom-search button, i.e. a
// sign that the page has finished rendering -- confirm.
var PAGE_READY_SELECTOR = '#___gcse_0 > div > form > table.gsc-search-box > tbody > tr > td.gsc-search-button > input';
// Header cell of the details table; its text is "<last>, <first> (...)".
var DETAILS_HEADER_SELECTOR = '#content-html > table.griddetails > tbody > tr:nth-child(1) > th';
// Link to the single search result on the list page.
var FIRST_RESULT_SELECTOR = '#GViewList > tbody > tr:nth-child(2) > td:nth-child(1) > a';
// Column names written as the first line of a freshly created CSV file.
var CSV_HEADER = 'Permit Number,Last Name,First Name,Gender,Permit,Status,Specialty,Activity,Authorization,Address,Phone';

/**
 * Start-page stage: prepares the CSV file and opens the CMQ search page.
 *
 * If the CSV already exists, the permit numbers in it are loaded into
 * `alreadyParsed` and `permitNumber` is set to the highest one, so that the
 * index stage (which increments before searching) continues with the next
 * permit. Otherwise a new file is started with a BOM and the header row.
 */
function prepareOutputAndOpenSearch() {
    casper.echo('on google');
    if (fs.exists(targetFile)) {
        var highest = -1; // same as a fresh start when the file has no data rows
        var stream = fs.open(targetFile, 'r');
        try {
            var line = stream.readLine();
            var lineIndex = 0;
            while (line) {
                if (lineIndex > 0) { // line 0 is the header row
                    var parsedPermit = Number(line.substring(0, line.indexOf(',')));
                    alreadyParsed.push(parsedPermit);
                    if (!isNaN(parsedPermit) && parsedPermit > highest) {
                        highest = parsedPermit;
                    }
                }
                line = stream.readLine();
                lineIndex++;
            }
        } finally {
            // Release the file handle even if a line fails to parse.
            stream.close();
        }
        // Store the last permit already handled, not the next one: the index
        // stage pre-increments, so storing highest + 1 would skip a permit.
        permitNumber = highest;
        firstLicense = permitNumber;
        casper.echo(permitNumber);
    } else {
        fs.write(targetFile, '\uFEFF' + CSV_HEADER + '\n', 'a');
    }
    casper.thenOpen('http://www.cmq.org/bottin/index.aspx?lang=en&a=1');
}

/**
 * Index stage: advances to the next permit number and submits the search
 * form, or stops submitting once `permitMax` has been passed.
 */
function submitNextPermit() {
    casper.echo('index stage');
    permitNumber++;
    if (permitNumber > permitMax) {
        casper.echo('Permit number maxed out');
        return;
    }
    casper.echo('going to list');
    casper.sendKeys('#txbNoPermis', getPermitNumberString());
    casper.echo('sent keys, now clicking');
    casper.thenClick('#btSubmit');
    casper.echo('after the click');
}

/**
 * List stage: decides where to go based on the number of results.
 * No result returns to the index page, one result opens its details page,
 * anything else aborts the run with a message.
 */
function handleSearchResults() {
    casper.echo('list stage');
    var resultCount = casper.evaluate(getPhysicianCount);
    var permit = getPermitNumberString();

    if (resultCount === 0) {
        casper.echo('No physicians for license ' + permit);
        casper.echo('going to index');
        casper.thenClick('#btSubmit');
    } else if (resultCount === 1) {
        casper.echo('Physician exists for license ' + permit);
        casper.echo('going to details');
        casper.thenClick(FIRST_RESULT_SELECTOR);
    } else if (resultCount > 1) {
        // Exit with the permit number in the message; an empty infinite loop
        // would hang the process at full CPU without ever terminating.
        casper.die('a > 1 at ' + permit, 1);
    } else {
        // Reached when the count is negative or evaluate() returned no number.
        casper.die('negative a at ' + permit, 1);
    }
}

/**
 * Details stage: appends one CSV row for the physician on the current page
 * and then starts a new search.
 *
 * The row is: cell 4 of the details table, last name, first name, then every
 * other even-indexed cell from index 2 on. Commas and newlines inside a field
 * are replaced with ';' so each physician stays on one CSV line.
 */
function recordPhysicianDetails() {
    casper.echo('details stage');
    var header = casper.getHTML(DETAILS_HEADER_SELECTOR);
    var parenAt = header.indexOf('(');
    // Without a '(' keep the whole header instead of cutting it to ''.
    var fullName = (parenAt === -1 ? header : header.substring(0, parenAt)).trim();
    var nameParts = fullName.split(',');
    var tableData = casper.evaluate(getDetailsData);
    var i;

    currentPhysician = []; // drop leftovers from a row that failed half-way
    currentPhysician.push(tableData[4]);
    currentPhysician.push(nameParts[0].trim());
    currentPhysician.push((nameParts[1] || '').trim()); // header may lack a comma
    for (i = 2; i < tableData.length; i += 2) {
        if (i !== 4) {
            currentPhysician.push(tableData[i]);
        }
    }
    for (i = 0; i < currentPhysician.length; i++) {
        currentPhysician[i] = currentPhysician[i].replace(/,/g, ';').replace(/\n/g, ';');
    }

    casper.echo('writing to file!');
    fs.write(targetFile, currentPhysician.join(',') + '\n', 'a');
    currentPhysician = [];
    casper.echo(casper.exists('#btNewsearch'));
    casper.echo('going to index');
    casper.thenClick('#btNewsearch');
}

/**
 * Drives the scraper: after every page load, the part of the current URL
 * before '.aspx' selects which stage runs next. A URL without '.aspx' is
 * treated as the start page. Unknown URLs trigger a browser "back".
 *
 * @param {string} status - Load status passed by CasperJS (unused).
 */
casper.on('load.finished', function (status) {
    var currentUrl = this.getCurrentUrl();
    var stageKey = currentUrl.substring(0, currentUrl.indexOf('.aspx')).toUpperCase();
    if (stageKey.length === 0) {
        casper.echo('undefined');
        stageKey = 'https://www.google.ca/?gws_rd=ssl'.toUpperCase();
    }

    switch (stageKey) {
    case 'https://www.google.ca/?gws_rd=ssl'.toUpperCase():
        prepareOutputAndOpenSearch();
        break;
    case 'http://www.cmq.org/bottin/index'.toUpperCase():
        casper.waitForSelector(PAGE_READY_SELECTOR, submitNextPermit);
        break;
    case 'http://www.cmq.org/bottin/list'.toUpperCase():
        casper.waitForSelector(PAGE_READY_SELECTOR, handleSearchResults);
        break;
    case 'http://www.cmq.org/bottin/details'.toUpperCase():
        casper.waitForSelector(PAGE_READY_SELECTOR, recordPhysicianDetails);
        break;
    default:
        casper.echo("Wrong URL!");
        casper.back();
        break;
    }
});
// Run the queued steps; when they are all done, report and terminate.
casper.run(function () {
    casper.echo('ending!');
    casper.echo(physicianData.length);
    // With a custom completion callback CasperJS no longer exits on its own,
    // so the process must be ended explicitly or it keeps running idle.
    casper.exit();
});

这个问题是由 WebKit 的一个缺陷引起的:

https://bugs.webkit.org/show_bug.cgi?id=154452

可以通过关闭图像加载来规避。

编辑:问题似乎仍然存在。我猜测是因为 CasperJS 已经过时了,所以我放弃了它,改用 Python。