遍历HTML页面元素

PhantomJS Iterating through HTML page elements

本文关键字:元素 HTML 遍历      更新时间:2023-09-26

有人能帮我解决这个问题吗?

我正在用PhantomJS抓取本地HTML文件,想在屏幕上输出所有class为"test"的HTML标签的内容。第一个标签的内容可以正常得到,但不知为何得不到下一个标签的内容。

-= HTML/index.html =-

<!doctype html>
<html>
    <head>
        <meta charset="UTF-8">
        <title>Document</title>
    </head>
    <body>
        <!-- Test fixture: four paragraphs, of which only the two carrying
             class "test" are expected to appear in the scraper's output. -->
        <div id="parent-id">
            <p>hello word 1</p>
            <p class="test">hello word 2</p>
            <p class="test">hello word 3</p>
            <p>hello word 4</p>
        </div>
    </body>
</html>

-= PhantomJS/test2.js =-

// PhantomJS script: opens http://localhost/index.html and tries to print the
// inner HTML of every element with class "test".
// NOTE: `fs` is required here but never used in this snippet.
var fs = require('fs');
var page = require('webpage').create();
page.settings.userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:49.0) Gecko/20100101 Firefox/49.0';
page.viewportSize = {width:1200, height:1024};
// The handler body is empty, so console messages from the page are dropped;
// uncomment the line inside to forward them to the PhantomJS console.
page.onConsoleMessage = function (msg) {
    //console.log(msg);
}
page.open('http://localhost/index.html', function(status) {
    if (status == 'success') {
        // NOTE(review): this returns the DOM element collection itself out of
        // the page context rather than plain values. This is the part of the
        // script the question is about: only the first element's content ends
        // up being printed by the loop below.
        var products = page.evaluate(function() {
            return document.getElementsByClassName('test')
        });
        // Print the inner HTML of each returned entry, skipping falsy ones.
        for(var i = 0; i < products.length; ++i) {
            if(products[i]) {
                console.log(products[i].innerHTML);
            }
        }
        phantom.exit();
    } else {
        console.log('Unable to load the address!');
        phantom.exit();
    }
});

当我运行phantomjs "test2.js"时,我得到:

hello word 2

而我期望得到:

hello word 2
hello word 3

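您不应该从页面上下文返回DOM元素。page.evaluate的返回值必须是可以被JSON序列化的简单值(字符串、数字、数组、普通对象等),DOM节点无法完整地传回PhantomJS上下文。因此应当在页面上下文中先取出所需的内容(例如innerHTML),只返回简单值。下面这个例子适用于您的index.html: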

// Open the local page and print the inner HTML of every element with class
// "test". Relies on the `page` object created earlier in the script.
page.open('http://localhost/index.html', function(status) {
    // Guard clause: report the failure and stop when the page did not load.
    if (status !== 'success') {
        console.log('Unable to load the address!');
        phantom.exit();
        return;
    }

    // Extract the contents inside the page context and hand back an array of
    // plain strings; the DOM elements themselves are never returned.
    var products = page.evaluate(function() {
        var elements = document.getElementsByClassName('test');
        // The element collection is array-like, not an Array, so borrow map.
        return Array.prototype.map.call(elements, function(elem) {
            return elem.innerHTML;
        });
    });

    products.forEach(function(content) {
        // Skip entries with no content (empty inner HTML).
        if (content) {
            console.log(content);
        }
    });
    phantom.exit();
});