计算CasperJS中给定元素的XPath

Calculating an XPath of a given element in CasperJS

本文关键字:元素 XPath CasperJS 计算      更新时间:2024-01-07

我想在CasperJS中使用Firebug的getElementXPath函数,但一直找不到合适的调用位置。下面是我目前的代码,它只对已经带有"id"属性的元素有效,但这帮助不大,因为我正是想用XPath来代替id(大多数元素都没有id)。

casper.then(function () {
    // Function handed to evaluate(). "Set 1" returns the XPath string
    // computed by getElementXPath; "Set 2" (commented out) returns the
    // element itself so the XPath can be computed by the caller instead.
    var computeInPage = function () {
        var target = document.querySelector('[class="h4"]');
        return getElementXPath(target); //Set 1
        //return target; //Set 2
    };
    var result = this.evaluate(computeInPage);
    console.log('xpath: ' + result); //Set 1
    //console.log('xpath: ' + getElementXPath(result)); //Set 2
});

集合1(代码中标记为"Set 1"的写法)总是输出"xpath: null"

只有当元素已经有"id"属性时,集合2(代码中标记为"Set 2"的写法)才会输出正确的路径,例如"xpath: //*[@id="button"]"

否则,集合2将输出最后一个标记,即"xpath://a"或"xpath:/cs"

这是Firebug的getElementXPath函数,我刚刚将它粘贴在JS文件的顶部。

/**
 * Build an XPath expression for an element.
 *
 * When the element has a truthy `id`, the result is an id lookup of the
 * form //*[@id="..."]; otherwise the work is delegated to
 * getElementTreeXPath, which produces a positional path.
 *
 * @param {Element} element - element to describe (may be null/undefined).
 * @returns {?string} the XPath, or whatever getElementTreeXPath returns.
 */
function getElementXPath(element)
{
    var hasId = Boolean(element && element.id);
    return hasId
        ? '//*[@id="' + element.id + '"]'
        : getElementTreeXPath(element);
}
/**
 * Build a positional XPath by walking from an element up through its
 * ancestors.
 *
 * Each step is written as "prefix:localName" (the prefix is omitted when
 * empty). A 1-based position such as "[2]" is appended only when the
 * element has a preceding or following sibling with the same nodeName.
 * The walk stops at the first ancestor that is not an element node.
 *
 * @param {Node} element - starting node (may be null/undefined).
 * @returns {?string} a path such as "/html/body/div/p[2]/a", or null when
 *     the starting node is missing or is not an element node.
 */
function getElementTreeXPath(element)
{
    var steps = [];
    var node = element;

    while (node && node.nodeType == Node.ELEMENT_NODE)
    {
        // Count earlier siblings sharing this node's nodeName; the document
        // type declaration is skipped so it never counts as a match.
        var precedingSameName = 0;
        var earlier = node.previousSibling;
        while (earlier)
        {
            if (earlier.nodeType != Node.DOCUMENT_TYPE_NODE && earlier.nodeName == node.nodeName)
                precedingSameName++;
            earlier = earlier.previousSibling;
        }

        // Scan forward only until the first same-named sibling is found.
        var followedBySameName = false;
        var later = node.nextSibling;
        while (later && !followedBySameName)
        {
            if (later.nodeName == node.nodeName)
                followedBySameName = true;
            later = later.nextSibling;
        }

        var step = (node.prefix ? node.prefix + ":" : "") + node.localName;
        if (precedingSameName || followedBySameName)
            step += "[" + (precedingSameName + 1) + "]";

        // Ancestors are visited innermost-first, so prepend each step.
        steps.unshift(step);
        node = node.parentNode;
    }

    return steps.length ? "/" + steps.join("/") : null;
}

当您监听"page.error"事件时,会看到类似下面的输出

Error: ReferenceError: Can't find variable: getElementXPath

这与您在页面中包含该代码的方式有关。以下完整的脚本适用于我:

var casper = require('casper').create();

// Print errors raised inside the page so they are not lost.
// http://docs.casperjs.org/en/latest/events-filters.html#page-error
casper.on("page.error", function(message, trace) {
    // The trace is unused here; the output could be made fancier with the
    // code from the PhantomJS equivalent.
    this.echo("Error: " + message);
});

casper.start('http://example.com');

casper.then(function() {
    // First evaluate(): define the helpers and publish getElementXPath on
    // window so that a later evaluate() call can reach it by name.
    this.evaluate(function(){
        // Positional path builder: walks up through parentNode while the
        // current node is an element, emitting "prefix:localName" steps and
        // a 1-based "[n]" only when a same-named sibling exists.
        function getElementTreeXPath(element)
        {
            var steps = [];
            var node = element;
            while (node && node.nodeType == Node.ELEMENT_NODE)
            {
                var precedingSameName = 0;
                var earlier = node.previousSibling;
                while (earlier)
                {
                    // The document type declaration never counts as a match.
                    if (earlier.nodeType != Node.DOCUMENT_TYPE_NODE && earlier.nodeName == node.nodeName)
                        precedingSameName++;
                    earlier = earlier.previousSibling;
                }

                var followedBySameName = false;
                var later = node.nextSibling;
                while (later && !followedBySameName)
                {
                    if (later.nodeName == node.nodeName)
                        followedBySameName = true;
                    later = later.nextSibling;
                }

                var step = (node.prefix ? node.prefix + ":" : "") + node.localName;
                if (precedingSameName || followedBySameName)
                    step += "[" + (precedingSameName + 1) + "]";
                steps.unshift(step);
                node = node.parentNode;
            }
            return steps.length ? "/" + steps.join("/") : null;
        }

        // Prefer an id lookup when the element has an id; otherwise fall
        // back to the positional path.
        window.getElementXPath = function(element)
        {
            var hasId = Boolean(element && element.id);
            return hasId
                ? '//*[@id="' + element.id + '"]'
                : getElementTreeXPath(element);
        };
    });

    // Second evaluate(): use the published helper on the first <a> element
    // and print the resulting path.
    var linkXPath = this.evaluate(function(){
        return getElementXPath(document.querySelector("a"));
    });
    this.echo(linkXPath);
});

casper.run();

输出:

/html/body/div/p[2]/a

诀窍是让getElementXPath在页面上下文的全局作用域中可用。只要把该函数赋值给window.getElementXPath,就可以轻松做到这一点。