如何在PhantomJS中等待页面准备就绪

How can I wait for the page to be ready in PhantomJS?

本文关键字:PhantomJS 怎么能 等待      更新时间:2023-09-26

我正在使用PhantomJS登录一个站点并执行一些操作。该站点使用OAuth登录:点击页面上的"Login"按钮会跳转到OAuth服务,在那里输入凭据并点击"提交"后,会被重定向回原来的网站。我的脚本可以正常工作,但它依赖于超时(setTimeout),这似乎不够健壮。

我该如何重写这段代码,使它等到页面准备就绪后再继续,而不是使用setTimeout?我经常遇到因为页面尚未准备好、jQuery还没有初始化而导致的错误。

我不太擅长Javascript,所以一个例子会有所帮助。这是我在谷歌上搜索了一大堆后拼凑出来的。下面是我的代码:

// PhantomJS script from the question: opens the login page, clicks the OAuth
// login button, fills in the credentials and submits them, separating the
// steps with fixed setTimeout delays instead of waiting for page-ready events.
var page = require('webpage').create();
// NOTE: `system` is not used anywhere in this snippet.
var system = require('system');
// Give up on any single resource after 10000 (PhantomJS documents this
// setting in milliseconds); onResourceTimeout below is notified when that happens.
page.settings.resourceTimeout = 10000;
page.onResourceTimeout = function(e) {
  console.log("Timed out loading resource " + e.url);
};
page.open('https://mysite.com/login', function(status) {
    if (status !== 'success') {
        console.log('Error opening url');
        phantom.exit(1);
    } else {
        // Step 1: click the "login with OAuth" button on the site.
        // NOTE(review): this outer setTimeout is closed without a delay
        // argument, unlike the two inner timers that wait 15000 ms — confirm
        // whether a delay was intended here.
        setTimeout(function() {
            console.log('Successfully loaded page');
            // The function passed to page.evaluate uses `$`, so this assumes
            // the page has already loaded and initialised jQuery.
            page.evaluate(function() {
                $("#submit-field").click(); //Clicking the login button
            });
            console.log('Clicked login with OAuth button');
            // Step 2: after a fixed 15000 ms wait, fill in the form fields
            // with ids "username" and "password" and click the element with
            // id "Login".
            setTimeout(function() {
                console.log('Addding the credentials');
                page.evaluate(function() {                
                    document.getElementById("username").value = 'user@example.com';
                    document.getElementById("password").value = 'P@ssw0rd';
                    document.getElementById("Login").click();
                });
                console.log('Clicked login button');
                // Step 3: after another fixed 15000 ms wait, the export action
                // would run; in this snippet it is only a placeholder that logs.
                // NOTE: nothing in this snippet calls phantom.exit() on the
                // success path.
                setTimeout(function() {
                    //Inject some jQuery into the page and invoke that here
                    console.log('Clicked the export button');
                }, 15000);
            }, 15000);
        });
    }
});

看来要做到这一点,唯一的方法是使用从DOM到PhantomJS的回调。

// PhantomJS script: get notified when the page's DOM is ready by having the
// page call back into PhantomJS from a DOMContentLoaded listener.
var page = require('webpage').create();
var system = require('system');

// Runs on the PhantomJS side when the page invokes window.callPhantom().
function handlePageReady(data) {
    console.log('Main page is loaded and ready');
    //Do whatever here
}

// Passed to page.evaluate, so it must not reference anything from this
// script's scope. Registers the DOMContentLoaded listener that pings PhantomJS.
function installReadyListener() {
    var notifyPhantom = function() {
        window.callPhantom();
    };
    document.addEventListener('DOMContentLoaded', notifyPhantom, false);
    console.log("Added listener to wait for page ready");
}

// onInitialized is used so the listener is registered before the document's
// DOMContentLoaded event fires.
page.onInitialized = function() {
    page.onCallback = handlePageReady;
    page.evaluate(installReadyListener);
};

page.open('https://www.google.com', function(status) {});

另一种方法是扩展phantomjs waitfor.js的示例。

我使用这种个人混合方法。这是我的main.js文件:

'use strict';
// PhantomJS entry script: loads the waitFor() helper, opens the page, injects
// jQuery, waits until the document has finished loading and then reads the
// text of #foo.

// phantom.injectJs() reports failure through its return value instead of
// throwing, so the flag has to be checked: without waitFor() in scope the
// script would only fail later, inside the includeJs callback.
var wasSuccessful = phantom.injectJs('./lib/waitFor.js');
var page = require('webpage').create();

if (!wasSuccessful) {
  console.log('error: could not inject ./lib/waitFor.js');
  phantom.exit(1);
} else {
  page.open('http://foo.com', function(status) {
    if (status !== 'success') {
      console.log('error');
      phantom.exit(1);
      return;
    }

    page.includeJs('https://cdnjs.cloudflare.com/ajax/libs/jquery/3.1.1/jquery.min.js', function() {
      // Poll the page until document.readyState reports 'complete'.
      waitFor(function() {
        return page.evaluate(function() {
          return document.readyState === 'complete';
        });
      }, function() {
        // `$` is the jQuery that includeJs loaded into the page above.
        var fooText = page.evaluate(function() {
          return $('#foo').text();
        });
        // fooText now holds the text of #foo; use it here before exiting.
        phantom.exit();
      });
    });
  });
}

lib/waitFor.js文件(其中的waitFor()函数复制自PhantomJS官方示例waitfor.js):

/**
 * Poll a condition until it becomes truthy or a timeout elapses.
 *
 * The condition is checked once per poll period. Once it has evaluated to a
 * truthy value, `onReady` is run on the following tick. If the timeout elapses
 * first, "'waitFor()' timeout" is logged and `phantom.exit(1)` is called.
 * In both cases polling is stopped before the final action runs, so neither
 * `onReady` nor the timeout handling can be triggered more than once.
 *
 * @param {Function|string} testFx - Condition to evaluate; a function whose
 *     return value is used, or a string of code passed to eval().
 * @param {Function|string} onReady - Action to run once the condition is
 *     fulfilled; a function, or a string of code passed to eval().
 * @param {number} [timeOutMillis=3000] - Maximum time to wait, in
 *     milliseconds. Falsy values (including 0) select the default.
 * @param {number} [pollIntervalMillis=250] - Delay between checks, in
 *     milliseconds. Falsy values (including 0) select the default.
 */
function waitFor(testFx, onReady, timeOutMillis, pollIntervalMillis) {
    var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 3000, //< Default Max Timeout is 3s
        pollMillis = pollIntervalMillis ? pollIntervalMillis : 250, //< Default poll period is 250ms
        start = new Date().getTime(),
        condition = false,
        interval = setInterval(function() {
            if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
                // Not timed out yet and condition not yet fulfilled: check again.
                // SECURITY: string arguments are eval()'d; only pass trusted code.
                condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
                return;
            }

            // Either the condition is fulfilled or the timeout elapsed. Stop
            // polling BEFORE acting: if onReady throws, or phantom.exit() does
            // not halt the script immediately, the interval must not fire again.
            clearInterval(interval);

            if (!condition) {
                // Timed out while the condition was still not fulfilled.
                console.log("'waitFor()' timeout");
                phantom.exit(1);
            } else if (typeof(onReady) === "string") {
                // Condition fulfilled: do what is supposed to happen once the
                // condition is fulfilled.
                eval(onReady);
            } else {
                onReady();
            }
        }, pollMillis); //< repeat check every poll period
}

这种方法不是异步的,但至少可以保证在我使用这些资源之前,它们都已加载完毕。