屏幕抓取javascript

screen scraping javascript

本文关键字:javascript 抓取 屏幕      更新时间:2023-09-26

我正在从一个网站抓取内容,它返回的是下面这段 JavaScript 代码。当然,这样既不会显示 Flash 视频,也不会执行其中的 JavaScript,因为我只是用一个简单的 PHP DOM 解析器来获取 HTML。有没有办法运行这段 JavaScript,并得到它输出的嵌入对象(embed object)?

<script type="text/javascript">
    // Variables passed to the player SWF. The two feed URLs are run through
    // escape() so they can travel as single flashvar values.
    var flashvars = {
        startjs: "playerLoaded",
        activeColor: "83A7D2",
        themeColor: "FFFFFF",
        config: escape("http://example/0a1cee42025e9e49d25d.fid?key=c3e868caa037531d0d709e238d93013a&VID=189988&catID=1,26,43,50&rollover=1&startThumb=19&embed=&utm_source=&multiview=0&premium=1&country=&user=0&vip=0&heightHD=480p&cd=u&ref=browse"),
        // Disabled alternative for config (different host, fewer query parameters):
        // config: escape("http://example.com/0a1cee42025e9e49d25d.fid?key=c3e868caa037531d0d709e238d93013a&VID=189988&catID=1,26,43,50&rollover=1&startThumb=19&premium=1&country=&user=0&vip=0&cd=u&ref=browse"),
        config2: escape("http://www.example.com/player_feed_local.php?vid=189988&CHIDS=1,26,43,50&link=http%253A%252F%252Fwww.example.com%252Fjump%252FTesting-Video%252Fvideo189988%253Fref%253Dbrowse")
    };

    // <param> settings for the generated Flash object.
    // NOTE(review): "startjs" looks like a player-specific callback name — confirm.
    var params = {
        startjs: "playerLoaded",
        loop: "false",
        quality: "best",
        bgcolor: "#000000",
        allowfullscreen: "true",
        allowscriptaccess: "always",
        wmode: "opaque"
    };

    // Attributes applied to the generated element; the id matches the
    // placeholder element id passed to embedSWF below.
    var attributes = { id: "flashMovie" };

    // Arguments, per the SWFObject 2 API: SWF URL, id of the element to replace,
    // width, height, minimum Flash Player version, express-install SWF, then the
    // flashvars / params / attributes objects built above.
    swfobject.embedSWF(
        "http://www.example.com/Player_v1.11.9.7.swf?v=1.0",
        "flashMovie",
        "100%",
        "500",
        "9",
        "expressInstall.swf",
        flashvars,
        params,
        attributes
    );
</script>

您可以使用 PhantomJS 或 Crowbar 这样的工具,像浏览器一样运行 JavaScript(具备真正的 DOM 等完整环境),然后再提取数据。

使用 WebKit 等浏览器渲染引擎执行 JavaScript,然后就可以提取渲染后的 HTML。

下面是一些示例代码:http://webscraping.com/blog/Scraping-JavaScript-webpages-with-webkit/