从网页中提取电话号码

Extract phone number from a Web Page

本文关键字:电话号码 提取 网页      更新时间:2023-09-26

我想用WinHttpRequest从这个网页获得电话号码。有一种javascript可以随机改变数字的顺序——我看不懂这个算法。

http://www.doska.ru/msg/work/courses-education/seminari-un-trenini/ailoo.html

下面是autohotkey的代码:
; Download the ad page synchronously and show whatever text sits inside the
; <span id="ph_td_2"> element of the raw HTML.
pageUrl := "http://www.doska.ru/msg/work/courses-education/seminari-un-trenini/ailoo.html"
http := ComObjCreate("WinHttp.WinHttpRequest.5.1")
http.Open("GET", pageUrl, false)
http.Send()
; Keep the response body in a variable so the byte array stays alive while
; its memory is read directly below.
responseBody := http.ResponseBody
safeArrayPtr := ComObjValue(responseBody)
; The offsets correspond to the SAFEARRAY layout: element count and data
; pointer sit at different positions on 64-bit and 32-bit builds.
is64Bit := (A_PtrSize = 8)
countOffset := is64Bit ? 24 : 16
dataOffset := is64Bit ? 16 : 12
byteCount := NumGet(safeArrayPtr+0, countOffset, "uint")
dataPtr := NumGet(safeArrayPtr+0, dataOffset, "ptr")
html := StrGet(dataPtr, byteCount, "utf-8")
; The first capture group lands in match1.
RegExMatch(html, "id=""ph_td_2"">(.+?)</span>", match)
MsgBox % match1

生成电话号码的脚本如下:

<script type="text/javascript">open_stat_lnk("");change_price( 8, 0, "", 0 );show_banner( );
// UI strings (Russian): show_code = "Type the text from the picture", show_phone = "Show number".
var show_code = "Наберите текст с картинки"; var show_phone = "Показать номер"; var pcc_id=0;
// Render PH_2 into the "ph_td_2" placeholder without a tel: link (third argument 0).
// If PH_c is truthy and pcc_id is still 0, pcc_id becomes 2 and PH_c is evaluated as code.
print_phone(PH_2,2,0);if( PH_c && !pcc_id ){pcc_id=2;eval(PH_c);}
</script>

函数 print_phone(PH_2,2,0) 定义在 http://i.doska.ru/w_inc/js/main.ru.doska.js?v=251 中:

/**
 * Render a decoded phone number into its placeholder element.
 *
 * The target element is "ph_td_<k>", falling back to "ph_td". The value `d`
 * is decoded with _js_decode and written as the element's HTML; the element
 * is then made visible. When `b` is truthy the number is wrapped in a
 * tel: link whose target is the prefix taken from element "ptd2_<k>"
 * (text before the first "<", with one "(" and one ")" removed) followed by
 * the decoded number with up to three "-" removed.
 *
 * @param {string} d - Encoded phone value; nothing is rendered when falsy.
 * @param {number|string} k - Suffix used to build the element ids.
 * @param {*} b - Truthy to render a tel: link, falsy to render plain text.
 * @returns {undefined}
 */
function print_phone(d, k, b) {
    var g = el("ph_td_" + k);
    if (!g) {
        g = el("ph_td");
    }
    if (!g || !d) {
        return;
    }
    var f = _js_decode(d);
    // Declared locally: the chained form `var f = p2 = ...` assigned to an
    // undeclared p2, leaking a global and throwing in strict mode.
    var p2 = f;
    if (b) {
        var c = el("ptd2_" + k);
        if (c) {
            // Keep only the text before the first tag, e.g. "(+371)".
            c = c.innerHTML.split("<")[0];
            c = c.replace("(", "").replace(")", "");
            // String patterns replace one occurrence each: at most three dashes go.
            p2 = f.replace("-", "").replace("-", "").replace("-", "");
            p2 = c + "" + p2;
        }
        g.innerHTML = '<a href="tel:' + p2 + '">' + f + "</a>";
    } else {
        g.innerHTML = f;
    }
    g.style.visibility = "visible";
}

请帮我解这个谜。

您调用的是 print_phone(PH_2,2,0);因此传入的参数值如下:

d := PH_2, 
k := 2, 
b := 0

你正在做:

if (0) { // do some stuff } else { display(f); }

由于 b == 0,而 0 在条件判断中等价于 false,所以实际上只是显示 f。

f is _js_decode(d)

/**
 * Decode an encoded page value (for example PH_2) with the fixed site key.
 *
 * @param {string} b - Encoded value.
 * @returns {string} Result of _ph_dec in mode 2.
 */
function _js_decode(b) {
    var siteKey = "Hb9c0mOswgV4p{zDlf";
    var mode = 2;
    return _ph_dec(b, siteKey, mode);
}
/**
 * Decode a value by combining each character with a repeating key.
 *
 * The input is first passed through _b64_dec and unescape. Character i of
 * that text is then combined with character (i mod key length) of the key:
 *   k == 1 : text code - key code
 *   k == 2 : text code - key code + 14   (the mode _js_decode uses)
 *   other  : text code XOR key code
 * The +14 offset is simply a constant of this scheme; the code gives no
 * further explanation for it.
 *
 * @param {string} g - Encoded input (e.g. PH_2).
 * @param {string|String} r - Key (e.g. "Hb9c0mOswgV4p{zDlf", 18 characters).
 * @param {number} k - Mode selector.
 * @returns {string} Decoded text.
 */
function _ph_dec(g, r, k) {
    var text = unescape(_b64_dec(g));
    var keyLength = r.length;
    var decoded = "";
    for (var i = 0; i < text.length; i++) {
        var textCode = text.charCodeAt(i);
        // The key character is taken from r (not from g), wrapping around.
        var keyCode = r.charCodeAt(i % keyLength);
        var outCode;
        if (k == 1) {
            outCode = textCode - keyCode;
        } else if (k == 2) {
            outCode = textCode - keyCode + 14;
        } else {
            outCode = textCode ^ keyCode;
        }
        decoded += String.fromCharCode(outCode);
    }
    return decoded;
}

使用浏览器调试器可能会给你更多的提示。

假设chanter是正确的,并且顺序总是相同的,这将完成工作:

; Fetch the ad page (HtmlGet is defined further down in this script).
mainHtml := HtmlGet("http://www.doska.ru/msg/work/courses-education/seminari-un-trenini/ailoo.html")
; phone1 = text of the ptd2_* cell before the span, phone2 = content of the ph_td* span.
RegExMatch(mainHtml, "<td width=100% class=""ads_contacts"" nowrap id=""ptd2_[^""]*"">([^<]+)<span class=""ads_contacts_bold"" id=""ph_td[^""]*"">([^<]+)</span>", phone)
; Locate the relative URL of the per-ad contacts script (contactsJsPartUrl1).
; The literal ( ) + and ? of the page's JavaScript must be backslash-escaped;
; without the backslashes the parentheses become capture groups and the
; pattern no longer matches the page.
RegExMatch(mainHtml, "var ss_w='показать';document.write\( '<scr'\+'ipt id=""contacts_js"" src=""([^\?]+)\?t='\+new Date\(\)\+'""></scr'\+'ipt>' \);", contactsJsPartUrl)
; Simpler variant that sends only the date as the t parameter:
;contactsJsUrl := "http://www.doska.ru" contactsJsPartUrl1 "?t=" GetJsDate()
contactsJsUrl := "http://www.doska.ru" contactsJsPartUrl1 "?t=" GetJsDate() "0." GetRandomString(16,"0123456789") "&reload=1"
; Example of the URL the browser requests:
;http://www.doska.ru/js/2015-08-17/1941/VnQNHE9hRRBVfQgZVWJJQVZ6RUYLLkNaXn0A.js?t=Thu Sep 03 2015 00:16:12 GMT+0200 (W. Europe Standard Time)
secretVariablesJs := HtmlGet(contactsJsUrl)
/*
Example response (JavaScript defining the encoded PH_1..PH_3 values):
var PHONE_CNT=-1;var PHONE_CNT2=-1;var PHONE_CNT3=-1;var EMAIL_CNT=-1;var SHOW_CNT=29;var PH_c="";var PH_1=0;var PH_2=0;var PH_3=0;
pcc_id=0;PH_1=gpzd("JTgwJUEyJTdDX3BsJTdDWnUlOEYlN0UlNUJ4JThFciU5M3hpJTdGZg==","79325724");
PH_2 = gpzd( "JThFJTlEeCU1RXlvaSU4RXpobSU3RXUlN0MlOEZUJThEJThEeF8lN0MlOTF6ZHhlJTYwZw==","15372732");
PH_3 = gpzd( "JTdFJUEyelpwbSU3RSU1Q3MlOEQlN0VZcyU4RXMlOTV6ZyU3RGY=","45575927");
*/
; Debug aid: show the URL that was requested.
msgbox % contactsJsUrl
; JScript source for _js_decode, stored as text in the variable _js_decode.
; It is appended to jsCode and executed through ScriptControl later in this
; script. The function calls _ph_dec with the fixed key and mode 2.
_js_decode = 
(
function _js_decode(b) {
    return _ph_dec(b, "Hb9c0mOswgV4p{zDlf", 2)
}
)
; JScript source for _ph_dec, stored as text in the variable _ph_dec.
; The function base64-decodes and unescapes its input, then combines every
; character with the key character at (index mod key length): subtraction for
; k == 1, subtraction plus 14 for k == 2, XOR otherwise.
; The percent signs are written as `% because a bare % would start a variable
; reference in this legacy assignment; JScript receives a plain %.
_ph_dec =
(
function _ph_dec(g, r, k) {
    g = unescape(_b64_dec(g));
    var n = r.length;
    var d = g.length;
    var c = "";
    var q, p;
    for (var f = 0; f < d; f++) {
        q = g.substring(f, f + 1);
        p = r.substring(f `% n, f `% n + 1);
        if (k == 1) {
            q = q.charCodeAt(0) - p.charCodeAt(0)
        } else {
            if (k == 2) {
                q = q.charCodeAt(0) - p.charCodeAt(0) + 14
            } else {
                q = q.charCodeAt(0) ^ p.charCodeAt(0)
            }
        }
        c = c + String.fromCharCode(q)
    }
    return c
}
)
; JScript source for _b64_dec, stored as text in the variable _b64_dec.
; The function is a base64 decoder: it reads four characters at a time, looks
; each one up in the alphabet string f and emits up to three bytes. Index 64
; in f is "=", so q == 64 / p == 64 mark padding and shorten the last group
; to one or two bytes.
_b64_dec = 
 (
 function _b64_dec(n) {
    var f = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";
    var d, c, b, s, r, q, p, u, k = 0,
        g = "";
    do {
        s = f.indexOf(n.charAt(k++));
        r = f.indexOf(n.charAt(k++));
        q = f.indexOf(n.charAt(k++));
        p = f.indexOf(n.charAt(k++));
        u = s << 18 | r << 12 | q << 6 | p;
        d = u >> 16 & 255;
        c = u >> 8 & 255;
        b = u & 255;
        if (q == 64) {
            g += String.fromCharCode(d)
        } else {
            if (p == 64) {
                g += String.fromCharCode(d, c)
            } else {
                g += String.fromCharCode(d, c, b)
            }
        }
    } while (k < n.length);
    return g
}
)
; JScript source for gpzd, stored as text in the variable gpzd.
; The downloaded contacts script calls gpzd(data, key) to fill PH_1..PH_3.
; The function derives a numeric key (key * 6 - 47289 + 517), converts it to
; a string and runs _ph_dec on the data in mode 2.
gpzd =
(
function gpzd(data, key) {
    key = key * 6 - 47289 + 517;
    ret = _ph_dec(data, new String(key), 2);
    return ret
}
)
; Build one JScript program: the downloaded variable definitions followed by
; the helper functions they and the decoding step rely on.
jsCode := secretVariablesJs "`r`n"
    . _js_decode "`r`n"
    . gpzd "`r`n"
    . _ph_dec "`r`n"
    . _b64_dec "`r`n"
; NOTE(review): ScriptControl is generally only available to 32-bit processes - confirm the AutoHotkey build used.
sc := ComObjCreate("ScriptControl")
sc.Language := "JScript"
sc.ExecuteStatement(jsCode)
; Try PH_2 first, then PH_1, then PH_3; a result containing "*" is treated
; as not decoded and triggers the next attempt.
for attempt, jsExpression in ["_js_decode(PH_2)", "_js_decode(PH_1)", "_js_decode(PH_3)"] {
    phone2Decrypted := sc.Eval(jsExpression)
    if !InStr(phone2Decrypted,"*")
        break
}
if InStr(phone2Decrypted,"*")
    MsgBox failed to get the phone number
else
    MsgBox, % phone1 " " phone2Decrypted

; Download url with a browser-like User-Agent and return the response body
; decoded as UTF-8 text. The WinHttpRequest object is created once and reused
; across calls.
HtmlGet(url) {
    Static http := ComObjCreate("WinHttp.WinHttpRequest.5.1")
    http.Open("GET", url)
    http.SetRequestHeader("User-Agent","Mozilla/5.0 (Windows NT 6.3; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0")
    http.Send()
    ; Hold the body in a local so the byte array stays alive while it is read.
    responseBody := http.ResponseBody
    safeArrayPtr := ComObjValue(responseBody)
    ; Offsets of the element count and data pointer differ between 64-bit
    ; and 32-bit builds.
    is64Bit := (A_PtrSize = 8)
    countOffset := is64Bit ? 24 : 16
    dataOffset := is64Bit ? 16 : 12
    byteCount := NumGet(safeArrayPtr+0, countOffset, "uint")
    dataPtr := NumGet(safeArrayPtr+0, dataOffset, "ptr")
    Return StrGet(dataPtr, byteCount, "utf-8")
}
; Return the current date as JScript formats it (''+new Date()), passed
; through encodeURI so it can be placed in a URL query string.
GetJsDate() {
    engine := ComObjCreate("ScriptControl")
    engine.Language := "JScript"
    jsExpression := "encodeURI(''+new Date()+'')"
    Return engine.Eval(jsExpression)
}
; Return a string of `length` characters, each picked at random from `chars`.
;   length - number of characters to generate; 0 yields an empty string.
;   chars  - pool of characters to pick from; defaults to 0-9 and a-z when
;            omitted or empty.
GetRandomString(length,chars:="") {
    If (chars = "")
        chars := "0123456789abcdefghijklmnopqrstuvwxyz"
    ; Measure the pool once and use that value as the upper bound below.
    charsCount := StrLen(chars)
    result := ""
    Loop %length% {
        Random, pick, 1, %charsCount%
        result .= SubStr(chars, pick, 1)
    }
    Return result
}

您可以在AutoHotKey中使用ImageSearch搜索电话号码标签,并通过增加Y轴移动指针并使用MouseClickDrag