How to wait for a click() event to load in phantomjs before continuing?

后端 未结 3 1572
夕颜
夕颜 2020-12-14 19:48

PhantomJS has these two really handy callbacks, onLoadStarted and onLoadFinished, which allow you to essentially pause execution while the page is loading…

相关标签:
3条回答
  • 2020-12-14 20:06

    I think the onLoadStarted and onLoadFinished functions are everything you need. Take for example the following script:

    // Demo script: log every navigation/resource event that PhantomJS reports
    // while a link on the opened page is clicked.
    var page = require('webpage').create();

    // Navigation lifecycle handlers.
    page.onNavigationRequested = function (url, type, willNavigate, main) {
        console.log('Trying to navigate to: ' + url);
    };
    page.onLoadStarted = function () {
        console.log('Load Started');
    };
    page.onUrlChanged = function (targetUrl) {
        console.log('New URL: ' + targetUrl);
    };
    page.onLoadFinished = function (status) {
        console.log('Load Finished: ' + status);
    };

    // Resource-level handlers.
    page.onResourceRequested = function (requestData, networkRequest) {
        console.log('Request (#' + requestData.id + '): ' + requestData.url);
    };
    page.onResourceReceived = function (response) {
        // Only the final ("end") stage of each response is logged.
        if (response.stage === "end") {
            console.log('Response (#' + response.id + ', stage "' + response.stage + '"): ' + response.url);
        }
    };

    // Passed to page.evaluate: dispatches a synthetic click on the first <a> element.
    function clickFirstLink() {
        var clickEvent = document.createEvent('MouseEvents');
        clickEvent.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
        document.querySelector("a").dispatchEvent(clickEvent);
    }

    page.open("http://example.com", function (status) {
        page.evaluate(clickFirstLink);

        // Keep PhantomJS alive for 10 seconds so the handlers above can log, then quit.
        setTimeout(function () {
            phantom.exit();
        }, 10000);
    });
    

    It prints

    Trying to navigate to: http://example.com/
    Request (#1): http://example.com/
    Load Started
    New URL: http://example.com/
    Response (#1, stage "end"): http://example.com/
    Load Finished: success
    Trying to navigate to: http://www.iana.org/domains/example
    Request (#2): http://www.iana.org/domains/example
    Load Started
    Trying to navigate to: http://www.iana.org/domains/reserved
    Request (#3): http://www.iana.org/domains/reserved
    Response (#2, stage "end"): http://www.iana.org/domains/example
    New URL: http://www.iana.org/domains/reserved
    Request (#4): http://www.iana.org/_css/2013.1/screen.css
    Request (#5): http://www.iana.org/_js/2013.1/jquery.js
    Request (#6): http://www.iana.org/_js/2013.1/iana.js
    Response (#3, stage "end"): http://www.iana.org/domains/reserved
    Response (#6, stage "end"): http://www.iana.org/_js/2013.1/iana.js
    Response (#4, stage "end"): http://www.iana.org/_css/2013.1/screen.css
    Response (#5, stage "end"): http://www.iana.org/_js/2013.1/jquery.js
    Request (#7): http://www.iana.org/_img/2013.1/iana-logo-header.svg
    Request (#8): http://www.iana.org/_img/2013.1/icann-logo.svg
    Response (#8, stage "end"): http://www.iana.org/_img/2013.1/icann-logo.svg
    Response (#7, stage "end"): http://www.iana.org/_img/2013.1/iana-logo-header.svg
    Request (#9): http://www.iana.org/_css/2013.1/print.css
    Response (#9, stage "end"): http://www.iana.org/_css/2013.1/print.css
    Load Finished: success
    

    It shows that clicking a link emits the LoadStarted event once and the NavigationRequested event twice, because there is a redirect. The trick is to add the event handlers (onLoadStarted / onLoadFinished) before doing the action that triggers the navigation:

    // Open a page, click its first link, and render a screenshot once the
    // load triggered by that click has finished.
    var page = require('webpage').create();

    // Passed to page.evaluate: dispatches a synthetic click on the first <a> element.
    function clickFirstLink() {
        var clickEvent = document.createEvent('MouseEvents');
        clickEvent.initMouseEvent('click', true, true, window, 0, 0, 0, 0, 0, false, false, false, false, 0, null);
        document.querySelector("a").dispatchEvent(clickEvent);
    }

    page.open("http://example.com", function (status) {
        var handleLoadStarted = function () {
            console.log('Load Started');
        };
        var handleLoadFinished = function (status) {
            console.log('Load Finished: ' + status);
            page.render("test37_next_page.png");
            phantom.exit();
        };

        // Attach both handlers before the click so they are in place when the navigation starts.
        page.onLoadStarted = handleLoadStarted;
        page.onLoadFinished = handleLoadFinished;

        page.evaluate(clickFirstLink);
    });
    

    If you need to do this kind of thing (click, then wait for the next page to load) a lot, maybe it is time to try something else like CasperJS. It runs on top of PhantomJS, but has a much better API for navigating web pages.

    0 讨论(0)
  • 2020-12-14 20:07

    Here is my code based on some other answers. In my case, I didn't need to specifically evaluate any other javascript. I just needed to wait for the page to finish loading.

    // scrape.js: print the HTML content of the URL given as the first
    // command-line argument once a page load has finished.
    var system = require('system');
    if (system.args.length === 1) {
        console.log('Try to pass some arguments when invoking this script!');
        // Exit explicitly: without this call PhantomJS keeps running after the message is printed.
        phantom.exit(1);
    }
    else {
        var page = require('webpage').create();
        var address = system.args[1];

        page.open(address, function(status){
            // A failed open would otherwise leave the script waiting indefinitely.
            if (status !== 'success') {
                console.log('Unable to load the address: ' + address);
                phantom.exit(1);
                return;
            }
            // NOTE(review): this handler is attached after the initial load has
            // completed, so it presumably fires only on a later load-finished
            // event (e.g. a redirect or frame load) - confirm this is intended.
            page.onLoadFinished = function(status) {
                console.log(page.content);
                phantom.exit();
            };
        });
    }
    

    Save the above in a file called "scrape.js" and call it this way:

    phantomjs --ssl-protocol=any --ignore-ssl-errors=true scrape.js https://www.example.com
    

    The SSL-related params are added to avoid other issues that I was having with certain HTTPS sites (related to certificate loading issues).

    Hope this helps someone!

    0 讨论(0)
  • 2020-12-14 20:08

    Use the high-level wrapper, nightmarejs. You can easily click there and wait afterwards.

    Here is the code (Examples section):

    var Nightmare = require('nightmare');

    // Completion callback for run(): log the error if one was passed,
    // otherwise announce that the chain finished.
    function onRunFinished(err, nightmare) {
        if (err) {
            return console.log(err);
        }
        console.log('Done!');
    }

    // Visit the page, type a query into the search box, click submit, then run the queue.
    new Nightmare()
        .goto('http://yahoo.com')
        .type('input[title="Search"]', 'github nightmare')
        .click('.searchsubmit')
        .run(onRunFinished);
    

    More examples and API usage can be found on the project's GitHub page (Nightmare).

    0 讨论(0)
提交回复
热议问题