casper.waitForSelector, timeout and error handling

社会主义新天地 提交于 2019-12-08 13:10:02

问题


I made a quick script to capture a screen shot from a list of Reddit posts. The script fetches the reddit urls from a json file(example shown below) and then visits each page to capture a screenshot.

The script works well for the most. However, every once in a while it will hang/continue to run if casper.waitForSelector criteria is not met. Eventually it will stall the whole server. I thought I was doing sufficient error handling. How can I modify the script to ignore any page that doesn't fit the casper.waitForSelector criteria and move on to the next item for screen capture?

JS

var casper = require('casper').create({
    verbose: true,
    viewportSize: {
        width: 1280,
        height: 720
    },
    logLevel: 'error',
    pageSettings: {
        ignoreSslErrors: true,
        loadImages: true, // load images
        loadPlugins: true, // do not load NPAPI plugins (Flash, Silverlight, ...)
        webSecurityEnabled: false, // ajax
        userAgent: 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/29.0.1547.2 Safari/537.36'
    }
});

//Create random names for files
function randString(x) {
    var s = "";
    while (s.length < x && x > 0) {
        var r = Math.random();
        s += (r < 0.1 ? Math.floor(r * 100) : String.fromCharCode(Math.floor(r * 26) + (r > 0.5 ? 97 : 65)));
    }
    return s;
}


function getReddit(reddit, filename) {
    casper.thenOpen(reddit, function() {
        if (casper.exists("#thing_" + filename)) {
            casper.waitForSelector("#thing_" + filename, function() {
                casper.captureSelector(randString(10) + '.png', "#thing_" + filename, {
                    quality: 100
                });
            });
        } else {
            console.log("Reddit Error: " + filename);
        }
    });
}

var link = 'http://localhost/test.json';

casper.start(link);

casper.on("resource.received", function(resource) {
    var results = this.evaluate(function(url) {
        return __utils__.sendAJAX(url, "GET");
    }, resource.url);
    var x = JSON.parse(results);
    //this.echo(x.length);
    for (var i = 0; i < x.length; ++i) {
        var reddit = x[i].post_url;
        var filename = x[i].id;
        getReddit(reddit, filename);
    }
});

casper.run();

JSON

{
post_url: "https://www.reddit.com/r/todayilearned/comments/4marhg/til_that_in_the_16th_century_christians_called/",
bit_id: "l6KE0vzMmgQ",
id: "t3_4marhg"
},
{
post_url: "https://www.reddit.com/r/videos/comments/4mbbab/man_ignores_museum_rules_touches_priceless_clock/",
bit_id: "2GK22rGYWKx",
id: "t3_4mbbab"
},
{
post_url: "https://www.reddit.com/r/space/comments/4mc1av/an_alien_world_67p_as_seen_by_rosetta_two_days_ago/",
id: "t3_4mc1av"
},
{
post_url: "https://www.reddit.com/r/worldnews/comments/4mc8uv/young_fish_become_hooked_on_eating_plastic_in_the/",
id: "t3_4mc8uv"
},
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4mcda3/til_that_the_giant_tortoise_did_not_receive_a/",
id: "t3_4mcda3"
},
{
post_url: "https://www.reddit.com/r/science/comments/4mcl0y/a_new_study_has_shown_that_mothers_who_are/",
id: "t3_4mcl0y"
},
{
post_url: "https://www.reddit.com/r/news/comments/4mcveg/bp_agrees_to_pay_175_million_to_settle_claims_by/",
id: "t3_4mcveg"
},
{
post_url: "https://www.reddit.com/r/todayilearned/comments/4mdddw/til_that_when_a_british_captive_officer/",
id: "t3_4mdddw"
}

回答1:


If you provide the onTimeout callback function to waitForSelector (3rd argument), then it will be executed instead of the default behavior (stop script) on error.

You can pass in an empty function or a function with some logging:

casper.waitForSelector("#thing_" + filename, function _then() {
    this.captureSelector(randString(10) + '.png', "#thing_" + filename, {
        quality: 100
    });
}, function _onTimeout(){
    this.echo("#thing_" + filename + " not found", "WARNING");
});

You can also change the option casper.options.silentErrors to true if you want this behavior for every function.



来源:https://stackoverflow.com/questions/37678941/casper-waitforselector-timeout-and-error-handling

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!