CasperJS loop or iterate through multiple web pages?

前端 未结 3 1645
难免孤独
难免孤独 2020-12-05 01:20

I have a CasperJS script that scrapes ratings and dates from one webpage. Now I want to scrape the same data from multiple pages under the same website. How can I loop through multiple pages?

3条回答
  •  盖世英雄少女心
    2020-12-05 01:43

    Thanks, Fanch and Artjom B. Both of your answers led to the working solution. I used the recursive walk through the 'next' pages of the pagination, as suggested by Artjom B. I then added a wait() call to make sure the next ratings page has loaded before scraping it. Without this wait() call, the same page gets scraped multiple times in the interval between clicking 'next' and the next page finishing loading. See the working code below:

    // CasperJS instance used by every step of this script.
    var casper = require('casper').create({
        // Log at debug level and print the log output to the console.
        verbose: true,
        logLevel: "debug",
        // Only the page markup is scraped, so image and plugin loading is disabled.
        pageSettings: {
            loadPlugins: false,
            loadImages: false
        }
    });

    // Script-level holders that getRatingsAndWrite() reassigns with the values
    // scraped from the page currently loaded.
    var ratings = [];
    var dates = [];
    
    var fs = require('fs');
    
    /**
     * Collects the `title` attribute of every rating image matched on the page.
     *
     * Reads the global `document`, so it is meant to be handed to
     * `casper.evaluate()` and must stay self-contained.
     *
     * @returns {Array} One `title` attribute value per matched <img>, in document order.
     */
    function getRatings() {
        var selector = '#BVRRRatingOverall_Review_Display > div.BVRRRatingNormalImage > img';
        var images = document.querySelectorAll(selector);
        var titles = [];

        for (var idx = 0; idx < images.length; idx++) {
            titles.push(images[idx].getAttribute('title'));
        }

        return titles;
    }
    
    /**
     * Collects the inner HTML of every review-date element matched on the page.
     *
     * Reads the global `document`, so it is meant to be handed to
     * `casper.evaluate()` and must stay self-contained.
     *
     * @returns {Array} One `innerHTML` value per matched element, in document order.
     */
    function getDate() {
        var selector = '#BVSubmissionPopupContainer > div.BVRRReviewDisplayStyle5Header > div.BVRRReviewDateContainer > span.BVRRValue.BVRRReviewDate';
        var nodes = document.querySelectorAll(selector);
        var found = [];

        for (var idx = 0; idx < nodes.length; idx++) {
            found.push(nodes[idx].innerHTML);
        }

        return found;
    }
    
    /**
     * Scrapes the ratings and dates from the currently loaded page, appends one
     * line per rating to the output file, and then, if the "next page" link is
     * visible, clicks it, waits, and schedules itself again for the next page.
     *
     * Side effects: reassigns the outer `ratings` and `dates` variables, writes
     * to disk through `fs.write`, and enqueues further CasperJS steps.
     */
    function getRatingsAndWrite(){
        // NOTE(review): the original output path and the exact line format were
        // lost when this snippet was extracted (only `..., content, 'a')`
        // survived of the loop body). The path and the "rating;date" layout
        // below are placeholders — confirm both before relying on the output.
        var outputPath = 'ratings.csv';

        // Fall back to an empty list so a null/undefined evaluate() result does
        // not crash the `.length` reads below.
        ratings = casper.evaluate(getRatings) || [];
        dates = casper.evaluate(getDate) || [];

        casper.echo(ratings.length + ' ratings found:');

        for (var i = 0; i < ratings.length; i++) {
            // Tolerate fewer dates than ratings instead of writing "undefined".
            var date = i < dates.length ? dates[i] : '';
            var content = ratings[i] + ';' + date + '\n';
            // Mode 'a' appends, so lines from successive pages accumulate.
            fs.write(outputPath, content, 'a');
        }

        casper.echo(dates.length + ' dates found:');

        var nextLink = ".BVRRPageLink.BVRRNextPage > a";
        if (casper.visible(nextLink)) {
            casper.thenClick(nextLink);
            // Give the next page of reviews time to load; without the pause the
            // same page is scraped again before the new content arrives.
            casper.wait(3000);
            casper.then(getRatingsAndWrite);
        } else {
            casper.echo("END");
        }
    }
    
    // Product page whose reviews are scraped first.
    var startUrl = 'http://www.t-mobile.com/cell-phones/htc-one-m8.html';

    // Open the product page, then queue the scraping step as the first step
    // after it loads.
    casper.start(startUrl);
    casper.then(getRatingsAndWrite);

    // Execute the queued steps.
    casper.run();
    

提交回复
热议问题