How to follow all links in CasperJS?

谁都会走 提交于 2019-11-28 10:23:00

I have this script that first collects all links from a page and saves their 'href' attributes to an array. It then iterates over this array, opens each link one by one, and echoes the URL:

// Create the Casper instance with debug-level log messages printed to the
// console. CasperJS has no "verbose" log level and no `debug` option:
// `verbose` switches console output of log messages on, and `logLevel`
// selects the minimum level that gets logged.
var casper = require('casper').create({
    verbose: true,
    logLevel: "debug"
});
// Assigned by the getLinks step; iterated by the step that opens every link.
var links;

// Register the page whose links will be followed.
casper.start('http://localhost:8000');

// Step: collect the target URL of every link on the currently loaded page
// and store the list in the outer `links` variable.
casper.then(function getLinks(){
    links = this.evaluate(function(){
        var anchors = document.getElementsByTagName('a');
        var urls = [];
        Array.prototype.forEach.call(anchors, function(anchor){
            // Anchors without an href attribute have no target to open.
            if (anchor.hasAttribute('href')) {
                // The `href` property is the URL resolved against the page,
                // whereas getAttribute('href') returns the raw attribute text,
                // which may be relative and therefore not openable on its own.
                urls.push(anchor.href);
            }
        });
        return urls;
    }) || []; // keep `links` an array even if evaluate() yields nothing
});
// Step: for every collected link, queue a navigation to it followed by a
// callback that echoes the URL of the page that is current at that point.
casper.then(function(){
    this.each(links, function(instance, target){
        instance.thenOpen(target, function(){
            var currentUrl = this.getCurrentUrl();
            this.echo(currentUrl);
        });
    });
});

// Run the registered steps and exit once they have completed.
casper.run(function(){
    this.exit();
});
Artjom B.

rusln's answer works great if all the links have a meaningful href attribute (an actual URL). If you also want to click every a element that triggers a JavaScript function instead, you may need to iterate over the elements some other way.

I propose using the XPath generator from stijn de ryck for an element.

  1. Collect the XPaths of all a elements that are on the page.
  2. For every XPath you collected, open the page again and click the corresponding a element by its XPath.
  3. Wait a little if it is a single-page application.
  4. Do something.
// Page whose links are clicked one at a time.
var startURL = 'http://localhost:8000';
// XPath expressions for the page's links; assigned by the getLinks step.
var xPaths;
// Declared with its own `var`: the original comma-separated list was missing
// a comma after `xPaths`, which silently turned `x` into an implicit global.
var x = require('casper').selectXPath;

// NOTE: `casper` is not created in this snippet; it is expected to exist
// already (e.g. via require('casper').create()).
casper.start(startURL);

casper.then(function getLinks(){
    xPaths = this.evaluate(function(){
        // copied from https://stackoverflow.com/a/5178132/1816580
        /**
         * Build an XPath expression for the given element by walking from the
         * element up through its ancestors.
         *
         * For each element on the way up, one path segment is produced:
         *  - an id that occurs exactly once in the document yields `id("...")`
         *    and ends the walk, since that segment is already absolute;
         *  - an id that occurs more than once yields `tag[@id="..."]`;
         *  - otherwise a class attribute yields `tag[@class="..."]`;
         *  - otherwise the 1-based position among preceding siblings with the
         *    same tag name yields `tag[n]`.
         *
         * NOTE(review): id/class values are inserted unescaped, and the
         * `[@class=...]` / `[@id=...]` segments are not guaranteed to match
         * only one element when siblings share the same value.
         *
         * @param {Element} elm Element to describe.
         * @returns {string|null} The XPath, or null if no segment was produced
         *                        (e.g. `elm` is not an element node).
         */
        function createXPathFromElement(elm) {
            var allNodes = document.getElementsByTagName('*');
            var segs = [];
            // Declared locally: the original assigned `i` and `sib` without
            // declaring them, leaking globals (a ReferenceError in strict mode).
            var n, i, sib, uniqueIdCount;
            for (; elm && elm.nodeType === 1; elm = elm.parentNode) {
                if (elm.hasAttribute('id')) {
                    // Count how often this id occurs; stop as soon as it is
                    // known not to be unique.
                    uniqueIdCount = 0;
                    for (n = 0; n < allNodes.length; n++) {
                        if (allNodes[n].hasAttribute('id') && allNodes[n].id === elm.id) {
                            uniqueIdCount++;
                        }
                        if (uniqueIdCount > 1) {
                            break;
                        }
                    }
                    if (uniqueIdCount === 1) {
                        segs.unshift('id("' + elm.getAttribute('id') + '")');
                        return segs.join('/');
                    }
                    segs.unshift(elm.localName.toLowerCase() + '[@id="' + elm.getAttribute('id') + '"]');
                } else if (elm.hasAttribute('class')) {
                    segs.unshift(elm.localName.toLowerCase() + '[@class="' + elm.getAttribute('class') + '"]');
                } else {
                    // Position of elm among its preceding same-tag siblings.
                    for (i = 1, sib = elm.previousSibling; sib; sib = sib.previousSibling) {
                        if (sib.localName === elm.localName) {
                            i++;
                        }
                    }
                    segs.unshift(elm.localName.toLowerCase() + '[' + i + ']');
                }
            }
            return segs.length ? '/' + segs.join('/') : null;
        }
        var links = document.getElementsByTagName('a');
        var xPaths = Array.prototype.map.call(links, createXPathFromElement);
        return xPaths;
    });
});
// Step: for every collected XPath, queue: re-open the start page, click the
// element the XPath selects, pause, then echo the current URL.
casper.then(function(){
    this.each(xPaths, function(instance, path){
        instance
            .thenOpen(startURL)
            .thenClick(x(path))
            // waiting some time may be necessary for single page applications
            .wait(1000)
            .then(function(){
                // do something meaningful here
                var currentUrl = this.getCurrentUrl();
                this.echo(currentUrl);
            });

        // Uncomment the following line in case each click opens a new page instead of staying at the same page
        //instance.back()
    });
});

// Run the registered steps and exit once they have completed.
casper.run(function(){
    this.exit();
});
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!