How to follow all links in CasperJS?

前端 未结 2 770
长发绾君心
长发绾君心 2020-12-10 08:21

I'm having trouble clicking all JavaScript-based links in a DOM and saving the output. The links have the form



        
相关标签:
2条回答
  • 2020-12-10 08:49

    This script first collects every link on the page and stores their `href` attributes in an array, then iterates over that array, opening each link one by one and echoing its URL:

    // Crawl every link found on the start page and print the URL each one lands on.
    //
    // `verbose: true` prints log messages to the console as they happen and
    // `logLevel: 'debug'` lowers the threshold so that all of them are shown.
    // ("verbose" is not a log level and `debug` is not a Casper option.)
    var casper = require('casper').create({
        verbose: true,
        logLevel: 'debug'
    });
    // Absolute URLs collected from the start page; filled in by the getLinks step.
    var links = [];

    casper.start('http://localhost:8000');

    casper.then(function getLinks() {
        // The callback passed to evaluate() runs inside the page, so it can only
        // use page globals such as `document` and must return plain values.
        var found = this.evaluate(function () {
            var anchors = document.getElementsByTagName('a');
            // Read the `href` property rather than the attribute: the property is
            // the URL resolved against the page, whereas the raw attribute may be
            // relative or missing (null), neither of which thenOpen() can load.
            var urls = Array.prototype.map.call(anchors, function (anchor) {
                return anchor.href;
            });
            // Drop anchors without a target and javascript: pseudo-links, which
            // are not navigable addresses.
            return urls.filter(function (url) {
                return typeof url === 'string' &&
                    url !== '' &&
                    url.indexOf('javascript:') !== 0;
            });
        });
        // evaluate() yields null when the page-side function fails; fall back to
        // an empty list so the next step simply has nothing to iterate over.
        links = found || [];
    });
    casper.then(function () {
        this.each(links, function (self, link) {
            // Queue one navigation step per link; the callback runs once the
            // page has loaded.
            self.thenOpen(link, function () {
                this.echo(this.getCurrentUrl());
            });
        });
    });
    casper.run(function () {
        this.exit();
    });
    
    0 讨论(0)
  • 2020-12-10 08:50

    rusln's answer works great if all the links have a meaningful href attribute (an actual URL). If you also want to click every `a` element that triggers a JavaScript function, you need to iterate over the elements some other way.

    I propose generating an XPath for each element with the XPath generator from Stijn de Ryck.

    1. You can then sample all XPaths that are on the page.
    2. Then, for every `a` element whose XPath you collected, open the page and click the element by its XPath.
    3. Wait a little if it is a single page application
    4. Do something
    // Create the Casper instance that the rest of the script drives.
    var casper = require('casper').create();
    // selectXPath marks a string as an XPath expression so that Casper methods
    // such as thenClick() do not treat it as a CSS selector.
    var x = require('casper').selectXPath;
    var startURL = 'http://localhost:8000';
    // XPath of every link on the start page; assigned by the getLinks step.
    var xPaths;

    casper.start(startURL);
    
    casper.then(function getLinks(){
        xPaths = this.evaluate(function(){
            // copied from https://stackoverflow.com/a/5178132/1816580
            /**
             * Builds an XPath expression that locates `elm` in the current document.
             *
             * Walks from `elm` up through its element ancestors and emits one path
             * segment per level:
             *   - an element whose id is unique in the document anchors the path as
             *     `id("...")` and ends the walk;
             *   - an element with a (non-unique) id becomes `tag[@id="..."]`;
             *   - an element with a class becomes `tag[@class="..."]`, followed by a
             *     position such as `[2]` when sibling elements share the same tag
             *     and class, so that each of them gets a distinct path;
             *   - any other element becomes `tag[n]`, its 1-based position among
             *     same-tag siblings.
             *
             * @param {Element} elm - element to describe; may be null.
             * @returns {?string} the XPath, or null when `elm` is not an element.
             */
            function createXPathFromElement(elm) {
                // Counts the element siblings of `node`, walking in `direction`
                // ('previousSibling' or 'nextSibling'), that have the same tag and,
                // when `className` is given, exactly that class attribute.
                function countMatchingSiblings(node, direction, className) {
                    var count = 0;
                    for (var sib = node[direction]; sib; sib = sib[direction]) {
                        if (sib.nodeType !== 1 || sib.localName !== node.localName) {
                            continue;
                        }
                        if (className === undefined || sib.getAttribute('class') === className) {
                            count++;
                        }
                    }
                    return count;
                }

                var allNodes = document.getElementsByTagName('*');
                var segs = [];
                for (; elm && elm.nodeType === 1; elm = elm.parentNode) {
                    var tag = elm.localName.toLowerCase();
                    if (elm.hasAttribute('id')) {
                        var id = elm.getAttribute('id');
                        // Stop scanning as soon as a second element with this id turns up.
                        var uniqueIdCount = 0;
                        for (var n = 0; n < allNodes.length && uniqueIdCount <= 1; n++) {
                            if (allNodes[n].hasAttribute('id') && allNodes[n].id === elm.id) {
                                uniqueIdCount++;
                            }
                        }
                        if (uniqueIdCount === 1) {
                            // A unique id pins the element down; no ancestors needed.
                            segs.unshift('id("' + id + '")');
                            return segs.join('/');
                        }
                        segs.unshift(tag + '[@id="' + id + '"]');
                    } else if (elm.hasAttribute('class')) {
                        var className = elm.getAttribute('class');
                        var before = countMatchingSiblings(elm, 'previousSibling', className);
                        var after = countMatchingSiblings(elm, 'nextSibling', className);
                        var segment = tag + '[@class="' + className + '"]';
                        // Only add a position when the class alone is ambiguous, so
                        // unambiguous paths keep their short form.
                        if (before + after > 0) {
                            segment += '[' + (before + 1) + ']';
                        }
                        segs.unshift(segment);
                    } else {
                        var position = countMatchingSiblings(elm, 'previousSibling') + 1;
                        segs.unshift(tag + '[' + position + ']');
                    }
                }
                return segs.length ? '/' + segs.join('/') : null;
            }
            var links = document.getElementsByTagName('a');
            var xPaths = Array.prototype.map.call(links, createXPathFromElement);
            return xPaths;
        });
    });
    // For every collected XPath: reload the start page, click the element the
    // XPath points at, pause, then report the URL the browser ended up on.
    casper.then(function () {
        this.each(xPaths, function (self, linkXPath) {
            self.thenOpen(startURL)
                .thenClick(x(linkXPath))
                // waiting some time may be necessary for single page applications
                .wait(1000)
                .then(function () {
                    // do something meaningful here
                    var currentUrl = this.getCurrentUrl();
                    this.echo(currentUrl);
                });

            // Uncomment the following line in case each click opens a new page instead of staying at the same page
            //self.back()
        });
    });
    // Run the queued steps; the callback ends the script once they have finished.
    casper.run(function () {
        var instance = this;
        instance.exit();
    });
    
    0 讨论(0)
提交回复
热议问题