How to browse a whole website using selenium?

前端 未结 5 815
悲哀的现实
悲哀的现实 2020-12-03 15:39

Is it possible to go through all the URIs of a given URL (website) using Selenium?

My aim is to launch firefox browser using selenium with a given URL of my choice

5条回答
  •  情歌与酒
    2020-12-03 16:11

    I know you asked for a Python example, but I was just in the middle of setting up a simple repo for Protractor tests, and the task you want to accomplish seems to be very easy to do with Protractor (which is just a wrapper around WebDriver).

    here is the code in javascript:

    /**
     * Protractor spec: opens a start page, collects the href of every <a>
     * element on it, then loads each collected URL one after another.
     *
     * Relies on the globals provided by the Protractor/Jasmine runner
     * (`describe`, `beforeEach`, `it`, `browser`).
     */
    describe( 'stackoverflow scrapping', function () {

      beforeEach(function () {
        // Turn off Protractor's Angular synchronization before driving the pages.
        browser.ignoreSynchronization = true;
      } );

      it( 'should find the number of links in a given url', function () {
        browser.get( 'http://stackoverflow.com/questions/24257802/how-to-browse-a-whole-website-using-selenium' );

        // Handed to executeAsyncScript, so it runs in the page, not in Node.
        // It reports the list of link targets back through the callback.
        var script = function () {
          // WebDriver passes the "done" callback as the LAST argument.
          var cb = arguments[ arguments.length - 1 ];
          var nodes = document.querySelectorAll( 'a' );
          nodes = [].slice.call( nodes ).map(function ( a ) {
            return a.href;
          } );
          cb( nodes );
        };

        // Returning the chain lets the runner see failures from later visits.
        return browser.executeAsyncScript( script ).then(function ( res ) {
          // Anchors without an href report an empty string. Drop those (and
          // anything that is not a string) up front so that a single blank
          // entry cannot be mistaken for "queue exhausted" and end the crawl.
          var urls = res.filter(function ( url ) {
            return typeof url === 'string' && url !== '';
          } );

          // Load one URL, then pause for a second before the next one.
          var visit = function ( url ) {
            console.log( 'visiting url', url );
            browser.get( url );
            return browser.sleep( 1000 );
          };

          // Pops URLs off the end of the queue until none are left; each
          // visit is chained onto the previous one so pages load sequentially.
          var doVisit = function () {
            if ( urls.length === 0 ) {
              console.log( 'done visiting pages' );
              return;
            }
            return visit( urls.pop() ).then( doVisit );
          };

          return doVisit();
        } );
      } );

    } );
    

    You can clone the repo from here

    Note: I know Protractor is probably not the best tool for this, but it was so simple to do with it that I just gave it a try.

    I tested this with firefox (you can use the firefox-conf branch for it, but it will require that you fire webdriver manually) and chrome. If you're using osx this should work with no problem (assuming you have nodejs installed)

提交回复
热议问题