How to get all the URLs in a web site using JavaScript?

前端 未结 4 2135
借酒劲吻你
借酒劲吻你 2021-01-02 17:50

Does anyone know a way to get all the URLs in a website using JavaScript?

I only need the links that start with the same domain name. There is no need to consider other links.

4条回答
  •  忘掉有多难
    2021-01-02 18:14

    Here is JavaScript to extract (and display) the domains, URLs, and links from a page. The "for(var i = document.links.length; i --> 0;)" method gives you a good collection to work with. Here is an example that pulls the links from specific parts of the HTML page.

    You could alter it to select and filter whatever you want, and then use the resulting list however you like. I wanted to show a working example.

    // Splits a URL-like string into scheme, host, port, path, query and fragment.
    // Capture group 3 is the whole host part (e.g. "www.example.com") and
    // capture group 5 is the "name.ext" pair the pattern isolates inside it
    // (e.g. "example.com"). Either group can be missing or empty.
    const re = /^((http[s]?|ftp|mailto):(?:\/\/)?)?\/?(([^\/\.]+\.)*?([^\/\.]+\.[^:\/\s\.]{1,4})?(\.[^:\/\s\.]{1,2})?(:\d+)?)($|\/)([^#?\s]+)?(.*?)?(#[\w\-]+)?$/i;

    // Placeholder shown when a part of the link cannot be extracted.
    const UNKNOWN_PART = "(unknown)";

    /**
     * Builds the three text lines displayed for one link.
     *
     * Hrefs the pattern cannot parse (for example "javascript:void(0)") make
     * String.prototype.match return null; those, and matches whose host groups
     * are missing or empty, fall back to a placeholder instead of throwing or
     * printing "undefined".
     *
     * @param {string} href - The link target to describe.
     * @returns {string[]} The "Domain", "Website" and "Full Link" lines, in order.
     */
    function describeLink(href) {
      const parts = href.match(re);
      const domain = (parts && parts[5]) || UNKNOWN_PART;
      const website = (parts && parts[3]) || UNKNOWN_PART;
      return [`Domain: ${domain}`, `Website: ${website}`, `Full Link: ${href}`];
    }

    /**
     * Creates <ul id="domainsList"> inside the element with id "domains" and
     * adds one <li> per <a> element found in the given document.
     *
     * @param {Document} doc - Document to read the anchors from and render into.
     * @returns {Element|null} The created list, or null when the "domains"
     *   container does not exist.
     */
    function renderDomainList(doc) {
      const container = doc.getElementById("domains");
      if (!container) {
        console.warn('Cannot list links: no element with id "domains" was found.');
        return null;
      }

      const listElement = doc.createElement("ul");
      listElement.setAttribute("id", "domainsList");
      listElement.setAttribute("class", "list-group");
      container.appendChild(listElement);

      // Array.from takes a snapshot of the live collection before iterating.
      Array.from(doc.getElementsByTagName("a")).forEach(function (link) {
        const listItem = doc.createElement("li");
        listItem.setAttribute("class", "list-group-item domain");

        describeLink(link.href).forEach(function (line, index) {
          // Lines are separated by <br>, so none is needed before the first one.
          if (index > 0) {
            listItem.appendChild(doc.createElement("br"));
          }
          listItem.appendChild(doc.createTextNode(line));
        });

        // Append once; the list item only ever belongs to this one list.
        listElement.appendChild(listItem);
      });

      return listElement;
    }

    // Render immediately when a DOM is available, as the script always did.
    if (typeof document !== "undefined") {
      renderDomainList(document);
    }
    
    
    
        
        
        
        Pull Domains from a page
        
    
        
    
    
    
    
    
    
    

    JSFiddle of a working copy

提交回复
热议问题