Does anyone know a way to get all the URLs in a website using JavaScript?
I only need the links that start with the same domain name; there is no need to consider other links.
JavaScript to extract (and display) the domains, URLs, and links from a page. The "for(var i = document.links.length; i --> 0;)" method is a good way to walk the collection of links. Here is an example that pulls them from specific parts of the HTML page.
You could alter it to select and filter whatever you want, and then use the list however you like. I wanted to show a working example.
// Pattern that splits a URL into scheme, host, path, query and fragment.
// Capture groups read by this script:
//   [0] the whole matched text
//   [3] host portion: sub-domain labels, domain, optional short extra suffix, optional ":port"
//   [5] "name.suffix" (suffix of 1-4 characters), e.g. "example.com"
// Declared with `var`, as before, so it stays reachable as a global from
// other scripts on the page.
var re = /^((http[s]?|ftp|mailto):(?:\/\/)?)?\/?(([^\/\.]+\.)*?([^\/\.]+\.[^:\/\s\.]{1,4})?(\.[^:\/\s\.]{1,2})?(:\d+)?)($|\/)([^#?\s]+)?(.*?)?(#[\w\-]+)?$/i;
// Same pattern with the global flag. No longer used below (a global match()
// returns an array of matches and drops the capture groups); retained only so
// existing references to `reG` elsewhere keep working.
var reG = /^((http[s]?|ftp|mailto):(?:\/\/)?)?\/?(([^\/\.]+\.)*?([^\/\.]+\.[^:\/\s\.]{1,4})?(\.[^:\/\s\.]{1,2})?(:\d+)?)($|\/)([^#?\s]+)?(.*?)?(#[\w\-]+)?$/ig;

// Text shown when a link has no recognisable domain / website part.
var MISSING_PART_LABEL = "(none)";

/**
 * Break a URL string into the pieces this script displays.
 *
 * @param {string} href - URL to inspect (here: an anchor's `href`).
 * @returns {{domain: (string|null), website: (string|null), full: string}}
 *   `domain` is capture group 5 of `re`, `website` is capture group 3 and
 *   `full` is the matched text. A part the pattern did not capture (or
 *   captured as an empty string) is reported as null. When the pattern does
 *   not match at all (e.g. "javascript:void(0)"), both parts are null and
 *   `full` is the input itself.
 */
function parseLinkParts(href) {
  const parts = href.match(re);
  if (parts === null) {
    return { domain: null, website: null, full: href };
  }
  return {
    // `||` on purpose: both undefined (group did not participate) and ""
    // (group matched nothing) mean "no such part".
    domain: parts[5] || null,
    website: parts[3] || null,
    full: parts[0],
  };
}

/**
 * Build a <ul id="domainsList"> describing every <a> element of `doc` and
 * append it to the element whose id is "domains".
 *
 * Each list item contains three text lines separated by <br>:
 * "Domain: ...", "Website: ..." and "Full Link: ...".
 *
 * @param {Document} doc - Document to read the anchors from and render into.
 * @returns {(Element|null)} The created <ul>, or null when `doc` has no
 *   element with id "domains" (nothing is rendered in that case).
 */
function renderDomainList(doc) {
  const container = doc.getElementById("domains");
  if (container === null) {
    return null;
  }

  const listElement = doc.createElement("ul");
  listElement.setAttribute("id", "domainsList");
  listElement.setAttribute("class", "list-group");

  // Snapshot the live collection before iterating.
  const anchors = Array.from(doc.getElementsByTagName("a"));

  anchors.forEach(function (link) {
    const info = parseLinkParts(link.href);
    const domainText = info.domain === null ? MISSING_PART_LABEL : info.domain;
    const websiteText = info.website === null ? MISSING_PART_LABEL : info.website;

    const item = doc.createElement("li");
    item.setAttribute("class", "list-group-item domain");
    item.appendChild(doc.createTextNode(`Domain: ${domainText}`));
    item.appendChild(doc.createElement("br"));
    item.appendChild(doc.createTextNode(`Website: ${websiteText}`));
    item.appendChild(doc.createElement("br"));
    item.appendChild(doc.createTextNode(`Full Link: ${info.full}`));

    listElement.appendChild(item);
  });

  // Attach once, after the list is fully populated.
  container.appendChild(listElement);
  return listElement;
}

// Only render when a DOM is available, so the file can also be loaded in
// environments without `document` (e.g. when testing parseLinkParts).
if (typeof document !== "undefined") {
  renderDomainList(document);
}
Pull Domains from a page
JSFiddle of a working copy