Fastest way to detect external URLs

前端 未结 9 1430
孤街浪徒
孤街浪徒 2020-12-02 14:23

What's the fastest method to detect if foo='http://john.doe' is an external URL (in comparison to window.location.href)?

相关标签:
9条回答
  • 2020-12-02 15:13

    Update: I did some more research and found that using new URL is actually the fastest, and IMO most straight-forward way of doing this.

    It is important to note that every method I've tried takes less than 1ms to run even on an old phone. So performance probably shouldn't be your primary consideration unless you are doing some large batch processing.

    These are the three methods I tried:

    new URL:

    /**
     * Tell whether `url` points at a different host than the current page.
     *
     * Relative references ("/about", "#top", "image.jpg") and protocol-relative
     * URLs ("//host/path") are resolved against the current page first, so they
     * no longer make the URL constructor throw.
     *
     * @param {string} url - Absolute or relative URL to check.
     * @returns {boolean} `true` when the resolved host (hostname plus any
     *   non-default port) differs from `location.host`.
     * @throws {TypeError} If `url` cannot be parsed even relative to the page.
     */
    function isExternalURL(url) {
      // The base is only used for relative input; absolute URLs ignore it.
      return new URL(url, location.href).host !== location.host;
    }
    

    String.replace:

    /**
     * Compare the host part of `url` with the host part of the current page
     * using plain string replacement.
     *
     * @param {string} url - URL to check.
     * @returns {boolean} `true` when the two extracted host strings differ.
     */
    function isExternalReplace(url) {
      // Drop the first "http://" and "https://" occurrence, then keep
      // everything in front of the first remaining slash.
      const hostOf = (address) => {
        const withoutScheme = address.replace('http://','').replace('https://','');
        const [host] = withoutScheme.split('/');
        return host;
      };

      const currentHost = hostOf(location.href);
      const targetHost = hostOf(url);
      return currentHost !== targetHost;
    }
    

    Regex:

    /**
     * Regex-based external-URL check.
     *
     * @param {string} url - URL to check.
     * @returns {boolean} `true` when `url` contains an http(s) dotted host name
     *   that differs from the one found in `location.href`; `false` when `url`
     *   contains no such host (e.g. a relative link) or the hosts are equal.
     */
    const isExternalRegex = (function(){
      // Captures a dotted host name that follows an http:// or https:// scheme.
      var domainRe = /https?:\/\/((?:[\w\d-]+\.)+[\w\d]{2,})/i;

      // Dotted host name found in `url`, or null when the pattern does not match.
      function domain(url) {
        var match = domainRe.exec(url);
        return match === null ? null : match[1];
      }

      return function(url) {
        var target = domain(url);
        // Without an absolute http(s) host there is nothing to compare, so the
        // link is treated as same-site instead of throwing on a null match.
        if (target === null) {
          return false;
        }
        // The page's own host may be null too (e.g. "localhost" has no dot);
        // any matched target then counts as external.
        return domain(location.href) !== target;
      };
    })();
    

    Here are some basic tests I used to test performance: https://is-external-url-test.glitch.me/

    0 讨论(0)
  • 2020-12-02 15:13

    I had to build on pseudosavant's and Jon's answers because I also needed to catch URLs beginning with "//" and URLs that do not include a sub-domain. Here's what worked for me:

    /**
     * Reduce a host name to its registrable-looking domain.
     *
     * Keeps the last two labels ("www.yahoo.com" -> "yahoo.com"), or the last
     * three when the second-to-last label is one of a small set of common
     * second-level domains ("www.example.co.uk" -> "example.co.uk").
     *
     * @param {string} domain - Host name without scheme, path or port.
     * @returns {string} The shortened domain; single-label hosts such as
     *   "localhost" are returned unchanged.
     */
    var getDomainName = function(domain) {
        var parts = domain.split('.').reverse();

        // A single label has no parent domain; joining parts[1] would produce
        // the bogus string "undefined.<label>".
        if (parts.length < 2) {
            return domain;
        }

        // See if the second-level label is a common SLD (as in "co.uk").
        if (parts.length >= 3 && /^(com|edu|gov|net|mil|org|nom|co|name|info|biz)$/i.test(parts[1])) {
            return parts[2] + '.' + parts[1] + '.' + parts[0];
        }

        return parts[1] + '.' + parts[0];
    };
    /**
     * Compare the shortened domain of `url` with that of the current page.
     *
     * @param {string} url - URL to check; may start with "http://", "https://"
     *   or "//", or carry no scheme at all.
     * @returns {boolean} `true` when getDomainName() gives different results
     *   for the page and for `url`.
     */
    var isExternalUrl = function(url) {
        // Strip the first occurrence of each scheme prefix, keep the text in
        // front of the first slash, and lower-case it.
        var hostOf = function(address) {
            var bare = address.replace("http://", "").replace("https://", "").replace("//", "");
            return bare.split("/")[0].toLowerCase();
        };

        var currentSite = getDomainName(hostOf(location.href));
        var targetSite = getDomainName(hostOf(url));
        return currentSite !== targetSite;
    };
    
    // Delegated click handler: alerts when a link that opens in a new window
    // (target="_blank") points at another site according to isExternalUrl().
    $(document).delegate('a', 'click', function() {
        var $link = $(this);

        // Only target="_blank" links are inspected; a missing target attribute
        // (undefined) fails this comparison as well.
        if ($link.attr('target') !== '_blank') {
            return;  // not an external link
        }

        var aHrefUrl = $link.attr('href');
        // .attr() returns undefined for an anchor without an href; bail out
        // instead of calling string methods on undefined.
        if (typeof aHrefUrl !== 'string') {
            return;
        }

        var isProtocolRelative = aHrefUrl.startsWith('//');
        if (!isProtocolRelative && (aHrefUrl.startsWith('/') || aHrefUrl.startsWith('#'))) {
            return;  // this is a relative link or anchor link
        }

        if (isExternalUrl(aHrefUrl)) {
            alert('clicked external link');
        }
    });
    <h3>Internal URLs:</h3>
    <ul>
      <li><a href="stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
      <li><a href="www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
      <li><a href="//stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">//stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
      <li><a href="//www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">//www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
    </ul>
    <h3>External URLs:</h3>
    <ul>
      <li><a href="http://www.yahoo.com" target="_blank">http://www.yahoo.com</a></li>
      <li><a href="yahoo.com" target="_blank">yahoo.com</a></li>
      <li><a href="www.yahoo.com" target="_blank">www.yahoo.com</a></li>
      <li><a href="//www.yahoo.com" target="_blank">//www.yahoo.com</a></li>
    </ul>

    0 讨论(0)
  • 2020-12-02 15:16

    pseudosavant's answer didn't exactly work for me, so I improved it.

    /**
     * Compare the host part of `url` with the host part of the current page.
     *
     * @param {string} url - URL to check.
     * @returns {boolean} `true` when the extracted host strings differ.
     */
    var isExternal = function(url) {
        // Remove the first "http://" / "https://" and keep the text before the
        // first remaining slash.
        var hostOf = function(address) {
            return address.replace("http://", "").replace("https://", "").split("/")[0];
        };

        var here = hostOf(location.href);
        var there = hostOf(url);
        return here !== there;
    };
    
    0 讨论(0)
  • 2020-12-02 15:18

    I've been using pseudosavant's method, but ran into a few cases where it triggered false positives, such as domain-less links ( /about, image.jpg ) and anchor links ( #about ). The old method would also give inaccurate results for different protocols ( http vs https ).

    Here's my slightly modified version:

    /**
     * Build a comparison key (scheme + host[:port]) from a URL.
     *
     * The "://" separator is removed, so "http://a.com/x" becomes "httpa.com".
     * Protocol-relative input ("//a.com") borrows the page's protocol first.
     *
     * @param {string} url - URL to normalise.
     * @returns {string} Lower-cased text in front of the first slash that
     *   remains after the separator has been removed.
     */
    var checkDomain = function(url) {
      var absolute = url.indexOf('//') === 0 ? location.protocol + url : url;
      // Keep the letter before "://" (captured as $1) and drop the separator.
      var collapsed = absolute.toLowerCase().replace(/([a-z])?:\/\//,'$1');
      return collapsed.split('/')[0];
    };
    
    /**
     * Decide whether `url` leaves the current site.
     *
     * @param {string} url - URL to check.
     * @returns {boolean} `false` for input containing neither ":" nor "//"
     *   (relative paths, file names, anchors); otherwise whether checkDomain()
     *   differs between the current page and `url`.
     */
    var isExternal = function(url) {
      var looksAbsolute = url.indexOf(':') !== -1 || url.indexOf('//') !== -1;
      if (!looksAbsolute) {
        return false;
      }
      return checkDomain(location.href) !== checkDomain(url);
    };
    

    Here are some tests with the updated function:

    // Example calls with their expected results. The per-line notes imply the
    // current page is served from http://samedomain.com (plain http, default port).
    isExternal('http://google.com'); // true
    isExternal('https://google.com'); // true
    isExternal('//google.com'); // true (no protocol)
    isExternal('mailto:mail@example.com'); // true
    isExternal('http://samedomain.com:8080/port'); // true (same domain, different port)
    isExternal('https://samedomain.com/secure'); // true (same domain, https)
    
    isExternal('http://samedomain.com/about'); // false (same domain, different page)
    isExternal('HTTP://SAMEDOMAIN.COM/about'); // false (same domain, but different casing)
    isExternal('//samedomain.com/about'); // false (same domain, no protocol)
    isExternal('/about'); // false
    isExternal('image.jpg'); // false
    isExternal('#anchor'); // false
    

    It's more accurate overall, and it even ends up being marginally faster, according to some basic jsperf tests. If you leave off the .toLowerCase() for case-insensitive testing, you can speed it up even more.

    0 讨论(0)
  • 2020-12-02 15:21

    If you consider a URL to be external when its scheme, host or port differs, you could do something like this:

    /**
     * Treat `url` as external when its scheme, host or port differs from the
     * current page.
     *
     * Scheme and host are compared case-insensitively, so
     * "HTTP://SAMEDOMAIN.COM/x" is not reported as external on samedomain.com.
     *
     * @param {string} url - Absolute, protocol-relative or relative URL.
     * @returns {boolean} `true` when an explicit scheme or authority in `url`
     *   does not match `location.protocol` / `location.host`.
     */
    function isExternal(url) {
        // Groups: [1] scheme incl. ":", [2] authority, [3] path, [4] query,
        // [5] fragment. Every group is optional, so any string matches.
        var match = url.match(/^([^:\/?#]+:)?(?:\/\/([^\/?#]*))?([^?#]+)?(\?[^#]*)?(#.*)?/);
        var scheme = match[1];
        var authority = match[2];

        if (typeof scheme === "string" && scheme.length > 0 && scheme.toLowerCase() !== location.protocol) return true;

        if (typeof authority === "string" && authority.length > 0) {
            // Strip the page protocol's default port (or a bare trailing ":"),
            // because location.host omits the default port.
            var defaultPort = {"http:":80,"https:":443}[location.protocol];
            var host = authority.toLowerCase().replace(new RegExp(":("+defaultPort+")?$"), "");
            if (host !== location.host) return true;
        }

        return false;
    }
    
    0 讨论(0)
  • 2020-12-02 15:23

    For my purposes I made a small modification to shshaw's answer so that it also rejects links that are empty or just a single character (presumably '#'), for which the original method returns a false positive. I use it to show users that they are about to leave my page by adding a Font Awesome icon to such links.

    // same thing here, no edit
    /**
     * Build a comparison key (scheme + host[:port]) from a URL.
     *
     * @param {string} url - URL to normalise; a leading "//" is completed with
     *   the current page's protocol.
     * @returns {string} Lower-cased text before the first slash, with the
     *   "://" separator removed (e.g. "http://a.com/x" -> "httpa.com").
     */
    function checkDomain(url) {
        var isProtocolRelative = url.indexOf('//') === 0;
        var full = isProtocolRelative ? location.protocol + url : url;
        // $1 keeps the last scheme letter while the "://" itself is dropped.
        var withoutSeparator = full.toLowerCase().replace(/([a-z])?:\/\//,'$1');
        var pieces = withoutSeparator.split('/');
        return pieces[0];
    }
    
    /**
     * Decide whether `url` leaves the current site, ignoring one-character
     * links when looking for a ":".
     *
     * @param {string} url - URL to check.
     * @returns {boolean} `false` unless `url` is longer than one character and
     *   contains ":", or contains "//"; otherwise whether checkDomain()
     *   differs between the current page and `url`.
     */
    function isExternal(url) {
        // Same grouping as the implicit precedence: (length && ":") || "//".
        var hasSchemeSeparator = url.length > 1 && url.indexOf(':') !== -1;
        var looksAbsolute = hasSchemeSeparator || url.indexOf('//') !== -1;
        if (!looksAbsolute) {
            return false;
        }
        return checkDomain(location.href) !== checkDomain(url);
    }
    
    // add some icon to external links (function is called in an init method)
    /**
     * Append an "external link" icon to every anchor whose href is external
     * according to isExternal(). The markup uses Font Awesome classes, so the
     * Font Awesome CSS and font are assumed to be loaded.
     */
    function addExternalLinkIcon(){
        $("a[href]").each(function(index, anchor){
            var $anchor = $(anchor);
            // Leave same-site links untouched.
            if(!isExternal($anchor.attr("href"))){
                return;
            }
            $anchor.append(" <i class='fa fa-external-link'></i> ");
        });
    }
    
    0 讨论(0)
提交回复
热议问题