Fastest way to detect external URLs

只谈情不闲聊 提交于 2019-12-17 15:37:14

问题


What's the fastest method to detect if foo='http://john.doe' is an external URL (in comparison to window.location.href)?


回答1:


I know the regex version has already been accepted but I would bet this is "faster" than doing that complex of a regex. String.replace is quite fast.

// Reports whether `url` points at a different host than the current page.
// Both addresses are reduced to the text before their first "/" after the
// first literal "http://" and "https://" occurrences have been removed, and
// the two results are compared as plain, case-sensitive strings.
var isExternal = function(url) {
    // Strip the scheme prefixes, then keep only the leading path segment.
    function hostPart(address) {
        var withoutHttp = address.replace('http://', '');
        var withoutScheme = withoutHttp.replace('https://', '');
        var segments = withoutScheme.split('/');
        return segments[0];
    }

    var currentHost = hostPart(location.href);
    var candidateHost = hostPart(url);
    return currentHost !== candidateHost;
};

Update

I decided to do a little more research on this and found a faster method that uses a Regex.

// Reports whether `url` names a different http(s) domain than the current page.
//
// The regular expression is compiled once and captured in the closure so it is
// not rebuilt on every call.
//
// @param {string} url - the address to classify.
// @returns {boolean} true when `url` contains an http(s) domain that differs
//     from the one found in location.href; false when the domains match or when
//     `url` contains no recognisable http(s) domain (relative paths, anchors,
//     other schemes), which previously raised a TypeError.
var isExternalRegexClosure = (function(){
    var domainRe = /https?:\/\/((?:[\w\d-]+\.)+[\w\d]{2,})/i;

    // Returns the lower-cased domain captured from `url`, or null when the
    // pattern does not match. exec() yields null on a failed match, so the
    // capture group must not be indexed unconditionally.
    function domain(url) {
        var match = domainRe.exec(String(url));
        // Host names are case-insensitive, so normalise before comparing.
        return match ? match[1].toLowerCase() : null;
    }

    return function(url) {
        var target = domain(url);
        if (target === null) {
            // Nothing that looks like an http(s) domain: treat as not external.
            return false;
        }
        return domain(location.href) !== target;
    };
})();

In IE this is slightly faster than the String.replace method. However in Chrome and Firefox it is about twice as fast. Also, defining the Regex only once inside the closure instead of just inside the function normally is about 30% faster in Firefox.

Here is a jsperf examining four different ways of determining an external hostname.

It is important to note that every method I've tried takes less than 1ms to run even on an old phone. So performance probably shouldn't be your primary consideration unless you are doing some large batch processing.




回答2:


If you consider a URL being external if either the scheme, host or port is different, you could do something like this:

// Treats `url` as external when its scheme or its host (including port) differs
// from the current page. Components that are absent from `url` are not
// compared, so relative references are reported as internal.
function isExternal(url) {
    // Capture groups: 1 = scheme with trailing ":", 2 = authority (host[:port]).
    var parts = url.match(/^([^:\/?#]+:)?(?:\/\/([^\/?#]*))?([^?#]+)?(\?[^#]*)?(#.*)?/);
    var scheme = parts[1];
    var authority = parts[2];

    var hasScheme = typeof scheme === "string" && scheme.length > 0;
    if (hasScheme && scheme.toLowerCase() !== location.protocol) {
        return true;
    }

    var hasAuthority = typeof authority === "string" && authority.length > 0;
    if (hasAuthority) {
        // Drop a trailing default port for the page's protocol before comparing
        // against location.host.
        var defaultPort = {"http:":80,"https:":443}[location.protocol];
        var defaultPortSuffix = new RegExp(":("+defaultPort+")?$");
        if (authority.replace(defaultPortSuffix, "") !== location.host) {
            return true;
        }
    }

    return false;
}



回答3:


I've been using pseudosavant's method, but ran into a few cases where it triggered false positives, such as domain-less links ( /about, image.jpg ) and anchor links ( #about ). The old method would also give inaccurate results for different protocols ( http vs https ).

Here's my slightly modified version:

// Builds a comparison key for `url`: protocol-relative addresses ("//host")
// are given the page's protocol, the text is lower-cased, the first "://" is
// removed (so the scheme stays glued to the host), and everything from the
// first remaining "/" onwards is discarded.
var checkDomain = function(url) {
  var absolute = url.indexOf('//') === 0 ? location.protocol + url : url;
  var flattened = absolute.toLowerCase().replace(/([a-z])?:\/\//,'$1');
  return flattened.split('/')[0];
};

// A URL is only considered for the external check when it contains a ":" or a
// "//"; anything else (paths, file names, anchors) is internal. Candidates are
// external when their checkDomain() key differs from the current page's key.
var isExternal = function(url) {
  var looksAbsolute = url.indexOf(':') > -1 || url.indexOf('//') > -1;
  if ( !looksAbsolute ) { return false; }
  return checkDomain(location.href) !== checkDomain(url);
};

Here are some tests with the updated function:

// The expected results below imply the current page is served over plain http
// from samedomain.com on the default port.
isExternal('http://google.com'); // true
isExternal('https://google.com'); // true
isExternal('//google.com'); // true (no protocol)
isExternal('mailto:mail@example.com'); // true
isExternal('http://samedomain.com:8080/port'); // true (same domain, different port)
isExternal('https://samedomain.com/secure'); // true (same domain, https)

isExternal('http://samedomain.com/about'); // false (same domain, different page)
isExternal('HTTP://SAMEDOMAIN.COM/about'); // false (same domain, but different casing)
isExternal('//samedomain.com/about'); // false (same domain, no protocol)
isExternal('/about'); // false
isExternal('image.jpg'); // false
isExternal('#anchor'); // false

It's more accurate overall, and it even ends up being marginally faster, according to some basic jsperf tests. If you leave off the .toLowerCase() for case-insensitive testing, you can speed it up even more.




回答4:


pseudosavant's answer didn't exactly work for me, so I improved it.

// Compares the host portion of the current page with the host portion of
// `url`. The host portion is whatever precedes the first "/" once the first
// literal "http://" and then "https://" have been removed.
var isExternal = function(url) {
    function stripToHost(address) {
        var withoutScheme = ["http://", "https://"].reduce(function(text, scheme) {
            return text.replace(scheme, "");
        }, address);
        return withoutScheme.split("/")[0];
    }

    return stripToHost(location.href) !== stripToHost(url);
};



回答5:


I had to build on pseudosavant's and Jon's answers because I also needed to catch cases of URLs beginning with "//" and URLs that do not include a sub-domain. Here's what worked for me:

// Reduces a host name to its last two labels ("www.example.com" ->
// "example.com"), or to its last three labels when the second-to-last label is
// one of the common second-level domains listed below ("www.example.co.uk" ->
// "example.co.uk").
//
// @param {string} domain - a host name without scheme or path.
// @returns {string} the shortened domain. A host with a single label (for
//     example "localhost" or an empty string) is returned unchanged instead of
//     being prefixed with the string "undefined.".
var getDomainName = function(domain) {
    var parts = domain.split('.').reverse();
    var cnt = parts.length;
    if (cnt < 2) {
        // Only one label: there is no parts[1] to join with.
        return parts[0];
    }
    // see if the second level domain is a common SLD.
    if (cnt >= 3 && /^(com|edu|gov|net|mil|org|nom|co|name|info|biz)$/i.test(parts[1])) {
        return parts[2] + '.' + parts[1] + '.' + parts[0];
    }
    return parts[1] + '.' + parts[0];
};
// Compares the shortened domain of the current page with the shortened domain
// of `url`. Each address has its first "http://", "https://" and "//" removed,
// is cut at the first "/", lower-cased, and passed through getDomainName().
var isExternalUrl = function(url) {
	var domainOf = function(address) {
		var host = address.replace("http://", "").replace("https://", "").replace("//", "").split("/")[0];
		return getDomainName(host.toLowerCase());
	};
	var currentDomain = domainOf(location.href);
	var targetDomain = domainOf(url);
	return currentDomain !== targetDomain;
};

// Shows an alert when a clicked link that opens in a new tab (target="_blank")
// points at a different domain according to isExternalUrl().
// NOTE(review): .delegate() is deprecated since jQuery 3.0 in favour of
// .on('click', 'a', handler); it is kept because the jQuery version in use is
// not visible from this snippet.
$(document).delegate('a', 'click', function() {
	var aHrefTarget = $(this).attr('target');
	if(aHrefTarget !== '_blank')
		return;  // not an external link (also covers a missing target attribute)
	var aHrefUrl = $(this).attr('href');
	// .attr() yields undefined when the attribute is absent, which used to make
	// the prefix checks below throw; an empty href refers to the current page.
	if(typeof aHrefUrl !== 'string' || aHrefUrl === '')
		return;
	var isProtocolRelative = aHrefUrl.slice(0, 2) === '//';
	var firstChar = aHrefUrl.charAt(0);
	if(!isProtocolRelative && (firstChar === '/' || firstChar === '#'))
		return;  // this is a relative link or anchor link
	if(isExternalUrl(aHrefUrl))
		alert('clicked external link');
});
<h3>Internal URLs:</h3>
<ul>
  <li><a href="stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
  <li><a href="www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
  <li><a href="//stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">//stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
  <li><a href="//www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls" target="_blank">//www.stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls</a></li>
</ul>
<h3>External URLs:</h3>
<ul>
  <li><a href="http://www.yahoo.com" target="_blank">http://www.yahoo.com</a></li>
  <li><a href="yahoo.com" target="_blank">yahoo.com</a></li>
  <li><a href="www.yahoo.com" target="_blank">www.yahoo.com</a></li>
  <li><a href="//www.yahoo.com" target="_blank">//www.yahoo.com</a></li>
</ul>



回答6:


For my purposes I made a small modification to shshaw's answer to check that links are not empty or just a single character (presumably '#'), for which the original answer's method returns a false positive. I needed this to show users that they are about to leave my page, by adding a Font Awesome icon to external links.

// same thing here, no edit
// Produces the comparison key used by isExternal(): a protocol-relative
// address gets the page's protocol prepended, then the text is lower-cased,
// its first "://" is removed, and only the part before the first "/" is kept.
function checkDomain(url) {
    var target = url;
    if ( target.indexOf('//') === 0 ) { target = location.protocol + target; }
    var pieces = target.toLowerCase().replace(/([a-z])?:\/\//,'$1').split('/');
    return pieces[0];
}

// Decides whether `url` leaves the current site. A URL qualifies for the
// comparison when it is longer than one character and contains ":", or when it
// contains "//" (&& binds tighter than ||, so the length test only guards the
// ":" test). Qualifying URLs are external when their checkDomain() key differs
// from the current page's key.
function isExternal(url) {
    var hasSchemeSeparator = url.length > 1 && url.indexOf(':') > -1;
    var hasDoubleSlash = url.indexOf('//') > -1;
    if ( !hasSchemeSeparator && !hasDoubleSlash ) { return false; }
    return checkDomain(location.href) !== checkDomain(url);
}

// add some icon to external links (function is called in an init method)
// Walks every anchor that has an href attribute and appends a Font Awesome
// "external link" icon to those that isExternal() reports as external.
function addExternalLinkIcon(){
    $("a[href]").each(function(index, anchor){
        var $anchor = $(anchor);
        // internal links are left untouched
        if(!isExternal($anchor.attr("href"))){
            return;
        }
        // (assumes the Font Awesome CSS and font are loaded)
        $anchor.append(" <i class='fa fa-external-link'></i> ");
    });
}



回答7:


Shouldn't

// Returns true when `url` contains a "scheme://" style sequence anywhere in
// the string, regardless of which host it names.
function is_external( url ) {
    var schemePattern = /[a-zA-Z0-9]*:\/\/[^\s]*/g;
    var hits = url.match( schemePattern );
    return hits !== null;
}

do the trick? Doesn't work for absolute (internal) urls.




回答8:


The main problem is how to parse a URL and get the host name out of it. It can be done in the following way:

// Extracts the host name of `url` by assigning it to the href of a detached
// <a> element and reading that element's hostname property.
var _getHostname = function(url) {
  var anchor = document.createElement('a');
  anchor.href = url;
  var hostname = anchor.hostname;
  return hostname;
};

// External when the page's own host name differs from the candidate's host name.
var isExternal = _getHostname(window.location.href) !== _getHostname('http://john.doe');

Or you can use is-url-external module.

var isExternal = require('is-url-external');
isExternal('http://john.doe'); // true | false 



回答9:


You can simply use the npm package is-internal-link

Installation

npm install --save is-internal-link

Usage

import { isInternalLink } from "is-internal-link"
isInternalLink('https://www.google.com') // false
isInternalLink('/page1') // true

I also usually use this with React, like this:

import React from 'react'

import { Link as ReactRouterLink} from 'react-router-dom'
import { isInternalLink } from 'is-internal-link'

const Link = ({ children, to, activeClassName, ...other }) => {
  if (isInternalLink(to)) {
    return (
      <ReactRouterLink to={to} activeClassName={activeClassName} {...other}>
        {children}
      </ReactRouterLink>
    )
  }
  return (
    <a href={to} target="_blank" {...other}>
      {children}
    </a>
  )
}

export default Link

Disclaimer: I am the author of this lib



来源:https://stackoverflow.com/questions/6238351/fastest-way-to-detect-external-urls

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!