Extract hostname from string

后端 未结 28 1961
情歌与酒
情歌与酒 2020-11-22 07:15

I would like to match just the root of a URL and not the whole URL from a text string. Given:

http://www.youtube.co         


        
28条回答
  •  暖寄归人
    2020-11-22 08:02

    All url properties, no dependencies, no JQuery, easy to understand

    This solution gives your answer plus additional properties. No JQuery or other dependencies required, paste and go.

    Usage

    getUrlParts("https://news.google.com/news/headlines/technology.html?ned=us&hl=en")
    

    Output

    {
      "origin": "https://news.google.com",
      "domain": "news.google.com",
      "subdomain": "news",
      "domainroot": "google.com",
      "domainpath": "news.google.com/news/headlines",
      "tld": ".com",
      "path": "news/headlines/technology.html",
      "query": "ned=us&hl=en",
      "protocol": "https",
      "port": 443,
      "parts": [
        "news",
        "google",
        "com"
      ],
      "segments": [
        "news",
        "headlines",
        "technology.html"
      ],
      "params": [
        {
          "key": "ned",
          "val": "us"
        },
        {
          "key": "hl",
          "val": "en"
        }
      ]
    }
    

    Code
    The code is designed to be easy to understand rather than super fast. It can easily be called 100 times per second, so it is fine for front-end use or light server-side use, but not for high-volume throughput.

    /**
     * Split a URL string into its commonly needed pieces.
     *
     * Parsing uses the standard `URL` API when it is available (modern
     * browsers, Node.js). If `URL` is missing or rejects the input, and a DOM
     * is present, an anchor element is used instead, which keeps the lenient
     * behaviour of the earlier browser-only implementation.
     *
     * If the input does not start with a scheme ("xxx://"), "https://" is
     * assumed for parsing only; `origin`, `protocol` and `port` are then
     * returned as empty strings because they were not part of the input.
     *
     * @param {string} fullyQualifiedUrl - URL to analyse, e.g.
     *     "https://news.google.com/news/headlines/technology.html?ned=us".
     * @returns {{origin: string, domain: string, subdomain: string,
     *     domainroot: string, domainpath: string, tld: string, path: string,
     *     query: string, protocol: string, port: (string|number),
     *     parts: string[], segments: string[],
     *     params: Array<{key: string, val: (string|undefined)}>}}
     *     `port` is the explicit port as a string when one is given, otherwise
     *     the number 80 / 443 for http / https. `val` is `undefined` for a
     *     query parameter written without "=".
     * @throws {TypeError} If the argument is not a string, or if the URL
     *     cannot be parsed and no DOM fallback exists.
     */
    function getUrlParts(fullyQualifiedUrl) {
        if (typeof fullyQualifiedUrl !== 'string')
            throw new TypeError('getUrlParts: expected a string URL')

        // The scheme must be at the very start: "://" may legitimately appear
        // later on, e.g. inside a query value ("example.com/?next=https://x").
        var hasScheme = /^\s*[a-z][a-z0-9+.\-]*:\/\//i.test(fullyQualifiedUrl)
        var href = hasScheme ? fullyQualifiedUrl : 'https://' + fullyQualifiedUrl

        var a = null
        if (typeof URL === 'function') {
            try {
                a = new URL(href)
            } catch (e) {
                a = null // not parseable by URL; try the DOM fallback below
            }
        }
        if (!a && typeof document !== 'undefined') {
            a = document.createElement('a')
            a.href = href
        }
        if (!a)
            throw new TypeError('getUrlParts: cannot parse URL "' + fullyQualifiedUrl + '"')

        var parts = a.hostname.split('.')
        var url = {}
        url.origin = hasScheme ? a.origin : ''
        url.domain = a.hostname
        url.subdomain = parts[0]
        url.domainroot = ''
        url.domainpath = ''
        url.tld = '.' + parts[parts.length - 1]
        url.path = a.pathname.substring(1)
        url.query = a.search.substring(1)
        url.protocol = hasScheme ? a.protocol.slice(0, -1) : ''

        // Explicit port wins; otherwise report the well-known default.
        if (!hasScheme)
            url.port = ''
        else if (a.port)
            url.port = a.port
        else if (a.protocol === 'http:')
            url.port = 80
        else if (a.protocol === 'https:')
            url.port = 443
        else
            url.port = a.port

        url.parts = parts
        url.segments = a.pathname === '/' ? [] : a.pathname.split('/').slice(1)

        // Split each pair at the FIRST "=" only, so values that themselves
        // contain "=" (base64 padding, nested URLs) are kept intact.
        url.params = url.query === '' ? [] : url.query.split('&').map(function (pair) {
            var eq = pair.indexOf('=')
            if (eq === -1)
                return { 'key': pair, 'val': undefined }
            return { 'key': pair.slice(0, eq), 'val': pair.slice(eq + 1) }
        })

        // domainroot: registrable domain, approximated without a public-suffix
        // list. Normally the last two labels; three when a 2-letter country
        // code follows a common generic second-level label ("co.uk", "com.au").
        if (parts.length >= 2) {
            var secondLevelLabels = ['ac', 'co', 'com', 'edu', 'go', 'gov', 'mil', 'ne', 'net', 'or', 'org']
            var tldLabel = parts[parts.length - 1]
            var secondLabel = parts[parts.length - 2]
            url.domainroot = secondLabel + '.' + tldLabel
            if (parts.length >= 3 && tldLabel.length === 2 && secondLevelLabels.indexOf(secondLabel) !== -1)
                url.domainroot = parts[parts.length - 3] + '.' + url.domainroot
        }

        // domainpath: domain + path without a trailing file name. A last
        // segment containing "." is treated as a file and dropped; working on
        // the segment list avoids matching the same text earlier in the path.
        var dirSegments = url.segments
        if (dirSegments.length > 0 && dirSegments[dirSegments.length - 1].indexOf('.') !== -1)
            dirSegments = dirSegments.slice(0, -1)
        var dirPath = dirSegments.join('/')
        url.domainpath = dirPath ? url.domain + '/' + dirPath : url.domain

        return url
    }
    

提交回复
热议问题