Extract hostname from string

By | December 21, 2017
Questions:

I would like to match just the root of a URL and not the whole URL from a text string. Given:

http://www.youtube.com/watch?v=ClkQA2Lb_iE

http://www.example.com/12xy45
http://example.com/random

I want the last two URLs to resolve to the www.example.com or example.com domain.

I heard regex is slow, and this would be my second regular expression on the page, so if there is any way to do it without regex, let me know.

I’m seeking a JS/jQuery version of this solution.

Answers:

Try:

/**
 * Extract the hostname from a URL-like string without regular expressions.
 *
 * Accepts full URLs ("http://host/path"), protocol-relative URLs
 * ("//host/path") and scheme-less input ("host:1234/path?x=1").
 * Credentials ("user:pass@"), the port, the path, the query string and the
 * fragment are all removed from the result.
 *
 * @param {string} url - URL or URL-like string.
 * @returns {string} The hostname, or "" when no host can be found.
 */
function extractHostname(url) {
    var rest = url;
    var schemeEnd = url.indexOf("://");

    //find & remove protocol (http, ftp, etc.)
    if (schemeEnd > -1) {
        //"://" only marks a protocol when nothing URL-like precedes it;
        //otherwise it belongs to a path or query (e.g. "a.com?u=http://b.com")
        var prefix = url.substring(0, schemeEnd);
        if (prefix.indexOf('/') === -1 && prefix.indexOf('?') === -1 && prefix.indexOf('#') === -1) {
            rest = url.substring(schemeEnd + 3);
        }
    }
    else if (url.indexOf("//") === 0) {
        //protocol-relative URL
        rest = url.substring(2);
    }

    //the host section ends at the first "/", "?" or "#"
    var hostname = rest.split('/')[0].split('?')[0].split('#')[0];

    //find & remove credentials ("user:pass@")
    hostname = hostname.substring(hostname.lastIndexOf('@') + 1);
    //find & remove port number
    hostname = hostname.split(':')[0];

    return hostname;
}

// Demo: write the hostname extracted from each sample URL, one per line.
document.write('Using "extractHostname":<br/>');
[
    "http://www.blog.classroom.me.uk/index.php",
    "http://www.youtube.com/watch?v=ClkQA2Lb_iE",
    "https://www.youtube.com/watch?v=ClkQA2Lb_iE",
    "www.youtube.com/watch?v=ClkQA2Lb_iE",
    "ftps://ftp.websitename.com/dir/file.txt",
    "websitename.com:1234/dir/file.txt",
    "ftps://websitename.com:1234/dir/file.txt",
    "example.com?param=value"
].forEach(function (sampleUrl) {
    document.write(extractHostname(sampleUrl) + "<br/>");
});
document.write('<br/>');

To address those who want the “root domain,” use this function:

/**
 * Reduce a URL to its "root" domain (e.g. "www.youtube.com" -> "youtube.com").
 *
 * Uses a length heuristic for two-part country-code suffixes: when both of the
 * last two labels are two characters long (".me.uk", ".co.uk"), three labels
 * are kept instead of two. Suffixes such as ".com.au" are not recognised by
 * this heuristic; a Public Suffix List lookup is needed for full accuracy.
 *
 * @param {string} url - URL or URL-like string, passed to extractHostname.
 * @returns {string} The root domain.
 */
function extractRootDomain(url) {
    var domain = extractHostname(url),
        splitArr = domain.split('.'),
        arrLen = splitArr.length;

    //extracting the root domain here
    //if there is a subdomain
    if (arrLen > 2) {
        domain = splitArr[arrLen - 2] + '.' + splitArr[arrLen - 1];
        //check to see if it's using a Country Code Top Level Domain (ccTLD) (i.e. ".me.uk"):
        //both the second-level label and the TLD must be two characters long
        if (splitArr[arrLen - 2].length == 2 && splitArr[arrLen - 1].length == 2) {
            //this is using a ccTLD
            domain = splitArr[arrLen - 3] + '.' + domain;
        }
    }
    return domain;
}

// Demo: write the root domain extracted from each sample URL, one per line.
document.write('Using "extractRootDomain":<br/>');
[
    "http://www.blog.classroom.me.uk/index.php",
    "http://www.youtube.com/watch?v=ClkQA2Lb_iE",
    "https://www.youtube.com/watch?v=ClkQA2Lb_iE",
    "www.youtube.com/watch?v=ClkQA2Lb_iE",
    "ftps://ftp.websitename.com/dir/file.txt",
    "websitename.com:1234/dir/file.txt",
    "ftps://websitename.com:1234/dir/file.txt",
    "example.com?param=value"
].forEach(function (sampleUrl) {
    document.write(extractRootDomain(sampleUrl) + "<br/>");
});

Yields:

Using “extractHostname”:

www.blog.classroom.me.uk
www.youtube.com
www.youtube.com
www.youtube.com
ftp.websitename.com
websitename.com
websitename.com
example.com

Using “extractRootDomain”:

classroom.me.uk
youtube.com
youtube.com
youtube.com
websitename.com
websitename.com
websitename.com
example.com

Regardless of whether the URL has a protocol or even a port number, you can extract the domain. This is a very simplified, non-regex solution, so I think this will do.

Note: If you try to use the “URL” object on input without a protocol (e.g. http, ftp, etc.), it will break.

*Thank you @Timmerz, @renoirb, @rineez, @BigDong, @ra00l, @ILikeBeansTacos, @CharlesRobertson for your suggestions!

Questions:
Answers:

A neat trick without using regular expressions:

// Let the browser parse the URL: assigning to an anchor element's href
// exposes the parsed pieces as properties of that element.
var tmp        = document.createElement ('a');
;   tmp.href   = "http://www.example.com/12xy45";

// tmp.hostname will now contain 'www.example.com'
// tmp.host will now contain the hostname plus the port, if one is present.
// NOTE(review): per the URL standard a scheme's default port (80 for http) is
// omitted, so for this URL tmp.host should be 'www.example.com' — confirm in target browsers.

Wrap the above in a function such as the one below and you have yourself a superb way of snatching the domain part out of a URI.

// Resolve `data` through a throw-away <a> element and return the host name
// that the element reports after its href has been assigned.
function url_domain(data) {
  var anchor = document.createElement('a');
  anchor.href = data;

  return anchor.hostname;
}

Questions:
Answers:

Try this:

// Group 1 captures everything between "http(s)://" and the first "/", "?" or "#".
var matches = url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
var domain = matches === null ? null : matches[1];  // null when the pattern does not match

If you want to exclude the port from your result, use this expression instead:

// Group 1 captures the host of an http(s) URL; ':' is excluded from the capture, so a port number is left out.
/^https?\:\/\/([^\/:?#]+)(?:[\/:?#]|$)/i

Edit: To prevent specific domains from matching, use a negative lookahead. (?!youtube.com)

// The negative lookahead rejects hosts that begin with youtube.com or youtu.be (optionally prefixed by "www.");
// for every other http(s) URL, group 1 captures the host without the port.
/^https?\:\/\/(?!(?:www\.)?(?:youtube\.com|youtu\.be))([^\/:?#]+)(?:[\/:?#]|$)/i

Questions:
Answers:

Parsing a URL can be tricky because you can have port numbers and special characters. As such, I recommend using something like parseUri to do this for you. I doubt performance is going to be an issue unless you are parsing hundreds of URLs.

Questions:
Answers:

There is no need to parse the string, just pass your URL as an argument to URL constructor:

// The built-in URL parser does the splitting; only the hostname is read from the result.
var url = 'http://www.youtube.com/watch?v=ClkQA2Lb_iE';
var hostname = new URL(url).hostname;

assert('www.youtube.com' === hostname);

Questions:
Answers:

I tried to use the given solutions: the chosen one was overkill for my purpose, and the “create an <a> element” one did not work for me.

It does not handle a port in the URL yet. I hope someone finds it useful.

/**
 * Break a URL string into protocol, domain, path and domain labels.
 *
 * Returned properties:
 *   protocol      - text before "://" ('' when the URL has no protocol)
 *   domain        - text between the protocol and the first "/" (a port, if any, is kept)
 *   path          - text after that "/" (no leading slash), or null when empty/absent
 *   subdomain, host, tld - set only for domains with 2, 3 or 4 dot-separated labels
 *   parent_domain - host + '.' + tld, or null when host/tld could not be determined
 *
 * @param {?string} url - URL to parse.
 * @returns {Object} The parsed parts; an empty object for null/undefined/empty input.
 */
function parseURL(url){
    // All working variables are declared locally so nothing leaks into the global scope.
    var parsed_url = {};

    if ( url == null || url.length == 0 )
        return parsed_url;

    var protocol_i = url.indexOf('://');
    var remaining_url;
    if ( protocol_i == -1 ) {
        // No protocol: the whole input is domain + path.
        parsed_url.protocol = '';
        remaining_url = url;
    } else {
        parsed_url.protocol = url.slice(0, protocol_i);
        remaining_url = url.slice(protocol_i + 3);
    }

    var domain_i = remaining_url.indexOf('/');
    if ( domain_i == -1 ) {
        // No path at all: the domain is everything that is left.
        parsed_url.domain = remaining_url;
        parsed_url.path = null;
    } else {
        parsed_url.domain = remaining_url.slice(0, domain_i);
        // A lone trailing "/" counts as "no path".
        parsed_url.path = domain_i + 1 == remaining_url.length ? null : remaining_url.slice(domain_i + 1);
    }

    var domain_parts = parsed_url.domain.split('.');
    switch ( domain_parts.length ){
        case 2:
          parsed_url.subdomain = null;
          parsed_url.host = domain_parts[0];
          parsed_url.tld = domain_parts[1];
          break;
        case 3:
          parsed_url.subdomain = domain_parts[0];
          parsed_url.host = domain_parts[1];
          parsed_url.tld = domain_parts[2];
          break;
        case 4:
          // Four labels are read as subdomain.host.<two-part tld>.
          parsed_url.subdomain = domain_parts[0];
          parsed_url.host = domain_parts[1];
          parsed_url.tld = domain_parts[2] + '.' + domain_parts[3];
          break;
    }

    // Avoid producing "undefined.undefined" when the label count was not 2-4.
    parsed_url.parent_domain = parsed_url.host === undefined
        ? null
        : parsed_url.host + '.' + parsed_url.tld;

    return parsed_url;
}

Running this:

// Example call: parseURL returns an object describing the parts of the given URL.
parseURL('https://www.facebook.com/100003379429021_356001651189146');

Result:

Object {
    domain : "www.facebook.com",
    host : "facebook",
    parent_domain : "facebook.com",
    path : "100003379429021_356001651189146",
    protocol : "https",
    subdomain : "www",
    tld : "com"
}

Questions:
Answers:

If you end up on this page and you are looking for the best REGEX of URLS try this one:

^(?:https?:)?(?:\/\/)?([^\/\?]+)

https://regex101.com/r/pX5dL9/1

It works for URLs without http://, with http, with https, and with just //, and it doesn’t grab the path or the query string either.

Good Luck

Questions:
Answers:
// use this if you know you have a subdomain
// www.domain.com -> domain.com
// Removes the first dot-terminated label of the current page's hostname.
// The pattern is anchored and the dot escaped, so labels containing "-"
// (my-site.example.com) are removed whole and dot-less hosts are left untouched.
function getDomain() {
  return window.location.hostname.replace(/^[^.]+\./,"");
}

Questions:
Answers:
// Fallback for engines without a native String.prototype.trim; a native
// implementation is never overwritten. Strips leading and trailing whitespace.
if ("function" != typeof String.prototype.trim) {
    String.prototype.trim = function(){return this.replace(/^\s+|\s+$/g,"");};
}
/**
 * Return the host section (hostname plus optional port) of a URL.
 *
 * Only absolute ("scheme://host...") and protocol-relative ("//host...")
 * URLs are recognised; anything else, including null/undefined/blank
 * input, yields "".
 */
function getHost(url){
    if (url === undefined || url === null) return "";

    var trimmed = url.trim();
    if (trimmed === "") return "";

    // Host section = text up to the first "/", "?" or "&".
    function cutHost(text){
        return text.split("/")[0].trim().split("?")[0].split("&")[0];
    }
    // Host section of a string that starts with "//".
    function hostAfterSlashes(text){
        return cutHost(text.split("//")[1]);
    }

    if (trimmed.indexOf("://") === -1) {
        return trimmed.indexOf("//") === 0 ? hostAfterSlashes(trimmed) : "";
    }

    var pieces = trimmed.split('://');
    var head = pieces[0];
    var headLooksLikeUrl = ["/", ".", "?", "&"].some(function (ch) {
        return head.indexOf(ch) > -1;
    });

    if (!headLooksLikeUrl) {
        // Plain scheme in front of "://": the host follows it.
        return cutHost(pieces[1].trim());
    }

    // The "://" sits inside a path or query, so only a protocol-relative head counts.
    head = head.trim();
    return head.indexOf("//") === 0 ? hostAfterSlashes(head) : "";
}
/**
 * Return the host of a URL without its port: whatever getHost() reports,
 * cut at the first ":". Null, undefined and blank input yield "".
 */
function getHostname(url){
    if (url === undefined || url === null) return "";

    var cleaned = url.trim();
    if (cleaned === "") return "";

    var hostWithPort = getHost(cleaned);
    return hostWithPort.split(':')[0];
}
/**
 * Return the hostname of a URL with its first label removed
 * (www.domain.com -> domain.com). Intended for URLs known to carry a
 * subdomain. Null, undefined and blank input yield "".
 */
function getDomain(url){
    if("undefined"==typeof(url)||null==url) return "";
    url = url.trim(); if(""==url) return "";
    // Anchored pattern with an escaped dot: removes exactly the first
    // dot-terminated label, including labels that contain "-".
    return getHostname(url).replace(/^[^.]+\./,"");
}

Questions:
Answers:
/**
 * Return the host name of a URL, without a leading "www." / "wwwN." label
 * and without port, path, query string or fragment.
 *
 * @param {string} url - Absolute URL containing "://".
 * @returns {string|undefined} The host name, or undefined when no host follows "://".
 */
function hostname(url) {
    // Group 2 stops at "/", ":", "?" or "#", so nothing after the host leaks into the result.
    var match = url.match(/:\/\/(www[0-9]?\.)?([^/:?#]+)/i);
    if ( match != null ) return match[2];
}

The above code will successfully parse the hostnames for the following example urls:

http://WWW.first.com/folder/page.html
first.com

http://mail.google.com/folder/page.html
mail.google.com

https://mail.google.com/folder/page.html
mail.google.com

http://www2.somewhere.com/folder/page.html?q=1
somewhere.com

https://www.another.eu/folder/page.html?q=1
another.eu

Original credit goes to: http://www.primaryobjects.com/CMS/Article145

Questions:
Answers:

Okay, I know this is an old question, but I made a super-efficient url parser so I thought I’d share it.

As you can see, the structure of the function is very odd, but it’s for efficiency. No prototype functions are used, the string doesn’t get iterated more than once, and no character is processed more than necessary.

// Single-pass scanner that returns the host part of `url` (without a leading "www.").
// `step` is the scanner state: 0 = looking for the end of the scheme,
// 1 = at the first host character, 2 = copying host characters into `dom`.
// Limitations visible in the code:
//  - a ':' anywhere in the first six characters is taken as the end of the scheme,
//    and the two characters after it are skipped without being checked;
//  - input of six characters or fewer that contains no ':' never leaves step 0, so "" is returned;
//  - ':' does not end the host, so a port number stays in the result.
function getDomain(url) {
    var dom = "", v, step = 0;
    for(var i=0,l=url.length; i<l; i++) {
        v = url[i]; if(step == 0) {
            //First, skip 0 to 5 characters ending in ':' (ex: 'https://')
            //No ':' by index 5: rewind (i=-1 becomes 0 after the loop's i++) and treat the input as scheme-less.
            //On ':', i+=2 plus the loop's i++ lands on the character after the two that follow ':'.
            if(i > 5) { i=-1; step=1; } else if(v == ':') { i+=2; step=1; }
        } else if(step == 1) {
            //Skip 0 or 4 characters 'www.'
            //(Note: Doesn't work with www.com, but that domain isn't claimed anyway.)
            if(v == 'w' && url[i+1] == 'w' && url[i+2] == 'w' && url[i+3] == '.') i+=4;
            //The first host character is appended unconditionally (url[i], since i may just have moved).
            dom+=url[i]; step=2;
        } else if(step == 2) {
            //Stop at subpages, queries, and hashes.
            if(v == '/' || v == '?' || v == '#') break; dom += v;
        }
    }
    return dom;
}

Questions:
Answers:

Was looking for a solution to this problem today. None of the above answers seemed to satisfy. I wanted a solution that could be a one liner, no conditional logic and nothing that had to be wrapped in a function.

Here’s what I came up with, seems to work really well:

// Keep the text after the last "//", cut it at the first ":", then keep the last two dot-separated labels.
// Nothing here removes a path, so a URL such as "http://www.example.com/a/b" keeps "/a/b" in the result.
hostname="http://www.example.com:1234"
hostname.split("//").slice(-1)[0].split(":")[0].split('.').slice(-2).join('.')   // gives "example.com"

May look complicated at first glance, but it works pretty simply; the key is using ‘slice(-n)’ in a couple of places where the good part has to be pulled from the end of the split array (and [0] to get from the front of the split array).

Each of these tests return “example.com”:

// Same chain applied to four inputs: without a port, with a port, with "www." and with a deeper subdomain.
"http://example.com".split("//").slice(-1)[0].split(":")[0].split('.').slice(-2).join('.')
"http://example.com:1234".split("//").slice(-1)[0].split(":")[0].split('.').slice(-2).join('.')
"http://www.example.com:1234".split("//").slice(-1)[0].split(":")[0].split('.').slice(-2).join('.')
"http://foo.www.example.com:1234".split("//").slice(-1)[0].split(":")[0].split('.').slice(-2).join('.')

Questions:
Answers:

Here’s the jQuery one-liner:

// Build a detached <a> element, set its href to `url`, and read back the element's hostname property.
$('<a>').attr('href', url).prop('hostname');

Questions:
Answers:

In short, you can do it like this:

var url = "http://www.someurl.com/support/feature";

/**
 * Return the part of a URL between "//" and the next "/".
 * Input without "//" is treated as starting directly with the domain.
 *
 * @param {string} url - URL such as "http://www.example.com/page/1".
 * @returns {string} The domain, e.g. "www.example.com" (a port, if present, is kept).
 */
function getDomain(url){
  // Local variable: the original assigned to an undeclared global `domain`.
  var afterScheme = url.indexOf("//") === -1 ? url : url.split("//")[1];
  return afterScheme.split("/")[0];
}
eg:
  getDomain("http://www.example.com/page/1")

  output:
   "www.example.com"

Use above function to get domain name

Questions:
Answers:

This is not a full answer, but the below code should help you:

/**
 * Return the host section of an absolute URL by splitting on "/":
 * for "scheme://host/path" the pieces are "scheme:", "", "host", ...
 *
 * @param {string} [str] - URL to inspect; defaults to the sample URL below.
 * @returns {string|undefined} The third "/"-separated piece (the host), if any.
 */
function myFunction(str) {
    var input = str === undefined
        ? "https://www.123rf.com/photo_10965738_lots-oop.html"
        : str;
    // Declared locally: the original leaked `matches` into the global scope.
    var matches = input.split('/');
    return matches[2];
}

I would like someone to write code that is faster than mine; it would help me improve as well.

Questions:
Answers:

All url properties, no dependencies, no JQuery, easy to understand

This solution gives your answer plus additional properties. No JQuery or other dependencies required, paste and go.

Usage

// Example call: returns an object with the URL's origin, domain, path, query and related parts.
getUrlParts("https://news.google.com/news/headlines/technology.html?ned=us&hl=en")

Output

{
  "origin": "https://news.google.com",
  "domain": "news.google.com",
  "subdomain": "news",
  "domainroot": "google.com",
  "domainpath": "news.google.com/news/headlines",
  "tld": ".com",
  "path": "news/headlines/technology.html",
  "query": "ned=us&hl=en",
  "protocol": "https",
  "port": 443,
  "parts": [
    "news",
    "google",
    "com"
  ],
  "segments": [
    "news",
    "headlines",
    "technology.html"
  ],
  "params": [
    {
      "key": "ned",
      "val": "us"
    },
    {
      "key": "hl",
      "val": "en"
    }
  ]
}

Code
The code is designed to be easy to understand rather than super fast. It can be called easily 100 times per second, so it’s great for front end or a few server usages, but not for high volume throughput.

/**
 * Split a URL into its components, using an <a> element as the parser.
 *
 * Returned properties: origin, domain, subdomain (first host label),
 * domainroot, domainpath (domain + path without a trailing file name), tld,
 * path, query, protocol, port, parts (host labels), segments (path segments)
 * and params (array of {key, val} objects).
 *
 * When the input has no "://", "https://" is prepended only so that it can be
 * parsed; origin, protocol and port are then reported as "".
 *
 * domainroot uses a length heuristic for two-part country-code suffixes
 * (".me.uk"): when the last two labels are both two characters long, three
 * labels are kept.
 *
 * @param {string} fullyQualifiedUrl - URL to split.
 * @returns {Object} The URL parts described above.
 */
function getUrlParts(fullyQualifiedUrl) {
    var url = {},
        tempProtocol;
    var a = document.createElement('a');
    // if doesn't start with something like https:// it's not a url, but try to work around that
    if (fullyQualifiedUrl.indexOf('://') == -1) {
        tempProtocol = 'https://';
        a.href = tempProtocol + fullyQualifiedUrl;
    } else {
        a.href = fullyQualifiedUrl;
    }
    var parts = a.hostname.split('.');
    url.origin = tempProtocol ? "" : a.origin;
    url.domain = a.hostname;
    url.subdomain = parts[0];
    url.domainroot = '';
    url.domainpath = '';
    url.tld = '.' + parts[parts.length - 1];
    url.path = a.pathname.substring(1);
    url.query = a.search.substring(1);
    url.protocol = tempProtocol ? "" : a.protocol.substring(0, a.protocol.length - 1);
    // An explicit port is reported as given; otherwise fall back to the scheme default for http/https.
    url.port = tempProtocol ? "" : a.port ? a.port : a.protocol === 'http:' ? 80 : a.protocol === 'https:' ? 443 : a.port;
    url.parts = parts;
    url.segments = a.pathname === '/' ? [] : a.pathname.split('/').slice(1);
    url.params = url.query === '' ? [] : url.query.split('&');
    for (var j = 0; j < url.params.length; j++) {
        var param = url.params[j];
        // Split at the FIRST "=" only, so values that contain "=" stay intact.
        var eq = param.indexOf('=');
        url.params[j] = {
            'key': eq == -1 ? param : param.substring(0, eq),
            'val': eq == -1 ? undefined : param.substring(eq + 1)
        };
    }
    // domainroot
    if (parts.length == 2) {
        // A two-label host is already its own root.
        url.domainroot = a.hostname;
    } else if (parts.length > 2) {
        url.domainroot = parts[parts.length - 2] + '.' + parts[parts.length - 1];
        // check for country code top level domain: both of the last two labels are 2 characters long
        if (parts[parts.length - 2].length == 2 && parts[parts.length - 1].length == 2)
            url.domainroot = parts[parts.length - 3] + '.' + url.domainroot;
    }
    // domainpath (domain+path without filenames)
    if (url.segments.length > 0) {
        var lastSegment = url.segments[url.segments.length - 1];
        var endsWithFile = lastSegment.indexOf('.') != -1;
        if (endsWithFile) {
            // Drop the last segment by position, so an earlier segment with the same name cannot be cut by mistake.
            var pathNoFile = url.segments.slice(0, -1).join('/');
            url.domainpath = url.domain;
            if (pathNoFile)
                url.domainpath = url.domainpath + '/' + pathNoFile;
        } else {
            url.domainpath = url.domain + '/' + url.path;
        }
    } else {
        url.domainpath = url.domain;
    }
    return url;
}

Questions:
Answers:

Code:

// Matches "<label>.com", "<label>.co.kr" or "<label>.be" anywhere in a string.
// The dot before the suffix is escaped so that it only matches a literal ".".
var regex = /\w+\.(com|co\.kr|be)/ig;
var urls = ['http://www.youtube.com/watch?v=ClkQA2Lb_iE',
            'http://youtu.be/ClkQA2Lb_iE',
            'http://www.example.com/12xy45',
            'http://example.com/random'];


// Plain array iteration; jQuery is not needed for this loop.
urls.forEach(function(url) {
    // With the "g" flag, match() returns an array of all matches (or null when there are none).
    var convertedUrl = url.match(regex);
    console.log(convertedUrl);
});

Result:

youtube.com
youtu.be
example.com
example.com

Questions:
Answers:

Try the code below to get the exact domain name using a regex:

// Searches `line` with the pattern below and prints capture group 2 of the first match,
// or "NO MATCH" when the pattern is not found. The string literal uses plain ASCII
// double quotes; typographic quotes are not valid Java string delimiters.
String line = "http://www.youtube.com/watch?v=ClkQA2Lb_iE";

  // group 1: one or more "<any char>." pairs; group 2: the text in between (printed below);
  // group 3: a final ".<word characters>" suffix
  String pattern3="([\\w\\W]\\.)+(.*)?(\\.[\\w]+)";

  Pattern r = Pattern.compile(pattern3);


  Matcher m = r.matcher(line);
  if (m.find( )) {

    System.out.println("Found value: " + m.group(2) );
  } else {
     System.out.println("NO MATCH");
  }

Leave a Reply

Your email address will not be published. Required fields are marked *