Check if 2 URLs are equal

后端 未结 6 1528
囚心锁ツ
囚心锁ツ 2020-12-15 06:10

Is there a method around that tests if 2 URLs are equal, i.e. point to the same place? I am not talking about 2 URLs with different domain names pointing to the same IP address

相关标签:
6条回答
  • 2020-12-15 06:52

    For the record, here is a translation of http://en.wikipedia.org/wiki/URL_normalization to C#:

    using System;
    using System.Web;
    
    namespace UrlNormalizationTest
    {
        /// <summary>
        /// Heuristic URL normalization used to decide whether two URLs are likely to
        /// point to the same resource. The result is a comparison key only; it is not
        /// guaranteed to be a valid or fetchable URL.
        /// </summary>
        /// <remarks>
        /// NOTE(review): the whole URL is lower-cased and percent-decoded up front, so
        /// paths are compared case-insensitively and escaped reserved characters
        /// (for example %2F, %3F, %23) are treated like their literal counterparts.
        /// That is inherited behaviour; confirm it is acceptable for your inputs.
        /// </remarks>
        public static class UrlNormalization
        {
            private const string SchemeSeparator = "://";

            /// <summary>Returns true when both URL strings normalize to the same key.</summary>
            /// <param name="url1">First absolute URL.</param>
            /// <param name="url2">Second absolute URL.</param>
            public static bool AreTheSameUrls(this string url1, string url2)
            {
                return string.Equals(url1.NormalizeUrl(), url2.NormalizeUrl(), StringComparison.Ordinal);
            }

            /// <summary>Returns true when both URIs normalize to the same key.</summary>
            /// <param name="uri1">First absolute URI.</param>
            /// <param name="uri2">Second absolute URI.</param>
            public static bool AreTheSameUrls(this Uri uri1, Uri uri2)
            {
                return string.Equals(uri1.NormalizeUrl(), uri2.NormalizeUrl(), StringComparison.Ordinal);
            }

            /// <summary>
            /// File names that are dropped when they form the last path segment of a URL
            /// without a query. Entries are compared against the lower-cased URL.
            /// </summary>
            public static string[] DefaultDirectoryIndexes = new[]
                {
                    "default.asp",
                    "default.aspx",
                    "index.htm",
                    "index.html",
                    "index.php"
                };

            /// <summary>
            /// Produces the normalized comparison key for <paramref name="uri"/>.
            /// </summary>
            /// <param name="uri">An absolute URI.</param>
            /// <returns>The normalized key.</returns>
            /// <exception cref="ArgumentNullException"><paramref name="uri"/> is null.</exception>
            public static string NormalizeUrl(this Uri uri)
            {
                if (uri == null)
                {
                    throw new ArgumentNullException("uri");
                }

                var url = urlToLower(uri);
                url = limitProtocols(url);
                // The fragment goes first so that "index.html#top" is still recognised
                // as a directory index by the next step.
                url = removeTheFragment(url);
                url = removeDefaultDirectoryIndexes(url);
                url = removeDuplicateSlashes(url);
                url = addWww(url);
                url = removeFeedburnerPart(url);
                return removeTrailingSlashAndEmptyQuery(url);
            }

            /// <summary>
            /// Parses <paramref name="url"/> with <see cref="Uri"/> and normalizes it.
            /// </summary>
            /// <param name="url">An absolute URL string.</param>
            /// <returns>The normalized key.</returns>
            public static string NormalizeUrl(this string url)
            {
                return NormalizeUrl(new Uri(url));
            }

            /// <summary>
            /// Locates the authority of a "scheme://authority/..." string.
            /// Returns false when the string does not have that shape.
            /// </summary>
            /// <param name="url">The URL text to inspect.</param>
            /// <param name="authorityStart">Index of the first character after "://".</param>
            /// <param name="pathStart">
            /// Index of the first '/', '?' or '#' after the authority, or the string
            /// length when nothing follows the authority.
            /// </param>
            private static bool tryLocateAuthority(string url, out int authorityStart, out int pathStart)
            {
                authorityStart = -1;
                pathStart = -1;

                var separator = url.IndexOf(SchemeSeparator, StringComparison.Ordinal);
                // The separator must start at the first ':'; otherwise the "://" found
                // belongs to a later part (e.g. a query value of a "mailto:" URL).
                if (separator < 0 || url.IndexOf(':') != separator)
                {
                    return false;
                }

                authorityStart = separator + SchemeSeparator.Length;
                var end = url.IndexOfAny(new[] { '/', '?', '#' }, authorityStart);
                pathStart = end < 0 ? url.Length : end;
                return true;
            }

            /// <summary>
            /// Cuts the URL at a "utm_source=" query parameter, discarding that parameter
            /// and everything after it. Only a parameter that directly follows '?' or
            /// '&amp;' counts, so names such as "my_utm_source=" are left alone.
            /// </summary>
            private static string removeFeedburnerPart(string url)
            {
                const string marker = "utm_source=";

                var queryStart = url.IndexOf('?');
                if (queryStart < 0)
                {
                    return url;
                }

                var idx = url.IndexOf(marker, queryStart, StringComparison.Ordinal);
                while (idx >= 0)
                {
                    var previous = url[idx - 1];
                    if (previous == '?' || previous == '&')
                    {
                        // Also drop the separator that introduced the parameter.
                        return url.Substring(0, idx - 1);
                    }
                    idx = url.IndexOf(marker, idx + 1, StringComparison.Ordinal);
                }
                return url;
            }

            /// <summary>
            /// Prefixes a two-label host ("example.com") with "www.". Only the host is
            /// touched; user-info, port, path and query are preserved as they are.
            /// </summary>
            private static string addWww(string url)
            {
                int authorityStart;
                int pathStart;
                if (!tryLocateAuthority(url, out authorityStart, out pathStart))
                {
                    return url;
                }

                var authority = url.Substring(authorityStart, pathStart - authorityStart);
                var hostOffset = authority.LastIndexOf('@') + 1; // skip "user:password@"
                var host = authority.Substring(hostOffset);
                if (host.StartsWith("[", StringComparison.Ordinal))
                {
                    return url; // bracketed IPv6 literal, not a domain name
                }

                var portSeparator = host.IndexOf(':');
                if (portSeparator >= 0)
                {
                    host = host.Substring(0, portSeparator);
                }

                if (host.Split('.').Length != 2 || host.StartsWith("www.", StringComparison.Ordinal))
                {
                    return url;
                }
                return url.Insert(authorityStart + hostOffset, "www.");
            }

            /// <summary>
            /// Collapses every run of '/' in the path to a single '/'. The scheme
            /// separator, the authority and the query are not modified.
            /// </summary>
            private static string removeDuplicateSlashes(string url)
            {
                int authorityStart;
                int pathStart;
                if (!tryLocateAuthority(url, out authorityStart, out pathStart))
                {
                    return url;
                }

                var pathEnd = url.IndexOfAny(new[] { '?', '#' }, pathStart);
                if (pathEnd < 0)
                {
                    pathEnd = url.Length;
                }

                var path = url.Substring(pathStart, pathEnd - pathStart);
                if (!path.Contains("//"))
                {
                    return url;
                }

                // Repeat until stable so that "///" ends up as "/" and not "//".
                while (path.Contains("//"))
                {
                    path = path.Replace("//", "/");
                }
                return url.Substring(0, pathStart) + path + url.Substring(pathEnd);
            }

            /// <summary>
            /// Maps the "https" scheme to "http". Only the leading scheme is rewritten;
            /// an "https://" that appears inside the query is left untouched.
            /// </summary>
            private static string limitProtocols(string url)
            {
                const string secure = "https://";
                return url.StartsWith(secure, StringComparison.Ordinal)
                    ? "http://" + url.Substring(secure.Length)
                    : url;
            }

            /// <summary>Drops everything from the first '#' onwards.</summary>
            private static string removeTheFragment(string url)
            {
                var hash = url.IndexOf('#');
                return hash < 0 ? url : url.Substring(0, hash);
            }

            /// <summary>
            /// Lower-cases the absolute URI and percent-decodes it with
            /// <see cref="HttpUtility.UrlDecode(string)"/>.
            /// </summary>
            private static string urlToLower(Uri uri)
            {
                return HttpUtility.UrlDecode(uri.AbsoluteUri.ToLowerInvariant());
            }

            /// <summary>
            /// Removes trailing '?' characters and then trailing '/' characters. Slashes
            /// that belong to the "scheme://authority" prefix are never removed.
            /// </summary>
            private static string removeTrailingSlashAndEmptyQuery(string url)
            {
                var trimmed = url.TrimEnd('?');

                int authorityStart;
                int pathStart;
                var floor = tryLocateAuthority(trimmed, out authorityStart, out pathStart) ? pathStart : 0;

                var end = trimmed.Length;
                while (end > floor && trimmed[end - 1] == '/')
                {
                    end--;
                }
                return trimmed.Substring(0, end);
            }

            /// <summary>
            /// Removes a default directory index ("index.html", ...) when it is the whole
            /// last path segment of a URL that has no query; the preceding '/' is kept.
            /// </summary>
            private static string removeDefaultDirectoryIndexes(string url)
            {
                // With a query present the URL does not end in its path.
                if (url.IndexOf('?') >= 0)
                {
                    return url;
                }

                int authorityStart;
                int pathStart;
                if (!tryLocateAuthority(url, out authorityStart, out pathStart))
                {
                    return url;
                }

                foreach (var index in DefaultDirectoryIndexes)
                {
                    if (string.IsNullOrEmpty(index))
                    {
                        continue;
                    }

                    // Require the leading '/' so "reindex.html" is not mistaken for
                    // "index.html", and make sure the match lies inside the path.
                    var segment = "/" + index;
                    if (url.Length - segment.Length >= pathStart
                        && url.EndsWith(segment, StringComparison.Ordinal))
                    {
                        return url.Substring(0, url.Length - index.Length);
                    }
                }
                return url;
            }
        }
    }
    

    With the following tests:

    using NUnit.Framework;
    using UrlNormalizationTest;
    
    namespace UrlNormalization.Tests
    {
        /// <summary>
        /// NUnit checks for the normalization steps. Each test normalizes two URLs
        /// and expects the resulting strings to be equal.
        /// </summary>
        [TestFixture]
        public class UnitTests
        {
            /// <summary>
            /// Normalizes both inputs (first, then second) and asserts equality of the results.
            /// </summary>
            private static void AssertNormalizeToSameValue(string first, string second)
            {
                var normalizedFirst = first.NormalizeUrl();
                var normalizedSecond = second.NormalizeUrl();

                Assert.AreEqual(normalizedFirst, normalizedSecond);
            }

            [Test]
            public void Test1ConvertingTheSchemeAndHostToLowercase()
            {
                AssertNormalizeToSameValue(
                    "HTTP://www.Example.com/",
                    "http://www.example.com/");
            }

            [Test]
            public void Test2CapitalizingLettersInEscapeSequences()
            {
                AssertNormalizeToSameValue(
                    "http://www.example.com/a%c2%b1b",
                    "http://www.example.com/a%C2%B1b");
            }

            [Test]
            public void Test3DecodingPercentEncodedOctetsOfUnreservedCharacters()
            {
                AssertNormalizeToSameValue(
                    "http://www.example.com/%7Eusername/",
                    "http://www.example.com/~username/");
            }

            [Test]
            public void Test4RemovingTheDefaultPort()
            {
                AssertNormalizeToSameValue(
                    "http://www.example.com:80/bar.html",
                    "http://www.example.com/bar.html");
            }

            [Test]
            public void Test5AddingTrailing()
            {
                AssertNormalizeToSameValue(
                    "http://www.example.com/alice",
                    "http://www.example.com/alice/?");
            }

            [Test]
            public void Test6RemovingDotSegments()
            {
                AssertNormalizeToSameValue(
                    "http://www.example.com/../a/b/../c/./d.html",
                    "http://www.example.com/a/c/d.html");
            }

            [Test]
            public void Test7RemovingDirectoryIndex1()
            {
                AssertNormalizeToSameValue(
                    "http://www.example.com/default.asp",
                    "http://www.example.com/");
            }

            [Test]
            public void Test7RemovingDirectoryIndex2()
            {
                // Both inputs are the same literal, exactly as in the original test.
                AssertNormalizeToSameValue(
                    "http://www.example.com/default.asp?id=1",
                    "http://www.example.com/default.asp?id=1");
            }

            [Test]
            public void Test7RemovingDirectoryIndex3()
            {
                AssertNormalizeToSameValue(
                    "http://www.example.com/a/index.html",
                    "http://www.example.com/a/");
            }

            [Test]
            public void Test8RemovingTheFragment()
            {
                AssertNormalizeToSameValue(
                    "http://www.example.com/bar.html#section1",
                    "http://www.example.com/bar.html");
            }

            [Test]
            public void Test9LimitingProtocols()
            {
                AssertNormalizeToSameValue(
                    "https://www.example.com/",
                    "http://www.example.com/");
            }

            [Test]
            public void Test10RemovingDuplicateSlashes()
            {
                AssertNormalizeToSameValue(
                    "http://www.example.com/foo//bar.html",
                    "http://www.example.com/foo/bar.html");
            }

            [Test]
            public void Test11AddWww()
            {
                AssertNormalizeToSameValue(
                    "http://example.com/",
                    "http://www.example.com");
            }

            [Test]
            public void Test12RemoveFeedburnerPart()
            {
                AssertNormalizeToSameValue(
                    "http://site.net/2013/02/firefox-19-released/?utm_source=rss&utm_medium=rss&utm_campaign=firefox-19-released",
                    "http://site.net/2013/02/firefox-19-released");
            }
        }
    }
    
    0 讨论(0)
  • 2020-12-15 06:57

    Maybe this tutorial can be of help to you?

    "...You want to see how to handle identical Urls in the sitemap (which is forbidden by the out-of-the-box SiteMapProvider)..."

    /// <summary>
    /// Walks every row of every table in the data source and makes repeated "Url"
    /// values distinct by appending an "instance" query parameter to each repeat.
    /// The first occurrence of a URL is left unchanged.
    /// (Per the original author: the default SiteMap provider cannot handle
    /// duplicate Urls, which is why they are rewritten here.)
    /// </summary>
    private void modifyDuplicateUrls()
    {
        StringCollection seenUrls = new StringCollection();
        // Shared across all URLs, so every rewritten row gets a different number.
        uint repeatCount = 0;

        foreach (DataTable table in this.DataSource.Tables)
        {
            foreach (DataRow row in table.Rows)
            {
                string currentUrl = (string)row["Url"];

                if (!urlsContain(seenUrls, currentUrl))
                {
                    seenUrls.Add(currentUrl);
                    continue;
                }

                repeatCount++;
                // Extend an existing query with '&', otherwise start one with '?'.
                string separator = currentUrl.Contains("?") ? "&instance=" : "?instance=";
                row["Url"] = currentUrl + separator + repeatCount.ToString();
            }
        }
    }

    /// <summary>Returns true when <paramref name="url"/> was already recorded.</summary>
    private static bool urlsContain(StringCollection recorded, string url)
    {
        return recorded.Contains(url);
    }
    }
    
    0 讨论(0)
  • 2020-12-15 07:01

    You might be looking for URL normalization techniques. They might be a good starting point :)

    Once you have normalized the URLs, you simply need to check if they are equal (keep in mind your assumptions, for instance, you discard the querystring).

    0 讨论(0)
  • 2020-12-15 07:09

    Frankly, why not just load both URLs and compare their HTML contents?

    0 讨论(0)
  • 2020-12-15 07:12

    You could probably use the Uri class to check individual parts of the urls, after converting each to the right format.

    // Build a Uri for each address that is being compared.
    // TODO: pick the constructor overload that fits, or pre-process the strings
    // to accommodate the different input scenarios.
    var uri1 = new Uri(url1);
    var uri2 = new Uri(url2);

    // Other constructor overloads exist as well, e.g. one that takes a UriKind.
    var uri3 = new Uri(url3, UriKind.Absolute);

    // Compare the components that matter for your definition of "equal".
    // TODO: use the right properties; AbsolutePath is only an example.
    bool pathsMatch = string.Equals(uri1.AbsolutePath, uri2.AbsolutePath);
    if (pathsMatch)
    {
        // Urls match
    }
    
    0 讨论(0)
  • 2020-12-15 07:14

    What about seeing if Server.MapPath is equal for both urls? (assuming this is an ASP.NET application, not ASP.NET MVC)

    // Compare the two mapped paths directly instead of lower-casing both sides:
    // an ordinal, case-insensitive comparison does not depend on the current
    // culture and does not throw if either MapPath call returns null.
    return string.Equals(
        Server.MapPath(url1),
        Server.MapPath(url2),
        System.StringComparison.OrdinalIgnoreCase);
    
    0 讨论(0)
提交回复
热议问题