import urllib2
# Fetch the page at `website` and print its raw HTML.
website = "WEBSITE"  # placeholder: substitute a full URL, scheme included
openwebsite = urllib2.urlopen(website)
try:
    # Read from the response handle that urlopen returned.
    html = openwebsite.read()
finally:
    # Release the connection even if the read fails.
    openwebsite.close()
# Parenthesised form prints the same text whether print is a statement or a
# function.
print(html)
So far so good.
But I want to extract the URLs from the HTML.
Simplest way for me:
from urlextract import URLExtract
from requests import get
# Download a page and print every URL that URLExtract finds in its text.
url = "http://sample.com/samplepage/"  # requests rejects URLs without a scheme
# Only `get` is imported from requests at the top of this script; the bare
# module name `requests` is never bound, so call `get` directly.
req = get(url)
text = req.text
# or if you already have the html source:
# text = "This is html for ex Google Yahoo"
# NOTE(review): this removes every space and '=' before extraction, which
# joins neighbouring tokens together -- confirm this is intended; the first
# argument may originally have been a different character.
text = text.replace(' ', '').replace('=','')
extractor = URLExtract()
print(extractor.find_urls(text))
Output:
['http://google.com/', 'http://yahoo.com/']