How do I get specific path sections from a URL? For example, I want a function that operates on this:
http://www.mydomain.com/hithere?image=2934
Here is an example using urlparse and rpartition.
# urlparse lives in a different module on Python 2 and Python 3. Importing
# both unconditionally fails on whichever interpreter is running, so try the
# Python 3 location first and fall back to the Python 2 one.
try:
    from urllib.parse import urlparse  # Python 3.x
except ImportError:
    from urlparse import urlparse  # Python 2.x
def printPathTokens(full_url):
    """Print the path of *full_url*, split at its last slash.

    The path component is extracted with ``urlparse`` and divided with
    ``str.rpartition('/')`` into everything before the final slash and the
    last resource after it. Each piece is printed; nothing is returned.

    Args:
        full_url: The URL string to inspect.
    """
    print('printPathTokens() called: %s' % full_url)
    p_full = urlparse(full_url).path
    print(' . p_full url: %s' % p_full)

    # rpartition splits at the LAST occurrence of the separator and returns
    # a 3-tuple: (part before the separator, the separator itself, part
    # after the separator). The middle element is '' when no separator
    # was found.
    rp_left, rp_match, rp_right = p_full.rpartition('/')

    if not rp_match:
        print(' . No slashes found in path')
        return

    print(' . path to last resource: %s' % rp_left)
    # An empty tail means the path ended with a slash.
    print(' . last resource: %s' % (rp_right or '(none)'))
# Demonstrate the three path shapes in turn: a file name at the end, a
# trailing slash, and a final segment without a trailing slash.
for demo_url in (
    'http://www.example.com/temp/something/happen/index.html',
    'http://www.example.com/temp/something/happen/',
    'http://www.example.com/temp/something/happen',
):
    printPathTokens(demo_url)

# Expected output:
# printPathTokens() called: http://www.example.com/temp/something/happen/index.html
# . p_full url: /temp/something/happen/index.html
# . path to last resource: /temp/something/happen
# . last resource: index.html
# printPathTokens() called: http://www.example.com/temp/something/happen/
# . p_full url: /temp/something/happen/
# . path to last resource: /temp/something/happen
# . last resource: (none)
# printPathTokens() called: http://www.example.com/temp/something/happen
# . p_full url: /temp/something/happen
# . path to last resource: /temp/something
# . last resource: happen