Given the URL (single line):
http://test.example.com/dir/subdir/file.html
How can I extract the following parts using regular expressions:
Propose a much more readable solution (in Python, but applies to any regex):
def url_path_to_dict(path):
pattern = (r'^'
r'((?P.+?)://)?'
r'((?P.+?)(:(?P.*?))?@)?'
r'(?P.*?)'
r'(:(?P\d+?))?'
r'(?P/.*?)?'
r'(?P[?].*?)?'
r'$'
)
regex = re.compile(pattern)
m = regex.match(path)
d = m.groupdict() if m is not None else None
return d
def main():
print url_path_to_dict('http://example.example.com/example/example/example.html')
Prints:
{
'host': 'example.example.com',
'user': None,
'path': '/example/example/example.html',
'query': None,
'password': None,
'port': None,
'schema': 'http'
}