Is there a way to split a string without splitting on escaped characters? For example, I have a string and want to split on ':' but not on '\:'.
http\\://ww
I know this is an old question, but I recently needed a function like this and could not find one that met my requirements.
Rules:
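- If the delimiter is / and the escape is \, then \a\b\c/abc became ['\a\b\c', 'abc'] (an escape character in front of an ordinary character is kept as-is).
- A doubled escape character is itself escaped: \\ became \

So, for the record, and in case someone is looking for something like this, here is my function proposal: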
def str_escape_split(str_to_escape, delimiter=',', escape='\\'):
    """Split a string on a delimiter, honouring an escape character.

    The input is scanned left to right with these rules:

    * an escaped delimiter is kept in the token as a literal delimiter
      (the escape character itself is dropped);
    * a doubled escape character collapses to a single escape character;
    * an escape character in front of any other character is kept,
      followed by that character;
    * a dangling escape character at the very end of the input is kept
      instead of being silently discarded.

    This is a generator function, so the argument check only runs once
    iteration starts, not at call time.

    Args:
        str_to_escape (str): The text to be split.
        delimiter (str, optional): Delimiter used. Defaults to ','.
        escape (str, optional): The escape char. Defaults to a backslash.

    Yields:
        str: The tokens in order. At least one token is always produced;
            tokens may be empty strings.

    Raises:
        ValueError: If ``delimiter`` or ``escape`` is longer than one char.
    """
    if len(delimiter) > 1 or len(escape) > 1:
        raise ValueError("Either delimiter or escape must be an one char value")

    # Collect the pieces of the current token and join once per token
    # instead of growing a string with repeated concatenation.
    parts = []
    escaped = False  # True when the previous char was an unconsumed escape
    for c in str_to_escape:
        if c == escape:
            if escaped:
                # Second escape of a pair: emit one literal escape char.
                parts.append(escape)
            escaped = not escaped
            continue
        if c == delimiter and not escaped:
            # Unescaped delimiter closes the current token.
            yield ''.join(parts)
            parts = []
            continue
        if escaped and c != delimiter:
            # The escape preceded an ordinary char, so it is not consumed.
            parts.append(escape)
        parts.append(c)
        escaped = False

    if escaped:
        # A lone escape at the end of the input has nothing to escape;
        # keep it so that no input character is lost.
        parts.append(escape)
    yield ''.join(parts)
For the sake of sanity, I wrote some tests:
# Every case is a pair:
#   (string to be split, list of tokens expected from the split)
# All cases are split on '/'.

# Cases that use the backslash as the escape character.
tests_slash_escape = [
    (
        'r/casa\\/teste/g',
        ['r', 'casa/teste', 'g'],
    ),
    (
        'r/\\/teste/g',
        ['r', '/teste', 'g'],
    ),
    (
        'r/(([0-9])\\s+-\\s+([0-9]))/\\g<2>\\g<3>/g',
        ['r', '(([0-9])\\s+-\\s+([0-9]))', '\\g<2>\\g<3>', 'g'],
    ),
    (
        'r/\\s+/ /g',
        ['r', '\\s+', ' ', 'g'],
    ),
    (
        'r/\\.$//g',
        ['r', '\\.$', '', 'g'],
    ),
    (
        'u///g',
        ['u', '', '', 'g'],
    ),
    (
        's/(/[/g',
        ['s', '(', '[', 'g'],
    ),
    (
        's/)/]/g',
        ['s', ')', ']', 'g'],
    ),
    (
        'r/(\\.)\\1+/\\1/g',
        ['r', '(\\.)\\1+', '\\1', 'g'],
    ),
    (
        'r/(?<=\\d) +(?=\\d)/./',
        ['r', '(?<=\\d) +(?=\\d)', '.', ''],
    ),
    (
        'r/\\\\/\\\\\\/teste/g',
        ['r', '\\', '\\/teste', 'g'],
    ),
]

# Cases that use the vertical bar as the escape character.
tests_bar_escape = [
    (
        'r/||/|||/teste/g',
        ['r', '|', '|/teste', 'g'],
    ),
]
def test(test_array, escape):
    """Run str_escape_split over a table of cases and print each outcome.

    Every input is split on '/' using the given escape character, and the
    resulting token list is compared with the expected one. One line is
    printed per case; nothing is returned or raised on a mismatch.

    Args:
        test_array: Iterable of (input string, expected token list) pairs.
        escape: The escape character passed to str_escape_split.
    """
    for t in test_array:
        # Materialise the generator so it can be compared and printed.
        res = [*str_escape_split(t[0], '/', escape)]
        if res != t[1]:
            print(f"Test {t[0]}: {t[1]} != {res} - Failed! ")
            continue
        print(f"Test {t[0]}: {res} - Pass!")
def test_all():
    """Run every test table with the escape character it was written for."""
    suites = (
        (tests_slash_escape, '\\'),
        (tests_bar_escape, '|'),
    )
    for cases, escape_char in suites:
        test(cases, escape_char)


# Run the checks only when executed as a script, not when imported.
if __name__ == "__main__":
    test_all()