What would be the best way in Python to parse out chunks of text contained in matching brackets?
"{ { a } { b } { { { c } } } }"
should i
Here is a solution I came up with for a similar use case. It is loosely based on the pseudocode in the accepted answer. I didn't want to add any dependencies on external libraries:
def parse_segments(source, recurse=False, opener="(", closer=")"):
    """Split *source* into the text outside and inside balanced delimiters.

    The string is scanned once for top-level ``opener`` ... ``closer``
    groups. The returned list contains, in this order:

    1. every non-empty run of text that lies outside any top-level group,
       in the order it appears in *source*;
    2. the contents of each top-level group (bounding delimiters removed),
       in the order the groups appear.

    Args:
        source: The string to scan.
        recurse: When true, the contents of each group are themselves parsed
            with this function and the resulting pieces are spliced into the
            result instead of the raw contents.
        opener: Single character that opens a group (default ``"("``).
        closer: Single character that closes a group (default ``")"``).
            Must differ from ``opener``.

    Returns:
        A list of substrings as described above.

    Raises:
        AssertionError: If the delimiters in *source* are not balanced
            (a closer without a matching opener, or an unclosed opener).

    >>> parse_segments("a(b)c(d)e")
    ['a', 'c', 'e', 'b', 'd']
    >>> parse_segments("x(y(z)w)v", recurse=True)
    ['x', 'v', 'y', 'w', 'z']
    """
    depth = 0        # number of currently unclosed openers
    start_pos = 0    # start of the pending run of outside text
    open_pos = 0     # index of the opener of the current top-level group

    finished = []    # result list; outside text is collected here first
    segments = []    # contents of each top-level group, delimiters removed

    for cur_pos, character in enumerate(source):
        if character == opener:
            if depth == 0:
                # Only the outermost opener marks the start of a segment.
                open_pos = cur_pos
            depth += 1
        elif character == closer:
            depth -= 1
            if depth < 0:
                # A closer with no matching opener; without this check an
                # input like ")(" would balance out to zero and be accepted.
                raise AssertionError(
                    "unmatched %r at position %d" % (closer, cur_pos)
                )
            if depth == 0:
                # A top-level group just closed: record its contents and
                # flush any outside text that preceded it.
                segments.append(source[open_pos + 1:cur_pos])
                clean = source[start_pos:open_pos]
                if clean:
                    finished.append(clean)
                start_pos = cur_pos + 1

    if depth != 0:
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O.
        raise AssertionError("unmatched %r in source" % (opener,))

    if start_pos != len(source):
        # Trailing text after the last group (or the whole string when
        # there were no groups at all).
        finished.append(source[start_pos:])

    for inner in segments:
        if recurse:
            finished.extend(parse_segments(inner, recurse, opener, closer))
        else:
            finished.append(inner)

    return finished