I got a little confused about Python raw strings. I know that if we use a raw string, it will treat '\' as a normal backslash (e.g. r'\n' would be the two characters '\' and 'n').
def clean_with_puncutation(text):
    """Pad punctuation marks and line breaks in *text* with spaces.

    Every matched punctuation mark or line break is replaced by a space,
    the replacement token looked up for it (currently always the empty
    string), and another space, so each match becomes two spaces. The
    single- and double-quote characters are not matched and pass through
    unchanged. Text before, between, and after the matches is kept as is.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    import re
    from string import punctuation

    # Replacement token for every symbol the pattern can match. All tokens
    # are empty today; change an entry to substitute a placeholder instead.
    punctuation_token = {mark: "" for mark in punctuation}
    # NOTE(review): the source listed several literal control characters
    # here that did not survive copy/paste; CRLF, CR and LF are a best
    # guess -- confirm against the intended input.
    for line_break in ("\r\n", "\r", "\n"):
        punctuation_token[line_break] = ""

    # Always put multi-character sequences first so they win over the
    # single-character class and are not split into overlapping matches.
    # Inside a raw string the regex engine itself interprets \r and \n as
    # carriage return and line feed, and \\ as one literal backslash.
    regex = re.compile(r"(\r\n)|[\r\n!@#$%^&*()\[\]{};:,./?|`_\\+=~\-<>]")

    pieces = []
    index = 0
    for match in regex.finditer(text):
        pieces.append(text[index:match.start()])
        pieces.append(" " + punctuation_token[match.group()] + " ")
        index = match.end()
    # Keep whatever follows the final match; without this the tail of the
    # input (or the whole input, when nothing matches) would be lost.
    pieces.append(text[index:])
    return "".join(pieces)