For instance, say I wanted a function to escape a string for use in HTML (as in Django\'s escape filter):
def escape(string):
\"\"\"
Retu
How about we just test various ways of doing this and see which comes out faster (assuming we are only caring about the fastest way to do it).
def escape1(input):
return input.replace('&', '&').replace('<', '<').replace('>', '>').replace("'", ''').replace('"', '"')
translation_table = {
'&': '&',
'<': '<',
'>': '>',
"'": ''',
'"': '"',
}
def escape2(input):
return ''.join(translation_table.get(char, char) for char in input)
import re
_escape3_re = re.compile(r'[&<>\'"]')
def _escape3_repl(x):
s = x.group(0)
return translation_table.get(s, s)
def escape3(x):
return _escape3_re.sub(_escape3_repl, x)
def escape4(x):
return unicode(x).translate(translation_table)
test_strings = (
'Nothing in there.',
' ',
'Something & Something else',
'This one is pretty long. ' * 50
)
import time
for test_i, test_string in enumerate(test_strings):
print repr(test_string)
for func in escape1, escape2, escape3, escape4:
start_time = time.time()
for i in xrange(1000):
x = func(test_string)
print '\t%s done in %.3fms' % (func.__name__, (time.time() - start_time))
print
Running this gives you:
'Nothing in there.'
escape1 done in 0.002ms
escape2 done in 0.009ms
escape3 done in 0.001ms
escape4 done in 0.005ms
' '
escape1 done in 0.002ms
escape2 done in 0.012ms
escape3 done in 0.009ms
escape4 done in 0.007ms
'Something & Something else'
escape1 done in 0.002ms
escape2 done in 0.012ms
escape3 done in 0.003ms
escape4 done in 0.007ms
'This one is pretty long. '
escape1 done in 0.008ms
escape2 done in 0.386ms
escape3 done in 0.011ms
escape4 done in 0.310ms
Looks like just replacing them one after another goes the fastest.
Edit: Running the tests again with 1000000 iterations gives the following for the first three strings (the fourth would take too long on my machine for me to wait =P):
'Nothing in there.'
escape1 done in 0.001ms
escape2 done in 0.008ms
escape3 done in 0.002ms
escape4 done in 0.005ms
' '
escape1 done in 0.002ms
escape2 done in 0.011ms
escape3 done in 0.009ms
escape4 done in 0.007ms
'Something & Something else'
escape1 done in 0.002ms
escape2 done in 0.011ms
escape3 done in 0.003ms
escape4 done in 0.007ms
The numbers are pretty much the same. In the first case they are actually even more consistent as the direct string replacement is fastest now.