Format number number with specific mask regex python

耗尽温柔 提交于 2019-12-23 09:32:46

问题


I need to format a number with a specific mask: 9.9.9.9.99.999, depending on the length of the number string.
For example:

- 123456789 => 1.2.3.4.56.789
- 123456    => 1.2.3.4.56
- 1234      => 1.2.3.4
- 123       => 1.2.3
- 12        => 1.2

A number string with 7 or 8 digits will not occur in the input.

How could that be implemented with regex, preferably in python?

Thanks in advance.


回答1:


You can use this pattern:

(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)

with . as replacement.

example:

re.sub(r'(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)', '.', yourstr)



回答2:


Does it have to be a regular expression?

num = "123456789"

def fmt(num):
    """Format a digit string with the mask 9.9.9.9.99.999.

    The first four characters each form their own group, characters 5-6
    form one group and everything from the 7th character on forms the
    last group. Groups for which the string is too short are left out.

    num -- the digits as a string (it is sliced, so an int will not work).
    Returns the groups joined with '.'.
    """
    groups = list(num[:4])
    if len(num) > 4:
        groups.append(num[4:6])
    if len(num) > 6:
        groups.append(num[6:])
    return ".".join(groups)

print fmt(num)

I'm confident I will still understand that in two years. Not so sure about the regex.




回答3:


Something more general without help of regexp:

from itertools import islice
def formatn(n, pat='9.9.9.9.99.999', sep='.'):
    """Format the digits of n according to a separator-delimited mask.

    n   -- value to format; it is converted with str() first.
    pat -- mask whose fields, split on sep, give the width of each group.
    sep -- separator used both to split pat and to join the result.

    Groups that receive no characters are omitted. Characters beyond the
    total width of the mask are not included in the result.
    """
    widths = [len(field) for field in pat.split(sep)]
    chars = iter(str(n))
    groups = []
    for width in widths:
        # Each islice call continues where the previous one stopped.
        group = ''.join(islice(chars, width))
        if group:
            groups.append(group)
    return sep.join(groups)

Demo:

>>> formatn(1234)
'1.2.3.4'
>>> formatn(123456789)
'1.2.3.4.56.789'



回答4:


A non-regexp way: (this reminds me that I should learn regexp asap)

def get_formated_number(num, split_at):
    """Insert '.' into the digits of num at table-driven positions.

    num      -- value to format; it is converted with str() first.
    split_at -- mapping from the number of characters in str(num) to the
                list of indices at which a '.' is inserted.

    Raises KeyError when split_at has no entry for the length of num.
    """
    chars = list(str(num))
    positions = sorted(split_at[len(chars)])
    # Work from the highest index down so that earlier insertions do not
    # shift the positions that are still pending.
    while positions:
        chars.insert(positions.pop(), '.')
    return ''.join(chars)

# Sample inputs, one for every length that has an entry in split_at.
nums = [12, 123, 1234, 123456, 123456789]
# Maps the number of digits to the indices at which a '.' is inserted.
split_at = {2: [1], 
            3: [1, 2],
            4: [1, 2, 3],
            6: [1, 2, 3, 4],
            9: [1, 2, 3, 4, 6]}

# Print the formatted form of every sample (Python 2 print statement).
for num in nums:
    print get_formated_number(num, split_at)

Output

1.2
1.2.3
1.2.3.4
1.2.3.4.56
1.2.3.4.56.789



回答5:


EDIT 2

I found a solution that is 2 times faster than my regex solution, which had been the fastest one.
And it doesn't need a regex:

def fmt3(num):
    """Format a digit string with the mask 9.9.9.9.99.999 using slices only.

    num -- the digits as a string.

    All six groups are always sliced out; groups past the end of the
    string are empty, and the trailing dots they produce are removed.
    """
    groups = [num[i:i + 1] for i in range(4)]
    groups.append(num[4:6])
    groups.append(num[6:])
    return '.'.join(groups).rstrip('.')

I think it's because access to elements of a string is extremely fast.

.

It can be generalized, as alko did, while keeping an acceptable execution time similar to the other solutions, whereas alko's solution is 10 times slower than all the other solutions.

def fmt4(num, mask='9.9.9.9.99.999'):
    """Format a digit string according to an arbitrary dot-separated mask.

    num  -- the digits as a string.
    mask -- layout description: every '.' ends the current group, every
            other character reserves one position of num for the group.

    Returns the groups joined with '.', with leading and trailing dots
    removed. Dots that end up between two non-empty groups are kept, so
    consecutive dots inside the mask show up in the result. Characters of
    num beyond the positions reserved by the mask are not included.
    """
    def gen(mask):
        # start/end delimit the slice of num that belongs to the current
        # group; they are plain locals rather than default arguments.
        start = end = 0
        for c in mask:
            if c == '.':
                yield num[start:end]
                start = end
            else:
                end += 1
        # The last group has no terminating '.' in the mask.
        yield num[start:end]
    return '.'.join(gen(mask)).strip('.')

# Default mask with inputs of decreasing length (Python 2 print statements).
print fmt4('123456789')
print fmt4('123456')
print fmt4('1234')
print fmt4('123')
print fmt4('12')
# Bare print: emits an empty line between the two groups of output.
print
# Custom masks: a leading dot, a plain mask, consecutive dots inside the
# mask, a trailing dot, and a five-wide group.
print fmt4('123456789',mask = '.9.99.9.99.99.9')
print fmt4('123456789',mask = '9.99.9.99.99.9')
print fmt4('123456789',mask = '9...99.9.99.99.9')
print fmt4('123456789',mask = '9.99.9.99.99.9.')
print fmt4('123456789',mask = '9.99.99999.9')

result

1.2.3.4.56.789
1.2.3.4.56
1.2.3.4
1.2.3
1.2

1.23.4.56.78.9
1.23.4.56.78.9
1...23.4.56.78.9
1.23.4.56.78.9
1.23.45678.9

MY INITIAL ANSWER

My following solution,
with pat1 = '(\d)(\d)?(\d)?(\d)?(\d\d)?(\d\d\d)?'
and '.'.join(filter(None,r1.match(thestring).groups('')))
seems to be the fastest;

import re
from time import clock

from itertools import islice
def formatn(n, pat='9.9.9.9.99.999', sep='.'):
    """Format the digits of n according to a separator-delimited mask.

    n   -- value to format; it is converted with str() first.
    pat -- mask whose fields, split on sep, give the width of each group.
    sep -- separator used both to split pat and to join the result.

    Groups that receive no characters are omitted. Characters beyond the
    total width of the mask are not included in the result.
    """
    widths = [len(field) for field in pat.split(sep)]
    chars = iter(str(n))
    groups = []
    for width in widths:
        # Each islice call continues where the previous one stopped.
        group = ''.join(islice(chars, width))
        if group:
            groups.append(group)
    return sep.join(groups)

def fmt(num):
    """Format a digit string with the mask 9.9.9.9.99.999.

    The first four characters each form their own group, characters 5-6
    form one group and everything from the 7th character on forms the
    last group. Groups for which the string is too short are left out.

    num -- the digits as a string (it is sliced, so an int will not work).
    Returns the groups joined with '.'.
    """
    groups = list(num[:4])
    if len(num) > 4:
        groups.append(num[4:6])
    if len(num) > 6:
        groups.append(num[6:])
    return ".".join(groups)

# Raw string literals keep the backslashes of the regex intact; without the
# r prefix, '\d' is an invalid string escape that newer Python versions
# warn about.

# One capture group per field of the mask 9.9.9.9.99.999; every field after
# the first is optional, so shorter inputs leave the later groups unmatched.
pat1 = r'(\d)(\d)?(\d)?(\d)?(\d\d)?(\d\d\d)?'
r1 = re.compile(pat1)

# Zero-width match at each position that is preceded by exactly 1, 2, 3, 4
# or 6 digits counted from the start of the string and followed by another
# digit; substituting '.' at those positions yields the masked form.
pat2 = r'(?:(?<=^\d)|(?<=^\d{2})|(?<=^\d{3})|(?<=^\d{4})|(?<=^\d{6}))(?=\d)'
r2 = re.compile(pat2)

# Number of repetitions used for every timing loop.
iterat = 20000

# --- 9-digit input ---
# Every measurement below follows the same shape: take a clock() reading,
# run the candidate iterat times, print the elapsed clock() difference and
# then print the candidate's result once (Python 2 print statements).

# Candidate 1: capture groups of r1, empty/unmatched groups filtered out.
te = clock()
for i in xrange(iterat):
    '.'.join(filter(None,r1.match('123456789').groups('')))
print clock()-te
print '  ','.'.join(filter(None,r1.match('123456789').groups('')))

# Candidate 2: lookaround substitution with r2.
te = clock()
for i in xrange(iterat):
    r2.sub('.','123456789')
print clock()-te
print '  ',r2.sub('.','123456789')

# Candidate 3: the slice-based fmt().
te = clock()
for i in xrange(iterat):
    fmt('123456789')
print clock()-te
print '  ',fmt('123456789')

# Candidate 4: the islice-based formatn().
te = clock()
for i in xrange(iterat):
    formatn('123456789')
print clock()-te
print '  ',formatn('123456789')

# --- 6-digit input ---
# Separator line between the groups of measurements.
print '-----------------------------'

# Candidate 1: capture groups of r1; unmatched groups are None here and are
# removed by filter(None, ...).
te = clock()
for i in xrange(iterat):
    '.'.join(filter(None,r1.match('123456').groups()))
print clock()-te
print '  ','.'.join(filter(None,r1.match('123456').groups()))

# Candidate 2: lookaround substitution with r2.
te = clock()
for i in xrange(iterat):
    r2.sub('.','123456')
print clock()-te
print "  ",r2.sub('.','123456')

# Candidate 3: the slice-based fmt().
te = clock()
for i in xrange(iterat):
    fmt('123456')
print clock()-te
print '  ',fmt('123456')

# Candidate 4: the islice-based formatn().
# NOTE(review): this measurement passes '123456789' while the three
# candidates above use '123456', so it is not timing the same input.
te = clock()
for i in xrange(iterat):
    formatn('123456789')
print clock()-te
print '  ',formatn('123456789')

# --- 4-digit input ---
# Separator line between the groups of measurements.
print '-----------------------------'

# Candidate 1: capture groups of r1; unmatched groups are None here and are
# removed by filter(None, ...).
te = clock()
for i in xrange(iterat):
    '.'.join(filter(None,r1.match('1234').groups()))
print clock()-te
print '  ','.'.join(filter(None,r1.match('1234').groups()))

# Candidate 2: lookaround substitution with r2.
te = clock()
for i in xrange(iterat):
    r2.sub('.','1234')
print clock()-te
print '  ',r2.sub('.','1234')

# Candidate 3: the slice-based fmt().
te = clock()
for i in xrange(iterat):
    fmt('1234')
print clock()-te
print '  ',fmt('1234')

# Candidate 4: the islice-based formatn().
te = clock()
for i in xrange(iterat):
    formatn('1234')
print clock()-te
print '  ',formatn('1234')

result

0.186308036357
   1.2.3.4.56.789
0.397971250536
   1.2.3.4.56.789
0.258452959804
   1.2.3.4.56.789
1.9979410791
   1.2.3.4.56.789
-----------------------------
0.208518959812
   1.2.3.4.56
0.319339748488
   1.2.3.4.56
0.247042291688
   1.2.3.4.56
1.97725548918
   1.2.3.4.56.789
-----------------------------
0.179872581571
   1.2.3.4
0.273376644238
   1.2.3.4
0.207427200943
   1.2.3.4
1.9792909434
   1.2.3.4

EDIT

Inspired by Lukas Graf's answer:

def fmt2(num):
    """Format a digit string with the mask 9.9.9.9.99.999.

    num -- the digits as a string.

    The first four characters are dot-separated individually; the 5th-6th
    characters and the remainder are appended as further groups only when
    they are non-empty.
    """
    head = '.'.join(num[:4])
    middle = num[4:6]
    tail = num[6:]
    if tail:
        return '%s.%s.%s' % (head, middle, tail)
    if middle:
        return head + '.' + middle
    return head


来源:https://stackoverflow.com/questions/21031668/format-number-number-with-specific-mask-regex-python

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!