Mass string replace in python?

前端 未结 13 2219
我寻月下人不归
我寻月下人不归 2020-11-28 20:14

Say I have a string that looks like this:

str = "The &yquick &cbrown &bfox &Yjumps over the &ulazy dog"

You'll notice…

13条回答
  •  执笔经年
    2020-11-28 21:08

    Here is a C extension approach for Python:

    // Lookup table used by dsub, indexed by the character code of the byte
    // that follows '&'. A non-empty entry is the two-digit number that dsub
    // places in the emitted "\033[0;NNm" sequence; an empty string means the
    // byte is not a recognised code. The table has 123 entries, covering
    // character codes 0 through 'z' (122).
    const char *dvals[]={
        // codes 0-64: control characters, punctuation and digits (all unused)
        "","","","","","","","","","",
        "","","","","","","","","","",
        "","","","","","","","","","",
        "","","","","","","","","","",
        "","","","","","","","","","",
        "","","","","","","","","","",
        "","","","","",
        // codes 65-90: 'A'-'Z' (only 'Y' is mapped, to "33")
        "","","","","",
        "","","","","",
        "","","","","",
        "","","","","",
        "","","","","33",
        "",
        // codes 91-96: the six punctuation characters between 'Z' and 'a' (unused)
        "","","","","","",
        // codes 97-122: 'a'-'z' ('b' -> "32", 'c' -> "31", 'u' -> "34", 'y' -> "30")
        "","32","31","","",
        "","","","","",
        "","","","","",
        "","","","","",
        "34","","","","30",
        ""
    };
    
    int dsub(char*d,char*s){
        char *ofs=d;
        do{
            if(*s=='&' && s[1]<='z' && *dvals[s[1]]){
    
                //\033[0;
                *d++='\\',*d++='0',*d++='3',*d++='3',*d++='[',*d++='0',*d++=';';
    
                //consider as fixed 2 digits
                *d++=dvals[s[1]][0];
                *d++=dvals[s[1]][1];
    
                *d++='m';
    
                s++; //skip
    
            //non &,invalid, unused (&) ampersand sequences will go here.
            }else *d++=*s;
    
        }while(*s++);
    
        return d-ofs-1;
    }
    

    The Python code I tested it with:

    # Benchmark driver (Python 2 syntax) for the dsub wrapper.
    # dsub is not defined here; presumably it comes from the star import
    # of mylib (the compiled extension) -- confirm against the build.
    from mylib import *
    import time
    
    # NOTE: the timed interval starts before the input string is built, so it
    # covers constructing instr as well as the dsub call.
    start=time.time()
    
    # 100000 copies of a line with five mapped codes (&y &c &b &Y &u) and one
    # sequence, &U, that has no entry in the table.
    instr="The &yquick &cbrown &bfox &Yjumps over the &ulazy dog, skip &Unknown.\n"*100000
    x=dsub(instr)
    
    end=time.time()
    
    # NOTE(review): len(x) is the length of dsub's result, although the label
    # printed here says "input str length".
    print "time taken",end-start,",input str length",len(x)
    print "first few lines"
    # Show only the first 1100 characters of the result.
    print x[:1100]
    

    Results

    time taken 0.140000104904 ,input str length 11000000
    first few lines
    The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown.
    The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown.
    The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown.
    The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown.
    The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown.
    The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown.
    The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown.
    The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown.
    The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown.
    The \033[0;30mquick \033[0;31mbrown \033[0;32mfox \033[0;33mjumps over the \033[0;34mlazy dog, skip &Unknown.
    

    It is supposed to run in O(n), and it took only 160 ms (on average) for an 11 MB string on my Mobile Celeron 1.6 GHz PC.

    It also leaves unknown sequences untouched; for example, &Unknown is returned as is.

    Let me know if you have any problems with compiling, bugs, etc.

提交回复
热议问题