Python: split a list based on a condition?

前端 未结 30 2312
误落风尘
误落风尘 2020-11-22 06:56

What's the best way, both aesthetically and from a performance perspective, to split a list of items into multiple lists based on a conditional? The equivalent of:

30条回答
  •  独厮守ぢ
    2020-11-22 07:06

    This is the fastest way.

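    It uses if/else (like dbr's answer) but builds a set first. With m allowed values and n items, testing membership against a list costs O(m * n) in total; building the set costs O(m) and each lookup is then O(1) on average, for O(m + n) overall, resulting in a 45%+ boost in speed.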

    # Build the set once, outside the loop, so every membership test below is
    # a hash lookup instead of a scan over good_list.
    good_list_set = set(good_list)  # 45% faster than a tuple.
    
    good, bad = [], []
    for item in my_origin_list:
        # Each item lands in exactly one of the two output lists.
        if item in good_list_set:
            good.append(item)
        else:
            bad.append(item)
    

    A little shorter:

    # Build the set once, outside the loop, so every membership test below is
    # a hash lookup instead of a scan over good_list.
    good_list_set = set(good_list)  # 45% faster than a tuple.
    
    good, bad = [], []
    for item in my_origin_list:
        # Pick the destination list first, then append once.
        out = good if item in good_list_set else bad
        out.append(item)
    

    Benchmark results:

    filter_BJHomer                  80/s       --   -3265%   -5312%   -5900%   -6262%   -7273%   -7363%   -8051%   -8162%   -8244%
    zip_Funky                       118/s    4848%       --   -3040%   -3913%   -4450%   -5951%   -6085%   -7106%   -7271%   -7393%
    two_lst_tuple_JohnLaRoy         170/s   11332%    4367%       --   -1254%   -2026%   -4182%   -4375%   -5842%   -6079%   -6254%
    if_else_DBR                     195/s   14392%    6428%    1434%       --    -882%   -3348%   -3568%   -5246%   -5516%   -5717%
    two_lst_compr_Parand            213/s   16750%    8016%    2540%     967%       --   -2705%   -2946%   -4786%   -5083%   -5303%
    if_else_1_line_DanSalmo         292/s   26668%   14696%    7189%    5033%    3707%       --    -331%   -2853%   -3260%   -3562%
    tuple_if_else                   302/s   27923%   15542%    7778%    5548%    4177%     343%       --   -2609%   -3029%   -3341%
    set_1_line                      409/s   41308%   24556%   14053%   11035%    9181%    3993%    3529%       --    -569%    -991%
    set_shorter                     434/s   44401%   26640%   15503%   12303%   10337%    4836%    4345%     603%       --    -448%
    set_if_else                     454/s   46952%   28358%   16699%   13349%   11290%    5532%    5018%    1100%     469%       --
    

    The full benchmark code for Python 3.7 (modified from FunkySayu):

    import random
    import string

    # Extensions that count as "good" when the benchmark partitions the data.
    good_list = ['.jpg','.jpeg','.gif','.bmp','.png']

    # Synthetic input: 10,000 (filename, number, extension) tuples. About half
    # get an extension drawn from good_list; the rest get a random
    # three-letter extension.
    my_origin_list = []
    for _ in range(10000):
        # File stem of 0-9 random lowercase letters (may be empty).
        fname = ''.join(random.choice(string.ascii_lowercase) for _ in range(random.randrange(10)))
        if random.getrandbits(1):
            # good_list is already a sequence; no per-iteration copy needed.
            fext = random.choice(good_list)
        else:
            fext = "." + ''.join(random.choice(string.ascii_lowercase) for _ in range(3))

        my_origin_list.append((fname + fext, random.randrange(1000), fext))
    
    # Parand
    def two_lst_compr_Parand(*_):
        """Partition ``my_origin_list`` with two list comprehensions.

        Positional arguments are accepted and ignored. Returns a 2-tuple of
        lists: entries whose ``e[2]`` is in ``good_list``, then the remaining
        entries. The input is traversed twice, once per comprehension.
        """
        return [e for e in my_origin_list if e[2] in good_list], [e for e in my_origin_list if not e[2] in good_list]
    
    # dbr
    def if_else_DBR(*_):
        """Partition ``my_origin_list`` in one pass with a plain if/else.

        Positional arguments are accepted and ignored. Returns ``(a, b)``
        where ``a`` holds entries whose ``e[2]`` is in ``good_list`` and ``b``
        holds the rest.
        """
        a, b = list(), list()
        for e in my_origin_list:
            if e[2] in good_list:
                a.append(e)
            else:
                b.append(e)
        return a, b
    
    # John La Rooy
    def two_lst_tuple_JohnLaRoy(*_):
        """Partition ``my_origin_list`` by indexing a 2-tuple with a boolean.

        Positional arguments are accepted and ignored. Returns ``(a, b)``
        where ``a`` holds entries whose ``e[2]`` is in ``good_list`` and ``b``
        holds the rest.
        """
        a, b = list(), list()
        for e in my_origin_list:
            # The membership test yields a bool used as an index:
            # False -> 0 -> b, True -> 1 -> a.
            (b, a)[e[2] in good_list].append(e)
        return a, b
    
    # # Ants Aasma
    # def f4():
    #     l1, l2 = tee((e[2] in good_list, e) for e in my_origin_list)
    #     return [i for p, i in l1 if p], [i for p, i in l2 if not p]
    
    # My personal way to do
    def zip_Funky(*_):
        """Partition ``my_origin_list`` by building pairs and transposing them.

        Positional arguments are accepted and ignored. Each entry becomes
        ``(e, None)`` when ``e[2]`` is in ``good_list`` and ``(None, e)``
        otherwise; ``zip(*...)`` then transposes the pairs into two tuples and
        ``filter(None, ...)`` drops the ``None`` placeholders (along with any
        other falsy value). Returns the two results as lists.

        Note: with an empty ``my_origin_list`` the ``zip`` yields nothing and
        the two-name unpacking raises ``ValueError``.
        """
        a, b = zip(*[(e, None) if e[2] in good_list else (None, e) for e in my_origin_list])
        return list(filter(None, a)), list(filter(None, b))
    
    # BJ Homer
    def filter_BJHomer(*_):
        """Partition ``my_origin_list`` with two ``filter`` calls.

        Positional arguments are accepted and ignored. Returns a 2-tuple of
        lists: entries whose ``e[2]`` is in ``good_list``, then the remaining
        entries. The input is traversed twice, calling a lambda per entry.
        """
        return list(filter(lambda e: e[2] in good_list, my_origin_list)), list(filter(lambda e: not e[2] in good_list,                                                                             my_origin_list))
    
    # ChaimG's answer; as a list.
    def if_else_1_line_DanSalmo(*_):
        """Partition ``my_origin_list`` using a one-line conditional expression.

        Positional arguments are accepted and ignored. Membership is tested
        against the ``good_list`` list directly. Returns ``(good, bad)``.
        """
        good, bad = [], []
        for e in my_origin_list:
            # Only the append side effect matters; append() returns None, so
            # the value bound to ``_`` is a throwaway.
            _ = good.append(e) if e[2] in good_list else bad.append(e)
        return good, bad
    
    # ChaimG's answer; as a set.
    def set_1_line(*_):
        """Partition ``my_origin_list`` using a set and a one-line conditional.

        Positional arguments are accepted and ignored. ``good_list`` is
        converted to a set on every call, and membership of ``e[2]`` is tested
        against that set. Returns ``(good, bad)``.
        """
        good_list_set = set(good_list)
        good, bad = [], []
        for e in my_origin_list:
            # Only the append side effect matters; append() returns None, so
            # the value bound to ``_`` is a throwaway.
            _ = good.append(e) if e[2] in good_list_set else bad.append(e)
        return good, bad
    
    # ChaimG set and if else list.
    def set_shorter(*_):
        """Partition ``my_origin_list`` by choosing the target list, then appending.

        Positional arguments are accepted and ignored. ``good_list`` is
        converted to a set on every call, and membership of ``e[2]`` is tested
        against that set. Returns ``(good, bad)``.
        """
        good_list_set = set(good_list)
        good, bad = [], []
        for e in my_origin_list:
            # Select the destination list first so there is a single append.
            out = good if e[2] in good_list_set else bad
            out.append(e)
        return good, bad
    
    # ChaimG's best answer; if else as a set.
    def set_if_else(*_):
        """Partition ``my_origin_list`` with a set and a plain if/else.

        Positional arguments are accepted and ignored. ``good_list`` is
        converted to a set on every call, and membership of ``e[2]`` is tested
        against that set. Returns ``(good, bad)``.
        """
        good_list_set = set(good_list)
        good, bad = [], []
        for e in my_origin_list:
            if e[2] in good_list_set:
                good.append(e)
            else:
                bad.append(e)
        return good, bad
    
    # ChaimG's answer; if else as a tuple (for comparison with the set version).
    def tuple_if_else(*_):
        """Partition ``my_origin_list`` with a tuple and a plain if/else.

        Positional arguments are accepted and ignored. ``good_list`` is
        converted to a tuple on every call, and membership of ``e[2]`` is
        tested against that tuple. Returns ``(good, bad)``.
        """
        good_list_tuple = tuple(good_list)
        good, bad = [], []
        for e in my_origin_list:
            if e[2] in good_list_tuple:
                good.append(e)
            else:
                bad.append(e)
        return good, bad
    
    def cmpthese(n=0, functions=None):
        """Benchmark the named functions and print a pairwise comparison table.

        Each name in ``functions`` is imported from ``__main__`` and timed with
        ``timeit`` by calling it ``n`` times with ``range(256)`` as its only
        argument. One row is printed per function, slowest first: the name, its
        rate in calls per second, then one column per function (in the same
        order) showing how much faster or slower this row is than that column,
        with ``--`` on the diagonal.

        Args:
            n: Number of timed calls per function. Must be positive; the rate
                is computed by dividing by it.
            functions: Iterable of function names (strings) that can be
                imported from ``__main__``. ``None`` is treated as empty, in
                which case nothing is printed.
        """
        # Imported locally so the function does not depend on a global that
        # the script only binds inside its ``__main__`` guard.
        from timeit import Timer

        # Materialize once: the names are iterated for timing and again for
        # sorting, which would silently lose entries with a one-shot iterator.
        names = list(functions or ())

        results = {}
        for func_name in names:
            t = Timer('%s(range(256))' % func_name, 'from __main__ import %s' % func_name)
            results[func_name] = 1 / (t.timeit(number=n) / n)  # passes/sec

        functions_sorted = sorted(names, key=results.__getitem__)
        for f in functions_sorted:
            diff = []
            for func in functions_sorted:
                if func == f:
                    diff.append("--")
                else:
                    # The "%" presentation type multiplies by 100 itself, so
                    # format the plain fraction (e.g. -0.33 -> "-33%").
                    diff.append(f"{results[f] / results[func] - 1:5.0%}")
            diffs = " ".join(f'{x:>8s}' for x in diff)

            print(f"{f:27s} \t{results[f]:,.0f}/s {diffs}")
    
    
    # Run the benchmark only when executed as a script. cmpthese imports the
    # candidates from __main__, so the call must stay inside this guard;
    # merely importing the module should not start a benchmark run.
    if __name__ == '__main__':
        from timeit import Timer
        cmpthese(1000, 'two_lst_compr_Parand if_else_DBR two_lst_tuple_JohnLaRoy zip_Funky filter_BJHomer if_else_1_line_DanSalmo set_1_line set_if_else tuple_if_else set_shorter'.split(" "))
    

提交回复
热议问题