Combine two columns of text in pandas dataframe

后端 未结 18 1325
-上瘾入骨i
-上瘾入骨i 2020-11-22 01:32

I have a 20 x 4000 dataframe in Python using pandas. Two of these columns are named Year and quarter. I'd like to create a variable called p…

18条回答
  •  没有蜡笔的小新
    2020-11-22 02:01

    Using zip could be even quicker:

    # Build the new column row by row: Year is converted to str, paired with
    # quarter via zip, and each pair is concatenated with ''.join.
    # NOTE(review): ''.join needs every item to be a str, so this presumably
    # relies on quarter already holding strings — confirm for your data.
    df["period"] = [''.join(i) for i in zip(df["Year"].map(str),df["quarter"])]
    

    Graph:

    import pandas as pd
    import numpy as np
    import timeit
    import matplotlib.pyplot as plt
    from collections import defaultdict
    
    # Seed frame with two string columns. Every lambda below looks up the
    # module-level name `df` when it is *called*, not when it is defined, so
    # rebinding `df` later changes what the candidates operate on.
    df = pd.DataFrame({'Year': ['2014', '2015'], 'quarter': ['q1', 'q2']})

    # Candidate implementations, keyed by the expression written out as text
    # (used as the legend label) and mapped to a zero-argument callable that
    # evaluates that same expression.
    myfuncs = {
        "df['Year'].astype(str) + df['quarter']":
            lambda: df['Year'].astype(str) + df['quarter'],
        "df['Year'].map(str) + df['quarter']":
            lambda: df['Year'].map(str) + df['quarter'],
        "df.Year.str.cat(df.quarter)":
            lambda: df.Year.str.cat(df.quarter),
        "df.loc[:, ['Year','quarter']].astype(str).sum(axis=1)":
            lambda: df.loc[:, ['Year','quarter']].astype(str).sum(axis=1),
        "df[['Year','quarter']].astype(str).sum(axis=1)":
            lambda: df[['Year','quarter']].astype(str).sum(axis=1),
        "df[['Year','quarter']].apply(lambda x : '{}{}'.format(x[0],x[1]), axis=1)":
            lambda: df[['Year','quarter']].apply(lambda x : '{}{}'.format(x[0],x[1]), axis=1),
        # Label now names `df`, matching the expression that is actually run
        # (it previously said `dataframe`, a name that does not exist here).
        "[''.join(i) for i in zip(df['Year'].map(str),df['quarter'])]":
            lambda: [''.join(i) for i in zip(df["Year"].map(str),df["quarter"])],
    }
    
    # d[label][row_count] -> best observed seconds for ONE call of that
    # candidate at that frame size.
    d = defaultdict(dict)
    step = 10  # the frame is replicated this many times after each round
    cont = True
    while cont:
        lendf = len(df)
        print(lendf)
        for k, v in myfuncs.items():
            # Raise the call count tenfold until the fastest of three repeats
            # takes at least 0.2 s in total.
            iters = 1
            while True:
                t = min(timeit.repeat(v, number=iters, repeat=3))
                if t >= 0.2:
                    break
                iters *= 10
            # `t` is the total for `iters` calls. Divide by the count that was
            # actually timed; bumping `iters` after the last measurement
            # would under-report every per-call time by a factor of ten.
            d[k][lendf] = t / iters
            # Once any candidate's batch exceeds 2 s, finish this round for
            # the remaining candidates and then stop growing the frame.
            if t > 2:
                cont = False
        # Rebinding `df` is what makes the next round run on a larger frame.
        df = pd.concat([df] * step)
    
    # One line per candidate (columns of the frame built from `d`), with the
    # row counts along the x axis. The legend is anchored below the axes.
    timings = pd.DataFrame(d)
    axes = timings.plot()
    axes.legend(loc='upper center', bbox_to_anchor=(0.5, -0.15))

    # Both axes are switched to a logarithmic scale before labelling.
    plt.yscale('log')
    plt.xscale('log')
    plt.ylabel('seconds')
    plt.xlabel('df rows')
    plt.show()
    

提交回复
热议问题