Pandas and Matplotlib - fill_between() vs datetime64

后端 未结 4 1321
旧巷少年郎
旧巷少年郎 2020-11-29 07:15

There is a Pandas DataFrame:


Int64Index: 300 entries, 5220 to 5519
Data columns (total 3 columns):
Date                 


        
4条回答
  •  萌比男神i
    2020-11-29 07:33

    As WillZ pointed out, Pandas 0.21 broke unutbu's workaround. Converting datetimes to dates, however, can have significantly negative impacts on data analysis. This solution currently works and keeps datetime:

    import numpy as np
    import matplotlib.pyplot as plt
    import pandas as pd
    
    # Demo frame: a sine curve ('A'), a cosine curve ('B') and a datetime64
    # 'Date' column with N timestamps generated at freq='ms'.
    N = 300
    angles = np.linspace(0, 2*np.pi, N)
    stamps = pd.date_range('2000-1-1', periods=N, freq='ms')
    data = pd.DataFrame({'A': np.sin(angles), 'B': np.cos(angles),
                         'Date': stamps})
    
    # Convert the column once to Python datetime objects and reuse the result
    # for every plotting call, so the frame itself keeps its datetime column.
    py_dates = data['Date'].dt.to_pydatetime()
    a_at_or_above_b = data['A'] >= data['B']
    
    for column in ('A', 'B'):
        plt.plot_date(py_dates, data[column], '-')
    
    # Shade the area between the two curves, but only where A >= B.
    plt.fill_between(py_dates, data['A'], data['B'],
                     where=a_at_or_above_b,
                     facecolor='green', alpha=0.2, interpolate=True)
    plt.xticks(rotation=25)
    plt.show()
    

    EDIT: As per jedi's comment, I set out to determine the fastest approach of the three options below:

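    • method1 = original answer (build the dates with a list comprehension: pd.Timestamp(x).to_pydatetime() for each element)
    • method2 = jedi's comment + original answer (call data['Date'].dt.to_pydatetime() once, store it in d, and reuse d for every plotting call)
    • method3 = jedi's comment (call data['Date'].dt.to_pydatetime() inline in each plotting call, without storing the result)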

    method2 was only slightly faster, but its timings were much more consistent, so I have edited the answer above to use that approach.

    import numpy as np
    import matplotlib.pyplot as plt
    import pandas as pd
    import time
    
    
    # Benchmark three ways of handing the 'Date' column to Matplotlib as Python
    # datetime objects.  Each method is sampled 10 times; one sample is the time
    # taken to draw (and then clear) the same figure 500 times.
    N = 300
    dates = pd.date_range('2000-1-1', periods=N, freq='ms')
    x = np.linspace(0, 2*np.pi, N)
    data = pd.DataFrame({'A': np.sin(x), 'B': np.cos(x),
               'Date': dates})
    # One row per method (added below), one column per timing sample.
    time_data = pd.DataFrame(columns=['1', '2', '3', '4', '5', '6', '7', '8', '9', '10'])
    method1 = []
    method2 = []
    method3 = []
    
    # NOTE: time.clock() was removed in Python 3.8.  time.perf_counter() is the
    # high-resolution clock meant for measuring intervals, so it is used for
    # all three methods.
    
    # method1: convert element by element through pd.Timestamp.
    for sample in range(0, 10):
        start = time.perf_counter()
        for _ in range(0, 500):
            d = [pd.Timestamp(x).to_pydatetime() for x in data['Date']]
            plt.plot_date(d, data['A'], '-')
            plt.plot_date(d, data['B'], '-')
    
    
            plt.fill_between(d, data['A'], data['B'],
                where=data['A'] >= data['B'],
                facecolor='green', alpha=0.2, interpolate=True)
            plt.xticks(rotation=25)
            # Clear the figure so artists do not accumulate between draws.
            plt.gcf().clear()
        method1.append(time.perf_counter() - start)
    
    # method2: convert the whole column once per draw and reuse the result.
    for sample in range(0, 10):
        start = time.perf_counter()
        for _ in range(0, 500):
            d = data['Date'].dt.to_pydatetime()
            plt.plot_date(d, data['A'], '-')
            plt.plot_date(d, data['B'], '-')
    
    
            plt.fill_between(d, data['A'], data['B'],
                where=data['A'] >= data['B'],
                facecolor='green', alpha=0.2, interpolate=True)
            plt.xticks(rotation=25)
            plt.gcf().clear()
        method2.append(time.perf_counter() - start)
    
    # method3: convert the whole column inline in every plotting call
    # (three conversions per draw, nothing stored).
    for sample in range(0, 10):
        start = time.perf_counter()
        for _ in range(0, 500):
            plt.plot_date(data['Date'].dt.to_pydatetime(), data['A'], '-')
            plt.plot_date(data['Date'].dt.to_pydatetime(), data['B'], '-')
    
    
            plt.fill_between(data['Date'].dt.to_pydatetime(), data['A'], data['B'],
                where=data['A'] >= data['B'],
                facecolor='green', alpha=0.2, interpolate=True)
            plt.xticks(rotation=25)
            plt.gcf().clear()
        method3.append(time.perf_counter() - start)
    
    time_data.loc['method1'] = method1
    time_data.loc['method2'] = method2
    time_data.loc['method3'] = method3
    print(time_data)
    # Mean time per method, with the standard deviation across samples as the
    # error bar.
    plt.errorbar(time_data.index, time_data.mean(axis=1), yerr=time_data.std(axis=1))
    # Needed for the summary chart to appear when this runs as a plain script.
    plt.show()
    

提交回复
热议问题