I have enormous files that look like this:
    05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
    05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25
I can easily read them in, but parsing the date and time columns into timestamps is painfully slow.
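For reference, here is a minimal way to load such a file. The file has no header row, so the column names (and the filename ticks.csv) are my own assumptions:

    import pandas as pd

    # The sample rows have eight comma-separated fields; these names are
    # illustrative, since the file itself carries no header.
    cols = ["date", "time", "price", "size", "flag", "cond", "bid", "ask"]
    df = pd.read_csv("ticks.csv", header=None, names=cols)

The "date" and "time" columns are what get passed to the conversion function below.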
I got an incredible speedup (roughly 50x) with the following Cython code, called from Python like this:

    timestamps = convert_date_cython(df["date"].values, df["time"].values)
    cimport numpy as np
    import numpy as np
    import datetime
    import pandas as pd

    def convert_date_cython(np.ndarray date_vec, np.ndarray time_vec):
        cdef int i
        cdef int N = len(date_vec)
        cdef np.ndarray out_ar = np.empty(N, dtype=object)
        date = None
        for i in range(N):
            # Dates repeat for long runs of rows, so only re-parse the
            # date string when it differs from the previous row's.
            if date is None or date_vec[i] != date_vec[i - 1]:
                dt_ar = [int(x) for x in date_vec[i].split("/")]
                date = datetime.date(dt_ar[2], dt_ar[0], dt_ar[1])
            # Drop the fractional seconds, then split HH:MM:SS.
            time_ar = [int(x) for x in time_vec[i].split(".")[0].split(":")]
            time = datetime.time(time_ar[0], time_ar[1], time_ar[2])
            out_ar[i] = pd.Timestamp(datetime.datetime.combine(date, time))
        return out_ar
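Two things worth noting. The speedup comes almost entirely from caching: consecutive rows share the same date string, so the expensive split-and-construct work for the date runs once per distinct date rather than once per row. And since this is Cython, the module has to be compiled before it can be imported; a minimal setup.py sketch follows, assuming the code above lives in a file named convert_date.pyx:

    # setup.py: minimal build script (the filename convert_date.pyx is assumed).
    import numpy as np
    from setuptools import setup
    from Cython.Build import cythonize

    setup(
        ext_modules=cythonize("convert_date.pyx"),
        include_dirs=[np.get_include()],  # required because the module cimports numpy
    )

Build it in place with:

    python setup.py build_ext --inplace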