I have enormous files that look like this:
05/31/2012,15:30:00.029,1306.25,1,E,0,,1306.25
05/31/2012,15:30:00.029,1306.25,8,E,0,,1306.25
I can easily read these files into a pandas DataFrame.
I got an incredible speedup (50X) with the following cython code:
Call it from Python: timestamps = convert_date_cython(df["date"].values, df["time"].values)
cimport numpy as np
import pandas as pd
import datetime
import numpy as np
def convert_date_cython(np.ndarray date_vec, np.ndarray time_vec):
    """Combine parallel date and time string arrays into pandas Timestamps.

    Parameters
    ----------
    date_vec : np.ndarray
        Date strings in ``MM/DD/YYYY`` form, e.g. ``"05/31/2012"``.
    time_vec : np.ndarray
        Time strings in ``HH:MM:SS`` form, optionally followed by
        ``.fraction``, e.g. ``"15:30:00.029"``. It is indexed with the same
        positions as ``date_vec``.

    Returns
    -------
    np.ndarray
        Object array with ``len(date_vec)`` elements, each a ``pd.Timestamp``.

    Notes
    -----
    Anything after the first ``"."`` in a time string is discarded, so the
    resulting timestamps have whole-second resolution.

    The date part is re-parsed only when a row's date string differs from
    the previous row's, so runs of identical dates are parsed once.
    """
    # Py_ssize_t instead of a C int so very long inputs cannot overflow
    # the loop index or the length.
    cdef Py_ssize_t i
    cdef Py_ssize_t n = len(date_vec)
    # The builtin ``object`` replaces the ``np.object`` alias, which was
    # deprecated in NumPy 1.20 and removed in 1.24.
    cdef np.ndarray out_ar = np.empty(n, dtype=object)

    current_date = None
    for i in range(n):
        # On the first row current_date is None, so date_vec[i - 1] is
        # never evaluated for i == 0.
        if current_date is None or date_vec[i] != date_vec[i - 1]:
            # Build real lists: on Python 3 ``map`` returns an iterator,
            # which cannot be indexed.
            date_parts = [int(part) for part in date_vec[i].split("/")]
            current_date = datetime.date(
                date_parts[2], date_parts[0], date_parts[1]
            )
        # Keep only the text before the first "." (drops fractional seconds).
        whole_seconds = time_vec[i].split(".")[0]
        time_parts = [int(part) for part in whole_seconds.split(":")]
        current_time = datetime.time(
            time_parts[0], time_parts[1], time_parts[2]
        )
        out_ar[i] = pd.Timestamp(
            datetime.datetime.combine(current_date, current_time)
        )
    return out_ar