I have a dataframe like the one shown below:
df1_new = pd.DataFrame({\'person_id\': [1, 1, 3, 3, 5, 5],\'obs_date\': [\'7/23/2377 12:00:00 AM\', \'NA-NA-
You can convert the values to datetimes and then to daily Periods,
because Period is the format pandas offers for representing out-of-bounds
date values.
If you omit the Period conversion, you will be working with Python datetime objects, not with pandas datetimes (Timestamps).
from datetime import datetime
def str2time(x):
    """Parse a date string into a daily ``pd.Period``.

    The text is parsed with ``datetime.strptime`` and wrapped in a
    day-frequency ``pd.Period``, so far-future dates such as year 2377 can
    still be stored in a pandas column.

    Args:
        x: Text in ``MM/DD/YYYY II:MM:SS pp`` form, for example
            ``'7/23/2377 12:00:00 AM'``.

    Returns:
        A ``pd.Period`` with daily frequency, or ``np.nan`` when ``x`` is
        not a string or does not match the expected format.
    """
    try:
        return pd.Period(datetime.strptime(x, '%m/%d/%Y %I:%M:%S %p'), 'D')
    except (TypeError, ValueError):
        # ValueError: the text does not match the format.
        # TypeError: x is not a string (e.g. None or a float NaN).
        # Anything else (KeyboardInterrupt, real bugs) is allowed to surface.
        return np.nan
# Parse every obs_date string with str2time; values it cannot parse come back as np.nan.
df1_new['obs_date'] = df1_new['obs_date'].apply(str2time)
print(df1_new)
person_id obs_date
0 1 2377-07-23
1 1 NaT
2 3 NaT
3 3 2277-07-27
4 5 2077-07-13
5 5 NaT
# Show the dtype of the converted column (the output below reports period[D]).
print(df1_new['obs_date'].dtype)
period[D]
If multiple formats are possible:
def str2time(x, formats=('%m/%d/%Y %I:%M:%S %p', '%Y-%m-%d %H:%M:%S')):
    """Parse a date string in one of several layouts into a daily ``pd.Period``.

    Each format is tried in order and the first one that matches wins.

    Args:
        x: Text holding a date and time. With the default formats this is
            either ``MM/DD/YYYY II:MM:SS pp`` (like ``7/23/2377 12:00:00 AM``)
            or ``YYYY-MM-DD HH:MM:SS`` (like ``2377-07-23 00:00:00``).
        formats: ``strptime`` format strings to try, in priority order.

    Returns:
        A ``pd.Period`` with daily frequency, or ``np.nan`` when ``x`` is
        not a string or matches none of the formats.
    """
    for fmt in formats:
        try:
            return pd.Period(datetime.strptime(x, fmt), 'D')
        except (TypeError, ValueError):
            # ValueError: the text does not match this format.
            # TypeError: x is not a string. Either way, try the next format.
            continue
    return np.nan
# Re-run the conversion with the multi-format str2time defined above.
df1_new['obs_date'] = df1_new['obs_date'].apply(str2time)