问题
This question is related to this question: How to add extra time into the time column with data using Python.
I have a dataset with a date column, a time column, and one input column. The time column is irregularly spaced, so I want to add a regular time range to it. As a first step, I converted the start time to 0 and converted the whole time column into minutes.
Next, I want to add a regular time range such as 0, 60, 120, ... to that column.
That is, my expected output is:
first time convert
date time time convert
10/3/2018 6:15:00 0
10/3/2018 6:45:00 30
10/3/2018 7:45:00 90
10/3/2018 9:00:00 165
10/3/2018 9:25:00 190
10/3/2018 9:30:00 195
10/3/2018 11:00:00 285
10/3/2018 11:30:00 315
10/3/2018 13:30:00 435
10/3/2018 13:50:00 455
10/3/2018 15:00:00 525
10/3/2018 15:25:00 550
10/3/2018 16:25:00 610
10/3/2018 18:00:00 705
10/3/2018 19:00:00 765
10/3/2018 19:30:00 795
10/3/2018 20:00:00 825
10/3/2018 22:05:00 950
10/3/2018 22:15:00 960
10/3/2018 23:40:00 1045
10/4/2018 6:58:00 0
10/4/2018 13:00:00 2
10/4/2018 16:00:00 47
10/4/2018 17:00:00 152
Expected output
expected time
0.0
30.0
60
90.0
120
165.0
180
190.0
195.0
240
285.0
300
315.0
360
420
435
455
480
525
540
550
:
:
:
:
0 new date ,start time=0
2
47
60
120
152
180
So for every new day the start time should be 0, and then a row should be added to the time column at every 60 minutes (60, 120, ...).
I tried the following code, but it gave me the error "cannot reindex from a duplicate axis".
code :
# Combine the separate date and time strings into a single datetime column.
data['date'] = pd.to_datetime(
    data['date'] + " " + data['time'],
    format='%d/%m/%Y %H:%M:%S',
    dayfirst=True,
)

# Reference timestamp for the day currently being processed; it starts at
# the first row and is replaced by convert_time whenever the date changes.
lastday = data.loc[0, 'date']


def convert_time(x):
    """Return the minutes elapsed between *x* and the global ``lastday``.

    When *x* falls on a different calendar date than ``lastday``, ``lastday``
    is rebound to *x* and 0 is returned, so the count restarts for that day.
    The function depends on being called on the rows in order, because it
    mutates module-level state between calls.
    """
    global lastday
    if x.date() != lastday.date():
        # First row of a new date: it becomes the new reference point.
        lastday = x
        return 0
    elapsed = x - lastday
    return elapsed.total_seconds() / 60


# Overwrite the 'time' column with minutes since the first row of each date.
data['time'] = data['date'].apply(convert_time).values
data = data.reset_index()

# Regular 60-minute marks from 0 up to (but excluding) the largest minute value.
arr = np.arange(0, int(data['time'].max()), 60)
# Sorted, de-duplicated combination of the observed minutes and the marks.
union = np.union1d(data['time'], arr)

# NOTE: 'time' restarts at 0 on every new date, so the same minute value can
# occur more than once; reindex refuses a non-unique index and raises
# "cannot reindex from a duplicate axis" on this line.
data = (
    data
    .set_index('time')
    .reindex(union, fill_value=0)
    .reset_index()
)
Error :
ValueError Traceback (most recent call last)
<ipython-input-27-bdb960d8efe1> in <module>()
26 arr = np.arange(0, int(data['time'].max()), 60)
27 union = np.union1d(data['time'], arr)
---> 28 data = data.set_index('time').reindex(union, fill_value=0).reset_index()
~\Anaconda3\lib\site-packages\pandas\util\_decorators.py in wrapper(*args, **kwargs)
184 @wraps(func)
185 def wrapper(*args, **kwargs):
--> 186 return func(*args, **kwargs)
187
188 if not PY2:
~\Anaconda3\lib\site-packages\pandas\core\frame.py in reindex(self, *args, **kwargs)
3561 kwargs.pop('axis', None)
3562 kwargs.pop('labels', None)
-> 3563 return super(DataFrame, self).reindex(**kwargs)
3564
3565 @Appender(_shared_docs['reindex_axis'] % _shared_doc_kwargs)
~\Anaconda3\lib\site-packages\pandas\core\generic.py in reindex(self, *args, **kwargs)
3683 # perform the reindex on the axes
3684 return self._reindex_axes(axes, level, limit, tolerance, method,
-> 3685 fill_value, copy).__finalize__(self)
3686
3687 def _reindex_axes(self, axes, level, limit, tolerance, method, fill_value,
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _reindex_axes(self, axes, level, limit, tolerance, method, fill_value, copy)
3496 if index is not None:
3497 frame = frame._reindex_index(index, method, copy, level,
-> 3498 fill_value, limit, tolerance)
3499
3500 return frame
~\Anaconda3\lib\site-packages\pandas\core\frame.py in _reindex_index(self, new_index, method, copy, level, fill_value, limit, tolerance)
3507 return self._reindex_with_indexers({0: [new_index, indexer]},
3508 copy=copy, fill_value=fill_value,
-> 3509 allow_dups=False)
3510
3511 def _reindex_columns(self, new_columns, method, copy, level,
~\Anaconda3\lib\site-packages\pandas\core\generic.py in _reindex_with_indexers(self, reindexers, fill_value, copy, allow_dups)
3804 fill_value=fill_value,
3805 allow_dups=allow_dups,
-> 3806 copy=copy)
3807
3808 if copy and new_data is self._data:
~\Anaconda3\lib\site-packages\pandas\core\internals.py in reindex_indexer(self, new_axis, indexer, axis, fill_value, allow_dups, copy)
4412 # some axes don't allow reindexing with dups
4413 if not allow_dups:
-> 4414 self.axes[axis]._can_reindex(indexer)
4415
4416 if axis >= self.ndim:
~\Anaconda3\lib\site-packages\pandas\core\indexes\base.py in _can_reindex(self, indexer)
3557 # trying to reindex on an axis with duplicates
3558 if not self.is_unique and len(indexer):
-> 3559 raise ValueError("cannot reindex from a duplicate axis")
3560
3561 def reindex(self, target, method=None, level=None, limit=None,
ValueError: cannot reindex from a duplicate axis
Subset of my csv:
date time X3
10/3/2018 6:15:00 7
10/3/2018 6:45:00 5
10/3/2018 7:45:00 7
10/3/2018 9:00:00 7
10/3/2018 9:25:00 7
10/3/2018 9:30:00 5
10/3/2018 11:00:00 7
10/3/2018 11:30:00 7
10/3/2018 13:30:00 7
10/3/2018 13:50:00 5
10/3/2018 15:00:00 7
10/3/2018 15:25:00 7
10/3/2018 16:25:00 7
10/3/2018 18:00:00 7
10/3/2018 19:00:00 5
My csv : enter link description here
来源:https://stackoverflow.com/questions/57989459/how-to-add-extra-time-to-time-column-when-the-time-is-changed-with-date