Following-up from this question years ago, is there a canonical \"shift\" function in numpy? I don\'t see anything from the documentation.
Here\'s a simple version o
You can convert ndarray
to Series
or DataFrame
with pandas
first, then you can use shift
method as you want.
Example:
In [1]: from pandas import Series
In [2]: data = np.arange(10)
In [3]: data
Out[3]: array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [4]: data = Series(data)
In [5]: data
Out[5]:
0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
dtype: int64
In [6]: data = data.shift(3)
In [7]: data
Out[7]:
0 NaN
1 NaN
2 NaN
3 0.0
4 1.0
5 2.0
6 3.0
7 4.0
8 5.0
9 6.0
dtype: float64
In [8]: data = data.values
In [9]: data
Out[9]: array([ nan, nan, nan, 0., 1., 2., 3., 4., 5., 6.])
For those who want to just copy and paste the fastest implementation of shift, there is a benchmark and conclusion(see the end). In addition, I introduce fill_value parameter and fix some bugs.
import numpy as np
import timeit
# enhanced from IronManMark20 version
def shift1(arr, num, fill_value=np.nan):
arr = np.roll(arr,num)
if num < 0:
arr[num:] = fill_value
elif num > 0:
arr[:num] = fill_value
return arr
# use np.roll and np.put by IronManMark20
def shift2(arr,num):
arr=np.roll(arr,num)
if num<0:
np.put(arr,range(len(arr)+num,len(arr)),np.nan)
elif num > 0:
np.put(arr,range(num),np.nan)
return arr
# use np.pad and slice by me.
def shift3(arr, num, fill_value=np.nan):
l = len(arr)
if num < 0:
arr = np.pad(arr, (0, abs(num)), mode='constant', constant_values=(fill_value,))[:-num]
elif num > 0:
arr = np.pad(arr, (num, 0), mode='constant', constant_values=(fill_value,))[:-num]
return arr
# use np.concatenate and np.full by chrisaycock
def shift4(arr, num, fill_value=np.nan):
if num >= 0:
return np.concatenate((np.full(num, fill_value), arr[:-num]))
else:
return np.concatenate((arr[-num:], np.full(-num, fill_value)))
# preallocate empty array and assign slice by chrisaycock
def shift5(arr, num, fill_value=np.nan):
result = np.empty_like(arr)
if num > 0:
result[:num] = fill_value
result[num:] = arr[:-num]
elif num < 0:
result[num:] = fill_value
result[:num] = arr[-num:]
else:
result[:] = arr
return result
arr = np.arange(2000).astype(float)
def benchmark_shift1():
shift1(arr, 3)
def benchmark_shift2():
shift2(arr, 3)
def benchmark_shift3():
shift3(arr, 3)
def benchmark_shift4():
shift4(arr, 3)
def benchmark_shift5():
shift5(arr, 3)
benchmark_set = ['benchmark_shift1', 'benchmark_shift2', 'benchmark_shift3', 'benchmark_shift4', 'benchmark_shift5']
for x in benchmark_set:
number = 10000
t = timeit.timeit('%s()' % x, 'from __main__ import %s' % x, number=number)
print '%s time: %f' % (x, t)
benchmark result:
benchmark_shift1 time: 0.265238
benchmark_shift2 time: 0.285175
benchmark_shift3 time: 0.473890
benchmark_shift4 time: 0.099049
benchmark_shift5 time: 0.052836
shift5 is winner! It's OP's third solution.