1. 去除不必要的显式 for 循环,使用向量化计算。
1 import time
2 import numpy as np
3
4
def for_time(size=1000000):
    """Time adding one to every array element with an explicit Python loop.

    Builds an integer array of ``size`` elements and increments each element
    individually. The loop is deliberately element-by-element: it is the slow
    baseline for comparing against a vectorized in-place add.

    Args:
        size: Number of elements to create and to increment.

    Returns:
        Elapsed wall-clock seconds, covering array creation and the loop.
    """
    start = time.time()
    data = np.arange(0, size, 1)
    # Visit every element so the amount of work matches the array length.
    for i in range(size):
        data[i] += 1
    elapsed = time.time() - start
    # Single-argument %-formatting prints the same text on Python 2 and 3.
    print('for loop used time: %s' % elapsed)
    return elapsed
12
13
def vector_time(size=1000000):
    """Time adding one to every array element with one vectorized operation.

    Builds an integer array of ``size`` elements and increments all of them
    with a single in-place NumPy addition instead of a Python-level loop.

    Args:
        size: Number of elements to create and to increment.

    Returns:
        Elapsed wall-clock seconds, covering array creation and the addition.
    """
    start = time.time()
    data = np.arange(0, size, 1)
    data += 1
    elapsed = time.time() - start
    # Single-argument %-formatting prints the same text on Python 2 and 3.
    print('vector calculation used time: %s' % elapsed)
    return elapsed
20
21
if __name__ == '__main__':
    # Run the explicit-loop version first, then the vectorized one, so the
    # two printed timings can be compared side by side.
    for_time()
    vector_time()
for loop used time: 0.359999895096
vector calculation used time: 0.0160000324249
2. 使用多进程,充分利用多核 CPU。
1 import multiprocessing
2
3
def use_pool(func, args, processes=2):
    """Apply ``func`` to every item of ``args`` using a pool of processes.

    Args:
        func: Callable taking a single argument. It is sent to the worker
            processes, so it must be picklable (for example a module-level
            function).
        args: Iterable of arguments; ``func`` is called once per item.
        processes: Number of worker processes to start. Defaults to 2.

    Returns:
        A list with one result per item, in the same order as ``args``.

    Raises:
        Whatever ``func`` raises in a worker is re-raised by ``pool.map``.
    """
    pool = multiprocessing.Pool(processes=processes)
    try:
        return pool.map(func, args)
    finally:
        # Always shut the workers down, including when ``map`` re-raises an
        # error from a worker; otherwise the processes would be leaked.
        pool.close()
        pool.join()
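3. 使用 joblib 并行库(旧版 scikit-learn 中为 sklearn.externals.joblib;该入口自 scikit-learn 0.23 起已移除,新版本请直接 `from joblib import Parallel, delayed`)。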
1 from sklearn.externals.joblib import Parallel, delayed
2
3
def parallel(func, arg):
    """Call ``func`` on each item of ``arg`` through joblib and return the results.

    Relies on ``Parallel`` and ``delayed`` being imported at module level.

    Args:
        func: Callable taking a single argument.
        arg: Iterable of arguments; ``func`` is called once per item.

    Returns:
        Whatever the ``Parallel`` call returns; the original snippet
        computed these results but discarded them.
    """
    # n_jobs=-1 asks joblib to use all available CPUs.
    return Parallel(n_jobs=-1)(delayed(func)(i) for i in arg)
4. 使用bottleneck库。
该库提供针对 NumPy 数组的快速函数,底层以 C 语言实现(早期版本基于 Cython),着眼于高性能。
来源:oschina
链接:https://my.oschina.net/u/4385353/blog/3278288