#coding:utf-8
import pandas as pd
import numpy as np
from scipy.interpolate import lagrange#拉格朗日函数
# Load the input workbook and echo its first row as a quick sanity check.
data = pd.read_excel('./data/test_1.xlsx')
a = data.iloc[:1]
print(a)
# Custom interpolation function for a single column vector
def ploy(s, n, k=6):
    """Estimate the value at label ``n`` of ``s`` by Lagrange interpolation.

    Up to ``k`` entries on each side of ``n`` (labels ``n-k .. n-1`` and
    ``n+1 .. n+k``) are used as interpolation nodes; entries that are
    missing from the series or null are ignored.

    Args:
        s: pandas Series indexed by integer labels.
        n: Label of the entry to interpolate.
        k: Number of neighbours to consider on each side of ``n``.

    Returns:
        The interpolating polynomial evaluated at ``n``, or ``nan`` when no
        neighbouring value is available.
    """
    neighbours = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # reindex() produces NaN for labels that fall outside the series, whereas
    # plain s[labels] raises KeyError on current pandas when the window
    # extends past either end of the data.
    y = s.reindex(neighbours)
    y = y[y.notnull()]
    if y.empty:
        # Without any node lagrange() would return the zero polynomial, which
        # would masquerade as a real interpolated value.
        return np.nan
    return lagrange(y.index, list(y))(n)
def time_split(str_time):
    """Split a timestamp into ``[year, month, day, hour, minute, second]``.

    Args:
        str_time: Either a string shaped like ``"Y/M/D h:m:s[.fraction]"``
            (date and time separated by whitespace) or a ``datetime``-like
            object such as ``pandas.Timestamp``.

    Returns:
        A list of ``numpy.int64`` values. Any fractional-second part is
        dropped.

    Raises:
        ValueError: If a string timestamp has no time-of-day part or contains
            a non-numeric field.
    """
    import datetime

    # Excel date cells may already arrive parsed; read the fields directly.
    if isinstance(str_time, datetime.datetime):
        fields = (str_time.year, str_time.month, str_time.day,
                  str_time.hour, str_time.minute, str_time.second)
        return [np.int64(value) for value in fields]

    # split() without an argument tolerates leading, trailing and repeated
    # whitespace, which split(" ") turns into empty fields.
    pieces = str_time.split()
    if len(pieces) < 2:
        raise ValueError(
            "expected 'Y/M/D h:m:s[.fraction]', got {!r}".format(str_time))
    date_fields = pieces[0].split("/")
    # Everything after the first '.' is the sub-second part and is discarded.
    clock_fields = pieces[1].split(".")[0].split(":")
    return [np.int64(text) for text in date_fields + clock_fields]
def normal_time_add(int_time):
    """Advance a ``[year, month, day, hour, minute, second]`` list by one second.

    The list is updated in place and also returned. Elements may be ints,
    ``numpy`` integers or numeric strings such as ``'05'``; after the call the
    six fields are ``numpy.int64`` values.

    Calendar arithmetic is delegated to ``datetime`` so that real month
    lengths and leap years are honoured and days/months stay 1-based (a
    fixed modulo table would roll the 31st over to "day 0").

    Args:
        int_time: Mutable sequence whose first six items are the time fields.

    Returns:
        The same ``int_time`` object, advanced by one second.

    Raises:
        ValueError: If the fields do not form a valid calendar date/time.
    """
    from datetime import datetime, timedelta

    year, month, day, hour, minute, second = (int(v) for v in int_time[:6])
    advanced = datetime(year, month, day, hour, minute, second) + timedelta(seconds=1)
    int_time[:6] = [
        np.int64(value)
        for value in (advanced.year, advanced.month, advanced.day,
                      advanced.hour, advanced.minute, advanced.second)
    ]
    return int_time
def Adjust_TimeList(complete_data, row_idx, time):
    """Insert a copy of row ``row_idx`` directly below it with a new timestamp.

    Args:
        complete_data: DataFrame to extend; it is not modified.
        row_idx: Position of the row to duplicate.
        time: ``[year, month, day, hour, minute, second]`` for the new row.
            The sequence is only read, never modified.

    Returns:
        A new DataFrame with a fresh ``0..n`` index containing the extra row.
    """
    year, month, day, hour, minute, second = (int(part) for part in time[:6])
    # Hour, minute and second are zero-padded to two digits; the date fields
    # are written as-is.
    # NOTE(review): the trailing '.' after '.000' looks unintentional but is
    # kept because it may mirror the input files - confirm against the data.
    stamp = '{}/{}/{} {:02d}:{:02d}:{:02d}.000.'.format(
        year, month, day, hour, minute, second)

    new_single_data = complete_data.iloc[row_idx:row_idx + 1].copy()
    # Prefer a column literally named 'time'; otherwise fall back to the first
    # column, which is the one time_data_prepare reads timestamps from.
    columns = new_single_data.columns
    time_column = 'time' if 'time' in columns else columns[0]
    new_single_data[time_column] = stamp

    head_data = complete_data.iloc[:row_idx + 1]
    down_data = complete_data.iloc[row_idx + 1:]
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    complete_data = pd.concat(
        [head_data, new_single_data, down_data], ignore_index=True)

    # Debug preview of the rows around the insertion point; the start is
    # clamped so a small row_idx does not turn into a negative slice bound.
    print(complete_data[max(row_idx - 5, 0):row_idx + 5])
    print("a")
    return complete_data
def time_data_prepare(data):
    """Fill one-second gaps in the timestamp column with copies of the prior row.

    The first column of ``data`` is read as the timestamp column. Starting
    from the first row's timestamp, the expected time is advanced one second
    at a time with ``normal_time_add``. Whenever the next row does not carry
    the expected timestamp, ``Adjust_TimeList`` inserts a copy of the
    preceding row stamped with the expected time.

    Rows are assumed to be ordered by time. A row whose timestamp is not
    later than expected (duplicate or out-of-order) is kept as-is and the
    expected time is re-synchronised to it.

    Args:
        data: DataFrame whose first column holds the timestamps.

    Returns:
        The DataFrame with filler rows inserted; ``data`` itself is returned
        unchanged when it has no rows.
    """
    data_time = data[data.columns[0]]
    total_rows = len(data_time)
    if total_rows == 0:
        return data

    normal_time = time_split(data_time.iloc[0])
    source_row = 1  # next row of the original table still to be checked
    inserted = 0    # filler rows added so far (offset into the grown table)

    # A while loop is required: an inserted row must not use up one of the
    # original rows' turns, otherwise the tail of the table goes unchecked.
    while source_row < total_rows:
        cur_time = [int(v) for v in time_split(data_time.iloc[source_row])]
        normal_time = normal_time_add(normal_time)
        expected = [int(v) for v in normal_time]

        if expected == cur_time:
            source_row += 1
        elif expected > cur_time:
            # The field lists compare chronologically. Inserting fillers here
            # could never catch up with this row, so resynchronise instead.
            normal_time = list(cur_time)
            source_row += 1
        else:
            # The row just before the current one sits at this position in
            # the grown table; duplicate it with the expected timestamp.
            # A copy of the time list is passed so the running counter cannot
            # be altered by the callee.
            data = Adjust_TimeList(
                data, source_row + inserted - 1, list(normal_time))
            inserted += 1
    return data
def data_Prepare(data):
    """Run the preprocessing pipeline on ``data`` and return the result.

    The only active step checks the continuity of the time column by
    delegating to ``time_data_prepare``.
    """
    return time_data_prepare(data)
# Run the preprocessing step on the loaded table.
data = data_Prepare(data)
# NOTE: a disabled experiment used to sit here. It looped over every column
# and row and replaced null cells with ploy(data[i], j).
# Write the processed table to a new workbook.
data.to_excel('data/outfile_1.xlsx')
print("finish...")
# Source: https://my.oschina.net/u/4217074/blog/3109132