问题
[enter image description here] [enter image description here] [enter image description here]
I have some CSV files. The file names come in pairs: filename1.in.csv and filename1.out.csv, filename2.in.csv and filename2.out.csv, and so on. These files are spread across folders and subfolders. I'm trying to calculate some statistical values for the .in.csv files and for the .out.csv files separately, but in the end all of these values need to be written into one CSV file (called OutputFile in my code), row by row, with headings. Each row should contain the name of the input file as className along with the calculated values. I have attached an image of a CSV file that I take as the input (inFile) to calculate the statistical values. I am not getting the desired output; the code gives NameError: maxTimeIn, minTimeIn, stdTimeIn, qual1TimeIn, qual2TimeIn, maxLenIn, minLenIn, stdLenIn, qual1LenIn, qua12LenIn, maxTimeOut, minTimeOut, stdTimeOut, qual1TimeOut, qual2TimeOut, maxLenOut, minLenOut, stdLenOut, qual1LenOut, qua12LenOut, className not defined.
I am new to Python, so I am not sure whether my code will give the required output. Any help is greatly appreciated. Thanks.
import os
import pandas as pd
import csv
# Walk every folder under startdir, compute summary statistics for each
# '*.in.csv' / '*.out.csv' file, and write them to OutputFile.csv.
# NOTE(review): the indentation of this snippet was lost when it was pasted, so
# the intended nesting (in particular whether the CSV is written inside or
# after the loops) cannot be confirmed from the text as shown.
startdir= '.'
suffix= '.csv'
for root,dirs, files, in os.walk(startdir):
for name in files:
# Skip anything that is not a CSV file.
if not name.endswith(suffix):
continue
inFile = os.path.join(root,name)
data = pd.read_csv(inFile)
base = os.path.basename(inFile)
# splitext strips only the last extension, so 'filename1.in.csv' becomes
# 'filename1.in' here, not 'filename1'.
className = os.path.splitext(base)[0]
# Statistics for an '.in.csv' file: max, min, standard deviation and the
# 0.25 / 0.5 quantiles of the time-delta column and of the length column.
if inFile.endswith('.in.csv'):
maxTimeIn = data['frame.time_delta_displayed'].max()
minTimeIn = data['frame.time_delta_displayed'].min()
stdTimeIn = data['frame.time_delta_displayed'].std()
qual1TimeIn = data['frame.time_delta_displayed'].quantile(0.25)
qual2TimeIn = data['frame.time_delta_displayed'].quantile(0.5)
maxLenIn = data['frame.len'].max()
minLenIn = data['frame.len'].min()
stdLenIn = data['frame.len'].std()
qual1LenIn = data['frame.len'].quantile(0.25)
# Spelled 'qua12' (digit one) rather than 'qual2'; the same spelling is used
# when the row is built below, so the name itself is consistent.
qua12LenIn = data['frame.len'].quantile(0.5)
# The same ten statistics for an '.out.csv' file.
# A file name cannot end with both '.in.csv' and '.out.csv', so one pass through
# the loop body binds either the *In names or the *Out names, never both.
if inFile.endswith('.out.csv'):
maxTimeOut = data['frame.time_delta_displayed'].max()
minTimeOut = data['frame.time_delta_displayed'].min()
stdTimeOut = data['frame.time_delta_displayed'].std()
qual1TimeOut = data['frame.time_delta_displayed'].quantile(0.25)
qual2TimeOut = data['frame.time_delta_displayed'].quantile(0.5)
maxLenOut = data['frame.len'].max()
minLenOut = data['frame.len'].min()
stdLenOut = data['frame.len'].std()
qual1LenOut = data['frame.len'].quantile(0.25)
qua12LenOut = data['frame.len'].quantile(0.5)
# Header row followed by one data row that needs all twenty statistics at once.
# NOTE(review): if this row is built inside the loop, the first file processed
# leaves the other ten names unbound -- presumably the reported NameError;
# confirm against the original indentation.
csvData = [['maxTimeIn', 'minTimeIn', 'stdTimeIn', 'q1TimeIn', 'q2TimeIn', 'maxLenIn', 'minLenIn', 'stdLenIn', 'q1LenIn', 'q2LenIn', 'maxTimeOut', 'minTimeOut', 'stdTimeOut', 'q1TimeOut', 'q2TimeOut', 'maxLenOut', 'minLenOut', 'stdLenOut', 'q1LenOut', 'q2LenOut','activity'],
[maxTimeIn, minTimeIn, stdTimeIn, qual1TimeIn, qual2TimeIn, maxLenIn, minLenIn, stdLenIn, qual1LenIn, qua12LenIn, maxTimeOut, minTimeOut, stdTimeOut, qual1TimeOut, qual2TimeOut, maxLenOut, minLenOut, stdLenOut, qual1LenOut, qua12LenOut, className]]
# Mode 'w' truncates the file each time it is opened, so every open replaces
# whatever was written before instead of appending a row.
with open('/root/Desktop/OutputFile.csv','w') as csvFile:
writer = csv.writer(csvFile)
writer.writerows(csvData)
# Redundant: the with statement already closes the file.
csvFile.close()
回答1:
Try this code, where I used pathlib instead of os.path and refactored the functions to utilize pandas methods:
from pathlib import Path
import pandas as pd
def prepare_values(df, columns=('frame.time_delta_displayed', 'frame.len')):
    """Return summary statistics for the given columns of *df* as a flat list.

    For each column, in the order given, five values are appended: maximum,
    minimum, standard deviation (``Series.std``), 0.25 quantile and 0.5
    quantile.

    Args:
        df: DataFrame containing every column named in *columns*.
        columns: Names of the columns to summarise. Defaults to the two
            columns this function always used.

    Returns:
        A list holding five values per column, i.e. ``5 * len(columns)``
        entries.

    Raises:
        KeyError: If one of *columns* is missing from *df*.
    """
    df_values = []
    for col in columns:
        series = df[col]  # look the column up once instead of five times
        df_values += [
            series.max(),
            series.min(),
            series.std(),
            series.quantile(0.25),
            series.quantile(0.5),
        ]
    return df_values
# Build one row of statistics per activity from the '*.in.csv' and '*.out.csv'
# files found anywhere under source_dir, then write them to all_data.csv.
source_dir = Path('stat')

# Output column names, in the same order prepare_values() returns its values.
in_cols = ['maxTimeIn', 'minTimeIn', 'stdTimeIn', 'q1TimeIn', 'q2TimeIn',
           'maxLenIn', 'minLenIn', 'stdLenIn', 'q1LenIn', 'q2LenIn']
out_cols = ['maxTimeOut', 'minTimeOut', 'stdTimeOut', 'q1TimeOut', 'q2TimeOut',
            'maxLenOut', 'minLenOut', 'stdLenOut', 'q1LenOut', 'q2LenOut']

in_data = []
for file in source_dir.glob('**/*.in.csv'):
    # 'filename1.in.csv' -> stem 'filename1.in' -> activity 'filename1'
    activity = {'activity': file.stem.split('.')[0]}
    df = pd.read_csv(file)
    values = prepare_values(df)
    in_data.append({**activity, **dict(zip(in_cols, values))})

out_data = []
for file in source_dir.glob('**/*.out.csv'):
    activity = {'activity': file.stem.split('.')[0]}
    df = pd.read_csv(file)
    values = prepare_values(df)
    out_data.append({**activity, **dict(zip(out_cols, values))})

in_df = pd.DataFrame(in_data)
out_df = pd.DataFrame(out_data)

# Outer join keeps activities that have only an '.in' or only an '.out' file.
all_df = in_df.join(out_df.set_index('activity'), on='activity', how='outer')

# Every column after the leading 'activity' column holds a statistic; drop rows
# in which all of them are missing. (The original referenced an undefined name,
# df_all, here and raised NameError.)
stat_columns = all_df.columns.tolist()[1:]
all_df.dropna(subset=stat_columns, how='all', inplace=True)

# Remaining gaps (for example the side of a pair that had no file) become 0.
all_df.fillna(0, inplace=True)
all_df.to_csv('all_data.csv', index=False)
来源:https://stackoverflow.com/questions/56890270/writing-to-csv-file-python-3-7