Writing to csv file - python 3.7

人盡茶涼 提交于 2019-12-31 07:46:44

问题


[enter image description here] [enter image description here] [enter image description here] I have some CSV files. The file names follow the pattern filename1.in.csv and filename1.out.csv, filename2.in.csv and filename2.out.csv. These files are in folders and subfolders. I'm trying to calculate some statistical values for the .in.csv files and the .out.csv files separately, but in the end all of these need to be written into one CSV file (called OutputFile in my code), row by row, with headings. Each row gets the name of the input file as className along with the calculated values. I have attached an image of a CSV file that I take as the input (inFile) to calculate the statistical values. I am not getting the desired output; the code gives NameError: maxTimeIn, minTimeIn, stdTimeIn, qual1TimeIn, qual2TimeIn, maxLenIn, minLenIn, stdLenIn, qual1LenIn, qua12LenIn, maxTimeOut, minTimeOut, stdTimeOut, qual1TimeOut, qual2TimeOut, maxLenOut, minLenOut, stdLenOut, qual1LenOut, qua12LenOut, className not defined. I am new to Python, so I am not sure if my code will give the required output. Any help is greatly appreciated. Thanks

import os
import pandas as pd
import csv

# Asker's original script (kept as posted): walk `startdir`, read every *.csv
# file with pandas, compute summary statistics, and write them to one CSV.
# The comments below point out why it fails as written.
startdir= '.'
suffix= '.csv'
for root,dirs, files, in os.walk(startdir):
    for name in files:
        # Skip anything that is not a .csv file.
        if not name.endswith(suffix):
            continue
        inFile = os.path.join(root,name)

        data = pd.read_csv(inFile)

        base = os.path.basename(inFile)
        # splitext removes only the last extension, so 'filename1.in.csv'
        # yields 'filename1.in' here.
        className = os.path.splitext(base)[0]

        # The *In variables are assigned ONLY when the current file is an
        # .in.csv file.
        if inFile.endswith('.in.csv'):

            maxTimeIn = data['frame.time_delta_displayed'].max()
            minTimeIn = data['frame.time_delta_displayed'].min()
            stdTimeIn = data['frame.time_delta_displayed'].std()
            qual1TimeIn = data['frame.time_delta_displayed'].quantile(0.25)
            qual2TimeIn = data['frame.time_delta_displayed'].quantile(0.5)

            maxLenIn = data['frame.len'].max()
            minLenIn = data['frame.len'].min()
            stdLenIn = data['frame.len'].std()
            qual1LenIn = data['frame.len'].quantile(0.25)
            qua12LenIn = data['frame.len'].quantile(0.5)

        # The *Out variables are assigned ONLY when the current file is an
        # .out.csv file.
        if inFile.endswith('.out.csv'):

            maxTimeOut = data['frame.time_delta_displayed'].max()
            minTimeOut = data['frame.time_delta_displayed'].min()
            stdTimeOut = data['frame.time_delta_displayed'].std()
            qual1TimeOut = data['frame.time_delta_displayed'].quantile(0.25)
            qual2TimeOut = data['frame.time_delta_displayed'].quantile(0.5)

            maxLenOut = data['frame.len'].max()
            minLenOut = data['frame.len'].min()
            stdLenOut = data['frame.len'].std()
            qual1LenOut = data['frame.len'].quantile(0.25)
            qua12LenOut = data['frame.len'].quantile(0.5)

            # csvData is built inside the .out.csv branch but references the
            # *In variables too. If no .in.csv file has been processed yet,
            # those names do not exist and a NameError is raised. When they do
            # exist, they hold the values of whichever .in.csv file was read
            # most recently, which is not necessarily the matching one.
            csvData = [['maxTimeIn', 'minTimeIn', 'stdTimeIn', 'q1TimeIn', 'q2TimeIn', 'maxLenIn', 'minLenIn', 'stdLenIn', 'q1LenIn', 'q2LenIn', 'maxTimeOut', 'minTimeOut', 'stdTimeOut', 'q1TimeOut', 'q2TimeOut', 'maxLenOut', 'minLenOut', 'stdLenOut', 'q1LenOut', 'q2LenOut','activity'],
                       [maxTimeIn, minTimeIn, stdTimeIn, qual1TimeIn, qual2TimeIn, maxLenIn, minLenIn, stdLenIn, qual1LenIn, qua12LenIn, maxTimeOut, minTimeOut, stdTimeOut, qual1TimeOut, qual2TimeOut, maxLenOut, minLenOut, stdLenOut, qual1LenOut, qua12LenOut, className]]

        # This runs for EVERY .csv file, not just .out.csv ones, so csvData is
        # undefined (NameError) until the first .out.csv file has been handled.
        # Mode 'w' also truncates the output each time, so at most the header
        # plus one data row ever remains in the file.
        with open('/root/Desktop/OutputFile.csv','w') as csvFile:
            writer = csv.writer(csvFile)
            writer.writerows(csvData)

        # Redundant: the `with` block above has already closed the file.
        csvFile.close()

回答1:


Try this code where I used pathlib instead of os.path and refactored functions to utilize Pandas methods:

from pathlib import Path
import pandas as pd


def prepare_values(df):
    """Return summary statistics for the two measured columns of *df*.

    For 'frame.time_delta_displayed' and then 'frame.len', the result holds
    max, min, standard deviation, the 0.25 quantile and the 0.5 quantile,
    in that order, as one flat list of ten values.
    """
    stats = []
    for column in ('frame.time_delta_displayed', 'frame.len'):
        series = df[column]
        stats.extend((
            series.max(),
            series.min(),
            series.std(),
            series.quantile(0.25),
            series.quantile(0.5),
        ))
    return stats


# Root directory that is searched recursively for *.in.csv / *.out.csv files.
source_dir = Path('stat')

# One record per .in.csv file: the activity name plus its ten "In" statistics.
in_data = []
for file in source_dir.glob('**/*.in.csv'):
    # file.stem drops the trailing '.csv'; splitting on '.' and taking the
    # first piece gives e.g. 'filename1' for 'filename1.in.csv'.
    activity = {'activity': file.stem.split('.')[0]}
    df = pd.read_csv(file)
    cols = ['maxTimeIn', 'minTimeIn', 'stdTimeIn', 'q1TimeIn', 'q2TimeIn',
            'maxLenIn', 'minLenIn', 'stdLenIn', 'q1LenIn', 'q2LenIn']
    values = prepare_values(df)
    file_data = {**activity, **dict(zip(cols, values))}
    in_data.append(file_data)

# One record per .out.csv file: the activity name plus its ten "Out" statistics.
out_data = []
for file in source_dir.glob('**/*.out.csv'):
    activity = {'activity': file.stem.split('.')[0]}
    df = pd.read_csv(file)
    cols = ['maxTimeOut', 'minTimeOut', 'stdTimeOut', 'q1TimeOut', 'q2TimeOut',
            'maxLenOut', 'minLenOut', 'stdLenOut', 'q1LenOut', 'q2LenOut']
    values = prepare_values(df)
    file_data = {**activity, **dict(zip(cols, values))}
    out_data.append(file_data)

in_df = pd.DataFrame(in_data)
out_df = pd.DataFrame(out_data)
# Outer join on 'activity' so an activity that has only an .in or only an
# .out file still gets a row (with NaN for the missing half).
all_df = in_df.join(out_df.set_index('activity'), on='activity', how='outer')

# Drop rows in which every statistic column (everything after 'activity') is
# NaN. The original referenced the undefined name `df_all` here, which raised
# NameError before any output was written.
all_df.dropna(subset=all_df.columns.tolist()[1:], how='all', inplace=True)
# Remaining gaps (e.g. an activity with no matching .out file) become 0.
all_df.fillna(0, inplace=True)
all_df.to_csv('all_data.csv', index=False)


来源:https://stackoverflow.com/questions/56890270/writing-to-csv-file-python-3-7

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!