How to split a huge csv file based on content of first column?

后端 未结 7 2094
一个人的身影
一个人的身影 2020-12-01 17:01
  • I have a 250MB+ huge csv file to upload
  • file format is group_id, application_id, reading and data could look like


        
7条回答
  •  执念已碎
    2020-12-01 17:32

    Here is some food for thought for you:

    import csv
    from collections import namedtuple
    
    # Pairs an open file handle with the csv writer bound to it, so the
    # handle can still be closed after only the writer has been handed out.
    csvfile = namedtuple('csvfile', ('file', 'writer'))

    class CSVFileCollections(object):
        """Lazily opened set of CSV output files, addressed by key.

        Indexing the collection with a key returns a ``csv.writer`` for the
        file named ``prefix + str(key) + postfix``. The file is created
        (truncating any existing one) the first time a key is seen and the
        same writer is returned for every later lookup of that key.

        Use it as a context manager so that every file opened along the way
        is closed on exit. Note that one file handle stays open per distinct
        key until the ``with`` block ends.

        Args:
            prefix: Text placed before the key in each output file name.
            postfix: Text placed after the key (typically the extension).
        """

        def __init__(self, prefix, postfix):
            self.prefix = prefix
            self.postfix = postfix
            # key -> csvfile(file, writer)
            self.files = {}

        def __getitem__(self, item):
            """Return the csv writer for *item*, opening its file if needed."""
            if item not in self.files:
                path = self.prefix + str(item) + self.postfix
                # Text mode with newline='' is what the csv module requires
                # on Python 3; the writer emits its own line terminators.
                handle = open(path, 'w', newline='')
                writer = csv.writer(
                    handle,
                    delimiter=',',
                    quotechar="'",
                    quoting=csv.QUOTE_MINIMAL,
                )
                self.files[item] = csvfile(handle, writer)
            return self.files[item].writer

        def __enter__(self):
            # Must return the collection itself, otherwise the name bound by
            # ``with ... as name`` would be None.
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            """Close every file that was opened; exceptions are not suppressed."""
            for entry in self.files.values():
                entry.file.close()
    
    
    # Stream the input one row at a time (the whole file is never held in
    # memory) and append each row to the output file selected by the value
    # in its first column.
    # newline='' lets the csv module handle line endings itself, as its
    # documentation requires for files passed to csv.reader on Python 3.
    with open('huge.csv', newline='') as readFile, CSVFileCollections('output', '.csv') as output:
        reader = csv.reader(readFile, delimiter=",", quotechar="'")
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; there is no first
                # column to route on, so skip it instead of raising IndexError.
                continue
            output[row[0]].writerow(row)
    

提交回复
热议问题