Save Dataframe to csv directly to s3 Python

前端 未结 10 943
独厮守ぢ
独厮守ぢ 2020-11-28 02:02

I have a pandas DataFrame that I want to upload to a new CSV file. The problem is that I don't want to save the file locally before transferring it to s3. Is there any meth

10条回答
  •  陌清茗
    陌清茗 (楼主)
    2020-11-28 02:43

    I read a two-column CSV file from an S3 bucket and load its contents into a pandas DataFrame.

    Example:

    config.json

    {
      "credential": {
        "access_key":"xxxxxx",
        "secret_key":"xxxxxx"
    }
    ,
    "s3":{
           "bucket":"mybucket",
           "key":"csv/user.csv"
       }
    }
    

    cls_config.py

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    
    import os
    import json
    
    class cls_config(object):
        """Load a JSON configuration file stored alongside this module."""

        def __init__(self, filename):
            # Name of the configuration file; getConfig joins it onto this
            # module's directory, so a relative name is looked up next to
            # this module.
            self.filename = filename

        def getConfig(self):
            """Parse the configuration file and return the decoded JSON value.

            The file is closed as soon as parsing finishes because the read
            happens inside the ``with`` block.
            """
            fileName = os.path.join(os.path.dirname(__file__), self.filename)
            with open(fileName) as f:
                config = json.load(f)
            return config
    

    cls_pandas.py

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    
    import pandas as pd
    import io
    
    class cls_pandas(object):
        """Helper that turns CSV text into a pandas DataFrame."""

        def __init__(self):
            """Nothing to initialise; the helper keeps no state."""

        def read(self,stream):
            """Parse the comma-separated text in ``stream`` and return the DataFrame."""
            text_buffer = io.StringIO(stream)
            return pd.read_csv(text_buffer, sep=",")
    

    cls_s3.py

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    
    import boto3
    import json
    
    class cls_s3(object):
        """Small wrapper around a boto3 S3 client created from explicit credentials."""

        def __init__(self, access_key, secret_key):
            # The client is kept on the instance and reused by getObject.
            self.s3 = boto3.client(
                's3',
                aws_access_key_id=access_key,
                aws_secret_access_key=secret_key,
            )

        def getObject(self, bucket, key):
            """Fetch the object at ``bucket``/``key`` and return its body decoded as UTF-8."""
            response = self.s3.get_object(Bucket=bucket, Key=key)
            raw_bytes = response['Body'].read()
            return raw_bytes.decode('utf-8')
    

    test.py

    #!/usr/bin/env python
    # -*- coding: utf-8 -*-
    
    from cls_config import *
    from cls_s3 import *
    from cls_pandas import *
    
    class test(object):
        """Demo driver: fetch the CSV named in config.json from S3 and print it as a DataFrame."""

        def __init__(self):
            self.conf = cls_config('config.json')

        def process(self):
            """Load the settings, download the CSV object, parse it and print the DataFrame."""
            conf = self.conf.getConfig()

            bucket = conf['s3']['bucket']
            key = conf['s3']['key']

            access_key = conf['credential']['access_key']
            secret_key = conf['credential']['secret_key']

            s3 = cls_s3(access_key,secret_key)
            ob = s3.getObject(bucket,key)

            pa = cls_pandas()
            df = pa.read(ob)

            # The parenthesised single-argument form prints the same on
            # Python 2 and is valid syntax on Python 3, unlike `print df`.
            print(df)
    
    if __name__ == '__main__':
        # Bind the instance to its own name so the `test` class is not
        # replaced by one of its instances.
        runner = test()
        runner.process()
    

提交回复
热议问题