I'm trying to read a CSV file from a private S3 bucket into a pandas dataframe:
df = pandas.read_csv('s3://mybucket/file.csv')
I can read a file from a public bucket, but reading a file from a private bucket results in an HTTP 403 (Forbidden) error.
In addition to other awesome answers, if a custom endpoint is required, it is possible to use pd.read_csv('s3://...')
syntax by monkey patching the s3fs init method.
import s3fs

# Keep a reference to the original constructor so the patch can delegate to it.
s3fsinit = s3fs.S3FileSystem.__init__

def s3fsinit_patched(self, *k, **kw):
    """Patched S3FileSystem.__init__ that forces a custom endpoint URL.

    Delegates to the original __init__, injecting client_kwargs so every
    S3FileSystem created by pandas/s3fs talks to the custom endpoint.
    NOTE: original code had `*kw` (a SyntaxError); it must be `**kw`.
    """
    s3fsinit(self, *k, client_kwargs={'endpoint_url': 'https://yourcustomendpoint'}, **kw)

# Install the patch globally; pd.read_csv('s3://...') now uses the endpoint.
s3fs.S3FileSystem.__init__ = s3fsinit_patched
Or, a more elegant way:
import os  # required: the constructor reads credentials from os.environ
import s3fs

class S3FileSystemPatched(s3fs.S3FileSystem):
    """S3FileSystem subclass pinned to a custom endpoint.

    Credentials are taken from the environment variables
    `aws_access_key_id` and `aws_secret_access_key`; a KeyError is raised
    if either is unset.
    """

    def __init__(self, *k, **kw):
        super().__init__(
            *k,
            key=os.environ['aws_access_key_id'],
            secret=os.environ['aws_secret_access_key'],
            client_kwargs={'endpoint_url': 'https://yourcustomendpoint'},
            **kw,
        )
        print('S3FileSystem is patched')

# Replace the class so pd.read_csv('s3://...') instantiates the patched one.
s3fs.S3FileSystem = S3FileSystemPatched
Also see: s3fs custom endpoint url
import pandas as pd
import boto3
from io import StringIO

# Build the client explicitly. NOTE: the original line
#   boto3.client('s3',endpoint_url,aws_access_key_id=,aws_secret_access_key)
# was not valid Python — boto3.client takes these as keyword arguments.
s3 = boto3.client(
    's3',
    endpoint_url='https://yourcustomendpoint',
    aws_access_key_id='YOUR_ACCESS_KEY_ID',
    aws_secret_access_key='YOUR_SECRET_ACCESS_KEY',
)

# Read CSV: get_object requires keyword arguments Bucket= and Key=.
read_file = s3.get_object(Bucket='mybucket', Key='file.csv')
df = pd.read_csv(read_file['Body'], sep=',')

# Write CSV: serialize to an in-memory text buffer, then upload its contents.
csv_buffer = StringIO()
df.to_csv(csv_buffer)
s3.put_object(Bucket='mybucket', Key='file.csv', Body=csv_buffer.getvalue())