Is it possible to copy all files from one S3 bucket to another with s3cmd?

Asked by 陌清茗 on 2020-12-12 12:30

I'm pretty happy with s3cmd, but there is one issue: how do I copy all files from one S3 bucket to another? Is it even possible?

EDIT: I've found a way to copy files.
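
For reference, recent s3cmd releases can do this server-side with sync (the bucket names below are placeholders; both buckets must be accessible with your credentials):

    s3cmd sync s3://source-bucket s3://destination-bucket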

11 Answers
  •  醉酒成梦
    2020-12-12 13:02

    I wrote a script that backs up an S3 bucket: https://github.com/roseperrone/aws-backup-rake-task

    #!/usr/bin/env python
    from boto.s3.connection import S3Connection
    import re
    import datetime
    import sys
    import time
    
    def main():
        s3_ID = sys.argv[1]
        s3_key = sys.argv[2]
        src_bucket_name = sys.argv[3]
        num_backup_buckets = sys.argv[4]
        connection = S3Connection(s3_ID, s3_key)
        delete_oldest_backup_buckets(connection, num_backup_buckets)
        backup(connection, src_bucket_name)
    
    def delete_oldest_backup_buckets(connection, num_backup_buckets):
        """Deletes the oldest backup buckets so that only the newest NUM_BACKUP_BUCKETS - 1 remain."""
        buckets = connection.get_all_buckets() # returns a list of bucket objects

        backup_bucket_names = []
        for bucket in buckets:
            if re.search(r'backup-\d{4}-\d{2}-\d{2}', bucket.name):
                backup_bucket_names.append(bucket.name)

        # Sort earliest to latest by the date embedded in the bucket name.
        backup_bucket_names.sort(key=lambda x: datetime.datetime.strptime(x[len('backup-'):17], '%Y-%m-%d').date())

        # Keep the newest NUM_BACKUP_BUCKETS - 1 buckets; the fresh backup will be the Nth.
        delete = len(backup_bucket_names) - (int(num_backup_buckets) - 1)
        if delete <= 0:
            return

        for i in range(0, delete):
            print 'Deleting the backup bucket, ' + backup_bucket_names[i]
            # S3 refuses to delete a non-empty bucket, so delete its keys first.
            bucket = connection.get_bucket(backup_bucket_names[i])
            for key in bucket.list():
                key.delete()
            connection.delete_bucket(backup_bucket_names[i])
    
    def backup(connection, src_bucket_name):
        now = datetime.datetime.now()
        # The month and day must be zero-filled so the names sort correctly.
        new_backup_bucket_name = 'backup-' + now.strftime('%Y-%m-%d')
        print "Creating new bucket " + new_backup_bucket_name
        new_backup_bucket = connection.create_bucket(new_backup_bucket_name)
        copy_bucket(src_bucket_name, new_backup_bucket_name, connection)
    
    
    def copy_bucket(src_bucket_name, dst_bucket_name, connection, maximum_keys=100):
        src_bucket = connection.get_bucket(src_bucket_name)
        dst_bucket = connection.get_bucket(dst_bucket_name)

        # Page through the source bucket, MAXIMUM_KEYS keys at a time,
        # using the last key of each page as the marker for the next.
        result_marker = ''
        while True:
            keys = src_bucket.get_all_keys(max_keys=maximum_keys, marker=result_marker)

            for k in keys:
                print 'Copying ' + k.key + ' from ' + src_bucket_name + ' to ' + dst_bucket_name

                t0 = time.time()
                # copy_key performs a server-side copy; the data never leaves S3.
                dst_bucket.copy_key(k.key, src_bucket_name, k.key)
                print time.time() - t0, ' seconds'

            # A short page means every key has been seen.
            if len(keys) < maximum_keys:
                print 'Done backing up.'
                break

            result_marker = keys[-1].key

    if __name__ == '__main__':
        main()
    

    I use this in a rake task (for a Rails app). The four arguments are the AWS access key ID, the secret key, the source bucket name, and the number of backup buckets to keep:

    desc "Back up a file onto S3"
    task :backup do
         S3ID = "*****"
         S3KEY = "*****"
         SRCBUCKET = "primary-mzgd"
         NUM_BACKUP_BUCKETS = 2
    
         Dir.chdir("#{Rails.root}/lib/tasks")
         system "./do_backup.py #{S3ID} #{S3KEY} #{SRCBUCKET} #{NUM_BACKUP_BUCKETS}"
    end
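
    Note: boto (the classic library used above) is no longer maintained. As a rough sketch of the same server-side bucket-to-bucket copy with boto3 (bucket names are placeholders; credentials are assumed to come from the standard AWS config or environment):

    import boto3

    def copy_bucket(src_bucket_name, dst_bucket_name):
        # Server-side copy: each object is copied within S3 without being
        # downloaded locally. boto3 handles the list pagination for us.
        s3 = boto3.resource('s3')
        for obj in s3.Bucket(src_bucket_name).objects.all():
            s3.meta.client.copy(
                {'Bucket': src_bucket_name, 'Key': obj.key},
                dst_bucket_name,
                obj.key,
            )

    if __name__ == '__main__':
        copy_bucket('source-bucket', 'destination-bucket')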
    
