How to zip a very large file in Python

浪子不回头ぞ 提交于 2019-12-03 16:14:02

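I have added a new method to the zipfile library. The enhanced zipfile library is open source and can be found on GitHub (EnhancedZipFile). The new method, writebuffered(), was inspired by the existing zipfile.write() and zipfile.writestr() methods: it reads the input from an open file object in small chunks, so a very large file never has to be loaded into memory all at once.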

def writebuffered(self, zinfo_or_arcname, file_pointer, file_size, compress_type=None):
    """Stream the contents of an open file object into the archive.

    The data is read from ``file_pointer`` in 8 KiB chunks, so the whole
    payload never has to be held in memory at once.

    Args:
        zinfo_or_arcname: Either a ``ZipInfo`` describing the new member, or
            the archive name to store it under.  When a name is given, a
            ``ZipInfo`` is created with the current local time and the
            archive's default compression.
        file_pointer: Object with a ``read(size)`` method returning the
            binary payload; it is read until ``read`` returns an empty value.
        file_size: Expected uncompressed size.  It is only used up front to
            decide whether ZIP64 headers are needed; the size stored in the
            archive is the number of bytes actually read.
        compress_type: Optional compression method that overrides the one
            taken from the ``ZipInfo`` / archive default.

    Raises:
        RuntimeError: If the data turned out to exceed ``ZIP64_LIMIT``
            although a non-ZIP64 header had already been written.
        Any exception raised by ``self._writecheck``.
    """
    if not isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = ZipInfo(filename=zinfo_or_arcname,
                        date_time=time.localtime(time.time())[:6])

        zinfo.compress_type = self.compression
        # endswith() instead of [-1] so that an empty name does not raise
        # IndexError here.
        if zinfo.filename.endswith('/'):
            zinfo.external_attr = 0o40775 << 16   # drwxrwxr-x
            zinfo.external_attr |= 0x10           # MS-DOS directory flag
        else:
            zinfo.external_attr = 0o600 << 16     # ?rw-------
    else:
        zinfo = zinfo_or_arcname

    # Honour an explicit per-member compression method.  This has to happen
    # before the header is written because the method is part of the header.
    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = file_size            # Uncompressed size (estimate)
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True

    fp = file_pointer
    # CRC and sizes are unknown until the data has been streamed; write a
    # placeholder header now and overwrite it afterwards.
    zinfo.CRC = CRC = 0
    zinfo.compress_size = compress_size = 0
    # Compressed size can be larger than uncompressed size, hence the 5%
    # safety margin when deciding on ZIP64.
    zip64 = self._allowZip64 and \
            zinfo.file_size * 1.05 > ZIP64_LIMIT
    self.fp.write(zinfo.FileHeader(zip64))
    if zinfo.compress_type == ZIP_DEFLATED:
        # Negative wbits -> raw deflate stream without zlib header/trailer.
        cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                zlib.DEFLATED, -15)
    else:
        cmpr = None

    file_size = 0
    while True:
        buf = fp.read(1024 * 8)
        if not buf:
            break
        file_size = file_size + len(buf)
        CRC = crc32(buf, CRC) & 0xffffffff
        if cmpr:
            buf = cmpr.compress(buf)
            compress_size = compress_size + len(buf)
        self.fp.write(buf)

    if cmpr:
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    if not zip64 and self._allowZip64:
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')

    # Seek backwards and rewrite the file header, which now includes the
    # correct CRC and sizes, then return to the end of the member data.
    # The same zip64 flag is used, so the header length is unchanged.
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(position, 0)
    # Python 3's ZipFile remembers where the central directory must start in
    # ``start_dir`` and seeks there on close(); keep it pointing past the
    # data just written.  Older implementations lack the attribute.
    if hasattr(self, 'start_dir'):
        self.start_dir = position
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo

Points to note

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!