Split a zip archive into multiple chunks

陌路散爱 提交于 2021-02-05 05:34:10

问题


I'm trying to create a zip archive of a possibly huge folder. For this purpose I'm using the python zipfile module, but as far as I can see there is no option to split the created archive into multiple chunks with a max size.

The zipped archive is supposed to be sent via Telegram, which has a size limit of 1.5 GB per file. Therefore I need to split the resulting zip archive.

I would really like to not use a subprocess and shell commands for creating this archive.

My current code looks like this:

def create_zip(archive_name, directory):
    """Create an LZMA-compressed zip archive from the files below a directory.

    Every regular file found by walking ``directory`` is stored under its
    path relative to ``directory``, so the archive does not contain the
    directory's own absolute prefix.

    Args:
        archive_name: Path of the zip file to create (anything ``ZipFile``
            accepts as its first argument).
        directory: Root folder whose files are added to the archive. A
            trailing path separator is allowed.

    Returns:
        The ``ZipFile`` object that was written. It has already been closed
        by the time it is returned.
    """
    # If the archive is created inside ``directory``, os.walk() would list it
    # and the half-written archive would be added to itself.
    archive_path = None
    if isinstance(archive_name, (str, os.PathLike)):
        archive_path = os.path.abspath(archive_name)

    with ZipFile(archive_name, "w", ZIP_LZMA) as target_zip_file:
        for root, _, files in os.walk(directory):
            for file_to_zip in files:
                absolute_path = os.path.join(root, file_to_zip)
                if archive_path is not None and os.path.abspath(absolute_path) == archive_path:
                    continue
                # relpath (instead of slicing by len(directory)) stays correct
                # when ``directory`` ends with a separator.
                zip_file_name = os.path.relpath(absolute_path, directory)
                target_zip_file.write(absolute_path, zip_file_name)

    return target_zip_file

Thanks in Advance


回答1:


In case you don't find a better, native way with zipfile, you could still write the file splitting algorithm yourself. Something like this:

# Split the finished archive into numbered parts ("<name>000", "<name>001", ...)
# of at most ``packet_size`` bytes each.
outfile = archive_name
packet_size = int(1.5 * 1024**3)   # bytes, maximum size of one part
buffer_size = 16 * 1024**2         # bytes held in memory at any time

with open(outfile, "rb") as output:
    filecount = 0
    while True:
        # Read the first buffer before opening the part file so that no
        # empty trailing part is created once the source is exhausted.
        data = output.read(min(buffer_size, packet_size))
        if not data:
            break   # we're done
        written = 0
        with open("{}{:03}".format(outfile, filecount), "wb") as packet:
            # Copy in small buffers instead of reading a whole 1.5 GiB part
            # into memory at once. read(0) returns b"" when the part is full.
            while data:
                packet.write(data)
                written += len(data)
                data = output.read(min(buffer_size, packet_size - written))
        print(written)
        filecount += 1

Putting the file back together on the receiver's side works similarly: read the numbered parts in order and append them to a single output file.




回答2:


Here is what I use to send a file to a Telegram channel with a Telegram bot. The file size limit is 50 MB when uploading with a Telegram bot. The limit is 1500 MB when uploading with a Telegram client, but since you may add some text or other info, 1495 MB is safer.

#! /usr/bin/python3
# -*- coding:utf-8 -*-
# apt-get install p7zip-full

import subprocess
import os
import math
import logzero

logger = logzero.logger

MAX_SPLIT_SIZE = 1495

    def file_split_7z(file_path, split_size=MAX_SPLIT_SIZE):
        """Pack a file into an uncompressed, multi-volume 7z archive.

        Runs ``7z a -v<split_size>m -mx0`` on ``file_path``. The volumes are
        written next to the input as ``<name>_<ext>.7z.001``,
        ``<name>_<ext>.7z.002`` and so on.

        Args:
            file_path: Path of the file to split.
            split_size: Maximum size of one volume, in MiB (passed to 7z as
                ``-v<split_size>m``). Must be positive.

        Returns:
            List of the volume paths that 7z created, in order. An empty list
            is returned when 7z does not report "Everything is Ok".

        Raises:
            ValueError: If ``split_size`` is not positive.
        """
        if split_size <= 0:
            raise ValueError("split_size must be positive, got {!r}".format(split_size))

        file_path_7z_list = []
        # An input that already ends in ".7z" is renamed for the duration of
        # the run and renamed back afterwards.
        origin_file_path = ""
        if os.path.splitext(file_path)[1] == ".7z":
            origin_file_path = file_path
            file_path = os.path.splitext(origin_file_path)[0] + ".7zo"
            os.rename(origin_file_path, file_path)
        try:
            # Estimated number of volumes; only used to decide how far to
            # look for stale volumes from an earlier run.
            fz = os.path.getsize(file_path) / 1024 / 1024
            pa = math.ceil(fz / split_size)
            head, ext = os.path.splitext(os.path.abspath(file_path))
            archive_head = "".join((head, ext.replace(".", "_"))) + ".7z"

            # Remove leftovers of a previous run. Look one volume past the
            # estimate (archive headers can spill into an extra volume) and
            # keep going while further volumes exist.
            index = 1
            while True:
                check_file_name = "{}.{:03d}".format(archive_head, index)
                if os.path.isfile(check_file_name):
                    logger.debug("remove exists file | {}".format(check_file_name))
                    os.remove(check_file_name)
                elif index > pa + 1:
                    break
                index += 1

            # do 7z compress (-mx0: store only, no compression)
            cmd_7z = ["7z", "a", "-v{}m".format(split_size), "-y", "-mx0", archive_head, file_path]
            proc = subprocess.Popen(cmd_7z, shell=False, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            out, err = proc.communicate()
            if b"Everything is Ok" not in out:
                # errors="replace": 7z output is not guaranteed to be UTF-8
                logger.error("7z output | {}".format(out.decode("utf-8", errors="replace")))
                logger.error("7z error | {}".format(err.decode("utf-8", errors="replace")))
                return file_path_7z_list

            # Report the volumes that actually exist instead of the estimate:
            # the archive is slightly larger than the input, so the real
            # count can exceed ``pa`` (and is 1, not 0, for an empty file).
            index = 1
            while True:
                volume_path = "{}.{:03d}".format(archive_head, index)
                if not os.path.isfile(volume_path):
                    break
                file_path_7z_list.append(volume_path)
                index += 1
            return file_path_7z_list
        finally:
            # if origin file is 7z file rename it back, also on failure
            if origin_file_path:
                os.rename(file_path, origin_file_path)

    def do_file_split(file_path, split_size=MAX_SPLIT_SIZE):
        """Split a file into evenly sized 7z volumes.

        Instead of filling every volume up to the maximum and leaving a small
        remainder, the volume size is lowered so that all parts are roughly
        equal. Example: with a maximum of 1495 MiB and a 2000 MiB file, the
        number of parts is ceil(2000 / 1495) = 2, so the file is split into
        1000 + 1000 rather than 1495 + 505. Smaller uploads carry less risk
        than uploads close to the limit.

        Args:
            file_path: Path of the file to split.
            split_size: Maximum size of one part, in MiB.

        Returns:
            The list of volume paths returned by ``file_split_7z``.
        """
        file_size = os.path.getsize(file_path) / 2 ** 20
        # An empty file still needs one part; clamping also avoids dividing
        # by zero below.
        split_part = max(1, math.ceil(file_size / split_size))
        # 7z needs a volume size of at least 1 MiB.
        new_split_size = max(1, math.ceil(file_size / split_part))
        logger.info("file size | {} | split num | {} | split size | {}".format(file_size, split_part, new_split_size))
        file_path_7z_list = file_split_7z(file_path, split_size=new_split_size)
        return file_path_7z_list


来源:https://stackoverflow.com/questions/52193680/split-a-zip-archive-into-multiple-chunks

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!