M3U8 文件下载 | 易学教程

最近在线看一个教育视频，发现经常卡顿，缓冲太慢，低分辨率又看不清，于是想扒一下看看，果然可以下载。网上有一些参考资料，也有现成的工具，但是每个网站的情况都不一样，最好还是自己动手写。代码贴出来给大家参考：
import os
import urllib.request, urllib.error
from Crypto.Cipher import AES
# pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple pycryptodome

# Download count limit used by download_video_m3u8; 0 disables the limit.
DOWNLOAD_FILE_NUM_MAX = 0

def download_data(url,headers):
    """Fetch *url* and return the response body decoded as UTF-8 text.

    :param url: address to fetch.
    :param headers: sequence of ``(name, value)`` pairs sent with the request.
    :return: the decoded body as ``str``; ``-1`` if anything goes wrong
        (the error is printed, not raised).
    """
    try:
        opener = urllib.request.build_opener()
        opener.addheaders = headers
        # Installed globally, so later urlopen/urlretrieve calls reuse the headers.
        urllib.request.install_opener(opener)
        # The context manager closes the connection even when read/decode fails.
        with urllib.request.urlopen(url=url) as response:
            return response.read().decode('utf-8')
    except Exception as err:
        print(f'error download_data({url})\n', err)
        return -1

def download_stream(url,headers):
    """Fetch *url* and return the raw response body.

    :param url: address to fetch.
    :param headers: sequence of ``(name, value)`` pairs sent with the request.
    :return: the body as ``bytes``; ``-1`` if anything goes wrong
        (the error is printed, not raised).
    """
    try:
        opener = urllib.request.build_opener()
        opener.addheaders = headers
        # Installed globally, so later urlopen/urlretrieve calls reuse the headers.
        urllib.request.install_opener(opener)
        # The context manager closes the connection even when the read fails.
        with urllib.request.urlopen(url=url) as response:
            return response.read()
    except Exception as err:
        print(f'error download_stream({url})\n', err)
        return -1

def download_file(url,headers, file_path):
    """Download *url* straight to *file_path*, replacing any existing file.

    :param url: address to fetch.
    :param headers: sequence of ``(name, value)`` pairs sent with the request.
    :param file_path: destination path; an existing file there is removed first.
    :return: ``True`` on success, ``False`` when a ``URLError`` occurred
        (the error is printed). Other exception types propagate.
    """
    file_remove(file_path)
    try:
        opener = urllib.request.build_opener()
        opener.addheaders = headers
        urllib.request.install_opener(opener)
        urllib.request.urlretrieve(url, filename=file_path)
        return True
    except urllib.error.URLError as e:
        # HTTPError is a URLError subclass and is the only one carrying a
        # status code, so probe for the attribute instead of the type.
        if hasattr(e, 'code'):
            print('error=',e.code)  # server status code such as 403, 404, 501
        elif hasattr(e, 'reason'):
            print('error=',e.reason)  # failure reason for non-HTTP errors
        # Explicit failure value instead of silently falling off the end.
        return False


def _parse_key_tag(line):
    """Return ``(method, uri)`` from an ``#EXT-X-KEY`` line; missing parts are ''."""
    method = ''
    uri = ''
    _, found, rest = line.partition('METHOD=')
    if found:
        method = rest.split(',', 1)[0].strip()
    _, found, rest = line.partition('URI="')
    if found:
        uri = rest.split('"', 1)[0]
    return method, uri


def download_video_m3u8(url_m3u8,url_headers, download_path, videoName):
    """Download every ``.ts`` segment of an M3U8 playlist and join them into one file.

    The playlist is fetched, an optional ``#EXT-X-KEY`` key is downloaded to
    ``<download_path>/<METHOD>.key``, and each segment is fetched, decrypted
    with ``decode_aes`` when a key is present, and appended to a temporary
    ``download.ts`` that is finally renamed to *videoName*.

    :param url_m3u8: URL of the playlist; relative key/segment URIs are
        resolved against it.
    :param url_headers: sequence of ``(name, value)`` request headers.
    :param download_path: output directory (created if missing); an empty
        string means the current directory.
    :param videoName: output file name; when empty it is derived from the
        second-to-last path component of *url_m3u8* plus ``.mp4``.
    :return: ``False`` when the playlist, the key, or a segment cannot be
        downloaded or merged; ``None`` on success.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import urljoin

    # for local file
    if len(videoName) == 0:
        videoName = url_m3u8.split('/')[-2] + '.mp4'
    print("videoName=",videoName,' download_path=',download_path)
    path = download_path
    # os.makedirs('') raises, so only create a directory that was actually named.
    if path and not os.path.exists(path):
        os.makedirs(path)

    # for download m3u8 head
    m3u8_head = download_data(url_m3u8, url_headers)
    if not isinstance(m3u8_head, str):
        # download_data signals failure with -1; nothing to parse.
        print(f'Wrong, download m3u8 {url_m3u8} failure')
        return False
    # splitlines + strip copes with CRLF playlists, which would otherwise
    # leave a '\r' glued to every URL.
    lines = [line.strip() for line in m3u8_head.splitlines()]

    # for key, e.g.  #EXT-X-KEY:METHOD=AES-128,URI="77-"
    key_file = ''
    for line in lines:
        if '#EXT-X-KEY' not in line:
            continue
        method, key_name = _parse_key_tag(line)
        print("Decode Method：", method)
        if not key_name:
            # METHOD=NONE (or a malformed tag) carries no key to fetch.
            continue
        key_file = os.path.join(path, f'{method}.key')
        # urljoin leaves absolute URLs untouched and resolves relative and
        # root-relative ones against the playlist URL.
        key_url = urljoin(url_m3u8, key_name)
        print("key_url：", key_url)
        if not download_file(key_url, url_headers, key_file):
            # Without the key the segments would be stored still encrypted.
            print(f'Wrong, download key {key_url} failure')
            return False

    # Lines starting with '#' are playlist tags/comments, never segment URIs.
    segments = [line for line in lines
                if '.ts' in line and not line.startswith('#')]
    m3u8_all_ts = len(segments)
    print('m3u8_all_ts = ', m3u8_all_ts)
    if m3u8_all_ts == 0:
        print(f'Wrong, no .ts segment found in {url_m3u8}')
        return False

    key_data = file_read(key_file, 'rb')
    print('key_data len = ', len(key_data))
    print('@start download ts')

    # Honour the optional cap on how many segments are fetched (0 = no cap).
    if DOWNLOAD_FILE_NUM_MAX > 0:
        segments = segments[:DOWNLOAD_FILE_NUM_MAX]

    tempName_video = os.path.join(path, 'download.ts')
    for count_num, segment in enumerate(segments):
        url_ts = urljoin(url_m3u8, segment)
        tempName_ts = os.path.join(path, f'{count_num}.ts')
        print('url_ts=',url_ts)
        ts_data = download_stream(url_ts, url_headers)  # download the segment
        if not isinstance(ts_data, bytes):
            # download_stream signals failure with -1.
            print(f'Wrong, download {url_ts} failure')
            return False

        if len(key_data) > 0:  # the playlist declared a key: decrypt first
            ts_data = decode_aes(ts_data, key_data)
        # Clear a leftover from an earlier aborted run before writing.
        file_remove(tempName_ts)
        file_save(ts_data, tempName_ts, 'wb+')
        print('file_save = ', tempName_ts,', tempName_video = ', tempName_video)
        print('percent = ', f'{count_num/m3u8_all_ts*100}%')

        if count_num == 0:
            # The first segment becomes the accumulator file itself.
            file_remove(tempName_video)
            file_rename(tempName_ts, tempName_video)
            continue

        if not file_merge_bf(tempName_video, tempName_ts):
            print(f'Wrong, copy {count_num}.ts-->{videoName}.ts failure')
            return False
        file_remove(tempName_ts)

    filename = os.path.join(path, f'{videoName}')
    file_rename(tempName_video, filename)
    print(f'{videoName}.mp4 finish down!')

def file_save(file_data,new_file,op_mode):
    """Write *file_data* to *new_file*, replacing whatever was there before.

    :param file_data: data to write (``bytes`` for binary modes, ``str`` for text).
    :param new_file: destination path.
    :param op_mode: mode string passed to ``open``.
    """
    # Remove a previous file first so that append modes also start empty.
    # The write must still happen afterwards; returning here would drop the data.
    if os.path.exists(new_file):
        os.remove(new_file)
    with open(new_file, op_mode) as f:
        f.write(file_data)

def file_read(file_path,op_mode):
    """Return the full contents of *file_path* opened with *op_mode*.

    A path that does not exist yields the empty string ``''`` (also for
    binary modes) instead of raising.
    """
    if os.path.exists(file_path):
        with open(file_path, op_mode) as handle:
            content = handle.read()
        return content
    return ''

def file_read_utf8(file_path,op_mode):
    """Return the contents of *file_path*, decoded as UTF-8.

    *op_mode* is the mode string handed to ``open``. A path that does not
    exist yields ``''`` instead of raising.
    """
    path_missing = not os.path.exists(file_path)
    if path_missing:
        return ''
    with open(file_path, op_mode, encoding='utf-8') as handle:
        text = handle.read()
    return text

def file_remove(file_path):
    """Delete *file_path* if it exists; do nothing when it does not."""
    if not os.path.exists(file_path):
        return
    os.remove(file_path)

def file_rename(old,new):
    """Rename *old* to *new*, overwriting *new* if it already exists.

    :param old: existing path to move.
    :param new: destination path; an existing file there is replaced.
    :raises OSError: if *old* does not exist or the rename fails. In that
        case an existing *new* is left untouched.
    """
    # os.replace overwrites the destination in one step on every platform.
    # Deleting *new* up front would lose it whenever the rename then fails
    # (missing *old*, or old == new).
    os.replace(old, new)

def file_merge_bf(main_file,add_file):
    """Append the bytes of *add_file* to the end of *main_file*.

    :return: ``True`` once the data has been appended; ``False`` (after
        printing a message) when either file does not exist.
    """
    both_present = os.path.exists(main_file) and os.path.exists(add_file)
    if not both_present:
        print('error no file ', main_file, add_file)
        return False

    with open(main_file, 'ab+') as target, open(add_file, 'rb') as tail:
        target.write(tail.read())
    return True

def decode_file_with_key(m3u8_file_path,new_file,key_file_path):
    """Decrypt a file on disk with a key file and write the result to *new_file*.

    :param m3u8_file_path: path of the encrypted input file.
    :param new_file: path of the decrypted output; overwritten if it exists.
    :param key_file_path: path of the file holding the raw key bytes.
    :return: ``None``. When either input file is missing, a message is
        printed and nothing is written.
    """
    if not (os.path.exists(m3u8_file_path) and os.path.exists(key_file_path)):
        print('error no file ',m3u8_file_path,key_file_path)
        return

    # Context managers make sure both input handles are closed.
    with open(key_file_path, mode='rb') as key_fo:
        key_data = key_fo.read()
    print('key len=',len(key_data))

    with open(m3u8_file_path, 'rb') as encrypted:
        m3u8_data = encrypted.read()
    m3u8_data_decode = decode_aes(m3u8_data, key_data)

    # 'wb' truncates an existing output, so no separate delete is needed.
    with open(new_file, 'wb') as decrypted:
        decrypted.write(m3u8_data_decode)

    print('ok ,new m3u8 file is ', new_file)

def decode_aes(data, key):
    """Decrypt *data* with AES in CBC mode.

    The key bytes are passed both as the cipher key and as the IV, and
    trailing zero bytes are stripped from the decrypted output.

    :param data: the encrypted bytes.
    :param key: the AES key bytes.
    :return: the decrypted bytes with trailing ``b'\\0'`` removed.
    """
    # NOTE(review): the IV is always the key itself here; a playlist that
    # specifies a different IV is presumably not handled - confirm with caller.
    return AES.new(key, AES.MODE_CBC, key).decrypt(data).rstrip(b'\0')

def main_config():
    """Read the ``M3U8Config`` file and download every playlist listed in it.

    The file is line based. Lines starting with ``#`` and blank lines are
    ignored. A line equal to ``Header:``, ``Urls:`` or ``DowloadDir:`` starts
    a section, and the lines that follow belong to that section:

    * ``Header:``     - ``name==value`` request headers.
    * ``Urls:``       - ``videoName=playlist_url`` entries.
    * ``DowloadDir:`` - the output directory (the last such line wins).
    """
    url_headers = []
    urls = []
    download_dir = ''
    tag = ''
    # 'DowloadDir:' (sic) is the keyword existing config files use.
    section_names = ('Header:', 'Urls:', 'DowloadDir:')
    config_data = file_read_utf8('M3U8Config','r')
    for raw_item in config_data.splitlines():
        item = raw_item.strip()
        # Blank lines (including the one after a trailing newline) must not
        # be taken as data: they would yield a broken URL entry or reset
        # download_dir to ''.
        if not item or item.startswith('#'):
            continue
        if item in section_names:
            tag = item
        elif tag == 'Header:':
            # Split on the first '==' only, so values may contain '=='.
            name, sep, value = item.partition('==')
            if not sep:
                print('skip invalid header line: ', item)
                continue
            url_headers.append([name, value])
        elif tag == 'Urls:':
            # Split on the first '=' only, so query strings in the URL survive.
            name, sep, address = item.partition('=')
            if not sep:
                print('skip invalid url line: ', item)
                continue
            urls.append([name, address])
        elif tag == 'DowloadDir:':
            download_dir = item

    print('urls = ',urls)
    print('url_headers = ',url_headers)
    for url in urls:
        download_video_m3u8(url[1], url_headers, download_dir, url[0])

# Run the config-driven download only when this file is executed as a script.
if __name__ == '__main__':
    main_config()
来源：oschina
链接：https://my.oschina.net/lsfx/blog/3270045
标签
OpENer
def