最近在线看一个教育视频,发现经常卡顿,缓冲太慢,低分辨率又看不清,于是想扒一下看看,果然可以下载。网上有一些参考资料和现成工具,但每个网站的情况都不一样,最好还是自己动手实现。下面把代码贴出来供大家参考:
import os
import urllib.request, urllib.error
from Crypto.Cipher import AES
# pip3 install -i https://pypi.tuna.tsinghua.edu.cn/simple pycryptodome
# Limit on how many segments are downloaded per video. The download loop
# stops once its segment counter reaches this value; 0 disables the limit.
DOWNLOAD_FILE_NUM_MAX = 0
def download_data(url, headers):
    """Fetch ``url`` and return the response body decoded as UTF-8 text.

    An opener carrying ``headers`` is built and installed globally through
    ``urllib.request.install_opener`` before the request is made.

    :param url: address to fetch.
    :param headers: iterable of ``(name, value)`` pairs assigned to the
        opener's ``addheaders``.
    :return: the decoded body as ``str``, or ``-1`` when anything goes
        wrong (the error is printed, not raised).
    """
    try:
        opener = urllib.request.build_opener()
        opener.addheaders = headers
        urllib.request.install_opener(opener)
        # Use the response as a context manager so the underlying
        # connection is closed even when read()/decode() raises.
        with urllib.request.urlopen(url=url) as response:
            return response.read().decode('utf-8')
    except Exception as err:
        print(f'error download_data({url})\n', err)
        return -1
def download_stream(url, headers):
    """Fetch ``url`` and return the raw response body.

    An opener carrying ``headers`` is built and installed globally through
    ``urllib.request.install_opener`` before the request is made.

    :param url: address to fetch.
    :param headers: iterable of ``(name, value)`` pairs assigned to the
        opener's ``addheaders``.
    :return: the body as ``bytes``, or ``-1`` when anything goes wrong
        (the error is printed, not raised).
    """
    try:
        opener = urllib.request.build_opener()
        opener.addheaders = headers
        urllib.request.install_opener(opener)
        # Use the response as a context manager so the underlying
        # connection is closed even when read() raises.
        with urllib.request.urlopen(url=url) as response:
            return response.read()
    except Exception as err:
        print(f'error download_stream({url})\n', err)
        return -1
def download_file(url, headers, file_path):
    """Download ``url`` straight into ``file_path``.

    Any existing file at ``file_path`` is removed first. An opener carrying
    ``headers`` is installed globally so that ``urlretrieve`` sends them.

    :param url: address to fetch.
    :param headers: iterable of ``(name, value)`` pairs assigned to the
        opener's ``addheaders``.
    :param file_path: destination path on disk.
    :return: ``True`` on success, ``False`` when a ``URLError`` (which
        includes ``HTTPError``) occurred; the error is printed.
    """
    file_remove(file_path)
    try:
        opener = urllib.request.build_opener()
        opener.addheaders = headers
        urllib.request.install_opener(opener)
        urllib.request.urlretrieve(url, filename=file_path)
        return True
    except urllib.error.URLError as e:
        # URLError is the base class of HTTPError. Only HTTPError carries a
        # status code, so probe for the attribute before printing it.
        if hasattr(e, 'code'):
            print('error=', e.code)  # HTTP status such as 403, 404, 501
        elif hasattr(e, 'reason'):
            print('error=', e.reason)  # non-HTTP failure reason
        # Report the failure explicitly instead of falling off the end.
        return False
def download_video_m3u8(url_m3u8, url_headers, download_path, videoName):
    """Download all ``.ts`` segments listed in an m3u8 playlist into one file.

    The playlist is fetched with ``download_data``. When it carries an
    ``#EXT-X-KEY`` tag with a URI, the key is downloaded and every segment
    is passed through ``decode_aes`` before being stored. Segments are
    appended one by one to ``download.ts`` inside ``download_path``, and
    that file is finally renamed to ``videoName``.

    :param url_m3u8: URL of the m3u8 playlist.
    :param url_headers: header pairs forwarded to the download helpers.
    :param download_path: output directory; created when it does not exist.
    :param videoName: output file name; when empty, the second-to-last
        path component of ``url_m3u8`` plus ``.mp4`` is used.
    :return: ``False`` when the playlist, the key or a segment cannot be
        fetched or merged, or when the playlist lists no segment;
        ``None`` after a completed download.
    """
    # Function-scope imports keep this block independent of the file header.
    import re
    from urllib.parse import urljoin

    # Derive a file name from the URL when the caller did not supply one.
    if len(videoName) == 0:
        videoName = url_m3u8.split('/')[-2] + '.mp4'
    print("videoName=", videoName, ' download_path=', download_path)
    path = download_path
    if not os.path.exists(path):
        os.makedirs(path)

    # Fetch the playlist; download_data signals failure with -1.
    m3u8_head = download_data(url_m3u8, url_headers)
    if not isinstance(m3u8_head, str):
        print('error: cannot download playlist ', url_m3u8)
        return False

    # splitlines() + strip() also copes with CRLF playlists.
    playlist_lines = [entry.strip() for entry in m3u8_head.splitlines()]
    # Only non-tag lines can name a segment; tag lines start with '#'.
    segments = [entry for entry in playlist_lines
                if entry and not entry.startswith('#') and '.ts' in entry]

    # Locate and download the decryption key, if the playlist names one.
    # Example tag: #EXT-X-KEY:METHOD=AES-128,URI="77-"
    key_file = ''
    for entry in playlist_lines:
        if '#EXT-X-KEY' not in entry:
            continue
        method_match = re.search(r'METHOD=([^,\s]+)', entry)
        uri_match = re.search(r'URI="([^"]*)"', entry)
        if method_match is None or uri_match is None:
            # e.g. METHOD=NONE carries no URI: nothing to download.
            continue
        method = method_match.group(1)
        print("Decode Method:", method)
        key_file = os.path.join(path, f'{method}.key')
        # urljoin resolves relative key URIs against the playlist URL and
        # leaves absolute ones untouched.
        key_url = urljoin(url_m3u8, uri_match.group(1))
        print("key_url:", key_url)
        if not download_file(key_url, url_headers, key_file):
            # Without the key the segments would be stored still encrypted.
            print('error: cannot download key ', key_url)
            return False

    m3u8_all_ts = len(segments)
    print('m3u8_all_ts = ', m3u8_all_ts)
    key_data = file_read(key_file, 'rb')
    print('key_data len = ', len(key_data))
    if m3u8_all_ts == 0:
        print('error: playlist lists no .ts segment ', url_m3u8)
        return False

    print('@start download ts')
    tempName_video = os.path.join(path, 'download.ts')
    count_num = 0
    for url_ts in segments:
        tempName_ts = os.path.join(path, f'{count_num}.ts')
        # Resolve relative segment paths against the playlist URL.
        url_ts = urljoin(url_m3u8, url_ts)
        print('url_ts=', url_ts)
        ts_data = download_stream(url_ts, url_headers)  # raw segment bytes
        if not isinstance(ts_data, bytes):
            # download_stream signals failure with -1.
            print('error: cannot download segment ', url_ts)
            return False
        if len(key_data) > 0:  # playlist is encrypted
            ts_data = decode_aes(ts_data, key_data)
        # Drop any stale segment left behind by an interrupted run so the
        # save below always starts from a clean slate.
        file_remove(tempName_ts)
        file_save(ts_data, tempName_ts, 'wb+')
        print('file_save = ', tempName_ts, ', tempName_video = ', tempName_video)
        print('percent = ', f'{count_num/m3u8_all_ts*100}%')
        if count_num == 0:
            # The first segment becomes the accumulation file.
            file_remove(tempName_video)
            file_rename(tempName_ts, tempName_video)
        elif file_merge_bf(tempName_video, tempName_ts) == True:
            file_remove(tempName_ts)
            if DOWNLOAD_FILE_NUM_MAX != 0 and count_num == DOWNLOAD_FILE_NUM_MAX:
                break
        else:
            print(f'Wrong, copy {count_num}.ts-->{videoName}.ts failure')
            return False
        count_num += 1

    # Give the accumulated file its final name.
    filename = os.path.join(path, f'{videoName}')
    file_rename(tempName_video, filename)
    print(f'{videoName}.mp4 finish down!')
def file_save(file_data, new_file, op_mode):
    """Write ``file_data`` to ``new_file``, replacing any existing file.

    :param file_data: payload handed to ``write``; must match ``op_mode``
        (``bytes`` for binary modes, ``str`` for text modes).
    :param new_file: destination path.
    :param op_mode: mode string passed to ``open``.
    """
    # Remove a previous file first so that append modes also start empty.
    # The write must still happen afterwards; returning here would leave
    # the caller with no file at all.
    if os.path.exists(new_file):
        os.remove(new_file)
    with open(new_file, op_mode) as f:
        f.write(file_data)
def file_read(file_path, op_mode):
    """Return the whole content of ``file_path`` opened with ``op_mode``.

    :param file_path: path of the file to read.
    :param op_mode: mode string passed to ``open``.
    :return: the file content, or the empty string ``''`` when the path
        does not exist.
    """
    if os.path.exists(file_path):
        with open(file_path, op_mode) as stream:
            content = stream.read()
        return content
    return ''
def file_read_utf8(file_path, op_mode):
    """Return the whole content of ``file_path`` decoded as UTF-8.

    :param file_path: path of the file to read.
    :param op_mode: text mode string passed to ``open``.
    :return: the file content, or the empty string ``''`` when the path
        does not exist.
    """
    if os.path.exists(file_path):
        with open(file_path, op_mode, encoding='utf-8') as stream:
            content = stream.read()
        return content
    return ''
def file_remove(file_path):
    """Delete ``file_path`` when it exists; do nothing otherwise.

    :param file_path: path of the file to delete.
    :return: always ``None``.
    """
    if not os.path.exists(file_path):
        return None
    os.remove(file_path)
    return None
def file_rename(old, new):
    """Rename ``old`` to ``new``, overwriting ``new`` when it exists.

    :param old: current path of the file.
    :param new: target path.
    :raises OSError: when ``old`` does not exist or the rename fails; in
        that case an existing ``new`` is left untouched.
    """
    # os.replace overwrites the destination in a single step. Deleting the
    # destination first would lose it when the source is missing, and would
    # destroy the file itself when old and new are the same path.
    os.replace(old, new)
def file_merge_bf(main_file, add_file):
    """Append the bytes of ``add_file`` to the end of ``main_file``.

    :param main_file: file that receives the data; must already exist.
    :param add_file: file whose content is appended; must already exist.
    :return: ``True`` after appending, ``False`` (with a printed message)
        when either file is missing.
    """
    both_present = os.path.exists(main_file) and os.path.exists(add_file)
    if not both_present:
        print('error no file ', main_file, add_file)
        return False
    with open(main_file, 'ab+') as target, open(add_file, 'rb') as source:
        payload = source.read()
        target.write(payload)
    return True
def decode_file_with_key(m3u8_file_path, new_file, key_file_path):
    """Decrypt a file on disk with a key stored in another file.

    The content of ``m3u8_file_path`` is run through ``decode_aes`` using
    the bytes of ``key_file_path`` and the result is written to
    ``new_file``, replacing any previous file there.

    :param m3u8_file_path: path of the encrypted input file.
    :param new_file: path of the decrypted output file.
    :param key_file_path: path of the file holding the raw key bytes.
    :return: ``None``; when an input file is missing a message is printed
        and nothing is written.
    """
    if not (os.path.exists(m3u8_file_path) and os.path.exists(key_file_path)):
        print('error no file ', m3u8_file_path, key_file_path)
        return
    # Read the key inside a context manager so the handle is always closed.
    with open(key_file_path, mode='rb') as key_fo:
        key_data = key_fo.read()
    print('key len=', len(key_data))
    with open(m3u8_file_path, 'rb') as f:
        m3u8_data = f.read()
    m3u8_data_decode = decode_aes(m3u8_data, key_data)
    # 'wb' truncates, which gives the same result as remove-then-append.
    with open(new_file, 'wb') as f:
        f.write(m3u8_data_decode)
    print('ok ,new m3u8 file is ', new_file)
def decode_aes(data, key, iv=None):
    """Decrypt ``data`` with AES in CBC mode.

    :param data: ciphertext bytes to decrypt.
    :param key: AES key bytes (e.g. 16 or 32 bytes long).
    :param iv: initialisation vector; when omitted the key itself is used
        as the IV, which is what this script has always done. Pass the
        playlist's IV here for streams that declare one.
    :return: the decrypted bytes with trailing NUL bytes removed.
    """
    # NOTE(review): only trailing b'\0' bytes are stripped. Streams padded
    # in another way keep their padding bytes - confirm against the source
    # site before changing this.
    cryptor = AES.new(key, AES.MODE_CBC, key if iv is None else iv)
    plain_text = cryptor.decrypt(data)
    return plain_text.rstrip(b'\0')
def main_config():
    """Read the ``M3U8Config`` file and download every video it lists.

    The file is split into sections introduced by the marker lines
    ``Header:``, ``Urls:`` and ``DowloadDir:``. Lines starting with ``#``
    and blank lines are ignored.

    * ``Header:`` lines have the form ``name==value``.
    * ``Urls:`` lines have the form ``video_name=playlist_url``.
    * ``DowloadDir:`` is followed by the output directory.

    Each URL entry is handed to ``download_video_m3u8``.
    """
    url_headers = []
    urls = []
    download_dir = ''
    tag = ''
    section_markers = ('Header:', 'Urls:', 'DowloadDir:')
    config_data = file_read_utf8('M3U8Config', 'r')
    for raw_line in config_data.splitlines():
        # strip() removes stray '\r' and surrounding blanks; empty lines
        # must not be parsed as entries or overwrite the download dir.
        item = raw_line.strip()
        if not item or item.startswith('#'):
            continue
        if item in section_markers:
            tag = item
        elif tag == 'Header:':
            # Split once so that values containing '==' stay intact.
            header = item.split('==', 1)
            if len(header) == 2:
                url_headers.append(header)
            else:
                print('skip malformed header line: ', item)
        elif tag == 'Urls:':
            # Split once: the URL itself may contain '=' in its query.
            entry = item.split('=', 1)
            if len(entry) == 2:
                urls.append(entry)
            else:
                print('skip malformed url line: ', item)
        elif tag == 'DowloadDir:':
            download_dir = item
    print('urls = ', urls)
    print('url_headers = ', url_headers)
    for url in urls:
        download_video_m3u8(url[1], url_headers, download_dir, url[0])
# Script entry point: run the config-driven download only when this file is
# executed directly, not when it is imported.
if __name__ == '__main__':
    main_config()
来源:oschina
链接:https://my.oschina.net/lsfx/blog/3270045