(二)Python 爬取猫眼电影 TOP100 榜并将电影信息写入 Excel(Excel 列宽自适应)

人走茶凉 提交于 2020-03-18 19:08:07
# -*- coding:utf-8 -*-
import requests
from bs4 import BeautifulSoup
import xlrd,xlwt

# One URL per page of the board: the offset query parameter steps through
# 0, 10, ..., 90.
urls = list(map("https://maoyan.com/board/4?offset={}".format,
                range(0, 100, 10)))

# Request headers sent with every page fetch (a desktop Chrome user agent).
header = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/79.0.3945.88 Safari/537.36"
    ),
}

# Column index -> width measure recorded by WriteExcel while it writes the
# sheet; printed by the script entry point.
length = dict()



def len_byte(value):
    """Return a width estimate for *value*, counted in single-width cells.

    Each byte a character needs beyond its first in UTF-8 adds half a cell,
    so an ASCII character counts as 1 and a 3-byte character (e.g. a Chinese
    character) counts as 2. The total is truncated to an ``int``.
    """
    char_count = len(value)
    extra_bytes = len(value.encode('utf-8')) - char_count
    return int(extra_bytes / 2 + char_count)

def _text_after_colon(text):
    """Return what follows the first colon in *text*, or *text* if none.

    Both the ASCII colon and the full-width colon (U+FF1A) are recognised,
    so a label prefix is stripped whichever one the page uses.
    """
    positions = [p for p in (text.find(":"), text.find("\uff1a")) if p != -1]
    return text[min(positions) + 1:] if positions else text


def _split_release(text):
    """Split a release string like ``1993-01-01(Country)`` into its parts.

    Returns ``(date, country)``. *country* is ``""`` when there is no
    parenthesised part. ASCII and full-width parentheses are both accepted.
    """
    normalized = text.replace("\uff08", "(").replace("\uff09", ")")
    # Split on the parenthesis rather than at a fixed offset so that dates
    # shorter or longer than YYYY-MM-DD do not corrupt the country.
    date, _, rest = normalized.partition("(")
    return date.strip(), rest.replace("(", "").replace(")", "").strip()


def FilmInformation(url):
    """Fetch one board page and extract the films listed on it.

    Args:
        url: Address of the board page to download.

    Returns:
        A list with one ``[name, staring, releasetime, country, score]``
        entry (all strings) per ``.board-item-main`` element on the page.
        *country* is ``"(暂无)"`` when the release text carries no
        parenthesised country.

    Raises:
        requests.RequestException: If the request times out, fails, or the
            server answers with an HTTP error status.
        IndexError: If a film entry lacks one of the expected elements.
    """
    # A timeout keeps a stalled connection from hanging the whole run.
    response = requests.get(url, headers=header, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    content = []
    for film in soup.select(".board-item-main"):
        # select() returns a list, hence the [0]; ".name a" would also work.
        name = film.select("[title]")[0].text
        staring = _text_after_colon(film.select(".star")[0].text.strip())
        release = _text_after_colon(
            film.select(".releasetime")[0].text.strip()
        )
        releasetime, country = _split_release(release)
        # The score is rendered as two elements: integer part and fraction.
        score = film.select(".integer")[0].text + film.select(".fraction")[0].text
        content.append([name, staring, releasetime, country or "(暂无)", score])
    return content

def WriteExcel(data, filename="maoyan.xls"):
    """Write the scraped films to an ``.xls`` workbook with fitted columns.

    Args:
        data: A list of pages, each page being a list of rows, each row a
            list of cell values (the shape ``main`` builds from
            ``FilmInformation`` results).
        filename: Path of the workbook to save. Defaults to ``"maoyan.xls"``.

    Side effects:
        Saves the workbook to *filename* and replaces the contents of the
        module-level ``length`` dict with ``{column index: widest cell}``
        for this call, measured with ``len_byte``.
    """
    title = ["电影", "主演", "时间", "国家", "评分"]
    # Extra character cells added to each column so text does not touch the
    # cell border.
    padding = 2
    # Excel caps a column at 255 characters; xlwt stores the width in units
    # of 1/256 character in a 16-bit field, so larger values fail on save.
    max_width = 255 * 256

    workbook = xlwt.Workbook(encoding="utf-8")
    sheet = workbook.add_sheet("猫眼前100")

    widths = {}

    def put(row, col, value):
        """Write one cell and remember the widest text seen in its column."""
        sheet.write(row, col, value)
        # str() lets non-string cells (e.g. numbers) be measured as well.
        widths[col] = max(widths.get(col, 0), len_byte(str(value)))

    # Header row; it takes part in the width calculation too.
    for col, heading in enumerate(title):
        put(0, col, heading)

    row = 1
    for page in data:
        for film in page:
            for col, value in enumerate(film):
                put(row, col, value)
            row += 1

    # Publish this call's widths through the module-level dict without
    # carrying over maxima from an earlier call.
    length.clear()
    length.update(widths)

    for col, width in widths.items():
        sheet.col(col).width = min(256 * (width + padding), max_width)

    workbook.save(filename)



def main():
    """Scrape every page listed in ``urls`` and write the result to Excel."""
    pages = [FilmInformation(page_url) for page_url in urls]
    WriteExcel(pages)

# Script entry point: run the scrape, then show the per-column widths that
# were recorded while the sheet was written.
if __name__ == "__main__":
    main()
    print(length)

 

标签
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!