微博爬虫及舆情分析-1.爬取微博数据
本文以“Mate30”为关键字进行分析

import requests as re
import time

#搜索关键字Mate30并获取url
target_url = "https://m.weibo.cn/api/container/getIndex?containerid=100103type%3D1%26q%3DMate30&page_type=searchall&page="
# 爬取的微博总页数
total_page = 400
#存放微博与用户数据
mblog = list()
user = list()
# 根据是否为长文本采集不同的微博文本数据
long_text_count = 0
for page in range(total_page):
    print('Crawling page:%d/%d' % (page + 1, total_page))
    cur_url = target_url + str(page + 1)
    source_json = re.get(cur_url).json()
    time.sleep(1)
    source_data = source_json['data']
    cards = source_data['cards'] if 'cards' in source