"""Collect WeChat friend profiles through itchat and plot the gender split."""
import itchat
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from pyecharts import Map

# Log in to WeChat and download the friend list.
itchat.auto_login(hotReload=True)
friends = itchat.get_friends(update=True)


def get_attr(friends, key):
    """Return the value stored under *key* for every entry in *friends*.

    Entries that lack the key contribute ``None`` (``dict.get`` semantics).
    """
    return [user.get(key) for user in friends]


nickname = get_attr(friends, "NickName")
sex = get_attr(friends, "Sex")
province = get_attr(friends, "Province")
city = get_attr(friends, "City")
signature = get_attr(friends, "Signature")

# One row per friend; column order is fixed explicitly.
data = {
    'nickname': nickname,
    'sex': sex,
    'province': province,
    'city': city,
    'signature': signature,
}
data = pd.DataFrame(
    data,
    columns=['nickname', 'sex', 'province', 'city', 'signature'],
)

# Configure style, context and font in a single call. Calling
# sns.set_style() *after* choosing the font resets the font rcParams and
# breaks the rendering of Chinese labels, so the font is applied last.
sns.set(style='white', context='notebook', font='SimHei')
mpl.rcParams['font.sans-serif'] = ['SimHei']

# Numeric sex codes as read from the "Sex" field: 1 -> male, 2 -> female;
# any other value is reported as unknown.
SEX_LABELS = {1: 'male', 2: 'female'}
data['sex'] = data['sex'].apply(lambda code: SEX_LABELS.get(code, 'unknown'))

plt.figure(figsize=(10, 10))
sns.countplot(x=data['sex'], palette="pastel")
plt.show()  # must be *called*; a bare ``plt.show`` displays nothing

# NOTE: remember to restart the server when errors appear -- most of the
# reported errors simply mean the connection to the server was lost.
# --- Province distribution --------------------------------------------------
# Friends with an empty province string are grouped under 'unknown'.
data['province'] = data['province'].replace('', 'unknown')
pro = data['province'].value_counts()

pro_map = Map(
    '中国地图',
    title_color="#fff",
    title_pos='center',
    width=1200,
    height=600,
    background_color='#404a59',
)
pro_map.add(
    '',
    pro.index,
    pro.values,
    is_label_show=True,
    is_visualmap=True,
    visual_text_color='#000',
)
pro_map.render('China attribute.html')

# Bar chart of the same province counts.
plt.figure(figsize=(20, 10))
sns.barplot(x=pro.index, y=pro.values, palette='Set2')
# seaborn has no module-level set_xticklabels(); rotate the tick labels
# through pyplot so long province names do not overlap.
plt.xticks(rotation=90)
plt.show()

# --- City distribution inside Hubei ------------------------------------------
# Work on an explicit copy so the column assignment below does not write
# into a view of ``data``.
sc_data = data.query('province=="湖北"').copy()
sc_data['city'] = sc_data['city'].apply(
    lambda name: 'unknown' if not name else name + '市'
)
sc = sc_data['city'].value_counts()

sc_map = Map(
    '湖北',
    title_color="#fff",
    title_pos='center',
    width=1200,
    height=600,
    background_color='#404a59',
)
sc_map.add(
    '',
    sc.index,
    sc.values,
    maptype='湖北',
    is_label_show=True,
    is_visualmap=True,
    visual_text_color='#000',
)
sc_map.render('湖北.html')

# --- Signature word cloud ------------------------------------------------------
import jieba
import re

# Each <span ...>...</span> fragment in a signature is replaced by the
# literal token "emoji". Compiled once instead of once per row.
SPAN_PATTERN = re.compile(r"<span.*?</span>")
data['signature'] = data['signature'].apply(
    lambda text: SPAN_PATTERN.sub("emoji", text)
)
signature = ''.join(data['signature'])
wordlist = jieba.cut(signature, cut_all=False)

# Stop words, one per line. The path is machine-specific; point it at your
# own stop-word file. A set gives O(1) membership tests in the filter below.
STOPWORDS_PATH = 'C:/Users/baihua/Desktop/ciyun.txt'
with open(STOPWORDS_PATH, 'r', encoding='utf-8') as stopwords_file:
    stopwords_chinese = {line.strip() for line in stopwords_file}

word_list = [seg for seg in wordlist if seg not in stopwords_chinese]
word_list = pd.DataFrame({'signature': word_list})
word_rank = word_list["signature"].value_counts()

from pyecharts import WordCloud

# Only the 100 most frequent tokens are drawn.
wordcloud_chinese = WordCloud(width=1500, height=820)
wordcloud_chinese.add(
    "",
    word_rank.index[0:100],
    word_rank.values[0:100],
    word_size_range=[20, 200],
    is_more_utils=True,
)
wordcloud_chinese.render("signature.html")

# NOTE: when processing text with Python, careful data handling turns out
# to be especially important.
关于词云核心代码和原理:http://python.jobbole.com/87496/
关于 Python 处理 TXT 文本的方法:https://blog.csdn.net/J__Max/article/details/82810144
文章来源: https://blog.csdn.net/qiu_zhi_liao/article/details/85687596