import pandas as pd
# Load the Titanic passenger table; every later statement in this script
# reads from the module-level DataFrame `t`.
# read_excel already returns a DataFrame, so no extra pd.DataFrame() wrap.
t = pd.read_excel('C:\\Users\\ASUS\\Desktop\\lw\\python高级设计test\\数据文件\\titanic.xlsx')

# value_counts() is indexed by the column's values, so on this integer
# column s[0]/s[1] are label lookups, not "first/second row".
# NOTE(review): assumes the usual encoding 1 = survived, 0 = died — confirm
# against the data file. The original passed the label-0 count as survivors.
s = t['survived'].value_counts()
# .get(label, 0) keeps the report working if one outcome is absent.
print('存活人数为{}\n死亡人数为{}'.format(s.get(1, 0), s.get(0, 0)))

# Passenger counts per sex.
# Look the counts up by label: value_counts() orders rows by frequency, and
# integer positions on a string-labelled Series are a deprecated fallback,
# so s[0]/s[1] only matched male/female by accident of ordering.
s = t['sex'].value_counts()
print('male人数为{}\nfemale人数为{}'.format(s.get('male', 0), s.get('female', 0)))

# Count rescued passengers (rows where alive == 'yes') separately by sex.
# A boolean mask replaces the row-by-row loop, whose indentation had been
# lost and which used chained t[col][i] indexing.
rescued_sex = t.loc[t['alive'] == 'yes', 'sex']
# Keep `a` / `b` as plain ints, matching what the counting loop produced.
a = int((rescued_sex == 'male').sum())
b = int((rescued_sex == 'female').sum())
print("男的获救人数为{}\n女的获救人数为{}".format(a, b))

# Show how many passengers fall into each value of the 'class' column.
class_counts = t['class'].value_counts()
print(class_counts)

# Correlation matrix between the 'survived' and 'pclass' columns.
# The original fused three statements onto one line and re-read the file via
# an undefined `file_path`; `t` is already loaded above and has not been
# modified, so it is reused instead of reloading.
a = t[['survived', 'pclass']]
print(a.corr())

# Box plot of the 'fare' column, one box per 'pclass' value.
# The object returned by boxplot() is printed, as in the original.
fare_axes = t.boxplot(column=['fare'], by=['pclass'])
print(fare_axes)

# 从图中可以看出:船舱等级为 1 时票价范围较大,船舱等级为 2、3 时票价范围相对较小。