Python pandas

匿名 (未验证) 提交于 2019-12-02 22:54:36

1.得到指定行的索引值。

import pandas as pd

# Build a small frame whose row labels are 10..14 instead of the default 0..4.
dframe = pd.DataFrame(
    {"A": list("abcde"), "B": list("fghij")},
    index=[10, 11, 12, 13, 14],
)
# dframe
# Out[7]:
#     A  B
# 10  a  f
# 11  b  g
# 12  c  h
# 13  d  i
# 14  e  j

# Get the index label of the row at position 2.
third_label = dframe.index[2]  # output: 12

# Delete that row by label. drop() returns a new frame; dframe is unchanged.
dframe_dropped = dframe.drop(third_label)
# Out[99]:
#     A  B
# 10  a  f
# 11  b  g
# 13  d  i
# 14  e  j

2.一些作业记录

Question 1:

import re
import string

import pandas as pd

# Country-name prefixes in the energy sheet -> short names used as merge keys.
_ENERGY_COUNTRY_PREFIXES = {
    "Republic of Korea": "South Korea",
    "United States of America": "United States",
    "United Kingdom": "United Kingdom",
    "China, Hong Kong": "Hong Kong",
}

# Country-name prefixes in the World Bank file -> short names used as merge keys.
# Covers "Korea, Rep.", "Iran, Islamic Rep." and "Hong Kong SAR, China".
_GDP_COUNTRY_PREFIXES = {
    "Korea, Rep.": "South Korea",
    "Iran, Islamic Rep.": "Iran",
    # The original compared against "Hong kong" (lowercase k), which can never
    # equal the first nine characters of "Hong Kong SAR, China".
    "Hong Kong": "Hong Kong",
}


def _rename_by_prefix(name, prefixes):
    """Return the replacement for the first prefix *name* starts with, else *name*."""
    for prefix, replacement in prefixes.items():
        if name.startswith(prefix):
            return replacement
    return name


def _energy_column_name(col):
    """Map a raw energy-sheet column header to its cleaned-up name."""
    if col.startswith("Unnamed"):
        return "Country"
    if col.endswith("capita"):
        return col[:-6] + "Capita"
    if col.endswith("Production"):
        return "% " + col[:9]
    return col


def _clean_energy_country(name):
    """Normalise one country label from the energy sheet.

    Applies the prefix renames, removes any parenthesised text, then strips
    trailing digits and trailing whitespace, in that order.
    """
    name = _rename_by_prefix(name, _ENERGY_COUNTRY_PREFIXES)
    name = re.sub(r"\(.*?\)", "", name)
    return name.rstrip(string.digits).rstrip()


# ==== read_excel: energy indicators ====
energy = pd.read_excel(
    "Energy Indicators.xls",
    usecols=[2, 3, 4, 5],
    skiprows=16,
    skipfooter=38,
    # NOTE(review): the literal was reduced to `[...]` in this copy; the
    # string "..." as the missing-data marker is assumed - confirm.
    na_values=["..."],
)
# Row 0 is dropped before the columns are renamed and the index is set.
energy1 = energy.drop([0])
energy1 = energy1.rename(columns=_energy_column_name)
energy1 = energy1.set_index("Country")
energy1 = energy1.rename(index=_clean_energy_country)

# ==== read_csv: GDP ====
GDP = pd.read_csv("world_bank.csv", skiprows=4)
GDP = GDP.rename(columns={"Country Name": "Country"})
GDP = GDP.set_index("Country")
GDP = GDP.rename(index=lambda name: _rename_by_prefix(name, _GDP_COUNTRY_PREFIXES))

# ==== read_excel: journal ranking ====
ScimEn = pd.read_excel("scimagojr-3.xlsx")
ScimEn = ScimEn.set_index("Country")

# ==== merge ====
# Inner joins on the country index keep only countries present in all three.
df_merged = pd.merge(energy1, GDP, how="inner", left_index=True, right_index=True)
df_merged = pd.merge(ScimEn, df_merged, how="inner", left_index=True, right_index=True)
# DataFrame.sort() no longer exists in pandas; sort_values() is its replacement.
df_merged = df_merged.sort_values(["Rank"], ascending=True)
df_merged = df_merged[df_merged["Rank"] <= 15]


def answer_one():
    """Return a copy of the merged frame restricted to ``Rank <= 15``.

    The later snippets in this file call ``answer_one()``, but no definition
    of it appears here; this exposes ``df_merged`` under that name. A copy is
    returned so that callers adding columns do not modify ``df_merged``.
    """
    return df_merged.copy()

Question 3:

def answer_three():
    """Return the per-country ``avgGDP`` series, highest value first.

    ``avgGDP`` is the row-wise mean of whatever columns remain after the
    ranking, energy and World Bank descriptor columns are dropped from the
    frame returned by ``answer_one()``.

    Returns:
        pandas.Series named ``avgGDP``, sorted in descending order.
    """
    Top15 = answer_one()
    non_gdp_columns = [
        "Rank", "Documents", "Citable documents", "Citations", "Self-citations",
        "Citations per document", "H index", "Energy Supply",
        "Energy Supply per Capita", "% Renewable", "Country Code",
        "Indicator Name", "Indicator Code",
    ]
    gdp_by_year = Top15.drop(non_gdp_columns, axis=1)
    avg_gdp = gdp_by_year.mean(axis=1).rename("avgGDP")
    # DataFrame.sort() no longer exists in pandas; sort_values() replaces it.
    return avg_gdp.sort_values(ascending=False)

Question 4:

def answer_four():
    """Return the relative change between the 2006 and 2015 columns for the United Kingdom.

    Returns:
        A one-element pandas.Series (indexed by "United Kingdom") holding
        ``(value_2015 - value_2006) / value_2006``.
    """
    Top15 = answer_one()
    # List-based .loc keeps a one-row frame, so the result stays a Series.
    uk = Top15.loc[["United Kingdom"]]
    # NOTE(review): year column labels are assumed to be strings (headers read
    # from CSV); the quotes were lost in this copy of the code - confirm.
    return (uk["2015"] - uk["2006"]) / uk["2006"]

Question 5:

def answer_five():
    """Return the mean of the ``Energy Supply per Capita`` column."""
    Top15 = answer_one()
    return Top15["Energy Supply per Capita"].mean()


print(answer_five())

Question 6:

Question 7:

def answer_seven():
    """Find the row with the highest ratio of self-citations to citations.

    The original discarded the results of ``reset_index()`` and
    ``set_index()`` and then selected ``Country`` as a column although it is
    the index. The ratio is now kept in a one-column frame indexed like the
    input, which is what those calls were building towards.

    Returns:
        Tuple ``(idxmax, max)`` of two pandas.Series, each with the single
        entry ``ratio``: the index label of the largest ratio, and that ratio.
    """
    Top15 = answer_one()
    ratio = Top15["Self-citations"].div(Top15["Citations"])
    Top_new = ratio.to_frame("ratio")
    return (Top_new.idxmax(), Top_new.max())


answer_seven()

Question 8:

def answer_eight():
    """Return the index label with the third-largest ``populations`` value.

    ``populations`` is computed as ``Energy Supply`` divided by
    ``Energy Supply per Capita``. The ranked one-column frame is printed
    before the label is returned.
    """
    Top15 = answer_one()
    populations = Top15["Energy Supply"].div(Top15["Energy Supply per Capita"])
    # DataFrame.sort() no longer exists in pandas; sort_values() replaces it.
    ranked = populations.sort_values(ascending=False).to_frame("populations")
    print(ranked)
    # Position 2 of the descending order is the third-largest entry.
    return ranked.index[2]


answer_eight()

# This first attempt raised an error:
#     AttributeError: 'float' object has no attribute 'sqrt'
# NOTE(review): presumably the two columns being correlated are not stored
# with a numeric dtype - confirm against the frame answer_one() returns.
# The logic is kept as first written because it documents the failing version.
def answer_nine():
    """Return the correlation of ``Docs per Capita`` with ``Energy Supply per Capita``.

    ``populations`` is ``Energy Supply`` divided by ``Energy Supply per
    Capita``, and ``Docs per Capita`` is ``Documents`` divided by
    ``populations``. The two correlated columns are printed first.
    """
    Top15 = answer_one()
    Top15["populations"] = Top15["Energy Supply"].div(Top15["Energy Supply per Capita"])
    Top15["Docs per Capita"] = Top15["Documents"].div(Top15["populations"])
    Top = Top15.loc[:, ["Docs per Capita", "Energy Supply per Capita"]]
    print(Top)
    corr = Top15["Docs per Capita"].corr(Top15["Energy Supply per Capita"])
    return corr


print(answer_nine())

修改后:

def answer_nine():
    """Return the correlation of ``Docs per Capita`` with ``Energy Supply per Capita``.

    ``populations`` is ``Energy Supply`` divided by ``Energy Supply per
    Capita``, and ``Docs per Capita`` is ``Documents`` divided by
    ``populations``. Both columns are cast to float64 before ``corr`` is
    called. The two correlated columns are printed first.
    """
    Top15 = answer_one()
    Top15["populations"] = Top15["Energy Supply"].div(Top15["Energy Supply per Capita"])
    Top15["Docs per Capita"] = Top15["Documents"].div(Top15["populations"])
    Top = Top15.loc[:, ["Docs per Capita", "Energy Supply per Capita"]]
    print(Top)
    # Revised approach: cast explicitly. The dtype is given as the string
    # "float64" because the bare name float64 is not defined in this file.
    docs_per_capita = Top15["Docs per Capita"].astype("float64")
    energy_per_capita = Top15["Energy Supply per Capita"].astype("float64")
    correlation = docs_per_capita.corr(energy_per_capita)
    return correlation


print(answer_nine())

Question 10:

def answer_ten():
    """Flag rows whose ``% Renewable`` is at or above the column median.

    The frame is printed twice: once sorted by ``% Renewable`` (descending)
    and once, with the new column, sorted by ``Rank`` (ascending).

    Returns:
        pandas.Series named ``HighRenew`` holding 1 where ``% Renewable`` is
        greater than or equal to the median and 0 otherwise, ordered by
        ``Rank`` ascending.
    """
    Top15 = answer_one()
    # DataFrame.sort() no longer exists in pandas; sort_values() replaces it.
    Top15 = Top15.sort_values(["% Renewable"], ascending=False)
    print(Top15)
    # The original took element [7] of the sorted column. That is the median
    # only for exactly 15 rows, and integer lookup on a label-indexed Series
    # is deprecated. median() states the intent directly.
    median = Top15["% Renewable"].median()
    Top15["HighRenew"] = (Top15["% Renewable"] >= median).astype(int)
    Top15 = Top15.sort_values(["Rank"], ascending=True)
    print(Top15)
    return Top15["HighRenew"]


answer_ten()

Python pandas

原文:https://www.cnblogs.com/Shinered/p/9239476.html

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!