Python pandas | 易学教程

1.得到指定行的索引值。

import pandas as pd

# Build a small frame whose index labels (10..14) differ from the row positions (0..4).
dframe = pd.DataFrame(
    {"A": list("abcde"), "B": list("fghij")},
    index=[10, 11, 12, 13, 14],
)
# dframe:
#     A  B
# 10  a  f
# 11  b  g
# 12  c  h
# 13  d  i
# 14  e  j

# Get the index label of the row at position 2.
row_label = dframe.index[2]  # -> 12

# Drop that row. drop() returns a new frame; dframe itself is left unchanged.
dropped = dframe.drop(row_label)
# dropped:
#     A  B
# 10  a  f
# 11  b  g
# 13  d  i
# 14  e  j

2.一些作业记录

Question 1:

import re
import string

import pandas as pd

# Prefix -> short country name, applied to the energy sheet's index.
# The first matching prefix wins.
ENERGY_COUNTRY_PREFIXES = {
    "Republic of Korea": "South Korea",
    "United States of America": "United States",
    "United Kingdom": "United Kingdom",
    "China, Hong Kong": "Hong Kong",
}

# Prefix -> short country name, applied to the World Bank index:
#   "Korea, Rep."          -> "South Korea"
#   "Iran, Islamic Rep."   -> "Iran"
#   "Hong Kong SAR, China" -> "Hong Kong"
GDP_COUNTRY_PREFIXES = {
    "Korea, Rep.": "South Korea",
    "Iran, Islamic Rep.": "Iran",
    "Hong Kong": "Hong Kong",
}


def _shorten_by_prefix(name: str, prefixes: dict) -> str:
    """Return the replacement for the first prefix *name* starts with, else *name*."""
    for prefix, replacement in prefixes.items():
        if name.startswith(prefix):
            return replacement
    return name


def _clean_energy_country(name: str) -> str:
    """Normalise one country label from the energy sheet.

    Applies the prefix table, removes any "(...)" group, then strips
    trailing digits and trailing whitespace, in that order.
    """
    name = _shorten_by_prefix(name, ENERGY_COUNTRY_PREFIXES)
    name = re.sub(r"\(.*?\)", "", name)
    return name.rstrip(string.digits).rstrip()


# ==== read_excel: energy indicators ====
energy = pd.read_excel(
    'Energy Indicators.xls',
    usecols=[2, 3, 4, 5],
    skiprows=16,
    skipfooter=38,
    na_values=['...'],
)
# Drop the row labelled 0 before any further processing.
energy1 = energy.drop([0])

# Collect all column renames first, then apply them in a single call.
column_names = {}
for col in energy1.columns:
    if col.startswith('Unnamed'):
        column_names[col] = 'Country'
    elif col.endswith('capita'):
        # Capitalise the trailing word: "... capita" -> "... Capita".
        column_names[col] = col[:-6] + 'Capita'
    elif col.endswith('Production'):
        # Keep the first nine characters of the name behind a "% " prefix.
        column_names[col] = '% ' + col[:9]

energy1 = energy1.rename(columns=column_names).set_index('Country')
energy1 = energy1.rename(index=_clean_energy_country)

# ==== read_csv: World Bank data ====
GDP = pd.read_csv('world_bank.csv', skiprows=4)
GDP = GDP.rename(columns={'Country Name': 'Country'}).set_index('Country')
GDP = GDP.rename(index=lambda name: _shorten_by_prefix(name, GDP_COUNTRY_PREFIXES))

# ==== read_excel: Scimago ranking ====
ScimEn = pd.read_excel('scimagojr-3.xlsx')
ScimEn = ScimEn.set_index('Country')

# ==== merge ====
# Inner joins on the country index keep only countries present in all three sources.
df_merged = pd.merge(energy1, GDP, how='inner', left_index=True, right_index=True)
df_merged = pd.merge(ScimEn, df_merged, how='inner', left_index=True, right_index=True)
# DataFrame.sort() no longer exists in pandas; sort_values() is the replacement.
df_merged = df_merged.sort_values(['Rank'], ascending=True)
df_merged = df_merged[df_merged['Rank'] <= 15]

Question 3:

def answer_three():
    """Return the row-wise mean of the remaining columns, sorted descending.

    The listed bibliometric, energy and indicator columns are dropped from
    the frame returned by ``answer_one()``; the mean is taken across
    whatever columns are left (presumably the yearly GDP columns -- confirm
    against ``answer_one()``).

    Returns:
        pandas.Series named ``'avgGDP'``, indexed like the input frame and
        sorted from largest to smallest.
    """
    Top15 = answer_one()
    cols = [
        'Rank', 'Documents', 'Citable documents', 'Citations', 'Self-citations',
        'Citations per document', 'H index', 'Energy Supply',
        'Energy Supply per Capita', '% Renewable', 'Country Code',
        'Indicator Name', 'Indicator Code',
    ]
    Top15_new = Top15.drop(cols, axis=1)
    Top15_new['avgGDP'] = Top15_new.mean(axis=1)
    # DataFrame.sort() no longer exists in pandas; sort_values() is the replacement.
    Top15_new = Top15_new.sort_values(['avgGDP'], ascending=False)
    return pd.Series(Top15_new['avgGDP'])

Question 4:

def answer_four():
    """Return the relative change between the '2006' and '2015' columns for the United Kingdom.

    Computed as ``(value_2015 - value_2006) / value_2006`` on the frame
    returned by ``answer_one()``.

    Returns:
        A one-element pandas.Series labelled ``'United Kingdom'`` (the row
        is selected with a list, so the result stays a Series, not a scalar).
    """
    Top15 = answer_one()
    uk = Top15.loc[['United Kingdom']]
    result = (uk['2015'] - uk['2006']) / uk['2006']
    return result

Question 5:

def answer_five():
    """Return the mean of the 'Energy Supply per Capita' column of ``answer_one()``."""
    Top15 = answer_one()
    mean = Top15['Energy Supply per Capita'].mean()
    return mean


print(answer_five())

Question 6:

Question 7:

def answer_seven():
    """Return the row with the largest Self-citations / Citations ratio.

    A ``'ratio'`` column is added to the frame returned by ``answer_one()``.

    Returns:
        tuple ``(label, ratio)``: the index label of the row with the
        largest ratio (the country, if ``answer_one()`` returns a
        Country-indexed frame -- confirm) and the ratio value itself.
    """
    Top15 = answer_one()
    Top15['ratio'] = Top15['Self-citations'].div(Top15['Citations'])
    ratio = Top15['ratio']
    # Work on the Series directly. The previous version discarded the results
    # of reset_index()/set_index() and then selected a 'Country' column that
    # was never materialised.
    return (ratio.idxmax(), ratio.max())


answer_seven()

Question 8:

def answer_eight():
    """Return the index label of the row with the third-largest 'populations' value.

    ``'populations'`` is computed as ``'Energy Supply'`` divided by
    ``'Energy Supply per Capita'`` on the frame returned by
    ``answer_one()``. The sorted one-column frame is printed before
    returning.
    """
    Top15 = answer_one()
    Top15['populations'] = Top15['Energy Supply'].div(Top15['Energy Supply per Capita'])
    # DataFrame.sort() no longer exists in pandas; sort_values() is the replacement.
    Top15 = Top15.sort_values(['populations'], ascending=False)
    Top15 = Top15.loc[:, ['populations']]
    print(Top15)
    # Position 2 of the descending order is the third-largest entry.
    return Top15.index[2]


answer_eight()

# This version raised an error when run:
#   AttributeError: 'float' object has no attribute 'sqrt'
# It is kept here as the "before" example. The revised answer_nine further
# down casts both columns to float64 before calling corr().
def answer_nine():
    """Return the correlation between 'Docs per Capita' and 'Energy Supply per Capita'.

    ``'populations'`` is ``'Energy Supply'`` / ``'Energy Supply per Capita'``
    and ``'Docs per Capita'`` is ``'Documents'`` / ``'populations'``; both
    are added to the frame returned by ``answer_one()``. The two columns
    being correlated are printed first.
    """
    Top15 = answer_one()
    Top15['populations'] = Top15['Energy Supply'].div(Top15['Energy Supply per Capita'])
    Top15['Docs per Capita'] = Top15['Documents'].div(Top15['populations'])
    Top = Top15.loc[:, ['Docs per Capita', 'Energy Supply per Capita']]
    print(Top)
    corr = Top15['Docs per Capita'].corr(Top15['Energy Supply per Capita'])
    return corr


print(answer_nine())

修改后：

def answer_nine():
    """Return the correlation between 'Docs per Capita' and 'Energy Supply per Capita'.

    ``'populations'`` is ``'Energy Supply'`` / ``'Energy Supply per Capita'``
    and ``'Docs per Capita'`` is ``'Documents'`` / ``'populations'``; both
    are added to the frame returned by ``answer_one()``. The two columns
    being correlated are printed first.

    Returns:
        The value of ``Series.corr`` computed on the two columns after
        casting them to float64.
    """
    Top15 = answer_one()
    Top15['populations'] = Top15['Energy Supply'].div(Top15['Energy Supply per Capita'])
    Top15['Docs per Capita'] = Top15['Documents'].div(Top15['populations'])
    Top = Top15.loc[:, ['Docs per Capita', 'Energy Supply per Capita']]
    print(Top)
    # Revised approach: cast both columns to float64 before correlating.
    docs_per_capita = Top15['Docs per Capita'].astype('float64')
    energy_per_capita = Top15['Energy Supply per Capita'].astype('float64')
    correlation = docs_per_capita.corr(energy_per_capita)
    return correlation


print(answer_nine())

Question 10:

def answer_ten():
    """Flag rows whose '% Renewable' is at or above the column median.

    A ``'HighRenew'`` column (1 if ``'% Renewable'`` >= median, else 0) is
    added to the frame returned by ``answer_one()``. The frame is printed
    once sorted by ``'% Renewable'`` (descending) and once sorted by
    ``'Rank'`` (ascending).

    Returns:
        pandas.Series ``'HighRenew'`` of 0/1 integers, ordered by ascending
        ``'Rank'``.
    """
    Top15 = answer_one()
    # DataFrame.sort() no longer exists in pandas; sort_values() is the replacement.
    Top15 = Top15.sort_values(['% Renewable'], ascending=False)
    print(Top15)
    # Use the real median rather than the value at position 7, which was only
    # the median when the frame had exactly 15 rows.
    median = Top15['% Renewable'].median()
    # Vectorised comparison; missing values compare False and therefore map to 0.
    Top15['HighRenew'] = (Top15['% Renewable'] >= median).astype(int)
    Top15 = Top15.sort_values(['Rank'], ascending=True)
    print(Top15)
    return Top15['HighRenew']


answer_ten()

Python pandas

原文：https://www.cnblogs.com/Shinered/p/9239476.html

标签

pandas

python

energy