# 1. How to import pandas and check the version?
import pandas as pd

# Version string of the installed pandas package.
print(pd.__version__)

# show_versions() writes its report (JSON when as_json=True) to stdout itself
# and returns None, so it is called directly; wrapping it in print() would
# append a stray "None" after the report.
pd.show_versions(as_json=True)
0.23.4 {'system': {'commit': None, 'python': '3.7.0.final.0', 'python-bits': 64, 'OS': 'Windows', 'OS-release': '10', 'machine': 'AMD64', 'processor': 'Intel64 Family 6 Model 142 Stepping 10, GenuineIntel', 'byteorder': 'little', 'LC_ALL': 'None', 'LANG': 'None', 'LOCALE': 'None.None'}, 'dependencies': {'pandas': '0.23.4', 'pytest': '3.8.0', 'pip': '19.2.1', 'setuptools': '40.2.0', 'Cython': '0.28.5', 'numpy': '1.17.2', 'scipy': '1.1.0', 'pyarrow': None, 'xarray': None, 'IPython': '6.5.0', 'sphinx': '1.7.9', 'patsy': '0.5.0', 'dateutil': '2.7.3', 'pytz': '2018.5', 'blosc': None, 'bottleneck': '1.2.1', 'tables': '3.4.4', 'numexpr': '2.6.8', 'feather': None, 'matplotlib': '2.2.3', 'openpyxl': '2.5.6', 'xlrd': '1.1.0', 'xlwt': '1.3.0', 'xlsxwriter': '1.1.0', 'lxml': '4.2.5', 'bs4': '4.6.3', 'html5lib': '1.0.1', 'sqlalchemy': '1.2.11', 'pymysql': None, 'psycopg2': None, 'jinja2': '2.10', 's3fs': None, 'fastparquet': None, 'pandas_gbq': None, 'pandas_datareader': None}} None
# 2. How to create a series from a list, numpy array and dict?
import numpy as np
import pandas as pd

# Inputs: 26 letters, the integers 0..25, and a letter -> integer mapping.
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

# One Series per input kind. For the dict, the keys become the index and the
# values become the data.
ser1 = pd.Series(mylist)
ser2 = pd.Series(myarr)
ser3 = pd.Series(mydict)
print(ser3.head(3))
a 0 b 1 c 2 dtype: int64
# 3. How to convert the index of a series into a column of a dataframe?
import numpy as np
import pandas as pd

mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))
ser = pd.Series(mydict)

# to_frame() turns the Series into a one-column DataFrame; reset_index() then
# moves the letter index into a regular column named 'index'.
df = ser.to_frame().reset_index()
print(df.head())
index 0 0 a 0 1 b 1 2 c 2 3 e 3 4 d 4
# 4. How to combine many series to form a dataframe?
import numpy as np
import pandas as pd

ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

# Solution 1: concatenate side by side; the unnamed Series become columns 0, 1.
df = pd.concat([ser1, ser2], axis=1)

# Solution 2 (alternative, gives named columns instead of 0 and 1):
# df = pd.DataFrame({'col1': ser1, 'col2': ser2})

print(df.head())
0 1 0 a 0 1 b 1 2 c 2 3 e 3 4 d 4
# 5. How to assign name to the series’ index?
# Give a name to the series ser calling it ‘alphabets’.
import pandas as pd

ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

# NOTE: `.name` labels the Series itself (shown as "Name: alphabets" in the
# output); the index's own label would be set through `ser.index.name`.
ser.name = 'alphabets'

# A bare expression only displays in a notebook; print so a script shows it.
print(ser.head())
0 a 1 b 2 c 3 e 4 d Name: alphabets, dtype: object
# 6. How to get the items of series A not present in series B?
# From ser1 remove items present in ser2.
import pandas as pd

ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# isin() flags the ser1 values that also occur in ser2; negating the mask
# keeps only the values unique to ser1 (original index labels are retained).
only_in_ser1 = ser1[~ser1.isin(ser2)]
print(only_in_ser1)
0 1 1 2 2 3 dtype: int64
# 7. How to get the items not common to both series A and series B?
# Get all items of ser1 and ser2 not common to both.
import numpy as np
import pandas as pd

ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

# Symmetric difference = union minus intersection.
ser_u = pd.Series(np.union1d(ser1, ser2))
ser_i = pd.Series(np.intersect1d(ser1, ser2))
not_common = ser_u[~ser_u.isin(ser_i)]
print(not_common)
0 1 1 2 2 3 5 6 6 7 7 8 dtype: int64
# 8. How to get the minimum, 25th percentile, median, 75th, and max of a numeric series?
# Compute the minimum, 25th percentile, median, 75th, and maximum of ser.
import numpy as np
import pandas as pd

# 25 draws from a normal distribution with mean 10 and standard deviation 5
# (unseeded, so the numbers differ from run to run).
ser = pd.Series(np.random.normal(10, 5, 25))

# Percentiles 0 and 100 are the minimum and maximum; 50 is the median.
five_numbers = np.percentile(ser, q=[0, 25, 50, 75, 100])
print(five_numbers)
array([ 1.6294664 , 6.63669818, 9.88911315, 12.63793738, 19.94314505])
# 9. How to get frequency counts of unique items of a series?
# Calculate the frequency counts of each unique value of ser.
import numpy as np
import pandas as pd

# 30 letters drawn at random (unseeded) from 'a'..'h'.
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))

# value_counts() returns the count per distinct value, most frequent first.
counts = ser.value_counts()
print(counts)
c 6 h 6 b 4 f 4 g 4 a 3 e 2 d 1 dtype: int64
# 10. How to keep only top 2 most frequent values as it is and replace everything else as ‘Other’?
# From ser, keep the top 2 most frequent items as it is and replace everything else as ‘Other’.
import numpy as np
import pandas as pd

# Draw from the seeded generator itself. Merely constructing
# RandomState(100) and discarding it does not seed np.random, so the draw
# would not be reproducible.
rng = np.random.RandomState(100)
ser = pd.Series(rng.randint(1, 5, [12]))

freq = ser.value_counts()
print("Top 2 Freq:", freq)

# value_counts() is sorted by descending count, so the first two index
# entries are the two most frequent values.
is_rare = ~ser.isin(freq.index[:2])

# Cast to object before writing strings so the int -> str mix is explicit
# rather than relying on implicit upcasting during assignment.
ser = ser.astype(object)
ser[is_rare] = 'Other'
print(ser)
Top 2 Freq: 3 5 2 3 4 2 1 2 dtype: int64 0 3 1 2 2 Other 3 2 4 Other 5 3 6 2 7 Other 8 Other 9 3 10 3 11 3 dtype: object
# 11. How to bin a numeric series to 10 groups of equal size?
# Bin the series ser into 10 equal deciles and replace the values with the bin name.
import numpy as np
import pandas as pd

# Input: 20 uniform random numbers in [0, 1) (unseeded).
ser = pd.Series(np.random.random(20))
print(ser.head())

# Solution: q=10 asks qcut for ten equal-frequency bins (cut points at
# 0%, 10%, ..., 100%); each value is replaced by the label of its bin.
decile_labels = ['1st', '2nd', '3rd', '4th', '5th',
                 '6th', '7th', '8th', '9th', '10th']
deciles = pd.qcut(ser, q=10, labels=decile_labels)
print(deciles.head())
0 0.733123 1 0.512086 2 0.325354 3 0.634904 4 0.802665 dtype: float64 0 8th 1 5th 2 3rd 3 7th 4 9th dtype: category Categories (10, object): [1st < 2nd < 3rd < 4th ... 7th < 8th < 9th < 10th]
# 12. How to convert a numpy array to a dataframe of given shape?
# Reshape the series ser into a dataframe with 7 rows and 5 columns
import numpy as np
import pandas as pd

# 35 random integers in 1..9 (unseeded).
ser = pd.Series(np.random.randint(1, 10, 35))

# The underlying ndarray is reshaped row by row: the first five values form
# row 0, the next five row 1, and so on.
df = pd.DataFrame(ser.values.reshape(7, 5))
print(df)
0 | 1 | 2 | 3 | 4 | |
---|---|---|---|---|---|
0 | 8 | 7 | 9 | 5 | 5 |
1 | 2 | 4 | 1 | 5 | 9 |
2 | 5 | 1 | 7 | 6 | 3 |
3 | 6 | 2 | 7 | 3 | 5 |
4 | 2 | 6 | 1 | 9 | 5 |
5 | 7 | 8 | 1 | 4 | 5 |
6 | 6 | 2 | 2 | 3 | 2 |
# 13. How to find the positions of numbers that are multiples of 3 from a series?
# Find the positions of numbers that are multiples of 3 from ser.
import numpy as np
import pandas as pd

# 7 random integers in 1..9 (unseeded).
ser = pd.Series(np.random.randint(1, 10, 7))
print(ser)

# Hand argwhere the plain boolean ndarray rather than the Series: NumPy
# otherwise tries to re-wrap the result as a Series, which fails on pandas
# versions where Series.nonzero no longer exists.
positions = np.argwhere((ser % 3 == 0).values)
print(positions)
0 8 1 9 2 5 3 8 4 6 5 7 6 7 dtype: int32 array([[1], [4]], dtype=int64)
# 14. How to extract items at given positions from a series
# From ser, extract the items at positions in list pos.
import pandas as pd

ser = pd.Series(list('abcdefghijklmnopqrstuvwxyz'))
pos = [0, 4, 8, 14, 20]

# take() selects strictly by integer position. ser[pos] would select by
# label, which gives the same result here only because the index is the
# default 0..25 range.
picked = ser.take(pos)
print(picked)
0 a 4 e 8 i 14 o 20 u dtype: object
# 15. How to stack two series vertically and horizontally ?
# Stack ser1 and ser2 vertically and horizontally (to form a dataframe).
import pandas as pd

ser1 = pd.Series(range(5))
ser2 = pd.Series(list('abcde'))

# Vertical: pd.concat along the default axis=0 places ser2 below ser1 and
# keeps each Series' own index labels. (Series.append was removed in
# pandas 2.0, so concat is used instead.)
stacked = pd.concat([ser1, ser2])

# Horizontal: axis=1 aligns the two Series on their index as columns 0 and 1.
df = pd.concat([ser1, ser2], axis=1)
print(df)
0 1 0 0 a 1 1 b 2 2 c 3 3 d 4 4 e
# 16. How to get the positions of items of series A in another series B?
# Get the positions of items of ser2 in ser1 as a list.
import numpy as np
import pandas as pd

ser1 = pd.Series([10, 9, 6, 5, 3, 1, 12, 8, 13])
ser2 = pd.Series([1, 3, 10, 13])

# Solution 1: for every item, scan ser1's values and keep the first match.
haystack = ser1.values
positions_where = [int(np.where(haystack == item)[0][0]) for item in ser2]

# Solution 2: build the lookup index once, then ask it for each location.
# The values of ser1 are unique here, so get_loc() returns a single integer.
lookup = pd.Index(ser1)
positions = [lookup.get_loc(item) for item in ser2]

print(positions)
[5, 4, 0, 8]
# 17. How to compute the mean squared error on a truth and predicted series? # Compute the mean squared error of truth and pred series.
来源:https://www.cnblogs.com/ohou/p/11933928.html