数据分析中常用的Python技巧

1. 条件表达式

import math

# 普通写法

import math


def get_log(x):
    """Return the natural logarithm of ``x``, or NaN when ``x > 0`` is false.

    This is the "plain" if/else formulation that the conditional-expression
    version is compared against.
    """
    # Guard clause: anything that is not strictly positive has no real log.
    if not x > 0:
        return float('nan')
    return math.log(x)

# 使用条件表达式

x = 5

# Plain version: delegate to the if/else helper.
log_val1 = get_log(x)

# Conditional-expression version: the same branch written as one expression.
log_val2 = math.log(x) if x > 0 else float('nan')

# Both values are printed, one per line.
print(log_val1, log_val2, sep='\n')

2. 列表推导式

# Version 1: explicit for loop that skips odd numbers.
print('找出1000内的偶数(for循环)：')
l1 = []
for i in range(1000):
    if i % 2:
        continue
    l1.append(i)
print(l1)

# Version 2: the same filter expressed as a list comprehension.
print('找出1000内的偶数(列表推导式)：')
l2 = [n for n in range(1000) if n % 2 == 0]
print(l2)

# 1. list
l = [1, 'a', 2, 'b']
print(type(l))
print('修改前：', l)

# Lists are mutable: overwrite the first element in place.
l[0] = 3
print('修改后：', l)

# Append one element at the end.
l.append(4)
print('添加后：', l)

# Iterate directly over the elements.
print('遍历list(for循环)：')
for item in l:
    print(item)

# Iterate by index with a while loop.
print('遍历list(while循环)：')
i = 0
while i < len(l):
    print(l[i])
    i += 1

# Concatenation with +.
print('列表合并(+)：', [1, 2] + [3, 4])

# Repetition with *.
print('列表重复(*)：', [1, 2] * 5)

# Membership test with in.
print('判断元素存在(in)：', 1 in [1, 2])

# 2. tuple
t = (1, 'a', 2, 'b')
print(type(t))

# Tuples are immutable; item assignment raises an error, e.g.:
# t[0] = 3

# Iterate directly over the elements.
print('遍历tuple(for循环)：')
for item in t:
    print(item)

# Iterate by index with a while loop.
print('遍历tuple(while循环)：')
i = 0
while i != len(t):
    print(t[i])
    i += 1

# Unpacking: bind the first two elements, discard the rest with `_`.
a, b, _, _ = t
# Print the names that were actually bound by the unpacking above.
print('unpack: ', a, b)

# The number of targets must equal the tuple length, otherwise unpacking
# raises an error. This often shows up when assigning a function's return
# value, e.g.:
# a, b, c = t

# 3. dict
d = {
    '小象学院': 'http://www.chinahadoop.cn/',
    '百度': 'https://www.baidu.com/',
    '阿里巴巴': 'https://www.alibaba.com/',
    '腾讯': 'https://www.tencent.com/',
}

print('通过key获取value: ', d['小象学院'])

# Iterating a dict yields its keys.
print('遍历key: ')
for key in d:
    print(key)

# Iterate over the values only.
print('遍历value: ')
for value in d.values():
    print(value)

# Iterate over (key, value) pairs.
print('遍历item: ')
for key, value in d.items():
    print(f'{key}: {value}')

# Same pairs, rendered through str.format.
print('format输出格式：')
for key, value in d.items():
    print('{}的网址是{}'.format(key, value))

# 4. set
print('创建set:')
my_set = {1, 2, 3}
print(my_set)
# Building from a list: the repeated 2 is stored only once.
my_set = set([1, 2, 3, 2])
print(my_set)

print('添加单个元素:')
# 3 is already a member, so this add leaves the set unchanged.
my_set.add(3)
print('添加3', my_set)

my_set.add(4)
print('添加4', my_set)

print('添加多个元素：')
# update() adds every element of the iterable at once.
my_set.update([4, 5, 6])
print(my_set)

4. Counter

初始化

import collections

# Three ways to build a Counter: from an iterable of items, from a mapping
# of item -> count, and from keyword arguments.
c1 = collections.Counter(['a', 'b', 'c', 'a', 'b', 'b'])
c2 = collections.Counter({'a': 2, 'b': 3, 'c': 1})
c3 = collections.Counter(a=2, b=3, c=1)

# One Counter per output line.
print(c1, c2, c3, sep='\n')

更新内容

# Note: Counter.update() adds the given counts to the existing ones; it does
# not replace them.
c1.update({'a': 4, 'c': -2, 'd': 4})
print(c1)

# Section heading that was fused onto the end of this line in the original:
# "访问内容" (accessing values).

# Look up individual counts by key.
print('a=', c1['a'])
print('b=', c1['b'])

# Compare with a plain dict: 'e' was never counted, yet the lookup below is
# still performed directly on the Counter.
print('e=', c1['e'])

elements()方法

most_common()方法

# Show the three most frequent elements. Printed explicitly so the result is
# visible when run as a script, not only in an interactive session.
print(c1.most_common(3))

5. defaultdict

# Count how many times each letter occurs, three different ways.
s = 'chinadoop'

# 1) Counter does the whole job in one call.
print(collections.Counter(s))

# 2) Plain dict: the first occurrence has to be handled explicitly.
counter = {}
for c in s:
    if c in counter:
        counter[c] += 1
    else:
        counter[c] = 1

print(counter.items())

# 3) defaultdict(int): missing keys start at int() == 0, so no branch needed.
counter2 = collections.defaultdict(int)
for c in s:
    counter2[c] += 1
print(counter2.items())

# Group values that share a key: defaultdict(list) starts each key with [].
colors = [('yellow', 1), ('blue', 2), ('yellow', 3), ('blue', 4), ('red', 1)]
d = collections.defaultdict(list)
for name, number in colors:
    d[name].append(number)

print(d.items())

import math

print('示例1，获取两个列表对应位置上的最小值：')
l1 = [1, 3, 5, 7, 9]
l2 = [2, 4, 6, 8, 10]
mins = map(min, l1, l2)
# Printing the map object itself shows the object, not the computed values.
print(mins)

# map() is lazy: the values are only computed while iterating.
for item in mins:
    print(item)

print('示例2，对列表中的元素进行平方根操作：')
squared = map(math.sqrt, l2)
print(squared)
# list() consumes the iterator and materialises every result.
print(list(squared))

# Disabled stand-alone lambda example, kept for reference:
# my_func = lambda a, b, c: a * b
# print(my_func)
# print(my_func(1, 2, 3))

# Combine a two-argument lambda with map() over two lists in lockstep.
print('lambda结合map')
l1 = [1, 3, 5, 7, 9]
l2 = [2, 4, 6, 8, 10]
result = map(lambda x, y: x * 2 + y, l1, l2)
# list() drains the lazy map object so the values can be printed.
print(list(result))

点击打开链接

import csv

# Read every row of grades.csv into a list of dicts keyed by the header row.
# newline='' leaves newline handling to the csv module, as its documentation
# requires for file objects (matters for quoted fields with embedded newlines).
with open('grades.csv', newline='') as csvfile:
    grades_data = list(csv.DictReader(csvfile))

print('记录个数：', len(grades_data))
print('前2条记录：', grades_data[:2])
# With no data rows there is no first record to take the column names from,
# so fall back to an empty list instead of raising IndexError.
print('列名：', list(grades_data[0].keys()) if grades_data else [])

# Distinct 7-character prefixes of each row's 'assignment1_submission' value
# (presumably a 'YYYY-MM' month prefix; confirm against the CSV contents).
assign1_sub_month = {
    row['assignment1_submission'][:7] for row in grades_data
}
print(assign1_sub_month)

文章来源: 数据分析中常用的Python技巧

标签

数据分析

python