1. 获取字符串去重后的列表
2. 构造全为 0 的数组(DataFrame),columns 为去重后的字符串列表
3. 给全为 0 的数组赋值:每一行中出现过的字符串,其对应的列置为 1
第一步
import pandas as pd
import numpy as np
# Step 1: build the sample frame, split the comma-separated strings in
# column 'c', and collect the distinct tokens in first-seen order.
df = pd.DataFrame({
    'a': range(7),
    'b': range(7, 0, -1),
    'c': [
        'one,two,three',
        'one,two',
        'two,four',
        'two,five,four,six',
        'seven,eight,one',
        'nine,ten,six,four',
        'ten,six,two,seven',
    ],
    'd': list('hjklmno'),
})

print('=' * 40)
print(df['c'])
# Sample output (the dtype line may differ between pandas versions):
# 0        one,two,three
# 1              one,two
# 2             two,four
# 3    two,five,four,six
# 4      seven,eight,one
# 5    nine,ten,six,four
# 6    ten,six,two,seven
# Name: c, dtype: object

# Each cell of 'c' becomes a list of its comma-separated tokens.
a = df['c'].str.split(',')
print(a)
# Sample output:
# 0         [one, two, three]
# 1                [one, two]
# 2               [two, four]
# 3    [two, five, four, six]
# 4       [seven, eight, one]
# 5    [nine, ten, six, four]
# 6    [ten, six, two, seven]
# Name: c, dtype: object

print('=' * 50)
# Plain list of lists, one inner list per row of df (reuses the split above).
a_lst = a.tolist()
print(a_lst)
# [['one', 'two', 'three'], ['one', 'two'], ['two', 'four'],
#  ['two', 'five', 'four', 'six'], ['seven', 'eight', 'one'],
#  ['nine', 'ten', 'six', 'four'], ['ten', 'six', 'two', 'seven']]

print('*' * 60)
# dict keys are unique and keep insertion order, so this removes duplicates
# while preserving the order in which each token first appears.
new_lst = list(dict.fromkeys(token for tokens in a_lst for token in tokens))
print(new_lst)
# ['one', 'two', 'three', 'four', 'five',
#  'six', 'seven', 'eight', 'nine', 'ten']
第二步
# Step 2: build an all-zero frame with one row per row of df and one column
# per distinct token in new_lst. np.zeros produces floats by default, which
# is why the cells print as 0.0.
df_zeros = pd.DataFrame(
    data=np.zeros((df.shape[0], len(new_lst))),
    columns=new_lst,
)
print(df_zeros)
# Sample output:
#    one  two  three  four  five  six  seven  eight  nine  ten
# 0  0.0  0.0    0.0   0.0   0.0  0.0    0.0    0.0   0.0  0.0
# 1  0.0  0.0    0.0   0.0   0.0  0.0    0.0    0.0   0.0  0.0
# 2  0.0  0.0    0.0   0.0   0.0  0.0    0.0    0.0   0.0  0.0
# 3  0.0  0.0    0.0   0.0   0.0  0.0    0.0    0.0   0.0  0.0
# 4  0.0  0.0    0.0   0.0   0.0  0.0    0.0    0.0   0.0  0.0
# 5  0.0  0.0    0.0   0.0   0.0  0.0    0.0    0.0   0.0  0.0
# 6  0.0  0.0    0.0   0.0   0.0  0.0    0.0    0.0   0.0  0.0
第三步
# Step 3: for every row, set the columns named by that row's tokens to 1.
# .loc is label-based; this works because df_zeros uses the default
# 0..n-1 index, so the enumerate position equals the row label.
for i, tokens in enumerate(a_lst):
    df_zeros.loc[i, tokens] = 1
print(df_zeros)
# Sample output:
#    one  two  three  four  five  six  seven  eight  nine  ten
# 0  1.0  1.0    1.0   0.0   0.0  0.0    0.0    0.0   0.0  0.0
# 1  1.0  1.0    0.0   0.0   0.0  0.0    0.0    0.0   0.0  0.0
# 2  0.0  1.0    0.0   1.0   0.0  0.0    0.0    0.0   0.0  0.0
# 3  0.0  1.0    0.0   1.0   1.0  1.0    0.0    0.0   0.0  0.0
# 4  1.0  0.0    0.0   0.0   0.0  0.0    1.0    1.0   0.0  0.0
# 5  0.0  0.0    0.0   1.0   0.0  1.0    0.0    0.0   1.0  1.0
# 6  0.0  1.0    0.0   0.0   0.0  1.0    1.0    0.0   0.0  1.0