XLRD/Python: Reading Excel file into dict with for-loops

喜夏-厌秋 提交于 2019-11-28 17:55:35

The idea is to first read the header row into a list. Then iterate over the remaining sheet rows (starting from the row right after the header), build a new dictionary for each row from the header keys and the corresponding cell values, and append it to a list of dictionaries:

from xlrd import open_workbook

book = open_workbook('forum.xlsx')
sheet = book.sheet_by_index(3)

# read header values into the list    
keys = [sheet.cell(0, col_index).value for col_index in xrange(sheet.ncols)]

dict_list = []
for row_index in xrange(1, sheet.nrows):
    d = {keys[col_index]: sheet.cell(row_index, col_index).value 
         for col_index in xrange(sheet.ncols)}
    dict_list.append(d)

print dict_list

For a sheet containing:

A   B   C   D
1   2   3   4
5   6   7   8

it prints:

[{'A': 1.0, 'C': 3.0, 'B': 2.0, 'D': 4.0}, 
 {'A': 5.0, 'C': 7.0, 'B': 6.0, 'D': 8.0}]

UPD (expanding the dictionary comprehension):

# Expanded form of the dict comprehension: fill one row's dict
# column by column.
d = {}
for col_index in xrange(sheet.ncols):
    header = keys[col_index]
    cell = sheet.cell(row_index, col_index)
    d[header] = cell.value
from xlrd import open_workbook

dict_list = []
book = open_workbook('forum.xlsx')
sheet = book.sheet_by_index(3)

# read first row for keys  
keys = sheet.row_values(0)

# read the rest rows for values
values = [sheet.row_values(i) for i in range(1, sheet.nrows)]

for value in values:
    dict_list.append(dict(zip(keys, value)))

print dict_list

Try this one. The function below returns a generator that yields one dict per data row, keyed by the (lower-cased) column headers.

from xlrd import open_workbook

def parse_xlsx():
    """Yield one dict per data row of the first sheet of 'excelsheet.xlsx'.

    The first row (row 0) is treated as the header: each of its cell
    values is lower-cased and used as a key. Every later row is yielded
    as a dict mapping those keys to that row's cell values.
    """
    workbook = open_workbook('excelsheet.xlsx')
    active_sheet = workbook.sheet_by_index(0)
    num_rows = active_sheet.nrows
    num_cols = active_sheet.ncols
    # .lower() is called on every header value, so header cells must be text.
    header = [active_sheet.cell_value(0, col_idx).lower() for col_idx in range(num_cols)]
    for row_idx in range(1, num_rows):
        row_cell = [active_sheet.cell_value(row_idx, col_idx) for col_idx in range(num_cols)]
        yield dict(zip(header, row_cell))


# The generator has to be defined before it is used: running this loop
# above the definition raises NameError.
for row in parse_xlsx():
    print(row)  # {id: 4, thread_id: 100, forum_id: 3, post_time: 1377000566, votes: 1, post_text: 'here is some text'}

This script allows you to transform Excel data into a list of dictionaries:

import xlrd

# Open the workbook once; the original opened it twice and discarded the
# first result. on_demand=True is kept from the original second call.
workbook = xlrd.open_workbook('forum.xls', on_demand=True)
worksheet = workbook.sheet_by_index(0)

# The first row holds the column names, which become the dictionary keys.
first_row = [worksheet.cell_value(0, col) for col in range(worksheet.ncols)]

# Transform the worksheet into a list of dictionaries, one per data row.
data = []
for row in range(1, worksheet.nrows):
    elm = {}
    for col in range(worksheet.ncols):
        elm[first_row[col]] = worksheet.cell_value(row, col)
    data.append(elm)
print(data)

Try setting up your keys first by parsing just the header line across all columns, then write another function to parse the data rows, and call the two functions in that order.

# Module-level accumulators filled in place by the two functions below.
all_fields_list = []   # one dict per parsed data row
header_dict = {}       # column index -> header cell


def parse_data_headers(sheet, header_row=1):
    """Store the header cell of every column in ``header_dict``.

    Args:
        sheet: Object exposing ``ncols`` and ``cell(row, col)``.
        header_row: Row index handed to ``sheet.cell`` to fetch the
            headers. Defaults to 1, the value that used to be hard-coded.
    """
    for c in range(sheet.ncols):
        # Whatever sheet.cell returns is stored as-is, keyed by column index.
        header_dict[c] = sheet.cell(header_row, c)


def parse_data(sheet, first_data_row=2):
    """Append one dict per data row to ``all_fields_list``.

    Each dict maps a column index to whatever ``sheet.cell(row, col)``
    returns for that position.

    Args:
        sheet: Object exposing ``nrows``, ``ncols`` and ``cell(row, col)``.
        first_data_row: Index of the first row treated as data. Defaults
            to 2, the value that used to be hard-coded.
    """
    for r in range(first_data_row, sheet.nrows):
        row_dict = {c: sheet.cell(r, c) for c in range(sheet.ncols)}
        all_fields_list.append(row_dict)

This answer helped me out a lot! I was fiddling with a way to do this for about two hours. Then I found this elegant and short answer. Thanks!

I needed some way to convert xls to json using keys.

So I adapted the script above with a json print statement like so:

from xlrd import open_workbook
import simplejson as json
#http://stackoverflow.com/questions/23568409/xlrd-python-reading-excel-file-into-dict-with-for-loops?lq=1

book = open_workbook('makelijk-bomen-herkennen-schors.xls')
sheet = book.sheet_by_index(0)

# read header values into the list
keys = [sheet.cell(0, col_index).value for col_index in xrange(sheet.ncols)]
print "keys are", keys

dict_list = []
for row_index in xrange(1, sheet.nrows):
    d = {keys[col_index]: sheet.cell(row_index, col_index).value
         for col_index in xrange(sheet.ncols)}
    dict_list.append(d)

#print dict_list
j = json.dumps(dict_list)

# Write to file
with open('data.json', 'w') as f:
    f.write(j)
易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!