I have some *.xls(excel 2003) files, and I want to convert those files into xlsx(excel 2007).
I use the uno python package, when I save the documents, I can set the
I tried @Jhon Anderson's solution, works well but got an "year is out of range" error when there are cells of time format like HH:mm:ss without date. There for I improved the algorithm again:
def xls_to_xlsx(*args, **kw):
"""
open and convert an XLS file to openpyxl.workbook.Workbook
----------
@param args: args for xlrd.open_workbook
@param kw: kwargs for xlrd.open_workbook
@return: openpyxl.workbook.Workbook对象
"""
book_xls = xlrd.open_workbook(*args, formatting_info=True, ragged_rows=True, **kw)
book_xlsx = openpyxl.workbook.Workbook()
sheet_names = book_xls.sheet_names()
for sheet_index in range(len(sheet_names)):
sheet_xls = book_xls.sheet_by_name(sheet_names[sheet_index])
if sheet_index == 0:
sheet_xlsx = book_xlsx.active
sheet_xlsx.title = sheet_names[sheet_index]
else:
sheet_xlsx = book_xlsx.create_sheet(title=sheet_names[sheet_index])
for crange in sheet_xls.merged_cells:
rlo, rhi, clo, chi = crange
sheet_xlsx.merge_cells(start_row=rlo + 1, end_row=rhi,
start_column=clo + 1, end_column=chi,)
def _get_xlrd_cell_value(cell):
value = cell.value
if cell.ctype == xlrd.XL_CELL_DATE:
datetime_tup = xlrd.xldate_as_tuple(value,0)
if datetime_tup[0:3] == (0, 0, 0): # time format without date
value = datetime.time(*datetime_tup[3:])
else:
value = datetime.datetime(*datetime_tup)
return value
for row in range(sheet_xls.nrows):
sheet_xlsx.append((
_get_xlrd_cell_value(cell)
for cell in sheet_xls.row_slice(row, end_colx=sheet_xls.row_len(row))
))
return book_xlsx
Then work perfect!