import xlrd
def get_data_by_sheet_name(file_path, sheet_name):
tecs_excel_content = xlrd.open_workbook(file_path)
table = tecs_excel_content.sheet_by_name(sheet_name)
merged_cells = get_merged_cells(table.merged_cells)
return get_sheet_data(table, merged_cells)
def parse_cell(cell_contents, cell_type):
if cell_type == xlrd.XL_CELL_NUMBER:
# GH5394 - Excel 'numbers' are always floats
val = int(cell_contents)
if val == cell_contents:
cell_contents = str(val)
else:
cell_contents = str(cell_contents)
elif cell_type == xlrd.XL_CELL_BOOLEAN:
cell_contents = str(bool(cell_contents))
return cell_contents
def get_merged_cells(merged_cells):
all_cells = []
for item in merged_cells:
for i in range(item[0], item[1]):
for j in range(item[2], item[3]):
all_cells.append((i, j))
return all_cells
def get_sheet_data(table, merged_cells):
row_n = table.nrows
sheet_data = []
if row_n < 1:
return sheet_data
col_title = table.row_values(0)
for i in range(1, row_n):
if not table.cell_value(i, 0) and (i, 0) in merged_cells:
for j, col_name in enumerate(col_title):
cell_v = parse_cell(table.cell_value(i, j),
table.cell_type(i, j))
if sheet_data and ((i, j) not in merged_cells or cell_v):
last_value = sheet_data[-1][col_name]
if isinstance(last_value, list):
sheet_data[-1][col_name].append(cell_v)
else:
sheet_data[-1][col_name] = [last_value, cell_v]
else:
row_values = [parse_cell(value, type)
for value, type in
zip(table.row_values(i), table.row_types(i))]
row_data = dict(zip(col_title, row_values))
sheet_data.append(row_data)
return sheet_data