import pandas as pd
from openpyxl import load_workbook
import os
import openpyxl
from openpyxl.drawing.image import Image
import sys
from openpyxl.styles import Font, Border, Side
from openpyxl.styles import Alignment
from openpyxl.styles import PatternFill
import random
import glob
import natsort
SHEET_NAMES = ['BK11', 'P', 'H', 'W', 'T', 'HL', 'J', 'Z', 'C', 'R', 'DB', 'LV']
BK_MAP = {'B0B2ZTZZ3F': 'BK11', 'B0C32WC8S2': 'W', 'B0BRPMS42P': 'H', 'B0C8MCJX7M': 'J',
'B0C32W5HR7': 'HL', 'B0C8MJD479': 'Z', 'B0BJ6W8XGL': 'P', 'B0C32YF1RK': 'T', 'B0CNK95SS7': 'C'}
# 取出文件夹中的csv文件,按数字顺序排序,并给出开始的时间。
# 遍历解析csv文件,得到每个工作表中的关键数值。
# 为了方便Excel表格的数据写入,那就按工作表来存放数值。
# 每个工作表,如BK11中,对每个值,存放:时间,流量(会话次数),销量(已订购商品数量)
class ParseCsvs2Excel():
def __init__(self, csv_folder='./csv_folder', pattern=True) -> None:
self._get_input()
self.csv_folder = csv_folder
self.bk11_s = []
self.p_s = []
self.h_s = []
self.w_s = []
self.t_s = []
self.hl_s = []
self.j_s = []
self.z_s = []
self.c_s = []
self.r_s = []
self.db_s = []
self.pattern = pattern
self._set_color()
# 获取当前工作目录
current_dir = os.getcwd()
# 使用glob模块搜索当前目录下的所有.xlsx文件
xlsx_files = glob.glob(os.path.join(current_dir, '*.xlsx'))
# 打印找到的文件, 一般只有1个,先取1个吧
if len(xlsx_files) > 1:
print("存在多个xlsx格式的文件, 请小芳芳保留1个, 不然跑不通")
sys.exit(-1)
self.xlsx_path = xlsx_files[0]
self.workbook = openpyxl.load_workbook(self.xlsx_path)
# 在当前文件夹中创建new_xlsw文件夹,用于存放新生成的xlsw表格数据。
self.save_xlsw_dir = os.path.join(current_dir, 'new_xlsw')
os.makedirs(self.save_xlsw_dir, exist_ok=True)
def _set_color(self):
# 定义“红橙黄绿青蓝紫”对应的RGB颜色值
colors = {
'红': "E60000", # 暗红色,降低了亮度
'橙': "FFA500", # 橙色保持不变,因为它通常已经比较柔和
'黄': "FFFF99", # 淡黄色,提高了亮度但降低了饱和度
'绿': "00B300", # 深绿色,降低了亮度
'青': "00E6E6", # 淡青色,提高了亮度但降低了饱和度
'蓝': "0066FF", # 深蓝色,降低了亮度
'紫': "9966CC", # 淡紫色,提高了亮度并稍微改变了色调
}
# 随机选择一个颜色
random_color = random.choice(list(colors.values()))
# 创建填充样式
self.fill = PatternFill(start_color=random_color, end_color=random_color, fill_type="solid")
# 创建边框样式
self.border = Border(left=Side(border_style='thin', color='000000'),
right=Side(border_style='thin', color='000000'),
top=Side(border_style='thin', color='000000'),
bottom=Side(border_style='thin', color='000000'))
# 1先解析csv的指定内容到BK11中去 2解析更多内容,解析到子表格 3解析多个csv
def main(self):# 20240305
files = os.listdir(self.csv_folder)
if len(files) > 10:
print(f'小芳芳: CSV文件超过10个, 现在可以分析了')
# 筛选出.csv文件,并排序好。
csv_s = []
for file in files:
if file.endswith('.csv'):
csv_s.append(file)
# new_csv_s = sorted(csv_s)
# 使用natsort进行排序
new_csv_s = natsort.natsorted(csv_s)
new_csv_s = [new_csv_s[-1]] + new_csv_s[:-1]
for file in new_csv_s:
csv_path = os.path.join(self.csv_folder, file)
# 读取CSV文件
print(f'Read csv path: {csv_path}')
self.parse_csv(csv_path, self.start_date)
self.start_date += 1
self.deal_multi_sheet_excel()
def _get_input(self):
date_input = input("请输入日期(如: 20240306)(格式:YYYYMMDD):\n")
try:
print('输入长度:',len(date_input))
if (len(date_input) >= 8):
# 尝试将输入的日期转换为整数
self.start_date = int(date_input[0:8])
print(f'输入的参数为:{self.start_date}')
else:
print("小芳芳, 输入长度必须为8, 如: 20240306。否则无法运行。")
return
except ValueError:
# 如果转换失败,打印错误信息并退出
print("输入的日期格式不正确,请使用YYYYMMDD的格式输入。")
return
def parse_csv(self, csv_file, date):
df = pd.read_csv(csv_file) # df: DataFrame
print('df index:', df.index)
print(df.iloc[0,1])
asin_lines = df['(子)ASIN']
session_count = df['会话次数 – 总计']
ordered_count = df['已订购商品数量']
for i, asin in enumerate(asin_lines):
if asin in BK_MAP:
bk_name = BK_MAP[asin]
bk = []
bk.append(date)
session = str(session_count.iloc[i])
print(f'session: {session}')
# 原本是用于将表格中的去除。这里加个判断。
if ',' in session:
session = session.replace(',', '')
bk.append(int(session))
bk.append(int(ordered_count.iloc[i]))
print(f'{bk_name}, value: {bk}')
if bk_name == 'BK11': #
self.bk11_s.append(bk)
elif bk_name == 'P':
self.p_s.append(bk)
elif bk_name == 'H':
self.h_s.append(bk)
elif bk_name == 'W':
self.w_s.append(bk)
elif bk_name == 'T':
self.t_s.append(bk)
elif bk_name == 'HL':
self.hl_s.append(bk)
elif bk_name == 'J':
self.j_s.append(bk)
elif bk_name == 'Z':
self.z_s.append(bk)
elif bk_name == 'C':
self.c_s.append(bk)
elif bk_name == 'R':
self.r_s.append(bk)
elif bk_name == 'DB':
self.db_s.append(bk)
def deal_multi_sheet_excel(self):
# self.workbook = openpyxl.load_workbook(self.xlsx_path)
# 遍历所有工作表
last_time = ''
for sheet_name in self.workbook.sheetnames:
sheet = self.workbook[sheet_name]
print(f"工作表: {sheet_name}")
if sheet_name in SHEET_NAMES: #不处理“关键词”,“竞品分析表”等表格。
cell2_time_index, cell2_sales_index, cell3_flow_index, cell3_convert_index, cell2_remark_index = self.find_target_index(sheet)
# 存储了如BK11多天的数据,[[时间,流量,销量], []]
sheet_infos = self.get_sheet_infos(sheet_name)
for sheet_info in sheet_infos:
sheet_time = str(sheet_info[0]) # 20240314
last_time = sheet_time
sheet_year = int(sheet_time[0:4])
sheet_month = int(sheet_time[4:6])
sheet_day = int(sheet_time[6:8])
sheet_flow = sheet_info[1]
sheet_sale = sheet_info[2]
# 遍历工作表的行(从第四行开始,即数据行)
for index, row in enumerate(sheet.iter_rows(min_row=4, values_only=False)): # 使用values_only=False以获取单元格对象
time_cell = row[cell2_time_index] # 时间所在索引
cell_time = time_cell.value
if cell_time is None: break # 空的值,就不继续遍历了
year, month, day = cell_time.year, cell_time.month, cell_time.day
# 读取所在列的格式,仅读取一次。字体,线条,对齐
if index == 0:
flow_font = row[cell3_flow_index].font
sales_font = row[cell2_sales_index].font
convert_font = row[cell3_convert_index].font
target_border = self.get_cell_border(row[cell3_flow_index]) #
font_alignment = row[cell3_flow_index].alignment
sales_alignment = row[cell2_sales_index].alignment
convert_alignment = row[cell3_convert_index].alignment
# 按照时间,找到所在行,然后修改其中的“流量,销量,转化率”的值。
if year==sheet_year and month==sheet_month and day==sheet_day: # 检查时间是否匹配
if self.pattern: self.fill_color(sheet, index+4, cell2_remark_index+2) # 给所在行上色,要额外加4行
flow_cell = row[cell3_flow_index]
sales_cell = row[cell2_sales_index]
convert_cell = row[cell3_convert_index]
print(f"时间:{year}-{month}-{day}, 修改前--流量:{flow_cell.value}, 销量:{sales_cell.value}, 转化率:{convert_cell.value}")
# 修改流量和销量的值
flow_cell.value = sheet_flow
sales_cell.value = sheet_sale
convert_cell.value = round(sales_cell.value/flow_cell.value, 4)
print(f"修改后-流量:{flow_cell.value}, 销量:{sales_cell.value}, 转化率:{convert_cell.value}")
# 格式刷
flow_cell.font = Font(name=flow_font.name, size=flow_font.size, bold=flow_font.bold, italic=flow_font.italic, color=flow_font.color)
sales_cell.font = Font(name=sales_font.name, size=sales_font.size, bold=sales_font.bold, italic=sales_font.italic, color=sales_font.color)
convert_cell.font = Font(name=convert_font.name, size=convert_font.size, bold=convert_font.bold, italic=convert_font.italic, color=convert_font.color)
flow_cell.border = target_border
sales_cell.border = target_border
convert_cell.border = target_border
flow_cell.alignment = Alignment(horizontal=font_alignment.horizontal,
vertical=font_alignment.vertical,
text_rotation=font_alignment.text_rotation,
wrap_text=font_alignment.wrap_text,
shrink_to_fit=font_alignment.shrink_to_fit,
indent=font_alignment.indent)
sales_cell.alignment = Alignment(horizontal=sales_alignment.horizontal,
vertical=sales_alignment.vertical,
text_rotation=sales_alignment.text_rotation,
wrap_text=sales_alignment.wrap_text,
shrink_to_fit=sales_alignment.shrink_to_fit,
indent=sales_alignment.indent)
convert_cell.alignment = Alignment(horizontal=convert_alignment.horizontal,
vertical=convert_alignment.vertical,
text_rotation=convert_alignment.text_rotation,
wrap_text=convert_alignment.wrap_text,
shrink_to_fit=convert_alignment.shrink_to_fit,
indent=convert_alignment.indent)
# else:
# print(f"{sheet_name}工作表, 单元格时间:{year}-{month}-{day}, 未找到所有需要的列。")
# 保存修改后的工作簿
file_name = os.path.basename(self.xlsx_path)
save_path = os.path.join(self.save_xlsw_dir, file_name)
save_path = f'{save_path[:-5]}_{last_time}.xlsx'
self.workbook.save(save_path)
print(f'小芳芳, 新的Excel文件名及保存地址: {save_path}')
def fill_color(self, sheet, row, last_col):
# # 定义要填充的颜色
# fill_color = "FFFF00" # 黄色,这里是RGB的十六进制表示,但没有alpha通道,所以实际上是BGR顺序
# fill = PatternFill(start_color=fill_color, end_color=fill_color, fill_type="solid")
# 给第5行的前20列上颜色
for col in range(1, last_col): # 在Excel中列是从1开始的,所以范围是1到20
cell = sheet.cell(row, column=col)
cell.fill = self.fill
cell.border = self.border # 应用边框样式
def get_cell_border(self, source_cell):
source_border = source_cell.border
# 复制边框可能会稍微复杂一些,因为您需要复制每条边的属性
top = Side(style=source_border.top.style, color=source_border.top.color)
right = Side(style=source_border.right.style, color=source_border.right.color)
bottom = Side(style=source_border.bottom.style, color=source_border.bottom.color)
left = Side(style=source_border.left.style, color=source_border.left.color)
target_border = Border(top=top, right=right, bottom=bottom, left=left)
return target_border
def find_target_index(self, sheet):
cell2_time_index = -1
cell2_sales_index = -1
cell3_flow_index = -1
cell3_convert_index = -1
cell2_remark_index = -1
for i,cell in enumerate(sheet[2]):
if cell.value == '时间':
print(f'时间列索引: {cell.value}, index: {i}')
cell2_time_index = i
if cell.value == '销量':
print(f'销量列索引: {cell.value}, index: {i}')
cell2_sales_index = i
if cell.value == '备注':
print(f'备注列索引: {cell.value}, index: {i}')
cell2_remark_index = i
for j,cell in enumerate(sheet[3]):
if cell.value == '流量':
print(f'流量列索引: {cell.value}, index: {j}')
cell3_flow_index = j
if cell.value == '转化率':
cell3_convert_index = j
if cell2_time_index<0 or cell2_sales_index<0 or cell3_flow_index<0 or cell3_convert_index<0:
print(f'异常, 有需要的列索引未找到: {cell2_time_index}, {cell2_sales_index}, {cell3_flow_index}, {cell3_convert_index}')
sys.exit(-1)
return cell2_time_index, cell2_sales_index, cell3_flow_index, cell3_convert_index, cell2_remark_index
def get_sheet_infos(self, sheet_name):
sheet_informations = []
if sheet_name == 'BK11': # ['BK11', 'P', 'H', 'W', 'T', 'HL', 'J', 'Z', 'C', 'R', 'DB', 'LV']
sheet_informations = self.bk11_s
elif sheet_name == 'P':
sheet_informations = self.p_s
elif sheet_name == 'H':
sheet_informations = self.h_s
elif sheet_name == 'W':
sheet_informations = self.w_s
elif sheet_name == 'T':
sheet_informations = self.t_s
elif sheet_name == 'HL':
sheet_informations = self.hl_s
elif sheet_name == 'J':
sheet_informations = self.j_s
elif sheet_name == 'Z':
sheet_informations = self.z_s
elif sheet_name == 'C':
sheet_informations = self.c_s
elif sheet_name == 'R':
sheet_informations = self.r_s
elif sheet_name == 'DB':
sheet_informations = self.db_s
return sheet_informations
if __name__ == '__main__':
parseCsvs = ParseCsvs2Excel(pattern=False)
parseCsvs.main()
抓取CSV文件指定内容到Excel表格中
于 2024-04-11 13:32:54 首次发布