合并csv表

1.0版本

import faker
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.styles import Alignment
import csv


def csv_to_xlsx():
    # csv = pd.read_csv(r'rawdatapy.csv', encoding='utf-8')
    # csv.to_excel(r'rawdatapy.xlsx', sheet_name='data')
    with open('rawdatapy.csv', 'r', encoding='utf-8') as f:
        read = csv.reader(f)
        wb = Workbook()
        ws = wb.active
        for line in read:
            ws.append(line)
        wb.save('rawdatapy.xlsx')  # 保存Excel
    print("---将原始的csv文件转化为xlsx文件---")


def xlsx_to_csv():
    # df = pd.read_excel("rawdatapy.xlsx")
    # df.to_csv('result.csv')
    ob = csv.writer(open("result.csv", 'w', newline="", encoding='utf-8'))
    data = load_workbook('rawdatapy.xlsx').active
    for r in data.rows:
        row = [a.value for a in r]
        ob.writerow(row)
    print("---成功将xlsx转化为csv文件---")


# a代表城市数量 b代表每个城市的街道数量 c代表的每个城市每个街道的每个数据(假设都是一样的)
def write_data(a=3, b=20, c=300):
    wb = Workbook()
    my_sheet = wb.active
    my_sheet.title = "data"

    my_sheet.cell(1, 1, str("城市"))
    my_sheet.cell(1, 2, str("街道"))
    my_sheet.cell(1, 3, str("地址"))

    # 产生数据时 保证城市名称不同 所有的街道名称互不相同
    city_list = []
    street_list = []

    cnt = 2
    for i in range(0, a):
        f = faker.Faker(locale='zh_CN')
        while city_list.count(f.city_name()) > 0:
            f = faker.Faker(locale='zh_CN')
        city = f.city_name()
        city_list.append(city)
        for j in range(0, b):
            f = faker.Faker(locale='zh_CN')
            while street_list.count(f.street_name()) > 0:
                f = faker.Faker(locale='zh_CN')
            street = f.street_name()
            street_list.append(street)
            for k in range(0, c):
                f = faker.Faker(locale='zh_CN')
                addr = f.address()
                my_sheet.cell(row=cnt, column=1, value=str(city))
                my_sheet.cell(row=cnt, column=2, value=str(street))
                my_sheet.cell(row=cnt, column=3, value=str(addr))
                cnt = cnt + 1
    wb.save("rawdatapy.xlsx")
    print("xlsx格式表格写入数据成功!")
    wb.close()


# 前多少列需要合并
def read_data(num=2):
    wb1 = load_workbook("rawdatapy.xlsx")
    my_sheet = wb1.worksheets[0]
    # 获取行数
    rows = my_sheet.max_row

    need_to_merge = []
    for i in range(1, num+1):
        dict_from = {}
        # 行
        for ii in range(2, rows + 1):
            # 读取数值
            r = ""
            if i == 1:  # 如果是第一列 只拿第一列数据作为判断标准
                r = str(my_sheet.cell(ii, i).value)
            if i == 2:  # 如果是第二列 防止出现不同城市有相同的街道 拿前两列的数据拼起来作为判断标准
                r = str(my_sheet.cell(ii, i-1).value) + str(my_sheet.cell(ii, i).value)
            if i == 3:  # 如果是第三列 前三列数据拼起来合并
                r = str(my_sheet.cell(ii, i-2).value) + str(my_sheet.cell(ii, i-1).value) + str(my_sheet.cell(ii, i).value)
            if not dict_from.get(r):
                dict_from[r] = [i, ii, ii]
            else:
                dict_from[r][2] = dict_from[r][2] + 1
        dict_cross = {1: 'A', 2: 'B', 3: 'C'}  # 仅仅针对三列来合并
        # print(dict_from)
        for key, value in dict_from.items():
            list_value0 = dict_cross[value[0]]
            if not (f'{list_value0}{value[1]}' == f'{list_value0}{value[2]}'):
                # print(f'{list_value0}{value[1]}:{list_value0}{value[2]}')
                # my_sheet.merge_cells(f'{list_value0}{value[1]}:{list_value0}{value[2]}')
                # my_sheet[f'{list_value0}{value[1]}'].alignment = Alignment(horizontal='center', vertical='center')
                # wb1.save("rawdatapy.xlsx")
                temp = [f'{list_value0}{value[1]}', f'{list_value0}{value[2]}']
                need_to_merge.append(temp)

    # 真正开始合并
    for kk in need_to_merge:
        # tt = f'kk[0]:kk[1]'
        my_sheet.merge_cells(f'{kk[0]}:{kk[1]}')
        my_sheet[f'{kk[0]}'].alignment = Alignment(horizontal='center', vertical='center')
        # wb1.save("rawdatapy.xlsx")
    wb1.save("rawdatapy.xlsx")
    wb1.close()
    print("--合并结束finish--")


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # write_data()
    # print("get--data")
    # csv_to_xlsx()  # 将输入的csv问价转化为同名的xlxs文件
    read_data()  # 读取数据后进行合并
    # xlsx_to_csv()  # 将合并后的文件转化为csv的后缀


1.1 版本 新思路实现

import faker
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.styles import Alignment
import csv


def csv_to_xlsx():
    # csv = pd.read_csv(r'rawdatapy.csv', encoding='utf-8')
    # csv.to_excel(r'rawdatapy.xlsx', sheet_name='data')
    with open('rawdatapy.csv', 'r', encoding='utf-8') as f:
        read = csv.reader(f)
        wb = Workbook()
        ws = wb.active
        for line in read:
            ws.append(line)
        wb.save('rawdatapy.xlsx')  # 保存Excel
    print("---将原始的csv文件转化为xlsx文件---")


def xlsx_to_csv():
    # df = pd.read_excel("rawdatapy.xlsx")
    # df.to_csv('result.csv')
    ob = csv.writer(open("result.csv", 'w', newline="", encoding='utf-8'))
    data = load_workbook('rawdatapy.xlsx').active
    for r in data.rows:
        row = [a.value for a in r]
        ob.writerow(row)
    print("---成功将xlsx转化为csv文件---")


# a代表城市数量 b代表每个城市的街道数量 c代表的每个城市每个街道的每个数据(假设都是一样的)
def write_data(a=3, b=20, c=300):
    wb = Workbook()
    my_sheet = wb.active
    my_sheet.title = "data"

    my_sheet.cell(1, 1, str("城市"))
    my_sheet.cell(1, 2, str("街道"))
    my_sheet.cell(1, 3, str("地址"))

    # 产生数据时 保证城市名称不同 所有的街道名称互不相同
    city_list = []
    street_list = []

    cnt = 2
    for i in range(0, a):
        f = faker.Faker(locale='zh_CN')
        while city_list.count(f.city_name()) > 0:
            f = faker.Faker(locale='zh_CN')
        city = f.city_name()
        city_list.append(city)
        for j in range(0, b):
            f = faker.Faker(locale='zh_CN')
            while street_list.count(f.street_name()) > 0:
                f = faker.Faker(locale='zh_CN')
            street = f.street_name()
            street_list.append(street)
            for k in range(0, c):
                f = faker.Faker(locale='zh_CN')
                addr = f.address()
                my_sheet.cell(row=cnt, column=1, value=str(city))
                my_sheet.cell(row=cnt, column=2, value=str(street))
                my_sheet.cell(row=cnt, column=3, value=str(addr))
                cnt = cnt + 1
    wb.save("rawdatapy.xlsx")
    print("xlsx格式表格写入数据成功!")
    wb.close()


# 前多少列需要合并
def read_data(num=2):
    wb1 = load_workbook("rawdatapy.xlsx")
    my_sheet = wb1.worksheets[0]
    # 获取行数
    rows = my_sheet.max_row
    # 获取列数
    cols = my_sheet.max_column
    # 创建辅助数组
    help_array = [[0 for i in range(num)] for j in range(rows)]  # 创建一个rows*cols的数组

    # 开始遍历每一列来给辅助数组赋值
    for i in range(1, num + 1):
        cnt = 0
        for j in range(2, rows + 1):  # 遍历每一行(第1行直接掠过)
            k_new = str(my_sheet.cell(j, i).value)
            k_old = str(my_sheet.cell(j - 1, i).value)
            if k_old != k_new:
                cnt = cnt + 1
            help_array[j - 1][i - 1] = cnt

    # 得到辅助数组后 利用辅助数组来合并
    for i in range(0, num):
        dict_from = {}
        # 行
        for ii in range(0, rows):
            # 读取数值
            r = ""
            if i == 0:  # 如果是第一列 只拿第一列数据作为判断标准
                r = str(help_array[ii][i])
            if i == 1:  # 如果是第二列 防止出现不同城市有相同的街道 拿前两列的数据拼起来作为判断标准
                r = str(help_array[ii][i-1]) + str(help_array[ii][i])
            if i == 2:  # 如果是第三列 前三列数据拼起来合并
                r = str(help_array[ii][i-2]) + str(help_array[ii][i-1]) + str(help_array[ii][i])
            if not dict_from.get(r):
                dict_from[r] = [i, ii, ii]
            else:
                dict_from[r][2] = dict_from[r][2] + 1
        dict_cross = {1: 'A', 2: 'B', 3: 'C'}  # 仅仅针对三列来合并
        for key, value in dict_from.items():
            list_value0 = dict_cross[value[0]+1]
            if not (f'{list_value0}{value[1]+1}' == f'{list_value0}{value[2]+1}'):
                my_sheet.merge_cells(f'{list_value0}{value[1]+1}:{list_value0}{value[2]+1}')
                my_sheet[f'{list_value0}{value[1]+1}'].alignment = Alignment(horizontal='center', vertical='center')
                # wb1.save("rawdatapy.xlsx")
    # 合并结束
    wb1.save("rawdatapy.xlsx")
    wb1.close()
    print("--合并结束finish--")


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # write_data()
    # print("get--data")
    # csv_to_xlsx()  # 将输入的csv问价转化为同名的xlxs文件
    read_data(3)  # 读取数据后进行合并
    # xlsx_to_csv()  # 将合并后的文件转化为csv的后缀

1.2 最新版本 优化

import faker
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.styles import Alignment
import csv


def csv_to_xlsx():
    # csv = pd.read_csv(r'rawdatapy.csv', encoding='utf-8')
    # csv.to_excel(r'rawdatapy.xlsx', sheet_name='data')
    with open('rawdatapy.csv', 'r', encoding='utf-8') as f:
        read = csv.reader(f)
        wb = Workbook()
        ws = wb.active
        for line in read:
            ws.append(line)
        wb.save('rawdatapy.xlsx')  # 保存Excel
    print("---将原始的csv文件转化为xlsx文件---")


def xlsx_to_csv():
    # df = pd.read_excel("rawdatapy.xlsx")
    # df.to_csv('result.csv')
    ob = csv.writer(open("result.csv", 'w', newline="", encoding='utf-8'))
    data = load_workbook('rawdatapy.xlsx').active
    for r in data.rows:
        row = [a.value for a in r]
        ob.writerow(row)
    print("---成功将xlsx转化为csv文件---")


# a代表城市数量 b代表每个城市的街道数量 c代表的每个城市每个街道的每个数据(假设都是一样的)
def write_data(a=3, b=20, c=300):
    wb = Workbook()
    my_sheet = wb.active
    my_sheet.title = "data"

    my_sheet.cell(1, 1, str("城市"))
    my_sheet.cell(1, 2, str("街道"))
    my_sheet.cell(1, 3, str("地址"))

    # 产生数据时 保证城市名称不同 所有的街道名称互不相同
    city_list = []
    street_list = []

    cnt = 2
    for i in range(0, a):
        f = faker.Faker(locale='zh_CN')
        while city_list.count(f.city_name()) > 0:
            f = faker.Faker(locale='zh_CN')
        city = f.city_name()
        city_list.append(city)
        for j in range(0, b):
            f = faker.Faker(locale='zh_CN')
            while street_list.count(f.street_name()) > 0:
                f = faker.Faker(locale='zh_CN')
            street = f.street_name()
            street_list.append(street)
            for k in range(0, c):
                f = faker.Faker(locale='zh_CN')
                addr = f.address()
                my_sheet.cell(row=cnt, column=1, value=str(city))
                my_sheet.cell(row=cnt, column=2, value=str(street))
                my_sheet.cell(row=cnt, column=3, value=str(addr))
                cnt = cnt + 1
    wb.save("rawdatapy.xlsx")
    print("xlsx格式表格写入数据成功!")
    wb.close()


# 前多少列需要合并
def read_data(num=2):
    wb1 = load_workbook("rawdatapy.xlsx")
    my_sheet = wb1.worksheets[0]
    # 获取行数
    rows = my_sheet.max_row
    # 获取列数 其实不需要用到
    cols = my_sheet.max_column
    # 创建辅助数组
    help_array = [[0 for i in range(num)] for j in range(rows)]  # 创建一个rows*num的数组

    # 开始遍历每一列来给辅助数组赋值
    for i in range(1, num + 1):
        cnt = 0
        for j in range(2, rows + 1):  # 遍历每一行(第1行直接掠过)
            k_new = str(my_sheet.cell(j, i).value)
            k_old = str(my_sheet.cell(j - 1, i).value)
            if k_old != k_new:
                cnt = cnt + 1
            help_array[j - 1][i - 1] = cnt

    # 用来生成excel表里的可能需要用到的列 A, B, C这样
    dict_cross = {}
    for i in range(1, num+1):   # i取值为[1, num]
        dict_cross[i] = chr(ord('A')+i-1)   # ord()是获得字符的ascii值 chr()是根据ascii获得字符

    # 得到辅助数组后 利用辅助数组来合并
    for i in range(0, num):
        dict_from = {}
        # 遍历行
        for ii in range(0, rows):

            r = ""
            # 利用for循环来拼接前面的信息 作为dict的判断依据 用' '隔开
            for k in range(0, i+1):
                r = r + ' ' + str(help_array[ii][k])

            # dict_from:[key, value] key:单元格的实际数据  value:是一个三元组[x, y, z] x:记录列数 y:记录第一次出现的行数 z:记录最后一次
            if not dict_from.get(r):
                dict_from[r] = [i, ii, ii]
            else:
                dict_from[r][2] = dict_from[r][2] + 1

        for key, value in dict_from.items():
            # list_value0 = dict_cross[value[0]+1]
            letter = dict_cross[value[0]+1]     # letter取值为 A, B, C.....
            if not (f'{letter}{value[1]+1}' == f'{letter}{value[2]+1}'):
                my_sheet.merge_cells(f'{letter}{value[1]+1}:{letter}{value[2]+1}')
                my_sheet[f'{letter}{value[1]+1}'].alignment = Alignment(horizontal='center', vertical='center')
                # wb1.save("rawdatapy.xlsx")
    # 合并结束
    wb1.save("rawdatapy.xlsx")
    wb1.close()
    print("--合并结束finish--")


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # write_data()
    # print("get--data")
    # csv_to_xlsx()  # 将输入的csv问价转化为同名的xlxs文件
    read_data(3)  # 读取数据后进行合并
    # xlsx_to_csv()  # 将合并后的文件转化为csv的后缀

1.3版本 优化变量名称

import faker
from openpyxl import load_workbook
from openpyxl import Workbook
from openpyxl.styles import Alignment
import csv


def csv_to_xlsx():
    with open('rawdatapy.csv', 'r', encoding='utf-8') as f:
        read = csv.reader(f)
        wb = Workbook()
        ws = wb.active
        for line in read:
            ws.append(line)
        wb.save('rawdatapy.xlsx')  # 保存Excel
    print("---将原始的csv文件转化为xlsx文件---")


def xlsx_to_csv():
    ob = csv.writer(open("result.csv", 'w', newline="", encoding='utf-8'))
    data = load_workbook('rawdatapy.xlsx').active
    for r in data.rows:
        row = [a.value for a in r]
        ob.writerow(row)
    print("---成功将xlsx转化为csv文件---")


# a代表城市数量 b代表每个城市的街道数量 c代表的每个城市每个街道的每个数据(假设都是一样的)
def write_data(a=3, b=20, c=300):
    wb = Workbook()
    my_sheet = wb.active
    my_sheet.title = "data"

    my_sheet.cell(1, 1, str("城市"))
    my_sheet.cell(1, 2, str("街道"))
    my_sheet.cell(1, 3, str("地址"))

    # 产生数据时 保证城市名称不同 所有的街道名称互不相同
    city_list = []
    street_list = []

    cnt = 2
    for i in range(0, a):
        f = faker.Faker(locale='zh_CN')
        while city_list.count(f.city_name()) > 0:
            f = faker.Faker(locale='zh_CN')
        city = f.city_name()
        city_list.append(city)
        for j in range(0, b):
            f = faker.Faker(locale='zh_CN')
            while street_list.count(f.street_name()) > 0:
                f = faker.Faker(locale='zh_CN')
            street = f.street_name()
            street_list.append(street)
            for k in range(0, c):
                f = faker.Faker(locale='zh_CN')
                addr = f.address()
                my_sheet.cell(row=cnt, column=1, value=str(city))
                my_sheet.cell(row=cnt, column=2, value=str(street))
                my_sheet.cell(row=cnt, column=3, value=str(addr))
                cnt = cnt + 1
    wb.save("rawdatapy.xlsx")
    print("xlsx格式表格写入数据成功!")
    wb.close()


# 前多少列需要合并
def read_data(num=2):
    wb1 = load_workbook("rawdatapy.xlsx")
    my_sheet = wb1.worksheets[0]
    # 获取行数
    MaxRows = my_sheet.max_row
    # 创建辅助数组
    help_array = [[0 for i in range(num)] for j in range(MaxRows)]  # 创建一个MaxRows*num的数组

    # 开始遍历每一列来给辅助数组赋值
    for i in range(1, num + 1):
        cnt = 0
        for j in range(2, MaxRows + 1):  # 遍历每一行(第1行直接掠过)
            k_new = str(my_sheet.cell(j, i).value)
            k_old = str(my_sheet.cell(j - 1, i).value)
            if k_old != k_new:
                cnt = cnt + 1
            help_array[j - 1][i - 1] = cnt

    # 用来生成excel表里的可能需要用到的列 A, B, C这样
    letterList = {}
    for i in range(1, num+1):   # i取值为[1, num]
        letterList[i] = chr(ord('A')+i-1)   # ord()是获得字符的ascii值 chr()是根据ascii获得字符

    # 得到辅助数组后 利用辅助数组来合并
    for i in range(0, num):
        dictList = {}
        # 遍历行
        for row in range(0, MaxRows):

            r = ""
            # 利用for循环来拼接前面的信息 作为dictList的判断依据 用' '隔开
            for k in range(0, i+1):
                r = r + ' ' + str(help_array[row][k])

            # dictList:[key, value] key:单元格的实际数据  value:是一个三元组[x, y, z] x:记录列数 y:记录第一次出现的行数 z:记录最后一次
            if not dictList.get(r):
                dictList[r] = [i, row, row]
            else:
                dictList[r][2] = dictList[r][2] + 1

        for key, value in dictList.items():
            letter = letterList[value[0]+1]     # letter取值为 A, B, C.....
            if not (f'{letter}{value[1]+1}' == f'{letter}{value[2]+1}'):
                my_sheet.merge_cells(f'{letter}{value[1]+1}:{letter}{value[2]+1}')
                my_sheet[f'{letter}{value[1]+1}'].alignment = Alignment(horizontal='center', vertical='center')
    # 合并结束
    wb1.save("rawdatapy.xlsx")
    wb1.close()
    print("--合并结束finish--")


# Press the green button in the gutter to run the script.
if __name__ == '__main__':
    # write_data()
    # print("get--data")
    # csv_to_xlsx()  # 将输入的csv问价转化为同名的xlxs文件
    read_data(3)  # 读取数据后进行合并
    # xlsx_to_csv()  # 将合并后的文件转化为csv的后缀

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值