系列文章目录
文章目录
前言
指定路径下有多级目录，每个目录下都有指定的 CSV 文件。我们需要计算每个 CSV 文件中指定列的总和与平均值，并将结果保存到新的 Excel 文件中。
一、csv文件处理
#xlwt只支持xls格式,xlsx格式需要用openpyxl或pandas
# -*- coding: utf-8 -*-
# coding=gbk
import pandas as pd
import os
import xlrd
import xlwt
import csv
from xlutils.copy import copy
from openpyxl import workbook
from openpyxl import load_workbook
from os.path import dirname
from decimal import Decimal
from openpyxl.utils.dataframe import dataframe_to_rows
# 读写2007 excel
import openpyxl
# Module-level placeholder, initially None; some_function() rebinds it
# through a `global` statement.
global_var = None
def some_function():
    """Rebind the module-level ``global_var`` to the value of ``col1``.

    NOTE(review): ``col1`` is not defined in the visible part of this file.
    Unless it is assigned at module level elsewhere, calling this function
    raises ``NameError`` -- confirm against the rest of the script.
    """
    global global_var
    global_var = col1
def get_allfile_msg(file_dir):
    """Return the top-level listing of *file_dir*, keeping spreadsheet files only.

    Only the first entry produced by ``os.walk`` is used, so sub-directories
    are reported in ``dirs`` but are never descended into.

    Args:
        file_dir: Directory to inspect.

    Returns:
        A ``(root, dirs, files)`` tuple in which ``files`` holds the names in
        *file_dir* that end in ``.xls``, ``.xlsx`` or ``.csv``. When
        ``os.walk`` yields nothing (e.g. *file_dir* does not exist),
        ``(file_dir, [], [])`` is returned so callers can always unpack
        three values instead of receiving ``None``.
    """
    wanted = ('.xls', '.xlsx', '.csv')
    for root, dirs, files in os.walk(file_dir):
        # Return on the first iteration: only the top level is wanted.
        return root, dirs, [name for name in files if name.endswith(wanted)]
    # os.walk produced no entries at all (missing or unreadable directory).
    return file_dir, [], []
def get_allfile_url(root, files):
    """Build the path of every name in *files* under the directory *root*.

    Args:
        root: Directory path used as the prefix (a string).
        files: Iterable of file names located in *root*.

    Returns:
        A new list containing ``root + "/" + name`` for each name, in the
        same order as *files*.
    """
    # The literal "/" separator is kept so the produced strings stay
    # exactly what existing callers already receive.
    return [root + "/" + file_name for file_name in files]
def get_file_name(path, suffix=('.xlsx', '.xls', '.csv')):
    """Recursively collect the paths of files under *path* that match *suffix*.

    Args:
        path: Root directory to walk; every sub-directory level is visited.
        suffix: A single extension string, or an iterable of extension
            strings, that a file name must end with to be kept. Defaults to
            ``.xlsx``, ``.xls`` and ``.csv``.

    Returns:
        A list of ``os.path.join(root, name)`` paths for the matching files.
        The list is empty when nothing matches or ``os.walk`` yields nothing.
    """
    # str.endswith accepts a str or a tuple of str, so any other iterable
    # (for example a list passed by an existing caller) is normalised.
    wanted = suffix if isinstance(suffix, str) else tuple(suffix)
    matches = []
    for root, _dirs, files in os.walk(path):
        matches.extend(
            os.path.join(root, name) for name in files if name.endswith(wanted)
        )
    return matches
def convert_csv_to_xlsx(csv_file_path, xlsx_file_path):
    """Load a CSV file with pandas and write its contents to an Excel file.

    Args:
        csv_file_path: Path of the CSV file, read with ``pd.read_csv``
            using its default options.
        xlsx_file_path: Destination path handed to ``DataFrame.to_excel``.
    """
    # index=False keeps the DataFrame's row index out of the written sheet.
    pd.read_csv(csv_file_path).to_excel(xlsx_file_path, index=False)
#定义读取csv_pandas
def read_csv_file(file_path):
#参数:error_bad_lines=False跳过错误的行 delimiter=',',encoding = 'gbk',header = 0, engine='python' sep = r"\s+\s{0}" encoding = "iso-8859-1"