pandas和SQL数据分析实战视频教程
统计excel C列 化学 和D列 补充申请 两个条件药品数量
程序优势在于批量统计,把参数写到excel里,行为单位
# -*- coding: utf-8 -*-
"""
Created on Thu Apr 7 08:38:51 2016
content内容必须是逗号分隔,例如'化药,中药'
用于统计excel--sheet---二级条件筛选的数据统计
算法:
1.读取所有excel内容,保存到二维列表中
2.二级筛选
@author: Administrator
"""
import csv,xlrd,os,openpyxl
#获取所有excel文件名
def Get_excelFileNames():
excelFiles_list=[]
for excelFilename in os.listdir('.'):
if excelFilename.endswith('.xlsx') or excelFilename.endswith('.xls'):
excelFiles_list.append(excelFilename)
return excelFiles_list
#获取一个excel内首页表格
def Get_sheet1_from_oneExcel(excelFileName):
wb=xlrd.open_workbook(excelFileName)
sheet=wb.sheets()[0]
return sheet
#获取excel的首页信息,去除首行
def Get_sheet1_Data_NoFirstLine(excelFileName):
sheet=Get_sheet1_from_oneExcel(excelFileName)
#获取表格行数和列数
highest_row=sheet.nrows
highest_column=sheet.ncols
sheet_data=[]
for rowNum in range(1,highest_row):
#获取行数据
rowData=sheet.row_values(rowNum)
sheet_data.append(rowData)
return sheet_data
#把csv文档内容转换成一个列表,不包括首行
def Read_CsvRows_noFirstLine(csvFilename):
csvRows = []
csvFileObj = open(csvFilename)
readerObj = csv.reader(csvFileObj)
for row in readerObj:
if readerObj.line_num == 1:
continue # skip first row
csvRows.append(row)
csvFileObj.close()
return csvRows
def one_excel_column_statistic(csv_row):
#csv特定行包含excel名字,sheet名,列,搜索内容
excelFilename=csv_row[0]
#print("excelFilename",excelFilename)
sheetName=csv_row[1]
#print("sheetName",sheetName)
column=int(csv_row[2])
#print("column",column)
content=csv_row[3]
#'化药,中药'必须转换为['化药', '中药'],否则无法遍历
content_list=content.split(",")
wb=xlrd.open_workbook(excelFilename)
sheet=wb.sheet_by_name(sheetName)
#获取excel指定列的类容
column_content_list=sheet.col_values(column)
#print("column_content_list",column_content_list)
number=0
for i in content_list:
count=column_content_list.count(i)
number+=count
print("excel文件名",excelFilename)
print("统计内容:",content)
print("数量:",number)
print("-"*20)
return number
def all_excel_column_statistic(csvFileName):
csv_content=Read_CsvRows_noFirstLine(csvFileName)
for csv_row in csv_content:
one_excel_column_statistic(csv_row)
def second_condition_statistic(csv_row):
count=0
excelFileName=csv_row[0]
sheet_data=Get_sheet1_Data_NoFirstLine(excelFileName)
Condition1=csv_row[2].split(',')[1]
Column1_letter=(csv_row[2].split(',')[0])
index1=column_letter_index_convert(Column1_letter)
Condition2=csv_row[3].split(',')[1]
Column2_letter=(csv_row[3].split(',')[0])
index2=column_letter_index_convert(Column2_letter)
for rowData in sheet_data:
if rowData[index1]==Condition1 and rowData[index2]==Condition2:
count+=1
print("excel文件名",excelFileName)
print("统计内容:",Condition1+">"+Condition2)
print("数量:",count)
print("-"*20)
return count
def all_second_condition_statistic(csvFileName):
csvRows=Read_CsvRows_noFirstLine(csvFileName)
for csv_row in csvRows:
second_condition_statistic(csv_row)
#excel列的
def column_letter_index_convert(letter):
list_letter_index=[["A",0],["B",1],["C",2],["D",3],["E",4],["F",5],["G",6],["H",7],["I",8],\
["J",9],["K",10],["L",11],["M",12],["N",13],["O",14],["P",15],["Q",16],["R",17],\
["S",18],["T",19],["U",20],["V",21],["W",22],["X",23],["Y",24],["Z",25]]
for i in list_letter_index:
if i[0]==letter.upper():
index=i[1]
return index
csvFileName="config.csv"
all_second_condition_statistic(csvFileName)
'''
excelFileName='1月.xlsx'
csv_row=['1月.xlsx', 'Sheet1', 'C,化药', '3,补充申请']
'''
print("问题反馈邮箱:231469242@qq.com")
print(" .-. ")
print(" \ \ ")
print(" \ \ ")
print(" 喵 喵 喵 | | ")
print(" | | ")
print(" /\---/\ _,---._ | | ")
print(" /^ ^ \,' `. ; ")
print(" ( O O ) ; ")
print(" `.=o=__,' \ ")
print(" / _,--.__ \ ")
print(" / _ ) ,' `-. `-. \ ")
print(" / ,' / ,' \ \ \ \ ")
print(" / / / ,' (,_)(,_) ")
print("(,; (,,) ")
var=input("enter anykey to quit")