一、excel表格内容的合并排序
现在有一张美国2010年人口普查简表 ,我们需要对这张表的,根据表格的C、D列的信息对表格进行合并,即将每一个Country的总人数统计出来,并另存为。
分析:
我们对表格进行分析,发现表格的同一个County信息都是连续存在的,一旦我们在excel表中的几行中将一个County的信息全部处理完,那么excel表之后再也不会出现该County的信息了。这样极大的减少了我们的处理的工作量。
import openpyxl
import os
def CreateWorkBook(filename):
"""
创建一个新的工作簿
:param filename: 新建的工作簿的名称(可以添加绝对路径)
:return:
"""
pwd = os.path.split(filename)
if not pwd[1].endswith('.xlsx'):
new_filename = pwd[1]+'.xlsx'
else:
new_filename = pwd[1]
wb = openpyxl.Workbook(new_filename)
wb.save(pwd[0]+os.path.sep+new_filename)
print("新建Excel[%s]成功!" % (pwd[0]+os.path.sep+new_filename))
return pwd[0]+os.path.sep+new_filename
def ReadWb_MergeSheet(filename,new_filename):
"""
:param filename: 源excel文件
:param new_filename: 合并后保存的文件名
:return:
"""
wb = openpyxl.load_workbook(filename)
new_wb = openpyxl.load_workbook(new_filename)
sheet = wb.active
new_sheet = new_wb.active
#新表的第一个County信息,用于和旧表扫描的当前的行的County进行对比
now_County = sheet.cell(row = 2,column = 3).value
People_number = 0 #当前同一个County的人数
new_sheet_row= 1 #新表的最新的行数
for index,row in enumerate(sheet.rows):
#逐行对表进行扫描,得到该行的对象和索引,因为第一行( index = 0 )是表头,单独处理
if index == 0:
#表头拷贝
new_sheet.cell(row=index+1, column=1).value = sheet.cell(row=new_sheet_row, column=2).value
new_sheet.cell(row=index+1, column=2).value = sheet.cell(row=new_sheet_row, column=3).value
new_sheet.cell(row=index+1, column=3).value = sheet.cell(row=new_sheet_row, column=4).value
new_sheet_row =new_sheet_row +1 #新表表头已经添加完成,行数再次加1,变为2
else:
if sheet.cell(row = index+1,column = 3).value == now_County:
#当前行的County和上一行的County值相同,说明是一个同一个County,需要将人数加起来
People_number = People_number+ sheet.cell(row = index+1,column = 4).value
else:
# 当前行的County和上一行的County值不相同,说明是两个不同的County,
# 且上一个County的信息已经扫描完毕,需要写入新表
new_sheet.cell(row = new_sheet_row, column = 1).value = \
sheet.cell(row = index, column = 2).value
new_sheet.cell(row = new_sheet_row, column = 2).value = now_County
new_sheet.cell(row = new_sheet_row, column = 3).value = People_number
now_County = sheet.cell(row = index+1,column = 3).value
new_sheet_row += 1
People_number = sheet.cell(row = index+1,column = 4).value
#当前行的County的人数需要重新开始计算
new_wb.save(new_filename) #保存新表
filename = '/root/PycharmProjects/day14/censuspopdata.xlsx'
new_filename = '/root/PycharmProjects/day14/new_censuspopdata.xlsx'
ReadWb_MergeSheet(filename,CreateWorkBook(new_filename))
我们对旧表处理完后,得到一个新表new_censuspopdata.xlsx,这个表只有三列,state、County、POP2010 。
二、数据库应用
随机生成100个人名和对应的密码,人名由三个汉字或者2个汉字组成,姓 = [许, 张, 赵, 钱, 孙, 李, 朱, 杨]
名 = [彬, 群, 宁, 盼, 龙, 欢, 丹]
密码统一6位, 由字母和字符组成,存储上述用户信息到数据库中, 保存在数据库mariadb_learn中的userinfo表中
import pymysql
import random
def Create_Passwd():
"""
常用字符的ASCII(包含空格和127(DEL键的操作码))为32~127
我们的密码不能有空格和EDL键,因此取ASCII码为33-126的字符
:return:一个长度为6的随机字符串
"""
char_list = list()
for num in range(6):
char = str(chr(random.randint(33,126)))
char_list.append(char)
return ''.join(char_list)
def Create_Username():
"""
:return: 一个username
"""
surnames = ['许','张','赵','钱','孙','李','朱','杨']
name = ['彬','群','宁','盼','龙','欢','丹']
return random.choice(surnames)+random.choice(name)
def Create_Table(tablename,**kwargs):
conn = pymysql.connect(**kwargs)
cur = conn.cursor()
try:
cur.execute('drop table if exists %s;'%(tablename))
cur.execute('create table %s(username varchar(10),passwd '
'varchar(10));'%(tablename))
except Exception as Error:
print(Error)
else:
print('table create sucess!')
finally:
conn.commit()
cur.close()
conn.close()
def Save_name_into_db(tablename,**kwargs):
Create_Table(tablename,**kwargs)
conn = pymysql.connect(**kwargs)
cur = conn.cursor()
list_infor=[(Create_Username(),Create_Passwd())for num in range(100)
try:
insert_sqli = 'insert into %s'%(tablename)+' values(%s,%s);'
cur.executemany(insert_sqli,list_infor)
except Exception as Error:
print(Error)
else:
print('sql insert success!!')
finally:
conn.commit()
conn.close()
cur.close()
dbconfig = {'host' :'localhost',
'user' : 'root',
'passwd' : '123456',
'db' :'mariadb_learn',
'charset': 'utf8'}
Save_name_into_db('userinfor',**dbconfig)
三、wc命令的实现
from optparse import OptionParser
def Statistical_File_Lines(filename):
try :
with open(filename,'r') as file_it:
#使用file_it进行循环比file_it.readlines()
#更好,file_it,是一个文件对象的生成器,这样可以适合大文件读取
for index,line in enumerate(file_it):
pass
except Exception as Erorr:
raise Erorr
else:
return index+1
def Statistical_File_Words(filename):
words = 0
try:
with open(filename,'r') as file_it:
for line in file_it:
words += len(line.split())
except Exception as Erorr:
raise Erorr
else:
return words
def Statistical_File_bytes(filename):
byte = 0
try:
with open(filename, 'r') as file_it:
for line in file_it:
byte += len(line.encode('utf-8'))
except Exception as Erorr:
raise Erorr
else:
return byte
def main():
USAGE = "wc [-clw][--help][--version][文件...]"
parser = OptionParser(USAGE)
words = 0
lines = 0
byte = 0
#新加属性
parser.add_option('-l','--lines',action = 'store_true',default = 'False',
dest ='lines',help ='print the newline counts')
parser.add_option('-w','--words',action = 'store_true',default = 'False',
dest = 'words',help ='print the word counts')
parser.add_option('-c','--bytes',action = 'store_true',default = 'False',
dest = 'bytes',help ='print the byte counts')
options ,args = parser.parse_args()
if len(args) == 1:
filename = args[0] #通过args接受文件名参数
if options.lines==True : #命令行参数存在-l,统计文件的行数.
lines = Statistical_File_Lines(filename)
if options.words == True : #命令行参数存在-w,统计文件的单词数.
words = Statistical_File_Words(filename)
if options.bytes == True: #命令行参数存在-c,统计文件的字节数.
byte = Statistical_File_bytes(filename)
print(lines," ",words," ",byte," ",filename)
else:
print ('arg'+' '+args[1]+' '+'Error!!!')
if __name__ == '__main__':
main()
本代码仅仅只是实现了wc 命令的三个参数-l、-c、-w ,功能相对比较单一。