5.1:
import sys
import csv
import glob
import os
from datetime import date
from xlrd import open_workbook,xldate_as_tuple
# Script 5.1: search every "suppliers*" file (CSV or Excel) in `path` for the
# item numbers listed in `findpath`, and collect the matching rows into one
# CSV, tagging each row with the file (and worksheet) it came from.
path = "C:\\Users\\wenmiao_\\Desktop\\pya\\11"
outputfile = r"C:\Users\wenmiao_\Desktop\pya\output\output.csv"
findpath = r"C:\Users\wenmiao_\Desktop\11\foundations-for-analytics-with-python-master\applications\item_numbers_to_find.csv"

# Every cell of the lookup file is treated as an item number.  A set gives
# O(1) membership tests, and the `with` block closes the file (the original
# left the handle open).
with open(findpath, 'r', newline="") as find_file:
    finditems = {cell for row in csv.reader(find_file) for cell in row}

result = []
headerJudge = True  # stays True until the single header row has been emitted

for input_file in glob.glob(os.path.join(path, "suppliers*")):
    source_name = os.path.basename(input_file)

    if input_file.endswith("csv"):
        with open(input_file, 'r', newline="") as csv_file:
            filereader = csv.reader(csv_file)
            header = next(filereader, None)
            if header is None:
                # Completely empty CSV file: nothing to search.
                continue
            if headerJudge:
                result.append(header + ["from", "fromsheet"])
                headerJudge = False
            for row in filereader:
                # `row` is [] for blank lines; skip those instead of crashing.
                if row and row[0] in finditems:
                    # Normalise the fourth column from text such as
                    # "$1,234.00" to a whole-number string such as "1234".
                    row[3] = str(int(float(row[3].lstrip("$").replace(",", ""))))
                    # CSV files have no worksheet, so "fromsheet" is empty.
                    row.extend([source_name, ""])
                    result.append(row)
    else:
        # Anything that is not a CSV is handed to xlrd as an Excel workbook.
        with open_workbook(input_file) as workbook:
            for worksheet in workbook.sheets():
                if headerJudge:
                    header_row = [worksheet.cell_value(0, col)
                                  for col in range(worksheet.ncols)]
                    result.append(header_row + ["from", "fromsheet"])
                    headerJudge = False
                for rownum in range(1, worksheet.nrows):
                    item = worksheet.cell_value(rownum, 0)
                    # The first cell is converted with int() before the
                    # lookup, so it is assumed to be numeric -- a
                    # non-numeric text cell would still raise ValueError.
                    if item == "" or str(int(item)) not in finditems:
                        continue
                    matched = []
                    for col in range(worksheet.ncols):
                        value = worksheet.cell_value(rownum, col)
                        # Cell type 3 is xlrd's XL_CELL_DATE: convert the
                        # Excel serial number to an mm/dd/YYYY string.
                        if worksheet.cell_type(rownum, col) == 3:
                            date_parts = xldate_as_tuple(value, workbook.datemode)
                            value = date(*date_parts[0:3]).strftime('%m/%d/%Y')
                        matched.append(value)
                    matched.extend([source_name, worksheet.name])
                    result.append(matched)

for record in result:
    print(record)

with open(outputfile, 'w', newline='') as filewriters:
    csv.writer(filewriters).writerows(result)
自己实现的版本,与书上的实现方法略有不同。
把字符串形式的浮点数转换成字符串形式的整数,除了先转 float 再转 int 之外,还可以用 split 按小数点分割后再切片取整数部分。
日期(date)单元格的转换写法保留在这里,以防以后忘记。
5.2:
import csv
from datetime import date,datetime
# Script 5.2 configuration.
# CSV file read by the processing loop below.
inputfile=r"C:\Users\lsy\Desktop\11\foundations-for-analytics-with-python-master\applications\customer_category_history.csv"
# NOTE(review): defined but not referenced by the 5.2 code visible in this file.
outputfile=r"C:\Users\lsy\Desktop\pya\output\output.csv"
# Collects one [name, category, day count] row per finished customer/category run.
result=[]
def date_diff(date1, date2):
    """Return the number of whole days from ``date2`` to ``date1``.

    Both arguments are expected to be strings in ``'%m/%d/%Y'`` format.

    Returns:
        The signed day count as a string (e.g. ``'5'`` or ``'-3'``) when the
        dates differ, or the integer ``0`` when they are equal or when either
        argument cannot be parsed.  Callers wrap the result in ``int()``, so
        this mixed return type is kept for compatibility.
    """
    try:
        delta = (datetime.strptime(date1, '%m/%d/%Y')
                 - datetime.strptime(date2, '%m/%d/%Y'))
    except (ValueError, TypeError):
        # ValueError: text does not match the format; TypeError: not a string.
        # Only parsing failures are absorbed -- a bare ``except`` would also
        # have swallowed KeyboardInterrupt and SystemExit.
        return 0
    # Both values are parsed without a time component, so the difference is
    # always a whole number of days and ``.days`` is exact.
    return str(delta.days) if delta.days else 0
# Script 5.2 main loop: walk the customer history rows in file order and, for
# each run of consecutive rows sharing a customer name (column 0) and category
# (column 1), record [name, category, days spent in that category].
# Column 3 holds the row's date as mm/dd/YYYY.

# Today's date closes the still-open final interval of each customer.
today = date.today().strftime('%m/%d/%Y')

with open(inputfile, 'r', newline='') as f:
    filereader = csv.reader(f)
    header = next(filereader, None)      # header row is read and discarded
    firstline = next(filereader, None)   # None when the file has no data rows

    if firstline is not None:
        name = firstline[0]
        cate = firstline[1]
        # Named `prev_date` rather than `date` so the imported `date` class
        # is no longer shadowed by a string.
        prev_date = firstline[3]
        count = 0  # days accumulated so far in the current name/category run

        for row in filereader:
            # Back-fill a blank name or category from the current row.
            if name == "":
                name, cate, prev_date = row[0], row[1], row[3]
            elif cate == "":
                cate, prev_date = row[1], row[3]

            if row[0] != name:
                # New customer: the previous customer's last category is
                # counted up to today.
                result.append([name, cate, int(date_diff(today, prev_date)) + count])
                count = 0
                name, cate, prev_date = row[0], row[1], row[3]
            elif row[1] != cate:
                # Same customer, new category: close the old category at
                # this row's date.
                result.append([name, cate, int(date_diff(row[3], prev_date)) + count])
                count = 0
                cate, prev_date = row[1], row[3]
            else:
                # Same customer and category: keep accumulating days.
                count += int(date_diff(row[3], prev_date))
                prev_date = row[3]

        # Flush the final customer's open category up to today.  The loop
        # only emits a record when the *next* row differs, so without this
        # the last customer in the file never appeared in `result`.
        result.append([name, cate, int(date_diff(today, prev_date)) + count])

for record in result:
    print(record)
5.3:
# Script 5.3: summarise a MySQL server error log.  For every server session
# (introduced by a "mysqld_safe Starting" line) record the first
# space-separated token of the session's first line together with how many
# times each message in `types` occurred, up to and including the
# "Shutdown complete" message.
inputfile = r"C:\Users\lsy\Desktop\11\foundations-for-analytics-with-python-master\applications\mysql_server_error_log.txt"
result = []  # NOTE(review): not used by the 5.3 code visible in this file
types = ["InnoDB: Compressed tables use zlib 1.2.3", "InnoDB: Using atomics to ref count buffer pool pages",
         "InnoDB: 5.6.16 started; log sequence number 1234567", "InnoDB: Completed initialization of buffer pool",
         "InnoDB: IPv6 is available", "/usr/local/mysql/bin/mysqld: Shutdown complete"]
# The last entry of `types` ends a session (the original hard-coded index 5).
shutdown_index = len(types) - 1
aa = []   # one [first_token, counts] entry per session
ii = -1   # index of the first line not yet consumed by a session


def _match_type(line):
    """Return the index of the first entry of `types` found in `line`, or -1."""
    for idx, message in enumerate(types):
        if message in line:
            return idx
    return -1


with open(inputfile, 'r', newline='') as textfile:
    lines = list(textfile)

for i, line in enumerate(lines):
    # Reassigning a for-loop variable does not affect the next iteration, so
    # lines already consumed by a session are skipped through `ii` instead.
    if i < ii:
        continue
    if "mysqld_safe Starting" not in line:
        continue

    ii = i + 1
    if ii >= len(lines):
        # The start marker is the very last line: no session body to read.
        break

    a = [0] * len(types)
    dd = [lines[ii].split(" ")[0]]

    # Consume lines until the shutdown message, or until the file ends (the
    # original raised IndexError when a session had no shutdown line).
    while ii < len(lines):
        k = _match_type(lines[ii])
        ii += 1
        if k != -1:
            # Count recognised messages only.  The original incremented
            # a[-1] for unmatched lines, inflating the shutdown counter.
            a[k] += 1
        if k == shutdown_index:
            break

    dd.append(a)
    aa.append(dd)

for entry in aa:
    print(entry)
由于 Python 的 for 循环每次迭代都会从 range 中重新为 i 赋值,在循环体内修改 i 不会影响下一次迭代;因此上面另用变量 ii 记录已处理到的行,并用 continue 跳过这些行。