Python数据分析基础第5章例

5.1: 

import sys
import csv
import glob
import os
from datetime import date
from xlrd import open_workbook,xldate_as_tuple

# Directory scanned for supplier files, the CSV the matches are written to,
# and the CSV that lists the item numbers to look for.
path="C:\\Users\\wenmiao_\\Desktop\\pya\\11"
outputfile=r"C:\Users\wenmiao_\Desktop\pya\output\output.csv"
findpath=r"C:\Users\wenmiao_\Desktop\11\foundations-for-analytics-with-python-master\applications\item_numbers_to_find.csv"

# Flatten every cell of the lookup file into one list of item numbers.
# The file is opened in a `with` block so the handle is closed as soon as
# the list has been built.
finditems=[]
with open(findpath,'r',newline="") as findfile:
    for row in csv.reader(findfile):
        finditems.extend(row)

# Rows destined for the output file; the first entry is the header row.
result=[]

# True until a header row has been stored in `result`; only the first
# file/sheet that is read contributes the header.
headerJudge=True
for input_file in glob.glob(os.path.join(path,"suppliers*")):
    source_name=os.path.basename(input_file)
    if(input_file.endswith("csv")):
        # Open inside `with` so every CSV handle is closed after it is read.
        with open(input_file,'r',newline="") as csvfile:
            filereader=csv.reader(csvfile)
            header=next(filereader,None)
            if(header is None):
                # Completely empty file: nothing to collect.
                continue
            if(headerJudge):
                result.append(header+["from","fromsheet"])
                headerJudge=False
            for row in filereader:
                # `row` is empty for blank lines; skip those instead of
                # failing on row[0].
                if(row and row[0] in finditems):
                    # Column 3 looks like "$1,234.56"; strip the currency
                    # formatting and keep only the integer part, as a string.
                    row[3]=str(int(float(row[3].lstrip("$").replace(",",""))))
                    row.append(source_name)
                    # CSV files have no sheet, so "fromsheet" stays empty.
                    row.append("")
                    result.append(row)
    else:
        # Anything that is not a CSV is treated as an Excel workbook.
        with open_workbook(input_file) as workbook:
            for worksheet in workbook.sheets():
                if(worksheet.nrows==0):
                    # An empty sheet has neither a header nor data rows.
                    continue
                if(headerJudge):
                    header3=[]
                    headerJudge=False
                    for i in range(worksheet.ncols):
                        header3.append(worksheet.cell_value(0,i))
                    header3.append("from")
                    header3.append("fromsheet")
                    result.append(header3)
                for rownum in range(1,worksheet.nrows):
                    # The first column is converted via int() before the
                    # lookup so that a numeric cell such as 1234.0 matches
                    # the text "1234" in finditems.
                    if(worksheet.cell_value(rownum,0)!="" and str(int(worksheet.cell_value(rownum,0))) in finditems):
                        a=[]
                        for i in range(worksheet.ncols):
                            # Cell type 3 is xlrd's date cell type
                            # (XL_CELL_DATE); format it as mm/dd/YYYY.
                            if(worksheet.cell_type(rownum,i)==3):
                                datecell=xldate_as_tuple(worksheet.cell_value(rownum,i),workbook.datemode)
                                datecell=date(*datecell[0:3]).strftime('%m/%d/%Y')
                                a.append(datecell)
                            else:
                                a.append(worksheet.cell_value(rownum,i))
                        a.append(source_name)
                        a.append(worksheet.name)
                        result.append(a)

# Echo every collected row (header first) to stdout.
for record in result:
    print(record)

# Write the same rows to the output CSV in one call.
with open(outputfile,'w',newline='') as out_handle:
    csv.writer(out_handle).writerows(result)

自己实现的,与书上实现方法略有不同。

将表示浮点数的字符串转换为表示整数的字符串,也可以先用 split(".") 按小数点分割,再取第一段(整数部分)。

日期转换的写法记录在这里,以备日后忘记时查阅。

 

5.2:

import csv
from datetime import date,datetime

# CSV that is read by this exercise.
inputfile = r"C:\Users\lsy\Desktop\11\foundations-for-analytics-with-python-master\applications\customer_category_history.csv"
# Output location kept next to the input path.
outputfile = r"C:\Users\lsy\Desktop\pya\output\output.csv"
# Collected [name, category, days] records.
result = []

def date_diff(date1,date2):
    """Return the number of days from date2 to date1.

    Both arguments are expected to be strings in mm/dd/YYYY format.

    Returns:
        The day difference as a string (for example '5' or '-1') when it is
        non-zero, and the integer 0 when the dates are equal or when either
        argument cannot be parsed. Callers wrap the result in int().
    """
    try:
        delta=datetime.strptime(date1,'%m/%d/%Y')-datetime.strptime(date2,'%m/%d/%Y')
    except (TypeError,ValueError):
        # Unparseable or non-string input counts as "no difference"; only
        # parsing failures are swallowed, not unrelated errors.
        return 0
    if(delta.days==0):
        return 0
    # Kept as a string so the return contract matches what callers already
    # receive for non-zero differences.
    return str(delta.days)

# Today's date in the same mm/dd/YYYY format as the dates read from the file.
today=date.today().strftime('%m/%d/%Y')
with open(inputfile,'r',newline='') as f:
    filereader=csv.reader(f)
    header=next(filereader,None)
    firstline=next(filereader,None)
    # Only process the file when it has at least one data row.
    if(firstline is not None):
        # State of the group currently being accumulated: customer name,
        # category, the date of the most recent row seen for it, and the
        # days accumulated so far. The local is called prev_date so that it
        # does not shadow the imported `date` class.
        name=firstline[0]
        prev_date=firstline[3]
        cate=firstline[1]
        count=0
        for i in filereader:
            # If the tracked name or category is blank, adopt the values of
            # the current row before comparing.
            if(name==""):
                name=i[0]
                cate=i[1]
                prev_date=i[3]
            elif(cate==""):
                cate=i[1]
                prev_date=i[3]
            if(i[0]!=name):
                # New customer: close the previous customer's last category,
                # counting the days from its last row up to today.
                result.append([name,cate,int(date_diff(today,prev_date))+count])
                count=0
                name=i[0]
                cate=i[1]
                prev_date=i[3]
            elif(i[1]!=cate):
                # Same customer, new category: close the old category with
                # the days up to the row where the category changed.
                result.append([name,cate,int(date_diff(i[3],prev_date))+count])
                count=0
                cate=i[1]
                prev_date=i[3]
            else:
                # Same customer and category: keep accumulating days.
                count=count+int(date_diff(i[3],prev_date))
                prev_date=i[3]
        # Close the group that is still open when the file ends, counted up
        # to today exactly like the customer-change branch; otherwise the
        # last customer's last category would never be reported.
        result.append([name,cate,int(date_diff(today,prev_date))+count])
for i in result:
    print(i)

5.3:

inputfile=r"C:\Users\lsy\Desktop\11\foundations-for-analytics-with-python-master\applications\mysql_server_error_log.txt"
result=[]
# Message fragments that are counted per server session. The last entry
# ("Shutdown complete") marks the end of a session.
types=["InnoDB: Compressed tables use zlib 1.2.3","InnoDB: Using atomics to ref count buffer pool pages",
      "InnoDB: 5.6.16 started; log sequence number 1234567","InnoDB: Completed initialization of buffer pool",
      "InnoDB: IPv6 is available","/usr/local/mysql/bin/mysqld: Shutdown complete"]
# One [first-field-of-first-session-line, counts-per-type] entry per session.
aa=[]
with open(inputfile,'r',newline='') as textfile:
    lines=list(textfile)

shutdown_index=len(types)-1
# A while loop with an explicit cursor is used because a whole session is
# consumed at once, so the position has to jump forward by several lines.
pos=0
while(pos<len(lines)):
    if("mysqld_safe Starting" not in lines[pos]):
        pos+=1
        continue
    # The session's messages start on the line after the "Starting" line.
    pos+=1
    if(pos>=len(lines)):
        break
    counts=[0]*len(types)
    session=[lines[pos].split(" ")[0]]
    # Consume lines until the shutdown message, or the end of the file if
    # the session was never shut down cleanly.
    while(pos<len(lines)):
        k=-1
        for j,pattern in enumerate(types):
            if(pattern in lines[pos]):
                k=j
                break
        pos+=1
        # Lines that match none of the patterns are skipped; indexing with
        # -1 would wrongly add them to the last ("Shutdown complete") slot.
        if(k>=0):
            counts[k]+=1
        if(k==shutdown_index):
            break
    session.append(counts)
    aa.append(session)
for i in aa:
    print(i)

在 Python 的 for 循环中,每次迭代开始时 i 都会被迭代器重新赋值,所以在循环体内修改 i 不会影响后续迭代;需要一次跳过多行时,应另用一个变量记录位置,或改用 while 循环。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值