测试时间
主要是利用time这个包,并不复杂
import time

# Time a one-million-step float accumulation.
# perf_counter() is the clock intended for measuring short intervals; unlike
# time.time() it is not affected by system clock adjustments.
start = time.perf_counter()  # start of the measured interval
doubledata = 0.0
for i in range(1000000):
    doubledata += i
print(doubledata)
end = time.perf_counter()  # end of the measured interval
print("cost time一共用了", end - start)
装饰器
起初我有一点不明白:costtime 里调用的 func 为什么就是下面定义的函数 func?原因是 costtime 把函数当作参数接收,形参 func 只是一个变量名;执行 costtime(func) 时,才把下面定义的函数 func 作为实参传进去。注意:这里只是"把函数当作参数"的写法,还没有用到 @ 语法的装饰器。
def costtime(func):
    """Call *func* once with no arguments and print how long the call took.

    This is a plain higher-order function: *func* is invoked immediately,
    it is not wrapped and nothing is returned.

    Args:
        func: Callable taking no arguments.
    """
    import time

    # perf_counter() is meant for interval timing and is unaffected by
    # system clock adjustments.
    start = time.perf_counter()
    func()
    end = time.perf_counter()
    print("cost time一共用了", end - start)
def func():
    """Add the integers 0..999999 into a float and print the total.

    Serves as the workload being timed; returns None.
    """
    doubledata = 0.0
    for i in range(1000000):
        doubledata += i
    print(doubledata)
# Run func once through costtime, which prints the elapsed time.
costtime(func)
装饰器参数
加上了可输入的n
def costtime(func, n):
    """Call ``func(n)`` once and print how long the call took.

    Args:
        func: Callable taking one argument.
        n: Value forwarded unchanged to *func*.
    """
    import time

    # perf_counter() is meant for interval timing and is unaffected by
    # system clock adjustments.
    start = time.perf_counter()
    func(n)
    end = time.perf_counter()
    print("cost time一共用了", end - start)
def func(n):
    """Add the integers 0..n-1 into a float and print the total.

    Args:
        n: Number of loop iterations (passed to ``range``).
    """
    doubledata = 0.0
    for i in range(n):
        doubledata += i
    print(doubledata)
# Time func with one million iterations.
costtime(func,1000000)
数据查询装饰器
我照着源代码敲了一遍,思路跟之前一样,只是多了很多步骤,一步一步的完成
class CSDNfind:
def __init__(self):#初始化函数
self.file=open(r"C:\Users\Tsinghua-yincheng\Desktop\YinchengDay8\csdn.txt","rb")
pass
def memload(self):
self.memlist=self.file.readlines()#文件载入内存
self.file.seek(0,0)#文件回到开头
def diskfind(self,findstr):#硬盘查询
self.file.seek(0,0)#文件回到开头
while True:
line =self.file.readline()
if not line: #为空跳出
break
line=line.decode("gbk",errors="ignore")#解码,
linelist=line.split(" # ")#切割
if findstr==linelist[0]: #判断
print(line)
break #找到就退出
def memfind(self,findstr):#内存查询
for line in self.memlist: #遍历列表
line=line.decode("gbk",errors="ignore")#解码,
linelist=line.split(" # ")#切割
if findstr==linelist[0]: #判断
print(line)
break #找到就退出
def findcosttime(self,func,findstr,searchtype):#查询时间
import time
start=time.time()#开始时间
func(findstr)
end=time.time()#结束时间
print(searchtype,"cost time一共用了",end-start)
def __del__(self):#删除
self.file.close()
pass
# Open the data file and time two lookups that scan the file on disk.
csdn=CSDNfind()
csdn.findcosttime(csdn.diskfind,"yincheng01","硬盘")
csdn.findcosttime(csdn.diskfind,"yincheng0571","硬盘")
# The string literal below is disabled code (never executed): it loads the
# file into memory and times the same kind of lookups with memfind.
'''
csdn.memload()
print("内存载入完成")
csdn.findcosttime(csdn.memfind,"yincheng01","内存")
csdn.findcosttime(csdn.memfind,"yincheng0571","内存")
csdn.findcosttime(csdn.memfind,"yincheng","内存")
'''
行缓冲
linecache.updatecache:当文件内容发生改变时,如果需要获得到最新的文件内容,那么需要调用这个方法。如果不调用,仍然使用在缓存中的老文件内容。类似的操作还可以是linecache.clearcache之后再getlines也行。
import os
import linecache
# Author's note: linecache only handles UTF-8 encoded files.
# Goal: jump straight to an arbitrary range of lines.
path = r"C:\Users\Tsinghua-yincheng\Desktop\YinchengDay8\csdnmail.txt"
linecache.updatecache(path)  # refresh linecache's cached copy of the file
print("OK")
mylist = linecache.getlines(path)  # all cached lines, as a list
mylist = mylist[2500000:2500100]  # keep indices 2500000..2500099
print(mylist)
二分查找
方法:考研时学过的查找算法里算是最简单的一种。先把数据排好序,取最左和最右位置的中间元素与目标比较大小,把范围缩小到对应的一半之后,再取新的中间元素,如此反复,直到找到目标或范围为空。
def search2(finddata, findlist):
    """Binary-search *findlist* for *finddata*.

    Args:
        finddata: Value to look for.
        findlist: Indexable sequence sorted in ascending order.

    Returns:
        The index at which *finddata* was found (also printed), or -1 after
        printing a not-found message.
    """
    low = 0  # first candidate index
    high = len(findlist) - 1  # last candidate index
    while low <= high:  # the window is empty once low passes high
        mid = (low + high) // 2
        midata = findlist[mid]
        if finddata < midata:
            high = mid - 1  # continue in the left half
        elif finddata > midata:
            low = mid + 1  # continue in the right half
        else:
            print("找到", finddata, mid)
            return mid
    print("找不到")
    return -1
def search(finddata, findlist):
    """Scan *findlist* from the front for an element equal to *finddata*.

    Args:
        finddata: Value to look for.
        findlist: Iterable of candidate values.

    Returns:
        0 (not the index) after printing the first match; None, with no
        output, when nothing matches. Both results are falsy, so compare
        with ``is None`` to tell them apart.
    """
    for data in findlist:
        if data == finddata:
            print(data, "找到")
            return 0
def costtime(func, finddata, findlist):
    """Call ``func(finddata, findlist)`` once and print how long it took.

    Args:
        func: Two-argument search callable.
        finddata: Value forwarded as the first argument.
        findlist: Collection forwarded as the second argument.
    """
    import time

    # perf_counter() is meant for interval timing and is unaffected by
    # system clock adjustments.
    start = time.perf_counter()
    func(finddata, findlist)
    end = time.perf_counter()
    print("cost time一共用了", end - start)
# Build the ascending list 0..99999999 to search in. list(range(n)) yields the
# same list as the element-by-element comprehension without a Python-level loop.
findlist = list(range(100000000))
finddata = 98000000
# Time the binary search, then the linear scan, on the same input.
costtime(search2, finddata, findlist)
costtime(search, finddata, findlist)
内存检索二分查找法
将原本for循环的方式改为二分法
def getuser(line):
    """Return the first " # "-separated field of a raw record.

    Args:
        line: One record as bytes; decoded as GBK with undecodable bytes
            dropped.

    Returns:
        The text before the first " # ", or the whole decoded line when the
        separator does not occur.
    """
    text = line.decode("gbk", errors="ignore")
    # partition() stops at the first separator, so the rest of the line is
    # not split; the result equals split(" # ")[0].
    return text.partition(" # ")[0]
class CSDNfind:
    """Search a text dump for the record whose first field equals a key.

    The file is read in binary mode. Each line is decoded as GBK (undecodable
    bytes are dropped) and split on " # "; the first piece is compared with
    the search string. Lookups can scan the file, scan an in-memory copy, or
    binary-search the in-memory copy once it has been sorted.
    """

    # File opened when no path is supplied (the originally hard-coded one).
    DEFAULT_PATH = r"C:\Users\Tsinghua-yincheng\Desktop\YinchengDay8\csdn.txt"

    def __init__(self, path=DEFAULT_PATH):
        """Open the data file.

        Args:
            path: File to search; defaults to ``DEFAULT_PATH``.
        """
        # Assigned before open() so close()/__del__ stay safe if open() raises.
        self.file = None
        # Filled by memload(); empty until then so the in-memory searches
        # report "not found" instead of raising AttributeError.
        self.memlist = []
        self.file = open(path, "rb")

    @staticmethod
    def _decode_if_match(raw, findstr):
        """Return the decoded line if its first field equals *findstr*, else None."""
        text = raw.decode("gbk", errors="ignore")
        if text.split(" # ")[0] == findstr:
            return text
        return None

    def memload(self):
        """Read every line of the file into ``self.memlist``."""
        self.memlist = self.file.readlines()
        self.file.seek(0, 0)  # rewind so later disk searches start at the top

    def memsort(self):
        """Sort ``self.memlist`` in place by each record's first field."""
        # getuser is the module-level helper that extracts that field.
        self.memlist.sort(key=getuser)

    def memsearch2(self, findstr):
        """Binary-search the sorted in-memory lines for *findstr*.

        ``self.memlist`` must already be ordered by first field (see
        memsort()); otherwise matches can be missed.

        Returns:
            The list index of the matching record (also printed together
            with the decoded line), or -1 after printing a not-found message.
        """
        low = 0
        high = len(self.memlist) - 1
        while low <= high:
            mid = (low + high) // 2
            line = self.memlist[mid].decode("gbk", errors="ignore")
            midstr = line.split(" # ")[0]  # field being compared
            if findstr < midstr:
                high = mid - 1
            elif findstr > midstr:
                low = mid + 1
            else:
                print("找到", mid, line)
                return mid
        print("找不到")
        return -1

    def diskfind(self, findstr):
        """Scan the file line by line and print the first matching record."""
        self.file.seek(0, 0)  # always search from the beginning
        # readline() returns b"" at end of file, which ends the iteration.
        for raw in iter(self.file.readline, b""):
            text = self._decode_if_match(raw, findstr)
            if text is not None:
                print(text)
                break  # stop at the first hit

    def memfind(self, findstr):
        """Scan the lines loaded by memload() and print the first match."""
        for raw in self.memlist:
            text = self._decode_if_match(raw, findstr)
            if text is not None:
                print(text)
                break  # stop at the first hit

    def findcosttime(self, func, findstr, searchtype):
        """Run ``func(findstr)`` and print *searchtype* with the elapsed time.

        Args:
            func: One-argument callable, e.g. ``self.memsearch2``.
            findstr: Key forwarded to *func*.
            searchtype: Label printed in front of the timing.
        """
        import time

        start = time.perf_counter()
        func(findstr)
        end = time.perf_counter()
        print(searchtype, "cost time一共用了", end - start)

    def close(self):
        """Close the underlying file if it was opened."""
        handle = getattr(self, "file", None)
        if handle is not None:
            handle.close()

    def __del__(self):
        # Best-effort cleanup; close() tolerates a failed __init__.
        self.close()
# Open the data file and load all of its lines into memory.
csdn=CSDNfind()
csdn.memload()
# Time linear scans over the in-memory lines.
csdn.findcosttime(csdn.memfind,"yincheng01","内存")
csdn.findcosttime(csdn.memfind,"yincheng0571","内存")
# Time linear scans over the file on disk.
csdn.findcosttime(csdn.diskfind,"yincheng01","硬盘")
csdn.findcosttime(csdn.diskfind,"yincheng0571","硬盘")
# Sort the in-memory lines first; memsearch2 is a binary search and needs
# sorted input.
csdn.memsort()
print("排序完成")
csdn.findcosttime(csdn.memsearch2,"yincheng01","内存 2分")
csdn.findcosttime(csdn.memsearch2,"yincheng0571","内存 2分")
# The two string literals below are disabled code (never executed) kept from
# earlier experiments: disk-only lookups and memory-only lookups.
'''
csdn=CSDNfind()
csdn.findcosttime(csdn.diskfind,"yincheng01","硬盘")
csdn.findcosttime(csdn.diskfind,"yincheng0571","硬盘")
'''
'''
csdn.memload()
print("内存载入完成")
csdn.findcosttime(csdn.memfind,"yincheng01","内存")
csdn.findcosttime(csdn.memfind,"yincheng0571","内存")
csdn.findcosttime(csdn.memfind,"yincheng","内存")
'''
不消耗内存的二分查找法
90g文件0.0s完成查询
# 6428632 is the number of index entries the original search assumed.
def search2(findstr, count=6428632, width=10):
    """Binary-search the data file through a fixed-width offset index.

    Reads the module-level binary file objects ``indexfile`` and
    ``csdnfile``. Entry *i* of the index occupies bytes
    ``[i * width, (i + 1) * width)`` and holds, as decimal text, the byte
    offset of a line in ``csdnfile``. Only one index entry and one data line
    are read per step, so neither file is loaded into memory.
    Presumably the index lists lines in ascending order of their first
    field, which a binary search requires -- verify against the code that
    builds the index.

    Args:
        findstr: Key compared with the first " # "-separated field of each
            probed line.
        count: Number of entries in the index.
        width: Size in bytes of one index entry.

    Returns:
        The index position of the match (also printed with the line), or -1
        after printing a not-found message.
    """
    low = 0
    high = count - 1
    while low <= high:
        mid = (low + high) // 2
        indexfile.seek(width * mid, 0)  # jump to entry number mid
        # int() parses the decimal text (surrounding whitespace allowed)
        # without executing file contents the way eval() would.
        offset = int(indexfile.read(width))
        csdnfile.seek(offset, 0)  # jump to the line that entry points at
        line = csdnfile.readline().decode("gbk", errors="ignore")
        midstr = line.split(" # ")[0]
        if findstr > midstr:
            low = mid + 1
        elif findstr < midstr:
            high = mid - 1
        else:
            print("找到", mid, line)
            return mid
    print("没有找到")
    return -1
import time

# Open the data file and its offset index; search2() reads both through these
# module-level names. The with-statement guarantees they are closed when the
# loop ends, which the original never-ending loop could not do.
with open(r"C:\Users\Tsinghua-yincheng\Desktop\YinchengDay8\csdnsort.txt", "rb") as csdnfile, \
        open(r"C:\Users\Tsinghua-yincheng\Desktop\YinchengDay8\csdnsortindex.txt", "rb") as indexfile:
    while True:
        try:
            mystr = input("输入要查找的数据")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C at the prompt: leave the loop cleanly.
            break
        start = time.perf_counter()
        search2(mystr)
        end = time.perf_counter()
        print("cost time一共用了", end - start)