继续今天的python学习
昨天还差一个代码没有实现,今天补上,只需要做一点小小的改动就实现了,很简单
import os
import os.path
import random
import time
from multiprocessing import Pool,Manager
def read_filepath(top_path, AllFilePath):
    """Collect the paths of all ``.py`` files found under ``top_path``.

    The process working directory is changed to ``top_path``, the directory
    tree below it is walked recursively, and the path of every file whose
    extension is exactly ``.py`` is appended to ``AllFilePath``.

    Args:
        top_path: Directory to scan; converted with ``str()`` before use.
        AllFilePath: List that receives the matching paths (mutated in place).

    Raises:
        OSError: If ``top_path`` cannot be entered with ``os.chdir``.
    """
    os.chdir(str(top_path))
    # os.walk ends on its own, so a plain for-loop replaces the manual
    # next()/StopIteration handling.
    for dir_path, _dir_names, file_names in os.walk(os.getcwd()):
        for file_name in file_names:
            if os.path.splitext(file_name)[1] == ".py":
                # os.path.join uses the platform's separator instead of a
                # hard-coded backslash, and never doubles it at a drive root.
                AllFilePath.append(os.path.join(dir_path, file_name))
def writefile(old_filepath, new_filepath,q):
    """Copy one file byte-for-byte and report completion on ``q``.

    Args:
        old_filepath: Path of the file to read (opened in binary mode).
        new_filepath: Path of the copy to write (opened in binary mode).
        q: Queue-like object with a ``put`` method; exactly one random number
            is put on it per call, whether or not the copy succeeded.

    Raises:
        OSError: If either file cannot be opened, read or written. The
            completion message is still put on ``q`` before the error
            propagates.
    """
    try:
        with open(old_filepath, "rb") as fr:
            file_contents = fr.read()
        with open(new_filepath, "wb") as fw:
            fw.write(file_contents)
    finally:
        # Signal even when the copy fails: a consumer that counts one message
        # per file would otherwise wait forever for the missing one.
        # The value itself is irrelevant; any item works as a "done" marker.
        q.put(random.random())
    time.sleep(1)  # artificial delay so the progress output is easy to watch
def main():
    """Copy every ``.py`` file under a source tree into one target directory.

    Both directories are read from standard input. The copies are done by a
    pool of 5 worker processes running ``writefile``; each worker puts one
    message on a shared queue, and this function prints a progress percentage
    as the messages arrive. When two source files share a base name, the
    second and later copies are named ``<stem>-<n>.py`` so they do not
    overwrite each other.
    """
    AllFilePath = []
    file_dict = dict()  # base name -> how many times it has been seen so far
    top_path = input("请输入顶层路径:")
    the_new_path = input("请输入新路径:")
    read_filepath(top_path, AllFilePath)

    number = len(AllFilePath)
    if number == 0:
        # Nothing to copy. Without this guard the progress loop below would
        # block on q.get() forever, because no worker ever puts a message.
        print("没有找到.py文件")
        return

    po = Pool(5)
    q = Manager().Queue()
    for each_path in AllFilePath:
        filename = os.path.basename(each_path)
        if filename in file_dict:
            file_dict[filename] += 1
            stem = os.path.splitext(filename)[0]
            new_name = stem + "-" + str(file_dict[filename]) + ".py"
        else:
            file_dict[filename] = 1
            new_name = filename
        # os.path.join instead of a hard-coded backslash separator.
        new_path = os.path.join(the_new_path, new_name)
        po.apply_async(writefile, (each_path, new_path, q))

    nowNum = 0
    while nowNum < number:  # one queue message is consumed per copied file
        q.get()
        nowNum += 1
        rate = nowNum / number
        print("\r进度:%.2f%%" % (rate * 100), end="")

    # The loop above has already waited for every worker's message, so join()
    # is not needed for synchronisation; it just shuts the pool down cleanly.
    po.close()
    po.join()


if __name__ == "__main__":
    main()
其中有一些模块是我之前学习到的,读者可以阅读python文档来查看模块的具体使用方法。
今天接触到的新知识是线程,我们用到了threading模块中的Thread类,这个类的用法和之前multiprocessing模块中的Process类很像
from threading import Thread
# Counter shared by both worker threads; no lock protects it in this demo.
the_number = 0


def work1():
    """Add 1 to the shared counter one million times, then print it."""
    global the_number
    for _ in range(10 ** 6):
        the_number += 1
    print("the_number = %d" % (the_number,))


def work2():
    """Same work as work1, run concurrently from a second thread."""
    global the_number
    for _ in range(10 ** 6):
        the_number += 1
    print("the_number = %d" % (the_number,))


# Build both threads, then start them in the same order as before.
t, t2 = Thread(target=work1), Thread(target=work2)
t.start()
t2.start()
线程究竟是什么呢,网上搜索了一下线程的概念:
线程,有时被称为轻量进程(Lightweight Process,LWP),是程序执行流的最小单元。一个标准的线程由线程ID,当前指令指针(PC),寄存器集合和堆栈组成。另外,线程是进程中的一个实体,是被系统独立调度和分派的基本单位,线程自己不拥有系统资源,只拥有一点儿在运行中必不可少的资源,但它可与同属一个进程的其它线程共享进程所拥有的全部资源。一个线程可以创建和撤消另一个线程,同一进程中的多个线程之间可以并发执行。由于线程之间的相互制约,致使线程在运行中呈现出间断性。线程也有就绪、阻塞和运行三种基本状态。就绪状态是指线程具备运行的所有条件,逻辑上可以运行,在等待处理机;运行状态是指线程占有处理机正在运行;阻塞状态是指线程在等待一个事件(如某个信号量),逻辑上不可执行。每一个程序都至少有一个线程,若程序只有一个线程,那就是程序本身。
线程是程序中一个单一的顺序控制流程。进程内有一个相对独立的、可调度的执行单元,是系统独立调度和分派CPU的基本单位,指运行中的程序的调度单位。在单个程序中同时运行多个线程完成不同的工作,称为多线程。
我的理解就是,线程是包含在进程之中的,进程相当于一个鸟巢,而鸟巢中的鸟就是线程,他们各自有各自的生命,但是他们也会去抢同一个鸟巢中的食物(python中的全局变量)。
上面代码的运行结果:
the_number = 1220263
the_number = 1372958
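两个线程各自累加100w次,但是最后的结果为什么不是200w呢?这就涉及到了操作系统对线程的调度问题:the_number += 1 并不是一个原子操作,它要先读取the_number的值,再加1,最后把结果写回去。如果一个线程在读取之后、写回之前被切换出去,另一个线程在这期间写入的结果就会被它稍后写回的旧值覆盖,于是一部分累加就丢失了。接下来看一张图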
那么我们要怎么解决这个问题呢,首先提到了轮询的方法
from threading import Thread
# Shared counter, and a flag that stays 1 until work1 has finished counting.
the_number, the_flag = 0, 1


def work1():
    """Count to one million, then clear the flag so work2 may proceed."""
    global the_number, the_flag
    if the_flag != 1:
        return
    for _ in range(10 ** 6):
        the_number += 1
    the_flag = 0
    print("the_number = %d" % (the_number,))


def work2():
    """Spin until the flag is cleared, then add another million."""
    global the_number, the_flag
    while the_flag == 1:
        pass  # polling: keep re-checking the flag until work1 clears it
    for _ in range(10 ** 6):
        the_number += 1
    print("the_number = %d" % (the_number,))


t, t2 = Thread(target=work1), Thread(target=work2)
t.start()
t2.start()
work2会等到work1执行完毕后才开始累加,但是在等待期间work2一直在空转检查the_flag(忙等待),白白占用cpu,个人感觉还不如直接使用单线程
运行结果:
the_number = 1000000
the_number = 2000000
那么为了解决这个问题我们学习了一个新的东西叫做互斥锁(The Mutex),用到的是threading模块当中的Lock类
from threading import Thread,Lock
the_number = 0   # counter shared by both worker threads
mutex = Lock()   # guards the whole counting loop of each worker


def work1():
    """Hold the lock for the entire loop, add one million, then print."""
    global the_number
    mutex.acquire()
    for _ in range(10 ** 6):
        the_number += 1
    mutex.release()
    print("the_number = %d" % (the_number,))


def work2():
    """Same as work1; whichever thread gets the lock first counts first."""
    global the_number
    mutex.acquire()
    for _ in range(10 ** 6):
        the_number += 1
    mutex.release()
    print("the_number = %d" % (the_number,))


t, t2 = Thread(target=work1), Thread(target=work2)
t.start()
t2.start()
运行结果:
the_number = 1000000
the_number = 2000000
Lock创建出来的实例对象用到了两个方法,分别是acquire()和release(),作用分别是上锁和解锁。程序的执行过程是:work1和work2中先抢到锁的那个线程先上锁,同一时刻一把锁只能被一个线程持有;抢到锁的线程先执行锁保护的代码,没抢到锁的线程则阻塞等待,直到对方解锁之后再给自己上锁
接下来谈一下上锁的效率,老师在上课时候讲到了一种新的上锁方式,就是在for循环内上锁,下面看一下效率对比
import datetime
from threading import Thread
the_number = 0  # counter shared by both worker threads
the_flag = 1    # stays 1 until work1 has finished counting


def work1():
    """Count to one million, then clear the flag so work2 may proceed."""
    global the_number
    global the_flag
    if the_flag == 1:
        for i in range(1000000):
            the_number += 1
        the_flag = 0
        print("the_number = %d" % the_number)


def work2():
    """Poll the flag until work1 clears it, then add another million."""
    global the_number
    global the_flag
    while True:
        if the_flag != 1:
            for i in range(1000000):
                the_number += 1
            print("the_number = %d" % the_number)
            break


starttime = datetime.datetime.now()
t = Thread(target=work1)
t.start()
t2 = Thread(target=work2)
t2.start()
t.join()
t2.join()
endtime = datetime.datetime.now()
# total_seconds() gives the full duration as a float. Parsing the seconds
# field out of str(timedelta) would silently drop minutes and hours.
elapsed_seconds = (endtime - starttime).total_seconds()
print("Runtime: %.2f seconds" % elapsed_seconds)
用轮询方式测试,运行结果:
the_number = 1000000
the_number = 2000000
Runtime: 0.33 seconds
接下来是定义在for循环外的互斥锁
import datetime
from threading import Thread,Lock
the_number = 0  # counter shared by both worker threads


def work1():
    """Take the lock once, add one million under it, release, then print."""
    global the_number
    mutex.acquire()
    for i in range(1000000):
        the_number += 1
    mutex.release()
    print("the_number = %d" % the_number)


def work2():
    """Same as work1; it waits for the lock if work1 already holds it."""
    global the_number
    mutex.acquire()
    for i in range(1000000):
        the_number += 1
    mutex.release()
    print("the_number = %d" % the_number)


starttime = datetime.datetime.now()
mutex = Lock()
t = Thread(target=work1)
t.start()
t2 = Thread(target=work2)
t2.start()
t.join()
t2.join()
endtime = datetime.datetime.now()
# total_seconds() gives the full duration as a float. Parsing the seconds
# field out of str(timedelta) would silently drop minutes and hours.
elapsed_seconds = (endtime - starttime).total_seconds()
print("Runtime: %.2f seconds" % elapsed_seconds)
运行结果:
the_number = 1000000
the_number = 2000000
Runtime: 0.22 seconds
接着是定义在for循环内的互斥锁
import datetime
from threading import Thread,Lock
the_number = 0  # counter shared by both worker threads


def work1():
    """Add one million, taking and releasing the lock around each increment."""
    global the_number
    for i in range(1000000):
        mutex.acquire()
        the_number += 1
        mutex.release()
    # Printed outside the lock, so the value may already include increments
    # made by the other thread.
    print("the_number = %d" % the_number)


def work2():
    """Same as work1; the two threads interleave one increment at a time."""
    global the_number
    for i in range(1000000):
        mutex.acquire()
        the_number += 1
        mutex.release()
    print("the_number = %d" % the_number)


starttime = datetime.datetime.now()
mutex = Lock()
t = Thread(target=work1)
t.start()
t2 = Thread(target=work2)
t2.start()
t.join()
t2.join()
endtime = datetime.datetime.now()
# total_seconds() gives the full duration as a float. Parsing the seconds
# field out of str(timedelta) would silently drop minutes and hours.
elapsed_seconds = (endtime - starttime).total_seconds()
print("Runtime: %.2f seconds" % elapsed_seconds)
运行结果:
the_number = 1958549
the_number = 2000000
Runtime: 3.63 seconds
可以看出,在这个例子里定义在for循环外的互斥锁效率最高:每个线程整个过程只需要上锁、解锁各一次,而定义在for循环内的互斥锁每个线程都要上锁、解锁100w次,这些额外的开销让运行时间明显变长
如果读者有兴趣的话可以用多进程或者多线程实现一下我之前发布的一篇爬取妹子图的例子,自己动手丰衣足食^ _ ^