有了简单爬虫,但是效率实在太低,于是决定启用多线程来爬取数据
但是对于临界资源的定义不好把握,思路如下:
1、定义队列(Queue的数据结构,List也可,安全性待考究)
demo:https://blog.csdn.net/the_fool_/article/details/80843644
2、对页码数++的部分进行加锁
在此之前,要讲一下继承,网上看了一个博主写的两个demo清晰明了,列在下面:
以下代码为博主转载,个人记录做笔记用,尊重原博主原创,侵删:
原文链接:https://blog.csdn.net/cityzenoldwang/article/details/78336992
1、单继承:
# Base class for the single-inheritance example.
class people():
    """A person with a public name and age and a private weight."""

    # Class-level defaults; __init__ shadows them on every instance.
    name = ''
    age = 0
    # A double-underscore name is mangled to _people__weight, so it cannot be
    # reached as obj.__weight from outside the class.
    __weight = 0

    def __init__(self, n, a, w):
        """Store name ``n``, age ``a`` and weight ``w`` on the instance."""
        self.name = n
        self.age = a
        self.__weight = w

    def speak(self):
        """Print the person's name and age."""
        print("%s 说: 我 %d 岁。" % (self.name, self.age))
# Single-inheritance subclass of people.
class student(people):
    """A ``people`` that additionally records a school grade."""

    # Class-level default; __init__ replaces it on every instance.
    grade = ''

    def __init__(self, n, a, w, g):
        """Initialise name/age/weight through ``people``, then store grade ``g``."""
        # Call the parent-class constructor explicitly.
        people.__init__(self, n, a, w)
        self.grade = g

    # Override the parent's method.
    def speak(self):
        """Print name, age and grade (the grade is formatted with ``%d``)."""
        print("%s 说: 我 %d 岁了,我在读 %d 年级"
              % (self.name, self.age, self.grade))
# Build a student (name, age, weight, grade) and call the overridden speak().
s = student(n='ken', a=10, w=60, g=3)
s.speak()
2、多继承
# Class definition: root of the multiple-inheritance example.
class people:
    """Base class holding a name, an age and a private weight."""

    # Basic attributes (class-level defaults, overwritten in __init__).
    name = ''
    age = 0
    # Private attribute: the leading double underscore triggers name mangling
    # (_people__weight), so it is not directly accessible from outside.
    __weight = 0

    # Constructor.
    def __init__(self, n, a, w):
        """Set the instance's name (``n``), age (``a``) and weight (``w``)."""
        self.name = n
        self.age = a
        self.__weight = w

    def speak(self):
        """Print a one-line self-introduction with name and age."""
        print("%s 说: 我 %d 岁。" % (self.name, self.age))
# Single-inheritance example.
class student(people):
    """``people`` subclass that adds a ``grade`` attribute."""

    # Class-level default; every instance gets its own value in __init__.
    grade = ''

    def __init__(self, n, a, w, g):
        """Delegate ``n``, ``a``, ``w`` to ``people`` and keep ``g`` as the grade."""
        # Call the parent constructor.
        people.__init__(self, n, a, w)
        self.grade = g

    # Override the parent's method.
    def speak(self):
        """Print name, age and grade (the grade is formatted with ``%d``)."""
        print("%s 说: 我 %d 岁了,我在读 %d 年级"
              % (self.name, self.age, self.grade))
# A second, unrelated class: groundwork for the multiple-inheritance example.
class speaker():
    """A speaker with a name and a talk topic."""

    # Class-level defaults, overwritten per instance in __init__.
    topic = ''
    name = ''

    def __init__(self, n, t):
        """Store the speaker's name ``n`` and topic ``t``."""
        self.name = n
        self.topic = t

    def speak(self):
        """Print the speaker's name and topic."""
        print("我叫 %s,我是一个演说家,我演讲的主题是 %s" % (self.name, self.topic))
# Multiple inheritance.
class sample(speaker, student):
    """Combine ``speaker`` and ``student``.

    ``speaker`` is listed first among the bases, so attribute lookups that
    both parents can satisfy (such as ``speak``) resolve to ``speaker``'s.
    """

    a = ''

    def __init__(self, n, a, w, g, t):
        """Run both parent initialisers explicitly.

        ``n``, ``a``, ``w``, ``g`` go to ``student``; ``n`` and ``t`` go to
        ``speaker``.
        """
        student.__init__(self, n, a, w, g)
        speaker.__init__(self, n, t)
test = sample(n="Tim", a=25, w=80, g=4, t="Python")
# Both parents define speak(); when the method names clash, the parent listed
# first in the class statement's parentheses wins by default.
test.speak()
好了,知道了继承的语法,看多线程的代码就顺利多了,正文开始:
总结中。。。。
https://www.cnblogs.com/smallmars/p/7149507.html
https://blog.csdn.net/sunhuaqiang1/article/details/70168015
1、两种基本使用方式
方法一
from threading import Thread
import time
def sayhi(name, delay=2):
    """Sleep for ``delay`` seconds, then print ``"hello"`` followed by ``name``.

    ``delay`` defaults to 2, the pause that used to be hard-coded, so existing
    one-argument callers behave exactly as before.
    """
    time.sleep(delay)
    print("hello" + name)
if __name__ == '__main__':
    # Approach 1: hand the target function and its argument tuple to Thread.
    t = Thread(target=sayhi, args=('hh',))
    t.start()
    # The worker is not joined, so this line does not wait for it to finish.
    print('主线程')
方法二
from threading import Thread
import time
class Sayhi(Thread):
    """Thread subclass whose ``run`` pauses and then prints a greeting."""

    def __init__(self, name, delay=2):
        """Create the thread.

        ``name`` becomes the thread's name; ``delay`` is the pause in seconds
        before greeting (default 2, the value previously hard-coded in run()).
        """
        super().__init__()
        self.name = name
        self.delay = delay

    def run(self):
        """Thread body, executed in the new thread after ``start()``."""
        time.sleep(self.delay)
        print('%s say hello' % self.name)
if __name__ == '__main__':
    # Approach 2: instantiate the Thread subclass; start() invokes its run().
    t = Sayhi('hh')
    t.start()
    # The worker is not joined, so this line does not wait for it to finish.
    print('主线程')
线程安全demo1(下面的 addNum 没有加锁,读取和写回之间有 sleep,多个线程会互相覆盖结果;随后给出用 Lock 保护公共数据的写法):
import time
import threading
def addNum():
    """Decrement the module-level ``num`` by one, deliberately without a lock.

    The value is read, the thread sleeps, and only then is the decremented
    value written back. Threads that read ``num`` during another thread's
    sleep all start from the same value, so their updates overwrite each
    other. Writing ``num -= 1`` instead would be the one-statement form of
    the same update.
    """
    global num  # every thread works on the same module-level variable
    snapshot = num
    time.sleep(0.1)  # gap between read and write-back
    num = snapshot - 1  # subtract one from the value read before the sleep
num = 100  # the shared variable every thread decrements

thread_list = []
for i in range(100):
    t = threading.Thread(target=addNum)
    t.start()
    thread_list.append(t)

for t in thread_list:  # wait for every thread to finish
    t.join()

print('Result: ', num)
import threading
R = threading.Lock()
# Entering the with-block acquires the lock and leaving it releases the lock,
# including when the guarded code raises.
with R:
    # Operate on the shared data here.
    pass
线程安全demo2:
import threading
import time
class myThread(threading.Thread):
    """Thread that prints timestamps while holding the module-level lock."""

    def __init__(self, threadID, name, counter):
        """Store the thread's id and name; ``counter`` is the delay it passes to ``print_time``."""
        threading.Thread.__init__(self)
        self.threadID = threadID
        self.name = name
        self.counter = counter

    def run(self):
        """Announce the thread, then call ``print_time`` under ``threadLock``."""
        print("开启线程: " + self.name)
        # Take the lock for thread synchronisation. The with-statement
        # releases it on exit, even if print_time raises, so the next thread
        # is never left blocked on a lock that nobody will release.
        with threadLock:
            print_time(self.name, self.counter, 3)
def print_time(threadName, delay, counter):
    """Print ``counter`` timestamped lines, sleeping ``delay`` seconds before each.

    Every line has the form ``"<threadName>: <time.ctime() string>"``.
    A zero or negative ``counter`` prints nothing.
    """
    # "> 0" rather than plain truthiness: a negative counter would otherwise
    # never reach zero and the loop would not terminate.
    while counter > 0:
        time.sleep(delay)
        print("%s: %s" % (threadName, time.ctime(time.time())))
        counter -= 1
threadLock = threading.Lock()
threads = []

# Create the new threads.
thread1 = myThread(1, "Thread-1", 1)
thread2 = myThread(2, "Thread-2", 2)

# Start the new threads.
thread1.start()
thread2.start()

# Add the threads to the thread list.
threads.append(thread1)
threads.append(thread2)

# Wait for all threads to finish.
for t in threads:
    t.join()
print("退出主线程")