标题:
1.Queue线程安全队列:
# encoding=utf-8
from queue import Queue
import time
import threading
# # 创建一个容量为 4 的队列
# q = Queue(4)
# for x in range(4):
#     q.put(x)
#
# for x in range(4):
#     print(q.get())
# # get 默认阻塞,直到队列中有元素可取
# q.get(block=True)
# # put 默认阻塞,直到队列不满
# q.put(item, block=True)
def set_value(q, interval=3):
    """Produce an endless sequence of integers into ``q``.

    Puts 0, 1, 2, ... onto the queue, pausing ``interval`` seconds after
    each put. ``q.put`` is called without a timeout, so it blocks while a
    bounded queue is full. The function never returns; run it in its own
    thread.

    Args:
        q: Queue-like object exposing a blocking ``put(item)``.
        interval: Seconds to sleep after each put (default 3, the value
            that used to be hard-coded).
    """
    index = 0
    while True:
        q.put(index)
        index += 1
        time.sleep(interval)
def get_value(q):
    """Consume items from ``q`` forever, printing each one.

    ``q.get()`` is called with no arguments, so it blocks until an item
    is available. The function never returns.
    """
    while True:
        item = q.get()
        print(item)
def main():
    """Start one producer thread and one consumer thread sharing a queue."""
    q = Queue(4)  # bounded queue: at most 4 items at a time
    # Thread's ``args`` must be a sequence (list or tuple) of the
    # positional arguments passed to the target.
    producer = threading.Thread(target=set_value, args=[q])
    consumer = threading.Thread(target=get_value, args=[q])
    for worker in (producer, consumer):
        worker.start()
# Run the queue demo only when this file is executed as a script.
if __name__ == '__main__':
    main()
演练: 爬取表情包(使用xpath方法)
# xpath
# encoding=utf-8
import requests
from lxml import etree
from urllib import request
# os : 获取文件后缀名
import os
import re
def parse_page(url, save_dir='biaoqingbao'):
    """Download every non-GIF image listed on one page into ``save_dir``.

    Fetches ``url``, selects the ``<img>`` elements inside the
    ``col-sm-9 center-wrap`` container whose ``class`` attribute is not
    ``gif``, and saves each one under a name built from its ``alt`` text
    plus the extension of its ``data-original`` URL.

    Args:
        url: Address of the listing page to scrape.
        save_dir: Directory the images are written to; created if it does
            not exist yet. Defaults to the previously hard-coded
            ``biaoqingbao``.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
                      '72.0.3626.81 Safari/537.36 SE 2.X MetaSr 1.0'
    }
    response = requests.get(url, headers=headers)
    # ``response.text`` is the response body already decoded to str.
    html = etree.HTML(response.text)
    imgs = html.xpath('//div[@class="col-sm-9 center-wrap"]//img[@class!="gif"]')

    # urlretrieve does not create missing directories.
    os.makedirs(save_dir, exist_ok=True)

    # Punctuation (ASCII and full-width) plus characters that act as path
    # separators or are rejected in file names on common file systems.
    unsafe = re.compile(r'[??.,,。!!\\/:*"<>|]')

    for img in imgs:
        img_url = img.get('data-original')
        if not img_url:
            # No ``data-original`` attribute: nothing to download.
            continue
        # Split once: the stem is the fallback name, the suffix the extension.
        base, suffix = os.path.splitext(img_url)
        # ``alt`` may be missing; treat that the same as an empty caption.
        name = unsafe.sub('', img.get('alt') or '').strip()
        if not name:
            # Avoid writing every caption-less image to the same ".ext" file.
            name = os.path.basename(base)
        request.urlretrieve(img_url, os.path.join(save_dir, name + suffix))
def main():
    """Scrape the doutula.com photo listing, starting from page 1.

    NOTE(review): the ``break`` ends the loop after the first iteration,
    so pages 2-17 are never requested. This looks like a debugging limit;
    confirm the intent before removing it.
    """
    page_url = 'https://www.doutula.com/photo/list/?page={}'
    for page in range(1, 18):
        parse_page(page_url.format(page))
        break
# Run the scraper only when this file is executed as a script.
if __name__ == '__main__':
    main()