import time
# ************* 单线程 (single thread)
def func():
    """Print the label "函数内" next to every index from 0 to 99, one pair per line."""
    total = 100
    for line_no in range(total):
        print("函数内", line_no)
if __name__ == '__main__':  # program entry point
    # Single-threaded flow: the outer loop prints all of its lines first,
    # and only then does func() run.
    for outer_no in range(100):
        print("函数外", outer_no)
    func()
# ************ 多线程 (multithreading)
# ****** 第一种 (approach 1: pass a function to Thread)
from threading import Thread # 线程
# 1. 定义好. 线程要做哪些任务
def func():
    """Task run by each child thread: print "子线程" with every index in 0..999."""
    step_count = 1000
    for step in range(step_count):
        print("子线程", step)
# 2. 写main, 创建子线程
if __name__ == '__main__':  # this guard is required
    # The main thread carries on with its own work.
    for step in range(1000):  # main thread
        print("主线程", step)
    # Create both child threads, then start them in creation order.
    workers = [Thread(target=func), Thread(target=func)]
    for worker in workers:
        worker.start()
# ************* 给线程传参 (passing arguments to a thread)
def func(url):
    """Stand-in for the crawler job: for now it only prints a message and ``url``."""
    message = "我要编写爬虫的工作"
    print(message, url)
if __name__ == '__main__':
    # One thread is started per entry.
    # Note: more threads is not automatically better; the original author's
    # rule of thumb is CPU core count * 4.
    for target_url in ("第一个", "第二个", "第三个"):
        # args hands arguments to the thread target and must be a tuple,
        # e.g. Thread(target=func, args=("麦合",))
        worker = Thread(target=func, args=(target_url,))
        worker.start()
# *** 面向对象 第二种 (approach 2: object-oriented, subclass Thread)
class MyThread(Thread):
    """Custom Thread subclass whose run() prints its name next to a counter."""

    def __init__(self, name):
        super().__init__()
        # Assigned after the base initialiser has run, exactly as before.
        self.name = name

    def run(self):
        """Thread body (the method name is fixed): print "<name> <i>" for i in 0..999."""
        for tick in range(1000):
            print(self.name, tick)
if __name__ == '__main__':
    # Build both named threads first, then start them in the same order.
    threads = [MyThread(label) for label in ("线程1", "线程2")]
    for thread in threads:
        thread.start()
# 线程池 (thread pool)
from concurrent.futures import ThreadPoolExecutor
def func(name, t):
    """Sleep for ``t`` seconds, print ``name`` with each index in 0..99, then return ``name``.

    Args:
        name: Label printed in front of every counter value.
        t: Number of seconds handed to ``time.sleep`` before printing starts.

    Returns:
        ``name`` unchanged. The original returned None implicitly, which left
        the code that prints task results with nothing meaningful to show.
    """
    time.sleep(t)
    for i in range(100):
        print(name, i)
    return name
def fn(res):
    """Done-callback: print what a finished task produced.

    Args:
        res: The object handed over by ``add_done_callback``, i.e. the
            finished ``concurrent.futures.Future``. Any other object is
            printed as-is, which keeps the original behaviour for direct calls.
    """
    from concurrent.futures import Future

    # Nothing to unwrap for non-futures or cancelled futures.
    if not isinstance(res, Future) or res.cancelled():
        print(res)
        return

    # The callback receives the Future itself, not the task's return value,
    # so unwrap it here. A failed task is reported rather than re-raised,
    # because an exception escaping a done-callback is only logged.
    error = res.exception()
    if error is not None:
        print(repr(error))
    else:
        print(res.result())
if __name__ == '__main__':
    with ThreadPoolExecutor(10) as t:
        for i in range(100):
            # The label and the delay are two separate positional arguments
            # of func. The original folded the delay into the f-string
            # (e.g. f"麦合{i},2"), so func received a single argument and
            # every submitted task failed with TypeError.
            t.submit(func, f"麦合{i}", 2).add_done_callback(fn)  # fn is called with the finished future
            t.submit(func, f"麦合哈{i}", 1).add_done_callback(fn)
            t.submit(func, f"麦{i}", 3).add_done_callback(fn)
        # Drawback of callbacks: they fire as tasks happen to finish, so the
        # printed results may come out in a different order than submitted.

        # map pairs the two lists element by element. The delays must be
        # numbers: time.sleep rejects the strings "2", "1", "3" used originally.
        result = t.map(func, ["麦合", "麦合哈", "麦"], [2, 1, 3])
        for item in result:
            # Results arrive in the order the inputs were given to map
            # (麦合, 麦合哈, 麦), regardless of how long each task slept.
            print(item)
# ********** 北京新发地 多线程 (Beijing Xinfadi market prices, multithreaded)
import requests
from lxml import etree
from concurrent.futures import ThreadPoolExecutor
import time
# csv: a file whose fields are separated by commas
# e.g. one row looks like: Chow Yun-fat,Li Ka-shing,Li Jiaqi,
# Module-level output handle written to by download_xinfadi. It is opened
# (and data.csv truncated) as soon as this statement runs, and is closed at
# the end of the __main__ block below.
f = open("data.csv", mode="w", encoding='utf-8')
def download_xinfadi(url):
    """Download one page and write its table rows to the shared CSV file.

    Args:
        url: Address of the page to fetch.

    Every row of the ``hq_table`` table except the first becomes one
    comma-joined line written to the module-level file object ``f``.
    """
    # A timeout keeps a stalled server from blocking the caller (or a pool
    # worker) indefinitely; requests waits without limit by default.
    resp = requests.get(url, timeout=10)
    tree = etree.HTML(resp.text)
    # position()>1 skips the first <tr>; same effect as slicing the
    # unfiltered result with [1:].
    tr_list = tree.xpath("//table[@class='hq_table']/tr[position()>1]")
    rows = [",".join(tr.xpath("./td/text()")) + "\n" for tr in tr_list]
    # Emit the whole page with one write call. The original wrote each row
    # and its newline in two separate calls, which allowed rows coming from
    # concurrent workers to be interleaved in the file.
    f.write("".join(rows))
if __name__ == '__main__':
    # Both timing runs fetch the same 15 pages, so build the list once.
    urls = [
        f"http://www.xinfadi.com.cn/marketanalysis/0/list/{i}.shtml"
        for i in range(1, 16)
    ]
    try:
        start = time.time()
        with ThreadPoolExecutor(30) as t:
            for url in urls:
                t.submit(download_xinfadi, url)
        # Leaving the with-block waits for every submitted task, so the
        # elapsed time covers all downloads.
        print("多线程用了", time.time() - start)

        start = time.time()
        for url in urls:
            download_xinfadi(url)
        print("单线程用了", time.time() - start)
    finally:
        # Close the shared CSV handle even when a download raises.
        f.close()
# **************** 多进程 (multiprocessing)
from multiprocessing import Process
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor
def func(name):
    """Print ``name`` together with every integer from 0 through 999."""
    upper = 1000
    for counter in range(upper):
        print(name, counter)
if __name__ == '__main__':
    # Build both processes up front, then launch them in creation order.
    procs = [Process(target=func, args=(label,)) for label in ("进程1", "进程2")]
    for proc in procs:
        proc.start()
# 多线程:任务相对统一,互相结构特别相似(一模一样)
# 多进程:任务相互独立
# 多个任务极其雷同. 使用多线程
# 多个任务几乎无关联的情况用多进程
# 免费的IP代理池. 这三个任务,完全不一样,那就用多进程。但是,每个任务都重复地进行操作,内部之间用多线程
# 1. 去各大免费代理ip网站去抓取IP
# 2. 验证各个IP是否可用
# 3. 准备对外的接口
# ************* 多进程,多线程,案例(合并) (combined multi-process + multi-thread example)
"""
以下言论仅限今天的案例:
进程 1. 访问主页面, 在主页面中拿到详情页的url.
进入到详情页. 在详情页中提取到图片的下载地址
进程 2. 批量的下载图片
进程之间的通信
队列
"""
import requests
from urllib import parse # 转化
from lxml import etree
from multiprocessing import Process, Queue
from concurrent.futures import ThreadPoolExecutor
def get_img_src(q):
    """Producer: collect image addresses from the listing page and put them on ``q``.

    Args:
        q: Queue shared with the consumer. It receives one image ``src`` per
            detail page, followed by the sentinel string "OK了".

    The sentinel is sent even when a request or parse step raises; the
    exception itself still propagates to the caller.
    """
    url = "http://www.591mm.com/mntt/6.html"
    try:
        # Timeouts keep a stalled server from hanging this process forever.
        resp = requests.get(url, timeout=10)
        resp.encoding = 'utf-8'
        tree = etree.HTML(resp.text)
        href_list = tree.xpath("//div[@class='MeinvTuPianBox']/ul/li/a[1]/@href")
        for href in href_list:
            # The hrefs are site-relative (e.g. /mntt/hgmn/307626.html), so
            # resolve them against the listing page's address.
            child_url = parse.urljoin(url, href)
            resp_child = requests.get(child_url, timeout=10)
            resp_child.encoding = "utf-8"
            child_tree = etree.HTML(resp_child.text)
            src_list = child_tree.xpath("//img[@id='mouse_src']/@src")
            if not src_list:
                # A detail page without the expected <img>: skip it instead
                # of failing with IndexError on [0].
                continue
            q.put(src_list[0])  # hand the image address to the consumer
    finally:
        # Without the sentinel the consumer would block on q.get() forever.
        q.put("OK了")
def download(url):
    """Download ``url`` and save the response body in the current directory.

    Args:
        url: Address of the file to fetch; the text after its last "/" is
            used as the local file name.
    """
    file_name = url.split("/")[-1]
    # Fetch before opening the file: the original opened (and truncated) the
    # file first, so a failed request left an empty file behind. The timeout
    # keeps a stalled server from blocking the worker indefinitely.
    resp = requests.get(url, timeout=10)
    with open(file_name, mode="wb") as f:
        f.write(resp.content)  # download complete
def download_all(q):
    """Consumer: take image addresses off ``q`` and submit each one to a thread pool.

    Reading stops when the sentinel string "OK了" is received; leaving the
    ``with`` block then waits for the submitted downloads.
    """
    # The thread pool is created inside this process.
    with ThreadPoolExecutor(10) as pool:
        # iter(callable, sentinel) keeps calling q.get() until it returns "OK了".
        for image_src in iter(q.get, "OK了"):
            print(image_src)
            pool.submit(download, image_src)
if __name__ == '__main__':
    q = Queue()
    # Producer first, consumer second; both share the same queue.
    stages = [Process(target=stage, args=(q,)) for stage in (get_img_src, download_all)]
    for stage_proc in stages:
        stage_proc.start()
# 多线程
# 于 2023-12-25 16:56:33 首次发布