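# Processes and threads
# A process is the unit of resource allocation; every process has at least one thread.
# A thread is the unit of execution.
# if __name__ == '__main__':
# print('hello')
# Multithreading, approach 1: pass a target function to Thread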
from threading import Thread # 多线程
def func():
    """Print 'func' followed by each counter value from 0 through 999."""
    counter = 0
    while counter < 1000:
        print('func', counter)
        counter += 1
if __name__ == '__main__':
    # Create the worker thread and assign it its task.
    t = Thread(target=func)
    # start() only makes the thread runnable; when it actually runs is up to
    # the scheduler, so its output interleaves with the loop below.
    t.start()
    for i in range(1000):
        print('main', i)
    # Wait for the worker to finish so its output cannot bleed into whatever
    # the script runs after this demo.
    t.join()
# Approach 2: subclass Thread and override run()
# class MyThread(Thread):
#     def run(self):  # fixed name -> run() is what executes once the thread is started
#         for i in range(1000):
#             print('子线程',i)
#
#
# if __name__ == '__main__':
#     t = MyThread()
#     t.start()  # start the thread
#     for i in range(1000):
#         print('主线程',i)
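# Hands-on example: scraping Xinfadi market price data
# 1. Work out how to extract the data from a single page
# 2. Use a thread pool to fetch multiple pages concurrently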
import requests
from concurrent.futures import ThreadPoolExecutor
def build_page_form(page, limit=20):
    """Return a fresh form payload requesting ``page`` with ``limit`` rows.

    A new dict is built on every call so that each submitted task owns its
    own parameters instead of sharing one mutable/rebound object.
    """
    return {
        'limit': str(limit),
        'current': page,
    }


def download_one_page(url, data=None):
    """POST to ``url`` and print every entry of the JSON response's 'list'.

    Args:
        url: Endpoint to send the POST request to.
        data: Form fields for the request (see ``build_page_form``). When
            omitted, a module-level ``kw`` is used if one exists, so callers
            that relied on the old global keep working.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        KeyError: If the response JSON has no 'list' key.
    """
    if data is None:
        # Legacy fallback only; new callers should pass ``data`` explicitly.
        data = globals().get('kw')
    print(url)
    # requests has no default timeout; without one a stalled server would
    # block this worker thread forever.
    response = requests.post(url, data=data, timeout=10)
    response.raise_for_status()
    for record in response.json()['list']:
        print(record)
    # response.encoding='UTF-8'
    # print(response.text)


if __name__ == '__main__':
    # download_one_page('http://www.xinfadi.com.cn/getPriceData.html')
    page_url = 'http://www.xinfadi.com.cn/getPriceData.html'
    with ThreadPoolExecutor(50) as pool:
        # Pass each task its own payload. Reading a shared global from the
        # worker would race with this loop and could fetch the wrong page.
        futures = {
            page: pool.submit(download_one_page, page_url, build_page_form(page))
            for page in range(1, 10)
        }
    # Leaving the with-block waits for all tasks. Exceptions raised inside a
    # task are stored on its future, so report them instead of losing them.
    for page, future in futures.items():
        error = future.exception()
        if error is not None:
            print(f'page {page} failed: {error!r}')