面向对象补充:对象中设置值
class Foo(object):
    """Object that stores every assigned attribute in an internal ``info`` dict.

    ``obj.x = 1`` is routed through ``__setattr__`` into ``info``; ``obj.x`` is
    not found by normal lookup, so it falls back to ``__getattr__``, which
    reads it back from ``info``.
    """

    def __init__(self):
        # Use object.__setattr__ directly: our own __setattr__ writes into
        # self.info, which does not exist yet. This call is what "setting a
        # value on an object" boils down to.
        object.__setattr__(self, 'info', {})

    def __setattr__(self, key, value):
        """Store ``value`` under ``key`` in the ``info`` dict."""
        self.info[key] = value

    def __getattr__(self, item):
        """Print the requested name and return its value from ``info``.

        Raises:
            AttributeError: if ``item`` was never assigned, so that ``hasattr``
                and ``getattr(obj, name, default)`` behave normally.
        """
        if item == 'info':
            # Only reached when __init__ never ran (e.g. Foo.__new__(Foo));
            # without this guard the self.info lookup below recurses forever.
            raise AttributeError(item)
        print(item)
        try:
            return self.info[item]
        except KeyError:
            raise AttributeError(item) from None
# Demo: the assignment goes through Foo.__setattr__ (stored in obj.info) and
# the read goes through Foo.__getattr__ (which prints the attribute name).
obj = Foo()
setattr(obj, 'name', 'alex')
print(getattr(obj, 'name'))
# Build the list 0..9999 in one step instead of appending in a loop, then
# print it.
# NOTE(review): the original layout was lost, so it is unclear whether
# print(v) sat inside the loop; assumed to run once after it -- confirm.
v = list(range(10000))
print(v)
1. 进程
进程间数据不共享
data_list = []


def task(arg):
    """Append ``arg`` to the module-level ``data_list`` and print the list.

    When used as a ``Process`` target, the append happens in the child
    process's own copy of ``data_list``, so the parent's list stays unchanged
    (this is what the demo illustrates).
    """
    data_list.append(arg)
    print(data_list)


def run():
    """Start ten processes, each calling ``task`` with its index 0..9."""
    # Local import: this snippet had no import of its own.
    import multiprocessing

    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i,))
        # For comparison, threads share data_list:
        # p = threading.Thread(target=task, args=(i,))
        p.start()


if __name__ == '__main__':
    run()
常用功能:
import time


def task(arg, delay=2):
    """Sleep for ``delay`` seconds (2 by default), then print ``arg``."""
    time.sleep(delay)
    print(arg)


def run():
    """Start two named child processes, printing a marker around each start."""
    # Local import: this snippet had no multiprocessing import of its own.
    import multiprocessing

    print('111111111')
    p1 = multiprocessing.Process(target=task, args=(1,))
    p1.name = 'pp1'  # the name can be assigned any time before/after creation
    p1.start()
    print('222222222')
    p2 = multiprocessing.Process(target=task, args=(2,))
    p2.name = 'pp2'
    p2.start()
    print('333333333')


if __name__ == '__main__':
    run()
类继承方式创建进程:
import multiprocessing


class MyProcess(multiprocessing.Process):
    """Process subclass whose work is defined by overriding ``run``."""

    def run(self):
        """Print the process object that is executing this method."""
        print('当前进程', multiprocessing.current_process())


def run():
    """Create and start two ``MyProcess`` instances."""
    p1 = MyProcess()
    p1.start()
    p2 = MyProcess()
    p2.start()


if __name__ == '__main__':
    run()
2.进程间数据共享: (multiprocessing.Queue , Manager)
import multiprocessing
import threading
import queue
import time

# Approach 1: share data between processes through a multiprocessing.Queue.
q = multiprocessing.Queue()


def task(arg, q):
    """Put ``arg`` on the queue ``q``; meant to run in a child process."""
    q.put(arg)


def run():
    """Start ten producer processes and print the ten values they enqueue."""
    worker_count = 10
    workers = []
    for i in range(worker_count):
        p = multiprocessing.Process(target=task, args=(i, q,))
        p.start()
        workers.append(p)

    # Read exactly one item per worker. An unbounded `while True: q.get()`
    # blocks forever once the last item has been consumed.
    for _ in range(worker_count):
        v = q.get()
        print(v)

    # Join only after draining the queue so no child is stuck flushing data.
    for p in workers:
        p.join()


if __name__ == '__main__':
    run()


# Target function for approach 2 (shared dict).
# NOTE: this definition replaces the queue-based ``task`` above once the module
# has been fully loaded; keep the two approaches in separate files when running
# them for real.
def task(arg, dic, delay=2):
    """Sleep ``delay`` seconds (2 by default), then set ``dic[arg] = 100``."""
    time.sleep(delay)
    dic[arg] = 100
if __name__ == '__main__':
    # Approach 2: share data through a Manager.
    m = multiprocessing.Manager()
    # The dict must come from the manager. A plain {} is copied into each
    # child, so the parent would always print an empty dict.
    dic = m.dict()

    process_list = []
    for i in range(10):
        p = multiprocessing.Process(target=task, args=(i, dic,))
        p.start()
        process_list.append(p)

    # Wait for every child to finish; join() blocks without the CPU-burning
    # is_alive() polling loop.
    for p in process_list:
        p.join()

    print(dic)
进程间的数据其他电脑:
# Sketch only: the body of the final `while True:` loop is elided in the notes
# (the dotted line below is a placeholder) and several tokens are run together,
# so this fragment is not runnable as written.
# task(arg): prints a message, acquires the module-level RLock, sleeps 2 s,
# prints arg, then releases the lock.
lock =multiprocessing.RLock()deftask(arg,):print("鬼子扛枪")
lock.acquire()
time.sleep(2)print(arg)
lock.release()if __name__ == '__main__':whileTrue:
........
3.进程锁:
import time
import threading
import multiprocessing

lock = multiprocessing.RLock()


def task(arg, shared_lock=None, delay=2):
    """Print a banner, then sleep and print ``arg`` while holding a lock.

    Args:
        arg: value printed inside the critical section.
        shared_lock: lock guarding the critical section. Defaults to the
            module-level ``lock``; pass the parent's lock explicitly when
            starting a process so every child uses the *same* lock.
        delay: seconds to sleep while holding the lock (2 by default).
    """
    if shared_lock is None:
        shared_lock = lock
    print('鬼子来了')
    # `with` releases the lock even if the body raises.
    with shared_lock:
        time.sleep(delay)
        print(arg)


if __name__ == '__main__':
    # Hand the lock to each child. With the "spawn" start method (Windows,
    # macOS) a child re-imports this module and would otherwise build its own
    # unrelated lock, so nothing would actually be serialized.
    p1 = multiprocessing.Process(target=task, args=(1, lock))
    p1.start()

    p2 = multiprocessing.Process(target=task, args=(2, lock))
    p2.start()
4.进程池
import time
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor


def task(arg, delay=2):
    """Sleep for ``delay`` seconds (2 by default), then print ``arg``."""
    time.sleep(delay)
    print(arg)


if __name__ == '__main__':
    # Pool of five worker processes; the ten jobs run at most five at a time.
    # Leaving the with-block waits for all jobs and shuts the pool down.
    with ProcessPoolExecutor(5) as pool:
        for i in range(10):
            pool.submit(task, i)
5.初识爬虫
安装 : pip3 install requests
pip3 install beautifulsoup4
问题 : 提示“pip3 不是内部或外部命令”(找不到 pip3 指令)?
方式一 : C:\Users\SF\AppData\Local\Programs\Python\Python36\Scripts\pip3 install requests
方式二 : 先进入目录(或将该目录加入环境变量 PATH) C:\Users\SF\AppData\Local\Programs\Python\Python36\Scripts ,然后执行
pip3 install requests
简单爬虫示例:
import requests
from bs4 import BeautifulSoup
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor

# requests imitates a browser sending an HTTP request. Roughly, internally:
#   sk = socket.socket()    # create a socket
#   sk.connect(...)         # open a socket connection to the chouti site
#   sk.sendall('...')
#   sk.recv(...)


def task(url):
    """Print ``url`` and download the page with a browser-like User-Agent.

    The response is fetched but not processed; the commented lines at the end
    show how the listing could be parsed with BeautifulSoup.
    """
    print(url)
    r1 = requests.get(
        url=url,
        headers={'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.92 Safari/537.36'},
        # Without a timeout a stalled server would block this worker forever.
        timeout=10,
    )
    # Inspect the downloaded text:
    # soup = BeautifulSoup(r1.text, 'html.parser')
    # print(soup.text)
    # content_list = soup.find('div', attrs={'id': 'content-list'})
    # for item in content_list.find_all('div', attrs={'class': 'item'}):
    #     title = item.find('a').text.strip()
    #     target_url = item.find('a').get('href')
    #     print(title, target_url)
def run():
    """Fetch listing pages 1-49 concurrently on a pool of five threads."""
    # Leaving the with-block waits for every submitted fetch and shuts the
    # pool down instead of leaving the executor open.
    with ThreadPoolExecutor(5) as pool:
        for i in range(1, 50):
            pool.submit(task, 'https://dig.chouti.com/all/hot/recent/%s' % i)


if __name__ == '__main__':
    run()
进程和线程哪个好?
回答是: 线程好(爬虫属于 IO 密集型任务,线程开销小,等待网络时可以并发;计算密集型任务才更适合用进程)
进程池/ 线程池的应用 与爬虫有关