最近在B站上学习了Python基础编程入门,其中比较有意思的是用Python写爬虫的项目。
代码如下:
import urllib.request
from bs4 import BeautifulSoup
import re
import ssl
import os
def find_img(page_url):
    """Collect lazy-loaded image addresses from one page.

    Fetches ``page_url`` through ``openurl``, parses the result with
    BeautifulSoup's ``html.parser`` and inspects every tag whose CSS class
    matches the regular expression ``lazy``.

    Args:
        page_url: Address of the page to scan.

    Returns:
        A list holding the ``data-original`` attribute value of each
        matching tag, in the order ``find_all`` yields them. Tags that
        match the class but carry no ``data-original`` attribute are
        skipped.
    """
    html = openurl(page_url)
    soup = BeautifulSoup(html, "html.parser")
    lazy_tags = soup.find_all(class_=re.compile("lazy"))
    # Check for the attribute before indexing: a tag can have a "lazy" class
    # without data-original, and a bare lookup would abort the whole page
    # with KeyError.
    pic_addres = [
        tag.attrs['data-original']
        for tag in lazy_tags
        if 'data-original' in tag.attrs
    ]
    print('next page')
    return pic_addres
def openurl(url):
req = urllib.request.Request(url)
req.add_header('User-Agent','Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36')
#使用代理ip
# proxy_ = urllib.request.ProxyHandler('','')
# opener = urllib.request.build_opener(proxy_)
# urllib.request.install_opener(opener)
response