废话不多说,先上代码:
import requests
from bs4 import BeautifulSoup
import os,re
import sys
# Python 2-only idiom: reload(sys) makes sys.setdefaultencoding reachable
# again so the default string encoding can be switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf8')
# Module-level page address; where it is consumed is outside the visible
# part of this file.
url = 'http://www.bfpgf.com/yld'
# Browser-like User-Agent string placed into the request headers below.
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = { 'User-Agent' : user_agent }# Build the request headers dict
def get_soup(url):
    """Fetch ``url`` and return the page parsed as a BeautifulSoup object.

    The request is sent with the module-level ``headers`` (a browser-like
    User-Agent) to cope with anti-crawler checks. The HTTP status code is
    printed; the response is parsed and returned whatever that code is.

    :param url: address of the page to download.
    :return: ``BeautifulSoup`` tree built from the raw response bytes using
        the ``'html.parser'`` backend.
    """
    # Fetch the page with requests; the headers are added here to deal with
    # anti-crawler measures.
    r = requests.get(url, headers=headers)
    # Parenthesised so the line is valid both as a Python 2 print statement
    # and as a Python 3 function call; the output is the same.
    print(r.status_code)
    # Turn the page into a bs4 object so later steps can query it.
    soup = BeautifulSoup(r.content, 'html.parser')
    return soup
def download_img(url,page_number):
soup = get_soup(url)
img_urls = soup.article.find_all('img')
numerb_of_arr = len(img_urls)#获取当前页面图片的数量
title = soup.find_all('