# https://www.bilibili.com/video/av12721444/
# 代码如下(Python 3 版本,20180704 可用)
# -*- coding: utf-8 -*-
"""Download every image referenced by an <img> tag on a single web page."""
import os
import urllib.request
from urllib.parse import urljoin

from bs4 import BeautifulSoup

# NOTE: In Python 3 the old urllib and urllib2 modules were merged into the
# urllib package, so urllib2.urlopen() becomes urllib.request.urlopen().
# The submodule has to be imported explicitly ("import urllib.request");
# a bare "import urllib" does not guarantee that urllib.request is loaded.
#
# Python 2 only, not needed on Python 3:
# import sys
# reload(sys)
# sys.setdefaultencoding('utf-8')

url = 'http://www.dbmeinv.com/?paper_offset=1'


def crawl(url):
    """Fetch *url* and save every image it references into ``image/``.

    Files are written as ``image/0.jpg``, ``image/1.jpg``, ... in the order
    the <img> tags appear in the page. The ``image`` directory is created
    in the current working directory when it does not exist yet.

    Args:
        url: Address of the page to scan for <img> tags.

    Returns:
        None. Progress is reported on standard output.

    Raises:
        urllib.error.URLError: If the page or one of the images cannot be
            retrieved (this includes urllib.error.HTTPError).
    """
    # Anti-crawler workaround: without a browser-like User-Agent the request
    # may fail or return a page without the wanted content.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'}
    req = urllib.request.Request(url, headers=headers)

    # The timeout keeps an unresponsive server from hanging the script;
    # the context manager closes the connection once the body is read.
    with urllib.request.urlopen(req, timeout=20) as page:
        contents = page.read()

    soup = BeautifulSoup(contents, 'html.parser')
    my_girl = soup.find_all('img')  # every <img> tag on the page

    # urlretrieve() does not create missing directories.
    os.makedirs('image', exist_ok=True)

    x = 0
    for girl in my_girl:
        link = girl.get('src')
        if not link:
            # <img> without a usable src attribute: nothing to download.
            continue
        # Resolve relative and protocol-relative paths against the page URL;
        # absolute links are returned unchanged.
        link = urljoin(url, link)
        print(link)
        urllib.request.urlretrieve(link, os.path.join('image', '%s.jpg' % x))
        x += 1
        print("正在下载第%s张" % x)


if __name__ == '__main__':
    crawl(url)


# Scratch examples kept from the tutorial. They are inactive string
# literals and are never executed.
'''
html=''
soup=BeautifulSoup(open('a.html'),'html.parser')
print(soup.prettify())
'''

'''
html='<title>同学们都很棒</title>'
soup=BeautifulSoup(html,'html.parser')  # create the object and parse the page
print(soup.title)
'''

# Two ways to build the URL of another page:
# 'http://www.dbmeinv.com/?paper_offset=%s' % '2'      (% placeholder)
# 'http://www.dbmeinv.com/?paper_offset={}'.format(2)  (str.format)