先上图:
源码:
from urllib import request
from bs4 import BeautifulSoup
import os
# Create the output folder inside the current working directory.
curPath = os.getcwd()
temPath = 'tu2345'
# os.path.join avoids a doubled separator when the working directory
# already ends with one (for example the filesystem root).
targetPath = os.path.join(curPath, temPath)
if not os.path.exists(targetPath):
    os.makedirs(targetPath)
    print("tiantang新建成功")
else:
    print("路径已经存在")
# get_img fetches a page and downloads the images found in it
def get_img(list_url, save_dir="tu2345"):
    """Download the images found in the page's ``div.main_cont`` element.

    The page at ``list_url`` is fetched, decoded as GBK and parsed with
    BeautifulSoup using the ``html5lib`` parser.  Every ``<img>`` inside the
    first ``<div class="main_cont">`` that carries a ``src`` attribute is
    saved as ``<save_dir>/<n>.jpg``, where ``n`` counts the saved images
    starting at 1.

    Args:
        list_url: URL of the page to scan for images.
        save_dir: Directory the image files are written into.  This function
            does not create it.

    Returns:
        None.  Progress is reported with ``print``.
    """
    # Imported locally so the function does not need an extra file-level import.
    from urllib.parse import urljoin

    # Release the HTTP connection as soon as the body has been read.
    with request.urlopen(list_url) as response:
        page = response.read()
    html_doc = page.decode('gbk')
    soup = BeautifulSoup(html_doc, "html5lib")

    div = soup.find('div', class_='main_cont')
    if div is None:
        # Without this guard a page lacking the container raised
        # AttributeError on the find_all() call below.
        print("未找到 main_cont 区域:" + list_url)
        return

    saved = 0
    for img in div.find_all('img'):
        src = img.get('src')
        if not src:
            # An <img> without a usable src cannot be downloaded.
            continue
        # Resolve relative and protocol-relative sources against the page URL;
        # absolute URLs pass through unchanged.
        img_url = urljoin(list_url, src)
        saved += 1
        print(img_url)
        img_path = os.path.join(save_dir, str(saved) + ".jpg")
        # Download the remote file; the second argument is the local file name.
        request.urlretrieve(img_url, img_path)
        print("----第" + str(saved) + "张保存完成----")
# Page whose images are downloaded, then echoed for reference.
url = "http://news.duote.com/55/162784.html"
get_img(url)
print("当页网址:{}".format(url))
初学,作记号。