BeautifulSoup需要掌握:
1.find()方法
2.find_all()方法
requests:基本掌握 get() 方法就行。
代码如下:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Listing page whose album links are crawled.
url = "https://www.umei.cc/meinvtupian/meinvxiezhen/"
# Seconds to wait for any single HTTP request; requests has no default
# timeout, so without one a stalled connection blocks the script forever.
REQUEST_TIMEOUT = 10

resp = requests.get(url, timeout=REQUEST_TIMEOUT)
resp.raise_for_status()
# Response.encoding expects ONE codec name. The original assigned the tuple
# ("gbk", "gb232"), which is not a valid codec name; let requests detect the
# charset from the body instead of guessing between codecs.
resp.encoding = resp.apparent_encoding
soup = BeautifulSoup(resp.text, "html.parser")

# find() returns None when the container is absent; guard before chaining
# so a layout change yields "nothing to download" rather than AttributeError.
type_list = soup.find("div", class_="TypeList")
fath_page = type_list.find_all("a", class_="TypeBigPics") if type_list is not None else []

n = 1  # 1-based counter used for the output file names (1.jpg, 2.jpg, ...)
for a in fath_page:
    href = a.get("href")
    if not href:
        continue
    # Resolve relative links against the listing page URL.
    page_url = urljoin(url, href)

    try:
        c_resp = requests.get(page_url, timeout=REQUEST_TIMEOUT)
        c_resp.raise_for_status()
    except requests.RequestException as err:
        print(f"跳过 {page_url}: {err}")
        continue
    c_resp.encoding = "utf-8"
    c_soup = BeautifulSoup(c_resp.text, "html.parser")

    # Walk div.ImageBody -> img -> src step by step; any missing link in the
    # chain means this page has no picture to save.
    image_body = c_soup.find("div", class_="ImageBody")
    img = image_body.find("img") if image_body is not None else None
    c_page = img.get("src") if img is not None else None
    if not c_page:
        print(f"跳过 {page_url}: 未找到图片")
        continue

    try:
        img_resp = requests.get(urljoin(page_url, c_page), timeout=REQUEST_TIMEOUT)
        # Without this check an HTML error page would be written as N.jpg.
        img_resp.raise_for_status()
    except requests.RequestException as err:
        print(f"跳过 {page_url}: {err}")
        continue

    with open(f"{n}.jpg", mode="wb") as f:
        f.write(img_resp.content)
    print(f"下载了{n}张")
    n += 1
beta 版本(嘻嘻,有待改进):
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Listing page whose album links are crawled.
url = "https://www.umei.cc/meinvtupian/meinvxiezhen/"
# Seconds to wait for any single HTTP request; requests has no default
# timeout, so without one a stalled connection blocks the script forever.
REQUEST_TIMEOUT = 10


def sub_page_url(album_url, page):
    """Return the URL of page number *page* of an album.

    The album's first page is ``<stem>.htm``; later pages are built as
    ``<stem>_<page>.htm``. A trailing ``.htm`` is stripped only when it is
    actually present (the original sliced off the last four characters
    unconditionally).
    """
    album_url = str(album_url)
    stem = album_url[:-4] if album_url.endswith(".htm") else album_url
    return f"{stem}_{page}.htm"


def get_stu(stu):
    """Download the picture shown on the album page at URL *stu*.

    Returns the raw image bytes, or ``None`` when the page or the image
    cannot be fetched or the page contains no ``div.ImageBody img``.
    """
    try:
        c_resp = requests.get(stu, timeout=REQUEST_TIMEOUT)
        c_resp.raise_for_status()
    except requests.RequestException:
        return None
    c_resp.encoding = "utf-8"
    c_soup = BeautifulSoup(c_resp.text, "html.parser")

    # find() returns None for a missing element, so check each step instead
    # of chaining calls and catching AttributeError.
    image_body = c_soup.find("div", class_="ImageBody")
    img = image_body.find("img") if image_body is not None else None
    c_page = img.get("src") if img is not None else None
    if not c_page:
        return None

    try:
        img_resp = requests.get(urljoin(stu, c_page), timeout=REQUEST_TIMEOUT)
        # Without this check an HTML error page would be saved as an image.
        img_resp.raise_for_status()
    except requests.RequestException:
        return None
    return img_resp.content


def main():
    """Crawl the listing page and save pages 2-19 of every album as N.jpg."""
    resp = requests.get(url, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    # Response.encoding expects ONE codec name; the original assigned the
    # tuple ("gbk", "gb232"), which is not a valid codec name. Let requests
    # detect the charset from the body instead.
    resp.encoding = resp.apparent_encoding
    soup = BeautifulSoup(resp.text, "html.parser")

    type_list = soup.find("div", class_="TypeList")
    fath_page = type_list.find_all("a", class_="TypeBigPics") if type_list is not None else []

    n = 1  # 1-based counter used for the output file names (1.jpg, 2.jpg, ...)
    for a in fath_page:
        href = a.get("href")
        if not href:
            continue
        # Resolve relative links against the listing page URL.
        album_url = urljoin(url, href)
        for i in range(2, 20):
            stu = sub_page_url(album_url, i)
            print(stu)
            new_url = get_stu(stu)
            if new_url is None:
                # Treated as "this album has no further pages"; a network
                # failure also ends the album early.
                break
            with open(f"{n}.jpg", mode="wb") as f:
                f.write(new_url)
            print(f"下载了{n}张")
            n += 1


if __name__ == "__main__":
    main()