(1)
code:获取图片path及图片下面的名字
通过find_all("img")获取所有path url
from bs4 import BeautifulSoup
import requests

# Scrape every <img> on an Autohome picture-series page and print, for each
# image, the tag itself and an absolute https URL built from its src.
url = "https://car.autohome.com.cn//pic/series/5605-53.html#pvareaid=2042220"
source = requests.get(url).text
soup = BeautifulSoup(source, 'lxml')
details = soup.find_all("img")  # every <img> tag on the page
for img in details:
    print("======================================================")
    print(img)
    # Read the attributes directly instead of slicing str(tag): string
    # slicing breaks as soon as a tag carries extra attributes
    # (height/sizes/srcset), exactly what the second sample output shows.
    name = img.get("alt", "")       # caption shown under the picture
    pic_path = img.get("src", "")
    # src is protocol-relative ("//host/path"), so prepend "https:" only;
    # the original "https://" + sliced-path produced the broken
    # "https:host/path" URLs visible in the sample output.
    pic_url = "https:" + pic_path if pic_path.startswith("//") else pic_path
    print(pic_url)
print:
======================================================
<img alt="2021款 R" src="//car2.autoimg.cn/cardfs/product/g24/M0A/00/40/240x180_0_q95_c42_autohomecar__Chtk3WCU5BWAYilJAB_CUi1NNDw970.jpg" title="2021款 R"/>
https:car2.autoimg.cn/cardfs/product/g24/M0A/00/40/240x180_0_q95_c42_autohomecar__Chtk3WCU5BWAYilJAB_CUi1NNDw970.jpg
======================================================
<img alt="" height="80" sizes="80px" src="//x.autoimg.cn/car/images/loginrb1x.png" srcset="//x.autoimg.cn/car/images/loginrb1x.png
80w, //x.autoimg.cn/car/images/loginrb2x.png 160w" width="80"/>
https:x.autoimg.cn/car/images/loginrb1x.png" srcset="//x.autoimg.cn/car/images/loginrb1x.png
80w, //x.autoimg.cn/car/images/loginrb2x.png 160w" width="80"/
网页截图:
2、
code:通过find class_="page",获取text
from bs4 import BeautifulSoup
import requests

# Fetch the pager ("page") element of the picture-series page, print its
# text, and build an absolute URL for each plain numbered page link.
url = "https://car.autohome.com.cn//pic/series/5605-53.html#pvareaid=2042220"
source = requests.get(url).text
soup = BeautifulSoup(source, 'lxml')
details_page = soup.find(class_="page")  # pagination bar: 上一页 1 2 3 4 5 下一页
print(details_page.text)
for link in details_page:
    # Plain numbered links have an href but no class attribute; the current
    # page marker and the prev/next buttons carry a class and are skipped.
    if "href" in str(link) and "class" not in str(link):
        print(link)
        # href is site-relative (e.g. "/pic/series/5605-53-p2.html"), so
        # join it to the bare host — no trailing slash, otherwise the
        # result would contain "//pic/...". The original left this
        # concatenation commented out, and the j/cou loop built
        # "url + counter" strings that are not valid page URLs; both
        # replaced by reading the real href.
        next_page = "https://car.autohome.com.cn" + link.get("href")
print:
上一页12345下一页
<a href="/pic/series/5605-53-p2.html">2</a>
<a href="/pic/series/5605-53-p3.html">3</a>
<a href="/pic/series/5605-53-p4.html">4</a>
<a href="/pic/series/5605-53-p5.html">5</a>
3、
from bs4 import BeautifulSoup
import requests

# Same pager walk as snippet 2, but print each link's .contents (the list
# of children, e.g. ['2']) instead of the whole tag.
url = "https://car.autohome.com.cn//pic/series/5605-53.html#pvareaid=2042220"
source = requests.get(url).text
soup = BeautifulSoup(source, 'lxml')
details_page = soup.find(class_="page")  # pagination bar: 上一页 1 2 3 4 5 下一页
print(details_page.text)
for link in details_page:
    # Keep only plain numbered <a href=...> links (no class attribute).
    if "href" in str(link) and "class" not in str(link):
        print(link.contents)  # children of the tag, e.g. ['2']
        # Complete the URL from the tag's real href (the original line was
        # left unfinished as "#+ i."); the unused cou/new_url counter loop
        # is dropped — it produced no valid URL and was never read.
        next_page = "https://car.autohome.com.cn" + link.get("href")
print:
上一页12345下一页
['2']
['3']
['4']
['5']