##尝试爬取当当网上的各种信息
import requests
from bs4 import BeautifulSoup
import time
import json
# HTTP request headers sent with every page fetch (passed as ``headers=header``
# to ``requests.get``). They imitate a desktop browser arriving from a
# Dangdang search-results page.
header = {
    # Referer: a Dangdang search URL for the keyword "python" (page_index=3).
    "Referer": (
        "http://search.dangdang.com/"
        "?key=python&%253Bact=input&%253Bpage_index=%7B%7D"
        "&_ddclickunion=P-295132-199857_64_0_ZGljdHNfZ29vZ2xl_1%7Cad_type"
        "&page_index=3"
    ),
    # Browser identification string (Chrome 49 on Windows 10, EastBrowser).
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/49.0.2623.112 Safari/537.36 EastBrowser/2.1"
    ),
    # Prefer Simplified Chinese content, then any Chinese variant.
    "Accept-Language": "zh-CN,zh;q=0.9",
    # NOTE(review): Accept-Ranges is normally a *response* header; it is kept
    # here unchanged, but it presumably has no effect on the server - confirm
    # before removing.
    "Accept-Ranges": "bytes",
    # Accept any response media type.
    "Accept": "*/*",
}
def get_links(url,list):
wb_data = requests.get(url, headers=header)
soup = BeautifulSoup(wb_data.text, 'lxml')
links = soup.select('p.name > a')#将url链接都放在links里面去
for link in links:
href = link.get("href")#这里取得整个页面的链接
get_inf
当当网数据爬取
最新推荐文章于 2023-10-19 00:18:29 发布