import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent


# JD.com book information scraper.
def getImg(isbn, *, timeout=10):
    """Return the product image URLs found on a JD.com search page.

    The search page for ``isbn`` is downloaded with a random User-Agent
    header and parsed; for every ``<div class="p-img">`` element the
    ``source-data-lazy-img`` attribute of its first ``<img>`` tag is
    collected and prefixed with ``http:``.

    Args:
        isbn: Search keyword (an ISBN) sent as the ``keyword`` query
            parameter.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Keyword-only; defaults to 10.

    Returns:
        A list of image URL strings, in page order. Entries that have no
        ``<img>`` tag or no ``source-data-lazy-img`` attribute are skipped,
        so the list is empty when nothing matches.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    # Let requests build the query string so the keyword is URL-encoded.
    params = {
        'keyword': isbn,
        'enc': 'utf-8',
        'pvid': 'ce5ca7fd71e34233be029f688ac48bce',
    }
    headers = {'User-Agent': UserAgent().random}

    response = requests.get(
        'https://search.jd.com/Search',
        params=params,
        headers=headers,
        timeout=timeout,
    )
    # Force UTF-8 decoding so the page text is not garbled.
    response.encoding = 'utf-8'

    soup = BeautifulSoup(response.text, 'lxml')

    imgs = []
    for img_div in soup.find_all('div', {'class': 'p-img'}):
        img_tag = img_div.find('img')
        if img_tag is None:
            continue
        src = img_tag.get('source-data-lazy-img')
        if not src:
            # Attribute missing or empty: nothing usable to return.
            continue
        # NOTE(review): the value is presumably a protocol-relative URL
        # ("//host/path"), hence the scheme prefix -- confirm against the page.
        imgs.append('http:' + src)
    return imgs


if __name__ == '__main__':
    print('京东图书信息抓取开始。。。')
    imgs = getImg('9787533735609')
    print(imgs)
    print('京东网图书信息抓取结束。。。')
# The code above is provided for learning and reference purposes only.