《2018年7月17日》【连续289天】
标题:使用BeautifulSoup练习京东商品爬取;
内容:
1.
import requests
from urllib.parse import urlencode
from bs4 import BeautifulSoup
from requests.exceptions import RequestException
def find_goods(page, s, keyword):
    """Fetch one JD search-result page for ``keyword``.

    Args:
        page: Value sent as the ``page`` query parameter.
        s: Value sent as the ``s`` query parameter (presumably the offset of
            the first item on the page -- confirm against the site).
        keyword: Search term; sent as both ``keyword`` and ``wq``.

    Returns:
        The ``requests.Response`` (with its encoding forced to UTF-8) when
        the server answers with HTTP 200; ``None`` for any other status or
        when the request raises ``RequestException`` (timeouts included).
    """
    params = {
        'keyword': keyword,
        'enc': 'utf-8',
        'qrst': '1',
        'rt': '1',
        'stop': '1',
        'vt': '2',
        'wq': keyword,
        'page': page,
        's': s,
        'click': '0',
    }
    url = 'https://search.jd.com/Search?' + urlencode(params)
    # The header value must be the bare UA string; repeating the
    # "User-Agent: " header name inside the value sends a malformed UA.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) '
                      'Gecko/20100101 Firefox/4.0.1',
    }
    try:
        # Without a timeout requests can block forever on a stalled server.
        response = requests.get(url, headers=headers, timeout=10)
    except RequestException:
        return None
    if response.status_code != 200:
        return None
    response.encoding = "utf-8"
    return response
def print_goods(r):
    """Print the link of every anchor inside the search-result list.

    Args:
        r: Response-like object whose ``text`` attribute holds the page
            HTML, or ``None`` when the fetch failed.

    Each ``href`` found is printed on its own line with ``http:`` prepended.
    Nothing is printed when ``r`` is ``None`` or when the page contains no
    matching ``<ul>`` element.
    """
    # A failed fetch is reported as None; there is nothing to parse then.
    if r is None:
        return
    soup = BeautifulSoup(r.text, 'lxml')
    result_list = soup.find(name='ul', attrs={"class": ['gl-warp', 'clearfix']})
    # find() returns None when the expected list is missing from the page
    # (e.g. the layout changed or an error page was served).
    if result_list is None:
        return
    # href=True skips anchors without an href, which would raise KeyError.
    for anchor in result_list.find_all(name='a', href=True):
        print('http:' + anchor['href'])
def main(keyword):
    """Request five search pages for ``keyword`` and print their links.

    Args:
        keyword: Search term forwarded to ``find_goods``.

    Pages are requested with ``page`` = 1, 3, 5, 7, 9 and ``s`` = 1, 54,
    107, 160, 213. Pages that could not be fetched are skipped.
    """
    for i in range(5):
        # Compute each value from the index; accumulating with += made the
        # step grow every iteration (pages 1, 3, 7, 13, 21).
        page = 1 + 2 * i
        s = 1 + 53 * i
        response = find_goods(page, s, keyword)
        if response is None:
            # find_goods signals a failed request with None.
            continue
        print_goods(response)
if __name__ == "__main__":
    # Script entry point: run the crawl for the sample search term
    # (the Chinese word for "watch").
    main("手表")