Python 爬取京东笔记本电脑的标题、品牌、价格、评论数

import requests
import re
import json
import time
from bs4 import BeautifulSoup
# Request headers passed to every requests.get call in this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/62.0.3202.94 Safari/537.36'
    ),
}

#Collect the product IDs listed on one search-result page
def get_bianhao(url):
    """Return the product IDs linked from one search-result page.

    Args:
        url: Address of the search-result page to download.

    Returns:
        A list of digit strings, one for each product-image link whose
        ``href`` contains ``//item.jd.com/<digits>.html``, in page order.
        Links that do not match (or that have no ``href``) are skipped.

    Raises:
        Whatever ``requests.get`` raises (for example on timeout) propagates
        to the caller.
    """
    res = requests.get(url, headers=headers, timeout=20)
    res.encoding = res.apparent_encoding
    soup = BeautifulSoup(res.text, 'html.parser')
    anchors = soup.select('#J_goodsList > ul > li > div > div.p-img > a')
    bianhaos = []
    for anchor in anchors:
        # .get() instead of ['href'] so an anchor without an href is skipped
        # rather than aborting the whole page with a KeyError.
        href = anchor.get('href', '')
        # Raw string with escaped dots: '\d' in a plain string is an invalid
        # escape sequence, and a bare '.' would match any character.
        match = re.search(r'//item\.jd\.com/(\d+)\.html', href)
        if match:
            bianhaos.append(match.group(1))
    return bianhaos
#Fetch a laptop's title and brand
# Shared accumulator: each scraper function appends single-key dicts to it.
total=[]
def get_shangpin(url):
    """Download a product page and record its title and brand in ``total``.

    Two single-key dicts are appended to the module-level ``total`` list,
    ``{'标题': ...}`` followed by ``{'品牌': ...}``. If either element is
    missing from the page, both values are recorded as the placeholder '空'.

    Args:
        url: Address of the product detail page.

    Returns:
        The module-level ``total`` list (the same object, after appending).

    Raises:
        Whatever ``requests.get`` raises (for example on timeout) propagates
        to the caller.
    """
    res = requests.get(url, headers=headers, timeout=20)
    res.encoding = res.apparent_encoding
    soup = BeautifulSoup(res.text, 'html.parser')
    titles = soup.select('body > div > div > div.itemInfo-wrap > div.sku-name')
    brands = soup.select('#parameter-brand > li > a')
    try:
        title1 = titles[0].text.strip()
        pinpai1 = brands[0].text.strip()
    except IndexError:
        # An empty selection means the element is absent. Only that case is
        # handled; a bare ``except`` would also swallow KeyboardInterrupt
        # and genuine programming errors.
        title1 = pinpai1 = '空'
    total.append({'标题': title1})
    total.append({'品牌': pinpai1})
    return total
#Fetch a laptop's price
def get_jiage(url):
    """Download a price response and record its price in ``total``.

    The response body is expected to be either a JSONP call such as
    ``callback([{...}]);`` or plain JSON. The callback wrapper is removed
    explicitly; ``str.lstrip``/``str.rstrip`` take a *set of characters*,
    not a prefix/suffix, so they are not a reliable way to unwrap it.

    Args:
        url: Address of the price endpoint for one product.

    Returns:
        The module-level ``total`` list after appending ``{'价格': <p>}``,
        where ``<p>`` is the ``'p'`` field of the (first) returned entry.

    Raises:
        ValueError: If the body is not valid JSON/JSONP or holds no entries.
        KeyError: If the entry has no ``'p'`` field.
        Whatever ``requests.get`` raises also propagates to the caller.
    """
    rj = requests.get(url, headers=headers, timeout=20)
    body = rj.text.strip()
    # Unwrap "callback( ... );" when present; otherwise treat body as JSON.
    wrapped = re.match(r'^[\w$.]+\s*\((.*)\)\s*;?$', body, re.DOTALL)
    payload = wrapped.group(1) if wrapped else body
    data = json.loads(payload)
    if isinstance(data, list):
        if not data:
            raise ValueError('price response contained no entries')
        # One product is requested per call, so the first entry is used.
        data = data[0]
    total.append({'价格': data['p']})
    return total
#Fetch a laptop's comment count
def get_pinglun(url):
    """Download a comment summary and record its count strings in ``total``.

    For every entry under the ``'CommentsCount'`` key of the JSON response,
    ``{'评论': <CommentCountStr>}`` is appended to the module-level
    ``total`` list.

    Args:
        url: Address of the comment-summary endpoint.

    Returns:
        The module-level ``total`` list (the same object, after appending).
    """
    response = requests.get(url, headers=headers, timeout=20)
    # Drop any single quotes surrounding the body before parsing it as JSON.
    raw_body = response.text.strip("'")
    summary = json.loads(raw_body)
    for entry in summary['CommentsCount']:
        record = {'评论': entry['CommentCountStr']}
        total.append(record)
    return total
#Walk the search pages, then plug each product ID into the item, price and comment URLs
if __name__ == '__main__':
    # Imported here so the whole scrape-and-export pipeline lives under the
    # main guard; importing this module no longer runs the export code
    # against an empty ``total``.
    import numpy as np
    import pandas as pd

    for i in range(1,11):
        # The page parameter takes the odd values 1, 3, ..., 19.
        URL='https://search.jd.com/search?keyword=%E7%AC%94%E8%AE%B0%E6%9C%AC%E7%94%B5%E8%84%91&enc=utf-8&qrst=1&rt=1&stop=1&vt=2&suggest=1.def.0.V01&wq=%E7%AC%94%E8%AE%B0%E6%9C%AC&page='+str(2 * i - 1)
        # ``sku_id`` rather than ``id`` to avoid shadowing the builtin.
        for sku_id in get_bianhao(URL):
            URLS='https://item.jd.com/'+sku_id+'.html'
            print(sku_id)
            get_shangpin(URLS)
            URLJ='https://p.3.cn/prices/mgets?callback=jQuery2224728&type=1&area=1_72_2799_0&pdtk=&pduid=401650232&pdpin=&pin=null&pdbp=0&skuIds=J_'+sku_id+'%2C'
            get_jiage(URLJ)
            URLP='https://club.jd.com/comment/productCommentSummaries.action?referenceIds='+sku_id
            get_pinglun(URLP)
            # Short pause between products.
            time.sleep(0.5)

    print(total)
    # Every dict in ``total`` has a single key, so each DataFrame row has
    # exactly one populated column.
    df=pd.DataFrame(total)
    #df.to_excel('jingdongbijiben.xls')

    # Compact each column by dropping its empty cells, then stitch the four
    # columns back together by position. This requires all four columns to
    # hold the same number of values; otherwise np.vstack raises.
    a=df['品牌'].dropna().values
    b=df['价格'].dropna().values
    c=df['评论'].dropna().values
    d=df['标题'].dropna().values
    vv=np.vstack((a,b,c,d)).T
    df1=pd.DataFrame(vv,columns=['品牌','价格','评论','标题'])
    # NOTE(review): writing the legacy '.xls' format depends on the installed
    # pandas/Excel engine still supporting it — confirm in the target env.
    df1.to_excel('笔记本.xls')

  • 0
    点赞
  • 15
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值