python通过关键字搜索淘宝商品详细信息

最新推荐文章于 2024-08-09 10:04:21 发布

消灭八阿哥

最新推荐文章于 2024-08-09 10:04:21 发布

阅读量1.5w

点赞数 2

分类专栏： python

本文链接：https://blog.csdn.net/lx91216/article/details/77989081

版权

python 专栏收录该内容

3 篇文章 0 订阅

订阅专栏

# coding=utf-8
import urllib.request
import re
from urllib.request import urlopen, urlretrieve

#打开网页，获取网页内容
def url_open(url):
    headers=("user-agent","Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0")
    opener=urllib.request.build_opener()
    opener.addheaders=[headers]
    urllib.request.install_opener(opener)
    data=urllib.request.urlopen(url).read().decode("utf-8","ignore")
    return data

if __name__=='__main__':
    try:
        #定义要查询的商品关键词
        keywd="女朋友礼物"
        keywords=urllib.request.quote(keywd)
        #定义要爬取的页数
        num=100
        for i in range(num):
            url="https://s.taobao.com/search?q="+keywords+"&imgfile=&commend=all&ssid=s5-e&search_type=item&sourceId=tb.index&spm=a21bo.50862.201856-taobao-item.1&ie=utf8&bcoffset=4&ntoffset=4&p4ppushleft=1%2C48&s="+str(i*44)
            data=url_open(url)
            #定义各个字段正则匹配规则
            img_pat='"pic_url":"(//.*?)"'
            name_pat='"raw_title":"(.*?)"'
            nick_pat='"nick":"(.*?)"'
            price_pat='"view_price":"(.*?)"'
            fee_pat='"view_fee":"(.*?)"'
            sales_pat='"view_sales":"(.*?)"'
            comment_pat='"comment_count":"(.*?)"'
            city_pat='"item_loc":"(.*?)"'
            #查找满足匹配规则的内容，并存在列表中
            imgL=re.compile(img_pat).findall(data)
            nameL=re.compile(name_pat).findall(data)
            nickL=re.compile(nick_pat).findall(data)
            priceL=re.compile(price_pat).findall(data)
            feeL=re.compile(fee_pat).findall(data)
            salesL=re.compile(sales_pat).findall(data)
            commentL=re.compile(comment_pat).findall(data)
            cityL=re.compile(city_pat).findall(data)

            for j in range(len(imgL)):
                img="http:"+imgL[j]#商品图片链接
                name=nameL[j]#商品名称
                nick=nickL[j]#淘宝店铺名称
                price=priceL[j]#商品价格
                fee=feeL[j]#运费
                sales=salesL[j]#商品付款人数
                comment=commentL[j]#商品评论数，会存在为空值的情况
                if(comment==""):
                    comment=0
                city=cityL[j]#店铺所在城市
                print('正在爬取第'+str(i)+"页，第"+str(j)+"个商品信息...")
                description=str(price)+"#"+str(sales)+"#"+nick+"#"+name
                try:
                    urllib.request.urlretrieve(img,'e://taobao//'+description+".jpg")
                except Exception as e:
                    print(e)
    except Exception as e:
        print(str(e))
    print("任务完成")