import gevent
import gevent.monkey
gevent.monkey.patch_all()
import requests
from lxml import html
import re
import json
import csv
# Keywords to search on Tmall ("麦斯威尔" = Maxwell House).
word_lists = ["麦斯威尔"]

for key_word in word_lists:
    # Build the Tmall search-result URL for this keyword.
    url = "https://list.tmall.com/search_product.htm?q={}".format(key_word)
    print(url)

    # Fetch the search-result page. A timeout keeps a dead connection
    # from hanging the crawler forever (original had none).
    htmll = requests.get(url, timeout=10).text
    tree = html.etree.HTML(htmll)

    print(len(tree.xpath('//div[@class="product-iWrap"]')))
    print("++++++++++++++++++")

    # Hrefs of the first product-title link inside each product card.
    links = tree.xpath(
        '//div[@class="product-iWrap"]'
        '//*[@class="productTitle" or contains(@class,"productTitle")]/a[1]/@href'
    )

    # Only the first 3 products are inspected (original hard-coded range(3)).
    for product_url in links[:3]:
        print(product_url)

        # Pull product id and seller id out of the link's query string.
        # Guard against links missing either parameter instead of
        # crashing on findall(...)[0] as the original did.
        id_match = re.search(r'[&?]id=(.*?)&', product_url)
        seller_match = re.search(r'user_id=(.*?)&', product_url)
        if id_match is None or seller_match is None:
            continue
        product_id = id_match.group(1)
        print(product_id)
        sell_id = seller_match.group(1)

        # NOTE(review): the scraped source is truncated here — it began
        # building a detail-page URL ("https://detail.tmall.com/item.htm?id=...")
        # from product_id/sell_id, but the rest of the crawler was not
        # captured and cannot be reconstructed without guessing.
# Blog metadata from the scraped source (not code):
# "Python - Tmall (天猫) data crawler" — latest recommended article published 2024-05-01 08:56:31.