import re
import time
from urllib.parse import urlencode
import requests
def parse_cookie_header(raw):
    """Convert a browser "Cookie" header string into a name -> value dict.

    The string is split on ";". Each fragment is split on its first "=" so
    that values which themselves contain "=" stay intact. Whitespace around
    names and values is stripped, because browsers separate cookies with
    "; " and the leading space would otherwise end up in the cookie name.
    Fragments without "=" or with an empty name (for example the empty piece
    left by a trailing ";") are skipped instead of raising ValueError.

    Args:
        raw: Cookie string such as "a=1; b=2".

    Returns:
        dict mapping cookie names to values; empty when nothing is parseable.
    """
    parsed = {}
    for fragment in raw.split(";"):
        name, sep, value = fragment.partition("=")
        name = name.strip()
        if not sep or not name:
            continue
        parsed[name] = value.strip()
    return parsed


# Manually paste the cookie string copied from a logged-in browser session.
# While the placeholder below is left unchanged it contains no "name=value"
# pair, so `cookie` ends up as an empty dict.
cookie = "登录后的cookies"
cookie = parse_cookie_header(cookie)
class TaoBao:
    """Fetch Taobao search-result pages for a keyword and print each listing.

    Listings are pulled out of the raw page text with a regular expression;
    each match is turned into a dict and printed.

    Args:
        query: Search keyword sent as the ``q`` parameter.
        pages: Number of result pages to request (default 2).
        delay: Seconds to sleep before building each page URL (default 3).
        timeout: Seconds passed to ``requests.get`` as ``timeout`` so a
            stalled connection cannot block forever (default 10).
        cookies: Cookie dict to send. When None, the module-level ``cookie``
            dict is used.
    """

    # Captures, in order: raw_title, detail_url, view_price, item_loc,
    # view_sales, comment_count, nick. Compiled once for all calls.
    _ITEM_PATTERN = re.compile(
        r'"raw_title":"(.*?)","pic_url":".*?","detail_url":"(.*?)","view_price":"(.*?)","view_fee":".*?","item_loc":"(.*?)","view_sales":"(.*?)","comment_count":"(.*?)","user_id":".*?","nick":"(.*?)","shopcard"',
        re.S,
    )
    # Dict keys, in the same order as the capture groups above.
    _ITEM_FIELDS = (
        "raw_title",
        "detail_url",
        "view_price",
        "item_loc",
        "view_sales",
        "comment_count",
        "nick",
    )

    def __init__(self, query, pages=2, delay=3, timeout=10, cookies=None):
        self.headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.131 Safari/537.36"}
        # Only fall back to the module-level dict when no cookies are given.
        self.cookies = cookie if cookies is None else cookies
        self.base_url = "https://s.taobao.com/search?"
        self.query = query
        self.pages = pages
        self.delay = delay
        self.timeout = timeout

    def run(self):
        """Request every page URL in turn and print the listings found."""
        for url in self.get_url():
            print(url)
            res = self.get_message(url)
            self.get_parse(res)

    def get_parse(self, res):
        """Extract listings from a response, print them and return them.

        Args:
            res: Object whose ``content`` attribute holds the page as bytes.

        Returns:
            list of dicts, one per regex match, keyed by ``_ITEM_FIELDS``.
            The list is empty when the page contains no match.
        """
        page_text = res.content.decode()
        items = []
        # NOTE: values are captured verbatim from the page, so detail_url may
        # still contain escapes such as \u003d and \u0026.
        for captured in self._ITEM_PATTERN.findall(page_text):
            item = dict(zip(self._ITEM_FIELDS, captured))
            print(item)
            items.append(item)
        return items

    def get_message(self, url):
        """GET ``url`` with the configured headers, cookies and timeout."""
        return requests.get(
            url,
            headers=self.headers,
            cookies=self.cookies,
            timeout=self.timeout,
        )

    def get_url(self):
        """Yield one search URL per page, sleeping ``delay`` seconds before each.

        The ``s`` parameter advances by 48 per page. After the consumer asks
        for the next URL, a "page finished" line is printed for the previous
        page.
        """
        for i in range(self.pages):
            time.sleep(self.delay)
            params = {
                "q": self.query,
                "bcoffset": "4",
                # Pass the raw value: urlencode() turns "," into "%2C".
                # Supplying the already-encoded "%2C48" would be encoded a
                # second time and sent as "%252C48".
                "p4ppushleft": ",48",
                "ntoffset": "4",
                "s": str(48 * i),
            }
            yield self.base_url + urlencode(params)
            print("第", i+1, "下载结束")
# Name of the product to search for.
search_keyword = "电脑"
# Build the scraper for that keyword and start it immediately.
taobao = TaoBao(search_keyword)
taobao.run()