import requests
import re
import traceback
import cchardet
class taobao_test():
    """Fetch a Taobao search result page and report its total page count.

    The class sends browser-like headers (including a session cookie) to
    ``s.taobao.com`` and extracts the ``totalPage`` value from the returned
    HTML.
    """

    # Matches `"totalPage":100`, tolerating whitespace around the colon and an
    # optional opening quote before the digits. Capturing digits only (rather
    # than "everything up to the next comma") keeps trailing `}` or other
    # characters out of the result when the field is last in its JSON object.
    _TOTAL_PAGE_RE = re.compile(r'"totalPage"\s*:\s*"?(\d+)')

    def __init__(self):
        """Build the request headers used for every search request."""
        # NOTE(review): the 'cookie' value below appears to be a captured
        # logged-in browser session. Hard-coding it ships credentials with the
        # source and it will stop working once the session expires; consider
        # loading it from configuration or the environment instead.
        self.headers = {
            'authority': 's.taobao.com',
            'cache-control': 'max-age=0',
            'upgrade-insecure-requests': '1',
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
            'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'sec-fetch-site': 'same-origin',
            'sec-fetch-mode': 'navigate',
            'sec-fetch-user': '?1',
            'sec-fetch-dest': 'document',
            'accept-language': 'zh-CN,zh;q=0.9',
            'cookie': 't=a29d623a8f843100e07a63fa6a9be7ba; hng=CN%7Czh-CN%7CCNY%7C156; thw=cn; _uab_collina=159425481667321500770523; enc=9k7j%2BefQC8DY0alPn6s1Qbz46U%2FdE2Gx6O5BWuJzsMWrKaO78Lg0E7NUzrhm%2B1NA%2FEiWvg5F%2FgKjcgthXhL6hf3tDTWy4ketlUj6Yv%2FUMtI%3D; _m_h5_tk=0acf5876dddb30e57299fa1407e2c930_1594299384839; _m_h5_tk_enc=a15cf27eb68d099f08337ee4e959149d; cna=Ah+LF0eH8SgCAXPI7lIwMTDC; tfstk=cq71BdNX7AD_oMEq71NeQIQLGgLlawcWGl9G1MlrSK6Ek71ppsf-zLOEsiDxJ3dC.; mt=ci=0_1; miid=1887535964136107701; UM_distinctid=173380f98c721c-0d53f3349bb708-3b634404-1fa400-173380f98c84e2; _samesite_flag_=true; cookie2=12ff06ecb739e1bc76dfeb455bfe4d5e; _tb_token_=5553bee7e13e6; v=0; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; unb=2208491329497; uc3=nk2=AQWXT8s4%2FnI%3D&vt3=F8dBxGPozEqMcvXi%2B%2F4%3D&id2=UUphwoPq8l6RSWmJuw%3D%3D&lg2=WqG3DMC9VAQiUQ%3D%3D; csg=f1edc9ec; lgc=baibo_10; cookie17=UUphwoPq8l6RSWmJuw%3D%3D; dnk=baibo_10; skt=e72529c01c753298; existShop=MTU5NDc5MzI5OA%3D%3D; uc4=id4=0%40U2grGRvmz3awCzNAzYX1fCs5tlpkNbS8&nk4=0%40A6jN89MCWyt2dd100r8Ufl3OMg%3D%3D; tracknick=baibo_10; _cc_=URm48syIZQ%3D%3D; _l_g_=Ug%3D%3D; sg=076; _nk_=baibo_10; cookie1=UoM%2BHZ3d9KVW%2FcqOuJndb4N9gmDoxQfoATwuiyb0MCI%3D; sgcookie=E7rpYI1B8csTVgxratdKp; JSESSIONID=B243796F8BFBEBF83614D43A87D99F4D; uc1=cookie14=UoTV6OZXG1oslA%3D%3D&cookie16=VFC%2FuZ9az08KUQ56dCrZDlbNdA%3D%3D&existShop=false&cookie21=VT5L2FSpdiBh&cookie15=V32FPkk%2Fw0dUvg%3D%3D&pas=0; l=eBQmYnO4Ojps2AjCBOfwourza77OSIRAguPzaNbMiOCPO35p58mVWZlFjGY9C3GVh6VeR379TqKgBeYBqIv4n5U62j-la_kmn; isg=BKqqAMuTgWv1KA0Pv1eUOgTP-xBMGy515em90TRjVv2IZ0ohHKt-hfCd95P7l6YN',
        }

    def num_page(self):
        """Download the first search page and print its total page count.

        Reads the module-level names ``num_url`` (a URL template with
        ``{search_key}`` and ``{s}`` placeholders) and ``search_key``; both
        must be defined before this method is called. Returns ``None``.
        """
        init_url = num_url.format(search_key=search_key, s=0)
        # Only the body is needed here; status and final URL are ignored.
        _, html, _ = self.downloader(url=init_url, headers=self.headers)
        page_count = self.parsePage(html)
        print(f'{search_key}的商品总页数:{page_count}')

    def downloader(self, url, timeout=10, headers=None, params=None, debug=False, binary=False):
        """GET ``url`` and return ``(status, body, final_url)`` without raising.

        :param url: address to request.
        :param timeout: value passed to ``requests.get`` as ``timeout``.
        :param headers: request headers; when empty or ``None`` a default
            ``User-Agent`` header is sent instead.
        :param params: query parameters passed through to ``requests.get``.
        :param debug: when true, print the traceback of a failed request.
        :param binary: when true the body is ``response.content`` (bytes),
            otherwise ``response.text`` (str).
        :return: ``(status_code, body, response.url)`` on success. On any
            failure a message is printed and ``(0, '', url)`` is returned
            (``b''`` instead of ``''`` when ``binary`` is true).
        """
        request_headers = headers or {
            'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0)',
        }
        try:
            response = requests.get(url, headers=request_headers, params=params, timeout=timeout)
            body = response.content if binary else response.text
            return response.status_code, body, response.url
        except Exception:
            # Deliberately best-effort: any failure is reported and mapped to
            # status 0. `Exception` (not a bare except) lets KeyboardInterrupt
            # and SystemExit propagate.
            if debug:
                traceback.print_exc()
            print('failed download: {}'.format(url))
            return 0, (b'' if binary else ''), url

    def parsePage(self, html):
        """Return the ``totalPage`` value found in ``html`` as a digit string.

        :param html: page source to search.
        :return: the digits following the first ``"totalPage":`` occurrence,
            or ``""`` when ``html`` is not a ``str`` or contains no match.
        """
        if not isinstance(html, str):
            return ""
        found = self._TOTAL_PAGE_RE.search(html)
        return found.group(1) if found else ""
if __name__ == '__main__':
    # Script entry point: look up how many result pages a keyword search has.
    # num_page() reads `search_key` and `num_url` as module-level names, so
    # both are bound here before the call.
    search_key = 'ipad'
    num_url = 'https://s.taobao.com/search?q={search_key}&s={s}'
    crawler = taobao_test()
    crawler.num_page()
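# python项目实例——登录淘宝拿总页数 (Python project example: log in to Taobao and get the total page count)
# 最新推荐文章于 2023-01-16 14:46:30 发布 (source page note: latest recommended article published 2023-01-16 14:46:30)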