Python 项目实例——登录淘宝获取总页数

import requests
import re
import traceback
import cchardet


class taobao_test:
	"""Fetch a Taobao search page and report how many result pages it has.

	The instance only holds the request headers. ``num_page`` is the entry
	point; ``downloader`` and ``parsePage`` are the fetch and parse steps.
	"""

	def __init__(self):
		# NOTE(review): the 'cookie' value below is a hard-coded login session
		# copied from a browser. It is a credential committed to source and is
		# presumably short-lived -- consider loading it from configuration or
		# the environment instead.
		self.headers = {
			'authority': 's.taobao.com',
			'cache-control': 'max-age=0',
			'upgrade-insecure-requests': '1',
			'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36',
			'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
			'sec-fetch-site': 'same-origin',
			'sec-fetch-mode': 'navigate',
			'sec-fetch-user': '?1',
			'sec-fetch-dest': 'document',
			'accept-language': 'zh-CN,zh;q=0.9',
			'cookie': 't=a29d623a8f843100e07a63fa6a9be7ba; hng=CN%7Czh-CN%7CCNY%7C156; thw=cn; _uab_collina=159425481667321500770523; enc=9k7j%2BefQC8DY0alPn6s1Qbz46U%2FdE2Gx6O5BWuJzsMWrKaO78Lg0E7NUzrhm%2B1NA%2FEiWvg5F%2FgKjcgthXhL6hf3tDTWy4ketlUj6Yv%2FUMtI%3D; _m_h5_tk=0acf5876dddb30e57299fa1407e2c930_1594299384839; _m_h5_tk_enc=a15cf27eb68d099f08337ee4e959149d; cna=Ah+LF0eH8SgCAXPI7lIwMTDC; tfstk=cq71BdNX7AD_oMEq71NeQIQLGgLlawcWGl9G1MlrSK6Ek71ppsf-zLOEsiDxJ3dC.; mt=ci=0_1; miid=1887535964136107701; UM_distinctid=173380f98c721c-0d53f3349bb708-3b634404-1fa400-173380f98c84e2; _samesite_flag_=true; cookie2=12ff06ecb739e1bc76dfeb455bfe4d5e; _tb_token_=5553bee7e13e6; v=0; alitrackid=www.taobao.com; lastalitrackid=www.taobao.com; unb=2208491329497; uc3=nk2=AQWXT8s4%2FnI%3D&vt3=F8dBxGPozEqMcvXi%2B%2F4%3D&id2=UUphwoPq8l6RSWmJuw%3D%3D&lg2=WqG3DMC9VAQiUQ%3D%3D; csg=f1edc9ec; lgc=baibo_10; cookie17=UUphwoPq8l6RSWmJuw%3D%3D; dnk=baibo_10; skt=e72529c01c753298; existShop=MTU5NDc5MzI5OA%3D%3D; uc4=id4=0%40U2grGRvmz3awCzNAzYX1fCs5tlpkNbS8&nk4=0%40A6jN89MCWyt2dd100r8Ufl3OMg%3D%3D; tracknick=baibo_10; _cc_=URm48syIZQ%3D%3D; _l_g_=Ug%3D%3D; sg=076; _nk_=baibo_10; cookie1=UoM%2BHZ3d9KVW%2FcqOuJndb4N9gmDoxQfoATwuiyb0MCI%3D; sgcookie=E7rpYI1B8csTVgxratdKp; JSESSIONID=B243796F8BFBEBF83614D43A87D99F4D; uc1=cookie14=UoTV6OZXG1oslA%3D%3D&cookie16=VFC%2FuZ9az08KUQ56dCrZDlbNdA%3D%3D&existShop=false&cookie21=VT5L2FSpdiBh&cookie15=V32FPkk%2Fw0dUvg%3D%3D&pas=0; l=eBQmYnO4Ojps2AjCBOfwourza77OSIRAguPzaNbMiOCPO35p58mVWZlFjGY9C3GVh6VeR379TqKgBeYBqIv4n5U62j-la_kmn; isg=BKqqAMuTgWv1KA0Pv1eUOgTP-xBMGy515em90TRjVv2IZ0ohHKt-hfCd95P7l6YN',
		}

	def num_page(self):
		"""Download the first search-result page and print its total page count.

		Reads two module-level names that must be bound before the call:
		``num_url`` (a URL template with ``{search_key}`` and ``{s}``
		placeholders) and ``search_key``. A ``NameError`` is raised if either
		is missing. Nothing is returned; the result is only printed, and the
		printed count is empty when the download or the parse failed.
		"""
		init_url = num_url.format(search_key=search_key, s=0)
		# Only the page body is needed here; status and final URL are ignored.
		_status, html, _final_url = self.downloader(url=init_url, headers=self.headers)
		page_count = self.parsePage(html)
		print(f'{search_key}的商品总页数:{page_count}')

	def downloader(self, url, timeout=10, headers=None, params=None, debug=False, binary=False):
		"""GET ``url`` and return ``(status, html, redirected_url)`` without raising.

		:param url: address to fetch.
		:param timeout: timeout passed to ``requests.get``.
		:param headers: request headers; when empty or ``None`` a default
			MSIE ``User-Agent`` header is sent instead.
		:param params: query parameters passed to ``requests.get``.
		:param debug: when true, print the traceback of a failed request.
		:param binary: when true, return the raw body bytes instead of text.
		:return: on success, the response status code, the body (``bytes`` if
			``binary`` else ``str``) and the final response URL. On any
			failure, ``0``, an empty body of the matching type, and the
			original ``url``.
		"""
		request_headers = headers if headers else {
			'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0)',
		}
		try:
			response = requests.get(url, headers=request_headers, params=params, timeout=timeout)
			# For text, rely on the decoding performed by ``Response.text``.
			body = response.content if binary else response.text
			return response.status_code, body, response.url
		except Exception:
			# Deliberately broad: the caller expects a best-effort result for
			# any failure. Unlike a bare ``except``, this lets
			# KeyboardInterrupt and SystemExit propagate.
			if debug:
				traceback.print_exc()
			print('failed download: {}'.format(url))
			return 0, (b'' if binary else ''), url

	def parsePage(self, html):
		"""Extract the ``totalPage`` value embedded in a search-result page.

		:param html: page text as returned by ``downloader``.
		:return: the first ``"totalPage"`` number as a string of digits, or
			``""`` when ``html`` is not a string or holds no such field.
		"""
		if not isinstance(html, str):
			return ""
		# Capture digits only, so the result does not depend on what follows
		# the number (``,``, ``}``, whitespace) and can never include trailing
		# JSON. An optional opening quote tolerates a quoted number.
		found = re.search(r'"totalPage"\s*:\s*"?(\d+)', html)
		return found.group(1) if found else ""


if __name__ == '__main__':
	# num_page() looks these two names up at module level, so both must be
	# bound before it is called.
	search_key = 'ipad'
	num_url = 'https://s.taobao.com/search?q={search_key}&s={s}'
	crawler = taobao_test()
	crawler.num_page()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值