爬取淘宝商品信息，并按表格形式排列输出。

import json
import os
import re
import sys

import requests

# HTTP headers sent with every search request.
#
# SECURITY: a logged-in session cookie must never be committed to source
# control or published -- it identifies the account and can be replayed by
# anyone who reads the file. Provide your own cookie string through the
# TAOBAO_COOKIE environment variable instead; when it is unset no cookie
# header is sent.
headers = {
    # Optional: 'referer': 'https://www.taobao.com/',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
}

_cookie = os.environ.get('TAOBAO_COOKIE', '')
if _cookie:
    headers['cookie'] = _cookie


def getHTMLtext(url, timeout=10):
    """Fetch *url* and return the response body as text.

    The request is sent with the module-level ``headers``; the inline note
    in the original code says that since October 2018 a cookie is needed
    for the search page to be crawled normally.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script indefinitely.

    Returns:
        The decoded page text, or an empty string if the request failed
        (connection error, timeout, or a non-2xx status code).
    """
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
        # Use the encoding detected from the body rather than the one
        # guessed from the HTTP headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: callers treat "" as "no data for this page".
        return ""
def _decode_title(raw):
    """Decode the JSON escape sequences in a captured title, if possible."""
    try:
        return json.loads('"' + raw + '"')
    except ValueError:
        # Not a valid JSON string body; keep the text exactly as scraped.
        return raw


def parsepage(ilt, html):
    """Extract (price, title) pairs from a search page and append them to *ilt*.

    The page text is searched for ``"view_price":"..."`` and
    ``"raw_title":"..."`` fields. Prices and titles are paired in the order
    they appear; if the two counts differ, the surplus entries are ignored.

    Values are taken from regex capture groups instead of ``eval``-ing
    scraped text, so titles containing ``:`` are kept intact and no
    downloaded content is ever executed.

    Args:
        ilt: List that receives one ``[price, title]`` entry per product.
            Both elements are strings.
        html: Page source to scan.

    Returns:
        None. Results are appended to *ilt* in place.
    """
    # Digits and dots only, e.g. "59.00".
    prices = re.findall(r'"view_price":"([\d.]*)"', html)
    # Everything up to the closing quote, allowing backslash-escaped
    # characters (including an escaped quote) inside the title.
    titles = re.findall(r'"raw_title":"((?:\\.|[^"\\\n])*)"', html)
    for price, title in zip(prices, titles):
        ilt.append([price, _decode_title(title)])



def printgoodlist(ilt):
    """Print the collected products as a tab-separated table.

    A header row is printed first, followed by one row per entry with a
    1-based running number, the price and the title.

    Args:
        ilt: Sequence of ``[price, title]`` entries.
    """
    # Minimum column widths of 4, 8 and 16 characters, separated by tabs.
    tplt = "{:4}\t{:8}\t{:16}"
    print(tplt.format("序号", "价格", "名称"))
    for count, g in enumerate(ilt, start=1):
        print(tplt.format(count, g[0], g[1]))

def main(goods="书包", depth=2):
    """Search Taobao for *goods* and print the results as a table.

    Args:
        goods: Search keyword appended to the search URL.
        depth: Number of result pages to fetch.
    """
    start_url = "https://s.taobao.com/search?q=" + goods
    infolist = []
    for i in range(depth):
        # The "s" query parameter advances by 44 per page
        # (presumably the number of items per result page -- confirm).
        url = start_url + "&s=" + str(44 * i)
        try:
            html = getHTMLtext(url)
            parsepage(infolist, html)
        except Exception as exc:
            # Best-effort crawl: report the failure and move on to the next
            # page. Unlike a bare ``except``, this lets Ctrl-C stop the run.
            print("skipping page {}: {}".format(i + 1, exc), file=sys.stderr)
            continue
    printgoodlist(infolist)


# Run the crawl only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

转载于:https://www.cnblogs.com/cwkcwk/p/9784223.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值