import requests
import re
def getHTMLTxt(url):
    """Fetch *url* with an HTTP GET and return the response body as text.

    The request uses a 30-second timeout. The response encoding is set from
    the encoding detected in the body (``apparent_encoding``) before the text
    is decoded.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded response body, or ``""`` when the request fails (for
        example a connection error, a timeout, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx responses into an exception so they are treated as
        # failures as well.
        r.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: callers receive an empty page on failure. Only
        # request-related errors are caught, so KeyboardInterrupt and
        # programming errors are no longer silently swallowed.
        return ""
    r.encoding = r.apparent_encoding
    return r.text
def dealHTML(ulist, html):
    """Extract price/title pairs from *html* and append them to *ulist*.

    The page text is searched for ``"view_price":"<digits and dots>"`` and
    ``"raw_title":"<text>"`` entries. Matches are paired by position (the
    first price with the first title, and so on) and each pair is appended
    to *ulist* as ``[price, title]``, both as strings.

    Args:
        ulist: List that receives the ``[price, title]`` entries; it is
            modified in place.
        html: Page text to scan.

    Returns:
        None.
    """
    # Function-scope import so the block does not depend on the file's
    # top-level import list.
    import json

    # Capture groups pull the values out directly. The previous
    # ``split(":")[1]`` approach cut off any title that contained a colon.
    prices = re.findall(r'"view_price":"([\d.]*)"', html)
    # The title is captured together with its surrounding quotes so that it
    # can be decoded as a JSON string literal; ``\\.`` keeps an escaped
    # quote from ending the match early.
    title_literals = re.findall(r'"raw_title":("(?:[^"\\\n]|\\.)*")', html)

    # zip() stops at the shorter list, so a page with unequal numbers of
    # prices and titles no longer raises IndexError.
    for price, literal in zip(prices, title_literals):
        try:
            # json.loads decodes escape sequences without executing page
            # content, which eval() on downloaded text would do.
            title = json.loads(literal)
        except ValueError:
            # Not a valid JSON string: keep the raw text between the quotes.
            title = literal[1:-1]
        ulist.append([price, title])
def printHTML(ulist, html):
    """Print the collected entries as a table on standard output.

    A header line is printed first, followed by one line per entry holding
    a 1-based sequence number, the entry's first element and its second
    element. Columns are padded with the ideographic space ``chr(12288)``.

    Args:
        ulist: Sequence of two-element entries as built by ``dealHTML``.
        html: Not used by this function.

    Returns:
        None.
    """
    fill = chr(12288)
    print("{0:{3}<6}{1:{3}<8}\t{2:{3}^16}".format("序号", "价格", "商品名称", fill))
    for number, entry in enumerate(ulist, start=1):
        print("{0:{3}<4}\t{1:{3}^8}\t{2:{3}^16}".format(number, entry[0], entry[1], fill))
def wTXT(ulist, html, path=r'C:\Users\lenovo\Desktop\书包价格.txt'):
    """Write the collected entries as a table to a text file.

    The file receives a header line followed by one line per entry holding
    a 1-based sequence number, the entry's first element and its second
    element. Columns are padded with the ideographic space ``chr(12288)``.
    An existing file at *path* is overwritten.

    Args:
        ulist: Sequence of two-element entries as built by ``dealHTML``.
        html: Not used by this function.
        path: Destination file. Defaults to the location that was
            previously hard-coded, so existing callers are unaffected.

    Returns:
        None.

    Raises:
        OSError: If the file cannot be opened or written.
    """
    fill = chr(12288)
    # The ``with`` block closes the file even when a write fails. UTF-8 is
    # requested explicitly so that titles containing characters outside the
    # platform's default codec do not raise UnicodeEncodeError.
    with open(path, 'w', encoding='utf-8') as f:
        f.write("{0:{3}<6}{1:{3}<8}\t{2:{3}^16}".format("序号", "价格", "商品名称", fill) + '\n')
        for number, entry in enumerate(ulist, start=1):
            f.write("{0:{3}<4}\t{1:{3}^8}\t{2:{3}^16}".format(number, entry[0], entry[1], fill) + '\n')
def main():
    """Download three search result pages, then print and save the results.

    For each page the search URL is built from the base address, the search
    keyword and an ``s`` query value that advances by 44 per page. The page
    is fetched with ``getHTMLTxt`` and parsed into a shared result list with
    ``dealHTML``. After all pages are processed the list is handed to
    ``printHTML`` and ``wTXT``, together with the text of the last page
    fetched.
    """
    page_count = 3
    search_prefix = "https://s.taobao.com/search?q="
    keyword = "书包"
    results = []
    for offset in (44 * page_index for page_index in range(page_count)):
        # presumably 44 is the number of results per page - confirm
        html = getHTMLTxt(search_prefix + keyword + "&s=" + str(offset))
        dealHTML(results, html)
    printHTML(results, html)
    wTXT(results, html)
# Run the scraper only when this file is executed as a script, so that
# importing the module does not trigger network requests or file writes.
if __name__ == "__main__":
    main()