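# NOTE: This code used to run correctly, but the "e+ online mall" site it
# scrapes has recently become unreachable, so it no longer works as-is.
# It is kept for reference only; the underlying idea is simple.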
import requests
import bs4
from bs4 import BeautifulSoup
import re
import time
def getHtmlSrc(url):
    """Download a page and return its decoded text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as text, decoded with the encoding that requests
        detects from the content, or None when the request fails (network
        error, timeout, or a non-2xx status).
    """
    # Optional proxy, currently disabled:
    # proxies={"http":"60.191.201.38:45461"}
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.22 Safari/537.36 SE 2.X MetaSr 1.0'}
    try:
        r = requests.get(url, timeout=20, headers=header)
        r.raise_for_status()
    except requests.RequestException:
        # Only request-level failures are treated as "page unavailable";
        # anything else (e.g. KeyboardInterrupt, programming errors) propagates.
        print("获得页面失败")
        return None
    # Prefer the encoding guessed from the body over the header-declared one.
    r.encoding = r.apparent_encoding
    return r.text
def formatHtml(html):
    """Extract the embedded product-list markup from a page and parse it.

    The page is parsed once, then the first text node whose content matches
    ``<li>.*?</li>`` is located and parsed again as HTML.
    NOTE(review): presumably the list markup is embedded in the page as text
    (e.g. inside a script block) -- confirm against a real response.

    Args:
        html: Page source as a string, or None if the download failed.

    Returns:
        A BeautifulSoup document built from the matched text node. An empty
        document is returned when ``html`` is empty/None or nothing matches,
        so callers can still call ``select`` on the result.
    """
    if not html:
        return BeautifulSoup("", "html.parser")
    soup = BeautifulSoup(html, "html.parser")
    had = soup.find(text=re.compile("<li>.*?</li>"))
    if had is None:
        # No matching text node: avoid BeautifulSoup(None), which raises.
        return BeautifulSoup("", "html.parser")
    return BeautifulSoup(had, "html.parser")
# Module-level accumulators filled by dealKno() and printed by disPlay().
# The four lists are appended to together, one entry per processed element.
name=[]  # product names
num=[]  # sales counts
last_price=[]  # original prices
now_price=[]  # current prices
def _tag_string_or_missing(tag, missing="-1"):
    """Return ``tag.string``, or ``missing`` when the tag was not found."""
    return tag.string if tag is not None else missing


def dealKno(msg):
    """Collect product fields from every ``li div`` element of ``msg``.

    For each matched element, one entry is appended to each of the
    module-level lists ``name``, ``num``, ``last_price`` and ``now_price``:

    * name       -- string of the first ``<a class="sort-tit">``
    * num        -- string of the first ``<b>``
    * last_price -- string of the first ``<em>``
    * now_price  -- string of the first ``<span>``

    A missing sub-tag is recorded as the string "-1" so the four lists stay
    aligned index-for-index.

    Args:
        msg: Parsed document (as returned by formatHtml) to search.
    """
    for item in msg.select("li div"):
        if not isinstance(item, bs4.element.Tag):
            continue
        name.append(_tag_string_or_missing(item.find("a", {"class": "sort-tit"})))
        num.append(_tag_string_or_missing(item.find("b")))
        last_price.append(_tag_string_or_missing(item.find("em")))
        now_price.append(_tag_string_or_missing(item.find("span")))
def disPlay():
    """Print one line per collected product.

    Reads the module-level lists ``name``, ``num``, ``last_price`` and
    ``now_price`` in lockstep and prints name, sales count, original price
    and current price for each entry.
    """
    for product, sold, original, current in zip(name, num, last_price, now_price):
        print("商品名称是({}) 商品销售量是({}) 商品原价是({}) 商品现价是({})".format(product, sold, original, current))
# def writeFile():
# dir="D:\\goodsInfo\\"
# if os.path.exists(dir):
# os.mkdir(dir)
# path=dir+"info.txt"
# if os.path.exists(path):
# with open(path,"w") as f:
# f.write()
def main():
    """Crawl the shop listing pages, collect product data, then print it.

    Pages 2 through 258 are requested one at a time with a one-second pause
    before each request. Pages that fail to download are skipped instead of
    aborting the whole crawl. After all pages are processed, the collected
    products are printed.
    NOTE(review): the range starts at page 2, so page 1 is never fetched --
    confirm this is intended.
    """
    for index in range(2, 259):  # upper bound 259 is exclusive
        time.sleep(1)  # throttle requests to the site
        url = "http://e.chychg.com/app/index.php?i=1604&c=entry&p=list&do=shop&m=sz_yi&page=" + str(index)
        html = getHtmlSrc(url)
        if html is None:
            # Download failed (already reported by getHtmlSrc); skip this page.
            continue
        msg = formatHtml(html)
        dealKno(msg)
        print(index)  # progress indicator: last page processed
    disPlay()
# Run the crawl only when executed as a script, not when imported.
if __name__ == '__main__':
    main()