直接上代码
import requests
from bs4 import BeautifulSoup
class Spider:
    """Download one history page and append selected cell texts to a file.

    The page at ``HISTORY_URL`` is fetched with ``requests``, parsed with
    BeautifulSoup's ``html.parser``, and the text of every element matching
    ``NUMBER_SELECTOR`` is appended, one per line, to ``OUTPUT_FILE``.

    The class attributes below hold the values that were previously
    hard-coded in the methods; override them on a subclass or an instance
    to point the spider at a different page, selector or output file.
    """

    # Page requested by getHtmlData.
    HISTORY_URL = "http://datachart.500.com/pls/history/inc/history.php?limit=100"
    # CSS selector of the elements whose text is saved.
    NUMBER_SELECTOR = ".chart .t_tr1 .cfont2"
    # File that saveData appends to (relative to the working directory).
    OUTPUT_FILE = "number_info.txt"
    # Seconds to wait for the server; without a timeout requests may block
    # forever on an unresponsive host.
    REQUEST_TIMEOUT = 10

    def getHtmlData(self):
        """Fetch the history page and pass its raw bytes to getDataFromHtml.

        Raises:
            requests.RequestException: on connection failure, timeout, or a
                4xx/5xx response (via ``raise_for_status``).
        """
        response = requests.get(self.HISTORY_URL, timeout=self.REQUEST_TIMEOUT)
        # Fail loudly instead of scraping an error page.
        response.raise_for_status()
        # Hand over bytes, not text, so BeautifulSoup detects the encoding.
        self.getDataFromHtml(response.content)

    def getDataFromHtml(self, html):
        """Parse *html* and save the text of every matching element.

        Args:
            html: HTML markup (bytes or str) to parse.
        """
        soup = BeautifulSoup(html, "html.parser")
        for tag in soup.select(self.NUMBER_SELECTOR):
            text = tag.string
            if text is None:
                # .string is None when the tag has zero or several children;
                # fall back to the concatenated text so the write cannot
                # fail with a TypeError.
                text = tag.get_text()
            self.saveData(text)

    def saveData(self, numberStr):
        """Append *numberStr* followed by a newline to ``OUTPUT_FILE``.

        Args:
            numberStr: The text to write as one line.
        """
        # The context manager flushes and closes the file on exit, so no
        # explicit flush is needed.
        with open(self.OUTPUT_FILE, "a", encoding="utf8") as f:
            f.write(numberStr + "\n")

    def beginSpider(self):
        """Run the whole fetch, parse and save pipeline once."""
        self.getHtmlData()
if __name__ == "__main__":
    # Run the scraper only when this file is executed as a script,
    # not when it is imported as a module.
    spider = Spider()
    spider.beginSpider()
使用 Google Chrome 浏览器打开网址:http://datachart.500.com/pls/history/history.shtml
然后在页面上点击右键,选择“检查”,如下图所示: