爬取股票信息并保存到自己电脑上
import requests
from bs4 import BeautifulSoup
import re
def getHTMLText(url, code='utf-8'):
try:
r = requests.get(url, timeout=30)
r.raise_for_status()
r.encoding = code
return r.text
except:
return ""
def getStockList(lst, stockURL):
html = getHTMLText(stockURL, 'GB2312')
soup = BeautifulSoup(html, 'html.parser')
a = soup.find_all('a')
for i in a:
try:
href = i.attrs['href']
lst.append(re.findall(r'[s][hz]\d{6}', href)[0])
except:
continue
return ""
def getStockInfo(lst, stockURL, fpath):
count = 0
for stock in lst:
url = stockURL + stock + ".html"
html = getHTMLText(url)
try:
if html == "":
continue
infoDict = {}
soup = BeautifulSoup(html, 'html.parser')
stockInfo = soup.find('div', attrs={'class': 'stock-bets'})
name = stockInfo.find_all(attrs={'class': 'bets-name'})[0]
infoDict.update({'股票名称': name.text.split()[0]})
keyList = stockInfo.find_all('dt')
valueList = stockInfo.find_all('dd')
for i in range(len(keyList)):
key = keyList[i].text
val = valueList[i].text
infoDict[key] = val
with open(fpath, 'a', encoding='utf-8') as f:
f.write(str(infoDict) + '\n')
count = count + 1
print('\r当前进度:{:.2f}%'.format(count * 100 / len(lst)), end='')
except:
count = count + 1
print('\r当前进度:{:.2f}%'.format(count * 100 / len(lst)), end='')
continue
def main():
stock_list_url = 'http://quote.eastmoney.com/stocklist.html'
stock_info_url = 'https://gupiao.baidu.com/stock/'
output_file = 'E://BaiduStockInfo.txt'
slist = []
getStockList(slist, stock_list_url)
getStockInfo(slist, stock_info_url, output_file)
main()
获取大学排名
'''
1.从网络上获取大学排名网页内容getHTMLText()
2.提取网页内容中信息到合适的数据结构fillUnivlIST()
3.利用数据结构展示并输出结果PRINTuNIVlIST()
'''
import requests
from bs4 import BeautifulSoup
import bs4
def getHTMLText(url):
try:
r = requests.get(url,timeout = 30)
r.raise_for_status()
r.encoding = r.apparent_encoding
return r.text
except:
return ""
def fillUnivList(ulist,html):
soup = BeautifulSoup(html,"html.parser")
for tr in soup.find('tbody').children:
if isinstance(tr,bs4.element.Tag):
tds = tr('td')
ulist.append([tds[0].string,tds[1].string,tds[2].string])
def printUnivList(ulist,num):
tplt = "{0:^10}\t{1:{3}^10}\t{2:^10}"
print(tplt.format("排名","学校","总分",chr(12288)))
for i in range(num):
u = ulist[i]
print(tplt.format(u[0],u[1],u[2],chr(12288)))
def main():
uinfo = []
url = 'http://www.zuihaodaxue.cn/zuihaodaxuepaiming2016.html'
html = getHTMLText(url)
fillUnivList(uinfo,html)
printUnivList(uinfo,20)
main()
参考学习网址:
https://www.icourse163.org/course/BIT-1001870001