import requests
from bs4 import BeautifulSoup
import traceback  # for printing exception tracebacks
import re

# Fetch the page at the given url and return its text (empty string on failure)
def getHTMLText(url):
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()  # raise an exception for non-200 responses
        r.encoding = r.apparent_encoding  # fix garbled (mis-decoded) text
        return r.text
    except:
        return ""
# Build the stock list: extract stock codes from the page and store them in lst
def getStockList(lst, stockURL):
    html = getHTMLText(stockURL)
    soup = BeautifulSoup(html, 'html.parser')
    a = soup.find_all('a')
    for i in a:
        try:
            href = i.attrs['href']
            print(href)
            #lst.append(re.findall(r"[s][hz]\d{6}", href)[0])
            # escape the dot and capture only the 6-digit code, so the URL built in getStockInfo is valid
            code = re.findall(r"[01]\.(\d{6})", href)[0]
            lst.append(code)
            print(code)
        except:
            continue
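# Pattern sketch for the regex above (the href value is a hypothetical example):
#   re.findall(r"[01]\.(\d{6})", "http://quote.eastmoney.com/sz000001.html?id=0.000001")
#   returns ['000001'], so lst ends up holding bare 6-digit stock codes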
# Scrape the detail page of each stock in lst and append its fields to fpath
def getStockInfo(lst, stockURL, fpath):
    for stock in lst:
        url = stockURL + "sz" + stock + "/nc.shtml"  # note: assumes a Shenzhen ("sz") code
        html = getHTMLText(url)
        try:
            if html == "":
                continue
            infoDict = {}
            soup = BeautifulSoup(html, 'html.parser')
            stockInfo = soup.find('div', attrs={'class': 'hq_L M'})
            name = stockInfo.find_all(attrs={'class': 'c8_name'})[0]
            # add the stock name to the dict
            infoDict.update({'股票名称': name.text.split()[0]})
            keyList = stockInfo.find_all('th')
            valueList = stockInfo.find_all('td')
            for i in range(len(keyList)):
                key = keyList[i].text
                val = valueList[i].text
                infoDict[key] = val
            # append the collected fields to the output file, one dict per line
            with open(fpath, 'a', encoding='utf-8') as f:
                f.write(str(infoDict) + '\n')
        except:
            traceback.print_exc()
            continue
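# Output format sketch: each line appended to fpath is the str() of one dict, roughly
#   {'股票名称': '...', <th text>: <td text>, ...}
# where the remaining keys/values come from the <th>/<td> cells of the quote table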
def main():
    stock_list_url = 'http://quote.eastmoney.com/center/gridlist.html#hs_a_board'  # Eastmoney stock list
    stock_info_url = 'http://finance.sina.com.cn/realstock/company/'  # Sina per-stock pages
    # the output file is saved at the root of drive D
    output_file = 'D://stock.txt'
    slist = []
    getStockList(slist, stock_list_url)               # collect stock codes
    getStockInfo(slist, stock_info_url, output_file)  # scrape and save stock details

if __name__ == '__main__':
    main()