# Crawl Sohu stock quotes.
# Complete program: uses the Sohu Finance sector page's stock list to
# build per-stock history URLs and save each stock's daily quotes to CSV.
import requests
from bs4 import BeautifulSoup
import json
import csv
def getnum():
    """Fetch the stock list from the Sohu Finance sector page bk_3137.

    Returns:
        list[list[str]]: [code, name] pairs taken from the page's
        "e1" (stock code) and "e2" (stock name) table cells, paired
        positionally.

    Raises:
        requests.HTTPError: if the page request fails.
    """
    resp = requests.get("https://q.stock.sohu.com/cn/bk_3137.shtml")
    # Bug fix: the original referenced raise_for_status without calling it,
    # so HTTP errors were silently ignored.
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'html.parser')
    code_cells = soup.find_all('td', attrs={"class": "e1"})
    name_cells = soup.find_all('td', attrs={"class": "e2"})
    # Tag.text cannot raise, so the original try/except around the
    # append was dead code; a comprehension expresses the pairing directly.
    return [[code.text, name.text] for code, name in zip(code_cells, name_cells)]
def getgupiao(numL):
    """Download daily history for each stock and write one CSV per stock.

    Best-effort batch job: a stock whose download or parse fails is
    skipped and the loop continues with the next one.

    Args:
        numL: iterable of [code, name] pairs as returned by getnum();
              the code builds the query URL, the name becomes the CSV
              file name.
    """
    for num in numL:
        try:
            url = 'https://q.stock.sohu.com/hisHq?code=cn_'+num[0]+'&stat=1&order=D&period=d&callback=historySearchHandler&rt=jsonp&0.13888967033291877'
            r = requests.get(url)
            r.raise_for_status()
            r.encoding = "gbk"
            # The response is JSONP: historySearchHandler(([...]));
            # the slice strips the wrapper (not a BOM, as the original
            # comment claimed) leaving bare JSON.
            payload = json.loads(r.text[21:-2])
            rows = payload[0]['hq']
            # utf-8-sig gives an explicit, locale-independent encoding
            # that Excel also opens correctly for the Chinese headers.
            with open(num[1]+'.csv', "w", newline='', encoding='utf-8-sig') as csvFile:
                csvWriter = csv.writer(csvFile)
                csvWriter.writerow(['日期','开盘','收盘','涨跌额','涨跌幅 ','最低','最高','成交量(手)','成交金额(万)','换手率'])
                csvWriter.writerows(rows)
            # NOTE: the original called `csvFile.close` without parentheses
            # (a no-op); the with-block already closes the file.
            print(num[1],'爬取成功')
        except (requests.RequestException, ValueError, KeyError, IndexError, OSError):
            # Narrowed from a bare except: only skip on the failures this
            # loop can legitimately produce (network, JSON parse, missing
            # keys, file write) instead of hiding every bug.
            continue
def main():
    """Entry point: fetch the stock code list, then crawl each stock."""
    stock_pairs = getnum()
    getgupiao(stock_pairs)
    print("爬取完成!")
# Guard the entry point so importing this module does not start a crawl.
if __name__ == "__main__":
    main()