数组越界解决不了,请各位指教,直接上代码,这是跟着视频敲的,但是我的就是有问题,在线等答案
import re
import requests
from bs4 import BeautifulSoup
import sys
def getHtmlPage(url):
try:
r=requests.get(url, timeout=10)
r.raise_for_status()
r.encoding=r.apparent_encoding
return r.text
except:
s = sys.exc_info()
print(("Error '%s' happened on line %d")% (s[1],s[2].tb_lineno))
def getID(id, url):
html=getHtmlPage(url)
soup=BeautifulSoup(html,"html.parser")
a=soup.find_all("a")
for i in a:
try:
# i.attrs 是字典类型,属性名是键,属性值是值,但是 attrs 使用时出错,直接用 get 获得属性值
h = i.get("href")
f = re.compile(r"[s][zh]\d{6}")
b = f.findall(h)
c = re.findall(r"[0-9]\d{5}", str(b)) # 得到 ['900923'] 前面的 sh 标识是哪个股市的
if b != []:
id.append(b)
except:
s = sys.exc_info()
print(s[1], s[2].tb_lineno)
print("lalalalallala")
def getInfo(id, htmlurl, fpath):
count = 1
for i in id:
url = htmlurl + "".join(i) + ".html" # 一定要将列表转换为字符串
html = getHtmlPage(url)
try:
infodict = {}
soup = BeautifulSoup(html, "html.parser")
stockInfo = soup.find("div", attrs={"class": "stock-bets"}) # 最外层 div
# 这里得到的是一个 tag 对象集所以要取第一个值
stockname = soup.find_all("a", attrs={"class": "bets-name"})[0]
infodict.update({"股票名称": stockname.text.split()[0]})#
keylist = stockInfo.find_all("dt")
valuelist = stockInfo.find_all("dd")
for i in range(len(valuelist)):
infodict[keylist[i].text] = valuelist[i].text # 存入字典,数组越界在这里!
with open(fpath, "a", encoding="utf-8") as f:
f.write(str(infodict) + '\n')
count = count + 1 # 只爬取20个页面的,太多了电脑遭不住
if count > 20:
break
except:
s = sys.exc_info()
print(s[1],s[2].tb_lineno)
continue
def main():
baiduURl = "https://gupiao.baidu.com/stock/"
dongfangURl = "http://quote.eastmoney.com/stocklist.html"
fpath = "F:/la.txt"
id = []
getID(id, dongfangURl)
getInfo(id, baiduURl, fpath)
print("lalal")
main()
错误提示信息
所得到的文本结果