基本算是一个中小型爬虫,可以爬取2000多只股票的全部信息。不知道发布爬虫代码是否合乎规矩,因此稍微隐藏了两行非常简单的代码,略懂Python的人应该能很轻松地补上。
import requests
import re
import time
import traceback
from random import uniform
from bs4 import BeautifulSoup
# Fetch the content of a single page.
def get_html_text(url, code='utf-8'):
    """Download ``url`` and return the response body decoded as text.

    Args:
        url: Address of the page to fetch.
        code: Encoding assigned to the response before its body is
            decoded (defaults to ``'utf-8'``).

    Returns:
        The decoded page text, or ``None`` when the request fails
        (connection error, timeout, or an HTTP error status). The
        failure is printed rather than raised.
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx responses into exceptions so they share the
        # same failure path as connection errors.
        r.raise_for_status()
        r.encoding = code
        return r.text
    except requests.RequestException as exc:
        # Only request-level failures are swallowed (best effort).
        # Catching BaseException here would also trap KeyboardInterrupt
        # and SystemExit, making a long crawl impossible to interrupt.
        print(exc)
        return None
# 根据页面中的a标签,获取股票编码信息
def get_stock_list(lst, stock_url):
html = get_html_text(stock_url, code='GB2312')
soup = BeautifulSoup(html, 'html.parser')
al = soup.find_all('a')
for a in al:
try:
href