I'm essentially using urllib to open the desired webpage for each
stock in the argument list and reading the full contents of the html
code for that page. Then I'm slicing that in order to find the quote
I'm looking for.
以下是使用BeautifulSoup和requests的实现:
import requests
from bs4 import BeautifulSoup
def get_quotes(*stocks):
    """Fetch the current quote for each ticker symbol from Google Finance.

    The symbols are requested sequentially, one HTTP request per symbol.

    Args:
        *stocks: Ticker symbols to look up, e.g. ``'AAPL'``.

    Returns:
        A dict mapping each symbol to its quote as a float. Calling the
        function without arguments returns an empty dict.

    Raises:
        AttributeError: If a page contains no ``<span class="pr">`` element.
        ValueError: If the element's text cannot be parsed as a number.
    """
    base = 'https://finance.google.com/finance?q={}'
    quotes = {}
    for stock in stocks:
        page = requests.get(base.format(stock)).text
        soup = BeautifulSoup(page, 'html.parser')
        price_text = soup.find('span', attrs={'class': 'pr'}).get_text().strip()
        # float() rejects thousands separators ("1,234.56"), so drop them
        # before converting.
        quotes[stock] = float(price_text.replace(',', ''))
    return quotes
# Example call: look up three tickers and print the resulting dict.
print(get_quotes('AAPL', 'GE', 'C'))
# The next line is the output pasted from the run above, not a statement to execute.
{'AAPL': 160.86, 'GE': 23.91, 'C': 68.79}
# Timing reported for the call above: 1 loop, best of 3: 1.31 s per loop
如评论中所述,您可能需要查看multithreading或grequests。
使用grequests发出异步HTTP请求:
^{pr2}$
更新:这是Dusty Phillips的《Python 3 Object-Oriented Programming》一书中示例的修改版本,它使用了内置的threading模块。
from threading import Thread
from bs4 import BeautifulSoup
import numpy as np
import requests
class QuoteGetter(Thread):
    """Thread that downloads and parses the quote for a single ticker.

    Attributes are plain instance attributes:
      * ``ticker`` - the symbol passed to the constructor.
      * ``quote``  - the parsed price as a float, or ``np.nan`` while the
        thread has not finished or when the quote could not be obtained.
    """

    def __init__(self, ticker):
        """Store the ticker and give ``quote`` a NaN placeholder."""
        super().__init__()
        self.ticker = ticker
        # Defined up front so that reading ``quote`` never raises
        # AttributeError, even if run() fails part-way or was never started.
        self.quote = np.nan

    def run(self):
        """Fetch the Google Finance page for the ticker and parse its price.

        Any failure (network error, missing price element, unparsable text)
        leaves ``quote`` as ``np.nan`` instead of killing the thread with an
        unhandled exception.
        """
        base = 'https://finance.google.com/finance?q={}'
        try:
            response = requests.get(base.format(self.ticker))
        except requests.RequestException:
            # Network-level failure: keep the NaN placeholder.
            return
        soup = BeautifulSoup(response.text, 'html.parser')
        price_tag = soup.find('span', attrs={'class': 'pr'})
        try:
            # price_tag is None when the element is absent, which surfaces
            # here as AttributeError; commas are thousands separators.
            self.quote = float(price_tag.get_text().strip().replace(',', ''))
        except (AttributeError, ValueError):
            self.quote = np.nan
def get_quotes(tickers):
    """Fetch quotes for all tickers concurrently, one thread per ticker.

    Args:
        tickers: Iterable of ticker symbols. It is consumed exactly once, so
            generators and other one-shot iterables are supported.

    Returns:
        A dict mapping each ticker to the ``quote`` attribute of the
        ``QuoteGetter`` thread that handled it. If a ticker appears more
        than once, the value from its last occurrence is kept.
    """
    threads = [QuoteGetter(ticker) for ticker in tickers]
    for thread in threads:
        thread.start()
    # Wait for every download to finish before collecting results.
    for thread in threads:
        thread.join()
    # Read the symbol back from each thread instead of iterating ``tickers``
    # a second time, which would yield nothing for an exhausted iterator.
    return {thread.ticker: thread.quote for thread in threads}
# Sample input: 30 ticker symbols used to time the threaded get_quotes().
tickers = [
'A', 'AAL', 'AAP', 'AAPL', 'ABBV', 'ABC', 'ABT', 'ACN', 'ADBE', 'ADI',
'ADM', 'ADP', 'ADS', 'ADSK', 'AEE', 'AEP', 'AES', 'AET', 'AFL', 'AGN',
'AIG', 'AIV', 'AIZ', 'AJG', 'AKAM', 'ALB', 'ALGN', 'ALK', 'ALL', 'ALLE',
]
# `%time` is an IPython/Jupyter magic, not Python syntax; run this line in such a shell.
%time get_quotes(tickers)
# Wall time: 1.53 s