import re
import asyncio
from bs4 import BeautifulSoup
import pyppeteer as pyp
async def antiAntiCrawler(page):
    """Apply anti-bot-detection tweaks to a pyppeteer page.

    Sets a desktop-browser User-Agent on ``page`` and registers a script,
    evaluated for each new document, that redefines ``navigator.webdriver``
    so its getter returns ``false``.

    Args:
        page: Object exposing awaitable ``setUserAgent`` and
            ``evaluateOnNewDocument`` methods (a pyppeteer page).
    """
    # The strings below previously contained stray backslashes (leftovers of
    # collapsed line continuations). In the User-Agent that produced a literal
    # "\W"; in the script it produced "\{", which is not valid JavaScript.
    await page.setUserAgent('Mozilla/5.0 (Windows NT 6.1; Win64; x64)')
    await page.evaluateOnNewDocument(
        '()=>{Object.defineProperties(navigator,{webdriver:{get:()=>false}})}'
    )
async def getStockCodes(page):
    """Collect link texts that look like ``name(code)`` from the current page.

    Parses the page's HTML and, for every ``<li>`` element, inspects the
    first ``<a>`` inside it. The anchor text is kept when it contains both
    ``(`` and ``)``.

    Args:
        page: Object exposing an awaitable ``content()`` that returns the
            page HTML as a string.

    Returns:
        A list of the matching anchor texts, in document order.
    """
    html = await page.content()
    soup = BeautifulSoup(html, "html.parser")
    codes = []
    for item in soup.find_all("li"):
        link = item.find("a")
        # find() returns None when the <li> holds no anchor; skip those
        # instead of failing on ``None.text``.
        if link is None:
            continue
        label = link.text
        if "(" in label and ")" in label:
            codes.append(label)
    return codes
async def getStockInfo(url):
    """Print quote-table fields for the first three stocks listed at ``url``.

    Launches a visible Chromium instance, loads ``url``, and collects the
    ``name(code)`` link texts via ``getStockCodes``. For each of the first
    three entries it opens ``http://quote.eastmoney.com/sh<code>.html`` and
    prints every (label, value) pair matched by the table-cell regex.

    Args:
        url: Address of the page that lists the stocks.

    Raises:
        ValueError: If a collected entry lacks ``(`` or ``)`` (from
            ``str.index``).
    """
    # Non-headless launch; adjust the executable path to the local Chromium
    # install. A raw string keeps the Windows backslashes literal without
    # relying on invalid escape sequences.
    browser = await pyp.launch(
        headless=False,
        executablePath=r'D:\chromium\chrome-win\chrome.exe',
    )
    try:
        page = await browser.newPage()  # open a new tab in the browser
        await antiAntiCrawler(page)
        await page.goto(url)  # load the listing page
        codes = await getStockCodes(page)

        # Loop-invariant pattern: a plain <td> label, then the next <td>
        # whose id starts with "gt", capturing both cell texts.
        cell_pattern = re.compile(
            r'<td>([^<]*)</td>.*?<td[^>]*id="gt\d*?"[^>]*>([^<]*)</td>',
            re.DOTALL,
        )
        for entry in codes[:3]:
            print("-----", entry)
            start, end = entry.index("("), entry.index(")")
            code = entry[start + 1:end]
            quote_url = "http://quote.eastmoney.com/sh" + code + ".html"
            await page.goto(quote_url)
            html = await page.content()
            for label, value in cell_pattern.findall(html):
                print(label, value)
    finally:
        # Always shut the browser down, even when navigation or parsing
        # fails, so no Chromium process is left behind.
        await browser.close()
# Script driver: scrape the Shanghai stock list page and print quote details.
url = "https://www.banban.cn/gupiao/list_sh.html"
# asyncio.get_event_loop() is deprecated when no loop is running, so create
# and register the loop explicitly. The loop is left open, as before.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
loop.run_until_complete(getStockInfo(url))
Python网络爬虫之每日股票交易信息
最新推荐文章于 2024-07-31 14:30:10 发布