from urllib import request  # for downloading images; unused in this script, kept from the original

import requests  # HTTP client used to fetch the page
from bs4 import BeautifulSoup

# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36 Edg/125.0.0.0'
}

# Scraped data lives either (1) in the page's HTML source, or
# (2) behind an API endpoint (DevTools => Network => find the endpoint URL).
# This script reads it from the HTML source.
url = "https://quote.stockstar.com/stock/stock_index.htm"

# Seconds to wait for the server; without a timeout requests.get can block forever.
REQUEST_TIMEOUT = 10


def fetch_html(page_url, timeout=REQUEST_TIMEOUT):
    """Download *page_url* and return its body decoded as GBK text.

    Args:
        page_url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        UnicodeDecodeError: If the body is not valid GBK.
    """
    res = requests.get(page_url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as stock data.
    res.raise_for_status()
    # res.text / res.content.decode() are fine for UTF-8 pages; per the
    # original author's note, decoding the raw bytes as GBK is what avoids
    # garbled Chinese text for this page.
    return res.content.decode('GBK')


def parse_stocks(html):
    """Extract ``(code, name)`` pairs from the stock index page HTML.

    Each ``<li>`` under ``#index_data_0`` is treated as one company: the text
    of its first ``<a>`` is the stock code and the text of its second ``<a>``
    is the company name.

    Args:
        html: Page source as a ``str``.

    Returns:
        A list of ``(code, name)`` string tuples in document order. Items
        with fewer than two links are skipped rather than raising
        ``IndexError``.
    """
    soup = BeautifulSoup(html, "lxml")
    stocks = []
    for li in soup.select('#index_data_0 li'):
        links = li.select('a')
        if len(links) < 2:
            # Not a complete "code + name" entry (e.g. a spacer item).
            continue
        stocks.append((links[0].text, links[1].text))
    return stocks


def main():
    """Fetch the stock index page and print one ``code name`` line per company."""
    for code, name in parse_stocks(fetch_html(url)):
        print(code, name)


if __name__ == "__main__":
    main()
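# bs4练手-gp网  (source blog post title: "bs4 practice - stock site")
# 最新推荐文章于 2024-08-17 14:58:02 发布  (page metadata: "latest recommended article published 2024-08-17 14:58:02")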