"""Fetch the paginated stock quote table from q.10jqka.com.cn and print it.

Each page is downloaded, its ``<tbody>`` rows are turned into a pandas
DataFrame, and the frame is printed to stdout.
"""

import re
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup

# URL template of one result page; ``{}`` receives the 1-based page number.
PAGE_URL = "http://q.10jqka.com.cn/index/index/board/all/field/zdf/order/desc/page/{}/ajax/1/"

# Number of result pages to walk (the original comment states 234 pages).
TOTAL_PAGES = 234

HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36"
}

# Output columns: index, code, name, price, change (%), turnover,
# outstanding shares, circulating market value, P/E ratio.
COLUMNS = ['序号', '代码', '名称', '现价', '涨跌幅(%)', '成交额', '流通股', '流通市值', '市盈率']

# A row must have enough cells that the five leading fields and the four
# trailing fields (counted from the end) do not overlap.
_MIN_CELLS = 10


def ask_html(url, headers, timeout=10):
    """Download *url* and return the decoded response body.

    Args:
        url: Address of the page to fetch.
        headers: HTTP request headers passed to ``requests.get``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body as ``str``.

    Raises:
        requests.RequestException: On connection problems, timeouts, or a
            4xx/5xx HTTP status.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    # The original called ``text.encode('GBK')`` and threw the result away,
    # which never influenced decoding.  Decode as GBK only when the server
    # did not declare a charset itself.
    if "charset" not in response.headers.get("Content-Type", "").lower():
        response.encoding = "GBK"
    return response.text


def _parse_rows(html):
    """Return one 9-field list of cell texts per usable ``<tr>`` in *html*."""
    soup = BeautifulSoup(html, "html.parser")
    body = soup.find("tbody")
    if body is None:
        # No table in the document (e.g. an error page): nothing to parse.
        return []

    rows = []
    for tr in body.find_all("tr"):
        cells = [td.get_text(strip=True) for td in tr.find_all("td")]
        if len(cells) < _MIN_CELLS:
            continue  # malformed or spacer row
        # Fields are taken by position rather than by the "c-rise" CSS class,
        # so rows whose price cells carry a different class still parse.
        # Leading cells: index, code, name, price, change (%).
        # Trailing cells (the very last one is skipped): turnover,
        # outstanding shares, circulating market value, P/E ratio.
        rows.append(cells[:5] + cells[-5:-1])
    return rows


def bs_html(html):
    """Parse the quote table in *html*, print it, and return it.

    Args:
        html: HTML text expected to contain a ``<tbody>`` of quote rows.

    Returns:
        A DataFrame with the columns listed in ``COLUMNS``; it is empty when
        the document contains no usable rows.
    """
    df = pd.DataFrame(_parse_rows(html), columns=COLUMNS)
    print(df)
    return df


def main():
    """Walk every result page, printing its URL and its parsed table."""
    # range() excludes its stop value, hence the +1 to reach the last page.
    for page in range(1, TOTAL_PAGES + 1):
        url = PAGE_URL.format(page)
        print(url)
        try:
            html = ask_html(url, HEADERS)
        except requests.RequestException as exc:
            print("request failed for page {}: {}".format(page, exc))
        else:
            if bs_html(html).empty:
                # NOTE(review): presumably the server answered with a
                # non-table page (e.g. an access check) - confirm.
                print("page {} contained no quote rows".format(page))
        time.sleep(1)  # pause between requests


if __name__ == '__main__':
    main()
【无标题】为什么只能爬取第一页,而单独打开第二页时才能爬取第二页?
最新推荐文章于 2023-04-15 21:29:28 发布