import requests
import os
class GuBa:
    """Download Eastmoney search-result pages for a fixed keyword.

    Instantiating the class immediately starts the download, because the
    constructor calls :meth:`run` with the requested page count.
    """

    # Search URL without the trailing page number.  The keyword is the
    # percent-encoded UTF-8 form of "家电行业".
    BASE_URL = (
        "http://so.eastmoney.com/web/s"
        "?keyword=%E5%AE%B6%E7%94%B5%E8%A1%8C%E4%B8%9A&pageindex="
    )
    # Directory (relative to the current working directory) for saved pages.
    OUTPUT_DIR = "guba/"
    # Seconds to wait for the server; without a timeout requests can block
    # forever on a stalled connection.
    TIMEOUT = 10

    def __init__(self, page):
        """Download the first ``page`` result pages right away."""
        self.run(page)

    def run(self, page):
        """Fetch result pages ``1..page`` and save each as ``guba/<n>.html``.

        Example URLs that are requested:

            http://so.eastmoney.com/web/s?keyword=%E5%AE%B6%E7%94%B5%E8%A1%8C%E4%B8%9A&pageindex=1
            http://so.eastmoney.com/web/s?keyword=%E5%AE%B6%E7%94%B5%E8%A1%8C%E4%B8%9A&pageindex=2

        Args:
            page: Number of pages to download, starting from page 1.  A value
                of zero or less only creates the output directory.

        Raises:
            requests.RequestException: If a request times out, fails at the
                network level, or the server answers with an error status.
        """
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(self.OUTPUT_DIR, exist_ok=True)

        for number in range(1, page + 1):
            page_id = str(number)
            response = requests.get(self.BASE_URL + page_id, timeout=self.TIMEOUT)
            # Do not store an HTTP error page as if it were a result page.
            response.raise_for_status()

            # When the server declares no charset, requests decodes text as
            # ISO-8859-1, which would garble non-Latin content; fall back to
            # the encoding detected from the body instead.
            content_type = response.headers.get("Content-Type", "")
            if "charset" not in content_type.lower():
                response.encoding = response.apparent_encoding

            target = os.path.join(self.OUTPUT_DIR, page_id + ".html")
            with open(target, "w", encoding="utf-8") as f:
                f.write(response.text)
if __name__ == "__main__":
    # Script entry point: constructing GuBa downloads the first ten pages.
    GuBa(page=10)
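# 股吧网站翻页爬取前十页 (Guba site: page through and crawl the first ten pages)
# 最新推荐文章于 2023-02-28 20:59:23 发布 (latest recommended article published 2023-02-28 20:59:23)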