import requests
from bs4 import BeautifulSoup
import json
# Fetch the seputu.com front page, collect the links found inside every
# ``mulu`` section that has an <h2> heading, and write them to qiye.json.
#
# Output shape: a list of sections, each
#   {"title": <h2 text>, "content": [{"href": ..., "box_title": ...}, ...]}
user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:78.0) Gecko/20100101 Firefox/78.0'
headers = {'User-Agent': user_agent}
# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get('http://seputu.com/', headers=headers, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page and
# silently writing an empty result file.
r.raise_for_status()
soup = BeautifulSoup(r.text, 'html.parser')
content = []
for mulu in soup.find_all(class_="mulu"):
    h2 = mulu.find('h2')
    if h2 is None:
        # Sections without a heading are skipped entirely.
        continue
    h2_title = h2.string
    # A section may lack a ``box`` container; treat that as "no links"
    # rather than crashing with AttributeError on None.
    box = mulu.find(class_='box')
    anchors = box.find_all('a') if box is not None else []
    links = [
        {'href': a.get('href'), 'box_title': a.get('title')}
        for a in anchors
    ]
    content.append({'title': h2_title, 'content': links})
# json.dump writes str, so the file must be opened in text mode ('wb' raises
# TypeError on Python 3). UTF-8 with ensure_ascii=False keeps non-ASCII
# titles human-readable; the JSON data itself is unchanged.
with open('qiye.json', 'w', encoding='utf-8') as fp:
    json.dump(content, fp=fp, indent=4, ensure_ascii=False)
# 存储html为json格式 (Store the scraped HTML as JSON)
# 最新推荐文章于 2023-05-10 17:34:15 发布 (latest recommended article published 2023-05-10 17:34:15)