完整代码:
import json
import requests
from bs4 import BeautifulSoup
import csv
import pandas as pd
def _span_text(tag, css_class):
    """Return the text of the first <span> under *tag* with class *css_class*.

    Raises IndexError when *tag* contains no matching span.
    """
    return tag.find_all('span', attrs={'class': css_class})[0].text


# Column order shared by the JSON records and both CSV outputs.
FIELDS = ['jobname', 'jobincome', 'jobrequire']

# Parse the locally saved page; the file is decoded as GBK.
with open('web.html', 'r', encoding='gbk') as f:
    html = BeautifulSoup(f, 'html.parser')

# Every <div class="e"> is treated as one job entry.
listings = html.find_all('div', attrs={'class': 'e'})

job = []
for item in listings:
    try:
        # Look each field up once instead of once per use.
        name = _span_text(item, 'jname at')
        income = _span_text(item, 'sal')
        require = _span_text(item, 'd at')
    except IndexError:
        # An entry missing any of the three spans is skipped entirely.
        continue
    print(name)
    print(income)
    print(require)
    job.append({
        'jobname': name,
        'jobincome': income,
        'jobrequire': require,
    })

# Append all rows to webT.csv in one call (mode='a' means append; no header
# row, no index column). Nothing is written when no entry was extracted.
if job:
    pd.DataFrame(job, columns=FIELDS).to_csv(
        'webT.csv', mode='a', header=False, index=False, encoding='utf-8-sig'
    )

# Dump the records as JSON, keeping non-ASCII characters readable.
with open('web.json', 'w', encoding='utf-8') as f:
    json.dump(job, f, indent=1, ensure_ascii=False)

# Append the same records to web.csv with the csv module. The `with` block
# closes the file, so no explicit close() is needed afterwards.
with open('web.csv', mode='a', encoding='utf-8-sig', newline='') as f:
    csv.writer(f).writerows([record[key] for key in FIELDS] for record in job)