import requests
from bs4 import BeautifulSoup
import re
import csv
def soup_url(url, bm, ck, timeout=10):
    """Download *url* and return its HTML parsed into a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch.
        bm: Name of the text encoding used to decode the response body.
        ck: Raw string sent as the ``cookie`` request header.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` object built from the decoded response text
        using the built-in ``"html.parser"`` backend.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    h = {
        "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
        "cookie": ck,
    }
    r = requests.get(url, headers=h, timeout=timeout)
    # Force the caller-chosen encoding before .text decodes the body.
    r.encoding = bm
    return BeautifulSoup(r.text, "html.parser")
# Script body: fetch the page, print its title, scrape the company cards,
# save them to a CSV file, then read the file back and print each record.
url = "https://www.lagou.com/wn/"
bm = "utf-8"
# NOTE(review): hard-coded browser cookie that includes a JSESSIONID value.
# Consider loading it from configuration or the environment instead of
# keeping it in source.
ck = "index_location_city=%E5%85%A8%E5%9B%BD; JSESSIONID=ABAABJAABIEABFB50F928A7AA3026053B2C454F54F3C967; WEBTJ-ID=20240621145007-190398f3e8cb4-0a76489a0e0c0e-45410429-2073600-190398f3e8dd; sajssdk_2015_cross_new_user=1; sensorsdata2015session=%7B%7D; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%22190398f43845f-084e3629e4909c-45410429-2073600-190398f4385275%22%2C%22first_id%22%3A%22%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E5%BC%95%E8%8D%90%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC%22%2C%22%24latest_referrer%22%3A%22https%3A%2F%2Fwww.hao123.com%2F%3Ftn%3D49055317_1_hao_pg%22%2C%22%24os%22%3A%22Windows%22%2C%22%24browser%22%3A%22Chrome%22%2C%22%24browser_version%22%3A%2286.0.4240.198%22%7D%2C%22%24device_id%22%3A%22190398f43845f-084e3629e4909c-45410429-2073600-190398f4385275%22%7D"
soup = soup_url(url, bm, ck)
s = str(soup.head)

# The character class only accepts '-', CJK ideographs and CJK punctuation,
# so search() returns None for any title containing other characters.
re_t = re.compile(r'<title>([\-\u4e00-\u9fa5\u3000-\u303f]+)</title>')
title = re_t.search(s)
if title is not None:
    print(title.group(1))
else:
    print("未找到匹配的页面标题")

# Collect one record per <div class="companyItem__1JIlL"> element.
# NOTE(review): the positional lookups below (h4 #0/#1, a #2/#3/#4) assume a
# fixed card layout - confirm against the live page markup.
dds = []
divs = soup.find_all("div", {"class": "companyItem__1JIlL"})
for div in divs:
    gs = div.a["title"]
    wz = div.p.a["href"]
    pm = div.find_all("h4")
    js = pm[0].span.string
    gg = pm[1].string
    ss = pm[0].text.split()[2]
    pm2 = div.find_all("a")
    mspj = pm2[2].text
    zzzw = pm2[3].text
    jlcll = pm2[4].text
    lb = [gs, wz, js, gg, ss, mspj, zzzw, jlcll]
    dds.append(lb)

csv_path = "c://Users//student//Desktop//dyz.csv"
try:
    with open(csv_path, "w", newline="", encoding=bm) as f:
        w = csv.writer(f)
        h = ["公司", "网址", "介绍", "广告", "上市公司", "面试评价", "在招职位", "简历处理率"]
        w.writerow(h)
        # writerows emits one CSV row per record; writerow(dds) would put
        # every record into a single row.
        w.writerows(dds)
except (OSError, csv.Error, UnicodeError):
    print("可能是文集键打开错误,或数据写入错误")
else:
    # Read the file back only when the write succeeded.
    # newline="" lets the csv module handle line endings inside quoted fields.
    with open(csv_path, "r", newline="", encoding=bm) as f:
        r = csv.reader(f)
        bt = next(r)  # header row, skipped when printing
        books = list(r)
    for n in books:
        # Column 1 (the URL) is left out of the printed summary.
        print(n[0], n[2], n[3], n[4], n[5], n[6], n[7])
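# NOTE(review): the four lines below are stray non-code text that is not
# valid Python; they are kept as comments so the file parses. Their meaning
# is unknown - confirm and remove if unneeded.
# 08-04
# 441
# 08-11
# 4453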