import requests,time,re
from lxml import etree
# Scrape Python job listings from 51job (前程无忧) search-result pages.
#
# For every listing row on result pages 1-2 the script extracts the job title,
# company, location, salary and publish date, then prints the leading pieces
# of the salary text after splitting it on its range/unit characters.

# Pieces of the search URL; the 1-based page number goes between them.
# The original literal read "cotype=99°reefrom=99": the "&deg" of
# "&degreefrom" had apparently been turned into a degree sign, which fused
# two query parameters together. The parameter name is restored here.
SEARCH_URL_PREFIX = (
    'https://search.51job.com/list/'
    '010000%252C020000%252C030200%252C040000%252C180200,'
    '000000,0000,00,9,99,python,2,'
)
SEARCH_URL_SUFFIX = (
    '.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99'
    '&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare='
)

PAGES = range(1, 3)  # result pages 1 and 2
REQUEST_TIMEOUT_SECONDS = 10  # avoid hanging forever on a stalled connection

OUTPUT_PATH = '前程无忧.csv'
OUTPUT_ENCODING = 'GB18030'

# Characters that separate the numbers in a salary string from its range
# dash and unit words; compiled once instead of once per listing row.
SALARY_DELIMITERS = re.compile(r'[-万/月年天元千]')

SALARY_FALLBACK = '面议'  # "negotiable", used when a row shows no salary
# NOTE(review): this text reads like a work-location placeholder rather than
# a date; it is kept exactly as the original used it - confirm the intent.
PUBLISH_DATE_FALLBACK = '根据个人居住地就近决定'


def build_search_url(page):
    """Return the search-result URL for the given 1-based page number."""
    return SEARCH_URL_PREFIX + str(page) + SEARCH_URL_SUFFIX


def split_salary(salary_text):
    """Split a salary string on its range dash and unit characters.

    Returns the list produced by ``re.split``; a text without any delimiter
    (such as the '面议' fallback) comes back as a one-element list.
    """
    return SALARY_DELIMITERS.split(salary_text)


def parse_listing(row):
    """Extract the displayed fields from one listing row element.

    ``row`` is an lxml element for a ``div.el`` row; every XPath below is
    relative to it. The title, company and location are required, so a row
    without one raises IndexError just as in the original script. Salary and
    publish date fall back to placeholder text when the row omits them.
    """
    salary = row.xpath("span[@class='t4']/text()")
    published = row.xpath("span[@class='t5']/text()")
    return {
        'title': row.xpath("p/span/a/@title")[0],
        'company': row.xpath("span[@class='t2']/a/@title")[0],
        'location': row.xpath("span[@class='t3']/text()")[0],
        'salary': salary[0] if salary else SALARY_FALLBACK,
        # The original tested the salary list here and indexed the date list
        # unconditionally, so a row without a date raised IndexError and a
        # row without a salary lost its date. It also stored the date in a
        # variable named `time`, rebinding the imported `time` module.
        'published': published[0] if published else PUBLISH_DATE_FALLBACK,
    }


def main():
    """Fetch each result page and print the split salary of every listing."""
    # Opening in 'w' mode creates/truncates the CSV exactly as the original
    # did; `with` guarantees the handle is closed even if a request fails.
    with open(OUTPUT_PATH, 'w', encoding=OUTPUT_ENCODING) as csv_file:
        for page in PAGES:
            response = requests.get(
                build_search_url(page), timeout=REQUEST_TIMEOUT_SECONDS
            )
            # Parse the raw bytes into an element tree that XPath can query.
            root = etree.HTML(response.content)
            # A path starting with "/" is resolved from the document root;
            # "//" matches at any depth.
            rows = root.xpath("//div[@class='dw_table']/div[@class='el']")
            for row in rows:
                listing = parse_listing(row)
                parts = split_salary(listing['salary'])
                if len(parts) > 1:
                    print(parts[0], parts[1])
                    print(parts[1])
                else:
                    # No delimiter present (e.g. '面议'): the original
                    # indexed parts[1] here and crashed with IndexError.
                    print(parts[0])
                # NOTE(review): writing the row to csv_file (row number,
                # title, company, location, salary, publish date) was
                # commented out in the original, so the file is created but
                # stays empty; it is left disabled here. When re-enabling,
                # prefer csv.writer - joining fields with ',' does not quote
                # values that themselves contain commas.


if __name__ == '__main__':
    main()
# object.write('%s' %count)
# object.write(big_cate_alt)
# object.write(big_cate_src)
# object.write(big_cate_intro)
# object.write(big_cate_intro1)
# object.write(big_cate_quote)
# object.write('\n')
# Python practice exercise.
# (Blog-page footer: latest recommended article published on 2024-06-18 15:07:16.)