今天爬了前程无忧,分享下代码~
可以直接运行的,也很简单,就不做注释了。
原创:禁止转载
2019-09-05更新---------------------------
今天有人关注了这个代码。之前的代码都是刚开始学习的时候写的,自己都有点看不下去,哈哈。现在贴一个简单些的代码,大家一起进步,哈哈~
#coding:utf-8
import json
import time
import urllib
import urllib2
import re
from bs4 import BeautifulSoup
#__author__='小菜菜1223'
def run(num, typ):
    """Download one 51job search-result page and print its listings.

    The URL is assembled from a fixed set of filter values hard-coded below;
    only the search keyword and the page number vary.

    Args:
        num: Page number; inserted (via ``str``) into the page slot of the URL.
        typ: Search keyword; inserted (via ``str``) into the keyword slot.

    The downloaded HTML is parsed with BeautifulSoup's ``lxml`` parser and
    handed to ``main`` for printing. Nothing is returned.
    """
    # NOTE: the parameter is '&degreefrom'. An HTML-entity decoding of '&deg'
    # had turned it into a literal degree sign, silently dropping this filter.
    url = (
        'https://search.51job.com/list/120300,000000,0000,00,9,99,'
        '{keyword},2,{page}.html'
        '?lang=c&stype=1&postchannel=0000&workyear=99&cotype=99'
        '&degreefrom=99&jobterm=99&companysize=99&lonlat=0%2C0&radius=-1'
        '&ord_field=0&confirmdate=9&fromType=&dibiaoid=0&address=&line='
        '&specialarea=00&from=&welfare='
    ).format(keyword=str(typ), page=str(num))

    response = urllib2.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    soup = BeautifulSoup(html, 'lxml')
    main(soup)
def _print_cells(soup, css_class):
    """Print ``<index> <contents>`` for every element carrying *css_class*."""
    for index, cell in enumerate(soup.find_all(attrs={'class': css_class})):
        print("%s %s" % (index, cell.contents))


def main(soup):
    """Print the listing columns found in a parsed search-result page.

    Columns are located by CSS class and printed one column after another,
    each line prefixed with the element's index within its column:

    * ``t2`` - company name (the ``title`` attribute of the cell's link)
    * ``t3`` - location
    * ``t4`` - salary
    * ``t5`` - date

    Args:
        soup: Parsed document exposing ``find_all(attrs=...)``, e.g. a
            ``BeautifulSoup`` object.
    """
    # Company names
    for index, cell in enumerate(soup.find_all(attrs={'class': 't2'})):
        # The first match is always skipped - presumably it is the table's
        # header cell, which has no link (TODO confirm against the page).
        if index == 0:
            continue
        anchor = cell.a
        if anchor is None:
            # A cell without a link has no title to show; skip it instead of
            # failing with a TypeError on ``None['title']``.
            continue
        print("%s %s" % (index, anchor['title']))

    # Location, salary and date columns are printed as raw cell contents.
    for css_class in ('t3', 't4', 't5'):
        _print_cells(soup, css_class)
if __name__ == '__main__':
    # run(num, typ): the first argument is the page number, the second is the
    # job keyword. The original call passed them in the opposite order, which
    # searched for the keyword "0" on page "java".
    # NOTE(review): page value 0 is kept from the original call - confirm
    # whether the site numbers its result pages from 0 or from 1.
    run(0, 'java')