#coding:utf8
import re
import requests
import urllib
from lxml import etree
# Search zhaopin.com for job postings and print a one-line summary of each.
#
# NOTE: this is Python 2 code. `urllib.quote` is the Python 2 spelling, and the
# regular expressions below are byte strings matched against the raw bytes in
# `response.content`.

CITY = "北京"
KEYWORD = "python"
SEARCH_URL = ("http://sou.zhaopin.com/jobs/searchresult.ashx?"
              "jl=%s&kw=%s&sm=0&sg=27ce606676a743128f9fbb1fa5dd09e7&p=1")

# Seconds to wait on any single HTTP request before giving up, so one stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 10

# Printed in place of a field whose pattern did not match the page.
MISSING = "N/A"

# Compiled once here rather than on every loop iteration. re.S lets "." span
# newlines, because the HTML fragments of interest cover several lines.
JOB_LINK_RE = re.compile(
    '<td class="Jobname">.*?href="(.*?)".*?</span>.*?</td>', re.S)
TITLE_RE = re.compile(
    '<td colspan="2">.*?<h1>(.*?)</h1>.*?</td>', re.S)
COMPANY_RE = re.compile(
    '<td colspan="2">.*?<h2>.*?<a target="_blank" href=".*?>(.*?)</a></h2>.*?</td>',
    re.S)
SALARY_RE = re.compile(
    '<tr>.*?职位月薪:</td>.*?<td valign="top">(.*?)</td>.*?</tr>', re.S)
LOCATION_RE = re.compile(
    'td class=.*?>工作地点:</td>.*?<td.*?<a.*?>(.*?)</a>', re.S)


def first_match(pattern, text, default=MISSING):
    """Return capture group 1 of the first match of `pattern` in `text`.

    Returns `default` when the pattern does not match, instead of raising the
    IndexError that indexing an empty `findall` result would.
    """
    found = pattern.search(text)
    if found is None:
        return default
    return found.group(1)


def find_job_links(search_page):
    """Return the list of job-detail URLs found in a search-result page."""
    return JOB_LINK_RE.findall(search_page)


def parse_job_page(job_page):
    """Extract (title, company, salary, location) from a job-detail page.

    A field whose pattern does not match is returned as `MISSING`.
    """
    return (
        first_match(TITLE_RE, job_page),
        first_match(COMPANY_RE, job_page),
        first_match(SALARY_RE, job_page),
        first_match(LOCATION_RE, job_page),
    )


def format_job(title, company, salary, location):
    """Build the single space-separated summary line printed for one job."""
    return " ".join(
        [title, "+", company, "+月薪:", salary, "+工作地点:", location])


def fetch(url):
    """Download `url` and return the raw response body."""
    return requests.get(url, timeout=REQUEST_TIMEOUT).content


def main():
    """Run the search and print one summary line per job posting found."""
    # Percent-encode both values so they are safe to embed in the query string.
    search_url = SEARCH_URL % (urllib.quote(CITY), urllib.quote(KEYWORD))
    for job_url in find_job_links(fetch(search_url)):
        print(format_job(*parse_job_page(fetch(job_url))))


if __name__ == "__main__":
    main()
import re
import requests
import urllib
from lxml import etree
# The statements that used to follow here were a verbatim second copy of the
# search-and-print script above (same URL, same patterns, same output). They
# repeated every HTTP request and printed every job a second time, so the copy
# has been removed; the script above is the only place the crawl is defined.