直接上代码：先获取 URL 对应的页面内容，并在请求中添加 User-Agent 请求头，防止服务器返回 403 错误。
import urllib2
# Download the zhipin.com job_detail search page (query=php) and print the
# raw response body to stdout.
url = "https://www.zhipin.com/job_detail/?query=php&scity=101010100&industry=&position="
# Browser-like User-Agent; per the author's note the request is rejected with
# HTTP 403 when this header is missing.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
req = urllib2.Request(url=url, headers=headers)
# Bound the wait so an unresponsive server cannot hang the script forever.
response = urllib2.urlopen(req, timeout=30)
try:
    html = response.read()
finally:
    # urllib2 response objects are not context managers in Python 2, so the
    # underlying socket has to be released explicitly.
    response.close()
# Single-argument parenthesized form prints the same text under Python 2's
# print statement.
print(html)