Python Boss

1、Cookie 会频繁失效,需要经常更换

2、需要对请求速度进行限制

3、请求地址在 js 里

#coding:utf-8
import json
import time
import urllib
import urllib2
import re
from bs4 import BeautifulSoup
import requests
 
#__author__='小菜菜1223'
 
 
def run(h, page):
    """Fetch one page of search results and pass the parsed HTML to ``main``.

    Args:
        h: Writable file-like object; handed through unchanged to ``main``.
        page: Page number, interpolated into the ``page`` and ``ka`` query
            parameters of the request URL.

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    page_no = str(page)
    url = ("https://www.zhipin.com/c101010100/b_%E9%BE%99%E5%B2%97%E5%8C%BA/?query=%E6%B7%B1%E5%9C%B3+%E5%A4%96%E8%B4%B8&page="
           + page_no + "&ka=page-" + page_no)

    # Cookie header value, built from adjacent string literals so that it
    # forms one single-line value (a raw line break inside a quoted string
    # is a syntax error).
    # NOTE(review): this looks like a session cookie captured from a browser;
    # it presumably expires and has to be refreshed by hand -- confirm.
    cookie = (
        'lastCity=101010100; _uab_collina=155894826851450523301609; '
        'sid=sem_pz_bdpc_dasou_title; __c=1588755259; __g=sem_pz_bdpc_dasou_title; '
        '__l=l=%2Fwww.zhipin.com%2Fbeijing%2F%3Fsid%3Dsem_pz_bdpc_dasou_title&r=https%3A%2F%2Fsp0.baidu.com%2F9q9JcDHa2gU2pMbgoY3K%2Fadrc.php%3Ft%3D06KL00c00fDdiHC088qh0KZEgs77XeFX000007hOm-C00000LIIMxh.THdBULP1doZA8QMu1x60UWdBmy-bIfK15yRknWT3nHfknj0sn1TduhR0IHdjPW64rDmznDRLnH0znH9jwWPAPHm3fH6drHbdwbRvr0K95gTqFhdWpyfqn1nYn1mLnjDzniusThqbpyfqnHm0uHdCIZwsT1CEQLILIz4lpA-spy38mvqVQ1q1pyfqTvNVgLKlgvFbTAPxuA71ULNxIA-YUAR0mLFW5Hn4nH0s%26tpl%3Dtpl_11534_21264_17382%26l%3D1516420953%26attach%3Dlocation%253D%2526linkName%253D%2525E6%2525A0%252587%2525E5%252587%252586%2525E5%2525A4%2525B4%2525E9%252583%2525A8-%2525E6%2525A0%252587%2525E9%2525A2%252598-%2525E4%2525B8%2525BB%2525E6%2525A0%252587%2525E9%2525A2%252598%2526linkText%253DBOSS%2525E7%25259B%2525B4%2525E8%252581%252598%2525E2%252580%252594%2525E2%252580%252594%2525E6%252589%2525BE%2525E5%2525B7%2525A5%2525E4%2525BD%25259C%2525EF%2525BC%25258C%2525E6%252588%252591%2525E8%2525A6%252581%2525E8%2525B7%25259F%2525E8%252580%252581%2525E6%25259D%2525BF%2525E8%2525B0%252588%2525EF%2525BC%252581%2526xp%253Did(%252522m3343670121_canvas%252522)%25252FDIV%25255B1%25255D%25252FDIV%25255B1%25255D%25252FDIV%25255B1%25255D%25252FDIV%25255B1%25255D%25252FDIV%25255B1%25255D%25252FH2%25255B1%25255D%25252FA%25255B1%25255D%2526linkType%253D%2526checksum%253D140%26ie%3Dutf-8%26f%3D3%26tn%3Dbaidu%26wd%3Dboss%25E7%259B%25B4%25E8%2581%2598%25E5%25AE%2598%25E7%25BD%2591%26oq%3Dlagou%26rqlang%3Dcn%26inputT%3D2119%26prefixsug%3Dboss%26rsp%3D0&g=%2Fwww.zhipin.com%2Fbeijing%2F%3Fsid%3Dsem_pz_bdpc_dasou_title&friend_source=0&friend_source=0; '
        '__zp_seo_uuid__=9b582547-62f6-40da-bc9c-07ee84e05e24; '
        'Hm_lvt_194df3105ad7148dcf2b98a91b5e727a=1588755259,1588755596,1588757599,1588759051; '
        '__a=92101704.1554684563.1583218762.1588755259.75.5.33.33; '
        'Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a=1588816418; '
        '__zp_stoken__=3ac4%2F0P0uAy6VOL%2B25qF9xRXyhApD5Y8y23Z22kWm%2BWfFbijkLuFTP1FbrKhGhfzsxPzJ%2F%2FU0PNRY7EudEghGAdUfnYKrc7yHrloIsimIbb5uPgrVOtCUE%2FrkVUHwXwpD1hn'
    )
    headers = {
        'referer': 'https://www.zhipin.com/job_detail/?query=%E6%B7%B1%E5%9C%B3+%E5%A4%96%E8%B4%B8&city=101010100&industry=&position=',
        'Cookie': cookie,
        'User-Agent': 'Mozilla/5.0(Macintosh;Intel Mac OS X 10_11_4) AppleWebKit/537.36(KHTML,like Gecko) Chrome/52.0.2743.116 Safari/537.36',
    }

    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')
    main(h, soup)
    
def main(h, soup, delay=5):
    """Extract job locations and job titles from a parsed page and write them.

    One line per job is written to ``h`` in the form ``<location>~<title>``
    followed by a newline, encoded as UTF-8 bytes. The function then sleeps
    for ``delay`` seconds so that consecutive page requests are spaced out.

    Locations and titles are collected independently and paired by position.
    If the two lists differ in length (for example because a title element
    was skipped), only as many lines as the shorter list are written; the
    counts printed to stdout make such a mismatch visible.

    Args:
        h: Object with a ``write`` method that accepts UTF-8 encoded byte
            strings (e.g. a Python 2 file opened with mode ``'w'``).
        soup: Parsed document exposing ``find_all(attrs=...)``.
        delay: Seconds to sleep after writing; defaults to the original
            hard-coded value of 5.
    """
    # Job location info: first child of every element with class "job-area".
    # ''.join() over a string yields a plain-string copy of it.
    areas = [''.join(node.contents[0])
             for node in soup.find_all(attrs={'class': 'job-area'})]

    # Job title info: the "title" attribute of the <a> inside each element
    # with class "name".
    titles = []
    for node in soup.find_all(attrs={'class': 'name'}):
        try:
            titles.append(node.a['title'])
        except (AttributeError, KeyError, TypeError):
            # No <a> child (None is not subscriptable) or no title attribute:
            # this element is not a job title, skip it.
            continue

    # Single pre-formatted argument prints identically on Python 2 and 3.
    print('%d %d' % (len(areas), len(titles)))

    # zip() stops at the shorter list, so a skipped title can no longer
    # cause an IndexError part-way through the page.
    for area, title in zip(areas, titles):
        h.write((u'%s~%s\n' % (area, title)).encode('utf-8'))
    time.sleep(delay)
    
if __name__ == '__main__':
    # Scrape result pages 1 through 9 into res.txt. The `with` block flushes
    # and closes the file even if one of the page requests raises.
    with open('res.txt', 'w') as h:
        for i in range(1, 10):
            print(i)  # progress indicator: page currently being fetched
            run(h, i)

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值