#!/usr/bin/env python # -*- coding: utf-8 -*- # @Time : 2018/10/15 14:03 # @Author : zhangz # @File : day4_yanzhengma.py # @Software: PyCharm import requests from lxml import etree import chaojiying as cj #采集人人网 id=966723459 url='http://www.renren.com/'+str(id)+'/profile?portal=homeFootprint&ref=home_footprint' cookie={'t':'8a9a0c45f5434d8de1d4fc34e9260bfa3'} with requests.Session() as s: s.cookies.update(cookie) # #判断是否为验证码的页面 # 如果不是,正常的解析 # 如果是 # 拿到验证码 html = etree.HTML(s.get(url).text) title=str(html.xpath('//title/text()')) if '验证码' in title: url_code='http://icode.renren.com/getcode.do?t=ninki&rnd=1531726003146' #拿到验证码图片的二进制流 im=s.get(url_code).content #给打码平台进行破解,拿到验证码 code=cj.get_code(im) url_validate='http://www.renren.com/validateuser.d