文章目录
博文配套视频课程:24小时实现从零到AI人工智能
PIL库基本介绍
PIL:Python Imaging Library,已经是Python平台事实上的图像处理标准库了。PIL功能非常强大,但API却非常简单易用。(如今一般通过其维护分支 Pillow 安装:`pip install pillow`,导入方式不变。)
# Demo: load an image with PIL and convert it to a NumPy array.
from PIL import Image
import numpy as np

# Load the image from disk and open it in the system's default viewer.
data = Image.open("../data/getcode.do.jpg")
data.show()

# Convert the PIL image into a NumPy array of raw pixel values.
data = np.array(data)
print(data, data.shape)
完成登录验证码识别操作
# -*- coding: utf-8 -*-
import scrapy
from scrapy import cmdline
from urllib import request
from PIL import Image
class DoubanSpiderSpider(scrapy.Spider):
    """Spider that logs into renren.com, solving the login captcha manually.

    If the login page shows a captcha, the image is downloaded and displayed
    so the user can type the code on the console; the code is then submitted
    together with the login form.
    """

    name = 'douban_spider'
    allowed_domains = ['movie.douban.com']
    # Entry URL: the renren.com login endpoint.
    start_urls = ['http://www.renren.com/PLogin.do']

    def parse(self, response):
        """Build the login form, attach the captcha code if one is shown,
        and submit the form via POST."""
        # NOTE(review): credentials are hard-coded for demo purposes only —
        # never commit real credentials.
        formdata = {
            'email': '970138074@qq.com',
            'password': 'pythonspider'
        }
        # Extract the current captcha image URL, if a captcha is present.
        # .get() returns the first matched string (or None) instead of a
        # SelectorList, which urlretrieve() cannot handle.
        img_url = response.xpath("//*[@id='verifyPic_login']/@src").get()
        print(img_url)
        if img_url:
            # Download the captcha and ask the user to transcribe it.
            check_code = self.parse_image(img_url)
            formdata['icode'] = check_code
        # Submit the login form whether or not a captcha was required.
        yield scrapy.FormRequest(url='http://www.renren.com/PLogin.do',
                                 formdata=formdata,
                                 callback=self.after_login)

    def parse_image(self, image_url):
        """Download the captcha image at *image_url*, show it to the user,
        and return the code they type on the console."""
        request.urlretrieve(image_url, 'check_code.png')
        image = Image.open('check_code.png')
        image.show()
        check_code = input("请输入验证码")
        return check_code

    def after_login(self, response):
        """Callback invoked after the login form is submitted; logs the
        URL we landed on so the result of the login can be inspected."""
        print('---->', response.url)
if __name__ == "__main__":
    # Launch the spider from inside the script — equivalent to running
    # `scrapy crawl douban_spider` on the command line.
    # Alternative invocations (log to a file / export to CSV):
    # cmdline.execute("scrapy crawl -s LOG_FILE=all.log douban_spider".split(' '))
    # cmdline.execute("scrapy crawl -o douban.csv douban_spider".split(' '))
    command = "scrapy crawl douban_spider"
    cmdline.execute(command.split(' '))