Python 批量下载验证码图片及切割验证码图片，识别验证码，并以识别的文字重命名验证码

最新推荐文章于 2023-12-01 20:15:00 发布

〆WangBenYan゜

最新推荐文章于 2023-12-01 20:15:00 发布

阅读量3.9k

点赞数 1

分类专栏： Python 资料

本文链接：https://blog.csdn.net/qq_18808965/article/details/72954833

版权

Python 资料专栏收录该内容

40 篇文章 2 订阅

订阅专栏

1、Python 批量下载验证码图片

2、批量切割验证码图片

3、识别验证码，并以识别的文字重命名验证码

#!C:/Python27
#coding=utf-8
import pytesseract
from pytesser import *
from PIL import Image,ImageEnhance,ImageFilter
import os
import fnmatch
import re,time


import urllib, random




def GetVerficode(): #下载验证码图片
    
    for i in range(1,101):  


        url = 'https://cas.sf-express.com/cas/imgcode?a=0.7860542547321294'  


        print "download", i  


        file("./code/%04d.jpg" % random.randrange(10000), "wb").write(urllib.urlopen(url).read()) 
        
def CutCrop():#分隔验证图片


    'global data 全局变量在整个函数中有效 '
    j = 1000  


    for f in os.listdir(".//code//"):
    


        if f.endswith(".jpg"):


            print f


            img = Image.open('.//code//'+f).convert('L')


            print img.size


            w, h = img.size


            #rowheight = h // rownum
            #colwidth = w // colnum
            #imgry.show()


            for i in range(4):                


                x = 10 + i*24  #验证码的x,y坐标


                y = 6  


                img.crop((x-4, y,x+6, y+14)).save("font/%d.bmp" % j)  


                print "j=",j  


                j += 1


#if __name__ == '__main__':


#排序读取的文件名,按顺序输出


def ReadFileName():


    global vcode
    l = fnmatch.filter(os.listdir('./font'), '*.bmp')


    for i in range(len(l)):  
        l[i] = l[i].split('.')  
        l[i][0] = int(l[i][0])


    l.sort()


    for i in range(len(l)):  
        l[i][0] = str(l[i][0])  
        l[i] = l[i][0] + '.' + l[i][1]
    
    #print '\n排序后:\n',l


    for line  in  l:
    
        #识别图片
        image = Image.open('./font/'+line)


        #二值化处理


        threshold = 90
        table = []
        for i in range(256):
            if i < threshold:
                table.append(0)
            else:
                table.append(1)
        out = image.point(table, '1')
    
        vcode = image_to_string(out)
        
        if re.match('^[0-9a-z]+$',vcode.strip()):#如果是数字或小写字母就打印
            
            print vcode.strip()
                      


    #out.show()
'''
#修改图片的名字为识别的名字+图片后缀


curDir = os.getcwd()
        
'''
def finddupl(lst):
    """Return the set of items that occur more than once in ``lst``.

    The result does not depend on how many times an item repeats or on where
    the repeats are located.

    Args:
        lst: iterable of hashable items.

    Returns:
        A set containing each repeated item once; empty when nothing repeats.
    """
    exists, dupl = set(), set()
    for item in lst:
        if item in exists:
            dupl.add(item)
        else:
            exists.add(item)
    return dupl


def rename():#修改图片的名字为识别的名字+图片后缀


        path=r'E:\pythonScript\Model\font'


        filelist=os.listdir(path)#该文件夹下所有的文件（包括文件夹）
        global Newname,Olddir,Name


        Oldname = []
        Newname = []
        Name = {}
        
        for files in filelist:#遍历所有文件


            Olddir=os.path.join(path,files).replace("\\","/");#原来的文件路径


            Oldname.append(Olddir)
                   
            if os.path.isdir(Olddir):#如果是文件夹则跳过


                        continue;


            filename=os.path.splitext(files)[0];#文件名


            filetype=os.path.splitext(files)[1];#文件扩展名


                #Newdir=os.path.join(path,filename[0]+filetype);#新的文件路径


                #print u'旧的路径：',Olddir


                #print u'新的路径：',Newdir


            a =filename+filetype


                #print u"a的文件名:",a
            time_begin = time.clock()#记录开始时间


            image = Image.open('./font/'+a)


                #二值化处理


            threshold = 105 #90
            table = []
            for i in range(256):
                if i < threshold:
                    table.append(0)
                else:
                    table.append(1)
            out = image.point(table, '1')
    
            vcode = image_to_string(out)


            vcode = vcode.strip()
                
            image.close()  #修改文件名称时，记得要关闭流，否则会报错
            
                  
            if re.match('^[0-9a-z]+$',vcode.strip()):#如果是数字或小写字母就打印


                Newdir=os.path.join(path,vcode.strip()+filetype).replace("\\","/");#新的文件路径
                #转换\为/   m = os.path.join('路径','文件名.txt') m.replace('\\','/')


                Newname.append(Newdir)
                        
                #print u'新的路径：',Newdir


                print u'识别出的验证码：',vcode


                Alert_FileName(vcode)
                
                #print "Use time: %s" % (time.clock() - time_begin)#输出花费时间


                #把新的地址放入一个集合中
                   
        #Re_name()
        print "最后:",Name
        #os.rename(Olddir,Newdir)#重命名
        for i in Name:
            a = Name.get(i)
            num = 0
            for b in a:
                os.rename(b,path+'\\'+i+unicode(num)+".bmp")  
                num = num +1
          
            
        
#重复名称重命名方法
def Re_name():#继续遍历 Newname 把相同的分为一个集合
    global l;
    l = Newname
    d = {}  #空字典
    for a in set(l):
        d[a] = l.count(a)        
    print u'统计重复个数：\n',d
'''
    识别到的文字,当做Key,相同Key值的放到一个List中,有多个Key 就有多少个List 
'''
def Alert_FileName(vcode):
    """Register the file currently being processed under its recognised text.

    Appends the module global ``Olddir`` to the list stored under ``vcode`` in
    the module global ``Name``, creating that list the first time the key is
    seen.  There is therefore one list per distinct recognised text.

    Args:
        vcode: recognised text used as the grouping key.
    """
    # A single setdefault covers both the "key exists" and "new key" cases.
    # dict.has_key is deprecated in Python 2 and does not exist in Python 3.
    Name.setdefault(vcode, []).append(Olddir)


# Pipeline steps, in order; uncomment the ones that should run.
#GetVerficode() # download the captcha images
 
#CutCrop()       # split each captcha image into tiles
    
#ReadFileName()# OCR the tiles and print the recognised text
rename()# rename the tiles after their recognised text

〆WangBenYan゜

关注

1
点赞
踩
8

收藏

觉得还不错? 一键收藏
1
评论
Python 批量下载验证码图片及切割验证码图片，识别验证码，并以识别的文字重命名验证码

1、Python 批量下载验证码图片2、批量切割验证码图片3、识别验证码，并以识别的文字重命令验证码#!C:/Python27#coding=utf-8import pytesseractfrom pytesser import *from PIL import Image,ImageEnhance,ImageFilterimport osimport fnmatc
复制链接

扫一扫