前言
验证码?我也能破解?
关于验证码的介绍就不多说了,各种各样的验证码在人们生活中时不时就会冒出来,身为学生日常接触最多的就是教务处系统的验证码了,比如如下的验证码:
识别办法
模拟登录有着复杂的步骤,在这里咱们不管其他操作,只负责根据输入的一张验证码图片返回一个答案字符串。
我们知道验证码为了制作干扰,会把图片弄成五颜六色的样子,而我们首先就是要去除这些干扰,这一步就需要不断试验了,增强图片色彩,加大对比度等等都可以产生帮助。
在经过各种对图片的操作之后,终于找到了比较完美的去除干扰方案。可以看到在去除干扰之后,最优情况下,我们将得到一张十分纯净的黑白字符图片。一张图片上有四个字符,没办法一下子就把四个字符全部识别,需要把图片进行裁剪,裁剪成每张小图只有一个字符的样子,再对每张图片分别进行识别。
接下来就是识别文字了,我们首先把得到的小图转换成用 0 和 1 表示的矩阵(1 代表黑色像素,0 代表背景),每个矩阵代表一个字符。
比如数字六的矩阵
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
# Template for the digit 6: a 13-column x 20-row glyph flattened row by row
# into 260 entries. 1 marks a black pixel, 0 marks background. Each source
# line below is one pixel row, so the shape of the digit is visible by eye.
num_6 = [
    0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,1,1,0,0,0,0,0,0,
    0,0,0,0,1,1,1,0,0,0,0,0,0,
    0,0,0,1,1,1,0,0,0,0,0,0,0,
    0,0,0,1,1,0,0,0,0,0,0,0,0,
    0,0,1,1,0,0,0,0,0,0,0,0,0,
    0,0,1,1,0,0,0,0,0,0,0,0,0,
    0,1,1,1,1,1,1,1,0,0,0,0,0,
    0,1,1,1,1,1,1,1,1,0,0,0,0,
    0,1,1,0,0,0,0,1,1,1,0,0,0,
    0,1,1,0,0,0,0,0,1,1,0,0,0,
    0,1,1,0,0,0,0,0,1,1,0,0,0,
    0,1,1,1,0,0,0,1,1,1,0,0,0,
    0,0,1,1,1,1,1,1,1,0,0,0,0,
    0,0,0,1,1,1,1,1,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,
]
远远望过去,眯着眼睛还是能分辨出来的。
因为验证码十分规整,每个数字所在的位置都是固定的,所以并不需要涉及什么机器学习的算法,只需简单地进行矩阵比对:在所有事先做好的矩阵中找到相似度最高的那个即可。这里的比对方法多种多样,反正数据简单,能正确识别出来就好。
至此,咱们的验证码识别工作就结束了。
这次进行的验证码识别主要采用 Python 的 PIL 进行图片操作,模拟登录并自动填写验证码的全部代码请看这里:
示例代码
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
|
# -*- coding: utf-8 -* import sys reload (sys) sys.setdefaultencoding( "utf-8" ) import re import requests import io import os import json from PIL import Image from PIL import ImageEnhance from bs4 import BeautifulSoup import mdata class Student: def __init__( self , user,password): self .user = str (user) self .password = str (password) self .s = requests.Session() def login( self ): url = "http://202.118.31.197/ACTIONLOGON.APPPROCESS?mode=4" res = self .s.get(url).text imageUrl = 'http://202.118.31.197/' + re.findall( '<img src="(.+?)" width="55"' ,res)[ 0 ] im = Image. open (io.BytesIO( self .s.get(imageUrl).content)) enhancer = ImageEnhance.Contrast(im) im = enhancer.enhance( 7 ) x,y = im.size for i in range (y): for j in range (x): if (im.getpixel((j,i))! = ( 0 , 0 , 0 )): im.putpixel((j,i),( 255 , 255 , 255 )) num = [ 6 , 19 , 32 , 45 ] verifyCode = "" for i in range ( 4 ): a = im.crop((num[i], 0 ,num[i] + 13 , 20 )) l = [] x,y = a.size for i in range (y): for j in range (x): if (a.getpixel((j,i)) = = ( 0 , 0 , 0 )): l.append( 1 ) else : l.append( 0 ) his = 0 chrr = ""; for i in mdata.data: r = 0 ; for j in range ( 260 ): if (l[j] = = mdata.data[i][j]): r + = 1 if (r>his): his = r chrr = i verifyCode + = chrr # print "辅助输入验证码完毕:",verifyCode data = { 'WebUserNO' : str ( self .user), 'Password' : str ( self .password), 'Agnomen' :verifyCode, } url = "http://202.118.31.197/ACTIONLOGON.APPPROCESS?mode=4" t = self .s.post(url,data = data).text if re.findall( "images/Logout2" ,t) = = []: l = '[0,"' + re.findall( 'alert((.+?));' ,t)[ 1 ][ 1 ][ 2 : - 2 ] + '"]' + " " + self .user + " " + self .password + "\n" # print l # return '[0,"'+re.findall('alert((.+?));',t)[1][1][2:-2]+'"]' return [ False ,l] else : l = '登录成功 ' + re.findall( '! (.+?) 
' ,t)[ 0 ] + " " + self .user + " " + self .password + "\n" # print l return [ True ,l] def getInfo( self ): imageUrl = 'http://202.118.31.197/ACTIONDSPUSERPHOTO.APPPROCESS' data = self .s.get( 'http://202.118.31.197/ACTIONQUERYBASESTUDENTINFO.APPPROCESS?mode=3' ).text #学籍信息 data = BeautifulSoup(data, "lxml" ) q = data.find_all( "table" ,attrs = { 'align' : "left" }) a = [] for i in q[ 0 ]: if type (i) = = type (q[ 0 ]) : for j in i : if type (j) = = type (i): a.append(j.text) for i in q[ 1 ]: if type (i) = = type (q[ 1 ]) : for j in i : if type (j) = = type (i): a.append(j.text) data = {} for i in range ( 1 , len (a), 2 ): data[a[i - 1 ]] = a[i] # data['照片'] = io.BytesIO(self.s.get(imageUrl).content) return json.dumps(data) def getPic( self ): imageUrl = 'http://202.118.31.197/ACTIONDSPUSERPHOTO.APPPROCESS' pic = Image. open (io.BytesIO( self .s.get(imageUrl).content)) return pic def getScore( self ): score = self .s.get( 'http://202.118.31.197/ACTIONQUERYSTUDENTSCORE.APPPROCESS' ).text #成绩单 score = BeautifulSoup(score, "lxml" ) q = score.find_all(attrs = { 'height' : "36" })[ 0 ] point = q.text print point[point.find( '平均学分绩点' ):] table = score.html.body.table people = table.find_all(attrs = { 'height' : '36' })[ 0 ].string r = table.find_all( 'table' ,attrs = { 'align' : 'left' })[ 0 ].find_all( 'tr' ) subject = [] lesson = [] for i in r[ 0 ]: if type (r[ 0 ]) = = type (i): subject.append(i.string) for i in r: k = 0 temp = {} for j in i: if type (r[ 0 ]) = = type (j): temp[subject[k]] = j.string k + = 1 lesson.append(temp) lesson.pop() lesson.pop( 0 ) return json.dumps(lesson) def logoff( self ): return self .s.get( 'http://202.118.31.197/ACTIONLOGOUT.APPPROCESS' ).text if __name__ = = "__main__" : a = Student( 20150000 , 20150000 ) r = a.login() print r[ 1 ] if r[ 0 ]: r = json.loads(a.getScore()) for i in r: for j in i: print i[j], print q = json.loads(a.getInfo()) for i in q: print i,q[i] a.getPic().show() a.logoff() |
总结
以上就是这篇文章的全部内容了,希望本文的内容对大家的学习或者使用python能带来一定的帮助,如果有疑问大家可以留言交流,谢谢大家对服务器之家的支持。
原文链接:http://www.cnblogs.com/xfangs/p/6500611.html