最近的答题赢钱很火爆,我也参与了几次,有些题目确实很难答,但是10秒钟的时间根本不够百度的,所以写了个辅助挂,这样可以出现题目时自动百度,这个时间也就花掉2秒钟,剩下的7、8秒钟可以进行分析和作答,提升了赢钱概率。
源码可以见我的github:点击链接
原理分析:使用 adb 命令抓取手机上视频播放界面的截图,然后用 Python 裁剪图片并做 OCR,识别出题目和答案,再去百度检索得到结果。环境如何搭建,有需要的童鞋可以联系我;因为使用的是本地 OCR,所以识别不花钱,也没有调用次数的限制。
github上的代码中
ocr_bw.py,这个是自动根据题目去百度,然后打开浏览器,展示检索结果
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
# -*- coding: utf-8 -*-
"""Quiz helper: OCR the question from a phone screenshot and search Baidu.

Workflow: wait for the user to press Enter, grab the phone screen through
``adb``, crop the question area, run Tesseract OCR on it and open the Baidu
result page for the recognised text in the default browser.
"""
import subprocess
import time
import webbrowser
from urllib.parse import quote

import pytesseract
from PIL import Image

# Path of the Tesseract executable handed to pytesseract.
TESSERACT_CMD = 'E:/Program Files (x86)/Tesseract-OCR/tesseract'
# File the raw screenshot is written to before PIL opens it.
SCREENSHOT_PATH = 'autojump.png'
# (left, upper, right, lower) crop box of the question area, in pixels.
# NOTE(review): the values look tuned for one specific phone resolution and
# quiz app layout - confirm before using on another device.
QUESTION_BOX = (70, 260, 1025, 570)


def main():
    """Ask the user for confirmation, then start the recognition loop."""
    op = yes_or_no('请确保手机打开了 ADB 并连接了电脑,'
                   '然后打开西瓜视频后再用本程序,确定开始?')
    if not op:
        print('bye')
        return
    ocr_subject_parent()
    # Offline alternative kept from the original author: run the OCR over
    # screenshots saved on disk instead of a live device (needs `import os`).
    # for root, sub_dirs, files in os.walk('E:/临时接收的文件/知乎答题/百万/'):
    #     for file in files:
    #         print('发现图片:' + file)
    #         img = Image.open('E:/临时接收的文件/知乎答题/百万/'+file)
    #         ocr_subject(img)


def yes_or_no(prompt, true_value='y', false_value='n', default=True):
    """Ask a yes/no question on the console until a valid answer is given.

    Args:
        prompt: Question text shown to the user.
        true_value: Input that counts as "yes".
        false_value: Input that counts as "no".
        default: Value returned when the user just presses Enter.

    Returns:
        True or False according to the answer, or ``default`` on empty input.
    """
    default_value = true_value if default else false_value
    prompt = '{} {}/{} [{}]: '.format(
        prompt, true_value, false_value, default_value)
    i = input(prompt)
    if not i:
        return default
    while True:
        if i == true_value:
            return True
        if i == false_value:
            return False
        prompt = 'Please input {} or {}: '.format(true_value, false_value)
        i = input(prompt)


def screenImg(true_value='', default=True):
    """Block until the user signals that a question is on screen.

    Returns:
        ``default`` when the user just presses Enter; otherwise whether the
        typed text equals ``true_value``.
    """
    prompt = '当出现题目时,请按下回车进行识别 '
    i = input(prompt)
    if not i:
        return default
    return i == true_value


def _extract_question(ocr_text):
    """Strip all whitespace and the leading "<number>." from OCR output.

    Everything after the first dot is kept, so a question that itself
    contains a dot is not truncated. When no dot was recognised the whole
    text is returned instead of failing.
    """
    compact = "".join(ocr_text.split())
    _, dot, tail = compact.partition('.')
    return tail if dot else compact


def ocr_subject(p):
    """Recognise the question on screenshot ``p`` and search for it.

    Args:
        p: PIL image of the full phone screen.
    """
    region = cut_img(p)
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
    text = pytesseract.image_to_string(region, lang='chi_sim')
    subject = _extract_question(text)
    print(subject)
    openPage(subject)


def _capture_screenshot():
    """Grab the phone screen through adb and return it as a PIL image."""
    completed = subprocess.run(
        ['adb', 'shell', 'screencap', '-p'],
        stdout=subprocess.PIPE,
        check=True,
    )
    # Undo the LF -> CRLF translation applied to `adb shell` output.
    # NOTE(review): presumably only needed for the author's adb/Windows
    # setup - confirm it does not corrupt the PNG on yours.
    png_bytes = completed.stdout.replace(b'\r\n', b'\n')
    with open(SCREENSHOT_PATH, 'wb') as f:
        f.write(png_bytes)
    return Image.open(SCREENSHOT_PATH)


def ocr_subject_parent():
    """Capture and process one question per Enter press.

    Runs as a loop (rather than recursing through ``ocr_subject``) so the
    call stack does not grow with every question. The loop ends as soon as
    ``screenImg`` returns a false value.
    """
    while screenImg():
        start = time.time()
        img = _capture_screenshot()
        print("耗时:" + str(time.time() - start))
        ocr_subject(img)


def openPage(subject):
    """Open the Baidu result page for ``subject`` in the default browser."""
    # Percent-encode the query so characters such as '&', '#' or '+' in the
    # recognised text cannot break the URL.
    url = 'https://www.baidu.com/s?wd={}'.format(quote(subject))
    webbrowser.open(url)


def cut_img(img):
    """Return the part of the screenshot selected by ``QUESTION_BOX``."""
    region = img.crop(QUESTION_BOX)
    # region.save("temp/cut_first.png")
    return region


if __name__ == '__main__':
    main()
ocr_bw2.py,这个是根据题目+答案,去百度检索,通过爬虫抓取百度的收录数,然后在控制台打印结果
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
|
# -*- coding:utf-8 -*-
"""Quiz helper: OCR question and answers, then compare Baidu hit counts.

Workflow: wait for the user to press Enter, grab the phone screen through
``adb``, OCR the question and the three answer areas, search Baidu for
"question + answer" once per answer (one thread each) and print the
reported result count next to every answer.
"""
__author__ = 'zjy'

import re
import subprocess
import threading
import time
import urllib
import urllib.request
import webbrowser
from urllib.parse import quote

import pytesseract
from PIL import Image

# Path of the Tesseract executable handed to pytesseract.
TESSERACT_CMD = 'E:/Program Files (x86)/Tesseract-OCR/tesseract'
# File the raw screenshot is written to before PIL opens it.
SCREENSHOT_PATH = 'autojump.png'
# (left, upper, right, lower) crop boxes, in pixels.
# NOTE(review): the values look tuned for one specific phone resolution and
# quiz app layout - confirm before using on another device.
QUESTION_BOX = (70, 260, 1025, 570)
ANSWER_BOXES = (
    (70, 590, 1025, 768),
    (70, 769, 1025, 947),
    (70, 948, 1025, 1130),
)
# Matches Baidu's "about N results" banner; group 1 is the count text.
_RESULT_COUNT_RE = re.compile(r'百度为您找到相关结果(约[\d,]+个?)')
# Seconds to wait for Baidu; a slower reply is useless within a 10 s quiz.
SEARCH_TIMEOUT = 5
# Returned by getSearchNum when no result count could be obtained.
NO_RESULT = 'N/A'


def main():
    """Ask the user for confirmation, then start the recognition loop."""
    op = yes_or_no('请确保手机打开了 ADB 并连接了电脑,'
                   '然后打开西瓜视频后再用本程序,确定开始?')
    if not op:
        print('bye')
        return
    ocr_subject_parent()
    # Offline alternative kept from the original author: run the OCR over
    # screenshots saved on disk instead of a live device (needs `import os`).
    # for root, sub_dirs, files in os.walk('E:/临时接收的文件/知乎答题/百万/'):
    #     for file in files:
    #         print('发现图片:' + file)
    #         img = Image.open('E:/临时接收的文件/知乎答题/百万/'+file)
    #         ocr_subject(img)


def yes_or_no(prompt, true_value='y', false_value='n', default=True):
    """Ask a yes/no question on the console until a valid answer is given.

    Args:
        prompt: Question text shown to the user.
        true_value: Input that counts as "yes".
        false_value: Input that counts as "no".
        default: Value returned when the user just presses Enter.

    Returns:
        True or False according to the answer, or ``default`` on empty input.
    """
    default_value = true_value if default else false_value
    prompt = '{} {}/{} [{}]: '.format(
        prompt, true_value, false_value, default_value)
    i = input(prompt)
    if not i:
        return default
    while True:
        if i == true_value:
            return True
        if i == false_value:
            return False
        prompt = 'Please input {} or {}: '.format(true_value, false_value)
        i = input(prompt)


def screenImg(true_value='', default=True):
    """Block until the user signals that a question is on screen.

    Returns:
        ``default`` when the user just presses Enter; otherwise whether the
        typed text equals ``true_value``.
    """
    prompt = '当出现题目时,请按下回车进行识别 \n'
    i = input(prompt)
    if not i:
        return default
    return i == true_value


def _extract_question(ocr_text):
    """Strip whitespace, the leading "<number>." and double quotes.

    Everything after the first dot is kept, so a question that itself
    contains a dot is not truncated. When no dot was recognised the whole
    text is used instead of failing.
    """
    compact = "".join(ocr_text.split())
    _, dot, tail = compact.partition('.')
    question = tail if dot else compact
    return question.replace("\"", "")


def ocr_subject(p):
    """Recognise the question on screenshot ``p`` and score its answers.

    Args:
        p: PIL image of the full phone screen.
    """
    question_img = cut_img(p)
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
    text = pytesseract.image_to_string(question_img, lang='chi_sim')
    subject = _extract_question(text)
    print(subject)
    ocr_answer(p, subject)
    # openPage(subject)


def getSearchNum(key):
    """Return Baidu's result-count text for the search term ``key``.

    Args:
        key: Search term; it is percent-encoded before being sent.

    Returns:
        The count portion of Baidu's result banner (e.g. '约1,230,000个'),
        or ``NO_RESULT`` when the request fails or the banner is missing.
    """
    url = 'http://www.baidu.com/s?wd={}'.format(quote(key))
    try:
        with urllib.request.urlopen(url, timeout=SEARCH_TIMEOUT) as response:
            page = response.read().decode("utf-8", errors="replace")
    except OSError:
        # URLError and socket timeouts are OSError subclasses; report
        # "no result" instead of killing the worker thread.
        return NO_RESULT
    match = _RESULT_COUNT_RE.search(page)
    return match.group(1) if match else NO_RESULT


def ocr_answer(p, subject):
    """Start one worker thread per answer area of screenshot ``p``.

    The threads are not joined; each prints its own result when done.
    """
    pytesseract.pytesseract.tesseract_cmd = TESSERACT_CMD
    for answer_img in cut_question(p):
        worker = threading.Thread(
            target=ocr_answer_thread, args=(answer_img, subject))
        worker.start()


def ocr_answer_thread(p, subject):
    """OCR one answer image and print it with its Baidu result count."""
    answer = pytesseract.image_to_string(p, lang='chi_sim')
    answer = "".join(answer.split())
    v = getSearchNum(subject + ' ' + answer)
    print(answer + ' ' + v)


def _capture_screenshot():
    """Grab the phone screen through adb and return it as a PIL image."""
    completed = subprocess.run(
        ['adb', 'shell', 'screencap', '-p'],
        stdout=subprocess.PIPE,
        check=True,
    )
    # Undo the LF -> CRLF translation applied to `adb shell` output.
    # NOTE(review): presumably only needed for the author's adb/Windows
    # setup - confirm it does not corrupt the PNG on yours.
    png_bytes = completed.stdout.replace(b'\r\n', b'\n')
    with open(SCREENSHOT_PATH, 'wb') as f:
        f.write(png_bytes)
    return Image.open(SCREENSHOT_PATH)


def ocr_subject_parent():
    """Capture and process one question per Enter press.

    Runs as a loop (rather than recursing through ``ocr_subject``) so the
    call stack does not grow with every question. The loop ends as soon as
    ``screenImg`` returns a false value.
    """
    while screenImg():
        ocr_subject(_capture_screenshot())


def openPage(subject):
    """Open the Baidu result page for ``subject`` in the default browser."""
    # Percent-encode the query so characters such as '&', '#' or '+' in the
    # recognised text cannot break the URL.
    url = 'https://www.baidu.com/s?wd={}'.format(quote(subject))
    webbrowser.open(url)


def cut_img(img):
    """Return the part of the screenshot selected by ``QUESTION_BOX``."""
    region = img.crop(QUESTION_BOX)
    # region.save("temp/cut_first.png")
    return region


def cut_question(img):
    """Return the three answer crops of the screenshot, top to bottom."""
    return [img.crop(box) for box in ANSWER_BOXES]


if __name__ == '__main__':
    main()
由于很多题目是"下列哪个不是……"这类否定式提问,单看百度收录数并不能直接得出答案,所以我更喜欢用第一种方式,识别时间基本在0.5-0.6秒之间。
最后里面的ocr_zh.py是可以用来抓取头脑王者的辅助。
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:http://blog.csdn.net/zjy105/article/details/79034520