本文为大家分享了Python实现大音频文件语音识别功能的具体代码,供大家参考,具体内容如下:
实现思路:先用ffmpeg将其他非wav格式的音频转换为wav格式,并转换音频的声道(百度支持声道为1)和采样率(值为8000);格式转换完成后,再用ffmpeg将音频切成百度支持的时长(30秒和60秒2种,本程序用的是30秒)。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
|
# coding: utf-8
"""Recognize long audio files with the Baidu speech REST API.

Pipeline (driven from the ``__main__`` guard at the bottom):

1. ``ffmpeg_convert`` re-encodes every pending master recording to
   8000 Hz mono WAV and calls ``ffmpeg_cut`` to slice it into 30-second
   sections, one ``ocenter_section`` row per slice.
2. ``go_recognize`` sends pending sections to Baidu and appends the
   recognized text to the parent ``ocenter_recognition`` row.
"""
import base64
import json
import os
import subprocess
import time
import uuid

import requests

try:  # Python 2
    import urllib2
    _urlopen = urllib2.urlopen
    _quote = urllib2.quote
except ImportError:  # Python 3 fallback
    from urllib.parse import quote as _quote
    from urllib.request import urlopen as _urlopen

from inc import db_config
from inc import rtysdb

# Length of one audio slice in seconds (the article uses the 30 s variant).
SEGMENT_SECONDS = 30


class BaiduRest:
    """Minimal client for the Baidu text-to-speech / speech-to-text REST API."""

    def __init__(self, cu_id, api_key, api_secert):
        """Store the endpoints and fetch an access token right away.

        Args:
            cu_id: client identifier sent as ``cuid`` with every request.
            api_key: Baidu application key.
            api_secert: Baidu application secret (parameter name kept as-is
                for backward compatibility).
        """
        self.token_url = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials&client_id=%s&client_secret=%s"
        self.getvoice_url = "http://tsn.baidu.com/text2audio?tex=%s&lan=zh&cuid=%s&ctp=1&tok=%s"
        self.upvoice_url = 'http://vop.baidu.com/server_api'
        self.cu_id = cu_id
        self.get_token(api_key, api_secert)

    def get_token(self, api_key, api_secert):
        """Request an OAuth token and keep it in ``self.token_str``.

        Returns:
            True once the token has been stored.
        """
        token_url = self.token_url % (api_key, api_secert)
        r_str = _urlopen(token_url).read()
        token_data = json.loads(r_str)
        self.token_str = token_data['access_token']
        return True

    def text2audio(self, text, filename):
        """Text-to-speech: synthesize ``text`` and write the audio to ``filename``.

        Returns:
            True after the response body has been written.
        """
        get_url = self.getvoice_url % (_quote(text), self.cu_id, self.token_str)
        voice_data = _urlopen(get_url).read()
        with open(filename, 'wb+') as voice_fp:
            voice_fp.write(voice_data)
        return True

    def audio2text(self, filename):
        """Speech recognition: upload a WAV file and return the recognized text.

        The request declares the audio as 8000 Hz, single channel WAV.

        Returns:
            The first recognition candidate, or False when the service does
            not answer with ``err_msg == 'success.'``.
        """
        with open(filename, 'rb') as wav_fp:
            voice_data = wav_fp.read()
        data = {
            'format': 'wav',
            'rate': 8000,
            'channel': 1,
            'cuid': self.cu_id,
            'token': self.token_str,
            'len': len(voice_data),
            # b64encode never emits newlines; decode so the payload is
            # JSON-serializable text on both Python 2 and Python 3.
            'speech': base64.b64encode(voice_data).decode('ascii'),
        }
        result = requests.post(self.upvoice_url, json=data,
                               headers={'Content-Type': 'application/json'})
        data_result = result.json()
        if data_result.get('err_msg') == 'success.':
            return data_result['result'][0]
        return False


def test_voice(voice_file):
    """Recognize one audio file and return its text (False on failure)."""
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration.
    api_key = "vossGHIgEETS6IMRxBDeahv8"
    api_secert = "3c1fe6a6312f41fa21fa2c394dad5510"
    bdr = BaiduRest("0-57-7B-9F-1F-A1", api_key, api_secert)
    return bdr.audio2text(voice_file)


def get_master_audio(check_status='cut_status'):
    """Fetch master recordings that still need work.

    Args:
        check_status: ``'cut_status'`` selects rows with ``status=0``;
            ``'finished_status'`` selects rows with ``finished_status=0``.

    Returns:
        The rows from ``rtysdb.select_data``, or False when there are none
        or ``check_status`` is not one of the two known values.
    """
    queries = {
        'cut_status': "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE status=0",
        'finished_status': "SELECT id,url, time_long,sharps FROM ocenter_recognition WHERE finished_status=0",
    }
    sql = queries.get(check_status)
    if sql is None:
        return False
    data = rtysdb.select_data(sql, 'more')
    return data if data else False


def go_recognize(master_id):
    """Recognize up to 10 pending sections of one master recording.

    Each recognized text is stored on its section row and appended to the
    parent row's ``content``. Sections Baidu cannot recognize are marked
    with status 1 so they are not retried. Sections whose file is missing
    are skipped untouched.

    Returns:
        False when no pending section exists, otherwise None.
    """
    section_path = db_config.SYS_PATH
    sql = "SELECT id,rid,url,status FROM ocenter_section WHERE rid=%d AND status=0 order by id asc limit 10" % (master_id)
    record = rtysdb.select_data(sql, 'more')
    if not record:
        return False
    for rec in record:
        section_id, parent_id = rec[0], rec[1]
        voice_file = section_path + '/' + rec[2]
        if not os.path.exists(voice_file):
            continue
        result = test_voice(voice_file)
        print(result)
        if result:
            # NOTE(review): the recognized text is interpolated straight into
            # SQL; a quote in the text breaks the statement. Switch to a
            # parameterized call if rtysdb offers one.
            sql = "update ocenter_section set content='%s', status='%d' where id=%d" % (result, 1, section_id)
            rtysdb.do_exec_sql(sql)
            # presumably returns a single (id, content) row when called
            # without 'more' -- confirm against rtysdb.
            parent_content = rtysdb.select_data(
                "SELECT id,content FROM ocenter_recognition WHERE id=%d" % (parent_id))
            if parent_content:
                # The stored content may still be NULL for the first section.
                new_content = (parent_content[1] or '') + result
                update_content_sql = "update ocenter_recognition set content='%s' where id=%d" % (new_content, parent_id)
                rtysdb.do_exec_sql(update_content_sql)
        else:
            rtysdb.do_exec_sql(
                "update ocenter_section set status='%d' where id=%d" % (1, section_id))
        # Throttle requests to the recognition service.
        time.sleep(5)
    # NOTE(review): as in the original, the master is marked finished after
    # this batch even though the query is limited to 10 sections.
    rtysdb.do_exec_sql(
        "UPDATE ocenter_recognition SET finished_status=1 WHERE id=%d" % (master_id))


def _run_ffmpeg(arguments):
    """Run ffmpeg with ``arguments`` and wait; return True on exit code 0."""
    try:
        return subprocess.call(["ffmpeg"] + list(arguments)) == 0
    except OSError:
        # ffmpeg is not installed or not on PATH; treat as a failed run.
        return False


def _format_hms(total_seconds):
    """Format a number of seconds as ``HH:MM:SS`` for ffmpeg."""
    hours, remainder = divmod(int(total_seconds), 3600)
    minutes, seconds = divmod(remainder, 60)
    return "%02d:%02d:%02d" % (hours, minutes, seconds)


def ffmpeg_convert():
    """Convert pending recordings into a format Baidu accepts, then cut them.

    Every master row with ``status=0`` is re-encoded to 8000 Hz mono WAV
    under ``Uploads/Convert``. When the output file exists it is sliced via
    ``ffmpeg_cut`` and the row is updated with ``status=1`` and the
    converted file name.
    """
    section_path = db_config.SYS_PATH
    used_audio = get_master_audio('cut_status')
    if not used_audio:
        return
    for audio in used_audio:
        audio_path = section_path + '/' + audio[1]
        convert_name = "Uploads/Convert/convert_" + str(uuid.uuid1()) + ".wav"
        target_path = section_path + "/" + convert_name
        # Wait for ffmpeg to finish so the existence check and the cutting
        # below see the complete file.
        _run_ffmpeg(["-i", audio_path, "-ar", "8000", "-ac", "1",
                     "-f", "wav", target_path])
        if os.path.exists(target_path):
            ffmpeg_cut(convert_name, audio[3], audio[0])
            sql = "UPDATE ocenter_recognition SET status=1,convert_name='%s' where id=%d" % (convert_name, audio[0])
            rtysdb.do_exec_sql(sql)


def ffmpeg_cut(convert_name, sharps, master_id):
    """Cut a converted recording into consecutive 30-second sections.

    Args:
        convert_name: path of the converted WAV, relative to
            ``db_config.SYS_PATH``.
        sharps: number of sections to produce; nothing happens when it is
            empty, zero or negative.
        master_id: id of the parent row, stored as ``rid`` on each section.
    """
    section_path = db_config.SYS_PATH
    source_file = section_path + '/' + convert_name
    for index in range(sharps or 0):
        # Compute the offset arithmetically rather than via localtime(),
        # which depended on the host time zone.
        start_time = _format_hms(index * SEGMENT_SECONDS)
        db_store_name = "Uploads/Section/" + str(uuid.uuid1()) + '-' + str(index + 1) + ".wav"
        section_name = section_path + "/" + db_store_name
        _run_ffmpeg(["-i", source_file, "-vn", "-acodec", "copy",
                     "-ss", start_time, "-t", _format_hms(SEGMENT_SECONDS),
                     section_name])
        rtysdb.insert_one('ocenter_section', {
            'rid': master_id,
            'url': db_store_name,
            'create_time': int(time.time()),
            'status': 0,
        })


def main():
    """Convert and cut pending recordings, then recognize unfinished ones."""
    ffmpeg_convert()
    audio = get_master_audio('finished_status')
    if audio:
        for ad in audio:
            go_recognize(ad[0])


if __name__ == "__main__":
    main()
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。
原文链接:https://blog.csdn.net/septwolves2015/article/details/78554524