脚本之家,脚本语言编程技术及教程分享平台!
分类导航

Python|VBS|Ruby|Lua|perl|VBA|Golang|PowerShell|Erlang|autoit|Dos|bat|

服务器之家 - 脚本之家 - Python - python实现隐马尔科夫模型HMM

python实现隐马尔科夫模型HMM

2021-01-24 01:06adzhua Python

这篇文章主要为大家详细介绍了python实现隐马尔科夫模型HMM,具有一定的参考价值,感兴趣的小伙伴们可以参考一下

一份完全按照李航<<统计学习方法>>介绍的HMM代码,供大家参考,具体内容如下

?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
#coding=utf8
'''''
Created on 2017-8-5
里面的代码许多地方可以精简,但为了百分百还原公式,就没有精简了。
@author: adzhua
'''
 
import numpy as np
 
class HMM(object):
  def __init__(self, A, B, pi):
    '''''
    A: 状态转移概率矩阵
    B: 输出观察概率矩阵
    pi: 初始化状态向量
    '''
    self.A = np.array(A)
    self.B = np.array(B)
    self.pi = np.array(pi)
    self.N = self.A.shape[0# 总共状态个数
    self.M = self.B.shape[1# 总共观察值个数  
    
   
  # 输出HMM的参数信息
  def printHMM(self):
    print ("==================================================")
    print ("HMM content: N =",self.N,",M =",self.M)
    for i in range(self.N):
      if i==0:
        print ("hmm.A ",self.A[i,:]," hmm.B ",self.B[i,:])
      else:
        print ("   ",self.A[i,:],"    ",self.B[i,:])
    print ("hmm.pi",self.pi)
    print ("==================================================")
           
   
  # 前向算法 
  def forwar(self, T, O, alpha, prob):
    '''''
    T: 观察序列的长度
    O: 观察序列
    alpha: 运算中用到的临时数组
    prob: 返回值所要求的概率
    '''  
     
    # 初始化
    for i in range(self.N):
      alpha[0, i] = self.pi[i] * self.B[i, O[0]]
 
    # 递归
    for t in range(T-1):
      for j in range(self.N):
        sum = 0.0
        for i in range(self.N):
          sum += alpha[t, i] * self.A[i, j]
        alpha[t+1, j] = sum * self.B[j, O[t+1]]    
     
    # 终止
    sum = 0.0
    for i in range(self.N):
      sum += alpha[T-1, i]
     
    prob[0] *= sum  
 
   
  # 带修正的前向算法
  def forwardWithScale(self, T, O, alpha, scale, prob):
    scale[0] = 0.0
     
    # 初始化
    for i in range(self.N):
      alpha[0, i] = self.pi[i] * self.B[i, O[0]]
      scale[0] += alpha[0, i]
       
    for i in range(self.N):
      alpha[0, i] /= scale[0]
     
    # 递归
    for t in range(T-1):
      scale[t+1] = 0.0
      for j in range(self.N):
        sum = 0.0
        for i in range(self.N):
          sum += alpha[t, i] * self.A[i, j]
         
        alpha[t+1, j] = sum * self.B[j, O[t+1]]
        scale[t+1] += alpha[t+1, j]
       
      for j in range(self.N):
        alpha[t+1, j] /= scale[t+1]
      
    # 终止
    for t in range(T):
      prob[0] += np.log(scale[t])    
       
       
  def back(self, T, O, beta, prob): 
    '''''
    T: 观察序列的长度  len(O)
    O: 观察序列
    beta: 计算时用到的临时数组
    prob: 返回值;所要求的概率
    '''
     
    # 初始化        
    for i in range(self.N):
      beta[T-1, i] = 1.0
     
    # 递归
    for t in range(T-2, -1, -1): # 从T-2开始递减;即T-2, T-3, T-4, ..., 0
      for i in range(self.N):
        sum = 0.0
        for j in range(self.N):
          sum += self.A[i, j] * self.B[j, O[t+1]] * beta[t+1, j]
         
        beta[t, i] = sum
     
    # 终止
    sum = 0.0
    for i in range(self.N):
      sum += self.pi[i]*self.B[i,O[0]]*beta[0,i]
     
    prob[0] = sum  
     
     
  # 带修正的后向算法
  def backwardWithScale(self, T, O, beta, scale):
    '''''
    T: 观察序列的长度 len(O)
    O: 观察序列
    beta: 计算时用到的临时数组
    '''
    # 初始化
    for i in range(self.N):
      beta[T-1, i] = 1.0
     
    # 递归        
    for t in range(T-2, -1, -1):
      for i in range(self.N):
        sum = 0.0
        for j in range(self.N):
          sum += self.A[i, j] * self.B[j, O[t+1]] * beta[t+1, j]
         
        beta[t, i] = sum / scale[t+1]    
         
   
  # viterbi算法      
  def viterbi(self, O):
    '''''
    O: 观察序列
    '''
    T = len(O)
    # 初始化
    delta = np.zeros((T, self.N), np.float)
    phi = np.zeros((T, self.N), np.float)
    I = np.zeros(T)
     
    for i in range(self.N):
      delta[0, i] = self.pi[i] * self.B[i, O[0]]
      phi[0, i] = 0.0
     
    # 递归
    for t in range(1, T):
      for i in range(self.N):
        delta[t, i] = self.B[i, O[t]] * np.array([delta[t-1, j] * self.A[j, i] for j in range(self.N)] ).max()
        phi = np.array([delta[t-1, j] * self.A[j, i] for j in range(self.N)]).argmax()
       
    # 终止
    prob = delta[T-1, :].max()
    I[T-1] = delta[T-1, :].argmax()
     
    for t in range(T-2, -1, -1):
      I[t] = phi[I[t+1]]
       
     
    return prob, I
   
   
  # 计算gamma(计算A所需的分母;详情见李航的统计学习) : 时刻t时马尔可夫链处于状态Si的概率
  def computeGamma(self, T, alpha, beta, gamma):
    ''''''''
    for t in range(T):
      for i in range(self.N):
        sum = 0.0
        for j in range(self.N):
          sum += alpha[t, j] * beta[t, j]
         
        gamma[t, i] = (alpha[t, i] * beta[t, i]) / sum  
   
  # 计算sai(i,j)(计算A所需的分子) 为给定训练序列O和模型lambda时
  def computeXi(self, T, O, alpha, beta, Xi):
     
    for t in range(T-1):
      sum = 0.0
      for i in range(self.N):
        for j in range(self.N):
          Xi[t, i, j] = alpha[t, i] * self.A[i, j] * self.B[j, O[t+1]] * beta[t+1, j]
          sum += Xi[t, i, j]
       
      for i in range(self.N):
        for j in range(self.N):
          Xi[t, i, j] /= sum
   
   
  # 输入 L个观察序列O,初始模型:HMM={A,B,pi,N,M}
  def BaumWelch(self, L, T, O, alpha, beta, gamma):                  
    DELTA = 0.01 ; round = 0 ; flag = 1 ; probf = [0.0]
    delta = 0.0; probprev = 0.0 ; ratio = 0.0 ; deltaprev = 10e-70
     
    xi = np.zeros((T, self.N, self.N)) # 计算A的分子
    pi = np.zeros((T), np.float# 状态初始化概率
     
    denominatorA = np.zeros((self.N), np.float) # 辅助计算A的分母的变量
    denominatorB = np.zeros((self.N), np.float)
    numeratorA = np.zeros((self.N, self.N), np.float# 辅助计算A的分子的变量
    numeratorB = np.zeros((self.N, self.M), np.float# 针对输出观察概率矩阵
    scale = np.zeros((T), np.float)
     
    while True:
      probf[0] =0
       
      # E_step
      for l in range(L):
        self.forwardWithScale(T, O[l], alpha, scale, probf)
        self.backwardWithScale(T, O[l], beta, scale)
        self.computeGamma(T, alpha, beta, gamma)  # (t, i)
        self.computeXi(T, O[l], alpha, beta, xi)  #(t, i, j)
         
        for i in range(self.N):
          pi[i] += gamma[0, i]
          for t in range(T-1):
            denominatorA[i] += gamma[t, i]
            denominatorB[i] += gamma[t, i]
          denominatorB[i] += gamma[T-1, i]
         
          for j in range(self.N):
            for t in range(T-1):
              numeratorA[i, j] += xi[t, i, j]
             
          for k in range(self.M): # M为观察状态取值个数
            for t in range(T):
              if O[l][t] == k:
                numeratorB[i, k] += gamma[t, i]  
                 
       
      # M_step。 计算pi, A, B
      for i in range(self.N): # 这个for循环也可以放到for l in range(L)里面
        self.pi[i] = 0.001 / self.N + 0.999 * pi[i] / L
         
        for j in range(self.N):
          self.A[i, j] = 0.001 / self.N + 0.999 * numeratorA[i, j] / denominatorA[i]          
          numeratorA[i, j] = 0.0
         
        for k in range(self.M):
          self.B[i, k] = 0.001 / self.N + 0.999 * numeratorB[i, k] / denominatorB[i]
          numeratorB[i, k] = 0.0  
         
        #重置
        pi[i] = denominatorA[i] = denominatorB[i] = 0.0
         
      if flag == 1:
        flag = 0
        probprev = probf[0]
        ratio = 1
        continue
       
      delta = probf[0] - probprev 
      ratio = delta / deltaprev  
      probprev = probf[0]
      deltaprev = delta
      round += 1
       
      if ratio <= DELTA :
        print('num iteration: ', round)  
        break
     
 
if __name__ == '__main__':
  print ("python my HMM")
   
  # 初始的状态概率矩阵pi;状态转移矩阵A;输出观察概率矩阵B; 观察序列
  pi = [0.5,0.5]
  A = [[0.8125,0.1875],[0.2,0.8]]
  B = [[0.875,0.125],[0.25,0.75]]
  O = [
     [1,0,0,1,1,0,0,0,0],
     [1,1,0,1,0,0,1,1,0],
     [0,0,1,1,0,0,1,1,1]
    ]
  L = len(O)
  T = len(O[0])  # T等于最长序列的长度就好了
   
  hmm = HMM(A, B, pi)
  alpha = np.zeros((T,hmm.N),np.float)
  beta = np.zeros((T,hmm.N),np.float)
  gamma = np.zeros((T,hmm.N),np.float)
   
  # 训练
  hmm.BaumWelch(L,T,O,alpha,beta,gamma)
   
  # 输出HMM参数信息
  hmm.printHMM()

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。

原文链接:https://blog.csdn.net/adzhua/article/details/76746895

延伸 · 阅读

精彩推荐