我已经实现了 Baum-Welch 算法,并正在用一些由已知分布生成的玩具数据对它进行测试。观测值服从正态分布,其均值和标准差取决于隐藏状态;隐藏状态共有 2 个。除了隐藏状态的初始分布之外,算法对大多数参数似乎都能收敛;而初始分布总是收敛到 (0; 1) 或 (1; 0),具体是哪一个取决于随机生成的数据。
这种现象对该算法来说正常吗?如果正常,希望能提供一些参考资料;如果不正常,希望能给一些如何查找错误的提示。
代码 (F#)。首先是一个辅助模块:
module MyMath
/// Returns the square of `x`.
let sqr (x: float) : float = x * x
/// Element-wise division: returns a new array holding every element of
/// `array` divided by the scalar `d`. The input array is not modified.
let inline (./) (array: float[]) (d: float) =
    array |> Array.map (fun element -> element / d)
/// Element-wise scaling: returns a new array holding every element of
/// `array` multiplied by the scalar `d`. The input array is not modified.
let inline ( .* ) (array: float[]) (d: float) =
    array |> Array.map (fun element -> element * d)
/// Applies `f` to every element of the sequence `s` and materialises the
/// results as an array.
let map f s =
    Seq.toArray (Seq.map f s)
/// Divides every element of the sequence `v` by the sum of all elements and
/// returns the quotients as an array. `v` is enumerated twice: once for the
/// sum and once for the division.
let normalize v =
    let total = Seq.sum v
    v |> map (fun x -> x / total)
/// Lazily yields the elements of row `i` of the 2D array `array`, in order
/// of increasing second index.
let row i array =
    seq { for col in 0 .. Array2D.length2 array - 1 -> array.[i, col] }
/// Lazily yields the elements of column `j` of the 2D array `array`, in
/// order of increasing first index.
let column j array =
    seq { for r in 0 .. Array2D.length1 array - 1 -> array.[r, j] }
/// Returns the sum of all elements of the float array `v`.
let sum (v: float[]) : float = Array.sum v
/// Returns f(0) + f(1) + ... + f(N-1); the result is 0.0 when `N` is 0.
let sumTo N (f: int -> float) =
    let terms = Seq.init N f
    Seq.sum terms
/// Returns the sum of the elements in column `j` of the float matrix `array`.
let sum_column j (array: float[,]) =
    Seq.sum (column j array)
/// Returns the sum of the elements in row `i` of the float matrix `array`.
let sum_row i (array: float[,]) =
    Seq.sum (row i array)
/// Arithmetic mean of `data`: the sum of the elements divided by their
/// count. For an empty array this is 0.0/0.0, i.e. NaN.
let mean data =
    let count = float (Array.length data)
    sum data / count
/// Population variance of `data`: the average squared deviation from the
/// arithmetic mean (divides by N, not N-1).
///
/// The previous version computed the mean but then averaged the raw squares
/// x*x, which is the second raw moment rather than the variance.
/// For an empty array the result is NaN (0.0/0.0).
let var (data: float[]) =
    let count = float (Array.length data)
    let m = Array.sum data / count
    // Average of squared deviations from the mean.
    let squaredDeviations = Array.sumBy (fun x -> (x - m) * (x - m)) data
    squaredDeviations / count
/// Builds an array of length `T` by forward induction: element 0 is `start`
/// and element t (t >= 1) is `nextRow t` applied to element t-1.
/// Writing element 0 fails with an index error when `T` is 0.
let induction start T nextRow =
    let rows = Array.zeroCreate T
    rows.[0] <- start
    // Fill positions 1 .. T-1, each from its predecessor.
    let rec fill t =
        if t < T then
            rows.[t] <- nextRow t rows.[t - 1]
            fill (t + 1)
    fill 1
    rows
/// Builds an array of length `T` by backward induction: element T-1 is
/// `last` and element t (t <= T-2) is `previousRow t` applied to element t+1.
/// Writing element T-1 fails with an index error when `T` is 0.
let backInduction last T previousRow =
    let rows = Array.zeroCreate T
    rows.[T - 1] <- last
    // Fill positions T-2 down to 0, each from its successor.
    let rec fill t =
        if t >= 0 then
            rows.[t] <- previousRow t rows.[t + 1]
            fill (t - 1)
    fill (T - 2)
    rows
/// Forward induction with per-step rescaling.
///
/// Each row is produced by `nextRow t` from the already rescaled previous
/// row (row 0 comes from `start`), then divided by the sum of its own
/// elements. Returns the pair (rescaled rows, row sums before rescaling),
/// both of length `T`.
let inductionNormalized start T nextRow =
    let rows = Array.zeroCreate T
    let totals = Array.zeroCreate T
    // Records the sum of `row` at position t and stores `row` divided by it.
    let store t row =
        let total = sum row
        totals.[t] <- total
        rows.[t] <- row ./ total
    store 0 start
    for t in 1 .. T - 1 do
        store t (nextRow t rows.[t - 1])
    (rows, totals)
主要模块:
module BaumWelch
open System
open MyMath
/// Reads column 0 of row `q` of the parameter matrix `theta`.
let mu (theta: float[,]) q = Array2D.get theta q 0
/// Reads column 1 of row `q` of the parameter matrix `theta`.
let sigma (theta: float[,]) q = Array2D.get theta q 1
/// Evaluates the Gaussian-shaped density exp(-r^2/2) / (|vol| * sqrt(2*pi*dt))
/// of `observation` in hidden state `state`.
///
/// `getDrift` and `getVol` extract the drift and volatility of `state` from
/// `parameters`; the absolute value of the volatility is used. The residue r
/// comes from `Likelihood.normalizedResidue`, which is defined outside this
/// file.
/// NOTE(review): presumably r is the observation standardised by drift,
/// volatility and dt - confirm against the Likelihood module.
/// Fails if the computed density is negative.
let likelihood getDrift getVol dt parameters state observation =
    let drift = getDrift parameters state
    let vol = Math.Abs (getVol parameters state: float)
    let rootDt = Math.Sqrt dt
    let residue = Likelihood.normalizedResidue drift vol dt rootDt observation
    let density =
        (Math.Exp (-0.5 * (residue * residue))) / (vol * (Math.Sqrt (2.0 * Math.PI * dt)))
    if density < 0.0 then failwith "Negative density, it certainly shouldn't have happened"
    else density
/// Scaled forward-backward pass (notation follows the Erratum for Rabiner).
///
/// `b i o` is the emission density of observation `o` in state `i`,
/// `initialPi` the initial state distribution and `initialA` the transition
/// matrix. Returns `(alpha, beta, norm)`:
///   * `alpha.[t]` - forward variables at time t, rescaled to sum to 1;
///   * `beta.[t]`  - backward variables at time t, divided by `norm.[t]`;
///   * `norm.[t]`  - the sum of the forward row at time t before rescaling,
///                   i.e. c_t = 1/norm_t.
let alphaBeta b (initialPi: float[]) initialA observations = //notation in comments from the Erratum for Rabiner
    let T = Array.length observations
    let N = Array2D.length1 initialA
    // Forward pass: rows hold \bar{\alpha} before inductionNormalized rescales them.
    let firstAlpha = Array.init N (fun i -> initialPi.[i] * (b i observations.[0]))
    let forwardStep t (previous: float[]) =
        Array.init N (fun j ->
            (sumTo N (fun i -> previous.[i] * initialA.[i, j])) * (b j observations.[t]))
    let (alpha, norm) = inductionNormalized firstAlpha T forwardStep
    // Backward pass, scaled with the same per-step factors as the forward pass.
    let lastBeta = Array.init N (fun i -> 1.0 / norm.[T - 1])
    let backwardStep t (next: float[]) =
        Array.init N (fun j ->
            (sumTo N (fun i -> initialA.[j, i] * next.[i] * (b i observations.[t + 1]))) / norm.[t])
    let beta = backInduction lastBeta T backwardStep
    (alpha, beta, norm) //c_t = 1/norm_t
/// Sums the natural logarithms of the scaling norms (c_t = 1/norm_t), which
/// the caller uses as the log-likelihood of the observations.
/// Fails with "log likelihood is NaN" if the sum is NaN.
let log_P_O norm =
    let total = Seq.sumBy (fun (norm_t: float) -> Math.Log norm_t) norm
    match Double.IsNaN total with
    | true -> failwith "log likelihood is NaN"
    | false -> total
/// Returns alpha.[t].[i] * beta.[t].[i] * norm.[t] for state `i` at time `t`,
/// taking the three arrays as a single tuple.
let gamma (alpha: float[][], beta: float[][], norm: float[]) i t =
    let forward = alpha.[t].[i]
    let backward = beta.[t].[i]
    forward * backward * norm.[t]
/// Returns alpha.[t].[i] * initialA.[i,j] * b j observations.[t+1] * beta.[t+1].[j]
/// for the state pair (`i`, `j`) at time `t`. Requires t+1 to be a valid
/// index into `observations` and `beta`.
let xi b (initialA: float[,]) (alpha: float[][]) (beta: float[][]) (observations: float[]) i j t =
    let next = t + 1
    let leaving = alpha.[t].[i] * initialA.[i, j]
    let emitted = b j observations.[next]
    leaving * emitted * beta.[next].[j]
/// Performs one Baum-Welch re-estimation step.
///
/// `llFunction dt theta` must give the emission density `b state observation`.
/// `(initialPi, initialA, initialTheta)` are the current initial distribution,
/// transition matrix and per-state emission parameters (column 0 = mu,
/// column 1 = sigma). Returns `(logLikelihood, (pi, A, theta))`, where the
/// log-likelihood is that of `observations` under the *input* parameters and
/// the triple holds the re-estimated parameters.
///
/// Per-state sums are computed once and reused. Previously the weighted mean
/// (an O(T) sum) was re-evaluated for every t inside the variance sum and the
/// variance itself was evaluated twice per state, which made a step O(T^2).
/// The numeric results are unchanged.
let oneStep llFunction dt (initialPi, initialA, initialTheta) observations =
    let T = Array.length observations
    let N = Array2D.length1 initialA
    let b = llFunction dt initialTheta
    let (alpha, beta, norm) = alphaBeta b initialPi initialA observations
    let gamma = gamma (alpha, beta, norm)
    let xi = xi b initialA alpha beta observations
    // NOTE(review): with a single observation sequence this estimate rests on
    // one sample of the initial state, so it usually ends up at a degenerate
    // vector such as (0; 1) or (1; 0).
    let pi = Array.init N (fun i -> gamma i 0) //Rabiner (40a)
    let A = //Rabiner (40b)
        // The denominator depends only on i, so compute it once per row.
        let occupancy = Array.init N (fun i -> sumTo (T-1) (gamma i))
        Array2D.init N N (fun i j -> (sumTo (T-1) (xi i j)) / occupancy.[i])
    // Total weight of each state over all T observations.
    let weights = Array.init N (fun i -> sumTo T (gamma i))
    let means = //Rabiner (53)
        Array.init N (fun i -> (sumTo T (fun t -> (gamma i t) * observations.[t])) / weights.[i])
    let variances =
        Array.init N (fun i ->
            let m = means.[i]
            let numerator = sumTo T (fun t -> (gamma i t) * (sqr (observations.[t] - m)))
            numerator / weights.[i])
    // NOTE(review): the 0.5*variance term looks like the drift correction for
    // log-returns of a geometric Brownian motion - confirm against the model.
    let mu i = (means.[i] + 0.5 * variances.[i]) / dt
    let sigma i = Math.Sqrt (variances.[i] / dt)
    let theta = Array2D.init N 2 (fun i k -> if k = 0 then mu i else sigma i)
    let logLikelihood = log_P_O norm //Rabiner (103)
    (logLikelihood, (pi, A, theta))
/// Prints the model parameters followed by the log-likelihood to standard
/// output, one item per line.
let print (ll, (pi, A, theta)) =
    // Structured values use %A; the log-likelihood is printed as a float.
    printfn "pi = %A" pi
    printfn "A = %A" A
    printfn "theta = %A" theta
    printfn "logLikelihood = %f" ll
/// Repeats `oneStep` starting from `initialParams` until the log-likelihood
/// changes by less than 10e-5 between two consecutive steps, then returns
/// the last `(logLikelihood, parameters)` pair.
///
/// There is no iteration cap: the recursion stops only once the tolerance
/// test succeeds.
let baumWelch likelihood dt initialParams observations =
    let tolerance = 10e-5
    let rec iterate current lastLL =
        //print (lastLL, current)
        let (ll, updated) = oneStep likelihood dt current observations
        let change = Math.Abs (lastLL - ll)
        if change < tolerance then (ll, updated)
        else iterate updated ll
    // Start far below any attainable log-likelihood so the first step never stops.
    iterate initialParams (-10e100)