# (C) May 2009, Mathieu Blondel
#
# Demo of a discrete hidden Markov model using the Jahmm Java library.
# This script must be run with JRuby. It builds a two-state HMM, generates
# observation sequences from it, re-trains the model on those sequences with
# Baum-Welch, and finally prints the log likelihood and the Viterbi state
# sequence of a sample observation sequence.

require 'java'
# The jar location can be overridden with the JAHMM_JAR environment variable;
# the default is the path used by the original script.
require ENV.fetch('JAHMM_JAR', '/home/mathieu/Desktop/sources/jahmm-0.6.1.jar')

# `include_class` is gone from recent JRuby releases, while very old releases
# may not provide `java_import`; use whichever the running JRuby offers.
java_importer = respond_to?(:java_import, true) ? :java_import : :include_class
%w[
  be.ac.ulg.montefiore.run.jahmm.ObservationInteger
  be.ac.ulg.montefiore.run.jahmm.OpdfInteger
  be.ac.ulg.montefiore.run.jahmm.Hmm
  be.ac.ulg.montefiore.run.jahmm.toolbox.MarkovGenerator
  be.ac.ulg.montefiore.run.jahmm.learn.BaumWelchLearner
  java.util.ArrayList
].each { |java_class| send(java_importer, java_class) }

# Human-readable names; a name's position in the array is the integer index
# used on the Java side.
STATES = %w[rainy sunny].freeze
OBSERVATIONS = %w[walk shop clean].freeze

class Array
  # Converts a Ruby array of floats to a Java double array.
  # Supports one-dimensional and two-dimensional arrays.
  #
  # Returns the Java array produced by `to_java`.
  def to_double
    # On JRuby 1.0.x, to_java(:double) works for both one- and
    # two-dimensional arrays, so the explicit double[] element type is only
    # requested on later versions. An empty array is treated as flat.
    nested = first.is_a?(Array) && JRUBY_VERSION !~ /\A1\.0/
    nested ? to_java(Java::double[]) : to_java(:double)
  end
end

# Maps a sequence of state indices (anything responding to `to_a`) to the
# corresponding names in STATES.
def state_indices_to_names(indices)
  indices.to_a.map { |i| STATES[i] }
end

# Maps a sequence of observations (each responding to `value`) to the
# corresponding names in OBSERVATIONS.
def observations_to_names(obs)
  obs.to_a.map { |o| OBSERVATIONS[o.value] }
end

# Builds a java.util.ArrayList of ObservationInteger from observation names.
#
# names - Array of Strings, each of which must appear in OBSERVATIONS.
#
# Raises ArgumentError if a name is not listed in OBSERVATIONS, instead of
# handing nil to the Java constructor.
def names_to_observations(names)
  observations = names.map do |name|
    index = OBSERVATIONS.index(name)
    raise ArgumentError, "unknown observation: #{name.inspect}" unless index

    ObservationInteger.new(index)
  end
  ArrayList.new(observations)
end

# One observation distribution per state, over OBSERVATIONS (walk, shop, clean).
rainyopdf = OpdfInteger.new([0.1, 0.4, 0.5].to_double)
sunnyopdf = OpdfInteger.new([0.6, 0.3, 0.1].to_double)

# Arguments follow Jahmm's Hmm(pi, a, opdfs) constructor: initial state
# probabilities, state transition matrix, per-state observation distributions.
# Index 0 is "rainy" and index 1 is "sunny", matching STATES.
pi = [0.6, 0.4].to_double
a = [[0.7, 0.3], [0.4, 0.6]].to_double
b = ArrayList.new([rainyopdf, sunnyopdf])

hmm = Hmm.new(pi, a, b)

# We generate fake data with the Markov generator.
# In the real world, we would use real training data.
# Don't forget that HMMs are *generative* models ;)
obs_set = ArrayList.new
generator = MarkovGenerator.new(hmm)
100.times do
  # We generate 100 sequences of 20 + k observations, where k varies from
  # 0 to 9, to show that sequences can be of variable length.
  obs_set << generator.observation_sequence(20 + rand(10))
end

# We train our HMM with our fake data.
puts "HMM, before training"
puts hmm.to_s + "\n"

learner = BaumWelchLearner.new
hmm = learner.learn(hmm, obs_set)

# Note that it shouldn't change that much since we trained the HMM
# with observations generated from itself.
puts "HMM, after training"
puts hmm.to_s + "\n"

# Now let's find the log likelihood of a sample sequence.
obs = ["walk", "walk", "shop", "clean", "clean"]

puts "Log likelihood for #{obs.join(", ")}"
puts hmm.ln_probability(names_to_observations(obs))

puts "Viterbi sequence"
seq = hmm.most_likely_state_sequence(names_to_observations(obs))
puts state_indices_to_names(seq).join(", ")