Open2

ギブスサンプリングを勉強する

yuuyuu

正規分布の平均と分散をギブスサンプリングを使って推定する

gibbs_normal_dist <- function(mc_sample, y, mu0, t2_0, s2_0, nu0) {
  
  y_mean <- mean(y)
  y_var <- var(y)
  n <- length(y)
  
  PHI <- matrix(0, nrow = mc_sample, ncol = 2)
  PHI[1, ] <- phi <- c(y_mean, 1 / y_var)
  
  set.seed(1)
  for (s in 2:mc_sample) {
    
    # - sampling theta
    mu_n <- (mu0 / t2_0 + n * y_mean * phi[2]) / (1/t2_0 + n * phi[2])
    t2_n <- 1 / (1/t2_0 + n * phi[2])
    phi[1] <- rnorm(1, mu_n, sqrt(t2_n))
    
    # - sampling 1/s2
    nu_n <- nu0 + n
    s2_n <- (nu0 * s2_0 + (n - 1) * y_var + n * (y_mean - phi[1])^2 ) / nu_n
    phi[2] <- rgamma(1, nu_n / 2, nu_n * s2_n / 2)
    
    # - save
    PHI[s, ] <- phi
  }
  
  return(PHI)
}

res_gibbs <- gibbs_normal_dist(1000, y, mu0, t2_0, s2_0, nu0)
plot(res_gibbs[, 1], type = "l")

\frac{1}{2}
yuuyuu

混合正規分布

  • 関数は未完成
  • ステップ
    1. 初期値の設定
    2. どの分布かをサンプリング
    3. どの分布かが決まると、各分布のパラメーターをサンプリング
      (問題に応じてサンプリングの時に混合されてる分布すべてを計算するのもあれば、各サンプリングに個々の分布をサンプリングして計算する方法もある)
gibbs_gaus_mix <- function(n_sample) {
  
  sampling_theta <- function(n, y_mean, mu0, t2_0, s2_post) {
    
    t2_n <- 1 / (1 / t2_0 + n / s2_post)
    mu_n <- (mu0 / t2_0 + n * y_mean / s2_post) / (1 / t2_0 + n / s2_post)
    theta_post <- rnorm(1, mu_n, sqrt(t2_n))
    
    return(theta_post)
  }
  
  sampling_sigma2 <- function(n, nu0, s2_0, y_var, y_mean, theta_post) {
    
    nu_n <- nu0 + n
    s2_n <- (nu0 * s2_0 + (n - 1) * y_var + n * (y_mean - theta_post)^2) / nu_n
    s2_post <- 1 / rgamma(1, nu_n / 2, s2_n * nu_n / 2)
    
    return(s2_post)
  }
  
  PHI <- matrix(0, nrow = n_sample, ncol = 5)

  for (s in 2:n_sample) {
    
    # sampling for which distribution
    p1_prop <- p * dnorm(y, theta1_post, sqrt(s2_1_post))
    p2_prop <- (1 - p) * dnorm(y, theta2_post, sqrt(s2_2_post))
    bernoulli_p <- p1_prop / (p1_prop + p2_prop)
    x_post <- rbinom(n, 1, bernoulli_p)

    n1 <- sum(x_post)
    n2 <- n - n1
    y1 <- y[x_post == 1] # x = 1
    y2 <- y[x_post == 0] # X = 2
    # いろいろと計算...
    p_post <- rbeta(1, a + n1, b + n2)
    p <- p_post
    
    # theta1_post
    theta1_post <- sampling_theta(n1, y1_mean, mu0, t2_0, s2_1_post)
    
    # s2_1_post
    s2_1_post <- sampling_sigma2(n1, nu0, s2_0, y1_var, y1_mean, theta1_post)
    
    # theta2_post
    theta2_post <- sampling_theta(n2, y2_mean, mu0, t2_0, s2_2_post)
    
    # s2_2_post
    s2_2_post <- sampling_sigma2(n2, nu0, s2_0, y2_var, y2_mean, theta2_post)
    
    PHI[s, ] <- c(p_post, theta1_post, s2_1_post, theta2_post, s2_2_post)
    
  }
  
}