
Gibbs samplers for Latent Dirichlet Allocation

Published on 2024/04/24

The generative process of Latent Dirichlet Allocation (LDA) is as follows (a simulation sketch follows the list):

  • for each document d = 1, 2, \dots, D
    • \theta_d \sim \mathrm{Dir}(\alpha) , \alpha = (\alpha_1, \dots, \alpha_K)
      • \theta_d is the topic distribution of document d.
  • for each topic k = 1, 2, \dots, K
    • \phi_k \sim \mathrm{Dir}(\beta) , \beta = (\beta_1, \dots, \beta_V)
      • Let \mathcal{V} = \set{1, 2, \dots, V} be the index set of the vocabulary.
      • \phi_k is the word distribution of topic k.
  • for each document d = 1, 2, \dots, D and each word position i = 1, 2, \dots, n_d, where n_d is the number of words in document d
    • z_{d,i} \sim \mathrm{Mult}(\theta_d) , z_{d,i} \in \set{1, 2, \dots, K}
    • w_{d,i} \sim \mathrm{Mult}(\phi_{z_{d,i}}) , w_{d, i} \in \mathcal{V}
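
As a sanity check, here is a minimal NumPy simulation of this generative process. The sizes D, K, V, n_d and the symmetric values of \alpha and \beta below are illustrative assumptions, not values from the text.

```python
import numpy as np

# Illustrative sizes and hyperparameters (assumptions for this sketch).
rng = np.random.default_rng(0)
D, K, V = 5, 3, 20          # documents, topics, vocabulary size
n_d = [50] * D              # words per document
alpha = np.full(K, 0.1)     # Dirichlet prior over topics
beta = np.full(V, 0.01)     # Dirichlet prior over words

theta = rng.dirichlet(alpha, size=D)   # theta_d ~ Dir(alpha), shape (D, K)
phi = rng.dirichlet(beta, size=K)      # phi_k ~ Dir(beta),   shape (K, V)

docs, topics = [], []                  # w and z as lists of per-document arrays
for d in range(D):
    z_d = rng.choice(K, size=n_d[d], p=theta[d])            # z_{d,i} ~ Mult(theta_d)
    w_d = np.array([rng.choice(V, p=phi[k]) for k in z_d])  # w_{d,i} ~ Mult(phi_{z_{d,i}})
    topics.append(z_d)
    docs.append(w_d)
```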

The joint probability is

\begin{align*} p(w, z, \theta, \phi | \alpha, \beta) &= p(w|z,\phi) p(z|\theta) p(\theta|\alpha) p(\phi|\beta) \\ &= \left( \prod_{d=1}^D \prod_{i=1}^{n_d} p(w_{d,i} | \phi_{z_{d,i}}) p(z_{d,i} | \theta_{d}) \right) \left( \prod_{d=1}^D p(\theta_d | \alpha) \right) \left( \prod_{k=1}^K p(\phi_k | \beta) \right) \end{align*}
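
For reference, the log of this joint density can be evaluated directly from the factorization. The helper names log_dirichlet and log_joint below are my own; the Dirichlet normalizing constant is computed with scipy.special.gammaln.

```python
import numpy as np
from scipy.special import gammaln

def log_dirichlet(x, a):
    # log Dir(x | a) = log Gamma(sum a) - sum log Gamma(a_k) + sum (a_k - 1) log x_k
    return gammaln(a.sum()) - gammaln(a).sum() + ((a - 1) * np.log(x)).sum()

def log_joint(w, z, theta, phi, alpha, beta):
    # log p(w, z, theta, phi | alpha, beta) following the factorization above
    lp = 0.0
    for d, w_d in enumerate(w):
        for i, v in enumerate(w_d):
            k = z[d][i]
            lp += np.log(phi[k, v]) + np.log(theta[d, k])  # p(w_{d,i}|phi_{z_{d,i}}) p(z_{d,i}|theta_d)
    lp += sum(log_dirichlet(theta[d], alpha) for d in range(theta.shape[0]))  # p(theta|alpha)
    lp += sum(log_dirichlet(phi[k], beta) for k in range(phi.shape[0]))       # p(phi|beta)
    return lp
```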

To derive the Gibbs sampler, we need the full conditional (posterior) distributions of the latent variables z, \theta, and \phi.

\begin{align*} p(z_{d,i} &= k | w_{d,i} = v, w^{\backslash d,i}, z^{\backslash d,i}, \phi, \theta, \alpha, \beta) \\ &\propto p(z_{d,i} = k, w_{d,i} = v, w^{\backslash d,i}, z^{\backslash d,i}, \phi, \theta | \alpha, \beta) \\ &= p(w_{d,i} | z_{d,i}, \phi) p(z_{d,i}|\theta_d) p(w^{\backslash d,i} | z^{\backslash d,i}, \phi) p(z^{\backslash d,i} | \theta) p(\phi | \beta) p(\theta | \alpha) \\ &\propto p(w_{d,i} | z_{d,i}, \phi) p(z_{d,i}|\theta_d) \\ &= \phi_{k,v} \theta_{d,k} \end{align*}
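
In code, this conditional is just the element-wise product of a column of \phi and a row of \theta, renormalized over topics. The function name sample_z and the data layout (w and z as lists of per-document integer arrays) are assumptions for this sketch.

```python
import numpy as np

def sample_z(w, z, theta, phi, rng):
    # Resample each z_{d,i} from p(z_{d,i} = k | ...) ∝ phi[k, v] * theta[d, k], with v = w_{d,i}
    K = theta.shape[1]
    for d, w_d in enumerate(w):
        for i, v in enumerate(w_d):
            p = phi[:, v] * theta[d]            # unnormalized conditional over topics k
            z[d][i] = rng.choice(K, p=p / p.sum())
    return z
```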

\begin{align*} p(\theta_d &| w, z, \phi, \theta^{\backslash d}, \alpha, \beta) \\ &\propto p(\theta_d, w, z, \phi, \theta^{\backslash d} | \alpha, \beta) \\ &= p(w|z,\phi) p(z_d|\theta_d) p(z^{\backslash d} | \theta^{\backslash d}) p(\theta_d | \alpha) p(\theta^{\backslash d} | \alpha) p(\phi|\beta) \\ &\propto p(z_d|\theta_d) p(\theta_d | \alpha) \\ \end{align*}
\begin{align*} p(z_d|\theta_d) &= \prod_{i=1}^{n_d} p(z_{d,i} | \theta_d) = \prod_{k=1}^K \prod_{i=1}^{n_d} \theta_{d,k}^{\delta(z_{d,i}, k)} = \prod_{k=1}^{K} \theta_{d,k}^{\sum_{i=1}^{n_d} \delta(z_{d,i}, k)} = \prod_{k=1}^{K} \theta_{d,k}^{n_{d,k}} \end{align*}
\begin{align*} p(\theta_d | \alpha) = \mathrm{Dir}(\theta_d | \alpha) \propto \prod_{k=1}^K \theta_{d,k}^{\alpha_k - 1} \end{align*}
\begin{align*} \therefore p(\theta_d &| w, z, \phi, \theta^{\backslash d}, \alpha, \beta) \propto \prod_{k=1}^{K} \theta_{d,k}^{n_{d,k}} \theta_{d,k}^{\alpha_k - 1} = \prod_{k=1}^{K} \theta_{d,k}^{n_{d,k} + \alpha_k - 1} \end{align*}
This is the kernel of a Dirichlet distribution, so \theta_d \mid \cdot \sim \mathrm{Dir}(\alpha_1 + n_{d,1}, \dots, \alpha_K + n_{d,K}).
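
In code, the \theta_d update reduces to counting topic assignments in document d and drawing from the Dirichlet above. sample_theta below is a sketch under the same assumed data layout (z as a list of per-document integer arrays).

```python
import numpy as np

def sample_theta(z, alpha, rng):
    # Resample theta_d ~ Dir(alpha_1 + n_{d,1}, ..., alpha_K + n_{d,K}) for each document d
    K = alpha.shape[0]
    theta = np.empty((len(z), K))
    for d, z_d in enumerate(z):
        n_dk = np.bincount(z_d, minlength=K)   # n_{d,k}: tokens of document d assigned to topic k
        theta[d] = rng.dirichlet(alpha + n_dk)
    return theta
```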

\begin{align*} p(\phi_k &| w, z, \theta, \phi^{\backslash k}, \alpha, \beta) \\ &\propto p(\phi_k, w, z, \theta, \phi^{\backslash k} | \alpha, \beta) \\ &= p(w|z,\phi) p(\phi|\beta) \cancel{p(z|\theta) p(\theta|\alpha)} \\ &\propto \left( \prod_{d=1}^D \prod_{i=1}^{n_d} p(w_{d,i} | \phi, z_{d,i}) \right) p(\phi_k | \beta) \\ \end{align*}
\begin{align*} \prod_{d=1}^D \prod_{i=1}^{n_d} p(w_{d,i} | \phi, z_{d,i}) &= \prod_{d=1}^D \prod_{i=1}^{n_d} \prod_{k=1}^K \prod_{v=1}^V \phi_{k,v}^{\delta(w_{d,i}=v, z_{d,i}=k)} \\ &= \prod_{k=1}^K \prod_{v=1}^V \phi_{k,v}^{\sum_{d=1}^D \sum_{i=1}^{n_d} \delta(w_{d,i}=v, z_{d,i}=k)} \\ &= \prod_{k=1}^K \prod_{v=1}^V \phi_{k,v}^{n_{k,v}} \\ p(\phi_k | \beta) &\propto \prod_{v=1}^V \phi_{k,v}^{\beta_v -1} \end{align*}
Keeping only the factors that involve \phi_k,
\begin{align*} \therefore p(\phi_k | w, z, \theta, \phi^{\backslash k}, \alpha, \beta) &\propto \prod_{v=1}^V \phi_{k,v}^{n_{k,v}} \prod_{v=1}^V \phi_{k,v}^{\beta_v -1} \\ &= \prod_{v=1}^V \phi_{k,v}^{n_{k,v} + \beta_v -1} \\ \end{align*}
This is again a Dirichlet kernel, so \phi_k \mid \cdot \sim \mathrm{Dir}(\beta_1 + n_{k,1}, \dots, \beta_V + n_{k,V}).
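
In code, the \phi_k update counts word-topic assignments n_{k,v} and draws each row from the Dirichlet above; alternating this with sample_z and sample_theta gives one Gibbs sweep. The function sample_phi and the sweep loop below are a sketch under the same assumptions, not code from the article.

```python
import numpy as np

def sample_phi(w, z, beta, K, rng):
    # Resample phi_k ~ Dir(beta_1 + n_{k,1}, ..., beta_V + n_{k,V}) for each topic k
    V = beta.shape[0]
    n_kv = np.zeros((K, V))
    for w_d, z_d in zip(w, z):
        np.add.at(n_kv, (z_d, w_d), 1)   # n_{k,v}: tokens of word v assigned to topic k
    return np.vstack([rng.dirichlet(beta + n_kv[k]) for k in range(K)])

# One possible Gibbs sweep, alternating the three conditional draws:
# for it in range(n_iter):
#     z = sample_z(w, z, theta, phi, rng)
#     theta = sample_theta(z, alpha, rng)
#     phi = sample_phi(w, z, beta, K, rng)
```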
