Let's think about
\begin{align*}
p(y|\theta) &= \int p(y,f|\theta) df = \int p(y|f) p(f|\theta) df \\
& p(y|f) = \mathcal{N}(y|f, \sigma^2I) \\
& p(f|\theta) = \mathcal{N}(f|0, K_\theta) \\
\end{align*}
When
\begin{align*}
p(y, f|\theta)
&= \mathcal{N}\left(
\begin{pmatrix}
y \\
f
\end{pmatrix}
\bigg\vert
\begin{pmatrix}
\mu_y \\
\mu_f
\end{pmatrix}
,
\begin{pmatrix}
\Sigma_{yy} & \Sigma_{yf}\\
\Sigma_{fy} & \Sigma_{ff}
\end{pmatrix}
\right) \\
\end{align*}
p(y|\theta) = \mathcal{N}(y|\mu_y, \Sigma_{yy}) , so we have to obtain \mu_y, \Sigma_{yy}.
Let \Lambda = \Sigma^{-1},
\begin{align*}
\ln p(y,f|\theta)
&\propto - \frac{1}{2}
\left\{
\begin{pmatrix}
y - \mu_y \\
f - \mu_f
\end{pmatrix}^\top
\begin{pmatrix}
\Lambda_{yy} & \Lambda_{yf} \\
\Lambda_{fy} & \Lambda_{ff} \\
\end{pmatrix}
\begin{pmatrix}
y - \mu_y \\
f - \mu_f
\end{pmatrix} \\
\right\} \\
&\propto - \frac{1}{2}
\left\{
\underbrace{
\begin{pmatrix}
y \\
f
\end{pmatrix}^\top
\begin{pmatrix}
\Lambda_{yy} & \Lambda_{yf} \\
\Lambda_{fy} & \Lambda_{ff} \\
\end{pmatrix}
\begin{pmatrix}
y \\
f
\end{pmatrix}
}_{\text{second order term}} \\
-2
\underbrace{
\begin{pmatrix}
\mu_y \\
\mu_f
\end{pmatrix}^\top
\begin{pmatrix}
\Lambda_{yy} & \Lambda_{yf} \\
\Lambda_{fy} & \Lambda_{ff} \\
\end{pmatrix}
\begin{pmatrix}
y \\
f
\end{pmatrix}
}_{\text{linear term}} \tag{1}\\
\right\}
\end{align*}
On the other hand,
\begin{align*}
\ln p(y,f|\theta) &= \ln p(y|f) p(f|\theta) \\
&= \ln \mathcal{N}(y|f, \sigma^2I) \mathcal{N}(f|0, K_\theta) \\
&\propto - \frac{1}{2} \left\{
(y-f)^\top \frac{1}{\sigma^2} I (y-f)
+ f^\top K_\theta^{-1} f
\right\} \\
&= - \frac{1}{2} \left\{
\frac{1}{\sigma^2} y^\top y
- 2 y^\top \frac{1}{\sigma^2} f
+ f^\top (K_\theta^{-1} + \frac{1}{\sigma^2} I) f
\right\} \\
&= -\frac{1}{2} \begin{pmatrix}
y \\
f
\end{pmatrix}^\top
\underbrace{
\begin{pmatrix}
\frac{1}{\sigma^2} & -\frac{1}{\sigma^2} \\
-\frac{1}{\sigma^2} & (K_\theta^{-1} + \frac{1}{\sigma^2})
\end{pmatrix}
}_{\Lambda = \Sigma^{-1}}
\begin{pmatrix}
y \\
f
\end{pmatrix} \tag{2}
\end{align*}
According to comparing the second order term in (1) and (2)
\begin{align*}
\Sigma = \Lambda^{-1} &=
\begin{pmatrix}
\frac{1}{\sigma^2}I & -\frac{1}{\sigma^2}I \\
-\frac{1}{\sigma^2}I & (K_\theta^{-1} + \frac{1}{\sigma^2})
\end{pmatrix}^{-1} \\
&= \begin{pmatrix}
\Sigma_{yy} & \Sigma_{yf} \\
\Sigma_{fy} & \Sigma_{ff} \\
\end{pmatrix} \\
\end{align*}
Because the linear term in (2) does not exist, means are
\begin{align*}
\begin{pmatrix}
\mu_y \\
\mu_f
\end{pmatrix}
&=
\begin{pmatrix}
0 \\
0
\end{pmatrix}
\\
\end{align*}
Here we have to remember the following matrix identity.
\begin{align*}
\begin{pmatrix}
A & B \\
C & D
\end{pmatrix}^{-1}
&=
\begin{pmatrix}
M & -M B D^{^-1} \\
-D^{-1} C M & D^{-1} + D^{-1} C M B D^{-1}
\end{pmatrix} \\
M &= (A - B D^{-1} C)^{-1}
\end{align*}
or
\begin{align*}
\begin{pmatrix}
A & B \\
C & D
\end{pmatrix}^{-1}
&=
\begin{pmatrix}
A^{-1} + A^{-1} B M C A^{-1} & -A^{-1} B M \\
-M C A^{-1} & M
\end{pmatrix} \\
M &= (D - C A^{-1} B)^{-1}
\end{align*}
In this case the second identity is better because we can easily obtain A^{-1}.
\begin{align*}
M &= \left( (K_\theta^{-1} + \frac{1}{\sigma^2})
- (-\frac{1}{\sigma^2}I)
(\frac{1}{\sigma^2}I)^{-1}
(-\frac{1}{\sigma^2}I)
\right)^{-1}
= K_\theta
\end{align*}
\begin{align*}
\Sigma_{yy} &= (\frac{1}{\sigma^2}I)^{-1}
+ (\frac{1}{\sigma^2}I)^{-1}
(-\frac{1}{\sigma^2}I)
K_\theta
(-\frac{1}{\sigma^2}I)
(\frac{1}{\sigma^2}I)^{-1}
\\
&= K_\theta + \sigma^2 I
\end{align*}
\begin{align*}
\therefore p(y|\theta) = \mathcal{N}(y | 0, K_\theta + \sigma^2 I)
\end{align*}
Discussion