## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5     ✔ purrr   0.3.4
## ✔ tibble  3.1.5     ✔ dplyr   1.0.7
## ✔ tidyr   1.1.4     ✔ stringr 1.4.0
## ✔ readr   2.0.1     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

1 Data

Part of the iris dataset

X <- iris[1:5,1:4] %>% as.matrix
X
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2

2 Centering

\[ \mathbf{H} = \mathbf{I} - \frac{1}{n} \mathbf{1}\mathbf{1}^T , \] \[ \mathbf{X}_c = \mathbf{H}\mathbf{X} \] \[ \mathbf{H}\mathbf{X}_c = \mathbf{X}_c \]

H <- diag(nrow(X)) - matrix(1/nrow(X),nrow=nrow(X),ncol=nrow(X))
Xc <- H%*%X
colMeans(Xc)
##  Sepal.Length   Sepal.Width  Petal.Length   Petal.Width 
##  1.776357e-16 -8.881784e-17 -4.440892e-17  0.000000e+00
H%*%Xc-Xc
##       Sepal.Length  Sepal.Width Petal.Length Petal.Width
## [1,] -1.665335e-16 8.326673e-17 4.440892e-17           0
## [2,] -1.804112e-16 1.110223e-16 4.440892e-17           0
## [3,] -1.665335e-16 8.326673e-17 4.163336e-17           0
## [4,] -1.665335e-16 8.326673e-17 4.163336e-17           0
## [5,] -1.665335e-16 1.110223e-16 4.440892e-17           0

3 Gram matrix

Gram matrix: \(\mathbf{G}=\mathbf{X}\mathbf{X}^T\) Here we work on centered data so \(\mathbf{G}=\mathbf{X}_c\mathbf{X}_c^T\)

G <- Xc%*%t(Xc)

4 Squared Distance matrix

\[ \mathbf{D}_X = \mathbf{N} - 2\mathbf{X}\mathbf{X}^T + \mathbf{N}^T , \]

N <- matrix(diag(G),nrow(Xc),nrow(Xc))
N
##       [,1]  [,2]  [,3]  [,4]  [,5]
## [1,] 0.106 0.106 0.106 0.106 0.106
## [2,] 0.080 0.080 0.080 0.080 0.080
## [3,] 0.042 0.042 0.042 0.042 0.042
## [4,] 0.110 0.110 0.110 0.110 0.110
## [5,] 0.122 0.122 0.122 0.122 0.122
dist2 <- N-2*G+t(N)
dist2
##      [,1] [,2] [,3] [,4] [,5]
## [1,] 0.00 0.29 0.26 0.42 0.02
## [2,] 0.29 0.00 0.09 0.11 0.37
## [3,] 0.26 0.09 0.00 0.06 0.26
## [4,] 0.42 0.11 0.06 0.00 0.42
## [5,] 0.02 0.37 0.26 0.42 0.00
dist(Xc)^2
##      1    2    3    4
## 2 0.29               
## 3 0.26 0.09          
## 4 0.42 0.11 0.06     
## 5 0.02 0.37 0.26 0.42

6 Show that N cancels out when multiplying with H

\[ \mathbf{H N H} = \mathbf{0}\]

\[ \mathbf{H N}^T \mathbf{H} = \mathbf{0}\]

N%*%H
##               [,1]          [,2] [,3]         [,4] [,5]
## [1,]  0.000000e+00  0.000000e+00    0 0.000000e+00    0
## [2,] -6.938894e-18 -6.938894e-18    0 3.469447e-18    0
## [3,]  0.000000e+00  0.000000e+00    0 0.000000e+00    0
## [4,] -6.938894e-18 -6.938894e-18    0 6.938894e-18    0
## [5,] -6.938894e-18 -6.938894e-18    0 3.469447e-18    0
H%*%t(N)
##      [,1]          [,2] [,3]          [,4]          [,5]
## [1,]    0 -6.938894e-18    0 -6.938894e-18 -6.938894e-18
## [2,]    0 -6.938894e-18    0 -6.938894e-18 -6.938894e-18
## [3,]    0  0.000000e+00    0  0.000000e+00  0.000000e+00
## [4,]    0  3.469447e-18    0  6.938894e-18  3.469447e-18
## [5,]    0  0.000000e+00    0  0.000000e+00  0.000000e+00
LS0tCnRpdGxlOiAiTURTOiBMaW5rIFNxdWFyZWQgRGlzdGFuY2UgTWF0cml4IGFuZCBHcmFtIE1hdHJpeCBpbiBQcmFjdGljZSIKYXV0aG9yOiAiTGlldmVuIENsZW1lbnQiCmRhdGU6ICJzdGF0T21pY3MsIEdoZW50IFVuaXZlcnNpdHkgKGh0dHBzOi8vc3RhdG9taWNzLmdpdGh1Yi5pbykiCi0tLQoKYGBge3IgZWNobz1GQUxTRX0KbGlicmFyeSh0aWR5dmVyc2UpCmBgYAoKIyBEYXRhCgpQYXJ0IG9mIHRoZSBpcmlzIGRhdGFzZXQKCmBgYHtyfQpYIDwtIGlyaXNbMTo1LDE6NF0gJT4lIGFzLm1hdHJpeApYCmBgYAoKIyBDZW50ZXJpbmcKClxbCiAgXG1hdGhiZntIfSA9IFxtYXRoYmZ7SX0gLSBcZnJhY3sxfXtufSBcbWF0aGJmezF9XG1hdGhiZnsxfV5UICwKXF0KXFsKIFxtYXRoYmZ7WH1fYyA9IFxtYXRoYmZ7SH1cbWF0aGJme1h9ClxdClxbClxtYXRoYmZ7SH1cbWF0aGJme1h9X2MgPSBcbWF0aGJme1h9X2MKXF0KCmBgYHtyfQpIIDwtIGRpYWcobnJvdyhYKSkgLSBtYXRyaXgoMS9ucm93KFgpLG5yb3c9bnJvdyhYKSxuY29sPW5yb3coWCkpClhjIDwtIEglKiVYCmNvbE1lYW5zKFhjKQpIJSolWGMtWGMKYGBgCgojIEdyYW0gbWF0cml4CgoqKkdyYW0gbWF0cml4Kio6ICRcbWF0aGJme0d9PVxtYXRoYmZ7WH1cbWF0aGJme1h9XlQkCkhlcmUgd2Ugd29yayBvbiBjZW50ZXJlZCBkYXRhIHNvCiRcbWF0aGJme0d9PVxtYXRoYmZ7WH1fY1xtYXRoYmZ7WH1fY15UJAoKYGBge3J9CkcgPC0gWGMlKiV0KFhjKQpgYGAKCiMgU3F1YXJlZCBEaXN0YW5jZSBtYXRyaXgKClxbCiAgXG1hdGhiZntEfV9YID0gXG1hdGhiZntOfSAtIDJcbWF0aGJme1h9XG1hdGhiZntYfV5UICsgXG1hdGhiZntOfV5UICwKXF0KCmBgYHtyfQpOIDwtIG1hdHJpeChkaWFnKEcpLG5yb3coWGMpLG5yb3coWGMpKQpOCmRpc3QyIDwtIE4tMipHK3QoTikKZGlzdDIKZGlzdChYYyleMgpgYGAKCiMgTGluayBHcmFtIE1hdHJpeCBhbmQgU3F1YXJlZCBEaXN0YW5jZSBNYXRyaXgKClxbXG1hdGhiZntHfSA9XG1hdGhiZntYfV9jXG1hdGhiZntYfV9jXlQ9LVxmcmFjezF9ezJ9XG1hdGhiZntIfVxtYXRoYmZ7RH1fWFxtYXRoYmZ7SH1cXQoKYGBge3J9CkcKLTEvMipIJSolZGlzdDIlKiVICi0xLzIqSCUqJWRpc3QyJSolSCAtIEcKYGBgCgojIFNob3cgdGhhdCBOIGNhbmNlbHMgb3V0IHdoZW4gbXVsdGlwbHlpbmcgd2l0aCBICgpcWyBcbWF0aGJme0ggTiBIfSA9IFxtYXRoYmZ7MH1cXQoKXFsgXG1hdGhiZntIIE59XlQgXG1hdGhiZntIfSA9IFxtYXRoYmZ7MH1cXQoKYGBge3J9Ck4lKiVICkglKiV0KE4pCmBgYAo=