1 Data

Part of the iris dataset

X <- iris[1:5,1:4] %>% as.matrix
X
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2

2 Centering

\[ \mathbf{H} = \mathbf{I} - \frac{1}{n} \mathbf{1}\mathbf{1}^T , \] \[ \mathbf{X}_c = \mathbf{H}\mathbf{X} \] \[ \mathbf{H}\mathbf{X}_c = \mathbf{X}_c \]

H <- diag(nrow(X)) - matrix(1/nrow(X),nrow=nrow(X),ncol=nrow(X))
Xc <- H%*%X
colMeans(Xc)
##  Sepal.Length   Sepal.Width  Petal.Length   Petal.Width 
##  1.776357e-16 -8.881784e-17 -4.440892e-17  0.000000e+00
H%*%Xc-Xc
##       Sepal.Length  Sepal.Width Petal.Length Petal.Width
## [1,] -1.665335e-16 8.326673e-17 4.440892e-17           0
## [2,] -1.804112e-16 1.110223e-16 4.440892e-17           0
## [3,] -1.665335e-16 8.326673e-17 4.163336e-17           0
## [4,] -1.665335e-16 8.326673e-17 4.163336e-17           0
## [5,] -1.665335e-16 1.110223e-16 4.440892e-17           0

3 Gram matrix

Gram matrix: \(\mathbf{G}=\mathbf{X}\mathbf{X}^T\) Here we work on centered data so \(\mathbf{G}=\mathbf{X}_c\mathbf{X}_c^T\)

G <- Xc%*%t(Xc)

4 Squared Distance matrix

\[ \mathbf{D}_X = \mathbf{N} - 2\mathbf{X}\mathbf{X}^T + \mathbf{N}^T , \]

N <- matrix(diag(G),nrow(Xc),nrow(Xc))
N
##       [,1]  [,2]  [,3]  [,4]  [,5]
## [1,] 0.106 0.106 0.106 0.106 0.106
## [2,] 0.080 0.080 0.080 0.080 0.080
## [3,] 0.042 0.042 0.042 0.042 0.042
## [4,] 0.110 0.110 0.110 0.110 0.110
## [5,] 0.122 0.122 0.122 0.122 0.122
dist2 <- N-2*G+t(N)
dist2
##      [,1] [,2] [,3] [,4] [,5]
## [1,] 0.00 0.29 0.26 0.42 0.02
## [2,] 0.29 0.00 0.09 0.11 0.37
## [3,] 0.26 0.09 0.00 0.06 0.26
## [4,] 0.42 0.11 0.06 0.00 0.42
## [5,] 0.02 0.37 0.26 0.42 0.00
dist(Xc)^2
##      1    2    3    4
## 2 0.29               
## 3 0.26 0.09          
## 4 0.42 0.11 0.06     
## 5 0.02 0.37 0.26 0.42

6 Show that N cancels out when multiplying with H

\[ \mathbf{H N H} = \mathbf{0}\]

\[ \mathbf{H N}^T \mathbf{H} = \mathbf{0}\]

N%*%H
##               [,1]          [,2] [,3]         [,4] [,5]
## [1,]  0.000000e+00  0.000000e+00    0 0.000000e+00    0
## [2,] -6.938894e-18 -6.938894e-18    0 3.469447e-18    0
## [3,]  0.000000e+00  0.000000e+00    0 0.000000e+00    0
## [4,] -6.938894e-18 -6.938894e-18    0 6.938894e-18    0
## [5,] -6.938894e-18 -6.938894e-18    0 3.469447e-18    0
H%*%t(N)
##      [,1]          [,2] [,3]          [,4]          [,5]
## [1,]    0 -6.938894e-18    0 -6.938894e-18 -6.938894e-18
## [2,]    0 -6.938894e-18    0 -6.938894e-18 -6.938894e-18
## [3,]    0  0.000000e+00    0  0.000000e+00  0.000000e+00
## [4,]    0  3.469447e-18    0  6.938894e-18  3.469447e-18
## [5,]    0  0.000000e+00    0  0.000000e+00  0.000000e+00
LS0tCnRpdGxlOiAiTURTOiBMaW5rIFNxdWFyZWQgRGlzdGFuY2UgTWF0cml4IGFuZCBHcmFtIE1hdHJpeCBpbiBQcmFjdGljZSIKYXV0aG9yOiAiTGlldmVuIENsZW1lbnQiCmRhdGU6ICJzdGF0T21pY3MsIEdoZW50IFVuaXZlcnNpdHkgKGh0dHBzOi8vc3RhdG9taWNzLmdpdGh1Yi5pbykiCm91dHB1dDoKICAgIGh0bWxfZG9jdW1lbnQ6CiAgICAgIGNvZGVfZG93bmxvYWQ6IHRydWUKICAgICAgdGhlbWU6IGNvc21vCiAgICAgIHRvYzogdHJ1ZQogICAgICB0b2NfZmxvYXQ6IHRydWUKICAgICAgaGlnaGxpZ2h0OiB0YW5nbwogICAgICBudW1iZXJfc2VjdGlvbnM6IHRydWUKLS0tCgpgYGB7ciBlY2hvPUZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKYGBgCgojIERhdGEKClBhcnQgb2YgdGhlIGlyaXMgZGF0YXNldAoKYGBge3J9ClggPC0gaXJpc1sxOjUsMTo0XSAlPiUgYXMubWF0cml4ClgKYGBgCgojIENlbnRlcmluZwoKXFsKICBcbWF0aGJme0h9ID0gXG1hdGhiZntJfSAtIFxmcmFjezF9e259IFxtYXRoYmZ7MX1cbWF0aGJmezF9XlQgLApcXQpcWwogXG1hdGhiZntYfV9jID0gXG1hdGhiZntIfVxtYXRoYmZ7WH0KXF0KXFsKXG1hdGhiZntIfVxtYXRoYmZ7WH1fYyA9IFxtYXRoYmZ7WH1fYwpcXQoKYGBge3J9CkggPC0gZGlhZyhucm93KFgpKSAtIG1hdHJpeCgxL25yb3coWCksbnJvdz1ucm93KFgpLG5jb2w9bnJvdyhYKSkKWGMgPC0gSCUqJVgKY29sTWVhbnMoWGMpCkglKiVYYy1YYwpgYGAKCiMgR3JhbSBtYXRyaXgKCioqR3JhbSBtYXRyaXgqKjogJFxtYXRoYmZ7R309XG1hdGhiZntYfVxtYXRoYmZ7WH1eVCQKSGVyZSB3ZSB3b3JrIG9uIGNlbnRlcmVkIGRhdGEgc28KJFxtYXRoYmZ7R309XG1hdGhiZntYfV9jXG1hdGhiZntYfV9jXlQkCgpgYGB7cn0KRyA8LSBYYyUqJXQoWGMpCmBgYAoKIyBTcXVhcmVkIERpc3RhbmNlIG1hdHJpeAoKXFsKICBcbWF0aGJme0R9X1ggPSBcbWF0aGJme059IC0gMlxtYXRoYmZ7WH1cbWF0aGJme1h9XlQgKyBcbWF0aGJme059XlQgLApcXQoKYGBge3J9Ck4gPC0gbWF0cml4KGRpYWcoRyksbnJvdyhYYyksbnJvdyhYYykpCk4KZGlzdDIgPC0gTi0yKkcrdChOKQpkaXN0MgpkaXN0KFhjKV4yCmBgYAoKIyBMaW5rIEdyYW0gTWF0cml4IGFuZCBTcXVhcmVkIERpc3RhbmNlIE1hdHJpeAoKXFtcbWF0aGJme0d9ID1cbWF0aGJme1h9X2NcbWF0aGJme1h9X2NeVD0tXGZyYWN7MX17Mn1cbWF0aGJme0h9XG1hdGhiZntEfV9YXG1hdGhiZntIfVxdCgpgYGB7cn0KRwotMS8yKkglKiVkaXN0MiUqJUgKLTEvMipIJSolZGlzdDIlKiVIIC0gRwpgYGAKCiMgU2hvdyB0aGF0IE4gY2FuY2VscyBvdXQgd2hlbiBtdWx0aXBseWluZyB3aXRoIEgKClxbIFxtYXRoYmZ7SCBOIEh9ID0gXG1hdGhiZnswfVxdCgpcWyBcbWF0aGJme0ggTn1eVCBcbWF0aGJme0h9ID0gXG1hdGhiZnswfVxdCgpgYGB7cn0KTiUqJUgKSCUqJXQoTikKYGBgCg==