1 Data

Part of the iris dataset

X <- iris[1:5,1:4] %>% as.matrix
X
#>   Sepal.Length Sepal.Width Petal.Length Petal.Width
#> 1          5.1         3.5          1.4         0.2
#> 2          4.9         3.0          1.4         0.2
#> 3          4.7         3.2          1.3         0.2
#> 4          4.6         3.1          1.5         0.2
#> 5          5.0         3.6          1.4         0.2

2 Centering

\[ \mathbf{H} = \mathbf{I} - \frac{1}{n} \mathbf{1}\mathbf{1}^T , \] \[ \mathbf{X}_c = \mathbf{H}\mathbf{X} \] \[ \mathbf{H}\mathbf{X}_c = \mathbf{X}_c \]

H <- diag(nrow(X)) - matrix(1/nrow(X),nrow=nrow(X),ncol=nrow(X))
Xc <- H%*%X
colMeans(Xc)
#>  Sepal.Length   Sepal.Width  Petal.Length   Petal.Width 
#>  2.220446e-16  5.551115e-18  1.620969e-17 -8.326673e-18
H%*%Xc-Xc
#>       Sepal.Length   Sepal.Width  Petal.Length  Petal.Width
#> [1,] -2.220446e-16  5.551115e-17 -1.521006e-17 8.326673e-18
#> [2,] -2.289835e-16  0.000000e+00 -1.521006e-17 8.326673e-18
#> [3,] -2.498002e-16 -1.387779e-17 -2.775558e-17 8.326673e-18
#> [4,] -2.220446e-16 -2.775558e-17 -1.387779e-17 8.326673e-18
#> [5,] -2.220446e-16  0.000000e+00 -1.756928e-17 8.326673e-18

3 Gram matrix

Gram matrix: \(\mathbf{G}=\mathbf{X}\mathbf{X}^T\) Here we work on centered data so \(\mathbf{G}=\mathbf{X}_c\mathbf{X}_c^T\)

G <- Xc%*%t(Xc)

4 Squared Distance matrix

\[ \mathbf{D}_X = \mathbf{N} - 2\mathbf{X}\mathbf{X}^T + \mathbf{N}^T , \]

N <- matrix(diag(G),nrow(Xc),nrow(Xc))
N
#>       [,1]  [,2]  [,3]  [,4]  [,5]
#> [1,] 0.106 0.106 0.106 0.106 0.106
#> [2,] 0.080 0.080 0.080 0.080 0.080
#> [3,] 0.042 0.042 0.042 0.042 0.042
#> [4,] 0.110 0.110 0.110 0.110 0.110
#> [5,] 0.122 0.122 0.122 0.122 0.122
dist2 <- N-2*G+t(N)
dist2
#>      [,1] [,2] [,3] [,4] [,5]
#> [1,] 0.00 0.29 0.26 0.42 0.02
#> [2,] 0.29 0.00 0.09 0.11 0.37
#> [3,] 0.26 0.09 0.00 0.06 0.26
#> [4,] 0.42 0.11 0.06 0.00 0.42
#> [5,] 0.02 0.37 0.26 0.42 0.00
dist(Xc)^2
#>      1    2    3    4
#> 2 0.29               
#> 3 0.26 0.09          
#> 4 0.42 0.11 0.06     
#> 5 0.02 0.37 0.26 0.42

6 Show that N cancels out when multiplying with H

\[ \mathbf{H N H} = \mathbf{0}\]

\[ \mathbf{H N}^T \mathbf{H} = \mathbf{0}\]

N%*%H
#>               [,1]          [,2]          [,3]         [,4]          [,5]
#> [1,]  9.048318e-19  9.048318e-19 -6.034062e-18 9.048318e-19 -3.619327e-18
#> [2,] -8.520962e-18 -1.582068e-18  5.356826e-18 1.887379e-18  6.328271e-18
#> [3,] -4.662937e-19  3.003153e-18  3.003153e-18 1.268430e-18  1.865175e-18
#> [4,]  8.604228e-19  8.604228e-19  8.604228e-19 8.604228e-19 -3.441691e-18
#> [5,]  7.271961e-19  7.271961e-19  7.271961e-19 7.271961e-19 -2.908784e-18
H%*%t(N)
#>               [,1]          [,2]          [,3]          [,4]          [,5]
#> [1,]  9.048318e-19 -8.520962e-18 -4.662937e-19  8.604228e-19  7.271961e-19
#> [2,]  9.048318e-19 -1.582068e-18  3.003153e-18  8.604228e-19  7.271961e-19
#> [3,] -6.034062e-18  5.356826e-18  3.003153e-18  8.604228e-19  7.271961e-19
#> [4,]  9.048318e-19  1.887379e-18  1.268430e-18  8.604228e-19  7.271961e-19
#> [5,] -3.619327e-18  6.328271e-18  1.865175e-18 -3.441691e-18 -2.908784e-18

Session info

Session info
#> [1] "2024-10-03 06:25:01 UTC"
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value
#>  version  R version 4.1.3 (2022-03-10)
#>  os       Ubuntu 22.04.5 LTS
#>  system   x86_64, linux-gnu
#>  ui       X11
#>  language (EN)
#>  collate  C.UTF-8
#>  ctype    C.UTF-8
#>  tz       UTC
#>  date     2024-10-03
#>  pandoc   3.1.11 @ /opt/hostedtoolcache/pandoc/3.1.11/x64/ (via rmarkdown)
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  ! package     * version date (UTC) lib source
#>  P assertthat    0.2.1   2019-03-21 [?] CRAN (R 4.1.3)
#>  P backports     1.4.1   2021-12-13 [?] CRAN (R 4.1.3)
#>  P BiocManager   1.30.16 2021-06-15 [?] CRAN (R 4.1.3)
#>  P bookdown      0.24    2021-09-02 [?] CRAN (R 4.1.3)
#>  P broom         0.7.11  2022-01-03 [?] CRAN (R 4.1.3)
#>  P bslib         0.3.1   2021-10-06 [?] CRAN (R 4.1.3)
#>  P cellranger    1.1.0   2016-07-27 [?] CRAN (R 4.1.3)
#>  P cli           3.1.1   2022-01-20 [?] CRAN (R 4.1.3)
#>  P colorspace    2.0-2   2021-06-24 [?] CRAN (R 4.1.3)
#>  P crayon        1.4.2   2021-10-29 [?] CRAN (R 4.1.3)
#>  P DBI           1.1.2   2021-12-20 [?] CRAN (R 4.1.3)
#>  P dbplyr        2.1.1   2021-04-06 [?] CRAN (R 4.1.3)
#>  P digest        0.6.29  2021-12-01 [?] CRAN (R 4.1.3)
#>  P dplyr       * 1.0.7   2021-06-18 [?] CRAN (R 4.1.3)
#>  P ellipsis      0.3.2   2021-04-29 [?] CRAN (R 4.1.3)
#>  P evaluate      0.14    2019-05-28 [?] CRAN (R 4.1.3)
#>  P fansi         1.0.2   2022-01-14 [?] CRAN (R 4.1.3)
#>  P fastmap       1.1.0   2021-01-25 [?] CRAN (R 4.1.3)
#>  P forcats     * 0.5.1   2021-01-27 [?] CRAN (R 4.1.3)
#>  P fs            1.5.2   2021-12-08 [?] CRAN (R 4.1.3)
#>  P generics      0.1.1   2021-10-25 [?] CRAN (R 4.1.3)
#>  P ggplot2     * 3.3.5   2021-06-25 [?] CRAN (R 4.1.3)
#>  P glue          1.6.1   2022-01-22 [?] CRAN (R 4.1.3)
#>  P gtable        0.3.0   2019-03-25 [?] CRAN (R 4.1.3)
#>  P haven         2.4.3   2021-08-04 [?] CRAN (R 4.1.3)
#>  P hms           1.1.1   2021-09-26 [?] CRAN (R 4.1.3)
#>  P htmltools     0.5.2   2021-08-25 [?] CRAN (R 4.1.3)
#>  P httr          1.4.2   2020-07-20 [?] CRAN (R 4.1.3)
#>  P jquerylib     0.1.4   2021-04-26 [?] CRAN (R 4.1.3)
#>  P jsonlite      1.7.3   2022-01-17 [?] CRAN (R 4.1.3)
#>  P knitr         1.37    2021-12-16 [?] CRAN (R 4.1.3)
#>  P lifecycle     1.0.1   2021-09-24 [?] CRAN (R 4.1.3)
#>  P lubridate     1.8.0   2021-10-07 [?] CRAN (R 4.1.3)
#>  P magrittr      2.0.2   2022-01-26 [?] CRAN (R 4.1.3)
#>  P modelr        0.1.8   2020-05-19 [?] CRAN (R 4.1.3)
#>  P munsell       0.5.0   2018-06-12 [?] CRAN (R 4.1.3)
#>  P pillar        1.6.5   2022-01-25 [?] CRAN (R 4.1.3)
#>  P pkgconfig     2.0.3   2024-10-02 [?] Github (r-lib/pkgconfig@b81ae03)
#>  P purrr       * 0.3.4   2020-04-17 [?] CRAN (R 4.1.3)
#>  P R6            2.5.1   2021-08-19 [?] CRAN (R 4.1.3)
#>  P Rcpp          1.0.8   2022-01-13 [?] CRAN (R 4.1.3)
#>  P readr       * 2.1.1   2021-11-30 [?] CRAN (R 4.1.3)
#>  P readxl        1.3.1   2019-03-13 [?] CRAN (R 4.1.3)
#>    renv          0.15.2  2022-01-24 [1] CRAN (R 4.1.3)
#>  P reprex        2.0.1   2021-08-05 [?] CRAN (R 4.1.3)
#>  P rlang         1.0.0   2022-01-26 [?] CRAN (R 4.1.3)
#>  P rmarkdown     2.11    2021-09-14 [?] CRAN (R 4.1.3)
#>  P rstudioapi    0.13    2020-11-12 [?] CRAN (R 4.1.3)
#>  P rvest         1.0.2   2021-10-16 [?] CRAN (R 4.1.3)
#>  P sass          0.4.0   2021-05-12 [?] CRAN (R 4.1.3)
#>  P scales        1.1.1   2020-05-11 [?] CRAN (R 4.1.3)
#>  P sessioninfo   1.2.2   2021-12-06 [?] CRAN (R 4.1.3)
#>  P stringi       1.7.6   2021-11-29 [?] CRAN (R 4.1.3)
#>  P stringr     * 1.4.0   2019-02-10 [?] CRAN (R 4.1.3)
#>  P tibble      * 3.1.6   2021-11-07 [?] CRAN (R 4.1.3)
#>  P tidyr       * 1.1.4   2021-09-27 [?] CRAN (R 4.1.3)
#>  P tidyselect    1.1.1   2021-04-30 [?] CRAN (R 4.1.3)
#>  P tidyverse   * 1.3.1   2021-04-15 [?] CRAN (R 4.1.3)
#>  P tzdb          0.2.0   2021-10-27 [?] CRAN (R 4.1.3)
#>  P utf8          1.2.2   2021-07-24 [?] CRAN (R 4.1.3)
#>  P vctrs         0.3.8   2021-04-29 [?] CRAN (R 4.1.3)
#>  P withr         2.4.3   2021-11-30 [?] CRAN (R 4.1.3)
#>  P xfun          0.29    2021-12-14 [?] CRAN (R 4.1.3)
#>  P xml2          1.3.3   2021-11-30 [?] CRAN (R 4.1.3)
#>  P yaml          2.2.2   2022-01-25 [?] CRAN (R 4.1.3)
#> 
#>  [1] /home/runner/work/HDDA23/HDDA23/renv/library/R-4.1/x86_64-pc-linux-gnu
#>  [2] /opt/R/4.1.3/lib/R/library
#> 
#>  P ── Loaded and on-disk path mismatch.
#> 
#> ──────────────────────────────────────────────────────────────────────────────
LS0tCnRpdGxlOiAiTURTOiBMaW5rIFNxdWFyZWQgRGlzdGFuY2UgTWF0cml4IGFuZCBHcmFtIE1hdHJpeCBpbiBQcmFjdGljZSIKYXV0aG9yOiAiTGlldmVuIENsZW1lbnQiCmRhdGU6ICJzdGF0T21pY3MsIEdoZW50IFVuaXZlcnNpdHkgKGh0dHBzOi8vc3RhdG9taWNzLmdpdGh1Yi5pbykiCi0tLQoKYGBge3IsIGNoaWxkPSJfc2V0dXAuUm1kIn0KYGBgCgpgYGB7ciBlY2hvPUZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKYGBgCgojIERhdGEKClBhcnQgb2YgdGhlIGlyaXMgZGF0YXNldAoKYGBge3J9ClggPC0gaXJpc1sxOjUsMTo0XSAlPiUgYXMubWF0cml4ClgKYGBgCgojIENlbnRlcmluZwoKXFsKICBcbWF0aGJme0h9ID0gXG1hdGhiZntJfSAtIFxmcmFjezF9e259IFxtYXRoYmZ7MX1cbWF0aGJmezF9XlQgLApcXQpcWwogXG1hdGhiZntYfV9jID0gXG1hdGhiZntIfVxtYXRoYmZ7WH0KXF0KXFsKXG1hdGhiZntIfVxtYXRoYmZ7WH1fYyA9IFxtYXRoYmZ7WH1fYwpcXQoKYGBge3J9CkggPC0gZGlhZyhucm93KFgpKSAtIG1hdHJpeCgxL25yb3coWCksbnJvdz1ucm93KFgpLG5jb2w9bnJvdyhYKSkKWGMgPC0gSCUqJVgKY29sTWVhbnMoWGMpCkglKiVYYy1YYwpgYGAKCiMgR3JhbSBtYXRyaXgKCioqR3JhbSBtYXRyaXgqKjogJFxtYXRoYmZ7R309XG1hdGhiZntYfVxtYXRoYmZ7WH1eVCQKSGVyZSB3ZSB3b3JrIG9uIGNlbnRlcmVkIGRhdGEgc28KJFxtYXRoYmZ7R309XG1hdGhiZntYfV9jXG1hdGhiZntYfV9jXlQkCgpgYGB7cn0KRyA8LSBYYyUqJXQoWGMpCmBgYAoKIyBTcXVhcmVkIERpc3RhbmNlIG1hdHJpeAoKXFsKICBcbWF0aGJme0R9X1ggPSBcbWF0aGJme059IC0gMlxtYXRoYmZ7WH1cbWF0aGJme1h9XlQgKyBcbWF0aGJme059XlQgLApcXQoKYGBge3J9Ck4gPC0gbWF0cml4KGRpYWcoRyksbnJvdyhYYyksbnJvdyhYYykpCk4KZGlzdDIgPC0gTi0yKkcrdChOKQpkaXN0MgpkaXN0KFhjKV4yCmBgYAoKIyBMaW5rIEdyYW0gTWF0cml4IGFuZCBTcXVhcmVkIERpc3RhbmNlIE1hdHJpeAoKXFtcbWF0aGJme0d9ID1cbWF0aGJme1h9X2NcbWF0aGJme1h9X2NeVD0tXGZyYWN7MX17Mn1cbWF0aGJme0h9XG1hdGhiZntEfV9YXG1hdGhiZntIfVxdCgpgYGB7cn0KRwotMS8yKkglKiVkaXN0MiUqJUgKLTEvMipIJSolZGlzdDIlKiVIIC0gRwpgYGAKCiMgU2hvdyB0aGF0IE4gY2FuY2VscyBvdXQgd2hlbiBtdWx0aXBseWluZyB3aXRoIEgKClxbIFxtYXRoYmZ7SCBOIEh9ID0gXG1hdGhiZnswfVxdCgpcWyBcbWF0aGJme0ggTn1eVCBcbWF0aGJme0h9ID0gXG1hdGhiZnswfVxdCgpgYGB7cn0KTiUqJUgKSCUqJXQoTikKYGBgCgpgYGB7ciwgY2hpbGQ9Il9zZXNzaW9uLWluZm8uUm1kIn0KYGBgCg==