Let’s first create a synthetic data to play with.

x <- sin(seq(0, 6.5*pi, length.out = 360))
y <- sin(seq(pi/16, 6.5*pi, length.out = 365))
z <- jitter(y, amount = 0.5) # jitter data the amount of 0.5
t <- z[6:365] # lag 5 units
y <- y[1:360]; z <- z[1:360]
k <- x + cos(seq(0, 128*pi, length.out = 360)) * 0.2
df <- data.frame(x = x, y = y, z = z, t = t, k = k)
pca <- prcomp(df, center = TRUE, scale. = TRUE)

Basic statistics:

round(cor(df), 2) # correlation
     x    y    z    t    k
x 1.00 0.99 0.91 0.90 0.98
y 0.99 1.00 0.92 0.90 0.97
z 0.91 0.92 1.00 0.84 0.90
t 0.90 0.90 0.84 1.00 0.88
k 0.98 0.97 0.90 0.88 1.00
round(var(df), 2) # variance & covariance
     x    y    z    t    k
x 0.50 0.49 0.50 0.49 0.50
y 0.49 0.50 0.50 0.49 0.49
z 0.50 0.50 0.59 0.50 0.50
t 0.49 0.49 0.50 0.60 0.49
k 0.50 0.49 0.50 0.49 0.52
round(sqrt(diag(var(df))), 2) # std dev
   x    y    z    t    k 
0.71 0.71 0.77 0.77 0.72 

Variances of the principal components

eig <- (pca$sdev) ^ 2 # Eigenvalues
print(eig)
[1] 4.679858124 0.164460242 0.118961143 0.028328308 0.008392184
variance <- eig * 100/sum(eig) # variance in percentage
print(variance)
[1] 93.5971625  3.2892048  2.3792229  0.5665662  0.1678437
cumvar <- cumsum(variance) # Cumulative variance
print(cumvar)
[1]  93.59716  96.88637  99.26559  99.83216 100.00000

Coordinates of variables on the principal components

The correlation between variables and principal components

var_cor_func <- function(var.loadings, comp.sdev) var.loadings*comp.sdev
# Variable correlation/coordinates
loadings <- pca$rotation
sdev <- pca$sdev
var.coord <- var.cor <- t(apply(loadings, 1, var_cor_func, sdev))
head(var.coord[, 1:4])
        PC1         PC2         PC3          PC4
x 0.9889953 -0.04130559 -0.11390012  0.047278525
y 0.9889361 -0.05546916 -0.07499642  0.101283916
z 0.9458278 -0.19979572  0.25486642 -0.022802695
t 0.9329968  0.34406664  0.10542347 -0.004574899
k 0.9791213 -0.03710902 -0.15586033 -0.123667801
print(pca)
Standard deviations (1, .., p=5):
[1] 2.16329797 0.40553698 0.34490744 0.16831015 0.09160886

Rotation (n x k) = (5 x 5):
        PC1         PC2        PC3         PC4         PC5
x 0.4571702 -0.10185406 -0.3302339  0.28090120  0.76984498
y 0.4571428 -0.13677954 -0.2174393  0.60176949 -0.60241633
z 0.4372157 -0.49266956  0.7389415 -0.13548021  0.04158981
t 0.4312845  0.84842235  0.3056573 -0.02718136 -0.00283363
k 0.4526058 -0.09150588 -0.4518903 -0.73476138 -0.20662894
s.pca <- summary(pca) # variation explained by each component (as percentage)
print(s.pca)
Importance of components%s:
                         PC1     PC2     PC3     PC4     PC5
Standard deviation     2.163 0.40554 0.34491 0.16831 0.09161
Proportion of Variance 0.936 0.03289 0.02379 0.00567 0.00168
Cumulative Proportion  0.936 0.96886 0.99266 0.99832 1.00000
layout(matrix(c(1,2,3,4), 2, 2, byrow = TRUE), widths = c(2,2), heights = c(2,2))
plot(pca) # Variance explained by each component
# or let's plot proportion of variance as percentage
plot(s.pca$importance[2,]*100, type = "b", bty = "n", ylab = "proportion of variance (%)", xlab = "PC")
screeplot(pca, type = "lines") # another function to plot variation of variance
abline(h = 1,lty = 3, col = "red")
biplot(pca)

How many values to keep?

  • Kaiser Criterion: retain only factors with eigenvalues > 1 (Note: eigenvalues = variances)

Homemade biplot

myplot <- function(r, d1 = 1, d2 = 2) {
  r <- r[, c(d1, d2)]
  mx <- max(range(r[,1]))
  plot(r, type = "n", ylim = c(-1, 1), xlim = c(-mx*1.2, mx*1.2), xlab = paste0("PC", d1), ylab = paste0("PC", d2))
  abline(v = 0, h = 0, lty = 3)
  arrows(0, 0, r[,1], r[,2], len = 0.1, col = "darkred")
  text(1.1 * r, rownames(r), col = "darkred", xpd = TRUE, cex = 1)
}
r <- pca$rotation
layout(matrix(c(1,2,3,4,5,6), 2, 3, byrow = TRUE), widths = c(1,1,1), heights = c(1,1,1))
myplot(r)
myplot(r, 2, 3)
myplot(r, 1, 3)
myplot(r, 3, 4)
myplot(r, 4, 5)
myplot(r, 2, 5)

library(scatterplot3d)
mx1 <- max(range(r[,1]))
mx2 <- max(range(r[,2]))
mx3 <- max(range(r[,3]))
plt <- scatterplot3d(r[,1], r[,2], r[,3], angle = 130, asp = 0.5,
              xlim = c(-mx1, mx1), ylim = c(-mx2, mx2), zlim = c(-mx3, mx3),
              xlab = "PC1", ylab = "PC2", zlab = "PC3")
plt$points3d(x=c(0,0), y=c(0,0),z=c(-mx3*1.2, mx3), type="l", col="black", lwd = 2)
plt$points3d(x=c(-mx1,mx1), y=c(0,0),z=c(0,0), type="l", col="black", lwd = 2)
plt$points3d(x=c(0,0), y=c(-mx2,mx2),z=c(0,0), type="l", col="black", lwd = 2)
for (i in 1:5)
  plt$points3d(x=c(0,r[i,1]), y=c(0,r[i,2]),z=c(0, r[i,3]), type="l", col="blue", lwd=2)

PC Calculation

pc1 <- drop(scale(as.matrix(df), center = pca$center,
             scale = pca$scale) %*% pca$rotation[, 1])
all.equal(pc1, pca$x[,1]) # TRUE
[1] TRUE

Contributions of the variables to the principal components

var.cos2 <- var.coord^2
comp.cos2 <- apply(var.cos2, 2, sum)
contrib <- function(var.cos2, comp.cos2){var.cos2*100/comp.cos2}
var.contrib <- t(apply(var.cos2,1, contrib, comp.cos2))
print(var.contrib)
       PC1        PC2       PC3         PC4          PC5
x 20.90045  1.0374250 10.905442  7.89054864 5.926613e+01
y 20.89796  1.8708642  4.727983 36.21265199 3.629054e+01
z 19.11576 24.2723293 54.603454  1.83548867 1.729712e-01
t 18.60063 71.9820489  9.342637  0.07388265 8.029461e-04
k 20.48520  0.8373325 20.420484 53.98742805 4.269552e+00
colSums(var.contrib)
PC1 PC2 PC3 PC4 PC5 
100 100 100 100 100 

RESOURCES:

LS0tCnRpdGxlOiAiUENBIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpMZXQncyBmaXJzdCBjcmVhdGUgYSBzeW50aGV0aWMgZGF0YSB0byBwbGF5IHdpdGguCgpgYGB7cn0KeCA8LSBzaW4oc2VxKDAsIDYuNSpwaSwgbGVuZ3RoLm91dCA9IDM2MCkpCnkgPC0gc2luKHNlcShwaS8xNiwgNi41KnBpLCBsZW5ndGgub3V0ID0gMzY1KSkKeiA8LSBqaXR0ZXIoeSwgYW1vdW50ID0gMC41KSAjIGppdHRlciBkYXRhIHRoZSBhbW91bnQgb2YgMC41CnQgPC0gels2OjM2NV0gIyBsYWcgNSB1bml0cwp5IDwtIHlbMTozNjBdOyB6IDwtIHpbMTozNjBdCmsgPC0geCArIGNvcyhzZXEoMCwgMTI4KnBpLCBsZW5ndGgub3V0ID0gMzYwKSkgKiAwLjIKZGYgPC0gZGF0YS5mcmFtZSh4ID0geCwgeSA9IHksIHogPSB6LCB0ID0gdCwgayA9IGspCnBjYSA8LSBwcmNvbXAoZGYsIGNlbnRlciA9IFRSVUUsIHNjYWxlLiA9IFRSVUUpCmBgYAoKYGBge3IsIGVjaG89RkFMU0V9Cm1hdHBsb3QoZGYsIHR5cGUgPSAibCIsIGJ0eSA9ICJuIikKcGxvdChkZiwgcGNoID0gIi4iLCBjZXggPSAwLjEpCmBgYAoKQmFzaWMgc3RhdGlzdGljczoKCmBgYHtyfQpyb3VuZChjb3IoZGYpLCAyKSAjIGNvcnJlbGF0aW9uCnJvdW5kKHZhcihkZiksIDIpICMgdmFyaWFuY2UgJiBjb3ZhcmlhbmNlCnJvdW5kKHNxcnQoZGlhZyh2YXIoZGYpKSksIDIpICMgc3RkIGRldgpgYGAKCiMjIyBWYXJpYW5jZXMgb2YgdGhlIHByaW5jaXBhbCBjb21wb25lbnRzCgpgYGB7cn0KZWlnIDwtIChwY2Ekc2RldikgXiAyICMgRWlnZW52YWx1ZXMKcHJpbnQoZWlnKQp2YXJpYW5jZSA8LSBlaWcgKiAxMDAvc3VtKGVpZykgIyB2YXJpYW5jZSBpbiBwZXJjZW50YWdlCnByaW50KHZhcmlhbmNlKQpjdW12YXIgPC0gY3Vtc3VtKHZhcmlhbmNlKSAjIEN1bXVsYXRpdmUgdmFyaWFuY2UKcHJpbnQoY3VtdmFyKQpgYGAKCiMjIyBDb29yZGluYXRlcyBvZiB2YXJpYWJsZXMgb24gdGhlIHByaW5jaXBhbCBjb21wb25lbnRzCgpUaGUgY29ycmVsYXRpb24gYmV0d2VlbiB2YXJpYWJsZXMgYW5kIHByaW5jaXBhbCBjb21wb25lbnRzCgpgYGB7cn0KdmFyX2Nvcl9mdW5jIDwtIGZ1bmN0aW9uKHZhci5sb2FkaW5ncywgY29tcC5zZGV2KSB2YXIubG9hZGluZ3MqY29tcC5zZGV2CiMgVmFyaWFibGUgY29ycmVsYXRpb24vY29vcmRpbmF0ZXMKbG9hZGluZ3MgPC0gcGNhJHJvdGF0aW9uCnNkZXYgPC0gcGNhJHNkZXYKdmFyLmNvb3JkIDwtIHZhci5jb3IgPC0gdChhcHBseShsb2FkaW5ncywgMSwgdmFyX2Nvcl9mdW5jLCBzZGV2KSkKaGVhZCh2YXIuY29vcmRbLCAxOjRdKQpgYGAKCmBgYHtyfQpwcmludChwY2EpCnMucGNhIDwtIHN1bW1hcnkocGNhKSAjIHZhcmlhdGlvbiBleHBsYWluZWQgYnkgZWFjaCBjb21wb25lbnQgKGFzIHBlcmNlbnRhZ2UpCnByaW50KHMucGNhKQpsYXlvdXQobWF0cml4KGMoMSwyLDMsNCksIDIsIDIsIGJ5cm93ID0gVFJVRSksIHdpZHRocyA9IGMoMiwyKSwgaGVpZ2h0cyA9IGMoMiwyKSkKcGxvdChwY2EpICMgVmFyaWFuY2UgZXhwbGFpbmVkIGJ5IGVhY2ggY29tcG9uZW50CiMgb3IgbGV0J3MgcGxvdCBwcm9wb3J0aW9uIG9mIHZhcmlhbmNlIGFzIHBlcmNlbnRhZ2UKcGxvdChzLnBjYSRpbXBvcnRhbmNlWzIsXSoxMDAsIHR5cGUgPSAiYiIsIGJ0eSA9ICJuIiwgeWxhYiA9ICJwcm9wb3J0aW9uIG9mIHZhcmlhbmNlICglKSIsIHhsYWIgPSAiUEMiKQpzY3JlZXBsb3QocGNhLCB0eXBlID0gImxpbmVzIikgIyBhbm90aGVyIGZ1bmN0aW9uIHRvIHBsb3QgdmFyaWF0aW9uIG9mIHZhcmlhbmNlCmFibGluZShoID0gMSxsdHkgPSAzLCBjb2wgPSAicmVkIikKYmlwbG90KHBjYSkKYGBgCgojIyMjIEhvdyBtYW55IHZhbHVlcyB0byBrZWVwPwoKKiBLYWlzZXIgQ3JpdGVyaW9uOiByZXRhaW4gb25seSBmYWN0b3JzIHdpdGggZWlnZW52YWx1ZXMgPiAxIChOb3RlOiBlaWdlbnZhbHVlcyA9IHZhcmlhbmNlcykKCiMjIyBIb21lbWFkZSBiaXBsb3QKCmBgYHtyfQpteXBsb3QgPC0gZnVuY3Rpb24ociwgZDEgPSAxLCBkMiA9IDIpIHsKICByIDwtIHJbLCBjKGQxLCBkMildCiAgbXggPC0gbWF4KHJhbmdlKHJbLDFdKSkKICBwbG90KHIsIHR5cGUgPSAibiIsIHlsaW0gPSBjKC0xLCAxKSwgeGxpbSA9IGMoLW14KjEuMiwgbXgqMS4yKSwgeGxhYiA9IHBhc3RlMCgiUEMiLCBkMSksIHlsYWIgPSBwYXN0ZTAoIlBDIiwgZDIpKQogIGFibGluZSh2ID0gMCwgaCA9IDAsIGx0eSA9IDMpCiAgYXJyb3dzKDAsIDAsIHJbLDFdLCByWywyXSwgbGVuID0gMC4xLCBjb2wgPSAiZGFya3JlZCIpCiAgdGV4dCgxLjEgKiByLCByb3duYW1lcyhyKSwgY29sID0gImRhcmtyZWQiLCB4cGQgPSBUUlVFLCBjZXggPSAxKQp9CnIgPC0gcGNhJHJvdGF0aW9uCmxheW91dChtYXRyaXgoYygxLDIsMyw0LDUsNiksIDIsIDMsIGJ5cm93ID0gVFJVRSksIHdpZHRocyA9IGMoMSwxLDEpLCBoZWlnaHRzID0gYygxLDEsMSkpCm15cGxvdChyKQpteXBsb3QociwgMiwgMykKbXlwbG90KHIsIDEsIDMpCm15cGxvdChyLCAzLCA0KQpteXBsb3QociwgNCwgNSkKbXlwbG90KHIsIDIsIDUpCmBgYAoKYGBge3J9CmxpYnJhcnkoc2NhdHRlcnBsb3QzZCkKbXgxIDwtIG1heChyYW5nZShyWywxXSkpCm14MiA8LSBtYXgocmFuZ2UoclssMl0pKQpteDMgPC0gbWF4KHJhbmdlKHJbLDNdKSkKcGx0IDwtIHNjYXR0ZXJwbG90M2QoclssMV0sIHJbLDJdLCByWywzXSwgYW5nbGUgPSAxMzAsIGFzcCA9IDAuNSwKICAgICAgICAgICAgICB4bGltID0gYygtbXgxLCBteDEpLCB5bGltID0gYygtbXgyLCBteDIpLCB6bGltID0gYygtbXgzLCBteDMpLAogICAgICAgICAgICAgIHhsYWIgPSAiUEMxIiwgeWxhYiA9ICJQQzIiLCB6bGFiID0gIlBDMyIpCnBsdCRwb2ludHMzZCh4PWMoMCwwKSwgeT1jKDAsMCksej1jKC1teDMqMS4yLCBteDMpLCB0eXBlPSJsIiwgY29sPSJibGFjayIsIGx3ZCA9IDIpCnBsdCRwb2ludHMzZCh4PWMoLW14MSxteDEpLCB5PWMoMCwwKSx6PWMoMCwwKSwgdHlwZT0ibCIsIGNvbD0iYmxhY2siLCBsd2QgPSAyKQpwbHQkcG9pbnRzM2QoeD1jKDAsMCksIHk9YygtbXgyLG14Miksej1jKDAsMCksIHR5cGU9ImwiLCBjb2w9ImJsYWNrIiwgbHdkID0gMikKZm9yIChpIGluIDE6NSkKICBwbHQkcG9pbnRzM2QoeD1jKDAscltpLDFdKSwgeT1jKDAscltpLDJdKSx6PWMoMCwgcltpLDNdKSwgdHlwZT0ibCIsIGNvbD0iYmx1ZSIsIGx3ZD0yKQpgYGAKCiMjIyBQQyBDYWxjdWxhdGlvbgoKYGBge3J9CnBjMSA8LSBkcm9wKHNjYWxlKGFzLm1hdHJpeChkZiksIGNlbnRlciA9IHBjYSRjZW50ZXIsCiAgICAgICAgICAgICBzY2FsZSA9IHBjYSRzY2FsZSkgJSolIHBjYSRyb3RhdGlvblssIDFdKQphbGwuZXF1YWwocGMxLCBwY2EkeFssMV0pICMgVFJVRQpgYGAKCiMjIyBDb250cmlidXRpb25zIG9mIHRoZSB2YXJpYWJsZXMgdG8gdGhlIHByaW5jaXBhbCBjb21wb25lbnRzCgpgYGB7cn0KdmFyLmNvczIgPC0gdmFyLmNvb3JkXjIKY29tcC5jb3MyIDwtIGFwcGx5KHZhci5jb3MyLCAyLCBzdW0pCmNvbnRyaWIgPC0gZnVuY3Rpb24odmFyLmNvczIsIGNvbXAuY29zMil7dmFyLmNvczIqMTAwL2NvbXAuY29zMn0KdmFyLmNvbnRyaWIgPC0gdChhcHBseSh2YXIuY29zMiwxLCBjb250cmliLCBjb21wLmNvczIpKQpwcmludCh2YXIuY29udHJpYikKY29sU3Vtcyh2YXIuY29udHJpYikKYGBgCgoKIyMgUkVTT1VSQ0VTOgoKKiBodHRwOi8vcmNvdXJzZS5pb3Aua2NsLmFjLnVrLzIwMTQvNWZyaS8zL3VwZGF0ZWRfa2hfcGNhX3NsaWRlc18wMzA2MTQucGRmCiogaHR0cHM6Ly9jcmFuLnItcHJvamVjdC5vcmcvd2ViL3BhY2thZ2VzL0hTQVVSL3ZpZ25ldHRlcy9DaF9wcmluY2lwYWxfY29tcG9uZW50c19hbmFseXNpcy5wZGYKKiBodHRwczovL3N0YXRzLnN0YWNrZXhjaGFuZ2UuY29tL3F1ZXN0aW9ucy8yNjI2MTEvci1wY2EtcHJpbmNpcGFsLXBzeWNoLXBhY2thZ2UtdnMtcHJjb21wLWxvYWRpbmdzCiogaHR0cDovL3d3dy5zdGhkYS5jb20vZW5nbGlzaC93aWtpL3ByaW5jaXBhbC1jb21wb25lbnQtYW5hbHlzaXMtaW4tci1wcmNvbXAtdnMtcHJpbmNvbXAtci1zb2Z0d2FyZS1hbmQtZGF0YS1taW5pbmcKCgo=