Let’s first create a synthetic data to play with.
x <- sin(seq(0, 6.5*pi, length.out = 360))
y <- sin(seq(pi/16, 6.5*pi, length.out = 365))
z <- jitter(y, amount = 0.5) # jitter data the amount of 0.5
t <- z[6:365] # lag 5 units
y <- y[1:360]; z <- z[1:360]
k <- x + cos(seq(0, 128*pi, length.out = 360)) * 0.2
df <- data.frame(x = x, y = y, z = z, t = t, k = k)
pca <- prcomp(df, center = TRUE, scale. = TRUE)
Basic statistics:
round(cor(df), 2) # correlation
x y z t k
x 1.00 0.99 0.91 0.90 0.98
y 0.99 1.00 0.92 0.90 0.97
z 0.91 0.92 1.00 0.84 0.90
t 0.90 0.90 0.84 1.00 0.88
k 0.98 0.97 0.90 0.88 1.00
round(var(df), 2) # variance & covariance
x y z t k
x 0.50 0.49 0.50 0.49 0.50
y 0.49 0.50 0.50 0.49 0.49
z 0.50 0.50 0.59 0.50 0.50
t 0.49 0.49 0.50 0.60 0.49
k 0.50 0.49 0.50 0.49 0.52
round(sqrt(diag(var(df))), 2) # std dev
x y z t k
0.71 0.71 0.77 0.77 0.72
Variances of the principal components
eig <- (pca$sdev) ^ 2 # Eigenvalues
print(eig)
[1] 4.679858124 0.164460242 0.118961143 0.028328308 0.008392184
variance <- eig * 100/sum(eig) # variance in percentage
print(variance)
[1] 93.5971625 3.2892048 2.3792229 0.5665662 0.1678437
cumvar <- cumsum(variance) # Cumulative variance
print(cumvar)
[1] 93.59716 96.88637 99.26559 99.83216 100.00000
Coordinates of variables on the principal components
The correlation between variables and principal components
var_cor_func <- function(var.loadings, comp.sdev) var.loadings*comp.sdev
# Variable correlation/coordinates
loadings <- pca$rotation
sdev <- pca$sdev
var.coord <- var.cor <- t(apply(loadings, 1, var_cor_func, sdev))
head(var.coord[, 1:4])
PC1 PC2 PC3 PC4
x 0.9889953 -0.04130559 -0.11390012 0.047278525
y 0.9889361 -0.05546916 -0.07499642 0.101283916
z 0.9458278 -0.19979572 0.25486642 -0.022802695
t 0.9329968 0.34406664 0.10542347 -0.004574899
k 0.9791213 -0.03710902 -0.15586033 -0.123667801
print(pca)
Standard deviations (1, .., p=5):
[1] 2.16329797 0.40553698 0.34490744 0.16831015 0.09160886
Rotation (n x k) = (5 x 5):
PC1 PC2 PC3 PC4 PC5
x 0.4571702 -0.10185406 -0.3302339 0.28090120 0.76984498
y 0.4571428 -0.13677954 -0.2174393 0.60176949 -0.60241633
z 0.4372157 -0.49266956 0.7389415 -0.13548021 0.04158981
t 0.4312845 0.84842235 0.3056573 -0.02718136 -0.00283363
k 0.4526058 -0.09150588 -0.4518903 -0.73476138 -0.20662894
s.pca <- summary(pca) # variation explained by each component (as percentage)
print(s.pca)
Importance of components%s:
PC1 PC2 PC3 PC4 PC5
Standard deviation 2.163 0.40554 0.34491 0.16831 0.09161
Proportion of Variance 0.936 0.03289 0.02379 0.00567 0.00168
Cumulative Proportion 0.936 0.96886 0.99266 0.99832 1.00000
layout(matrix(c(1,2,3,4), 2, 2, byrow = TRUE), widths = c(2,2), heights = c(2,2))
plot(pca) # Variance explained by each component
# or let's plot proportion of variance as percentage
plot(s.pca$importance[2,]*100, type = "b", bty = "n", ylab = "proportion of variance (%)", xlab = "PC")
screeplot(pca, type = "lines") # another function to plot variation of variance
abline(h = 1,lty = 3, col = "red")
biplot(pca)
How many values to keep?
- Kaiser Criterion: retain only factors with eigenvalues > 1 (Note: eigenvalues = variances)
Homemade biplot
myplot <- function(r, d1 = 1, d2 = 2) {
r <- r[, c(d1, d2)]
mx <- max(range(r[,1]))
plot(r, type = "n", ylim = c(-1, 1), xlim = c(-mx*1.2, mx*1.2), xlab = paste0("PC", d1), ylab = paste0("PC", d2))
abline(v = 0, h = 0, lty = 3)
arrows(0, 0, r[,1], r[,2], len = 0.1, col = "darkred")
text(1.1 * r, rownames(r), col = "darkred", xpd = TRUE, cex = 1)
}
r <- pca$rotation
layout(matrix(c(1,2,3,4,5,6), 2, 3, byrow = TRUE), widths = c(1,1,1), heights = c(1,1,1))
myplot(r)
myplot(r, 2, 3)
myplot(r, 1, 3)
myplot(r, 3, 4)
myplot(r, 4, 5)
myplot(r, 2, 5)
library(scatterplot3d)
mx1 <- max(range(r[,1]))
mx2 <- max(range(r[,2]))
mx3 <- max(range(r[,3]))
plt <- scatterplot3d(r[,1], r[,2], r[,3], angle = 130, asp = 0.5,
xlim = c(-mx1, mx1), ylim = c(-mx2, mx2), zlim = c(-mx3, mx3),
xlab = "PC1", ylab = "PC2", zlab = "PC3")
plt$points3d(x=c(0,0), y=c(0,0),z=c(-mx3*1.2, mx3), type="l", col="black", lwd = 2)
plt$points3d(x=c(-mx1,mx1), y=c(0,0),z=c(0,0), type="l", col="black", lwd = 2)
plt$points3d(x=c(0,0), y=c(-mx2,mx2),z=c(0,0), type="l", col="black", lwd = 2)
for (i in 1:5)
plt$points3d(x=c(0,r[i,1]), y=c(0,r[i,2]),z=c(0, r[i,3]), type="l", col="blue", lwd=2)
PC Calculation
pc1 <- drop(scale(as.matrix(df), center = pca$center,
scale = pca$scale) %*% pca$rotation[, 1])
all.equal(pc1, pca$x[,1]) # TRUE
[1] TRUE
Contributions of the variables to the principal components
var.cos2 <- var.coord^2
comp.cos2 <- apply(var.cos2, 2, sum)
contrib <- function(var.cos2, comp.cos2){var.cos2*100/comp.cos2}
var.contrib <- t(apply(var.cos2,1, contrib, comp.cos2))
print(var.contrib)
PC1 PC2 PC3 PC4 PC5
x 20.90045 1.0374250 10.905442 7.89054864 5.926613e+01
y 20.89796 1.8708642 4.727983 36.21265199 3.629054e+01
z 19.11576 24.2723293 54.603454 1.83548867 1.729712e-01
t 18.60063 71.9820489 9.342637 0.07388265 8.029461e-04
k 20.48520 0.8373325 20.420484 53.98742805 4.269552e+00
colSums(var.contrib)
PC1 PC2 PC3 PC4 PC5
100 100 100 100 100
LS0tCnRpdGxlOiAiUENBIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpMZXQncyBmaXJzdCBjcmVhdGUgYSBzeW50aGV0aWMgZGF0YSB0byBwbGF5IHdpdGguCgpgYGB7cn0KeCA8LSBzaW4oc2VxKDAsIDYuNSpwaSwgbGVuZ3RoLm91dCA9IDM2MCkpCnkgPC0gc2luKHNlcShwaS8xNiwgNi41KnBpLCBsZW5ndGgub3V0ID0gMzY1KSkKeiA8LSBqaXR0ZXIoeSwgYW1vdW50ID0gMC41KSAjIGppdHRlciBkYXRhIHRoZSBhbW91bnQgb2YgMC41CnQgPC0gels2OjM2NV0gIyBsYWcgNSB1bml0cwp5IDwtIHlbMTozNjBdOyB6IDwtIHpbMTozNjBdCmsgPC0geCArIGNvcyhzZXEoMCwgMTI4KnBpLCBsZW5ndGgub3V0ID0gMzYwKSkgKiAwLjIKZGYgPC0gZGF0YS5mcmFtZSh4ID0geCwgeSA9IHksIHogPSB6LCB0ID0gdCwgayA9IGspCnBjYSA8LSBwcmNvbXAoZGYsIGNlbnRlciA9IFRSVUUsIHNjYWxlLiA9IFRSVUUpCmBgYAoKYGBge3IsIGVjaG89RkFMU0V9Cm1hdHBsb3QoZGYsIHR5cGUgPSAibCIsIGJ0eSA9ICJuIikKcGxvdChkZiwgcGNoID0gIi4iLCBjZXggPSAwLjEpCmBgYAoKQmFzaWMgc3RhdGlzdGljczoKCmBgYHtyfQpyb3VuZChjb3IoZGYpLCAyKSAjIGNvcnJlbGF0aW9uCnJvdW5kKHZhcihkZiksIDIpICMgdmFyaWFuY2UgJiBjb3ZhcmlhbmNlCnJvdW5kKHNxcnQoZGlhZyh2YXIoZGYpKSksIDIpICMgc3RkIGRldgpgYGAKCiMjIyBWYXJpYW5jZXMgb2YgdGhlIHByaW5jaXBhbCBjb21wb25lbnRzCgpgYGB7cn0KZWlnIDwtIChwY2Ekc2RldikgXiAyICMgRWlnZW52YWx1ZXMKcHJpbnQoZWlnKQp2YXJpYW5jZSA8LSBlaWcgKiAxMDAvc3VtKGVpZykgIyB2YXJpYW5jZSBpbiBwZXJjZW50YWdlCnByaW50KHZhcmlhbmNlKQpjdW12YXIgPC0gY3Vtc3VtKHZhcmlhbmNlKSAjIEN1bXVsYXRpdmUgdmFyaWFuY2UKcHJpbnQoY3VtdmFyKQpgYGAKCiMjIyBDb29yZGluYXRlcyBvZiB2YXJpYWJsZXMgb24gdGhlIHByaW5jaXBhbCBjb21wb25lbnRzCgpUaGUgY29ycmVsYXRpb24gYmV0d2VlbiB2YXJpYWJsZXMgYW5kIHByaW5jaXBhbCBjb21wb25lbnRzCgpgYGB7cn0KdmFyX2Nvcl9mdW5jIDwtIGZ1bmN0aW9uKHZhci5sb2FkaW5ncywgY29tcC5zZGV2KSB2YXIubG9hZGluZ3MqY29tcC5zZGV2CiMgVmFyaWFibGUgY29ycmVsYXRpb24vY29vcmRpbmF0ZXMKbG9hZGluZ3MgPC0gcGNhJHJvdGF0aW9uCnNkZXYgPC0gcGNhJHNkZXYKdmFyLmNvb3JkIDwtIHZhci5jb3IgPC0gdChhcHBseShsb2FkaW5ncywgMSwgdmFyX2Nvcl9mdW5jLCBzZGV2KSkKaGVhZCh2YXIuY29vcmRbLCAxOjRdKQpgYGAKCmBgYHtyfQpwcmludChwY2EpCnMucGNhIDwtIHN1bW1hcnkocGNhKSAjIHZhcmlhdGlvbiBleHBsYWluZWQgYnkgZWFjaCBjb21wb25lbnQgKGFzIHBlcmNlbnRhZ2UpCnByaW50KHMucGNhKQpsYXlvdXQobWF0cml4KGMoMSwyLDMsNCksIDIsIDIsIGJ5cm93ID0gVFJVRSksIHdpZHRocyA9IGMoMiwyKSwgaGVpZ2h0cyA9IGMoMiwyKSkKcGxvdChwY2EpICMgVmFyaWFuY2UgZXhwbGFpbmVkIGJ5IGVhY2ggY29tcG9uZW50CiMgb3IgbGV0J3MgcGxvdCBwcm9wb3J0aW9uIG9mIHZhcmlhbmNlIGFzIHBlcmNlbnRhZ2UKcGxvdChzLnBjYSRpbXBvcnRhbmNlWzIsXSoxMDAsIHR5cGUgPSAiYiIsIGJ0eSA9ICJuIiwgeWxhYiA9ICJwcm9wb3J0aW9uIG9mIHZhcmlhbmNlICglKSIsIHhsYWIgPSAiUEMiKQpzY3JlZXBsb3QocGNhLCB0eXBlID0gImxpbmVzIikgIyBhbm90aGVyIGZ1bmN0aW9uIHRvIHBsb3QgdmFyaWF0aW9uIG9mIHZhcmlhbmNlCmFibGluZShoID0gMSxsdHkgPSAzLCBjb2wgPSAicmVkIikKYmlwbG90KHBjYSkKYGBgCgojIyMjIEhvdyBtYW55IHZhbHVlcyB0byBrZWVwPwoKKiBLYWlzZXIgQ3JpdGVyaW9uOiByZXRhaW4gb25seSBmYWN0b3JzIHdpdGggZWlnZW52YWx1ZXMgPiAxIChOb3RlOiBlaWdlbnZhbHVlcyA9IHZhcmlhbmNlcykKCiMjIyBIb21lbWFkZSBiaXBsb3QKCmBgYHtyfQpteXBsb3QgPC0gZnVuY3Rpb24ociwgZDEgPSAxLCBkMiA9IDIpIHsKICByIDwtIHJbLCBjKGQxLCBkMildCiAgbXggPC0gbWF4KHJhbmdlKHJbLDFdKSkKICBwbG90KHIsIHR5cGUgPSAibiIsIHlsaW0gPSBjKC0xLCAxKSwgeGxpbSA9IGMoLW14KjEuMiwgbXgqMS4yKSwgeGxhYiA9IHBhc3RlMCgiUEMiLCBkMSksIHlsYWIgPSBwYXN0ZTAoIlBDIiwgZDIpKQogIGFibGluZSh2ID0gMCwgaCA9IDAsIGx0eSA9IDMpCiAgYXJyb3dzKDAsIDAsIHJbLDFdLCByWywyXSwgbGVuID0gMC4xLCBjb2wgPSAiZGFya3JlZCIpCiAgdGV4dCgxLjEgKiByLCByb3duYW1lcyhyKSwgY29sID0gImRhcmtyZWQiLCB4cGQgPSBUUlVFLCBjZXggPSAxKQp9CnIgPC0gcGNhJHJvdGF0aW9uCmxheW91dChtYXRyaXgoYygxLDIsMyw0LDUsNiksIDIsIDMsIGJ5cm93ID0gVFJVRSksIHdpZHRocyA9IGMoMSwxLDEpLCBoZWlnaHRzID0gYygxLDEsMSkpCm15cGxvdChyKQpteXBsb3QociwgMiwgMykKbXlwbG90KHIsIDEsIDMpCm15cGxvdChyLCAzLCA0KQpteXBsb3QociwgNCwgNSkKbXlwbG90KHIsIDIsIDUpCmBgYAoKYGBge3J9CmxpYnJhcnkoc2NhdHRlcnBsb3QzZCkKbXgxIDwtIG1heChyYW5nZShyWywxXSkpCm14MiA8LSBtYXgocmFuZ2UoclssMl0pKQpteDMgPC0gbWF4KHJhbmdlKHJbLDNdKSkKcGx0IDwtIHNjYXR0ZXJwbG90M2QoclssMV0sIHJbLDJdLCByWywzXSwgYW5nbGUgPSAxMzAsIGFzcCA9IDAuNSwKICAgICAgICAgICAgICB4bGltID0gYygtbXgxLCBteDEpLCB5bGltID0gYygtbXgyLCBteDIpLCB6bGltID0gYygtbXgzLCBteDMpLAogICAgICAgICAgICAgIHhsYWIgPSAiUEMxIiwgeWxhYiA9ICJQQzIiLCB6bGFiID0gIlBDMyIpCnBsdCRwb2ludHMzZCh4PWMoMCwwKSwgeT1jKDAsMCksej1jKC1teDMqMS4yLCBteDMpLCB0eXBlPSJsIiwgY29sPSJibGFjayIsIGx3ZCA9IDIpCnBsdCRwb2ludHMzZCh4PWMoLW14MSxteDEpLCB5PWMoMCwwKSx6PWMoMCwwKSwgdHlwZT0ibCIsIGNvbD0iYmxhY2siLCBsd2QgPSAyKQpwbHQkcG9pbnRzM2QoeD1jKDAsMCksIHk9YygtbXgyLG14Miksej1jKDAsMCksIHR5cGU9ImwiLCBjb2w9ImJsYWNrIiwgbHdkID0gMikKZm9yIChpIGluIDE6NSkKICBwbHQkcG9pbnRzM2QoeD1jKDAscltpLDFdKSwgeT1jKDAscltpLDJdKSx6PWMoMCwgcltpLDNdKSwgdHlwZT0ibCIsIGNvbD0iYmx1ZSIsIGx3ZD0yKQpgYGAKCiMjIyBQQyBDYWxjdWxhdGlvbgoKYGBge3J9CnBjMSA8LSBkcm9wKHNjYWxlKGFzLm1hdHJpeChkZiksIGNlbnRlciA9IHBjYSRjZW50ZXIsCiAgICAgICAgICAgICBzY2FsZSA9IHBjYSRzY2FsZSkgJSolIHBjYSRyb3RhdGlvblssIDFdKQphbGwuZXF1YWwocGMxLCBwY2EkeFssMV0pICMgVFJVRQpgYGAKCiMjIyBDb250cmlidXRpb25zIG9mIHRoZSB2YXJpYWJsZXMgdG8gdGhlIHByaW5jaXBhbCBjb21wb25lbnRzCgpgYGB7cn0KdmFyLmNvczIgPC0gdmFyLmNvb3JkXjIKY29tcC5jb3MyIDwtIGFwcGx5KHZhci5jb3MyLCAyLCBzdW0pCmNvbnRyaWIgPC0gZnVuY3Rpb24odmFyLmNvczIsIGNvbXAuY29zMil7dmFyLmNvczIqMTAwL2NvbXAuY29zMn0KdmFyLmNvbnRyaWIgPC0gdChhcHBseSh2YXIuY29zMiwxLCBjb250cmliLCBjb21wLmNvczIpKQpwcmludCh2YXIuY29udHJpYikKY29sU3Vtcyh2YXIuY29udHJpYikKYGBgCgoKIyMgUkVTT1VSQ0VTOgoKKiBodHRwOi8vcmNvdXJzZS5pb3Aua2NsLmFjLnVrLzIwMTQvNWZyaS8zL3VwZGF0ZWRfa2hfcGNhX3NsaWRlc18wMzA2MTQucGRmCiogaHR0cHM6Ly9jcmFuLnItcHJvamVjdC5vcmcvd2ViL3BhY2thZ2VzL0hTQVVSL3ZpZ25ldHRlcy9DaF9wcmluY2lwYWxfY29tcG9uZW50c19hbmFseXNpcy5wZGYKKiBodHRwczovL3N0YXRzLnN0YWNrZXhjaGFuZ2UuY29tL3F1ZXN0aW9ucy8yNjI2MTEvci1wY2EtcHJpbmNpcGFsLXBzeWNoLXBhY2thZ2UtdnMtcHJjb21wLWxvYWRpbmdzCiogaHR0cDovL3d3dy5zdGhkYS5jb20vZW5nbGlzaC93aWtpL3ByaW5jaXBhbC1jb21wb25lbnQtYW5hbHlzaXMtaW4tci1wcmNvbXAtdnMtcHJpbmNvbXAtci1zb2Z0d2FyZS1hbmQtZGF0YS1taW5pbmcKCgo=