I’ve managed to get a decent-looking 4-cluster segmentation based on the opportunity scores from the Org questions. I’ve uploaded a file with the cluster assignments for the 4-cluster and a slightly different 3-cluster segmentation. I’ve provided a preliminary analysis, but the more detailed analysis is beyond my remit.
library(ggplot2)
library(reshape)
library(Hmisc)
allData <- read.csv("sample-survey-data.csv", stringsAsFactors = F, row.names = 1)
allData[allData == ""] <- NA
sum(!is.na(rowSums(allData[,6:26], na.rm = F)))
## [1] 117
cleanData <- allData[!is.na(rowSums(allData[,6:26], na.rm = F)), ]
details # Stuff like team size, number of clients, etc.
impPer # important personally
impOrg # important to the organisation
satis # satisfaction
oppPer <- impPer + ((impPer - satis) > 0) * (impPer - satis)
colnames(oppPer) <- paste("Q", seq(1,7), "_OP", sep = "")
oppOrg <- impOrg + ((impOrg - satis) > 0) * (impOrg - satis)
colnames(oppOrg) <- paste("Q", seq(1,7), "_OO", sep = "")
I used the prcomp() function to perform a principal components analysis (pretty similar to the factor analysis). I added the first two components to the details table.
From the biplot we can see that Q1-3 have similar loadings, and Q4-7 have similar loadings which are roughly orthogonal to Q1-3.
modelPer <- prcomp(oppPer)
details$pc1Per <- predict(modelPer)[, 1]
details$pc2Per <- predict(modelPer)[, 2]
I have used k-means clustering here as it is an easily accessible non-hierarchical clustering method.
K-means clustering uses a random seed, so I repeated this calculation 100 times and took the average for each number of clusters (plot below).
wss <- (nrow(details)-1)*sum(apply(details[, c("pc1Per", "pc2Per")],2,var))
wssAve <- as.data.frame(matrix(wss, 100, length(wss)))
for (i in 1:100){
for (j in 2:15) wssAve[i, j] <- sum(kmeans(details[, c("pc1Per", "pc2Per")], centers=j)$withinss)
}
set.seed(1)
fit <- kmeans(details[, c("pc1Per", "pc2Per")], 3)
# get cluster means
aggregate(details[, c("pc1Per", "pc2Per")],by=list(fit$cluster),FUN=mean)
# append cluster assignment
details$clusterPer <- fit$cluster
modelOrg <- prcomp(oppOrg)
biplot(modelOrg) ## As before, Q1-3 have similar loadings. Q4-7 have similar loadings.
details$pc1Org <- predict(modelOrg)[, 1]
details$pc2Org <- predict(modelOrg)[, 2]
## Determine number of clusters
wss <- (nrow(details)-1)*sum(apply(details[, c("pc1Org", "pc2Org")],2,var))
wssAve <- as.data.frame(matrix(wss, 100, length(wss)))
for (i in 1:100){
for (j in 2:15) wssAve[i, j] <- sum(kmeans(details[, c("pc1Org", "pc2Org")], centers=j)$withinss)
}
## K-Means Cluster Analysis
set.seed(2)
fit <- kmeans(details[, c("pc1Org", "pc2Org")], 4)
# get cluster means
foo = aggregate(details[, c("pc1Org", "pc2Org")],by=list(fit$cluster),FUN=mean)
foo[order(foo[,2]),]
# append cluster assignment
details$clusterOrg <- fit$cluster
details$Org.Size <- factor(details$Org.Size,
levels = levels(factor(details$Org.Size))[c(6, 3, 2, 5, 1, 4)])
details$Team.Size <- factor(details$Team.Size,
levels = levels(factor(details$Team.Size))[c(1, 3, 4, 2)])
details$Clients <- factor(details$Clients,
levels = levels(factor(details$Clients)))
details$Team.Manager <- factor(details$Team.Manager)
details$Consultant <- factor(details$Consultant)
breakdown <- melt(details,
id.vars = "clusterOrg",
measure.vars = c("Org.Size", "Team.Size", "Clients", "Team.Manager","Consultant"))
## clusterPer clusterOrg
## f311a55b829601478f2416a5e34fb26e 3 3
## eef720801373bd4abb6f96ead2ecc6e2 2 1
## 9f3bbb3ba73b4b8a48198c1e14eb5c5a 1 2
## 56144a2419cfba0dd857ad0696966d0b 2 1
## 0ed706da6b57f36072d0939eb885a78a 3 3
## 95f75c3edc4f9c7ce19c7d4040083b8b 2 1
## 4d835bbd8705463cd0fe90e58cdbced6 3 1
## 9201bb6fc4a33a56105045181cd55612 3 3
## 7c4ddb804f123e36b076f7f6fd02c516 2 1
## 6bf3237a91fa2b0e17a044c63624dfab 2 1
## 902b55a6006c65e21ad451519610ba82 3 4
## 046da6d2d8ce1a1d5f99f91ca672d818 2 1
## 060cd6879c4dc179918b6d3d07d3804c 3 1
## 5f34fb61b4e062900b3f2b0b59b4f883 1 4
## c165008701808a7a7b3beee5d5425c8b 3 3
## 18e4334505664686dcf8c691db591393 3 1
## 752958ac8228cf9044cb09bc257de97a 2 1
## ee4359ebaeda83fec5a82a793515e9a2 1 4
## 4cdd12aefea2d79563523cb9765c0830 3 3
## 848313a7e2a3f20c683546d14bc23e8a 1 4
## c5ab4355b69d4569a63d0fac1cd978fc 3 2
## 74f653deb3d2ce1a37704cf4bbc1942d 2 1
## 54b60afeae96f2e3c52fdbd6ba0a2db9 3 3
## 9f99e61e6a3c28dd5c55c1243c10f3ed 2 2
## 5df9e328158d0f9a5b0a1544c73e9311 3 3
## 1f2542e33eeab4dcc2e4c6f960e0fdec 3 4
## 39cf38e08cac29b80a23e4c906efcec9 2 1
## b44d34f270a16c970a48a210ae7087d4 2 3
## 83196f65f1e3c4cf82867477e05ec937 2 1
## 8c0e1a5debaeffa50b97bd101cb554da 2 3
## 2cb4c37c6b992ec5f3a3ddab48e42cba 2 1
## c6d5ec920e7fa6368637bc1e2d53bcc2 3 3
## 3a64cf15007ebfee48e98280ccfa9f99 3 3
## b17615da6d1436c4aea4b7addc9abd6c 2 2
## 4ca980ce5988c50a528aa81e7bd74a31 2 1
## 5157547d1effff8e9a99a1da4d3b8085 2 3
## 6f5fa1ad60fd6c31ec70aaed9628e4cd 3 4
## 149af43c40eaeb3716f19e25874328ad 1 4
## cc1c475bf4132fd9648f90d5736e1dc4 1 1
## 209668875cc29f5f823c4f8bf3b50d9e 2 1
## 37b650dbf9ddc0e289ed2cfe3ef82416 2 3
## c335ee458d800498750f0187a96280c1 2 1
## 11495196b00ace8179eb0443fa5d4436 2 3
## 67d1916ce29c5fb0e8938b567b231aa3 3 3
## 8bd67448eb07d405f341811054ea37e9 2 1
## 4a20cc914ead89025b53c152e432010b 2 1
## cc022ede9c88a01e332fa6ac980f9085 1 4
## a7df77b60031fed265a12992fd2baaf4 2 2
## 8170b79a0c5ae31328ecd17b0c305d9d 2 2
## 43f31e573de2ae5362d2f13f24ac638f 2 2
## 3dfac92d23d78f8fec18d306335c8728 1 4
## fc36ccc5c41c901cdb1a98bff60572b0 2 1
## e0b8c7ddca504dc2ebc7510754430d16 2 1
## da78209de584fd6239b17afe04b176a1 2 1
## 2cac3ba23be17a63b5f0c7b413c141e1 3 3
## ba2274766d33248cb4153ade19d9dd71 2 1
## 2dbd8fe841a601862ab9acdbfca66798 1 4
## d2766a424ca71f41239b7880b5391e00 3 3
## 4fedfa7a82e230fc688259373ec7c0cd 2 3
## 807e38b32e6869b6f323761358df5601 2 1
## e7a02758a282d8affd2db489755235da 1 3
## 000e2f0468382f7ccba50f53db404a94 3 3
## 8e2fd6f2f2a250632aab69943d3cbb00 2 1
## b2d06cf15e44bdce3dbd2acff256ade0 3 1
## d87d74af22d88d55c0821d6cd629fe35 1 4
## 06f6b5e35fff2628f0430ef2e5720f52 3 3
## 7b0cfdaaeb8799a363763e5a7fe22dfa 1 4
## 0c7f80480c163be55649dce503c023b3 3 4
## b863cf9b1b767813257b70566419c338 1 4
## a158e46366b471af40186d17f62412ed 1 2
## dfa440f53312fe85300664e0a7e94e41 2 3
## c11d6f6c54ea5646e330329422df03f5 2 2
## 2d1e9c78bcdcb4a378bb9d257fab4e45 3 3
## a60c2b2d4884f689c2570f906e98fd64 2 2
## eeebe0f8c736459d6dfc3aa73b009085 3 3
## 607f555154994dba74f019512c0ac823 3 3
## c6c4f3d9d142eb35f65cdb335a913d2f 3 3
## 3952785e81abde6e345eadef1ec079c3 2 2
## 513609248d13d707cd4d7e2270e5be05 3 3
## 9a4196648fafd5e1911d244681291518 3 3
## c1c5c01f75af2477673be46423f82d2d 1 4
## bfade85ddc43eca0d5363c306f164ab9 3 3
## cf544a793bd3e303280c6d37b1a9a3d2 3 3
## 6007893633b0ca9e129d0a35848f871a 3 4
## ed8c615752b29409dd626a6e496df2d4 2 1
## 5ee84539e27482da45c372f691328dfd 3 3
## f937a5cb496f017887010ad42262efba 2 2
## 5cd5cd661cac292bc0f1151df25159c6 2 3
## 1b8ec1e995bd6e89137b9dc634f798d0 2 2
## 82868ffd616ea7e594de95cd2d32abe3 3 3
## 54d3a6461656e5fb61390e4e0861fa24 2 1
## 5c6b30c25f5b18b0483e99b5faa395ff 2 3
## 40d2d2e8661be8d8ddcec554f5478ef6 2 1
## a43e20d181dd34d6b61798f057464f60 3 3
## 9e6542ae8ae9d21d11f65fb6b5a992c4 3 1
## dbfe72756613eb472fafad339b114e15 1 4
## 98efd92cdab667dda9537dfb8054492e 3 3
## 7fb91f4e4ec9860231b94e7ccdb1ed25 2 1
## bcd6b3eb981fccb42054749793707052 2 1
## 5063fd00f6d622c778014a0d8fe98c4a 3 1
## 9717d6a01d08e798b138623fbb05fd8f 3 3
## ccf3880a2b9eeeac5b1e484f20035085 2 1
## 6c64642f11980f9bb34c6cb659573371 2 1
## 7ca41821e3e2435ee12c4bb6892677f7 3 3
## f89b6fbc9f5a92c6fe5a41b831dbddbf 3 4
## f718c44179fc8162ce752e4ebcbcdd0c 3 3
## f49247b1705abe857421a6a65ff9df62 1 2
## b82b7f76ac472a40a9cab1f050af9209 2 1
## 1416d0aea0cd1bee131bfda4aaee78e0 2 1
## 66b4d8f52bb78ade6eb1f38798a7b703 2 1
## 7cce7db5c1b827d9fb0c8e631adfdf3a 2 3
## ae0df331967650dfaafe0e506e3f11bd 2 1
## a7eaf2a1d4e01854a9738b6bd7a7aeb3 2 1
## fa23f9c989073ea0dc92318d337d266e 2 1
## 273d1e5745efb4cd004e902868ea6f51 2 3
## 46d752e837cd16a01cc59dcba867bd91 3 1
## 6da70d2502b3d4e726a06279385bba98 2 1