Created
July 22, 2017 17:45
-
-
Save drewhendrickson/557c5e4b6f91a6a5d6276166c23e367e to your computer and use it in GitHub Desktop.
R code to build a co-occurrence matrix from two clustering solutions
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a co-occurrence count matrix from two clustering solutions.
#
# Given a data frame with one cluster label per row from each of two
# solutions (columns Cluster.x and Cluster.y), count how many rows fall into
# every (x label, y label) combination. The result, cooccurance_counts, is a
# numeric matrix with one row per distinct Cluster.x label and one column per
# distinct Cluster.y label, in order of first appearance in the data.

# build some sample data: a random numeric id column plus a randomly drawn
# cluster label (1 to 4) for each of the two solutions
entries <- 1000
data <- data.frame(nomem_encr = round(runif(entries, 100, 10000)),
                   Cluster.x = sample(1:4, entries, replace = TRUE),
                   Cluster.y = sample(1:4, entries, replace = TRUE))

# check that the sample data looks ok
head(data)

# get the unique cluster labels for each dimension
# (works the same whether the cluster labels are numbers or letters)
x_cluster_labels <- unique(data$Cluster.x)
y_cluster_labels <- unique(data$Cluster.y)
n_x <- length(x_cluster_labels)
n_y <- length(y_cluster_labels)

# for every data row at once, find which matrix row / column it belongs to:
# match() returns the position of each value within the label vector
x_index <- match(data$Cluster.x, x_cluster_labels)
y_index <- match(data$Cluster.y, y_cluster_labels)

# R matrices are stored column-major, so cell [r, c] of an n_x-row matrix
# sits at linear position (c - 1) * n_x + r
cell_index <- (y_index - 1L) * n_x + x_index

# tabulate() counts how many data rows landed on each linear position
# (empty cells get 0); as.numeric() keeps the matrix in double storage mode.
# The names of the dimensions are based on the cluster labels.
cooccurance_counts <- matrix(
  as.numeric(tabulate(cell_index, nbins = n_x * n_y)),
  nrow = n_x,
  ncol = n_y,
  dimnames = list(paste0("x_", x_cluster_labels),
                  paste0("y_", y_cluster_labels))
)

# print out the co-occurrence counts
cooccurance_counts

# simple check: every data row should contribute exactly one count, so the
# matrix total must equal the number of rows
sum(cooccurance_counts) == nrow(data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment