Last active
August 27, 2017 20:02
-
-
Save EpiDemos82/6b536fc6ab27c4cd9404597c0281ce34 to your computer and use it in GitHub Desktop.
Mean SNP/genetic distance between groups/clades using R
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| library(ape) | |
| library(vegan) | |
| library(adegenet) #Might as well load if you are doing phyo stuff | |
| setwd("/Users/../floder") | |
| Alignment <- read.dna("mfa.fasta",format = "fasta") #Importing alignment in fasta format | |
| fasta.seq.labels <- as.data.frame(labels(Alignment)) #Obtaining ordered taxa | |
| colnames(fasta.seq.labels) <- "taxa" | |
| clades <- as.data.frame(read_delim("~/clade_assignments_for_tax.txt")) #importing clade assigns for each isolate (in the future this could be automated using PCA) | |
| fasta.seq.labels$id <- 1:nrow(clades) #adding row number to maintain order for sorting after merge | |
| labels.clades <- merge(fasta.seq.labels, clades, by="taxa") #merging | |
| labels.clades <- labels.clades[order(labels.clades$id), ] #ordering | |
| #https://rdrr.io/cran/ape/man/dist.dna.html | |
| Alignment.dist <- dist.dna(Alignment, model = "N") #Matrix of SNP differences - other distances could be used | |
| hist(Alignment.dist, main=NULL, xlab = "Distribution of Genetic Distances") #Plot histogram of SNP distances to confirm expected range of distances | |
| md <- meandist(Alignment.dist, labels.clades$Clade) #calculating mean distance between clades | |
| summary(md) #summary of meandistances |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment