# on server turtle set the lib path as follows: .libPaths("/home/dirk/R/x86_64-pc-linux-gnu-library/3.5") library(abc) library(pegas) system("ms 21 1 -T -I 3 10 10 1 0 -m 1 2 0.5 -m 2 1 0.5 -ej 1.5 1 2 -ej 5 3 2 | tail -n +4 | grep -v // > tree") system("seq-gen -mHKY -s 0.01 tree > sequences.txt") simulate.sequences <- function(mutation.rate, migration.rate, time.since.split, nloci=10, seq.length=1000, sampleA=10, sampleB=10) { s <- list() classic.stats <- matrix(NA,ncol=10,nrow=nloci) ## classical summary statistics as a basis for computing summary statistics for abc dimnames(classic.stats) <- list(1:nloci, c("Taj.D", "Taj.D.A","Taj.D.B", "thetaW", "thetaW.A", "thetaW.B", "Taj.pi", "Taj.pi.A", "Taj.pi.B", "Hudsons.Fst") ) ## do simulation separately for each locus for (i in 1:nloci) { ## we simulate two population and a single outgroup sequence system(paste("/home/dirk/usr/src/msdir/ms ",sampleA+sampleB+1," 1 -T -I 3 ",sampleA,sampleB,1,"0 -m 1 2", migration.rate," -m 2 1",migration.rate,"-ej",time.since.split,"1 2 -eM",time.since.split, "0 -ej 5 3 2 | tail -n +4 | grep -v // > tree")) system(paste("seq-gen -mHKY -s",mutation.rate,"-l",seq.length,"tree > sequences.txt")) s[[i]] <- read.dna("sequences.txt") pop <- rep("A",sampleA+sampleB+1) pop[as.numeric(labels(s[[i]])) > sampleA] <- "B" pop[as.numeric(labels(s[[i]])) > sampleA+sampleB] <- "C" tpi <- 0 for(k in 1:sampleA) { for(j in sampleA+1:sampleB) { tpi <- tpi+nuc.div(s[[i]][as.numeric(labels(s[[i]]))==k | as.numeric(labels(s[[i]]))==j,]) } } tpi <- tpi/(sampleA*sampleB) classic.stats[i,] <- c(tajima.test(s[[i]])$D, tajima.test(s[[i]][pop=="A",])$D, tajima.test(s[[i]][pop=="B",])$D, theta.s(length(seg.sites(s[[i]])),nrow(s[[i]])), theta.s(length(seg.sites(s[[i]][pop=="A",])),nrow(s[[i]][pop=="A",])), theta.s(length(seg.sites(s[[i]][pop=="B",])),nrow(s[[i]][pop=="B",])), nuc.div(s[[i]]), nuc.div(s[[i]][pop=="A",]), nuc.div(s[[i]][pop=="B",]), (tpi-(nuc.div(s[[i]][pop=="A",])+nuc.div(s[[i]][pop=="B",]))/2)/tpi ) } cat("\n") return(list(loci=s,statistics=classic.stats)) } D <- simulate.sequences(0.01,0.5,1.5) str(D) D$loci D$statistics ss <- c(mean(D$statistics[,"Taj.D"]),mean(D$statistics[,"Taj.D.A"]),mean(D$statistics[,"Taj.D.B"]), mean(D$statistics[,"thetaW"]),mean(D$statistics[,"thetaW.A"]),mean(D$statistics[,"thetaW.B"]), mean(D$statistics[,"Hudsons.Fst"])) sim.param <- matrix(c(0.001+rexp(1000,20),rexp(1000,10),rexp(1000,0.5)),nrow=1000,ncol=3) ## for a start, we use an exponential prior sim.ss <- matrix(NA,nrow=1000,ncol=7) for(n in 1:1000) { cat("Simulation",n,"\n") sim <- simulate.sequences(sim.param[n,1],sim.param[n,2],sim.param[n,3]) sim.ss[n,] <- c(mean(sim$statistics[,"Taj.D"]),mean(sim$statistics[,"Taj.D.A"]),mean(sim$statistics[,"Taj.D.B"]), mean(sim$statistics[,"thetaW"]),mean(sim$statistics[,"thetaW.A"]),mean(sim$statistics[,"thetaW.B"]), mean(sim$statistics[,"Hudsons.Fst"])) } ##write.table(file="sumstats.txt",sim.ss) ##write.table(file="jsfs.txt",sim.jsfs) ##write.table(file="params.txt",sim.param) ##write.table(file="observed.txt",ss) ##sim.ss <- as.matrix(read.table(file="sumstats.txt")) ##sim.param <- as.matrix(read.table(file="params.txt")) result <- abc(ss,sim.param,sim.ss,tol=0.05,method="loclinear") summary(result) pdf(file="abcres.pdf") plot(result,param=sim.param) dev.off()