## Felsenstein pruning likelihood for a Newick tree under the F84 model.
##
## Conventions used by every function in this file:
##  * nucleotides are the lower-case letters a, c, g, t (upper-case input is
##    accepted); all vectors and matrices are ordered a, c, g, t.
##  * params is a list with the elements
##      pi    - the four equilibrium base frequencies, either named
##              a/c/g/t or unnamed in the order a, c, g, t; must sum to 1
##      alpha - rate of within-group (purine or pyrimidine) replacement
##      beta  - rate of general replacement
##  * branch lengths are multiplied directly with alpha and beta.

.f84.nucleotides <- c("a", "c", "g", "t")

.f84.freqs <- function(params) {
  ## Returns the base frequencies stored in params as a numeric vector that
  ## is named and ordered a, c, g, t. Stops if they are missing or invalid.
  freq <- unlist(params[["pi"]])
  if (!is.numeric(freq) || length(freq) != 4L) {
    stop("params$pi must hold the four base frequencies", call. = FALSE)
  }
  if (is.null(names(freq))) {
    names(freq) <- .f84.nucleotides
  } else {
    names(freq) <- tolower(names(freq))
    if (!setequal(names(freq), .f84.nucleotides)) {
      stop("names of params$pi must be a, c, g and t", call. = FALSE)
    }
    freq <- freq[.f84.nucleotides]
  }
  if (anyNA(freq) || any(freq <= 0) || abs(sum(freq) - 1) > 1e-8) {
    stop("params$pi must be positive and sum to 1", call. = FALSE)
  }
  freq
}

.f84.matrix <- function(brlen, params) {
  ## Returns the 4 x 4 matrix P of F84 transition probabilities for a branch
  ## of length brlen; P[i, j] is the probability of nucleotide j at one end
  ## of the branch given nucleotide i at the other end.
  is.rate <- function(x) {
    is.numeric(x) && length(x) == 1L && !is.na(x) && x >= 0
  }
  freq <- .f84.freqs(params)
  alpha <- params[["alpha"]]
  beta <- params[["beta"]]
  if (!is.rate(alpha) || !is.rate(beta)) {
    stop("params$alpha and params$beta must be non-negative numbers",
         call. = FALSE)
  }
  if (!is.rate(brlen)) {
    stop("brlen must be a single non-negative number", call. = FALSE)
  }

  purine <- .f84.nucleotides %in% c("a", "g")
  same.group <- outer(purine, purine, "==")
  ## total frequency of the purine/pyrimidine group each nucleotide is in
  group.freq <- ifelse(purine, sum(freq[purine]), sum(freq[!purine]))

  no.event <- exp(-(alpha + beta) * brlen)  # no replacement at all
  no.general <- exp(-beta * brlen)          # no general replacement

  ## every row of these matrices describes the distribution of the target j
  general <- matrix(freq, 4L, 4L, byrow = TRUE)
  within <- same.group * matrix(freq / group.freq, 4L, 4L, byrow = TRUE)

  ## P_ij = e^{-(a+b)t} d_ij + e^{-bt} (1 - e^{-at}) pi_j eps_ij / Pi_j
  ##        + (1 - e^{-bt}) pi_j
  P <- no.event * diag(4L) +
    (no.general - no.event) * within +
    (1 - no.general) * general
  dimnames(P) <- list(.f84.nucleotides, .f84.nucleotides)
  P
}

transistion.prob <- function(nuc1, nuc2, brlen, params) {
  ## Given that two nodes are connected by a branch of length brlen and that
  ## nuc1 is the nucleotide in one of the nodes, this function returns the
  ## probability that nuc2 is the nucleotide in the other node. params is
  ## the list of parameters of the F84 substitution model (see file header).
  ## (The spelling of the function name is kept for existing callers.)
  nuc1 <- tolower(as.character(nuc1))
  nuc2 <- tolower(as.character(nuc2))
  if (length(nuc1) != 1L || length(nuc2) != 1L ||
      !all(c(nuc1, nuc2) %in% .f84.nucleotides)) {
    stop("nuc1 and nuc2 must each be one of a, c, g, t", call. = FALSE)
  }
  .f84.matrix(brlen, params)[[nuc1, nuc2]]
}

daughter.trees <- function(tree) {
  ## Given a string representing a tree in newick format, this function
  ## returns the subtrees that stem from the root and the corresponding
  ## branch lengths as a data frame with the columns
  ##   subtree (character) and brlen (numeric, NA if no length is given).
  ## For example, if
  ##   tree="((A:2.3,B:3):1.2,((C:3,D:1):1.2,G:4.3):2.1,F:9.7);"
  ## (with or without the trailing ";") the result is
  ##
  ##   "(A:2.3,B:3)"            1.2
  ##   "((C:3,D:1):1.2,G:4.3)"  2.1
  ##   "F"                      9.7
  ##
  ## Anything after the parenthesis that closes the root (a root label or a
  ## root branch length) is ignored. Quoted labels are not supported.
  if (!is.character(tree) || length(tree) != 1L || is.na(tree)) {
    stop("tree must be a single newick string", call. = FALSE)
  }
  tree <- sub(";$", "", gsub("[[:space:]]", "", tree))
  chars <- strsplit(tree, "")[[1]]
  if (length(chars) == 0L || chars[1] != "(") {
    stop("tree has no daughter trees: ", tree, call. = FALSE)
  }

  ## nesting depth after each character; the first return to depth 0 is the
  ## parenthesis that closes the root
  depth <- cumsum((chars == "(") - (chars == ")"))
  close.pos <- which(depth == 0)[1]
  if (is.na(close.pos)) {
    stop("unbalanced parentheses in tree: ", tree, call. = FALSE)
  }

  ## commas directly below the root separate the daughter trees
  inner <- seq_len(close.pos - 2L) + 1L
  cuts <- inner[chars[inner] == "," & depth[inner] == 1]
  pieces <- substring(tree, c(2L, cuts + 1L), c(cuts - 1L, close.pos - 1L))
  if (any(!nzchar(pieces))) {
    stop("empty daughter tree in: ", tree, call. = FALSE)
  }

  ## a branch length is whatever follows the last ":" that lies outside of
  ## all parentheses of the piece
  len.pattern <- ":[^:()]*$"
  has.len <- grepl(len.pattern, pieces)
  brlen <- rep(NA_real_, length(pieces))
  brlen[has.len] <- as.numeric(sub("^.*:", "", pieces[has.len]))

  data.frame(subtree = sub(len.pattern, "", pieces), brlen = brlen,
             stringsAsFactors = FALSE)
}

calculate.partial.likelihoods <- function(tree, params, column) {
  ## Returns partial likelihoods "w" for the root of the tree given as
  ## newick string: a numeric vector named a, c, g, t whose entries are the
  ## probabilities of the tip data below the root given that nucleotide at
  ## the root. column is an alignment column represented as a vector or
  ## list with element names, such that a nucleotide is associated with each
  ## tip label. F84 parameters are specified in params (see file header).
  ## The tip states "-", "n" and "?" are treated as missing data.
  tree <- sub(";$", "", gsub("[[:space:]]", "", tree))

  if (!grepl("(", tree, fixed = TRUE)) {
    ## the tree is only a tip label, e.g. return 0,1,0,0 for "c"
    label <- sub(":.*$", "", tree)
    if (!(label %in% names(column))) {
      stop("no nucleotide given for tip '", label, "'", call. = FALSE)
    }
    nuc <- tolower(as.character(column[[label]]))
    if (length(nuc) != 1L) {
      stop("tip '", label, "' needs exactly one nucleotide", call. = FALSE)
    }
    if (nuc %in% c("-", "n", "?")) {
      w <- rep(1, 4L)  # missing data is compatible with every state
    } else if (nuc %in% .f84.nucleotides) {
      w <- as.numeric(.f84.nucleotides == nuc)
    } else {
      stop("unknown nucleotide '", nuc, "' at tip '", label, "'",
           call. = FALSE)
    }
    names(w) <- .f84.nucleotides
    return(w)
  }

  ## inner node: multiply the contributions of all daughter trees
  subtree <- daughter.trees(tree)
  w <- rep(1, 4L)
  for (i in seq_len(nrow(subtree))) {
    if (is.na(subtree[i, 2])) {
      stop("missing branch length above ", subtree[i, 1], call. = FALSE)
    }
    w.daughter <- calculate.partial.likelihoods(subtree[i, 1], params, column)
    ## sum over the daughter's states, weighted by transition probability
    P <- .f84.matrix(subtree[i, 2], params)
    w <- w * as.vector(P %*% w.daughter)
  }
  names(w) <- .f84.nucleotides
  w
}

logLik <- function(tree, alignment, params) {
  ## Returns log likelihood of tree given as string in newick format for
  ## sequence data given in alignment. params are parameters of F84
  ## substitution model (see file header). alignment is either
  ##  * a named character vector or list with one sequence per tip label,
  ##    each sequence being one string ("acgt") or a vector of single
  ##    characters, or
  ##  * a character matrix / data frame with one row per tip (row names are
  ##    the tip labels) and one column per site.
  ## Sites are treated as independent.
  ## Note: once this file is sourced, this name masks the stats::logLik
  ## generic.
  if (is.data.frame(alignment)) {
    alignment <- as.matrix(alignment)
  }
  if (!is.matrix(alignment)) {
    seqs <- lapply(alignment, function(s) {
      s <- as.character(s)
      if (length(s) == 1L) strsplit(s, "")[[1]] else s
    })
    if (length(seqs) == 0L || length(unique(lengths(seqs))) != 1L) {
      stop("alignment must contain sequences of equal length", call. = FALSE)
    }
    alignment <- do.call(rbind, seqs)
  }
  if (is.null(rownames(alignment))) {
    stop("alignment must be named by tip labels", call. = FALSE)
  }

  freq <- .f84.freqs(params)
  site.lik <- vapply(seq_len(ncol(alignment)), function(j) {
    column <- alignment[, j]
    names(column) <- rownames(alignment)  # names are lost for a single row
    sum(freq * calculate.partial.likelihoods(tree, params, column))
  }, numeric(1))
  sum(log(site.lik))
}