# R course, Wedmesday 14/02/2018 # Algorithmics in R (continued) # Scripts used in class ## Writing your own functions # example # GC content in DNA sequence ?gc # oops there is already a function named gc ?GC # ok this time ?gregexpr # function we want to used dna <- "AATTCGCTTA" # example of dna sequence gregexpr("C",dna) # outputs a list of positions gregexpr("C|G",dna) # searches both bases length(gregexpr("C|G",dna)[[1]]) # number of occurences of G or C length(gregexpr("C|G",dna)[[1]])/nchar(dna) # proportion # define function GC <- function(dna){ gc.cont <- length(gregexpr("C|G",dna)[[1]])/nchar(dna) return(gc.cont) } # test function GC("AATTCGCTTA") # are we sure our function is correct? GC("AATTAAATTA") # what happened? gregexpr("C|G","AATTAAATTA") # the answer is -1 and thus has length 1 # idea: we should test if the value is positive GC <- function(dna){ gc1 <- gregexpr("C|G",dna)[[1]] if (length(gc1)>1){ gc.cont <- length(gc1)/nchar(dna) } else { if (gc1>0){ gc.cont <- 1/nchar(dna) } else { gc.cont <- 0 } } return(gc.cont) } # test again GC("AATTAAATTA") # deal with wrong argument GC(23) GC(TRUE) GC("notDNA") GC("cool") # find a solution is.character(23) is.character(TRUE) is.character("notDNA") # we can test if not character and output an error message ## error and warning message x <- sum("hello") # error x <- mean("hello") # warning # back to example # deal with not character GC <- function(dna){ # test for type of argument if (!is.character(dna)){ stop("The argument must be of type character.") } gc1 <- gregexpr("C|G",dna)[[1]] if (length(gc1)>1){ gc.cont <- length(gc1)/nchar(dna) } else { if (gc1>0){ gc.cont <- 1/nchar(dna) } else { gc.cont <- 0 } } return(gc.cont) } # now how can we test if the data contains only DNA? # function grep ?grep grep("[^GCAT]", dna) grep("[^GCAT]", "fACTG") # updated function GC <- function(dna){ # test for type of argument if (!is.character(dna)){ stop("The argument must be of type character.") } if (length(grep("[^GCAT]", dna))>0){ warning("The input contains characters other than A, C, T, G - value should not be trusted!") } gc1 <- gregexpr("C|G",dna)[[1]] if (length(gc1)>1){ gc.cont <- length(gc1)/nchar(dna) } else { if (gc1>0){ gc.cont <- 1/nchar(dna) } else { gc.cont <- 0 } } return(gc.cont) } # test GC("fATG") # Giving several arguments ?mean ?hist mean(c(1,2,NA)) mean(c(1,2,NA), na.rm=TRUE) # example with our function GC <- function(dna, AT){ gc1 <- gregexpr("C|G",dna)[[1]] if (length(gc1)>1){ gc.cont <- length(gc1)/nchar(dna) } else { if (gc1>0){ gc.cont <- 1/nchar(dna) } else { gc.cont <- 0 } } if (AT==TRUE){ return(1-gc.cont) } else { return(gc.cont) } } # test GC(dna,AT=TRUE) # giving a default value to an argument # test GC(dna) # default value FALSE GC <- function(dna, AT=FALSE){ gc1 <- gregexpr("C|G",dna)[[1]] if (length(gc1)>1){ gc.cont <- length(gc1)/nchar(dna) } else { if (gc1>0){ gc.cont <- 1/nchar(dna) } else { gc.cont <- 0 } } if (AT==TRUE){ return(1-gc.cont) } else { return(gc.cont) } } # test GC(dna) GC(dna,AT=TRUE) ## return several values # Example for list output ci.norm <- function(x,conf=0.95) { q <- qnorm(1-(1-conf)/2) return( list(lower=mean(x)-q*sqrt(var(x)/length(x)),upper=mean(x)+q*sqrt(var(x)/length(x))) ) } ci.norm(rnorm(100)) ci.norm(rnorm(100),conf=0.99) ## apply v <- 1:4 sapply(v,factorial)