# Version 0 gcContent <- function(dna, counter = 0){ for(i in 1:length(dna)){ if(dna[i] == "C" | dna[i] == "G") {counter = counter + 1} } return(counter/length(dna)) } # Test case 1: gcContent("AACGTGGCTA") # [1] 0 # Unexpected - we should get 0.5 - how can we check our function? # What is 'dna'? gcContent <- function(dna, counter = 0){ for(i in 1:length(dna)){ if(dna[i] == "C" | dna[i] == "G") {counter = counter + 1} } return(dna) } # for loop is not working as expected, the dna stays one complete string # how can we split our string? -> function strsplit() gcContent <- function(dna, counter = 0){ dna <- strsplit(dna, "") for(i in 1:length(dna)){ if(dna[i] == "C" | dna[i] == "G") {counter = counter + 1} } return(dna) } gcContent("AACGTGGCTA") # [[1]] # [1] "A" "A" "C" "G" "T" "G" "G" "C" "T" "A" # What do we have here? gcContent <- function(dna, counter = 0){ dna <- strsplit(dna, "") for(i in 1:length(dna)){ if(dna[i] == "C" | dna[i] == "G") {counter = counter + 1} } return(typeof(dna)) } gcContent("AACGTGGCTA") # [1] "list" # We have a list, but want to have a vector -> function unlist() # Now we have solved the problems that brought us to Version 1 from lecture 1 # Version 1 (from lecture 1) gcContent <- function(dna, counter = 0){ dna <- unlist(strsplit(dna, "")) for(i in 1:length(dna)){ if(dna[i] == "C" | dna[i] == "G") {counter = counter + 1} } return(counter/length(dna)) } gcContent("AACGTGGCTA") # [1] 0.5 # OKAY gcContent("AATATATTAT") # [1] 0 # OKAY gcContent(23) # Error in strsplit(dna, "") : non-character argument gcContent(TRUE) # Error in strsplit(dna, "") : non-character argument # We now want to define our own error message # Version 2 gcContent <- function(dna, counter = 0){ if (!is.character(dna)){ stop("The argument must be of type character.") } dna <- unlist(strsplit(dna, "")) for(i in 1:length(dna)){ if(dna[i] == "C" | dna[i] == "G") {counter = counter + 1} } return(counter/length(dna)) } gcContent("AACGTGGCTA") gcContent("AATATATTAT") gcContent(23) gcContent(TRUE) gcContent("notDNA") # [1] 0 gcContent("Cool") # [1] 0.25 # We want to make sure that the input value is DNA (consists of A, C, T, G) # If there is another character than A, C, G or T we compute the value but give # a warning # We can work with the function grep() # Examples for grep() # [^ACGT]: matches anything except the characters A, C, G, T dna <- "ACGTATGA" grep("[^ACGT]", dna) # integer(0) length(grep("[^ACGT]", dna)) # [1] 0 dna <- "NACGTATGA" grep("[^ACGT]", dna) # [1] 1 length(grep("[^ACGT]", dna)) # [1] 1 # If the length of the construct above is > 0, we have other characters than # A, C, G or T # In this case, we want to through a warning # Version 3 gcContent <- function(dna, counter = 0){ if (!is.character(dna)){ stop("The argument must be of type character.") } if (length(grep("[^ACGT]", dna)) > 0){ warning("The input contains characters other than A, C, G or T - value should not be trusted!") } dna <- unlist(strsplit(dna, "")) for(i in 1:length(dna)){ if(dna[i] == "C" | dna[i] == "G") {counter = counter + 1} } return(counter/length(dna)) } # Version 4 # We want to add the argument 'AT' to our function to be able to also calculate # the AT content of a given DNA gcContent <- function(dna, counter = 0, AT){ if (!is.character(dna)){ stop("The argument must be of type character.") } if (length(grep("[^ACGT]", dna)) > 0){ warning("The input contains characters other than A, C, G or T - value should not be trusted!") } dna <- unlist(strsplit(dna, "")) for(i in 1:length(dna)){ if(dna[i] == "C" | dna[i] == "G") {counter = counter + 1} } if (AT == TRUE){ return(1 - counter/length(dna)) } else { return(counter/length(dna)) } } gcContent("AACGTGTTTA", AT = TRUE) gcContent("AACGTGTTTA") # Version 4 only works if we specify the argument AT! # Version 5 # GCContent: Calculates GC content. It is also possible to calculate the AT # content (AT = TRUE) # Author: Sonja Grath # Last update: 2019-03-12 # Input: DNA sequence # Output: GC (AT) content of the DNA sequence gcContent <- function(dna, counter = 0, AT = FALSE){ if (!is.character(dna)){ stop("The argument must be of type character.") } if (length(grep("[^ACGT]", dna)) > 0){ warning("The input contains characters other than A, C, G or T - value should not be trusted!") } dna <- unlist(strsplit(dna, "")) for(i in 1:length(dna)){ if(dna[i] == "C" | dna[i] == "G") {counter = counter + 1} } if (AT == TRUE){ return(1 - counter/length(dna)) } else { return(counter/length(dna)) } } ###################### # TEST CASES ###################### gcContent("AACGTGGCTA") gcContent("AATATATTAT") gcContent(23) gcContent(TRUE) gcContent("notDNA") gcContent("Cool") gcContent("AATATATTGC", AT = TRUE)