##requirements: ## instalation of vsearch, Usearch, cutadapt, blast ## vsearch here: https://github.com/torognes/vsearch ## usearch here: http://drive5.com/usearch/manual/install.html ## cutadapt here: http://cutadapt.readthedocs.io/en/stable/installation.html ## blast here: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.4.0/ncbi-blast-2.4.0+.dmg ## blast here: https://blast.ncbi.nlm.nih.gov/Blast.cgi?PAGE_TYPE=BlastDocs&DOC_TYPE=Download ## libraries needed library(ShortRead) library("parallel") library("foreach") library("doParallel") library(vegan) library(stringr) #for extracting words library(VennDiagram) library(reshape) library(ggplot2) ##General purpose functions: setPaths <- function(user = Sys.info()[7]) { ##Convenience function used to set the paths to software based on the user. ##Define the code necessary to run usearch, vsearch, cutadapt, and blast plus the paths for blast database directory. paths <- list(usearch= "XXX" , #put path to downloaded Usearch tool instead of XXX vsearch= "XXX" , #put path to downloaded vsearch tool instead of XXX cutadapt = "XXX" , #put path to downloaded cutadapt tool instead of XXX blast = "XXX" , #put path to downloaded blast tool instead of XXX dbPath = "XXX" , #put path to merged sequences database instead of XXX dbNCBIPath = "XXX" , #put path to downloaded NCBI RefSeq ITS database instead of XXX dbUNITEPath = "XXX" , #put path to downloaded UNITE database instead of XXX blastdb.path = "XXX" #put path to downloaded makeblastdb command instead of XXX) return(paths) } writeFast <- function(seqs, desc, file) #Much faster function to write a fasta-like format without any line breaks within sequences { fasta <- c() seqs <- as.character(seqs) desc <- as.character(desc) if(substr(desc[1],1,1)!=">") { desc <- paste(">", desc, sep="") } fasta[seq(1,length(seqs)*2, by=2)] <- desc fasta[seq(2,length(seqs)*2, by=2)] <- seqs write(fasta, file=file) } readFAST <- function(file) { ##the function readFASTA in Biostrings is slow as based on a loop. This is faster...but not by tons (10% faster) in.file <- readLines(file, encoding="latin1") in.file <- iconv(in.file, from="latin1", to="ASCII", ".") desc.lines <- grep(">", in.file) seq.lines <- data.frame(desc.lines+1, stop.line = c(desc.lines[-1]-1, length(in.file))) sequence.line.list <- apply(seq.lines, 1, function(x) seq(x[1], x[2])) if(class( sequence.line.list) == "matrix") { sequence.line.list <- split(t(sequence.line.list), f=in.file[desc.lines]) descs <- names(sequence.line.list) } else { descs <- gsub(">", "", in.file[desc.lines]) } sequences <- lapply(sequence.line.list, function(x) paste(in.file[x], collapse="")) list(descs = descs, sequences = sequences) } iupacGrep <- function(pattern,x,...) #Convenience package, this uses regular expressions to run grep with IUPAC codes { pattern <- gsub("R", "[A|G]", pattern) pattern <- gsub("Y", "[C|T]", pattern) pattern <- gsub("S", "[G|C]", pattern) pattern <- gsub("W", "[A|T]", pattern) pattern <- gsub("K", "[G|T]", pattern) pattern <- gsub("M", "[A|C]", pattern) pattern <- gsub("B", "[C|G|T]", pattern) pattern <- gsub("D", "[A|G|T]", pattern) pattern <- gsub("H", "[A|C|T]", pattern) pattern <- gsub("V", "[A|C|G]", pattern) pattern <- gsub("N", "[A|C|T|G]", pattern) pattern <- gsub("I", "[A|C|T|G]", pattern) ##Note that Deoxyinosine is not a true universal match, see http://sg.idtdna.com/site/Catalog/Modifications/Category/7 grep(pattern, x, ...) } iupacSplit <- function(x, pattern,...) #Convenience package, this uses regular expressions to run grep with IUPAC codes { pattern <- gsub("R", "[A|G]", pattern) pattern <- gsub("Y", "[C|T]", pattern) pattern <- gsub("S", "[G|C]", pattern) pattern <- gsub("W", "[A|T]", pattern) pattern <- gsub("K", "[G|T]", pattern) pattern <- gsub("M", "[A|C]", pattern) pattern <- gsub("B", "[C|G|T]", pattern) pattern <- gsub("D", "[A|G|T]", pattern) pattern <- gsub("H", "[A|C|T]", pattern) pattern <- gsub("V", "[A|C|G]", pattern) pattern <- gsub("N", "[A|C|T|G]", pattern) pattern <- gsub("I", "[A|C|T|G]", pattern) ##Note that Deoxyinosine is not a true universal match, see http://sg.idtdna.com/site/Catalog/Modifications/Category/7 strsplit( x, pattern, ...) } iupacAgrep <- function(pattern, x, ...) #Convenience package, this uses regular expressions to run grep with IUPAC codes { pattern <- gsub("R", "[A|G]", pattern) pattern <- gsub("Y", "[C|T]", pattern) pattern <- gsub("S", "[G|C]", pattern) pattern <- gsub("W", "[A|T]", pattern) pattern <- gsub("K", "[G|T]", pattern) pattern <- gsub("M", "[A|C]", pattern) pattern <- gsub("B", "[C|G|T]", pattern) pattern <- gsub("D", "[A|G|T]", pattern) pattern <- gsub("H", "[A|C|T]", pattern) pattern <- gsub("V", "[A|C|G]", pattern) pattern <- gsub("N", "[A|C|T|G]", pattern) pattern <- gsub("I", "[A|C|T|G]", pattern) ##Note that Deoxyinosine is not a true universal match, see http://sg.idtdna.com/site/Catalog/Modifications/Category/7 agrep(pattern, x, ...) } #DNA functions to reverse or reverse-complement strings strrev <- function(x) paste(rev(strsplit(x, "")[[1]]), collapse = "") strcomp <- function(x) paste(c("A","T","G","C","Y","R","S","W","K","M","B","D","H","V","N", ".")[match(strsplit(x, "")[[1]], c("T","A","C","G","R","Y","S","W","M","K","V","H","D","B","N", "." ))], collapse = "") strrevcomp <- function(x) paste(c("A","T","G","C","Y","R","S","W","K","M","B","D","H","V","N", ".")[match(rev(strsplit(x, "")[[1]]), c("T","A","C","G","R","Y","S","W","M","K","V","H","D","B","N", "." ))], collapse = "") paths <- setPaths() #additional paths original.seqs<- "XXX" , #put path processed trimmed ITS sequences instead of XXX dire <- "XXX" #put path to folder above original.seqs instead of XXX forwardPrimer <- "AACTTTYRRCAAYGGATCWCT" reversePrimer <- "AGCCTCCGCTTATTGATATGCTTAART" ### more paths and filenames mergedf2oldastq <- "merged_reads.fastq" notmergedf2oldastq <- "not_merged.fastq" fwdtrimmed <- "trim_merge_reads_fwd.fastq" trimmed <- "trim_merge_reads.fastq" trimmed.fa <- "trim_merge_reads.fa" filtered <- "filter_trim_merge_reads.fa" trimseqOTU <- "trim_match_to_Otus.txt" trimseqZOTU <- "trim_match_to_Zotus.txt" setwd(original.seqs) #put sample name in every sequence identificator in every file in cdw - list files reads into store, chagnes every 4th line starting with first and writes a new file in a directory above sapply(list.files(pattern="_001.fastq"), function(x) { setwd(original.seqs) store <- readLines(x) store[seq(1,length(readLines(x)), by= 4)] <- paste(store[seq(1,length(readLines(x)), by= 4)], ":sample:", sub("-|_", "", substr(x,11,14)), sep = "") setwd(dire) writeLines(store, paste("sampled_", sub(".trimmed", "", x), sep="")) }) #merging system(paste(paths$usearch, " -fastq_mergepairs *_R1_001.fastq -fastqout ", mergedf2oldastq, " -fastqout_notmerged_fwd ", notmergedf2oldastq ,sep="")) #cuts primers system(paste(paths$cutadapt, " -g ^", forwardPrimer, " -o ", fwdtrimmed, " ", mergedf2oldastq, sep="")) system(paste(paths$cutadapt, " -a ", strrevcomp(reversePrimer), " -o ", trimmed, " ", fwdtrimmed, sep="")) #converts fastq trimemed to fasta writeFasta(readFastq(trimmed), trimmed.fa) #max expected error maxee <- 1.0 #minimal sequence length minseqlength <- 200 # default minimal abundance of a sequence to be considered OTU minsize = 2 #filters low quality merged sequences system(paste(paths$vsearch," -fastq_filter ", trimmed, " -fastq_maxee ",maxee," -fastaout ",filtered, sep="")) #finds unique sequences system(paste(paths$vsearch," -derep_fulllength ", filtered, " -relabel Uniq --minseqlength ", minseqlength," -sizeout -fastaout uniques.fa -output uniques.fa", sep= "")) #cluster otus with Uparse out algorithm #this has build in chimera detection system(paste(paths$usearch, " -cluster_otus uniques.fa -minsize ", minsize," -otus otus.fa -relabel Otu -uparseout ",dire,"uparse.out.txt", sep="")) #gets rid of line breaks and stuff in otus.fa writeFast(readFAST("otus.fa")[[2]], readFAST("otus.fa")[[1]], "otus.fasta") # assigns otus to sequences from trimmed merged sequences (needs to be fasta), increase the sensitivity allowing filtered reads to map, and produces a huge file trimseqOTU system(paste(paths$vsearch," --usearch_global ", trimmed.fa ," -db otus.fa -strand plus -id 0.97 --blast6out ",trimseqOTU, sep="")) # from the "sequence_match_to_Otus.txt" file make a table with OTUS samples and sequence frequencies #extract sample name samples <- sapply(read.csv(trimseqOTU, sep = "\t", header =F)[,1], function(x) tail(unlist(strsplit(as.character(x), split = ":")), n=1)) otus <- read.csv(trimseqOTU, sep = "\t", header =F)[,2] otusam<-as.data.frame.matrix(table(otus, samples)) #creates a data frame with otus and sequences OtusFasta <- data.frame(descs = unlist(readFAST("otus.fasta")[[1]]), sequences =unlist(readFAST("otus.fasta")[[2]])) #detects cores and leaves one for idle processes cores <- parallel::detectCores()-1 if(length(OtusFasta$descs) < 50) { cores <- 1 } blockstarts <- floor(seq(1, length(OtusFasta$descs), length.out=cores+1))[-(cores+1)] blockends <- c(blockstarts[-1]-1,length(OtusFasta$descs)) print(paste("Running", cores, "cores.")) database <-"/sh_general_release_dynamic_22.08.2016.fasta" # creates temporary files with blast results for(core.i in 1:cores) { writeFast(unlist(OtusFasta$sequences[blockstarts[core.i]:blockends[core.i]]),OtusFasta$descs[blockstarts[core.i]:blockends[core.i]], file=paste(dire, "tempFasta",core.i,".fasta", sep="")) } print(paste("file divided into N =",cores,"parts as tempFastaN.fasta in",dire)) cl <- makeCluster(cores) registerDoParallel(cl, cores=cores) blastpath <- setPaths()$blast dbpath <- setPaths()$dbUNITEPath setwd(dbpath) tempOtuBlast <- foreach(core.i = 1:cores, .packages = c("stats"), .combine=rbind) %dopar% { system(paste(blastpath, " -query ", dire, "tempFasta",core.i, ".fasta -db ", dbpath, database," -task blastn -dust no -outfmt '7 stitle pident length mismatch gapopen qstart qend sstart send evalue bitscore' -num_alignments 1 -num_descriptions 1 -out ", dire,"OTU_UNITEold_Blast_matching", core.i, ".txt", sep="")) } closeAllConnections() print("Blast run complete, wrapping up dataFrame") #create data.frame df2old with data from temporary files - otu, otuMatch, identity, length, sequence temp <- c() for(core.i in 1:cores) { temp <- append(temp, readLines(paste(dire, "OTU_UNITEold_Blast_matching", core.i, ".txt", sep=""))) } matches <- grep("Query", temp)+4 df2old <- c() df2old$otu <- sub("# Query: ", "", temp[matches-4]) #OtusFasta$descs df2old$otuMatchUNITE <- sapply(temp[matches], function(x) as.character(unlist(strsplit(x, "[\t]"))[1])) df2old$identityUNITE <- sapply(temp[matches], function(x) as.numeric(unlist(strsplit(x, "\t"))[2])) df2old$lengthUNITE <- sapply(temp[matches], function(x) as.numeric(unlist(strsplit(x, "\t"))[3])) df2old$sequence <- OtusFasta$sequences[match(df2old$otu, OtusFasta$descs)] df2old$lineageUNITE <- sapply(df2old$otuMatchUNITE, function(x) as.character(unlist(strsplit(x, "[|]"))[5])) z <- unname(strsplit(df2old$lineageUNITE, split = ";")) df2old$speciesUNITE <- gsub('^.{0,3}', '',sapply(z, "[[", 7 )) df2old$genusUNITE <- gsub('^.{0,3}', '',sapply(z, "[[", 6 )) ################### ncbi adding database <-"/fungi.ITS.fna" cl <- makeCluster(cores) registerDoParallel(cl, cores=cores) blastpath <- setPaths()$blast dbpath <- setPaths()$dbNCBIPath setwd(dbpath) tempOtuBlast <- foreach(core.i = 1:cores, .packages = c("stats"), .combine=rbind) %dopar% { system(paste(blastpath, " -query ", dire, "tempFasta",core.i, ".fasta -db ", dbpath, database, " -task blastn -dust no -outfmt '7 stitle pident length mismatch gapopen qstart qend sstart send evalue bitscore' -num_alignments 1 -num_descriptions 1 -out ", dire,"OTU_NCBI_Blast_matching", core.i, ".txt", sep="")) } closeAllConnections() print("Blast run complete, wrapping up dataFrame") #temp blast results temp <- c() for(core.i in 1:cores) { temp <- append(temp, readLines(paste(dire, "OTU_NCBI_Blast_matching", core.i, ".txt", sep=""))) } matches <- grep("Query", temp)+4 #add ncbi blast results to respective df2old columns df2old$identityNCBI <- sapply(temp[matches], function(x) as.numeric(unlist(strsplit(x, "\t"))[2])) df2old$lengthNCBI <- sapply(temp[matches], function(x) as.numeric(unlist(strsplit(x, "\t"))[3])) df2old$otuMatchNCBI <- sapply(temp[matches], function(x) as.character(unlist(strsplit(x, "[\t]"))[1])) df2old$speciesNCBI <- paste(word(temp[matches], 2), word(temp[matches],3), sep ="_") df2old$genusNCBI <- word(temp[matches], 2) #final sci names are based on UNITE results scinames <- as.character(df2old$speciesUNITE) length <- df2old$lengthUNITE identity <- df2old$identityUNITE #insert NCBI species name for otus with incomplete sp name (GENUS_sp) for those with same genus name from both db scinames[intersect(grep("_sp$", df2old$speciesUNITE), which(df2old$genusUNITE==df2old$genusNCBI))] <- df2old$speciesNCBI[intersect(grep("_sp$", df2old$speciesUNITE), which(df2old$genusUNITE==df2old$genusNCBI))] length[intersect(grep("_sp$", df2old$speciesUNITE), which(df2old$genusUNITE==df2old$genusNCBI))] <- df2old$lengthNCBI[intersect(grep("_sp$", df2old$speciesUNITE), which(df2old$genusUNITE==df2old$genusNCBI))] identity[intersect(grep("_sp$", df2old$speciesUNITE), which(df2old$genusUNITE==df2old$genusNCBI))] <- df2old$identityNCBI[intersect(grep("_sp$", df2old$speciesUNITE), which(df2old$genusUNITE==df2old$genusNCBI))] #insert NCBI species name for otus with only family orderm class or phylum scinames[grep("Fungi_sp|aceae_sp$|ales_sp$|cetes_sp$|cota_sp$|Incertae_sedis_sp$", df2old$speciesUNITE)]<- df2old$speciesNCBI[grep("Fungi_sp|aceae_sp$|ales_sp$|cetes_sp$|cota_sp$|Incertae_sedis_sp$", df2old$speciesUNITE)] length[grep("Fungi_sp|aceae_sp$|ales_sp$|cetes_sp$|cota_sp$|Incertae_sedis_sp$", df2old$speciesUNITE)]<- df2old$lengthNCBI[grep("Fungi_sp|aceae_sp$|ales_sp$|cetes_sp$|cota_sp$|Incertae_sedis_sp$", df2old$speciesUNITE)] identity[grep("Fungi_sp|aceae_sp$|ales_sp$|cetes_sp$|cota_sp$|Incertae_sedis_sp$", df2old$speciesUNITE)]<- df2old$identityNCBI[grep("Fungi_sp|aceae_sp$|ales_sp$|cetes_sp$|cota_sp$|Incertae_sedis_sp$", df2old$speciesUNITE)] # resulting genera are the first word of scinames genus <- sapply(scinames, function(x) unlist(strsplit(x, "_"))[1] ) #store entire UNITE database unite <- readLines(paste(setPaths()$dbUNITEPath,"/sh_general_release_dynamic_22.08.2016.fasta", sep ="")) #resulting lineage based on search in UNITE db for matches with genus name lineage <- sapply(genus, function(x) as.character(unlist(strsplit(grep(x, unite, val =T)[1], "[|]"))[5])) # deal with few instances where genus is not found in the unite db and support lineage from UNITE lineage[which(is.na(lineage))] <- as.character(df2old$lineageUNITE)[which(is.na(lineage))] z <- unname(strsplit(lineage, split = ";")) df2old$species <- scinames df2old$genus <- genus df2old$identity <- identity df2old$length <- length #extract respective list item and discard first 3 symbols df2old$family <- gsub('^.{0,3}', '',sapply(z, "[[", 5 )) df2old$order <- gsub('^.{0,3}', '',sapply(z, "[[", 4)) df2old$class <- gsub('^.{0,3}', '',sapply(z, "[[", 3 )) df2old$phylum <- gsub('^.{0,3}', '',sapply(z, "[[", 2 )) #make a data.frame df2old <- as.data.frame(df2old) #name rows by Otus row.names(df2old) <- df2old$otu setwd(dire) #reorder otusam according to df2old otusam <- otusam[match(df2old$otu,row.names(otusam)),] # add samples to df2old with frequencies of OTUs df2old.exp <- cbind(df2old, otusam) # create column with otu frequencies with both controls df2old.exp$otuall <- rowSums(otusam) # create column with otu frequencies without the negative control df2old.exp$otufreq <- df2old.exp$otuall - df2old.exp$NC2 # create column with otu frequencies without both controls df2old.exp$otufreq_samples <- df2old.exp$otufreq - df2old.exp$PC2 dnes <- format(Sys.time(), "%Y%m%d") write.csv(df2old.exp, paste("table_df2old_", dnes, ".csv", sep ="")) setwd(dire) df2old.exp <- read.csv(tail(list.files(pattern="table_df2old_2018"),1), na.strings=c("", " ", NA))[,-1] #clean conatminations based on beauveria #samples likely to be contaminated bov <- sort(c(0:23*4+1, 0:23*4+2,80, 95)) #classifier conta <- rep("clear", 96) conta[bov] <- "cont" otusam.prp <- prop.table(as.matrix(otusam[,-c(97,98)]),2) #which are statistically higher/lower kont.abs <- which(apply(otusam[,-c(97,98)],1, function(x) summary(aov(x~conta))[[1]]$"Pr(>F)"[1])<0.01) #negative control to sample ratio NC.samp.r <- 30 #positive control to sample ratio PC.samp.r <- 0.3 # no need to trimm pc2 fatsa as is amplified with giTS7 abd ITS4 system(paste(paths$blast, " -query ", dire, "PC2.fasta -db ", dire, "otus.fasta -task blastn -dust no -outfmt '7 stitle pident length mismatch gapopen qstart qend sstart send evalue bitscore' -num_alignments 1 -num_descriptions 1 -out ", dire,"PC2_match_to_Otus.txt", sep="")) #OTUs that do not follow thresholds based for negative or positive control kontrols <- sort(union(which(df2old.exp$otufreq_samples/df2old.exp$NC2<=NC.samp.r), which(df2old.exp$otufreq_samples/df2old.exp$PC2<=PC.samp.r))) #plant OTUs plantae <- sort(union(grep("Plant", df2old.exp$otuMatchUNITE), grep("Plant", df2old.exp$otuMatchNCBI))) # low abundance OTUs - quintupletons and lower low.abundance <- which(df2old.exp$otufreq_samples<=1) throw <- sort(union(union(kontrols, kont.abs), union(low.abundance, plantae))) #row.names(df2old.c)<- df2old.c$otu df2old.c <- df2old.exp[-throw, ] writeFast(df2old.c$sequence, df2old.c$otu, "otus_c.fasta") #write a date stamped csv table wit OTUs and species and lineage dnes <- format(Sys.time(), "%Y%m%d") write.csv(df2old.c, paste("table_df2old_c_", dnes, ".csv", sep ="")) ## Preparing communities based on various grouping criteria and taxonomic level community2old.sample.abs <- t(df2old.c[,22:117]) community2old.sample <- prop.table(community2old.sample.abs, 1) community2old.sample.obs <- community2old.sample.abs community2old.sample.obs[community2old.sample.obs!=0] <- 1 region.sample <- substr(rownames(community2old.sample),1,1) plant.sample <- ifelse(nchar(rownames(community2old.sample))==3, substr(rownames(community2old.sample),1,2),substr(rownames(community2old.sample),1,3)) part <- ifelse(nchar(rownames(community2old.sample))==3, substr(rownames(community2old.sample),3,3),substr(rownames(community2old.sample),4,4)) # merged community2old - plant community2old.abs <- matrix(nrow = nrow(community2old.sample.abs)/2, ncol = ncol(community2old.sample.abs)) dimnames(community2old.abs) <- list(unique(plant.sample) ,dimnames(community2old.sample.abs)[[2]]) for(i in 1:48) { community2old.abs [i,] <- community2old.sample.abs[2*i-1,]+community2old.sample.abs[2*i,] } community2old <- as.matrix(prop.table(community2old.abs, 1)) community2old.obs <- community2old.abs community2old.obs[community2old.obs!=0] <- 1 region <- substr(rownames(community2old),1,1) # dvojice <- substr(rownames(community2old),1,2) plant <- unique(plant.sample) #endup with community2old community2old.sample (both proportional and absolute) #community2old analysis with species # for samples and plants as islands respectively #prepare community2old matrix community2old.spec.sample.abs <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$species), sum)[,-1]) colnames(community2old.spec.sample.abs) <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$species), sum)[,1]) community2old.spec.sample <- prop.table(community2old.spec.sample.abs, 1) community2old.spec.sample.obs <- community2old.spec.sample.abs community2old.spec.sample.obs[community2old.spec.sample.obs!=0] <- 1 # merged community2old - plant community2old.spec.abs <- matrix(nrow = nrow(community2old.spec.sample.abs)/2, ncol = ncol(community2old.spec.sample.abs)) dimnames(community2old.spec.abs) <- list(unique(plant.sample) ,dimnames(community2old.spec.sample.abs)[[2]]) for(i in 1:48) { community2old.spec.abs [i,] <- community2old.spec.sample.abs[2*i-1,]+community2old.spec.sample.abs[2*i,] } community2old.spec <- as.matrix(prop.table(community2old.spec.abs, 1)) community2old.spec.obs <- community2old.spec community2old.spec.obs[community2old.spec.obs!=0] <- 1 #community2old analysis with genera # for samples and plants as islands respectively #prepare community2old matrix community2old.gen.sample.abs <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$genus), sum)[,-1]) colnames(community2old.gen.sample.abs) <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$genus), sum)[,1]) community2old.gen.sample <- prop.table(community2old.gen.sample.abs, 1) community2old.gen.sample.obs <- community2old.gen.sample.abs community2old.gen.sample.obs[community2old.gen.sample.obs!=0] <- 1 # merged community2old - plant community2old.gen.abs <- matrix(nrow = nrow(community2old.gen.sample.abs)/2, ncol = ncol(community2old.gen.sample.abs)) dimnames(community2old.gen.abs) <- list(unique(plant.sample) ,dimnames(community2old.gen.sample.abs)[[2]]) for(i in 1:48) { community2old.gen.abs [i,] <- community2old.gen.sample.abs[2*i-1,]+community2old.gen.sample.abs[2*i,] } community2old.gen <- as.matrix(prop.table(community2old.gen.abs, 1)) community2old.gen.obs <- community2old.gen community2old.gen.obs[community2old.gen.obs!=0] <- 1 #community2old analysis with family # for samples and plants as islands respectively #prepare community2old matrix community2old.fam.sample.abs <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$family), sum)[,-1]) colnames(community2old.fam.sample.abs) <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$family), sum)[,1]) community2old.fam.sample <- prop.table(community2old.fam.sample.abs, 1) community2old.fam.sample.obs <- community2old.fam.sample.abs community2old.fam.sample.obs[community2old.fam.sample.obs!=0] <- 1 # merged community2old - plant community2old.fam.abs <- matrix(nrow = nrow(community2old.fam.sample.abs)/2, ncol = ncol(community2old.fam.sample.abs)) dimnames(community2old.fam.abs) <- list(unique(plant.sample) ,dimnames(community2old.fam.sample.abs)[[2]]) for(i in 1:48) { community2old.fam.abs [i,] <- community2old.fam.sample.abs[2*i-1,]+community2old.fam.sample.abs[2*i,] } community2old.fam <- as.matrix(prop.table(community2old.fam.abs, 1)) community2old.fam.obs <- community2old.fam community2old.fam.obs[community2old.fam.obs!=0] <- 1 #community2old analysis with order # for samples and plants as islands respectively #prepare community2old matrix community2old.order.sample.abs <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$order), sum)[,-1]) colnames(community2old.order.sample.abs) <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$order), sum)[,1]) community2old.order.sample <- prop.table(community2old.order.sample.abs, 1) community2old.order.sample.obs <- community2old.order.sample.abs community2old.order.sample.obs[community2old.order.sample.obs!=0] <- 1 # merged community2old - plant community2old.order.abs <- matrix(nrow = nrow(community2old.order.sample.abs)/2, ncol = ncol(community2old.order.sample.abs)) dimnames(community2old.order.abs) <- list(unique(plant.sample) ,dimnames(community2old.order.sample.abs)[[2]]) for(i in 1:48) { community2old.order.abs [i,] <- community2old.order.sample.abs[2*i-1,]+community2old.order.sample.abs[2*i,] } community2old.order <- as.matrix(prop.table(community2old.order.abs, 1)) community2old.order.obs <- community2old.order community2old.order.obs[community2old.order.obs!=0] <- 1 #community2old analysis with class # for samples and plants as islands respectively #prepare community2old matrix community2old.class.sample.abs <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$class), sum)[,-1]) colnames(community2old.class.sample.abs) <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$class), sum)[,1]) community2old.class.sample <- prop.table(community2old.class.sample.abs, 1) community2old.class.sample.obs <- community2old.class.sample.abs community2old.class.sample.obs[community2old.class.sample.obs!=0] <- 1 # merged community2old - plant community2old.class.abs <- matrix(nrow = nrow(community2old.class.sample.abs)/2, ncol = ncol(community2old.class.sample.abs)) dimnames(community2old.class.abs) <- list(unique(plant.sample) ,dimnames(community2old.class.sample.abs)[[2]]) for(i in 1:48) { community2old.class.abs [i,] <- community2old.class.sample.abs[2*i-1,]+community2old.class.sample.abs[2*i,] } community2old.class <- as.matrix(prop.table(community2old.class.abs, 1)) community2old.class.obs <- community2old.class community2old.class.obs[community2old.class.obs!=0] <- 1 #community2old analysis with phylum # for samples and plants as islands respectively #prepare community2old matrix community2old.phyl.sample.abs <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$phylum), sum)[,-1]) colnames(community2old.phyl.sample.abs) <- t(aggregate(x =df2old.c[,22:117], list(df2old.c$phylum), sum)[,1]) community2old.phyl.sample <- prop.table(community2old.phyl.sample.abs, 1) community2old.phyl.sample.obs <- community2old.phyl.sample.abs community2old.phyl.sample.obs[community2old.phyl.sample.obs!=0] <- 1 # merged community2old - plant community2old.phyl.abs <- matrix(nrow = nrow(community2old.phyl.sample.abs)/2, ncol = ncol(community2old.phyl.sample.abs)) dimnames(community2old.phyl.abs) <- list(unique(plant.sample) ,dimnames(community2old.phyl.sample.abs)[[2]]) for(i in 1:48) { community2old.phyl.abs [i,] <- community2old.phyl.sample.abs[2*i-1,]+community2old.phyl.sample.abs[2*i,] } community2old.phyl <- as.matrix(prop.table(community2old.phyl.abs, 1)) community2old.phyl.obs <- community2old.phyl community2old.phyl.obs[community2old.phyl.obs!=0] <- 1 ################################################################# #################### ISOLATION 2015 ######################## ################################################################### #read in table with isolates, sequences and assigned species according to NCBI or UNITE tabulka <- read.csv("table_isolation_15.csv", na.strings=c("", " ", NA)) # community15 analysis # community15 analysis - species # samples, parts and plants as islands respectively # absolute and observation tables generated (abs, obs) # can do similarly for any taxonomic resolution (genus, family, order, class) community15.spec.sample.abs <- as.data.frame.matrix(table(tabulka$fragment, tabulka$species)) riadok <- rowSums(community15.spec.sample.abs) riadok[riadok == 0 ] <- 1 community15.spec.sample <- apply(community15.spec.sample.abs, 2, function(x) x/riadok) community15.spec.sample.obs <- community15.spec.sample.abs community15.spec.sample.obs[community15.spec.sample.obs!=0] <- 1 # merged community15 - part community15.spec.part.abs <- as.data.frame.matrix(table(tabulka$part, tabulka$species)) riadok <- rowSums(community15.spec.part.abs) riadok[riadok == 0 ] <- 1 community15.spec.part <- apply(community15.spec.part.abs, 2, function(x) x/riadok) community15.spec.part.obs <- community15.spec.part.abs community15.spec.part.obs[community15.spec.part.obs!=0] <- 1 # merged community15 - plant community15.spec.abs <- as.data.frame.matrix(table(tabulka$plant, tabulka$species)) riadok <- rowSums(community15.spec.abs) riadok[riadok == 0 ] <- 1 community15.spec <- apply(community15.spec.abs, 2, function(x) x/riadok) community15.spec.obs <- community15.spec.abs community15.spec.obs[community15.spec.obs!=0] <- 1 #################### ## Comparing methods #################### # make dish identifier community <- community15.spec.sample.abs well.sample.pos<- which(!substr(rownames(community), 6,6) %in% c("a", "b","c","d","e")) RPD.sample.pos<- which(substr(rownames(community), 6,6) %in% c("a", "b","c","d","e")) PDAwell.clean.sample.pos <- which(substr(rownames(community), 6,6) %in% c("2")) PDAwell.sample.pos <- which(substr(rownames(community), 6,6) %in% c("2", "5")) SAwell.sample.pos <- which(substr(rownames(community), 6,6) %in% c("1", "4")) AMMwell.sample.pos <- which(substr(rownames(community), 6,6) %in% c("3", "6")) PDA.sample.pos <- append(RPD.sample.pos, PDAwell.sample.pos) dish.sample<- rep("6well", nrow(community15.spec.sample.abs)) dish.sample[RPD.sample.pos] <- "RPD" medium <- rep("PDA", nrow(community)) medium[SAwell.sample.pos] <- "SA" medium[AMMwell.sample.pos] <- "AMM" bengal <- rep("none", nrow(community)) bengal[which(substr(rownames(community), 6,6) %in% c("4","5", "6"))] <- "bengal" ## species accumulation png(filename="accum_Isolation2015_method.png",units="in", width=10, height=8, pointsize=12, res=72) plot(specaccum(community[well.sample.pos,], "rare"), ci.type = "polygon", ci.col=rgb(1,0,0, alpha = 0.1), col=rgb(1,0,0, alpha = 0.5), ci.lty =0, , xlab ="Fragments", ylab="Species", lty =3) plot(specaccum(community[RPD.sample.pos,], "rare"), ci.type = "polygon", col=rgb(0,0,1, alpha = 0.5), ci.col =rgb(0,0,1, alpha = 0.1), lty =3, ci.lty =0,add =T) plot(specaccum(community[well.sample.pos,], "coll"), col=rgb(1,0,0), add =T) plot(specaccum(community[RPD.sample.pos,], "coll"), col=rgb(0,0,1), add =T) legend( "topleft", c("Multiwell plate with different media","Petri dish with single medium", " ", " "), col=c("red", "blue" ,"red", "blue"), title=c("Observed Randomized"), title.adj=0.03, lty=c(1,1,2,2), ncol=2, cex =1.2, bty="n") dev.off() #Chao estimator of community richness specpool(community)$chao ## nmds #####whole community agregat <- aggregate(. ~substr(rownames(community),1,3)+dish.sample, data = community, sum) rownames(agregat) <- paste(agregat[,1], agregat[,2], sep =" ") x<- 2 agr.num <- c(1:x) agregat.n <- agregat[, -agr.num] mds1 <- metaMDS(agregat.n, trymax =100000, k=2, dist ="jaccard") png(filename="NMDS_Isolation2015_method.png",units="in", width=10, height=8, pointsize=12, res=72) plot(mds1 , type ="n") points(mds1$points, pch = c(15, 16)[as.factor(agregat[,2])], col=c( "red" ,"blue")[as.factor(agregat[,2])], cex = 2) ordihull(mds1$points, as.factor(agregat[,2])) legend( "topleft", c("Multiwell plate with multiple media","Petri dish with single medium"), pch=c(15,16), col=c( "red" ,"blue"), cex =1.2, bty="n") legend( "bottomright", paste("stress = ", mds1$stress, sep =""), bty="n") dev.off() adonis(agregat.n~substr(agregat[,1], 1,1)*agregat[,2]) ######## multiwell #community[well.sample.pos,], sum) agregat <- aggregate(. ~substr(rownames(community[well.sample.pos,]),1,3)+medium[well.sample.pos]+bengal[well.sample.pos], data =community[well.sample.pos,], sum) rownames(agregat) <- paste(agregat[,1], agregat[,2], agregat[,3], sep =" ") agregat <- agregat[which(rowSums(agregat[,-agr.num])!=0),] x<-3 agr.num <- c(1:x) agregat.n <- agregat[, -agr.num] agregat.n <- agregat.n[, which(colSums(agregat.n)!=0)] # dispose 213 SA to be able to plot meaningful distances agregat.x <- agregat.n[-27,] mds1 <- metaMDS(agregat.x, trymax =100000, k=2, dist ="jaccard") png(filename="NMDS_Isolation2015_multimedia.png",units="in", width=10, height=8, pointsize=12, res=72) plot(mds1 , type ="n") points(mds1$points+0.005, pch = c(15, 16,17)[as.factor(agregat[-27,2])], col=c( "red" ,"orange")[as.factor(agregat[-27,3])], cex = 2) ordihull(mds1$points+0.005, as.factor(agregat[-27,2])) legend( "topleft", c(" Aspergillus minimal mediun"," Potato dextrose agar", " Sabordough agar", " ", " ", " "), pch=c(15:17,15:17), col=rep(c("red" ,"orange"),each=3), title=c("Rose bengal no Rose bengal"), title.adj=0.03, ncol=2, cex =1.2, bty="n") legend( "bottomright", paste("stress = ", mds1$stress, sep =""), bty="n") dev.off() ########## VENN DIAGRAM library(VennDiagram) #community <- aggregate(.~ dish.sample + substr(rownames(community),1,4), data = community, sum)[,-c(1,2)] Tissue <- factor(rep(rep(c("Leaves", "Roots", "Stems"),each =2, 10)), levels =c("Leaves", "Stems", "Roots")) Field <- factor(rep(c("Conventional", "Organic"), each =30), levels =c("Conventional", "Organic")) dish.sample<- factor(rep(c("6well","RPD"), 30), levels =c("6well","RPD")) overlap<- calculate.overlap(x=list(colnames(community[,(colSums(community[RPD.sample.pos,])!=0)]),colnames(community[,(colSums(community[well.sample.pos,])!=0)]))) # draw venn from overlap png(filename="Venn_Isolation2015_method.png",units="in", width=6, height=6, pointsize=12, res=72) #dev.off() draw.pairwise.venn(length(overlap$a1), length(overlap$a2), length(overlap$a3), scaled =F, fill = c("blue", "red"), alpha =0.4, cex =4 ) dev.off() #Isolation in fields v1 <- venn.diagram( list( "Conventional"= colnames(community15.spec)[colSums(community15.spec[1:5,])!=0], "Organic" =colnames(community15.spec)[colSums(community15.spec[6:10,])!=0]), filename =NULL, # cex.prop= log10, cex=4, cat.cex =2, cat.pos =c(330, 150), cat.dist =0.05) png(filename="Venn_Field_Isolation_2015.png", units="in", width=6, height=6, pointsize=12, res=300) grid.newpage() grid.draw(v1) dev.off() #Isolation in tissue Tissue <- rep(c("Leaves","Roots","Stems"), 10) v1 <- venn.diagram( list( Leaves= colnames(community15.spec.part.abs)[colSums(community15.spec.part.abs[Tissue=="Leaves",])!=0], Stems =colnames(community15.spec.part.abs)[colSums(community15.spec.part.abs[Tissue=="Stems",])!=0], Roots =colnames(community15.spec.part.abs)[colSums(community15.spec.part.abs[Tissue=="Roots",])!=0]), filename =NULL, fill=c("green", "yellowgreen", "saddlebrown"), alpha =0.3, # cex.prop= log10, cex=4, cat.cex =2) png(filename="Venn_Tissue_Isolation_2015.png", units="in", width=6, height=6, pointsize=12, res=300) grid.newpage() grid.draw(v1) dev.off() ## rarefaction with anova comparing 1:450 subsets of both fragments 1000 times and extracting the p values for later comparison pival <- c() for(j in 1:1000) { meth <- c() rpdspecnum <- c() wellspecnum <- c() specnum <- c() for(i in 1:length(RPD.sample.pos)) { rpdspecnum <- append(rpdspecnum, sum(colSums(community[sample(RPD.sample.pos,i),])!=0)) wellspecnum <- append(wellspecnum, sum(colSums(community[sample(well.sample.pos,i),])!=0)) specnum <- append(specnum, c(sum(colSums(community[sample(RPD.sample.pos,i),])!=0), sum(colSums(community[sample(well.sample.pos,i),])!=0))) meth <- append(meth, c("PDAwell", "RPD")) } pival <- append(pival, summary(aov(specnum~meth))[[1]]$"Pr(>F)"[1]) } hist(pival) ## rankabundances of species in sampled plants community <- community15.spec.sample.abs[,order(colSums(community15.spec.abs))] ilol <- data.frame( Species = factor(rep(colnames(community),2), levels = colnames(community) ), Count = c( colSums(community[RPD.sample.pos,]), colSums(community[well.sample.pos,])), Method = rep(c("Single medium", "Multiple media"), each =length(colnames(community)))) g0 <- ggplot(ilol, aes(x=Species, y=Count, fill = Method)) g0 + geom_bar(stat="identity") + theme(axis.text.x = element_text(angle = 90, hjust = 1)) + coord_flip() ggsave(filename="ranked_abundance_stacked_Isolation_2015.png",units="in", width=8, height=16) ## null model Venn Diagram example community <- community15.spec.sample.obs community <- aggregate(.~ dish.sample + substr(rownames(community),1,4), data = community, sum)[,-c(1,2)] Tissue <- factor(rep(rep(c("Leaves", "Roots", "Stems"),each =2, 10)), levels =c("Leaves", "Stems", "Roots")) Field <- factor(rep(c("Conventional", "Organic"), each =30), levels =c("Conventional", "Organic")) dish.sample<- factor(rep(c("6well","RPD"), 30), levels =c("6well","RPD")) # random community permutations randomCom <- permatfull(community>0, fixedmar="both", mtype="prab", burnin =270, times=9999) # functions to extract number of species by cultivation method meth2F <- function(x) sum(colSums(x[dish.sample=="RPD",]>0)>0 & colSums(x[dish.sample!="RPD",]>0)==0) meth1F <- function(x) sum(colSums(x[dish.sample=="6well",]>0)>0 & colSums(x[dish.sample!="6well",]>0)==0) bothF <- function(x) sum(colSums(x[dish.sample=="RPD",]>0)>0 & colSums(x[dish.sample=="6well",]>0)>0) # lee and sti as intermediate variables with means and confidence interval values of expected species count funlist <- list(meth1F, bothF ) lee<- sapply(funlist, function (x) { result <- unlist(lapply(randomCom$perm, x)) paste( sort(result)[floor(0.01*length(result))], " - ", round(median(result),1), " - ",sort(result)[ceiling(0.99*length(result))], sep ="") print(paste(sort(result)[floor(0.01*length(result))], " - ", round(mean(result)), " - ",sort(result)[ceiling(0.99*length(result))], sep ="")) }) funlist <- list(meth2F, bothF) sti <- sapply(funlist, function (x) { result <- unlist(lapply(randomCom$perm, x)) paste(sort(result)[floor(0.01*length(result))], " - ", round(median(result),1), " - ",sort(result)[ceiling(0.99*length(result))], sep ="") print(paste( sort(result)[floor(0.01*length(result))], " - ", round(mean(result)), " - ",sort(result)[ceiling(0.99*length(result))], sep ="")) }) #create Venn Diagram stored in v1 with labels extracted from lee and sti variables v1 <- draw.pairwise.venn(length(overlap$a1), length(overlap$a2), length(overlap$a3), scaled =F, fill = c("blue", "red"), alpha =0.4, cex =2 ) v1[[5]]$label <- lee[1] v1[[6]]$label <- sti[1] v1[[7]]$label <- lee[2] # export venn diagram to png file png(filename="Venn_Isolation2015_method_exp.png", units="in", width=6, height=6, pointsize=12, res=300) grid.newpage() grid.draw(v1) dev.off()