Hello! I can put together roughly 5 to 7 runs for analysis without problems.
I usually run all steps on all samples at the same time, and that works fine. But I ran into a problem where I needed to analyse more runs together and could not, because of limited computer resources. Here is what I did.
Hope it helped.
Firstly, run mothur on as many samples as possible and save the results.
Secondly, run mothur on the sample you want to add to the database.
Merge both the fasta and count files using the appropriate functions (merge.files and merge.count).
Reanalyze starting from the alignment step.
Here is my full pipeline for a set of data I added to another one. Cheers!
# Stage 1: process the new run (pouletchamp2) on its own, from contig assembly
# through lineage removal, producing a fasta/count_table pair for merging.
# Every step picks up the previous step's output via "current", so the order
# of the commands below matters.
set.logfile(name=pouletchamp2merge_logFile_clustersplit)
set.current(processors=32)
set.seed(seed=100)
# Build contigs from the inputs listed in pouletchamp2.file, with
# primers.oligo.txt as the oligos file; length/ambiguity/homopolymer limits
# are applied here already.
make.contigs(file=pouletchamp2.file, oligos=primers.oligo.txt, checkorient=t, pdiffs=2, deltaq=5, maxambig=0, maxlength=300, maxhomop=20, processors=32)
# Collapse identical sequences, then report summary statistics.
unique.seqs(fasta=current, count=current)
summary.seqs(fasta=current, count=current)
# Drop sequences with any ambiguous base or a homopolymer run longer than 20.
screen.seqs(fasta=current, count=current, summary=current, maxambig=0, maxhomop=20)
# NOTE(review): sequences are classified again later in this stage (after the
# chimera check), and no command in between takes a taxonomy argument, so this
# early classification looks optional - confirm before removing.
classify.seqs(fasta=current, count=current, iters=500, reference=silva.nr_v138.align, taxonomy=silva.nr_v138.tax, cutoff=80, processors=32)
# Align against the PCR-trimmed SILVA reference.
# NOTE(review): the alignment reference is named v132 while classification
# uses v138 files - confirm this mix is intentional.
align.seqs(fasta=current, reference=silva.nr_v132.pcr.align, flip=t, processors=32)
summary.seqs(fasta=current, count=current)
count.groups(count=current)
# Keep only sequences that start at or before alignment position 1968 and end
# at or after position 11550 (plus the same ambiguity/homopolymer limits).
screen.seqs(fasta=current, count=current, summary=current, start=1968, end=11550, maxambig=0, maxhomop=20)
count.groups(count=current)
summary.seqs(fasta=current, count=current)
# Remove gap-only alignment columns (vertical) and columns containing '.' (trump).
filter.seqs(fasta=current, vertical=T, trump=.)
# Filtering can make previously distinct sequences identical, so dereplicate again.
unique.seqs(fasta=current, count=current)
count.groups(count=current)
summary.seqs(fasta=current, count=current)
set.seed(seed=100)
# Pre-cluster with a threshold of 4 differences.
pre.cluster(fasta=current, count=current, diffs=4, processors=32)
count.groups(count=current)
summary.seqs(fasta=current, count=current)
# Chimera check with vsearch.
chimera.vsearch(fasta=current, count=current, dereplicate=t, processors=32)
count.groups(count=current)
summary.seqs(fasta=current, count=current)
set.seed(seed=100)
# Classify the cleaned sequences; this taxonomy feeds remove.lineage below.
classify.seqs(fasta=current, count=current, iters=1000, reference=silva.nr_v138.align, taxonomy=silva.nr_v138.tax, cutoff=80)
# Remove sequences assigned to the listed unwanted lineages.
remove.lineage(fasta=current, count=current, taxonomy=current, taxon=Chloroplast-Mitochondria-unknown-Eukaryota)
summary.tax(taxonomy=current, count=current)
summary.seqs(fasta=current, count=current)
count.groups(count=current)
# Stage 2: merge the new run's processed fasta/count_table with those of a
# previously processed dataset (combined_saracperf...), then redo the analysis
# from the alignment step through OTU clustering and OTU classification.
# Inputs to merge.files / merge.count are given as a dash-separated list.
# NOTE(review): the pouletchamp2 inputs carry the ".pick" suffix while the
# combined_saracperf inputs do not, and no remove.lineage is run after the
# reclassification below - confirm the older dataset needs no lineage removal.
merge.files(input=pouletchamp2.trim.contigs.unique.good.good.filter.unique.precluster.denovo.vsearch.pick.fasta-combined_saracperf.good.filter.unique.precluster.denovo.vsearch.fasta, output=combined_pouletchamp.fasta)
merge.count(count=pouletchamp2.trim.contigs.unique.good.good.filter.unique.precluster.denovo.vsearch.pick.count_table-combined_saracperf.good.filter.unique.precluster.denovo.vsearch.count_table, output=combined_pouletchamp.count_table)
# Name the merged files explicitly here; the following steps use "current".
summary.seqs(fasta=combined_pouletchamp.fasta, count=combined_pouletchamp.count_table)
set.seed(seed=100)
# Re-align the merged sequences against the same reference as the first align.seqs call.
# NOTE(review): match/mismatch are set explicitly here but not in the earlier
# align.seqs call - confirm the difference is intended.
align.seqs(fasta=current, reference=silva.nr_v132.pcr.align, flip=t, processors=32, match=2, mismatch=-2)
summary.seqs(fasta=current, count=current)
# Same alignment-coordinate and quality screen as used for the single run.
screen.seqs(fasta=current, count=current, summary=current, start=1968, end=11550, maxambig=0, maxhomop=20)
# Remove gap-only alignment columns (vertical) and columns containing '.' (trump).
filter.seqs(fasta=current, vertical=T, trump=.)
summary.seqs(fasta=current, count=current)
# Dereplicate again after filtering.
unique.seqs(fasta=current, count=current)
summary.seqs(fasta=current, count=current)
set.seed(seed=100)
# Pre-cluster with a threshold of 4 differences.
pre.cluster(fasta=current, count=current, diffs=4)
summary.seqs(fasta=current, count=current)
set.seed(seed=100)
# Chimera check with vsearch on the merged data.
chimera.vsearch(fasta=current, count=current, dereplicate=t)
summary.seqs(fasta=current, count=current)
count.groups(count=current)
set.seed(seed=100)
# Classify the merged sequences; the taxonomy is used by cluster.split and classify.otu.
classify.seqs(fasta=current, count=current, iters=1000, reference=silva.nr_v138.align, taxonomy=silva.nr_v138.tax, cutoff=80, processors=32)
set.seed(seed=100)
# Split by taxonomy (taxlevel=6) and cluster into OTUs with a 0.02 cutoff.
cluster.split(fasta=current, count=current, taxonomy=current, taxlevel=6, iters=1000, precision=1000, delta=0, cutoff=0.02, processors=32)
# Build the shared (OTU x group) table at the 0.02 label.
make.shared(list=current,count=current,label=0.02)
# Assign a consensus taxonomy to each OTU, with threshold=75.
classify.otu(list=current, count=current, taxonomy=current, threshold=75)
quit()