I have 16S data that I am trying to run with the Schloss SOP tutorial. My original .sff file is “F11.sff” I have this as a .txt file that I use for my batch file:
#this is the first attempt to create a pipeline using the batch file on Mothur. This is using the F11 sample with the silva.bacteria and trainset folders inside of fodler F11. If it does not work, then try to just put the files from that folder into the sample folder. Started at 13:44 on my watch
sffinfo(sff=F11.sff, flow=T)
summary.seqs(fasta=F11.fasta)
trim.flows(flow=F11.flow, oligos=F.oligos, pdiffs=2, bdiffs=1)
shhh.flows(file=F11.flow.files)
trim.seqs(fasta=F11.shhh.fasta,name=F11.shhh.names,oligos=F.oligos,pdiffs=2,bdiffs=1,maxhomop=8, minlength=200, flip=T)
summary.seqs(fasta=F11.shhh.trim.fasta, name=F11.shhh.trim.names)
get.current()
unique.seqs(fasta=F11.shhh.trim.fasta, name=F11.shhh.trim.names)
summary.seqs(fasta=F11.shhh.trim.unique.fasta, name=F11.shhh.trim.names)
align.seqs(fasta=F11.shhh.trim.unique.fasta, reference=silva.bacteria.fasta)
summary.seqs(fasta=F11.shhh.trim.unique.align, name=F11.shhh.trim.names)
screen.seqs(fasta=F11.shhh.trim.unique.align,name=F11.shhh.trim.names,group=F11.shhh.groups,end=13125, optimize=start, criteria=95)
summary.seqs(fasta=F11.shhh.trim.unique.good.align, name=F11.shhh.trim.good.names)
filter.seqs(fasta=F11.shhh.trim.unique.good.align, vertical=T, trump=.)
unique.seqs(fasta=F11.shhh.trim.unique.good.filter.fasta, name=F11.shhh.trim.good.names)
pre.cluster(fasta=F11.shhh.trim.unique.good.filter.unique.fasta,name=F11.shhh.trim.unique.good.filter.names, group=F11.shhh.good.groups, diffs=2)
summary.seqs(fasta=F11.shhh.trim.unique.good.filter.unique.precluster.fasta,name=F11.shhh.trim.unique.good.filter.unique.precluster.names)
chimera.uchime(fasta=F11.shhh.trim.unique.good.filter.unique.precluster.fasta,name=F11.shhh.trim.unique.good.filter.unique.precluster.names,group=F11.shhh.good.groups)
chimera.uchime(fasta=F11.shhh.trim.unique.good.filter.unique.precluster.fasta,name=F11.shhh.trim.unique.good.filter.unique.precluster.names,group=F11.shhh.good.groups, dereplicate=T)
remove.seqs(accnos=F11.shhh.trim.unique.good.filter.unique.precluster.uchime.accnos,fasta=F11.shhh.trim.unique.good.filter.unique.precluster.fasta,name=F11.shhh.trim.unique.good.filter.unique.precluster.names, group=F11.shhh.good.groups, dups=F)
summary.seqs(name=current)
classify.seqs(fasta=F11.shhh.trim.unique.good.filter.unique.precluster.pick.fasta,name=F11.shhh.trim.unique.good.filter.unique.precluster.pick.names,group=F11.shhh.good.pick.groups,template=trainset9_032012.pds.fasta, taxonomy=trainset9_032012.pds.tax, cutoff=80)
remove.lineage(fasta=F11.shhh.trim.unique.good.filter.unique.precluster.pick.fasta,name=F11.shhh.trim.unique.good.filter.unique.precluster.pick.names,group=F11.shhh.good.pick.groups,taxonomy=F11.shhh.trim.unique.good.filter.unique.precluster.pick.pds.wang.taxonomy,taxon=Mitochondria-Chloroplast-Archaea-Eukarya-unknown)
summary.seqs(fasta=F11.shhh.trim.unique.good.filter.unique.precluster.pick.pick.fasta,name=F11.shhh.trim.unique.good.filter.unique.precluster.pick.pick.names)
system(cp F11.shhh.trim.unique.good.filter.unique.precluster.pick.pick.fasta F11final.fasta)
system(cp F11.shhh.trim.unique.good.filter.unique.precluster.pick.pick.names F11final.names)
system(cp F11.shhh.good.pick.pick.groups F11final.groups)
system(cp F11.shhh.trim.unique.good.filter.unique.precluster.pick.pds.wang.pick.taxonomy F11final.taxonomy)
dist.seqs(fasta=F11final.fasta, cutoff=0.15)
cluster(column=F11final.dist, name=F11final.names)
make.shared(list=F11final.an.list, group=F11final.groups, label=0.03)
count.groups()
sub.sample(shared=F11final.an.shared, size=3231)
classify.otu(list=F11final.an.list, name=F11final.names, taxonomy=F11final.taxonomy, label=0.03)
phylotype(taxonomy=F11final.taxonomy, name=F11final.names, label=1)
make.shared(list=F11final.tx.list, group=F11final.groups, label=1)
sub.sample(shared=F11final.tx.shared, size=3231)
classify.otu(list=F11final.tx.list, name=F11final.names, taxonomy=F11final.taxonomy, label=1)
dist.seqs(fasta=F11final.fasta, output=lt)
clearcut(phylip=F11final.phylip.dist)
However, when I get to the count.groups() command, Mothur displays the number of total sequences. I then
manually enter the number. I put 3231 in the batch command because it was from an earlier sample. This gave me the message of “F10 contains 2528. Eliminating. The size you selected is too large, skipping shared file.” I know that the number should have been 2528, but how can I incorporate that into the batch file without knowing what the number will be?
Also, later downstream where I used “label=0.03” Mothur displayed “your file does not include the label 0.03. I will use 0.02.
0.02 3”
The count.groups() number seems to come from the final.an.shared file (Excel format) adding D2+E2+F2+G2+H2 etc… and the label seems to come from cell A2.
Hopefully that was not too convoluted.
Basically, I can manually enter the number given by the count.groups command. But, how can I make a pipeline using the batch command to run the whole tutorial without having to manually enter the number given from count.groups? Is there a way to easily do that?
Thank you very much for your time and help.