(Moved here from the github issue page)
So, I am running mothur v.1.42.1 on an HPC cluster, following the MiSeq SOP commands but the number of OTUs I get in the end seems erroneously high: 145074 OTUs (16S rRNA) using 0.03 cut-off.
The data are from a MiSeq run using the V3–4 regions (primers 341F - 805RB).
I have copied the commands from the batch file I am using below (I just couldn’t add it as an attachment).
#This is a batch file for running the equivalent of mothur
make.contigs(file=mapping.files, processors=8)
summary.seqs(fasta=mapping.trim.contigs.fasta)
screen.seqs(fasta=mapping.trim.contigs.fasta, group=mapping.contigs.groups, summary=mapping.trim.contigs.summary, maxambig=0, minlength=100, maxlength=600, maxhomop=8)
unique.seqs(fasta=mapping.trim.contigs.good.fasta)
count.seqs(name=mapping.trim.contigs.good.names, group=mapping.contigs.good.groups)
summary.seqs(count=mapping.trim.contigs.good.count_table)
pcr.seqs(fasta=silva.nr_v132.align, start=1044, end=43116, keepdots=T, processors=8)
rename.file(input=silva.nr_v132.pcr.align, new=silva.fasta)
summary.seqs(fasta=silva.fasta)
align.seqs(fasta=mapping.trim.contigs.good.unique.fasta, reference=silva.fasta)
summary.seqs(fasta=mapping.trim.contigs.good.unique.align, count=mapping.trim.contigs.good.count_table)
screen.seqs(fasta=mapping.trim.contigs.good.unique.align, count=mapping.trim.contigs.good.count_table, summary=mapping.trim.contigs.good.unique.summary, start=6388, end=25316, maxhomop=8)
summary.seqs(fasta=mapping.trim.contigs.good.unique.good.align, count=mapping.trim.contigs.good.good.count_table)
filter.seqs(fasta=mapping.trim.contigs.good.unique.good.align, vertical=T, trump=.)
unique.seqs(fasta=mapping.trim.contigs.good.unique.good.filter.fasta, count=mapping.trim.contigs.good.good.count_table)
pre.cluster(fasta=mapping.trim.contigs.good.unique.good.filter.unique.fasta, count=mapping.trim.contigs.good.unique.good.filter.count_table, diffs=2)
chimera.vsearch(fasta=mapping.trim.contigs.good.unique.good.filter.unique.precluster.fasta, count=mapping.trim.contigs.good.unique.good.filter.unique.precluster.count_table, dereplicate=t)
remove.seqs(fasta=mapping.trim.contigs.good.unique.good.filter.unique.precluster.fasta, accnos=mapping.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.accnos)
classify.seqs(fasta=mapping.trim.contigs.good.unique.good.filter.unique.precluster.pick.fasta, count=mapping.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.count_table, reference=silva.nr_v132.align, taxonomy=silva.nr_v132.tax, cutoff=80)
summary.tax(taxonomy=mapping.trim.contigs.good.unique.good.filter.unique.precluster.pick.nr_v132.wang.taxonomy, count=mapping.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.count_table)
dist.seqs(fasta=mapping.trim.contigs.good.unique.good.filter.unique.precluster.pick.fasta, cutoff=0.03)
cluster(column=mapping.trim.contigs.good.unique.good.filter.unique.precluster.pick.dist, count=mapping.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.count_table)
make.shared(list=mapping.trim.contigs.good.unique.good.filter.unique.precluster.pick.opti_mcc.list, count=mapping.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.count_table, label=0.03)
classify.otu(list=mapping.trim.contigs.good.unique.good.filter.unique.precluster.pick.opti_mcc.list, count=mapping.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.count_table, taxonomy=mapping.trim.contigs.good.unique.good.filter.unique.precluster.pick.nr_v132.wang.taxonomy, label=0.03)
rename.file(taxonomy=mapping.trim.contigs.good.unique.good.filter.unique.precluster.pick.opti_mcc.0.03.cons.taxonomy, shared=mapping.trim.contigs.good.unique.good.filter.unique.precluster.pick.opti_mcc.shared)
count.groups(shared=mapping.opti_mcc.shared)
rarefaction.single(shared=mapping.opti_mcc.shared, calc=sobs, freq=100)
summary.single(shared=mapping.opti_mcc.shared, calc=nseqs-coverage-sobs-invsimpson)
dist.shared(shared=mapping.opti_mcc.shared, calc=braycurtis)
get.group(shared=mapping.opti_mcc.shared)
nmds(phylip=mapping.opti_mcc.braycurtis.0.03.lt.dist)