Hi,
I am using mothur to analyze 24 samples obtained from raw Illumina MiSeq 16S FASTQ data.
The raw data total 13 Gb of 2x300 bp reads covering the V3 region (341-805R).
Everything went well until the cluster.split command: it has created 806 Gb of data and has been running for almost 7 days, and it is still running…
Is something wrong? Thank you.
Here are my commands:
make.contigs(file=stability.files, processors=4)
summary.seqs(fasta=stability.trim.contigs.fasta); total 6866561 seqs
screen.seqs(fasta=stability.trim.contigs.fasta, group=stability.contigs.groups, maxambig=0, maxlength=480)
get.current()
summary.seqs(fasta=stability.trim.contigs.good.fasta); total 5431619 seqs
unique.seqs(fasta=stability.trim.contigs.good.fasta); 2838557 unique seqs, total 5431619 seqs
count.seqs(name=stability.trim.contigs.good.names, group=stability.contigs.good.groups)
summary.seqs(count=stability.trim.contigs.good.count_table); 2838557 unique seqs, total 5431619 seqs
pcr.seqs(fasta=silva.bacteria.fasta, oligos=pcrTest.oligos)
align.seqs(fasta=stability.trim.contigs.good.unique.fasta, reference=silva.bacteria.pcr.fasta)
summary.seqs(fasta=stability.trim.contigs.good.unique.align, count=stability.trim.contigs.good.count_table); 2838557 unique seqs, total 5431619 sequence
screen.seqs(fasta=stability.trim.contigs.good.unique.align, count=stability.trim.contigs.good.count_table, summary=stability.trim.contigs.good.unique.summary, start=6388, end=25316, maxhomop=8)
summary.seqs(fasta=current, count=current); 2710518 unique seqs, total 5276789 seqs
filter.seqs(fasta=stability.trim.contigs.good.unique.good.align, vertical=T, trump=.)
unique.seqs(fasta=stability.trim.contigs.good.unique.good.filter.fasta, count=stability.trim.contigs.good.good.count_table)
pre.cluster(fasta=stability.trim.contigs.good.unique.good.filter.unique.fasta, count=stability.trim.contigs.good.unique.good.filter.count_table, diffs=2)
summary.seqs(fasta=current, count=current); 1199095 unique seqs, total 5376789 seqs
chimera.uchime(fasta=stability.trim.contigs.good.unique.good.filter.unique.precluster.fasta, count=stability.trim.contigs.good.unique.good.filter.unique.precluster.count_table, dereplicate=t)
remove.seqs(fasta=stability.trim.contigs.good.unique.good.filter.unique.precluster.fasta, accnos=stability.trim.contigs.good.unique.good.filter.unique.precluster.uchime.accnos)
summary.seqs(fasta=stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.fasta, count=stability.trim.contigs.good.unique.good.filter.unique.precluster.uchime.pick.count_table)
966622 Unique sequences, total 4990158 seqs, 7.19% chimera
classify.seqs(fasta=stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.fasta, count=stability.trim.contigs.good.unique.good.filter.unique.precluster.uchime.pick.count_table, reference=silva.bacteria.ng.fasta, taxonomy=silva.bacteria.silva.tax, cutoff=80)
remove.lineage(fasta=stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.fasta, count=stability.trim.contigs.good.unique.good.filter.unique.precluster.uchime.pick.count_table, taxonomy=stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.silva.wang.taxonomy, taxon=Chloroplast-Mitochondria-unknown-Eukaryota)
959943 unique sequences, total 4958696 seqs
cluster.split(fasta=stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.fasta, count=stability.trim.contigs.good.unique.good.filter.unique.precluster.uchime.pick.pick.count_table, taxonomy=stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.silva.wang.pick.taxonomy, splitmethod=classify, taxlevel=4, cutoff=0.15, processors=4)