Cluster (problem count_table)

Hi everyone!

I would like to ask you for help because I have a problem with the cluster command.

When we try to use the clustering script (“Cluster”, which is the last command at the end of the pipeline), we get this error message indicating that it is unable to find a sequence in the “count table” file:

“[ERROR]: M01388_125_000000000-AUMO1388_125_000000000-AU7ED_1_1106_14823_13204 is not in your count_table. Please correct.”

Below you can find the whole pipeline we have been using.

How can we fix it?

Thank you very much!

Felipe



make.contigs(ffastq=Undetermined_S0_L001_R1_001.fastq, rfastq=Undetermined_S0_L001_R2_001.fastq, findex=Undetermined_S0_L001_I1_001.fastq, oligos=Oligos_file.txt, processors=12)

cutadapt -b GTGCCAGCMGCCGCGGTAA -b GGACTACHVGGGTWTCTAAT Undetermined_S0_L001_R1_001.trim.contigs.fasta > Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.fasta

summary.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.fasta)

screen.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.fasta, summary=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.summary, minlength=252, maxhomop=8, maxambig=0, group=Undetermined_S0_L001_R1_001.contigs.groups, processors=12)

unique.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.fasta)

summary.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.fasta)

count.seqs(name=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.names, group=Undetermined_S0_L001_R1_001.contigs.good.groups, processors=12)

align.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.fasta, reference=/media/storage1/data_bases/silva128/silva.nr_v128.align, flip=T, processors=12)

summary.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.align, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.count_table)

screen.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.align, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.count_table, summary=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.summary, start=13862, end=23444).

summary.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.align, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.good.count_table)

filter.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.align, vertical=T, trump=.)


summary.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.fasta, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.good.count_table)

unique.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.fasta,count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.good.count_table)

pre.cluster(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.fasta, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.count_table, diffs=1, processors=12)

summary.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.fasta, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.count_table).

chimera.uchime(fasta=current, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.count_table, dereplicate=T, processors=12)

remove.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.fasta, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.denovo.uchime.pick.count_table, accnos=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.denovo.uchime.accnos)
summary.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.fasta, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.denovo.uchime.pick.pick.count_table)

classify.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.fasta,count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.denovo.uchime.pick.pick.count_table,reference=/media/storage1/data_bases/silva128/silva.nr_v128.ng.fasta, taxonomy=/media/storage1/data_bases/silva128/silva.nr_v128-2.tax, cutoff=80, processors=12)

remove.lineage(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.fasta, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.denovo.uchime.pick.pick.count_table, taxonomy=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.nr_v128_2.wang.taxonomy, taxon=Mitochondria-Chloroplast)

summary.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.pick.fasta, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.denovo.uchime.pick.pick.pick.count_table, processors=12)

dist.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.pick.fasta, cutoff=0.1)

cluster(column=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.dist, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.count_table, cutoff=0.03, method=average, processors=12)

Some filenames don’t seem to quite match in the last few commands.

summary.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.pick.fasta, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.denovo.uchime.pick.pick.pick.count_table, processors=12)

dist.seqs(fasta=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.pick.fasta, cutoff=0.1)

cluster(column=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.dist, count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.count_table, cutoff=0.03, method=average, processors=12)

The cluster command should instead be:

cluster(column=current, count=current, cutoff=0.03, method=average, processors=12)

The current files should be:
column=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.pick.pick.dist
count=Undetermined_S0_L001_R1_001.trim.contigs.cutadapt.good.unique.good.filter.unique.precluster.denovo.uchime.pick.pick.pick.count_table

One thing to note: with the average neighbor method, you should increase the cutoff so that it is above the distance you would like to cluster to. Here’s why: https://mothur.org/wiki/Frequently_asked_questions#Why_does_the_cutoff_change_when_I_cluster_with_average_neighbor.3F