Hello!
This is not my first time asking about this, but this time around I am out of options. I need to run cluster.split (because when I run normal clustering I am loosing sequences in my positive control, namely my Salmonella unless I cluster at 0.005) So I want to use cluster.split to make sure that my positive control says as it should be and cluster everything under at 0.03.
But, when I run it, I get many error messages: "Your group file contains 390863 sequences and list file contains 390790 sequences. Please correct.
"
Which does not make sense to me since I am running my batch file using “current” for all files, therefore all files are correctly used. I am using 32 thread, is this a problem with using multiple or too much paralleling? I need to run around 700 samples together. Thanks for your input. Here is the spec I am using to run the samples (it takes me 23 hours to run, version v.1.44.3 which is the latest available on the computer servers I use) and my batch file.
Kind regards,
mem=128000M
cpus-per-task=32
set.current(processors=32)
set.logfile(name=megacampy_logFile_clustersplit)
make.contigs(file=megacampy.files, oligos=primers.oligo.txt, checkorient=t, pdiffs=0, maxee=2,deltaq=6)
summary.seqs(fasta=current)
screen.seqs(fasta=current, group=current, summary=current, maxambig=0, maxhomop=70)
count.groups(group=current)
summary.seqs(fasta=current)
unique.seqs(fasta=current)
count.seqs(name=current, group=current)
summary.seqs(fasta=current, count=current)
align.seqs(fasta=current, reference=silva.nr_v132.pcr.align, flip=t)
summary.seqs(fasta=current, count=current)
count.groups(group=current)
screen.seqs(fasta=current, count=current, summary=current, start=1968, end=11550)
count.groups(count=current)
summary.seqs(fasta=current, count=current)
filter.seqs(fasta=current, vertical=T, trump=.)
unique.seqs(fasta=current, count=current)
count.groups(count=current)
summary.seqs(fasta=current, count=current)
pre.cluster(fasta=current, count=current, diffs=4)
count.groups(count=current)
summary.seqs(fasta=current, count=current)
chimera.vsearch(fasta=current, count=current, dereplicate=t)
remove.seqs(fasta=current, accnos=current)
count.groups(count=current)
summary.seqs(fasta=current, count=current)
classify.seqs(fasta=current, count=current, iters=1000, reference=silva.nr_v138.align, taxonomy=silva.nr_v138.tax, cutoff=80)
remove.lineage(fasta=current, count=current, taxonomy=current, taxon=Chloroplast-Mitochondria-unknown-Eukaryota)
summary.tax(taxonomy=current, count=current)
summary.seqs(fasta=current, count=current)
count.groups(count=current)
dist.seqs(fasta=current,cutoff=0.04)
cluster.split(column=current, count=current, taxonomy=current, splitmethod=classify, taxlevel=6, delta=0, iters=500, cutoff=0.03)
make.shared(list=current,count=current,label=0.03)
classify.otu(list=current, count=current, taxonomy=current)
get.groups(count=current, fasta=current,taxonomy=current, groups=Positif_PCR_sophie)
summary.seqs(fasta=current, count=current)
seq.error(count=current, fasta=current, reference=zymostd_ref.txt, aligned=F)
summary.seqs(fasta=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.fasta, count=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.pick.count_table)
count.groups(count=current)
get.groups(count=current, fasta=current,taxonomy=current, groups=Zymo_Ctrl_PCR_carvacrol)
summary.seqs(fasta=current, count=current)
seq.error(count=current, fasta=current, reference=zymostd_ref.txt, aligned=F)
summary.seqs(fasta=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.fasta, count=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.pick.count_table)
count.groups(count=current)
get.groups(count=current, fasta=current,taxonomy=current, groups=zymo_extraction_carvacrol)
summary.seqs(fasta=current, count=current)
seq.error(count=current, fasta=current, reference=zymostd_ref.txt, aligned=F)
summary.seqs(fasta=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.fasta, count=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.pick.count_table)
count.groups(count=current)
get.groups(count=current, fasta=current,taxonomy=current, groups=Zymo_community_sophie)
summary.seqs(fasta=current, count=current)
seq.error(count=current, fasta=current, reference=zymostd_ref.txt, aligned=F)
summary.seqs(fasta=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.fasta, count=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.pick.count_table)
count.groups(count=current)
get.groups(count=current, fasta=current,taxonomy=current, groups=Positif_PCR2_sophie)
summary.seqs(fasta=current, count=current)
seq.error(count=current, fasta=current, reference=zymostd_ref.txt, aligned=F)
summary.seqs(fasta=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.fasta, count=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.pick.count_table)
count.groups(count=current)
get.groups(count=current, fasta=current,taxonomy=current, groups=Commart_probio)
summary.seqs(fasta=current, count=current)
seq.error(count=current, fasta=current, reference=zymostd_ref.txt, aligned=F)
summary.seqs(fasta=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.fasta, count=megacampy.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.pick.count_table)
count.groups(count=current)
get.groups(count=current, fasta=current,taxonomy=current, groups=campy_protease_Zymo_extraction)
summary.seqs(fasta=current, count=current)
seq.error(count=current, fasta=current, reference=zymostd_ref.txt, aligned=F)
quit()