I’m running the batch commands shown below on a large set of sequences (after chimera checking: 77k pre-clustered unique sequences, representing 8.6M seqs). During classify.seqs the process failed.
Here are the lines from my error file:
[compute-2-0:30895] *** Process received signal ***
[compute-2-0:30895] Signal: Segmentation fault (11)
[compute-2-0:30895] Signal code: Address not mapped (1)
[compute-2-0:30895] Failing at address: 0x1219883e0
[compute-2-0:30895] [ 0] /lib64/libpthread.so.0() [0x368ae0f500]
[compute-2-0:30895] [ 1] /opt/openmpi/lib/libmpi.so.1(opal_memory_ptmalloc2_int_malloc+0x7b1) [0x2afdd245a821]
[compute-2-0:30895] [ 2] /opt/openmpi/lib/libmpi.so.1(opal_memory_ptmalloc2_malloc+0x57) [0x2afdd245bb87]
[compute-2-0:30895] [ 3] /usr/lib64/libstdc++.so.6(_Znwm+0x1d) [0x368c6bd09d]
[compute-2-0:30895] [ 4] /usr/lib64/libstdc++.so.6(_Znam+0x9) [0x368c6bd1b9]
[compute-2-0:30895] [ 5] mothur(_ZN8Bayesian12readProbFileERSt14basic_ifstreamIcSt11char_traitsIcEES4_SsSs+0xbc9) [0x4ef399]
[compute-2-0:30895] [ 6] mothur(_ZN8BayesianC1ESsSsSsiiiibb+0x1946) [0x4f3b76]
[compute-2-0:30895] [ 7] mothur(_ZN19ClassifySeqsCommand7executeEv+0x4d7) [0x636027]
[compute-2-0:30895] [ 8] mothur(_ZN11BatchEngine8getInputEv+0x925) [0x7b6825]
[compute-2-0:30895] [ 9] mothur(main+0x126c) [0xa87a9c]
[compute-2-0:30895] [10] /lib64/libc.so.6(__libc_start_main+0xfd) [0x368a21ecdd]
[compute-2-0:30895] [11] mothur() [0x4aba49]
[compute-2-0:30895] *** End of error message ***
/opt/gridengine/default/spool/compute-2-0/job_scripts/53711: line 32: 30895 Segmentation fault (core dumped) mothur maps.to.may.batch
### Basic mothur processing of MiSeq sequences generated with the Caporaso primers.
### Each command picks up the most recent file of each type via "current", so the
### order of the commands below matters.
#make.contigs(processors=8, ffastq=Undetermined_S0_L001_R1_001.fastq.gz, rfastq=Undetermined_S0_L001_R2_001.fastq.gz, findex=Undetermined_S0_L001_I1_001.fastq.gz, oligos=indu.oligos)
### make.contigs was run on each sequencing run individually; the resulting
### *001.trim.contigs.fasta and *contigs.groups files were then concatenated with cat.
summary.seqs(fasta=maps.to.may.trim.contigs.fasta, processors=8)
# drop contigs with any ambiguous base (maxambig=0) or longer than 300 (maxlength=300)
screen.seqs(fasta=current, group=maps.to.may.contigs.groups, summary=current, maxambig=0, maxlength=300)
summary.seqs(fasta=current, group=current)
# reduce the fasta size by keeping only one copy of each sequence; this generates a names file
unique.seqs(fasta=current)
summary.seqs(fasta=current, name=current)
# replace both the names and group files (which contain the name of every sequence) with a
# single count table; not sure if I like this, but going with it for now
count.seqs(name=current, group=current)
summary.seqs(count=current, fasta=current)
# align to a custom SILVA database (trimmed to the V4 region using "pcr.seqs")
align.seqs(fasta=current, reference=silva.v4.fasta)
summary.seqs(fasta=current, count=current)
# remove the sequences that did not align properly; start/end were taken from the
# previous summary.seqs output, and maxhomop=8 caps the homopolymer length
screen.seqs(fasta=current, count=current, summary=current, start=8, end=9582, maxhomop=8)
# remove columns from the alignment that only contain "-"
filter.seqs(fasta=current, vertical=T, processors=8)
summary.seqs(fasta=current, count=current)
# pre.cluster sequences within 2 differences (diffs=2) to reduce computation time;
# intended as ~1% difference -- TODO confirm against the actual read length
pre.cluster(fasta=current, diffs=2, count=current)
summary.seqs(fasta=current, count=current)
# with dereplicate=t, chimeras are removed only from the samples in which they were
# called chimeric; to remove them from all samples, change to dereplicate=f
chimera.uchime(fasta=current, count=current, dereplicate=t)
remove.seqs(fasta=current, accnos=current, count=current)
summary.seqs(fasta=current, count=current)
# RDP classifier (trainset10_082014 reference and taxonomy), cutoff=60
# NOTE(review): the reported segmentation fault occurred during this step (the stack
# trace ends in Bayesian::readProbFile called from ClassifySeqsCommand::execute).
# Possibly an out-of-memory condition or a stale/corrupt classifier shortcut file
# left next to the reference -- verify before rerunning.
classify.seqs(fasta=current, count=current, reference=trainset10_082014.pds.fasta, taxonomy=trainset10_082014.pds.tax, cutoff=60)
# remove all non-target sequences
remove.lineage(fasta=current, count=current, taxonomy=current, taxon=Chloroplast-Mitochondria-unknown-Archaea-Eukaryota)
# make OTUs for each Order individually (splitmethod=classify, taxlevel=4)
cluster.split(fasta=current, count=current, taxonomy=current, splitmethod=classify, taxlevel=4, cutoff=0.15, processors=4)
# make the 3%, 5% and 10% OTU matrices (shared file)
make.shared(list=current, count=current, label=0.03-0.05-0.10)
# classify each OTU using the RDP classification; a confidence of 100% means all
# sequences in that OTU agree at that classification level
classify.otu(list=current, count=current, taxonomy=current)
# pick a representative sequence for each OTU
get.oturep(fasta=current, count=current, list=current)
# check the number of sequences in each sample
count.groups(shared=current)
# alpha diversity, subsampling to 10000 sequences per sample
summary.single(shared=current, calc=nseqs-sobs-coverage-shannoneven-invsimpson, subsample=10000)
# beta diversity, subsampling to 10000 sequences per sample
dist.shared(shared=current, calc=braycurtis-jest-thetayc, subsample=10000)