Hi,
We are having a problem obtaining representative sequences that can be BLASTed in order to check the taxonomic classifications of unclassified OTUs. We have been using the following commands to process our data and obtain our OTU table:
#sffinfo(sff=101712KM28F.sff, flow=T)
#summary.seqs(fasta=101712KM28F.fasta)
#trim.flows(flow=101712KM28F.flow, oligos=am_reads.oligos, pdffs=2, bdiffs=1, processors=10) #shhh.flows(file=101712KM28F.flow.foles, processors=11) summary.seqs(fasta=101712KM28F.shhh.fasta, name=101712KM28F.shhh.names) unique.seqs(fasta=101712KM28F.shhh.fasta, name=101712KM28F.shhh.names) summary.seqs(fasta=101712KM28F.shhh.unique.fasta, name=101712KM28F.shhh.unique.names)
align.seqs(fasta=101712KM28F.shhh.unique.fasta, reference=silva.bacteria.fasta, processors=4, flip=t) summary.seqs(fasta=101712KM28F.shhh.unique.align, name=101712KM28F.shhh.unique.names)
merge.files(input=101712KM28F.AMB1.28f.shhh.groups-101712KM28F.AMB10.28f.shhh.groups-101712KM28F.AMB11.28f.shhh.groups-101712KM28F.AMB12.28f.shhh.groups-101712KM28F.AMB14.28f.shhh.groups-101712KM28F.AMB15.28f.shhh.groups-101712KM28F.AMB5.28f.shhh.groups-101712KM28F.AMB6.28f.shhh.groups-101712KM28F.AMB7.28f.shhh.groups-101712KM28F.AMB9.28f.shhh.groups-101712KM28F.AMC10.28f.shhh.groups-101712KM28F.AMC11.28f.shhh.groups-101712KM28F.AMC12.28f.shhh.groups-101712KM28F.AMC13.28f.shhh.groups-101712KM28F.AMC15.28f.shhh.groups-101712KM28F.AMC3.28f.shhh.groups-101712KM28F.AMC5.28f.shhh.groups-101712KM28F.AMC6.28f.shhh.groups-101712KM28F.AMC8.28f.shhh.groups-101712KM28F.AMC9.28f.shhh.groups, output=101712KM28F.shhh.groups) screen.seqs(fasta=101712KM28F.shhh.unique.align, name=101712KM28F.shhh.unique.names, group=101712KM28F.shhh.groups, start=1044, optimize=end, criteria=95, processors=4) filter.seqs(fasta=101712KM28F.shhh.unique.good.align, vertical=T, trump=., processors=4) summary.seqs(fasta=101712KM28F.shhh.unique.good.filter.fasta, name=101712KM28F.shhh.unique.good.names)
unique.seqs(fasta=101712KM28F.shhh.unique.good.filter.fasta, name=101712KM28F.shhh.unique.good.names)
pre.cluster(fasta=101712KM28F.shhh.unique.good.filter.unique.fasta, name=101712KM28F.shhh.unique.good.filter.names, group=101712KM28F.shhh.good.groups, diffs=2) chimera.uchime(fasta=101712KM28F.shhh.unique.good.filter.unique.precluster.fasta, name=101712KM28F.shhh.unique.good.filter.unique.precluster.names, group=101712KM28F.shhh.good.groups, processors=4) remove.seqs(accnos=101712KM28F.shhh.unique.good.filter.unique.precluster.uchime.accnos, fasta=101712KM28F.shhh.unique.good.filter.unique.precluster.fasta, name=101712KM28F.shhh.unique.good.filter.unique.precluster.names, group=101712KM28F.shhh.good.groups, dups=T)
summary.seqs(name=current)
classify.seqs(fasta=101712KM28F.shhh.unique.good.filter.unique.precluster.pick.fasta, name=101712KM28F.shhh.unique.good.filter.unique.precluster.pick.names, group=101712KM28F.shhh.good.pick.groups, template=97_otus.fasta, taxonomy=97_otu_taxonomy_format.txt, cutoff=80, processors=4) remove.lineage(fasta=101712KM28F.shhh.unique.good.filter.unique.precluster.pick.fasta, name=101712KM28F.shhh.unique.good.filter.unique.precluster.pick.names, taxonomy=101712KM28F.shhh.unique.good.filter.unique.precluster.pick.97_otu_taxonomy_format.wang.taxonomy, taxon=Mitochondria-Eukaryota-Chloroplast-unknown, group=101712KM28F.shhh.good.pick.groups)
summary.seqs(fasta=101712KM28F.shhh.unique.good.filter.unique.precluster.pick.pick.fasta, name=101712KM28F.shhh.unique.good.filter.unique.precluster.pick.pick.names)
system(cp 101712KM28F.shhh.unique.good.filter.unique.precluster.pick.pick.fasta am_final.fasta)
system(cp 101712KM28F.shhh.unique.good.filter.unique.precluster.pick.pick.names am_final.names)
system(cp 101712KM28F.shhh.good.pick.pick.groups am_final.groups)
system(cp 101712KM28F.shhh.unique.good.filter.unique.precluster.pick.97_otu_taxonomy_format.wang.pick.taxonomy am_final.taxonomy)
dist.seqs(fasta=am_final.fasta, cutoff=0.15, processors=2)
cluster(column=am_final.dist, name=am_final.names)
make.shared(list=am_final.an.list, group=am_final.groups, label=0.03)
sub.sample(shared=am_final.an.shared, name=am_final.names, size=1426)
classify.otu(list=am_final.an.list, name=am_final.names, taxonomy=am_final.taxonomy, label=0.03)
dist.seqs(fasta=am_final.fasta, output=phylip, processors=2)
get.oturep(phylip=am_final.phylip.dist, fasta=am_final.fasta, name=am_final.names, list=am_final.an.list, group=am_final.groups)
clearcut(phylip=am_final.phylip.dist)
system(…/…/…/…/programs/pprospector-1.0.1/scripts/clean_fasta.py -f am_final.an.unique.rep.fasta)
The output from sub.sample and the output from get.oturep do not seem to correspond to each other. When we BLAST the output from get.oturep, our results don't correspond to the assigned taxonomy in the am_final.taxonomy file or to the sub.sample shared file (am_final.an.shared). We aren't doubting that the assignments in mothur are correct; we just can't seem to figure out a way of effectively subsampling our data and then getting representative sequences for each OTU such that the two correspond to each other. So many of our sequences have little to no taxonomic classification that we need to look at some individual BLAST results to even get a feel for what we are dealing with.
Thank you, from two very frustrated users,
Patrick and Kathy