Hello everybody. It is probably my brain that is freezing but I have a problem.
For reasons too long to explain here, I need to split a fasta file and a count file (after chimera removal) into two datasets.
After that, I need to remove the sequences found in some groups (the negative controls) from dataset 1 only, and then merge the new fasta and count files back together with dataset 2.
When I use merge.count, I get the following error:
[ERROR]: Your count table contains more than 1 sequence named M00833_602_000000000-BGWRN_1_1101_10149_9615, sequence names must be unique. Please correct.
This is weird, because the sequence names in the original fasta and count files are unique…
So please help.
The workaround, I believe, would be to analyze dataset 1 and remove the sequences right after chimera removal, run dataset 2 as usual, merge the two datasets, create OTUs, write a paper, live a happy life. But a faster approach would suit me better. So please, if you have any clue, I could use some help.
Here are my command lines (yes, I know, there are a lot of groups to remove; we went into a frenzy on controls this time around).
********************* # Step 1: build dataset 1 by removing every dataset-2 group (the group names ending in "F", plus Communautefeces) from the initial fasta and count files.
remove.groups(fasta=sandrine.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.fasta, count=sandrine.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.pick.pick.count_table, groups=48B14semF-48B1J0F-48B24semF-48B2J0F-48B34semF-48B3J0F-48B44semF-48B4J0F-48C14semF-48C1J0F-48C24semF-48C2J0F-48C34semF-48C3J0F-48C44semF-48C4J0F-48D14semF-48D1J0F-48D24semF-48D2J0F-48D34semF-48D3J0F-48D44semF-48D4J0F-714semF-71J0F-724semF-72J0F-734semF-73J0F-744semF-74J0F-HB314semF-HB31J0F-HB324semF-HB32J0F-HB334semF-HB33J0F-HB344semF-HB34J0F-LAND114semF-LAND11J0F-LAND124semF-LAND12J0F-LAND134semF-LAND13J0F-LAND14J0F-LAND414semF-LAND41J0F-LAND424semF-LAND42J0F-LAND434semF-LAND43J0F-LAND44J0F-LI114semF-LI11J0F-LI124semF-LI12J0F-LI134semF-LI13J0F-LI144semF-LI14J0F-LI214semF-LI21J0F-LI224semF-LI22J0F-LI234semF-LI23J0F-LI244semF-LI24J0F-M2214semF-M221J0F-M2224semF-M222J0F-M2234semF-M223J0F-M2244semF-M224J0F-M714semF-M71J0F-M724semF-M72J0F-M734semF-M73J0F-M744semF-M74J0F-M814semF-M81J0F-M824semF-M82J0F-M834semF-M83J0F-M844semF-M84J0F-Communautefeces)
# Sanity check: summarize the fasta/count files just produced (fasta=current, count=current pick up the latest outputs).
summary.seqs(fasta=current, count=current)
# Sanity check: report the sequence count of each remaining group.
count.groups(count=current)
# Step 2: within dataset 1, remove every group that is not a control, so that only the control groups remain.
remove.groups(fasta=current, count=current, groups=M87J0-M86J0-M85J0-M84J0-M83J0-M82J0-M81J0-M77J0-M76J0-M75J0-M74J0-M73J0-M72J0-M71J0-LAND46J0-LAND45J0-LAND44J0-LAND43J0-LAND42J0-LAND41J0-LAND16J0-LAND15J0-LAND14J0-LAND13J0-LAND12J0-LAND11J0-48B7J0-48B6J0-48B5J0-48B4J0-48B3J0-48B2J0-48B1J0-M874sem-M864sem-M854sem-M844sem-M834sem-M824sem-M814sem-M774sem-M764sem-M754sem-M744sem-M734sem-M724sem-M714sem-LAND464sem-LAND454sem-LAND444sem-LAND434sem-LAND424sem-LAND414sem-LAND164sem-LAND154sem-LAND144sem-LAND134sem-LAND124sem-LAND114sem-48B74sem-48B64sem-48B54sem-48B44sem-48B34sem-48B24sem-48B14sem-48D1J0-48D2J0-48D3J0-48D4J0-48D5J0-48D6J0-48D7J0-48D14sem-48D24sem-48D34sem-48D44sem-48D54sem-48D64sem-48D74sem-M2274sem-M2264sem-M2254sem-M2244sem-M2234sem-M2224sem-M2214sem-LI27J0-LI26J0-LI25J0-LI24J0-LI23J0-LI22J0-LI21J0-LI17J0-LI16J0-LI15J0-LI14J0-LI13J0-LI12J0-LI11J0-LI274sem-LI264sem-LI254sem-LI244sem-LI234sem-LI224sem-LI214sem-LI174sem-LI164sem-LI154sem-LI144sem-LI134sem-LI124sem-LI114sem-HB374sem-HB364sem-HB354sem-HB344sem-HB334sem-HB324sem-HB314sem-774sem-764sem-754sem-744sem-734sem-724sem-714sem-48C1J0-48C2J0-48C3J0-48C4J0-48C5J0-48C6J0-71J0-72J0-73J0-74J0-75J0-76J0-HB31J0-HB32J0-HB33J0-HB34J0-HB35J0-HB36J0-M221J0-M222J0-M223J0-M224J0-M225J0-M226J0-48C7J0-77J0-HB37J0-M227J0-48C74sem-48C64sem-48C54sem-48C44sem-48C34sem-48C24sem-48C14sem-Communauteouef1-Communauteouef2)
# Sanity checks on the control-only files.
summary.seqs(fasta=current, count=current)
count.groups(count=current)
# List the names of the sequences left in the control-only count table; the resulting .accnos file is the removal list used by the following remove.seqs command.
list.seqs(count=current)
# Step 3: remove the control-derived sequences (the .accnos list) from the full dataset 1 files.
# The fasta/count inputs are named explicitly because "current" now points at the control-only files.
# NOTE(review): the outputs of this command presumably end in .pick.pick.pick.pick.pick.*, i.e. the same names the control-only remove.groups step wrote - confirm that nothing still needed gets overwritten.
remove.seqs(accnos=sandrine.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.pick.pick.pick.pick.accnos, fasta=sandrine.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.pick.fasta, count=sandrine.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.pick.pick.pick.count_table)
# Sanity checks on the cleaned dataset 1.
summary.seqs(fasta=current, count=current)
count.groups(count=current)
# Dereplicate the cleaned dataset 1; the merge step presumably reads the files written here (the ...pick.pick.pick.pick.pick.unique.fasta and ...pick.pick.pick.pick.pick.count_table names it references).
unique.seqs(fasta=current, count=current)
# Step 4: build dataset 2 by removing every dataset-1 group from the initial fasta and count files.
# NOTE(review): the inputs are the same as in the first remove.groups call, so the outputs presumably reuse the .pick.pick.pick.pick.* names of dataset 1's intermediate files - confirm this overwrite is intended.
# NOTE(review): the list contains M87ctrl4sem / M88ctrlJ0 and Land18ctrl4sem / LAND18ctrlJ0, unlike the other symmetric ctrl pairs - verify these match the real group names.
remove.groups(fasta=sandrine.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.fasta, count=sandrine.trim.contigs.good.unique.good.filter.unique.precluster.denovo.vsearch.pick.pick.pick.count_table, groups=48B14sem-48B1J0-48B24sem-48B2J0-48B34sem-48B3J0-48B44sem-48B4J0-48B54sem-48B5J0-48B64sem-48B6J0-48B74sem-48B7J0-48B8ctrl4sem-48B8ctrlJ0-48C14sem-48C1J0-48C24sem-48C2J0-48C34sem-48C3J0-48C44sem-48C4J0-48C54sem-48C5J0-48C64sem-48C6J0-48C74sem-48C7J0-48C8ctrl4sem-48C8ctrlJ0-48D14sem-48D1J0-48D24sem-48D2J0-48D34sem-48D3J0-48D44sem-48D4J0-48D54sem-48D5J0-48D64sem-48D6J0-48D74sem-48D7J0-48D8ctrl4sem-48D8ctrlJ0-714sem-71J0-724sem-72J0-734sem-73J0-744sem-74J0-754sem-75J0-764sem-76J0-774sem-77J0-78ctrl4sem-78ctrlJ0-Communauteouef1-Communauteouef2-H20_oeuf1-H20_oeuf2-HB314sem-HB31J0-HB324sem-HB32J0-HB334sem-HB33J0-HB344sem-HB34J0-HB354sem-HB35J0-HB364sem-HB36J0-HB374sem-HB37J0-HB38ctrl4sem-HB38ctrlJ0-LAND114sem-LAND11J0-LAND124sem-LAND12J0-LAND134sem-LAND13J0-LAND144sem-LAND14J0-LAND154sem-LAND15J0-LAND164sem-LAND16J0-LAND18ctrlJ0-LAND414sem-LAND41J0-LAND424sem-LAND42J0-LAND434sem-LAND43J0-LAND444sem-LAND44J0-LAND454sem-LAND45J0-LAND464sem-LAND46J0-LAND48ctrl4sem-LAND48ctrlJ0-LI114sem-LI11J0-LI124sem-LI12J0-LI134sem-LI13J0-LI144sem-LI14J0-LI154sem-LI15J0-LI164sem-LI16J0-LI174sem-LI17J0-LI18ctrl4sem-LI18ctrlJ0-LI214sem-LI21J0-LI224sem-LI22J0-LI234sem-LI23J0-LI244sem-LI24J0-LI254sem-LI25J0-LI264sem-LI26J0-LI274sem-LI27J0-LI28ctrl4sem-LI28ctrlJ0-Land18ctrl4sem-M2214sem-M221J0-M2224sem-M222J0-M2234sem-M223J0-M2244sem-M224J0-M2254sem-M225J0-M2264sem-M226J0-M2274sem-M227J0-M228ctrl4sem-M228ctrlJ0-M714sem-M71J0-M724sem-M72J0-M734sem-M73J0-M744sem-M74J0-M754sem-M75J0-M764sem-M76J0-M774sem-M77J0-M78ctrl4sem-M78ctrlJ0-M814sem-M81J0-M824sem-M82J0-M834sem-M83J0-M844sem-M84J0-M854sem-M85J0-M864sem-M86J0-M874sem-M87J0-M87ctrl4sem-M88ctrlJ0)
# Sanity checks on dataset 2.
summary.seqs(fasta=current, count=current)
count.groups(count=current)
# Dereplicate dataset 2; the merge step presumably reads the files written here (the ...pick.pick.pick.pick.unique.fasta and ...pick.pick.pick.pick.count_table names it references).
unique.seqs(fasta=current, count=current)
# Step 5: merge the count tables of the cleaned dataset 1 and of dataset 2 into merge.count_table.
# NOTE(review): both count tables descend from the same dereplicated sequence set, so the same representative sequence name can presumably occur in both files; that would explain the "more than 1 sequence named ..." error - confirm against the merge.count documentation.
merge.count(count=sandrine.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.pick.pick.count_table-sandrine.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.pick.count_table, output=merge.count_table)
# merge.count is not working, so the commands below have not been run yet.
# Concatenate the two dereplicated fasta files into merge.fasta.
merge.files(input=sandrine.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.pick.pick.unique.fasta-sandrine.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.pick.pick.unique.fasta, output=merge.fasta)
# NOTE(review): "current" may still refer to dataset 2's files here rather than to merge.fasta / merge.count_table - verify before relying on this step.
unique.seqs(fasta=current, count=current)
# Fixed: count= previously pointed at merge.fasta (a fasta file); it must be the merged count table.
summary.seqs(fasta=merge.fasta, count=merge.count_table)