Hi!
First, I have no idea of whether this actually is a bug or not, but it certainly is strange enough. And I hope I haven’t made any trivial mistakes that I just haven’t found yet.
I have two times two datasets that I am working on. Datasets v1v2 and v6 for Patients and Controls.
I am processing the two datasets in the same manner, but I am getting strange results for the v1v2 dataset.
What I do is the following:
merge.files(input=FCQD7IX03_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS06_FLBHXVS14_seqs.fsa_inflated.fa-FGUOMCV12_FLBHXVS14_seqs.fsa_inflated.fa-FKNCG3S03_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS11_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS15_FLBHXVS14_seqs.fsa_inflated.fa-FR1RFUD01_FR1RFUD04_seqs.fsa_inflated.fa-FR1RFUD03_FR1RFUD04_seqs.fsa_inflated.fa, output=v1v2_control.fasta)
summary.seqs(fasta=v1v2_control.fasta)
unique.seqs(fasta=v1v2_control.fasta)
summary.seqs(fasta=v1v2_control.unique.fasta)
pre.cluster(fasta=v1v2_control.unique.fasta, name=v1v2_control.names, diffs=2)
pairwise.seqs(fasta=v1v2_control.unique.precluster.fasta, calc=eachgap, countends=F, processors=20)
cluster(column=v1v2_control.unique.precluster.dist, name=v1v2_control.unique.precluster.names, method=average)
summary.single(list=v1v2_control.unique.precluster.an.list)
rarefaction.single(list=v1v2_control.unique.precluster.an.list)
#now for the patients
merge.files(input=FCQD7IX02_FLBHXVS14_seqs.fsa_inflated.fa-FCQD7IX04_FLBHXVS14_seqs.fsa_inflated.fa-FGUOMCV11_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS10_FLBHXVS14_seqs.fsa_inflated.fa-FKNCG3S04_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS12_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS16_FR1RFUD04_seqs.fsa_inflated.fa-FR1RFUD02_FR1RFUD04_seqs.fsa_inflated.fa,output=v1v2_patient.fasta)
summary.seqs(fasta=v1v2_patient.fasta)
unique.seqs(fasta=v1v2_patient.fasta)
summary.seqs(fasta=v1v2_patient.unique.fasta)
pre.cluster(fasta=v1v2_patient.unique.fasta, name=v1v2_patient.names, diffs=2)
pairwise.seqs(fasta=v1v2_patient.unique.precluster.fasta, calc=eachgap, countends=F, processors=20)
cluster(column=v1v2_patient.unique.precluster.dist, name=v1v2_patient.unique.precluster.names, method=average)
summary.single(list=v1v2_patient.unique.precluster.an.list)
rarefaction.single(list=v1v2_patient.unique.precluster.an.list)
now for all in one
merge.files(input=FCQD7IX03_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS06_FLBHXVS14_seqs.fsa_inflated.fa-FGUOMCV12_FLBHXVS14_seqs.fsa_inflated.fa-FKNCG3S03_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS11_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS15_FLBHXVS14_seqs.fsa_inflated.fa-FR1RFUD01_FR1RFUD04_seqs.fsa_inflated.fa-FR1RFUD03_FR1RFUD04_seqs.fsa_inflated.fa-FCQD7IX02_FLBHXVS14_seqs.fsa_inflated.fa-FCQD7IX04_FLBHXVS14_seqs.fsa_inflated.fa-FGUOMCV11_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS10_FLBHXVS14_seqs.fsa_inflated.fa-FKNCG3S04_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS12_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS16_FR1RFUD04_seqs.fsa_inflated.fa-FR1RFUD02_FR1RFUD04_seqs.fsa_inflated.fa,output=v1v2.merged.fasta)
summary.seqs(fasta=v1v2.merged.fasta)
unique.seqs(fasta=v1v2.merged.fasta)
summary.seqs(fasta=v1v2.merged.unique.fasta)
pre.cluster(fasta=v1v2.merged.unique.fasta, name=v1v2.merged.names, diffs=2)
pairwise.seqs(fasta=v1v2.merged.unique.precluster.fasta, calc=eachgap, countends=F, processors=20)
cluster(column=v1v2.merged.unique.precluster.dist, name=v1v2.merged.unique.precluster.names, method=average)
make.group(fasta=FCQD7IX03_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS06_FLBHXVS14_seqs.fsa_inflated.fa-FGUOMCV12_FLBHXVS14_seqs.fsa_inflated.fa-FKNCG3S03_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS11_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS15_FLBHXVS14_seqs.fsa_inflated.fa-FR1RFUD01_FR1RFUD04_seqs.fsa_inflated.fa-FR1RFUD03_FR1RFUD04_seqs.fsa_inflated.fa-FCQD7IX02_FLBHXVS14_seqs.fsa_inflated.fa-FCQD7IX04_FLBHXVS14_seqs.fsa_inflated.fa-FGUOMCV11_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS10_FLBHXVS14_seqs.fsa_inflated.fa-FKNCG3S04_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS12_FLBHXVS14_seqs.fsa_inflated.fa-FLBHXVS16_FR1RFUD04_seqs.fsa_inflated.fa-FR1RFUD02_FR1RFUD04_seqs.fsa_inflated.fa, groups=Kontroll-Kontroll-Kontroll-Kontroll-Kontroll-Kontroll-Kontroll-Kontroll-Pasient-Pasient-Pasient-Pasient-Pasient-Pasient-Pasient-Pasient)
make.shared(list=v1v2.merged.unique.precluster.an.list, group=merge.groups, label=0.03)
count.groups()
venn(groups=Pasient-Kontroll)
So, basically I am doing the same in all three cases, but on different levels.
My question is about the otu numbers I get:
When I do
grep “^0.03” *rabund |awk ‘{print $1"\t"$2}’
I get
v1v2_control.unique.precluster.an.rabund:0.03 1204
v1v2.merged.unique.precluster.an.Kontroll.rabund:0.03 1486
v1v2.merged.unique.precluster.an.Pasient.rabund:0.03 1050
v1v2_patient.unique.precluster.an.rabund:0.03 660
I.e, I am here getting considerably more otus for the combined analysis than I am getting for the separate analyses.
I am doing the same thing for v6, with different files, naturally, but here the otu numbers match within reason.
Am I missing something here, or is it something strange with my data, or what?
Best,
Karin
PS: thanks for a very nice package btw!