pkgsrc-WIP-changes archive
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index][Old Index]
ddocent: replace ddocent-test* with ddocent-assembly-test*
Module Name: pkgsrc-wip
Committed By: Winston Weinert <winston%ml1.net@localhost>
Pushed By: winston
Date: Mon Jun 18 10:49:40 2018 -0500
Changeset: 78d5aaf8432b04e92f30d2e87625b1274fdc2743
Modified Files:
ddocent/Makefile
ddocent/PLIST
Added Files:
ddocent/files/ddocent-assembly-test
ddocent/files/ddocent-assembly-test-cleanup
Removed Files:
ddocent/files/ddocent-test
Log Message:
ddocent: replace ddocent-test* with ddocent-assembly-test*
To see a diff of this commit:
https://wip.pkgsrc.org/cgi-bin/gitweb.cgi?p=pkgsrc-wip.git;a=commitdiff;h=78d5aaf8432b04e92f30d2e87625b1274fdc2743
Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.
diffstat:
ddocent/Makefile | 7 +-
ddocent/PLIST | 4 +-
ddocent/files/ddocent-assembly-test | 366 ++++++++++++++++++++++++++++
ddocent/files/ddocent-assembly-test-cleanup | 8 +
ddocent/files/ddocent-test | 227 -----------------
5 files changed, 380 insertions(+), 232 deletions(-)
diffs:
diff --git a/ddocent/Makefile b/ddocent/Makefile
index 4363cd996a..b6e9210ac0 100644
--- a/ddocent/Makefile
+++ b/ddocent/Makefile
@@ -49,7 +49,7 @@ SUBST_FILES.prefix+= ${DESTDIR}${PREFIX}/bin/dDocent
USE_LANGUAGES= # none
REPLACE_BASH= dDocent *.sh
-REPLACE_BASH+= scripts/ddocent-test* scripts/*.sh scripts/dDocent_filters
+REPLACE_BASH+= scripts/ddocent-assembly-test* scripts/*.sh scripts/dDocent_filters
REPLACE_PERL= scripts/*.pl
REPLACE_PYTHON= scripts/*.py
NO_BUILD= yes
@@ -63,14 +63,15 @@ JAVAJARDIR= ${PREFIX}/lib/java
INSTALLATION_DIRS= bin
post-extract:
- ${CP} ${FILESDIR}/ddocent-test ${FILESDIR}/ddocent-test-cleanup \
+ ${CP} ${FILESDIR}/ddocent-assembly-test ${FILESDIR}/ddocent-assembly-test-cleanup \
${WRKSRC}/scripts
do-install:
${INSTALL_SCRIPT} \
${WRKSRC}/dDocent \
${WRKSRC}/*.sh \
- ${WRKSRC}/scripts/ddocent-test* \
+ ${WRKSRC}/scripts/ddocent-assembly-test \
+ ${WRKSRC}/scripts/ddocent-assembly-test-cleanup \
${WRKSRC}/scripts/*.sh \
${WRKSRC}/scripts/*.pl \
${WRKSRC}/scripts/dDocent_filters \
diff --git a/ddocent/PLIST b/ddocent/PLIST
index 9de3aea86c..f44045003c 100644
--- a/ddocent/PLIST
+++ b/ddocent/PLIST
@@ -8,8 +8,8 @@ bin/Rename_for_dDocent_with-INDEX.sh
bin/dDocent
bin/dDocent_filters
bin/dDocent_ngs.sh
-bin/ddocent-test
-bin/ddocent-test-cleanup
+bin/ddocent-assembly-test
+bin/ddocent-assembly-test-cleanup
bin/filter_hwe_by_pop.pl
bin/filter_missing_ind.sh
bin/pop_missing_filter.sh
diff --git a/ddocent/files/ddocent-assembly-test b/ddocent/files/ddocent-assembly-test
new file mode 100644
index 0000000000..7696b900cf
--- /dev/null
+++ b/ddocent/files/ddocent-assembly-test
@@ -0,0 +1,366 @@
+#!/usr/bin/env bash
+
+set -e
+
+##########################################################################
+# Function description:
+# Pause until user presses return
+##########################################################################
+
+pause()
+{
+ local junk
+
+ printf "Press return to continue..."
+ read junk
+}
+
+
+##########################################################################
+# Not necessary if invoking scripts with "bash script-name" as shown
+# in the dDocent tutorial, but supplied for completeness. Tutorial
+# scripts have also been patched upstream to usr /usr/bin/env, but
+# we won't assume they'll all stay that way.
+##########################################################################
+
+fix_bash_path()
+{
+ # Don't echo these commands
+ { set +x; } 2>/dev/null
+ if [ $# != 1 ]; then
+ printf "Usage: $0 script\n"
+ exit 1
+ fi
+ sed -i.bak -e "s|#!/bin/bash|#!/usr/bin/env bash|g" $1
+ set -x
+}
+
+
+##########################################################################
+# Main
+##########################################################################
+
+##########################################################################
+# Workarounds for running dDocent from FreeBSD ports and Pkgsrc.
+# Include this section in any scripts running dDocent commands.
+##########################################################################
+
+# Hack to allow remake_reference.sh, etc. to find trimmomatic.
+# trimmomatic.jar and adapter files are not an executables and should not
+# be in PATH according to filesystem hierarchy standards, but the dDocent
+# scripts are coded to look for them there, because that's how dDocent
+# installs the bundled Trimmomatic.
+if [ -e /usr/local/share/java/classes/trimmomatic.jar ]; then
+ export PATH=${PATH}:/usr/local/share/java/classes:/usr/local/share/trimmomatic/adapters
+elif [ -e $PKGSRC/lib/java/trimmomatic.jar ]; then
+ export PATH=${PATH}:$PKGSRC/lib/java:$PKGSRC/share/trimmomatic/adapters
+else
+ printf "Error: Trimmomatic is not installed in a known location.\n" >> /dev/stderr
+fi
+
+if ! pwd | fgrep dDocent-test; then
+ mkdir -p dDocent-test
+ cd dDocent-test
+fi
+
+# remake_reference.sh and some other scripts use GNU extensions in awk
+# commands. Trick systems into using gawk to get around this without
+# having to patch every script.
+mkdir -p ddocent-bin
+ln -sf `which gawk` ddocent-bin/awk
+ln -sf `which python2.7` ddocent-bin/python
+export PATH=`pwd`/ddocent-bin:$PATH
+
+##########################################################################
+# Actual dDocent commands below
+##########################################################################
+
+##########################################################################
+# Command sequence from the dDocent tutorial on Github. See link below.
+##########################################################################
+
+cat << EOM
+
+This script runs the test commands described at
+
+http://ddocent.com/assembly
+
+You may want to follow along on this web page as the script runs. It will
+pause after each step to allow checking the output.
+
+EOM
+pause
+
+mkdir -p Data
+cd Data
+
+printf "Downloading and unpacking data.zip...\n"
+if [ -e data.zip ]; then
+ mv data.zip /tmp/dDocent-data.zip
+ rm -rf *
+ mv /tmp/dDocent-data.zip data.zip
+else
+ rm -rf *
+ curl --insecure -L -o data.zip \
+ 'https://www.dropbox.com/s/t09xjuudev4de72/data.zip?dl=0'
+fi
+
+# Hack: current data.zip contains data.zip. ??!
+if [ ! -e simRRLs2.py ]; then
+ yes | unzip data.zip || true
+fi
+ls -l
+pause
+
+set -x
+head SimRAD.barcodes
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+cut -f2 SimRAD.barcodes > barcodes
+head barcodes
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+process_radtags -1 SimRAD_R1.fastq.gz -2 SimRAD_R2.fastq.gz -b barcodes \
+ -e ecoRI --renz_2 mspI -r -i gzfastq
+ls | fgrep sample_ | head
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+rm *rem*
+{ set +x; } 2>/dev/null
+pause
+
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+Rename_for_dDocent.sh SimRAD.barcodes
+{ set +x; } 2>/dev/null
+
+set -x
+ls *.fq.gz
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+ls *.F.fq.gz > namelist
+sed -i'' -e 's/.F.fq.gz//g' namelist
+AWK1='BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}'
+AWK2='!/>/'
+AWK3='!/NNN/'
+PERLT='while (<>) {chomp; $z{$_}++;} while(($k,$v) = each(%z)) {print "$v\t$k\n";}'
+
+cat namelist | parallel --no-notice -j 8 "zcat {}.F.fq.gz | mawk '$AWK1' | mawk '$AWK2' > {}.forward"
+cat namelist | parallel --no-notice -j 8 "zcat {}.R.fq.gz | mawk '$AWK1' | mawk '$AWK2' > {}.reverse"
+cat namelist | parallel --no-notice -j 8 "paste -d '-' {}.forward {}.reverse | mawk '$AWK3' | sed 's/-/NNNNNNNNNN/' | perl -e '$PERLT' > {}.uniq.seqs"
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+cat *.uniq.seqs > uniq.seqs
+for i in {2..20};
+do
+echo $i >> pfile
+done
+cat pfile | parallel --no-notice \
+ "echo -n {}xxx && mawk -v x={} '\$1 >= x' uniq.seqs | wc -l" \
+ | mawk '{gsub("xxx","\t",$0); print;}'| sort -g > uniqseq.data
+rm pfile
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+more uniqseq.data
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+gnuplot << \EOF
+set terminal dumb size 80, 30
+set autoscale
+set xrange [2:20]
+unset label
+set title "Number of Unique Sequences with More than X Coverage (Counted within individuals)"
+set xlabel "Coverage"
+set ylabel "Number of Unique Sequences"
+plot 'uniqseq.data' with lines notitle
+pause -1
+EOF
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+parallel --no-notice -j 8 mawk -v x=4 \''$1 >= x'\' ::: *.uniq.seqs | cut -f2 | perl -e 'while (<>) {chomp; $z{$_}++;} while(($k,$v) = each(%z)) {print "$v\t$k\n";}' > uniqCperindv
+wc -l uniqCperindv
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+for ((i = 2; i <= 10; i++));
+do
+echo $i >> ufile
+done
+
+cat ufile | parallel --no-notice "echo -n {}xxx && mawk -v x={} '\$1 >= x' uniqCperindv | wc -l" | mawk '{gsub("xxx","\t",$0); print;}'| sort -g > uniqseq.peri.data
+rm ufile
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+gnuplot << \EOF
+set terminal dumb size 80, 30
+set autoscale
+unset label
+set title "Number of Unique Sequences present in more than X Individuals"
+set xlabel "Number of Individuals"
+set ylabel "Number of Unique Sequences"
+plot 'uniqseq.peri.data' with lines notitle
+pause -1
+EOF
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+mawk -v x=4 '$1 >= x' uniqCperindv > uniq.k.4.c.4.seqs
+wc -l uniq.k.4.c.4.seqs
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+cut -f2 uniq.k.4.c.4.seqs > totaluniqseq
+mawk '{c= c + 1; print ">Contig_" c "\n" $1}' totaluniqseq > uniq.fasta
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+sed -e 's/NNNNNNNNNN/\t/g' uniq.fasta | cut -f1 > uniq.F.fasta
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+cd-hit-est -i uniq.F.fasta -o xxx -c 0.8 -T 0 -M 0 -g 1
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+mawk '{if ($1 ~ /Cl/) clus = clus + 1; else print $3 "\t" clus}' xxx.clstr | sed 's/[>Contig_,...]//g' | sort -g -k1 > sort.contig.cluster.ids
+paste sort.contig.cluster.ids totaluniqseq > contig.cluster.totaluniqseq
+sort -k2,2 -g contig.cluster.totaluniqseq | sed -e 's/NNNNNNNNNN/\t/g' > rcluster
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+head rcluster
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+cut -f2 rcluster | uniq | wc -l
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+rainbow div -i rcluster -o rbdiv.out
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+rainbow div -i rcluster -o rbdiv.out -f 0.5 -K 10
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+rainbow merge -o rbasm.out -a -i rbdiv.out
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+rainbow merge -o rbasm.out -a -i rbdiv.out -r 2
+{ set +x; } 2>/dev/null
+pause
+
+# If missing fdesc mount for bash, <(echo "E") will not work
+# cat rbasm.out <(echo "E") |sed 's/[0-9]*:[0-9]*://g' | mawk ' {
+set -x
+echo "E" > endfile
+cat rbasm.out endfile |sed 's/[0-9]*:[0-9]*://g' | mawk ' {
+if (NR == 1) e=$2;
+else if ($1 ~/E/ && lenp > len1) {c=c+1; print ">dDocent_Contig_" e "\n" seq2 "NNNNNNNNNN" seq1; seq1=0; seq2=0;lenp=0;e=$2;fclus=0;len1=0;freqp=0;lenf=0}
+else if ($1 ~/E/ && lenp <= len1) {c=c+1; print ">dDocent_Contig_" e "\n" seq1; seq1=0; seq2=0;lenp=0;e=$2;fclus=0;len1=0;freqp=0;lenf=0}
+else if ($1 ~/C/) clus=$2;
+else if ($1 ~/L/) len=$2;
+else if ($1 ~/S/) seq=$2;
+else if ($1 ~/N/) freq=$2;
+else if ($1 ~/R/ && $0 ~/0/ && $0 !~/1/ && len > lenf) {seq1 = seq; fclus=clus;lenf=len}
+else if ($1 ~/R/ && $0 ~/0/ && $0 ~/1/) {seq1 = seq; fclus=clus; len1=len}
+else if ($1 ~/R/ && $0 ~!/0/ && freq > freqp && len >= lenp || $1 ~/R/ && $0 ~!/0/ && freq == freqp && len > lenp) {seq2 = seq; lenp = len; freqp=freq}
+}' > rainbow.fasta
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+cd-hit-est -i rainbow.fasta -o referenceRC.fasta -M 0 -T 0 -c 0.9
+{ set +x; } 2>/dev/null
+pause
+
+remake_reference.sh 4 4 0.90 PE 2
+{ set +x; } 2>/dev/null
+pause
+
+ReferenceOpt.sh
+
+bash ReferenceOpt.sh 4 8 4 8 PE 16
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+bash remake_reference.sh 4 4 0.90 PE 2
+head reference.fasta
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+wc -l reference.fasta
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+mawk '/>/' reference.fasta | wc -l
+{ set +x; } 2>/dev/null
+pause
+
+set -x
+mawk '/^NAATT.*N*.*CCGN$/' reference.fasta | wc -l
+grep '^NAATT.*N*.*CCGN$' reference.fasta | wc -l
+{ set +x; } 2>/dev/null
+pause
+
+printf "Bonus Section: Optimize reference assemblies? (takes a long time) y/[n] "
+read bonus
+if [ 0$bonus = 0y ]; then
+ set -x
+ { set +x; } 2>/dev/null
+ printf "Running dDocent to trim reads.\n"
+ pause
+ set -x
+ cat << EOM | dDocent || true
+yes
+8
+10g
+yes
+no
+no
+no
+bacon%uwm.edu@localhost
+EOM
+ RefMapOpt.sh 4 8 4 8 0.9 64 PE
+ { set +x; } 2>/dev/null
+ pause
+ more mapping.results
+ pause
+fi
diff --git a/ddocent/files/ddocent-assembly-test-cleanup b/ddocent/files/ddocent-assembly-test-cleanup
new file mode 100644
index 0000000000..4189b3bb03
--- /dev/null
+++ b/ddocent/files/ddocent-assembly-test-cleanup
@@ -0,0 +1,8 @@
+#!/bin/sh -e
+
+if pwd | fgrep dDocent-test; then
+ rm -rf .*.bak Data ddocent-bin
+else
+ printf "$0 can only be run in a dDocent-test directory.\n"
+ exit 1
+fi
diff --git a/ddocent/files/ddocent-test b/ddocent/files/ddocent-test
deleted file mode 100644
index 3aa6ab5d3a..0000000000
--- a/ddocent/files/ddocent-test
+++ /dev/null
@@ -1,227 +0,0 @@
-#!/usr/bin/env bash
-
-set -ex
-
-##########################################################################
-# Function description:
-# Pause until user presses return
-##########################################################################
-
-pause()
-{
- set +x
- local junk
-
- printf "Press return to continue..."
- read junk
- set -x
-}
-
-
-##########################################################################
-# Not necessary if invoking scripts with "bash script-name" as shown
-# in the dDocent tutorial, but supplied for completeness. Tutorial
-# scripts have also been patched upstream to usr /usr/bin/env, but
-# we won't assume they'll all stay that way.
-##########################################################################
-
-fix_bash_path()
-{
- # Don't echo these commands
- set +x
- if [ $# != 1 ]; then
- printf "Usage: $0 script\n"
- exit 1
- fi
- sed -i.bak -e "s|#!/bin/bash|#!/usr/bin/env bash|g" $1
- set -x
-}
-
-
-##########################################################################
-# Main
-##########################################################################
-
-##########################################################################
-# Workarounds for running dDocent from FreeBSD ports and Pkgsrc.
-# Include this section in any scripts running dDocent commands.
-##########################################################################
-
-# Hack to allow remake_reference.sh, etc. to find trimmomatic.
-# trimmomatic.jar and adapter files are not an executables and should not
-# be in PATH according to filesystem hierarchy standards, but the dDocent
-# scripts are coded to look for them there, because that's how dDocent
-# installs the bundled Trimmomatic.
-if [ -e /usr/local/share/java/classes/trimmomatic.jar ]; then
- export PATH=${PATH}:/usr/local/share/java/classes:/usr/local/share/trimmomatic/adapters
-elif [ -e $PKGSRC/lib/java/trimmomatic.jar ]; then
- export PATH=${PATH}:$PKGSRC/lib/java:$PKGSRC/share/trimmomatic/adapters
-else
- printf "Error: Trimmomatic is not installed in a known location.\n" >> /dev/stderr
-fi
-
-if ! pwd | fgrep dDocent-test; then
- mkdir -p dDocent-test
- cd dDocent-test
-fi
-
-# remake_reference.sh and some other scripts use GNU extensions in awk
-# commands. Trick systems into using gawk to get around this without
-# having to patch every script.
-mkdir -p ddocent-bin
-ln -sf `which gawk` ddocent-bin/awk
-ln -sf `which python2.7` ddocent-bin/python
-export PATH=`pwd`/ddocent-bin:$PATH
-
-##########################################################################
-# Actual dDocent commands below
-##########################################################################
-
-##########################################################################
-# Command sequence from the dDocent tutorial on Github. See link below.
-##########################################################################
-
-cat << EOM
-
-This script runs the test commands described at
-
-https://github.com/jpuritz/dDocent/blob/master/tutorials/Reference%20Assembly%20Tutorial.md
-
-You may want to follow along on this web page as the script runs. It will
-pause after each step to allow checking the output.
-
-EOM
-
-mkdir -p Data
-cd Data
-
-if [ ! -e data.zip ]; then
- curl --insecure -L -o data.zip \
- 'https://www.dropbox.com/s/t09xjuudev4de72/data.zip?dl=0'
-fi
-
-# Hack: current data.zip contains data.zip. ??!
-if [ ! -e simRRLs2.py ]; then
- yes | unzip data.zip || true
-fi
-
-head SimRAD.barcodes
-
-cut -f2 SimRAD.barcodes > barcodes
-head barcodes
-
-process_radtags -1 SimRAD_R1.fastq.gz -2 SimRAD_R2.fastq.gz -b barcodes \
- -e ecoRI --renz_2 mspI -r -i gzfastq
-ls
-
-rm *rem*
-if [ ! -e Rename_for_dDocent.sh ]; then
- curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/Rename_for_dDocent.sh
- fix_bash_path Rename_for_dDocent.sh
-fi
-
-bash Rename_for_dDocent.sh SimRAD.barcodes
-ls *.fq.gz
-
-ls *.F.fq.gz > namelist
-sed -i'' -e 's/.F.fq.gz//g' namelist
-AWK1='BEGIN{P=1}{if(P==1||P==2){gsub(/^[@]/,">");print}; if(P==4)P=0; P++}'
-AWK2='!/>/'
-AWK3='!/NNN/'
-PERLT='while (<>) {chomp; $z{$_}++;} while(($k,$v) = each(%z)) {print "$v\t$k\n";}'
-
-cat namelist | parallel --no-notice -j 8 "zcat {}.F.fq.gz | mawk '$AWK1' | mawk '$AWK2' > {}.forward"
-cat namelist | parallel --no-notice -j 8 "zcat {}.R.fq.gz | mawk '$AWK1' | mawk '$AWK2' > {}.reverse"
-cat namelist | parallel --no-notice -j 8 "paste -d '-' {}.forward {}.reverse | mawk '$AWK3' | sed 's/-/NNNNNNNNNN/' | perl -e '$PERLT' > {}.uniq.seqs"
-
-cat *.uniq.seqs > uniq.seqs
-for i in {2..20};
-do
-echo $i >> pfile
-done
-cat pfile | parallel --no-notice "echo -n {}xxx && mawk -v x={} '\$1 >= x' uniq.seqs | wc -l" | mawk '{gsub("xxx","\t",$0); print;}'| sort -g > uniqseq.data
-rm pfile
-cat uniqseq.data
-
-gnuplot << \EOF
-set terminal dumb size 120, 30
-set autoscale
-set xrange [2:20]
-unset label
-set title "Number of Unique Sequences with More than X Coverage (Counted within individuals)"
-set xlabel "Coverage"
-set ylabel "Number of Unique Sequences"
-plot 'uniqseq.data' with lines notitle
-pause -1
-EOF
-
-parallel --no-notice -j 8 mawk -v x=4 \''$1 >= x'\' ::: *.uniq.seqs | cut -f2 | perl -e 'while (<>) {chomp; $z{$_}++;} while(($k,$v) = each(%z)) {print "$v\t$k\n";}' > uniqCperindv
-wc -l uniqCperindv
-
-for ((i = 2; i <= 10; i++));
-do
-echo $i >> ufile
-done
-
-cat ufile | parallel --no-notice "echo -n {}xxx && mawk -v x={} '\$1 >= x' uniqCperindv | wc -l" | mawk '{gsub("xxx","\t",$0); print;}'| sort -g > uniqseq.peri.data
-rm ufile
-gnuplot << \EOF
-set terminal dumb size 120, 30
-set autoscale
-unset label
-set title "Number of Unique Sequences present in more than X Individuals"
-set xlabel "Number of Individuals"
-set ylabel "Number of Unique Sequences"
-plot 'uniqseq.peri.data' with lines notitle
-pause -1
-EOF
-
-mawk -v x=4 '$1 >= x' uniqCperindv > uniq.k.4.c.4.seqs
-wc -l uniq.k.4.c.4.seqs
-
-cut -f2 uniq.k.4.c.4.seqs > totaluniqseq
-mawk '{c= c + 1; print ">Contig_" c "\n" $1}' totaluniqseq > uniq.fasta
-
-sed -e 's/NNNNNNNNNN/\t/g' uniq.fasta | cut -f1 > uniq.F.fasta
-cd-hit-est -i uniq.F.fasta -o xxx -c 0.8 -T 0 -M 0 -g 1
-
-mawk '{if ($1 ~ /Cl/) clus = clus + 1; else print $3 "\t" clus}' xxx.clstr | sed 's/[>Contig_,...]//g' | sort -g -k1 > sort.contig.cluster.ids
-paste sort.contig.cluster.ids totaluniqseq > contig.cluster.totaluniqseq
-sort -k2,2 -g contig.cluster.totaluniqseq | sed -e 's/NNNNNNNNNN/\t/g' > rcluster
-
-cut -f2 rcluster | uniq | wc -l
-
-rainbow div -i rcluster -o rbdiv.out
-rainbow div -i rcluster -o rbdiv.out -f 0.5 -K 10
-rainbow merge -o rbasm.out -a -i rbdiv.out
-rainbow merge -o rbasm.out -a -i rbdiv.out -r 2
-
-# If missing fdesc mount for bash, <(echo "E") will not work
-# cat rbasm.out <(echo "E") |sed 's/[0-9]*:[0-9]*://g' | mawk ' {
-echo "E" > endfile
-cat rbasm.out endfile |sed 's/[0-9]*:[0-9]*://g' | mawk ' {
-if (NR == 1) e=$2;
-else if ($1 ~/E/ && lenp > len1) {c=c+1; print ">dDocent_Contig_" e "\n" seq2 "NNNNNNNNNN" seq1; seq1=0; seq2=0;lenp=0;e=$2;fclus=0;len1=0;freqp=0;lenf=0}
-else if ($1 ~/E/ && lenp <= len1) {c=c+1; print ">dDocent_Contig_" e "\n" seq1; seq1=0; seq2=0;lenp=0;e=$2;fclus=0;len1=0;freqp=0;lenf=0}
-else if ($1 ~/C/) clus=$2;
-else if ($1 ~/L/) len=$2;
-else if ($1 ~/S/) seq=$2;
-else if ($1 ~/N/) freq=$2;
-else if ($1 ~/R/ && $0 ~/0/ && $0 !~/1/ && len > lenf) {seq1 = seq; fclus=clus;lenf=len}
-else if ($1 ~/R/ && $0 ~/0/ && $0 ~/1/) {seq1 = seq; fclus=clus; len1=len}
-else if ($1 ~/R/ && $0 ~!/0/ && freq > freqp && len >= lenp || $1 ~/R/ && $0 ~!/0/ && freq == freqp && len > lenp) {seq2 = seq; lenp = len; freqp=freq}
-}' > rainbow.fasta
-
-cd-hit-est -i rainbow.fasta -o referenceRC.fasta -M 0 -T 0 -c 0.9
-
-if [ ! -e remake_reference.sh ]; then
- curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/scripts/remake_reference.sh
- fix_bash_path remake_reference.sh
-fi
-bash remake_reference.sh 4 4 0.90 PE 2
-
-if [ ! -e ReferenceOpt.sh ]; then
- curl --insecure -L -O https://github.com/jpuritz/dDocent/raw/master/scripts/ReferenceOpt.sh
- fix_bash_path ReferenceOpt.sh
-fi
-bash ReferenceOpt.sh 4 8 4 8 PE 16
Home |
Main Index |
Thread Index |
Old Index