Difference between revisions of "ABySS"

From IBERS Bioinformatics and HPC Wiki
Jump to: navigation, search
Line 36: Line 36:
 
make
 
make
 
make install
 
make install
 +
    </nowiki>
 +
 +
==preparing reads==
 +
Before assembling my Illumina reads, I first used trimmomatic to remove adapter sequences. I then organised the fastq files into libraries, one for each fragment size.
 +
 +
I then used a custom python script to process the mate pair libraries to filter out read pairs which did not contain a valid mate pair.
 +
 +
I then renamed the reads from the default Illumina name to ABySS compatible names. ABySS expected the first read of a pair to end with /1, and the second read to have the same name but end with /2. I used my own python script to do this renaming.
 +
 +
==running==
 +
 +
I used the following script to assemble my Illumina paired end and mate pair data.
 +
 +
    <nowiki>
 +
#!/bin/sh
 +
 +
#$ -S /bin/sh
 +
#$ -N abyss
 +
#$ -o ../logs/$JOB_NAME.out.$JOB_ID
 +
#$ -e ../logs/$JOB_NAME.err.$JOB_ID
 +
#$ -cwd
 +
#$ -l h_vmem=245G
 +
 +
module load openmpi
 +
 +
SLOTS=16
 +
RDIR='/ibers/ernie/scratch/rov/large_data/avena_renamed/'
 +
ABYSSPE='/ibers/ernie/home/rov/programs/abyss-local/bin/abyss-pe'
 +
KMER=51
 +
OUT="k${KMER}-001"
 +
HOSTFILE='./myhostfile'
 +
export PATH=${PATH}:/ibers/ernie/home/rov/programs/abyss-local/bin
 +
#export TMPDIR=/ibers/ernie/scratch/rov/abyss_atlantica_assembly_2013-08-08/tmp
 +
export TMPDIR=/dev/shm/rov-abyss-tmp/k51_001
 +
 +
rm -rf ${TMPDIR}
 +
mkdir -p ${TMPDIR}
 +
 +
mkdir -p $OUT
 +
cd $OUT
 +
echo `hostname` slots=${SLOTS} > ${HOSTFILE}
 +
 +
${ABYSSPE} \
 +
np=${SLOTS} mpirun="mpirun -hostfile ${HOSTFILE}" \
 +
k=${KMER} \
 +
name=${OUT} \
 +
lib='pe200 pe700 peMP1' \
 +
mp='mpMP1' \
 +
pe200="${RDIR}pe200-R1.fq ${RDIR}pe200-R2.fq" \
 +
pe700="${RDIR}pe700-R1.fq ${RDIR}pe700-R2.fq" \
 +
peMP1="${RDIR}peMP1-R1.fq ${RDIR}peMP1-R2.fq" \
 +
mpMP1="${RDIR}mpMP1-R1.fq ${RDIR}mpMP1-R2.fq" \
 +
se="${RDIR}se.fq"
 +
 +
rm -rf ${TMPDIR}
 
     </nowiki>
 
     </nowiki>

Revision as of 09:05, 29 August 2013

ABySS is a de novo assembler for short-read sequencing data.

installing

I used the following script to install ABySS in my home folder (/home/rov). I first had to install the sparsehash and Boost libraries.

   
#!/bin/sh
#
# Configure, build and install ABySS into a local prefix under ${MYDIR}.
#
# Usage: cd into the ABySS source distribution directory and run this
# script as ../build_abyss.sh
#
# The boost and sparsehash trees referenced below are expected to already
# exist under ${MYDIR}.

# Put the OpenMPI toolchain on the environment via environment modules.
module load openmpi

MYDIR="/ibers/ernie/home/rov/programs"

#boost does not need to be compiled
RJVBOOST="--with-boost=${MYDIR}/boost_1_54_0"

#found using find / 2> /dev/null | grep openmpi
RJVOPENMPI="--with-mpi=/cm/shared/apps/openmpi/gcc/64/1.4.4"

#see https://groups.google.com/forum/#!msg/abyss-users/6NXwP959RTI/tqLtO14a4A8J
RJVLDFLAGS="LDFLAGS=-L/cm/shared/apps/openmpi/gcc/64/1.4.4/lib64"

#probably only one of these required
RJVCPPFLAGS="-I${MYDIR}/sparsehash-2.0.2/src/google/sparsehash -I${MYDIR}/sparsehash-2.0.2/src/google -I${MYDIR}/sparsehash-2.0.2/src"

#where to put abyss binaries
RJVINSTALL="--prefix=${MYDIR}/abyss-local"

#ensure max kmer size is > 64
RJVMAXK='--enable-maxk=96'

# Each option is quoted so it reaches configure as exactly one argument.
# Stop at the first failing step: building or installing after a failed
# configure/make would only produce misleading follow-on errors.
./configure "${RJVINSTALL}" "${RJVBOOST}" "${RJVOPENMPI}" "${RJVMAXK}" \
  "${RJVLDFLAGS}" CPPFLAGS="${RJVCPPFLAGS}" || exit 1
make || exit 1
make install
    

preparing reads

Before assembling my Illumina reads, I first used trimmomatic to remove adapter sequences. I then organised the fastq files into libraries, one for each fragment size.

I then used a custom python script to process the mate pair libraries to filter out read pairs which did not contain a valid mate pair.

I then renamed the reads from the default Illumina names to ABySS-compatible names. ABySS expects the first read of a pair to end with /1, and the second read to have the same name but end with /2. I used my own python script to do this renaming.

running

I used the following script to assemble my Illumina paired end and mate pair data.

   
#!/bin/sh

#$ -S /bin/sh
#$ -N abyss
#$ -o ../logs/$JOB_NAME.out.$JOB_ID
#$ -e ../logs/$JOB_NAME.err.$JOB_ID
#$ -cwd
#$ -l h_vmem=245G

# Batch job that assembles Illumina paired-end and mate-pair reads with
# abyss-pe. The "#$" lines above are scheduler directives and must keep
# their exact form.
#
# The job works inside a per-run output directory (${OUT}) and uses a
# scratch directory under /dev/shm (${TMPDIR}), which is removed again
# when the script exits. The exit status of the script is the exit
# status of abyss-pe.

module load openmpi

SLOTS=16
RDIR='/ibers/ernie/scratch/rov/large_data/avena_renamed/'
ABYSSPE='/ibers/ernie/home/rov/programs/abyss-local/bin/abyss-pe'
KMER=51
OUT="k${KMER}-001"
HOSTFILE='./myhostfile'
export PATH="${PATH}:/ibers/ernie/home/rov/programs/abyss-local/bin"
#export TMPDIR=/ibers/ernie/scratch/rov/abyss_atlantica_assembly_2013-08-08/tmp
export TMPDIR=/dev/shm/rov-abyss-tmp/k51_001

# Remove the scratch directory. ${TMPDIR:?} aborts instead of expanding
# to an empty string, so rm -rf can never run against an empty path.
cleanup() {
  rm -rf -- "${TMPDIR:?}"
}
# Clean up on every exit path, not only when the last line is reached.
# The signal trap turns HUP/INT/TERM into a normal exit so that the EXIT
# trap also fires in shells that do not run it on signals.
trap cleanup EXIT
trap 'exit 1' HUP INT TERM

# Start from an empty scratch directory (drops leftovers of earlier runs).
cleanup
mkdir -p -- "${TMPDIR}" || exit 1

# Everything below is relative to the output directory; do not continue
# if it cannot be created or entered.
mkdir -p -- "${OUT}" || exit 1
cd -- "${OUT}" || exit 1

# Single-host hostfile handed to mpirun: "<hostname> slots=<n>".
printf '%s slots=%s\n' "$(hostname)" "${SLOTS}" > "${HOSTFILE}"

# lib= names the libraries passed as paired-end data, mp= the mate-pair
# library, se= the single-end reads; each library name is then bound to
# its pair of fastq files.
"${ABYSSPE}" \
  np="${SLOTS}" mpirun="mpirun -hostfile ${HOSTFILE}" \
  k="${KMER}" \
  name="${OUT}" \
  lib='pe200 pe700 peMP1' \
  mp='mpMP1' \
  pe200="${RDIR}pe200-R1.fq ${RDIR}pe200-R2.fq" \
  pe700="${RDIR}pe700-R1.fq ${RDIR}pe700-R2.fq" \
  peMP1="${RDIR}peMP1-R1.fq ${RDIR}peMP1-R2.fq" \
  mpMP1="${RDIR}mpMP1-R1.fq ${RDIR}mpMP1-R2.fq" \
  se="${RDIR}se.fq"