-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy path02-align_cluster.sh
More file actions
122 lines (99 loc) · 2.59 KB
/
02-align_cluster.sh
File metadata and controls
122 lines (99 loc) · 2.59 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
#!/bin/bash
#
#SBATCH --job-name=RNA-seq
#SBATCH --ntasks=8 # Number of cores/threads
#SBATCH --mem=32000 # Ram in Mb
#SBATCH --partition=production
#SBATCH --time=0-04:00:00
##########################################################################################
# Author: Ben Laufer
# Email: blaufer@ucdavis.edu
##########################################################################################
###################
# Run Information #
###################
# Record the start time (seconds since the epoch) so the elapsed time can be
# reported at the end of the job, and log which node the job landed on.
start=$(date +%s)
hostname

# Cores granted by Slurm (--ntasks). Falls back to 1 when the script is run
# outside of a Slurm allocation so that THREADS is never empty.
THREADS=${SLURM_NTASKS:-1}

# Allocated memory, converted from MB to GB.
# Slurm exports SLURM_MEM_PER_CPU only for --mem-per-cpu requests; a --mem
# request (which is what the #SBATCH header of this script uses) exports
# SLURM_MEM_PER_NODE instead. Reading only SLURM_MEM_PER_CPU left MEM empty.
# Note: the first branch reports GB per CPU, the second GB per node.
if [[ -n "${SLURM_MEM_PER_CPU:-}" ]]; then
  MEM=$(( SLURM_MEM_PER_CPU / 1024 ))
elif [[ -n "${SLURM_MEM_PER_NODE:-}" ]]; then
  MEM=$(( SLURM_MEM_PER_NODE / 1024 ))
else
  # Not running under Slurm (or no memory limit exported): nothing allocated.
  MEM=0
fi

echo "Allocated threads: " "${THREADS}"
echo "Allocated memory: " "${MEM}"
################
# Load Modules #
################
# Root of the shared lab directory; exported so child processes inherit it.
mainPath="/share/lasallelab"
export mainPath

# Aligner and BAM utilities, loaded in the same order as before.
for mod in star/2.7.3a samtools/1.11; do
  module load "${mod}"
done

# PYTHON_EGG_CACHE is exported before trim_galore is loaded, preserving the
# original ordering of these two steps.
PYTHON_EGG_CACHE="${mainPath}/programs/CpG_Me"
export PYTHON_EGG_CACHE
module load trim_galore/0.6.6

# Activate the cutadapt-2.10 environment (presumably a conda environment that
# provides cutadapt for trim_galore -- confirm against the cluster setup).
source activate cutadapt-2.10
######################
# Set Up Environment #
######################
# Resolve the sample handled by this array task and derive every path and
# file pattern used by the trimming, alignment and indexing steps.
#
# Reads:  SLURM_ARRAY_TASK_ID, ./task_samples.txt (one sample name per line)
# Sets:   directory, sample, rawpath, mappath, fastq1, fastq2, trim1, trim2, BAM
# Exits:  1 when the task id or the sample list cannot yield a sample name
directory=${PWD}/

# The task id is used as a 1-based line number. Without this check an unset
# id turns the sed program into "q;d", which silently selects the first
# sample, and an id of 0 is not a valid sed line address.
if [[ ! "${SLURM_ARRAY_TASK_ID:-}" =~ ^[1-9][0-9]*$ ]]; then
  echo "ERROR: SLURM_ARRAY_TASK_ID must be a positive integer (submit with sbatch --array=1-N)" >&2
  exit 1
fi

if [[ ! -r task_samples.txt ]]; then
  echo "ERROR: cannot read ${directory}task_samples.txt" >&2
  exit 1
fi

# Print only line number SLURM_ARRAY_TASK_ID of the sample list.
sample=$(sed "${SLURM_ARRAY_TASK_ID}q;d" task_samples.txt)
# Drop a trailing carriage return in case the list was saved with CRLF endings.
sample=${sample%$'\r'}

# An empty name would make mappath equal to the working directory and turn
# the read patterns below into "_*_R1_001.fastq.gz", matching unrelated files.
if [[ -z "${sample}" ]]; then
  echo "ERROR: line ${SLURM_ARRAY_TASK_ID} of task_samples.txt is empty or missing" >&2
  exit 1
fi

rawpath=${directory}raw_sequences/
mappath=${directory}${sample}

# The four variables below hold glob PATTERNS, not file names: assignments do
# not undergo pathname expansion, so the "*" is expanded later, where the
# variables are used.
fastq1=${rawpath}${sample}_*_R1_001.fastq.gz
fastq2=${rawpath}${sample}_*_R2_001.fastq.gz
trim1=${sample}_*_val_1.fq.gz
trim2=${sample}_*_val_2.fq.gz

# Name of the coordinate-sorted BAM, built from the same "<sample>_" prefix
# that is passed to STAR as --outFileNamePrefix.
BAM=${sample}_Aligned.sortedByCoord.out.bam
########
# Trim #
########
# Adapter/quality trimming of the paired raw reads with trim_galore; output
# goes to the per-sample directory ${mappath}.
# Use --2colour for NovaSeq and NextSeq, replace with --quality for HiSeq and MiSeq
# Should trim for STAR: https://github.com/alexdobin/STAR/issues/455#issuecomment-407539412

# Without a sample name ${mappath} is the working directory itself and the
# read patterns would match unrelated files, so stop here.
if [[ -z "${sample:-}" ]]; then
  echo "ERROR: no sample name resolved; check SLURM_ARRAY_TASK_ID and task_samples.txt" >&2
  exit 1
fi

# -p: succeed when the directory is left over from a previous attempt.
if ! mkdir -p "${mappath}"; then
  echo "ERROR: cannot create output directory ${mappath}" >&2
  exit 1
fi

# fastq1/fastq2 hold glob patterns; expand them here. The expansion is
# intentionally unquoted so the "*" is matched (paths containing whitespace
# are therefore not supported, as before).
# shellcheck disable=SC2206
raw_r1=( ${fastq1} )
# shellcheck disable=SC2206
raw_r2=( ${fastq2} )

# trim_galore --paired consumes its file arguments as consecutive pairs, so
# passing several R1 files followed by several R2 files would pair R1 with R1.
# Require exactly one existing file per mate. An unmatched pattern stays a
# literal string and is caught by the -f test.
if (( ${#raw_r1[@]} != 1 || ${#raw_r2[@]} != 1 )) \
  || [[ ! -f "${raw_r1[0]}" || ! -f "${raw_r2[0]}" ]]; then
  echo "ERROR: expected exactly one R1 and one R2 file for ${sample}" >&2
  echo "       R1 pattern: ${fastq1}" >&2
  echo "       R2 pattern: ${fastq2}" >&2
  exit 1
fi

# Build the command as an array so every argument reaches trim_galore intact,
# without a second round of shell parsing through eval.
trim_cmd=(
  trim_galore
  --paired
  --cores 2
  --2colour 20
  --fastqc
  --output_dir "${mappath}"
  "${raw_r1[0]}"
  "${raw_r2[0]}"
)
echo "${trim_cmd[*]}"

# Stop the job when trimming fails instead of aligning missing/partial files.
if ! "${trim_cmd[@]}"; then
  echo "ERROR: trim_galore failed for ${sample}" >&2
  exit 1
fi
#########
# Align #
#########
# Align the trimmed read pair with STAR inside the per-sample directory.
# Adjust the genome directory below; the thread count follows THREADS.
# Use zcat command for fastq.gz https://www.biostars.org/p/243683/
# ENCODE options from section 3.3.2 of STAR manual
# Use quantMode to get GeneCounts for R https://www.biostars.org/p/218995/
genome_dir=/share/lasallelab/genomes/GRCm38/star_150/

# Follow the Slurm allocation instead of a hard-coded thread count; fall back
# to the previous value of 8 when THREADS is unset or not a positive integer.
star_threads=${THREADS:-8}
[[ "${star_threads}" =~ ^[1-9][0-9]*$ ]] || star_threads=8

# The trimmed-read patterns and the output prefix are relative, so STAR must
# run from ${mappath}. Abort if the directory cannot be entered rather than
# aligning and writing results in the wrong place.
if ! cd "${mappath}"; then
  echo "ERROR: cannot change into ${mappath}" >&2
  exit 1
fi

# trim1/trim2 hold glob patterns; expand them here (intentionally unquoted).
# shellcheck disable=SC2206
trimmed_r1=( ${trim1} )
# shellcheck disable=SC2206
trimmed_r2=( ${trim2} )

# Require exactly one existing file per mate: space-separated extra files
# would be misread by --readFilesIn, and an unmatched pattern stays a literal
# string that the -f test rejects.
if (( ${#trimmed_r1[@]} != 1 || ${#trimmed_r2[@]} != 1 )) \
  || [[ ! -f "${trimmed_r1[0]}" || ! -f "${trimmed_r2[0]}" ]]; then
  echo "ERROR: expected exactly one trimmed R1 and R2 file for ${sample} in ${mappath}" >&2
  exit 1
fi

# Command built as an array: no eval, and no dependence on line-continuation
# backslashes (one was missing after --outFilterMismatchNmax 999).
star_cmd=(
  STAR
  --runThreadN "${star_threads}"
  --genomeDir "${genome_dir}"
  --readFilesIn "${trimmed_r1[0]}" "${trimmed_r2[0]}"
  --readFilesCommand zcat
  --outFilterType BySJout
  --outFilterMultimapNmax 20
  --alignSJoverhangMin 8
  --alignSJDBoverhangMin 1
  --outFilterMismatchNmax 999
  --outFilterMismatchNoverReadLmax 0.04
  --alignIntronMin 20
  --alignIntronMax 1000000
  --alignMatesGapMax 1000000
  --outSAMtype BAM SortedByCoordinate
  --outFileNamePrefix "${sample}_"
  --quantMode GeneCounts
)
echo "${star_cmd[*]}"

# Stop the job when alignment fails so indexing is not attempted on a
# missing or truncated BAM.
if ! "${star_cmd[@]}"; then
  echo "ERROR: STAR failed for ${sample}" >&2
  exit 1
fi
#########
# Index #
#########
# Index the coordinate-sorted BAM named by ${BAM} (resolved relative to the
# current directory).
#
# samtools -@ takes the number of threads to use IN ADDITION to the main
# thread, hence THREADS - 1. Falls back to the previous total of 8 when
# THREADS is unset or not a positive integer.
index_threads=${THREADS:-8}
[[ "${index_threads}" =~ ^[1-9][0-9]*$ ]] || index_threads=8

# Command built as an array so ${BAM} is passed as a single argument and no
# eval is needed.
index_cmd=(
  samtools
  index
  -@ "$(( index_threads - 1 ))"
  "${BAM}"
)
echo "${index_cmd[*]}"

# Report an indexing failure through the job's exit status.
if ! "${index_cmd[@]}"; then
  echo "ERROR: samtools index failed for ${BAM}" >&2
  exit 1
fi
###################
# Run Information #
###################
# Print the elapsed wall-clock time in seconds: the current epoch time minus
# the value stored in `start`.
end=$(date +%s)
runtime=$(( end - start ))
printf '%s\n' "${runtime}"