1.Clone the project
git clone https://github.com/liuchuwei/caNano.git
1.Install conda environment
conda env create -f caNano.yaml
2.Prepare toolkit:
Check and modify the tookit.py file (in the 'utils' directory).
1.Basecalling
python 01.basecalling.py -i $fast5 -o $out
2.Resquiggle
preprocess
conda activate tombo
python 02.resquiggle_pre.py -f $fast5 -o $out
annotate_raw_with_fastqs
cat *.fastq > merge.fastq
python 03.resquiggle.py preprocess annotate_raw_with_fastqs \
--fast5-basedir $single \
--fastq-filenames $merge_fastq \
--overwrite \
--processes 8
resquiggling
python 03.resquiggle.py resquiggle $fast5 $reference \
--rna \
--corrected-group RawGenomeCorrected_000 \
--basecall-group Basecall_1D_000 \
--overwrite \
--processes 16 \
--fit-global-scale \
--include-event-stdev
3.Minimap
python 04.minimap.py -i <directory of fastq files> -o <output directory> -r <path of reference>
4.Eventalign: preprocess data for the m6anet model; you can skip this step if you don't use the m6anet model.
python 05.eventalign.py -f <directory of fast5 files> -o <output directory> \
-fq <path of fastq> -r <path of reference> -bam <path of bam files>
5.m6A detection
(1) caNano
activate environment
conda activate caNano
preprocess
python caNano.py preprocess --single $single_fast5 -o $output -g $genome.fa -r $transcript.fa -i $gene2transcripts.txt -b $bam
train_read
python caNano.py train_read --mod %mod.tsv --unmod %unmod.tsv --out %output
train_site
python caNano.py predict_read --input %feature.tsv --output %output --model %model
(2) Tombo
activate environment
conda activate tombo
predict
python tombo.py detect_modifications de_novo --fast5-basedirs <directory of fast5 files> \
--statistics-file-basename <output name> \
--corrected-group RawGenomeCorrected_000 \
--processes 16
output
python tombo.py text_output browser_files --fast5-basedirs <directory of fast5 files> \
--statistics-filename <output name> \
--browser-file-basename wt_rrach \
--genome-fasta <path of reference> \
--motif-descriptions RRACH:3:m6A \
--file-types coverage dampened_fraction fraction \
--corrected-group RawGenomeCorrected_000
(3) Mines
activate environment
conda activate Mines
tidy tombo result
awk '{if($0!=null){print $0}}' wt.fraction_modified_reads.plus.wig > wt.wig
wig2bed < wt.wig > wt.fraction_modified_reads.plus.wig.bed --multisplit=mines
predict
python Mines.py --fraction_modified $tombo/wt.fraction_modified_reads.plus.wig.bed \
--coverage $tombo/wt.coverage.plus.bedgraph \
--output wt.bed \
--ref $ref \
--kmer_models $MINES/Final_Models/names.txt
(4) m6anet
data preprocess
m6anet dataprep --eventalign wt_eventalign.txt \
--out_dir wt \
--n_processes 16 \
--readcount_max 2000000
predict
m6anet inference --input_dir wt \
--out_dir run/wt \
--n_processes 16
(5) Nanom6A
list all fast5 files
find single -name "*.fast5" >files.txt
extracting signals
extract_raw_and_feature_fast --cpu=20 --fl=files.txt -o result --clip=10
predict
predict_sites --cpu 20 -i result -o result_final -r data/cc_ref.fa -g data/cc_ref.fa -b data/gene2transcripts.txt --model Nanom6A/model
(6) Eligos
python ELGOS rna_mod -i <bam file> -reg <bed files> -ref <REFERENCE> -m <rBEM5+2 model> -p <output file prefix> \
-o <output file directory> --sub_bam_dir <SUB_BAM_DIR> \
--max_depth 2000000 --min_depth 5 --esb 0 --oddR 1 --pval 1 -t 16
(7) Epinano
extract features
python 07.Epinano_Variants.py -R $ref -b <wt / ko bam file> -s <path to sam2tsv> -n 16 -T t
slide features
python 07.Epinano_slide_feature.py <per.site.csv> 5
predict
python 07.Epinano_Predict.py \
--model $Epinano/models/rrach.q3.mis3.del3.linear.dump \
--predict wt.plus_strand.per.site.5mer.csv \
--columns 8,13,23 \
--out_prefix wt
Distributed under the GPL-2.0 License. See LICENSE for more information.