## Additional file 1: Text S1
## General commands used; anything surrounded by square brackets [] is a placeholder to replace depending on context

## Basecalling of MinION reads with albacore:
full_1dsq_basecaller.py --flowcell FLO-MIN107 --kit SQK-LSK308 --input [PATH/TO/FAST5/FILES] --save_path ./ --worker_threads [38]

## Commands to assemble long read genomes for each of the assemblers used:
## Abruijn:
abruijn [PATH/TO/READS] out_nano [COVERAGE_ESTIMATE] --platform nano --threads [56]
## Canu:
canu -p [UNIQUE_NAME] genomeSize=12.8m -nanopore-raw [PATH/TO/READS] gnuplotTested=true
## SMARTdenovo:
smartdenovo.pl -p [UNIQUE_NAME] [PATH/TO/READS] > [UNIQUE_NAME].mak
make -f [UNIQUE_NAME].mak
wtcns -t 64 < [UNIQUE_NAME].dmo.lay > [UNIQUE_NAME].fasta

## Commands to polish long read assemblies with Nanopolish:
nanopolish index -d [PATH/TO/FAST5/FILES] [PATH/TO/READS]
bwa index [PATH/TO/ASSEMBLY/TO/POLISH]
bwa mem -x ont2d -t [8] [PATH/TO/ASSEMBLY/TO/POLISH] [PATH/TO/READS] | samtools sort -o reads.sorted.bam -T reads.tmp
samtools index reads.sorted.bam
python nanopolish_makerange.py [PATH/TO/ASSEMBLY/TO/POLISH] | parallel --results nanopolish.results -P 14 nanopolish variants --consensus [UNIQUE_NAME].{1}.fa -w {1} -r [PATH/TO/READS] -b reads.sorted.bam -g [PATH/TO/ASSEMBLY/TO/POLISH] -t [4] --min-candidate-frequency 0.1
python nanopolish_merge.py [UNIQUE_NAME].*.fa > [UNIQUE_NAME]_genome.fa

## Creating hybrid assemblies from short reads and long read assemblies using pilon:
bwa index [GENOME_TO_CORRECT]
bwa mem -t [56] [GENOME_TO_CORRECT] [SHORT_READS1] [SHORT_READS2] > [NAME].sam
samtools sort [NAME].sam -o [NAME]_sorted.bam
samtools index [NAME]_sorted.bam
pilon -Xmx200g --genome [GENOME_TO_CORRECT] --frags [NAME]_sorted.bam --output [UNIQUE_NAME]

## Structural variant prediction using ngmlr and sniffles:
ngmlr -t [56] -r [HYBRID_ASSEMBLY] -q [LONG_READS] -o [UNIQUE_NAME_ngmlr].sam -x ont
## Note: sniffles reads a coordinate-sorted BAM, so the ngmlr SAM output above must first be sorted and converted to BAM (e.g. with samtools sort)
sniffles -t [56] --genotype --cluster --report_seq -n -1 -m [ALIGNED_LONG_READS_ngmlr_sorted].bam -v [UNIQUE_NAME_SVs].vcf

## Mapping Giardia AWB Reference proteins to hybrid genomes for gene prediction:
exonerate -m protein2genome -q [AWB_PROTEINS].fasta -t [HYBRID_ASSEMBLY].fasta -M [250000] -n 1 --showalignment FALSE --showvulgar FALSE --showtargetgff > [UNIQUE_NAME].txt
sed '/^#/d' [UNIQUE_NAME].txt > [UNIQUE_NAME].gff

## Finding genes overlapping structural variant regions:
sed '1,2d;$d' [UNIQUE_NAME].gff > [UNIQUE_NAME_2].gff
bedtools intersect -a [UNIQUE_NAME_SVs].vcf -b [UNIQUE_NAME_2].gff -wb > [UNIQUE_NAME_intersect_vcf_genesonlyn1gff].txt