#!/bin/sh

##########################################################################
#   Description:
#       Simple menu system for running microsynteny tools
#
#   Conventions:
#       Global variables capitalized, local all lower-case
#       Constants all caps
#
#   History:
#   Date        Name        Modification
#   2022-04-12  Jason Bacon Begin
##########################################################################

##########################################################################
#   Function description:
#       Print a usage message for this script and terminate
#
#   Arguments:
#       None
#
#   Returns:
#       Does not return; exits the script with status 1
##########################################################################

usage()
{
    # $0 is passed as an argument rather than embedded in the format
    # string, so a '%' or backslash in the script path prints literally
    printf 'Usage: %s\n' "$0"
    exit 1
}


##########################################################################
#   Function description:
#       Pause until user presses return
#
#   Arguments:
#       None
#
#   Returns:
#       Exit status of read (non-zero if input ends before a full line)
##########################################################################

pause()
{
    local junk

    printf "Press return to continue..."
    # -r: take the line literally.  Without it, a line ending in a
    # backslash is treated as a continuation and read keeps waiting for
    # (and consumes) the following line as well.
    read -r junk
}


##########################################################################
#   Function description:
#       Print the unique gene names for which region files exist.
#       The gene name is taken as the second '-'-separated field of
#       each filename in the Regions directory.
#
#   Arguments:
#       None
#
#   Returns:
#       0 always
#
#   History:
#   Date        Name        Modification
#   2022-04-19  Jason Bacon Begin
##########################################################################

extracted_genes()
{
    local genes

    # Assigned separately from the declaration: some sh implementations
    # word-split an unquoted "local var=$(...)", and this value is a
    # space-separated list
    genes=$(ls Regions | cut -d - -f 2 | sort -u | tr '\n' ' ')

    # Gene names are data, not format text: keep them out of the format
    # string so '%' or '\' in a name prints literally
    printf 'Extracted genes:\n\n%s\n\n' "$genes"
    return 0
}


##########################################################################
#   Function description:
#       Print the gene names in a gene list file on one line.
#       The gene name is the first '|'-separated field of each line.
#
#   Arguments:
#       $1  Gene list filename
#
#   Returns:
#       0 on completion; exits the script with status 1 if not given
#       exactly one argument
#
#   History:
#   Date        Name        Modification
#   2022-04-19  Jason Bacon Begin
##########################################################################

available_genes()
{
    local genes

    if [ "$#" -ne 1 ]; then
        printf "Usage: available_genes gene-list-filename\n"
        exit 1
    fi

    # Quote the filename so paths containing whitespace work
    genes=$(cut -d '|' -f 1 "$1" | tr '\n' ' ')

    # File contents are data, not format text: '%' or '\' in a gene name
    # must print literally
    printf 'Available genes:\n\n%s\n\n' "$genes"
    return 0
}


##########################################################################
#   Function description:
#       Obtain the adjacent gene count and the maximum distance in NT by
#       running the external auto-ask command once for each, and store
#       the answers in the globals Adjacent_genes and Max_nt.
#       NOTE(review): auto-ask is not defined in this file; presumably
#       it takes a tag, a prompt and a default response -- confirm.
#
#   Arguments:
#       None
#
#   Returns:
#       Exit status of the second auto-ask invocation
#
#   History:
#   Date        Name        Modification
#   2022-04-16  Jason Bacon Begin
##########################################################################

get_adjacent_genes_max_nt()
{
    local adjacent_default=5
    local max_nt_default=300000

    Adjacent_genes=$(auto-ask adjacent-genes "Adjacent genes?" "$adjacent_default")
    Max_nt=$(auto-ask max-nt "Max distance in NT?" "$max_nt_default")
}


##########################################################################
#   Function description:
#       List the *-ortho.txt files in the Genes directory, ask for one
#       of them via the external auto-ask command, and store the answer,
#       prefixed with "Genes/", in the global Gene_list_filename.
#
#   Arguments:
#       None
#
#   Returns:
#       0; the working directory is left unchanged
#
#   History:
#   Date        Name        Modification
#   2022-04-16  Jason Bacon Begin
##########################################################################

get_gene_list_filename()
{
    local default=de-tf-refined-ortho.txt

    printf '\nAvailable gene lists (copy and paste one filename):\n\n'

    # Run the listing in a subshell: if Genes is missing, the failed cd
    # must not be followed by a "cd .." that moves the caller out of its
    # working directory
    ( cd Genes && ls -- *-ortho.txt )

    Gene_list_filename=$(auto-ask gene-list-filename "\nGene list?" "$default")
    Gene_list_filename="Genes/$Gene_list_filename"
}


##########################################################################
#   Main
##########################################################################

# This script takes no command-line arguments
if [ "$#" -ne 0 ]; then
    usage
fi

export AUTO_ASK_TAG_PREFIX=microsynteny-tools-

# Only configure PATH/AWK_PATH when the caller has not already set AWK_PATH
if [ -z "$AWK_PATH" ]; then
    if [ "$0" = Scripts/ms-menu ]; then
        # Development: Running from repo dir
        PATH="$(pwd):$(pwd)/Scripts:$PATH"
        AWK_PATH="$(pwd)/Libexec"
        export PATH AWK_PATH
        make
    elif [ "$0" = ./ms-menu ] &&
         [ "$(basename "$(dirname "$(pwd)")")" = microsynteny-tools ]; then
        # Development: Running from the Scripts dir inside the repo.
        # The parent directory name is checked because the code below
        # moves up one level and expects Scripts and Libexec there.
        # ("cut -d / -f -1" selects the FIRST field of the path, which is
        # empty for an absolute path, so it could never match.)
        # NOTE(review): assumes the repo directory is named
        # microsynteny-tools -- confirm this is the intended check.
        cd ..
        PATH="$(pwd):$(pwd)/Scripts:$PATH"
        AWK_PATH="$(pwd)/Libexec"
        export PATH AWK_PATH
        make
    else
        # Installed
        export AWK_PATH=/usr/pkg/libexec
    fi
fi

# Seed the gene lists on first use.  Done after any "cd .." above so that
# Genes is created in the directory the rest of the script works in.
if [ ! -d Genes ]; then
    mkdir Genes
    cp /usr/pkg/share/microsynteny-tools/Genes/*-ortho.txt Genes
fi

##########################################################################
#   Function description:
#       Print the region files available for one gene, followed by a
#       hint on how to answer the adjacent-genes / max-distance prompts
#
#   Arguments:
#       $1  Gene name as it appears in the region filenames
##########################################################################

list_gene_regions()
{
    printf '\nAvailable gene regions:\n\n'
    ls Regions/*-"$1"-*.gff3
    cat << EOM

The last two numbers in each filename above indicate the number of adjacent
genes and maximum distance in NT for that region.  Choose from among these to
answer the next two questions.  You can also generate additional region
files by going back to option 2.

EOM
}


##########################################################################
#   Function description:
#       Print the unique first '-'-separated field of each filename in
#       the Regions directory (the species part of the name), without
#       changing the working directory
##########################################################################

list_region_species()
{
    printf "\nCopy and paste species names separated by space.\n\n"
    ls Regions | cut -d - -f 1 | sort -u
}


# Main menu: redraw, read one selection, dispatch, pause, repeat.
# The Q|q arm exits directly; the loop also ends at end of input.
selection=''
while [ "$selection" != q ]; do
    clear
    # Banner generated at https://www.ascii-art-generator.org
    cat << EOM
	   __  ____                                  __                   
	  /  |/  (_)_____________  _______  ______  / /____  ____  __  __
	 / /|_/ / / ___/ ___/ __ \/ ___/ / / / __ \/ __/ _ \/ __ \/ / / /
	/ /  / / / /__/ /  / /_/ (__  ) /_/ / / / / /_/  __/ / / / /_/ /
       /_/  /_/_/\___/_/   \____/____/\__, /_/ /_/\__/\___/_/ /_/\__, /
				     /____/                     /____/
			     ______            __
			    /_  __/___  ____  / /____
			     / / / __ \/ __ \/ / ___/
			    / / / /_/ / /_/ / (__  )
			   /_/  \____/\____/_/____/

1.. Download Ensembl GFFs, transcriptomes and genomes (may take a while)
2.. Extract regions
3.. Find intersections between regions across fish and mammals
4.. View stacked regions across species (scale discarded)
5.. Visualize regions in scale
6.. Locate genes in one or more GFF files
7.. Build/download BLAST databases
8.. BLAST a Danio gene against another species
9.. Show percent changed stats for fish and mammals
    (Extract regions for de-tf-ortho.txt and non-de-tf-ortho.txt first)
Q.. Quit

EOM
    printf "Selection? "
    # At end of input read fails; leave the loop rather than redrawing the
    # menu and reporting an invalid selection forever
    read -r selection || break
    case "$selection" in
    1)  # Download genomics files
        mkdir -p GFF Transcriptome Reference
        # Each download runs in a subshell so a failed cd cannot leave
        # the menu in the wrong directory
        ( cd GFF && ms-fetch-gffs )
        ( cd Transcriptome && ms-fetch-cdnas )
        ( cd Reference && ms-fetch-refs )
        ;;

    2)  # Extract
        cat << EOM

For computing intersects, a large gene count (e.g. 10) and moderate max
distance (e.g. 500,000) is suggested.  Some regions are densely packed with
many neighboring genes within range to contain regulatory elements.

For visualizing stacked regions, a small gene count (e.g. 5 or less) and
large max distance (e.g. 1,000,000) may be preferable to avoid overpacking
the plot or missing distant neighbors.

EOM
        get_adjacent_genes_max_nt
        get_gene_list_filename
        mkdir -p Output
        outfile=Output/$(basename "${Gene_list_filename%.txt}-$Adjacent_genes-$Max_nt-extract.out")
        ms-gene-list-extract \
            --adjacent-genes "$Adjacent_genes" --max-nt-distance "$Max_nt" \
            "$Gene_list_filename" > "$outfile"
        printf '\nSummary output saved in %s.\n' "$outfile"
        printf "Region GFFs stored in Regions directory.\n"
        pause
        more "$outfile"
        ;;

    3)  # Intersect
        get_adjacent_genes_max_nt
        get_gene_list_filename
        list_region_species
        # Assign, then export: the answers contain spaces, and an unquoted
        # "export VAR=$(...)" is word-split by some sh implementations
        FISH=$(auto-ask intersect-fish-species "\nFish species?" 'Danio_rerio Oryzias_latipes Takifugu_rubripes')
        MAMMALS=$(auto-ask intersect-mammal-species "Mammal species?" 'Mus_musculus Rattus_norvegicus Homo_sapiens')
        export FISH MAMMALS
        mkdir -p Output
        outfile=Output/$(basename "${Gene_list_filename%.txt}-intersect.out")
        ms-gene-list-fish-mammal-intersect \
            "$Adjacent_genes" "$Max_nt" "$Gene_list_filename" \
            > "$outfile"
        printf '\nSummary output saved in %s.\n' "$outfile"
        printf "Intersection files stored in Intersects-* directories.\n"
        pause
        more "$outfile"
        ;;

    4)  # View stacked
        get_gene_list_filename
        available_genes "$Gene_list_filename"
        gene=$(auto-ask stack-gene-name "Gene name (case-sensitive)?" jun)
        if [ -n "$gene" ]; then
            list_gene_regions "$gene"
            get_adjacent_genes_max_nt
            ms-stack "$Adjacent_genes" "$Max_nt" "$Gene_list_filename" "$gene"
        fi
        ;;

    5)  # View in scale
        get_gene_list_filename
        available_genes "$Gene_list_filename"
        gene=$(auto-ask scale-gene-name "Gene name (case-sensitive)?" jun)
        if [ -n "$gene" ]; then
            list_gene_regions "$gene"
            get_adjacent_genes_max_nt
            list_region_species
            species=$(auto-ask scale-species-list "\nSpecies? [Leave blank for all]" '')
            # $species is intentionally unquoted: it is a space-separated
            # list (possibly empty) that must become separate arguments
            ms-plot "$Adjacent_genes" "$Max_nt" "$gene" $species
        fi
        ;;

    6)  # Locate in GFFs
        printf "\nHint: Use a portion of a gene name to allow orthologs, e.g. sox11 vs sox11a\n\n"
        gene_list=$(auto-ask locate-gene-list "Gene names (separated by space)?" '')
        if [ -n "$gene_list" ]; then
            printf "\nAvailable species:\n\n"
            ( cd GFF && ls -- *.gff3 | cut -d . -f 1 )
            default_species="Danio_rerio Oryzias_latipes Takifugu_rubripes Homo_sapiens Mus_musculus Rattus_norvegicus"
            # Same tag/prompt/default form as every other auto-ask call
            # here; the default must not carry a trailing ']'
            species_list=$(auto-ask locate-species "\nSpecies?" "$default_species")
            if [ -z "$species_list" ]; then
                species_list="$default_species"
            fi
            # $species_list and $gene_list are intentionally unquoted
            # (space-separated lists); the GFF pattern is left to glob
            for species in $species_list; do
                ms-locate-gene GFF/"$species".*.gff3 $gene_list
            done
        fi
        ;;

    7)  # Create/download BLAST dbs
        ms-blast-dbs
        ;;

    8)  # BLAST search
        ms-blast
        ;;

    9)  # DE vs non-DE stats
        list_gene_regions jun
        get_adjacent_genes_max_nt
        ms-fish-mammal-percent-changed "$Adjacent_genes" "$Max_nt"
        ;;

    Q|q)
        exit 0
        ;;

    *)
        # User input is printed as data, never as a format string
        printf 'Invalid selection: %s\n' "$selection"
        ;;
    esac
    pause
done
