### =========================================================================
### Helpers for SELECT'ing stuff from a TxDb object
### -------------------------------------------------------------------------
###
### Nothing in this file is exported.
###


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Low-level helpers (schema agnostic) for building SQL queries
###

.as_qualified <- function(tables, columns) paste(tables, columns, sep=".")

.tables_in_joins <- function(joins)
{
    joins_len <- length(joins)
    stopifnot(joins_len %% 2L == 1L)
    joins[seq(1L, joins_len, by=2L)]
}

.build_SQL_FROM <- function(joins, join_type="INNER")
{
    joins_len <- length(joins)
    stopifnot(joins_len %% 2L == 1L)
    SQL <- joins[[1L]]
    if (joins_len != 1L) {
        ON_idx <- 2L * seq_len(joins_len %/% 2L)
        ON <- joins[ON_idx]
        Rtables <- joins[ON_idx + 1L]
        SQL <- c(SQL, paste0(join_type, " JOIN ", Rtables, " ON (", ON, ")"))
    }
    SQL
}

.build_SQL_WHERE <- function(vals)
{
    if (length(vals) == 0L)
        return("")
    sql <-
      lapply(seq_len(length(vals)), function(i) {
               v <- vals[[i]]
               if (!is.numeric(v))
                 v <- paste0("'", v, "'")
               v <- paste0("(", paste0(v, collapse=","), ")")
               v <- paste0(names(vals)[i], " IN ", v)
               paste0("(", v, ")")
            })
    paste0(unlist(sql), collapse=" AND ")
}

.build_SQL_SELECT <- function(columns, joins, distinct=FALSE,
                              vals=list(), orderby=character(0))
{
    SQL <- "SELECT"
    if (distinct)
        SQL <- c(SQL, "DISTINCT")
    SQL <- c(SQL, paste0(columns, collapse=", "),
             "FROM", .build_SQL_FROM(joins))
    if (length(vals) != 0L)
        SQL <- c(SQL, "WHERE", .build_SQL_WHERE(vals))
    if (length(orderby) != 0L)
        SQL <- c(SQL, "ORDER BY", paste0(orderby, collapse=", "))
    SQL
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### TxDb schema
###

.TXDB_CHROMINFO_COLUMNS <- c(
    "_chrom_id",
    "chrom",
    "length",
    "is_circular"
)

.TXDB_TRANSCRIPT_COLUMNS <- c(
    id="_tx_id",
    name="tx_name",
    type="tx_type",
    chrom="tx_chrom",
    strand="tx_strand",
    start="tx_start",
    end="tx_end"
)

.TXDB_EXON_COLUMNS <- c(
    id="_exon_id",
    name="exon_name",
    chrom="exon_chrom",
    strand="exon_strand",
    start="exon_start",
    end="exon_end"
)

.TXDB_CDS_COLUMNS <- c(
    id="_cds_id",
    name="cds_name",
    chrom="cds_chrom",
    strand="cds_strand",
    start="cds_start",
    end="cds_end"
)

.TXDB_SPLICING_COLUMNS <- c(
    "_tx_id",
    "exon_rank",
    "_exon_id",
    "_cds_id"
)

.TXDB_GENE_COLUMNS <- c(
    "gene_id",
    "_tx_id"
)

### Order of tables matters! "transcript" must be before "splicing" and "gene",
### and "exon" and "cds" must be before "splicing". See TXDB_column2table()
### below why.
.TXDB_COLUMNS <- list(
    chrominfo=.TXDB_CHROMINFO_COLUMNS,
    transcript=.TXDB_TRANSCRIPT_COLUMNS,
    exon=.TXDB_EXON_COLUMNS,
    cds=.TXDB_CDS_COLUMNS,
    splicing=.TXDB_SPLICING_COLUMNS,
    gene=.TXDB_GENE_COLUMNS
)

### Tables "transcript", "exon", and "cds", must have these tags (at a minimum).
TXDB_CORE_TAGS <- c("id", "chrom", "strand", "start", "end")

### The "splicing right tables" can be bundled to the "splicing" table with
### a LEFT JOIN using the .TXDB_SPLICING_JOIN_USING columns.
.TXDB_SPLICING_RTABLES <- c("transcript", "exon", "cds")
.TXDB_SPLICING_JOIN_USING <- setNames(c("_tx_id", "_exon_id", "_cds_id"),
                                      .TXDB_SPLICING_RTABLES)
TXDB_SPLICING_BUNDLE <- c("splicing", .TXDB_SPLICING_RTABLES)

TXDB_tables <- function() names(.TXDB_COLUMNS)

TXDB_table_columns <- function(table) .TXDB_COLUMNS[[table]]

### When the same column belongs to more than one table (e.g. "_tx_id",
### "_exon_id", or "_cds_id"), then the table for which the column is a
### primary key is chosen by default. This behavior can be changed by passing
### the name of a table to 'from_table' in which case the priority is given to
### that table.
TXDB_column2table <- function(columns, from_table=NA)
{
    if (length(columns) == 0L)
        return(character(0))
    tables <- sapply(columns,
        function(column) {
            for (table in TXDB_tables()) {
                if (column %in% TXDB_table_columns(table))
                    return(table)
            }
            stop(column, ": unknown db column")
        }
    )
    if (!is.na(from_table))
        tables[columns %in% TXDB_table_columns(from_table)] <- from_table
    tables
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### .TXDB_join_tables() and .TXDB_join_splicing_Rtables()
###

.TXDB_join_tables <- function(tables)
{
    tables <- unique(tables)
    if (length(tables) == 1L)
        return(tables)
    if (any(tables %in% c("exon", "cds")))
        tables <- c(tables, "splicing")
    ## Order tables & remove duplicates.
    join_order <- c("transcript", "splicing", "exon", "cds", "gene")
    tables <- intersect(join_order, tables)
    joins <- character(2L * length(tables) - 1L)
    ON_idx <- 2L * seq_len(length(tables) - 1L)
    ON <- sapply(2:length(tables), function(i) {
        Rtable <- tables[[i]]
        if (Rtable == "exon") {
            USING <- "_exon_id"
            Ltable <- "splicing"
        } else if (Rtable == "cds") {
            USING <- "_cds_id"
            Ltable <- "splicing"
        } else {
            USING <- "_tx_id"
            Ltable <- tables[[1L]]
        }
        Lcolumn <- .as_qualified(Ltable, USING)
        Rcolumn <- .as_qualified(Rtable, USING)
        paste(Lcolumn, Rcolumn, sep="=")
    })
    joins[ON_idx] <- ON
    joins[c(1L, ON_idx + 1L)] <- tables
    joins
}

.TXDB_join_splicing_Rtables <- function(tables=character(0))
{
    if (!all(tables %in% TXDB_SPLICING_BUNDLE))
        stop("all tables must be in TXDB_SPLICING_BUNDLE")
    tables <- c("splicing", tables)
    ## Order tables & remove duplicates.
    tables <- intersect(TXDB_SPLICING_BUNDLE, tables)
    if (length(tables) == 1L)
        return(tables)
    joins <- character(2L * length(tables) - 1L)
    ON_idx <- 2L * seq_len(length(tables) - 1L)
    Rtables <- tables[-1L]
    USING <- .TXDB_SPLICING_JOIN_USING[Rtables]
    Lcolumns <- .as_qualified("splicing", USING)
    Rcolumns <- .as_qualified(Rtables, USING)
    ON <- paste(Lcolumns, Rcolumns, sep="=")
    joins[ON_idx] <- ON
    joins[c(1L, ON_idx + 1L)] <- tables
    joins
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### The 2 main helpers for SELECT'ing stuff from a TxDb object:
###   - TxDb_SELECT_from_INNER_JOIN()
###   - TxDb_SELECT_from_splicing_bundle()
### They should satisfy the needs of most extractors defined in the package.
###

### The columns in 'columns' + those involved thru 'vals' and 'orderby' are
### collected and their corresponding tables are INNER JOIN'ed.
TxDb_SELECT_from_INNER_JOIN <- function(txdb, table, columns, vals=list(),
                                        orderby=character(0))
{
    tables <- TXDB_column2table(columns, from_table=table)
    where_columns <- names(vals)
    where_tables <- TXDB_column2table(where_columns, from_table=table)
    joins <- .TXDB_join_tables(c(table, tables, where_tables))
    orderby_tables <- TXDB_column2table(orderby, from_table=table)
    stopifnot(all(orderby_tables %in% .tables_in_joins(joins)))
    use_joins <- length(joins) > 1L
    if (use_joins) {
        columns <- .as_qualified(tables, columns)
        names(vals) <- .as_qualified(where_tables, where_columns)
        orderby <- .as_qualified(orderby_tables, orderby)
    }
    ## .build_SQL_SELECT() uses INNER joins.
    SQL <- .build_SQL_SELECT(columns, joins, distinct=use_joins,
                             vals=vals, orderby=orderby)
    queryAnnotationDb(txdb, SQL)
}

### Can only involve columns (thru 'columns', 'vals', and 'orderby') that
### belong to the tables in TXDB_SPLICING_BUNDLE at the moment.
TxDb_SELECT_from_splicing_bundle <- function(txdb, columns,
                                             vals=list(), orderby=character(0),
                                             join_type="LEFT")
{
    tables <- TXDB_column2table(columns, from_table="splicing")
    where_columns <- names(vals)
    where_tables <- TXDB_column2table(where_columns, from_table="splicing")
    orderby_tables <- TXDB_column2table(orderby, from_table="splicing")
    joins <- .TXDB_join_splicing_Rtables(c(tables, where_tables,
                                           orderby_tables))
    use_joins <- length(joins) > 1L
    if (use_joins) {
        columns <- .as_qualified(tables, columns)
        names(vals) <- .as_qualified(where_tables, where_columns)
        orderby <- .as_qualified(orderby_tables, orderby)
    }
    ## .build_SQL_SELECT() would use INNER joins but we need to override this
    ## to use the type of join specified by the user.
    from <- paste0(.build_SQL_FROM(joins, join_type), collapse=" ")
    SQL <- .build_SQL_SELECT(columns, from, distinct=FALSE,
                             vals=vals, orderby=orderby)
    queryAnnotationDb(txdb, SQL)
}

