from argparse import ArgumentParser

import os
import numpy as np
import xlwt
from xlwt import Worksheet, Column
from xlwt.Utils import rowcol_to_cell, rowcol_pair_to_cellrange


def get_stat_and_formulas():
    """Return the importance statistics and their spreadsheet formula templates.

    Returns:
        A ``(stats, formulas_str)`` pair of parallel lists: ``stats`` holds the
        statistic labels and ``formulas_str`` the matching formula templates,
        each containing a ``{}`` placeholder to fill with a cell range.
    """
    # (label, formula template) pairs, kept side by side so both lists stay aligned
    label_and_template = (
        ("mean", "AVERAGE({})"),
        ("std", "STDEV({})"),
        ("min", "QUARTILE({}; 0)"),
        ("quart1", "QUARTILE({}; 1)"),
        ("med", "QUARTILE({}; 2)"),
        ("quart3", "QUARTILE({}; 3)"),
        ("max", "QUARTILE({}; 4)"),
    )
    stats = [label for label, _ in label_and_template]
    formulas_str = [template for _, template in label_and_template]
    return stats, formulas_str


def formula_range(formula, r1, r2, c1, c2):
    """Build an xlwt formula applying the function ``formula`` to a cell range.

    The range is the one ``rowcol_pair_to_cellrange`` produces for the corner
    cells ``(r1, c1)`` and ``(r2, c2)``; the result is ``formula(<range>)``
    wrapped in an ``xlwt.Formula``.
    """
    cell_range = rowcol_pair_to_cellrange(r1, c1, r2, c2)
    expression = "{}({})".format(formula, cell_range)
    return xlwt.Formula(expression)


def vlookup(lookup_row, lookup_col, search_r1, search_r2, search_c1, search_c2):
    """Build a VLOOKUP formula returning the value from the last column of the search range.

    The looked-up cell is ``(lookup_row, lookup_col)`` with an absolute column.
    The search range spans ``(search_r1, search_c1)`` to ``(search_r2, search_c2)``
    with both rows and the last column absolute. The match mode is ``FALSE``.
    """
    lookup_cell = rowcol_to_cell(lookup_row, lookup_col, col_abs=True)
    search_range = rowcol_pair_to_cellrange(
        search_r1, search_c1, search_r2, search_c2,
        row1_abs=True, row2_abs=True, col2_abs=True
    )
    # 1-based index of the last column inside the search range
    result_column = search_c2 - search_c1 + 1
    expression = "VLOOKUP({};{};{};{})".format(lookup_cell, search_range, result_column, "FALSE")
    return xlwt.Formula(expression)


def write_meta(sheet, row_offset, col_offest, meta):
    """Write the metadata values found in ``meta`` into one column of ``sheet``.

    Every known metric has a fixed row position relative to ``row_offset``.
    Metrics absent from ``meta`` are skipped, so their cell is not written.
    Values are converted with ``float`` before being written.

    Args:
        sheet: object exposing ``write(row, col, value)``.
        row_offset: row of the first metric.
        col_offest: column receiving the values.
        meta: container supporting ``in`` and item access by metric name.
    """
    # metric name -> row shift from row_offset (shift 3 is never written)
    row_shift_by_metric = (
        ("cv_best_score", 0),
        ("min_samples_leaf", 1),
        ("max_features", 2),
        ("accuracy", 4),
        ("precision", 5),
        ("recall", 6),
        ("f1-score", 7),
        ("roc_auc", 8),
    )

    for name, shift in row_shift_by_metric:
        if name not in meta:
            continue
        value = float(meta[name])
        sheet.write(row_offset + shift, col_offest, value)


def get_existing_file_path(path, model, dataset):
    """Pick the importance file name to use for a (model, dataset) pair.

    Returns the bare file name, not joined with ``path``:
    ``<model>_<dataset>_importances.npz`` when that file exists in ``path``,
    otherwise ``<model>_image_net_<dataset>_importances.npz``. The existence
    of the fallback name is not checked.
    """
    plain_name = "{}_{}_importances.npz".format(model, dataset)
    if os.path.isfile(os.path.join(path, plain_name)):
        return plain_name
    return "{}_image_net_{}_importances.npz".format(model, dataset)


def _feature_ranks(importance):
    """Return the 1-based rank of every feature (1 = highest importance)."""
    # argsort of the negated values lists feature indices from most to least important
    order = np.argsort(np.negative(importance))
    ranks = np.empty(len(order), dtype=int)
    # invert the permutation: the feature found at position p of the ordering has rank p + 1
    ranks[order] = np.arange(1, len(order) + 1)
    return ranks


def _fill_model_sheet(sheet, datasets, importances, metadata, n_features):
    """Write one model's importances, ranks, statistics, metadata and rankings to ``sheet``."""
    import sys  # local import: only needed for the single-line progress output

    n_datasets = len(datasets)

    # block labels (rank, importance)
    impo_col_offset = 1
    rank_col_offset = impo_col_offset + n_datasets + 2
    rank_stat_col_offset = rank_col_offset + n_datasets + 2
    sheet.write_merge(0, 0, impo_col_offset, impo_col_offset + n_datasets - 1, "importances")
    sheet.write_merge(0, 0, rank_col_offset, rank_col_offset + n_datasets - 1, "rank")
    sheet.write_merge(0, 0, rank_stat_col_offset, rank_stat_col_offset + 1, "rank stats")

    # row label
    sheet.write(1, 0, "feature")
    for i in range(n_features):
        label = "feature_{}".format(i + 1)
        sheet.write(i + 2, impo_col_offset - 1, label)
        sheet.write(i + 2, rank_col_offset - 1, label)
        sheet.write(i + 2, rank_col_offset + n_datasets, label)  # for VLOOKUP with rankings

    # impo statistics labels
    stat_row_offset = n_features + 4
    stats, formulas = get_stat_and_formulas()
    for i, stat in enumerate(stats):
        sheet.write(stat_row_offset + i, 0, stat)

    # metadata label
    meta_row_offset = stat_row_offset + len(stats) + 2
    sheet.write(meta_row_offset, 0, "cv acc")
    sheet.write(meta_row_offset + 1, 0, "min_samples_leaf")
    sheet.write(meta_row_offset + 2, 0, "max_features")
    sheet.write(meta_row_offset + 4, 0, "accuracy")
    sheet.write(meta_row_offset + 5, 0, "precision")
    sheet.write(meta_row_offset + 6, 0, "recall")
    sheet.write(meta_row_offset + 7, 0, "f1-score")
    sheet.write(meta_row_offset + 8, 0, "roc auc")

    # ranking: ranks 1..10, then the last ten ranks (n_features - 9 .. n_features)
    # NOTE: with fewer than 10 features some of these ranks do not exist in the rank columns
    most_imp_row_offset = stat_row_offset
    least_imp_row_offset = most_imp_row_offset + 11
    sheet.write_merge(most_imp_row_offset, most_imp_row_offset + 9, rank_col_offset - 2, rank_col_offset - 2, "Most imp.")
    sheet.write_merge(least_imp_row_offset, least_imp_row_offset + 9, rank_col_offset - 2, rank_col_offset - 2, "Least imp.")
    for i in range(10):
        sheet.write(most_imp_row_offset + i, rank_col_offset - 1, i + 1)
        sheet.write(least_imp_row_offset + i, rank_col_offset - 1, n_features - 9 + i)

    # rank_stat: per-feature min/max rank across datasets
    rank_min_col_offset = rank_stat_col_offset
    rank_max_col_offset = rank_stat_col_offset + 1
    sheet.write(1, rank_min_col_offset, "Min. rank")
    sheet.write(1, rank_max_col_offset, "Max. rank")
    last_rank_col = rank_col_offset + n_datasets - 1
    for j in range(n_features):
        sheet.write(2 + j, rank_min_col_offset, formula_range("MIN", 2 + j, 2 + j, rank_col_offset, last_rank_col))
        sheet.write(2 + j, rank_max_col_offset, formula_range("MAX", 2 + j, 2 + j, rank_col_offset, last_rank_col))

    # MIN and MAX of each of the two rank-stat columns, written below the feature rows
    rank_stat_stat_row_offset = n_features + 4
    rank_stat_stats = [
        (rank_stat_stat_row_offset, rank_min_col_offset, "MIN"),
        (rank_stat_stat_row_offset + 1, rank_min_col_offset, "MAX"),
        (rank_stat_stat_row_offset, rank_max_col_offset, "MIN"),
        (rank_stat_stat_row_offset + 1, rank_max_col_offset, "MAX")
    ]
    for row, col, stat_formula in rank_stat_stats:
        sheet.write(row, col, formula_range(stat_formula, 2, 1 + n_features, col, col))

    for i, dataset in enumerate(datasets):
        # progress is emitted piecewise on a single line, one word per completed step
        sys.stdout.write("> dataset '{}' ".format(dataset))
        i_col_offset = impo_col_offset + i
        r_col_offset = rank_col_offset + i
        sheet.write(1, i_col_offset, dataset)
        sheet.write(1, r_col_offset, dataset)

        importance = importances[dataset]
        rank = _feature_ranks(importance)  # rank[j] is the rank of feature j, 1 being the most informative

        # values
        sys.stdout.write("values ")
        for j in range(n_features):
            sheet.write(2 + j, i_col_offset, float(importance[j]))
            sheet.write(2 + j, r_col_offset, int(rank[j]))

        # stats
        sys.stdout.write("stats ")
        imp_cell_range = rowcol_pair_to_cellrange(2, i_col_offset, 1 + n_features, i_col_offset)
        for j, formula in enumerate(formulas):
            sheet.write(stat_row_offset + j, i_col_offset, xlwt.Formula(formula.format(imp_cell_range)))

        # meta
        sys.stdout.write("meta ")
        write_meta(sheet, meta_row_offset, i_col_offset, metadata[dataset])

        # rankings: look each listed rank up in this dataset's rank column and
        # return the feature label stored in the column right after the rank block
        sys.stdout.write("ranks\n")
        lookup_col = rank_col_offset - 1
        search_col2 = rank_col_offset + n_datasets
        search_row1, search_row2 = 2, 2 + n_features - 1

        for j in range(10):
            most_lookup = vlookup(
                lookup_row=most_imp_row_offset + j, lookup_col=lookup_col,
                search_r1=search_row1, search_r2=search_row2,
                search_c1=r_col_offset, search_c2=search_col2
            )
            least_lookup = vlookup(
                lookup_row=least_imp_row_offset + j, lookup_col=lookup_col,
                search_r1=search_row1, search_r2=search_row2,
                search_c1=r_col_offset, search_c2=search_col2
            )
            sheet.write(most_imp_row_offset + j, r_col_offset, most_lookup)
            sheet.write(least_imp_row_offset + j, r_col_offset, least_lookup)


def main(argv):
    """Build the ``wkb.xls`` workbook of feature importances, one sheet per model.

    Command-line options read from ``argv``:
        --path      directory containing the ``*_importances.npz`` files (required)
        --models    comma-separated model names, one sheet each (required)
        --datasets  comma-separated dataset names, one column each (required)
        --dest      directory receiving ``wkb.xls``; defaults to ``--path``

    Unrecognised options are ignored.
    """
    parser = ArgumentParser()

    # Model loading and saving
    # required=True gives a usage error instead of an AttributeError on a missing option
    parser.add_argument("--path", dest="path", required=True)
    parser.add_argument("--models", dest="models", required=True)
    parser.add_argument("--datasets", dest="datasets", required=True)
    parser.add_argument("--dest", dest="dest", default=None)
    params, _ = parser.parse_known_args(argv)
    params.models = params.models.split(",")
    params.datasets = params.datasets.split(",")
    print("Parameters: {}".format(params))

    book = xlwt.Workbook()

    for model in params.models:
        print("Model '{}'".format(model))
        print("> loads importances")
        filedata = {}
        try:
            for dataset in params.datasets:
                filename = get_existing_file_path(params.path, model, dataset)
                filedata[dataset] = np.load(os.path.join(params.path, filename))
            importances = {dataset: data["importances"] for dataset, data in filedata.items()}

            # the feature count is taken from the first dataset listed
            n_features = importances[params.datasets[0]].shape[0]
            print("> n_features: {}".format(n_features))

            sheet = book.add_sheet(model)
            _fill_model_sheet(sheet, params.datasets, importances, filedata, n_features)
        finally:
            # the loaded archives are still read while the metadata is written,
            # so they are only released once the sheet is done (or on failure)
            for data in filedata.values():
                data.close()

    # honour --dest when given; otherwise keep saving next to the input files
    dest_dir = params.path if params.dest is None else params.dest
    book.save(os.path.join(dest_dir, "wkb.xls"))

if __name__ == "__main__":
    # Script entry point: forward the command-line arguments without the program name.
    import sys

    cli_args = sys.argv[1:]
    main(cli_args)
