# ======================
# ==  Clean the mess  ==
# ======================

# Start from an empty workspace so that objects left over from a previous
# session cannot interfere with this run.
rm(list = ls())


# ==============================
# ==  Define the working dir  ==
# ==============================

# Folder that is made the working directory before the .sti files are read.
import_folder <- "C:\\JavaSTICS-1.40-stics-8.50\\ClimateChange_folder"

# Folder that is made the working directory before the analysis outputs
# (plots, statistical report) are produced.
export_folder <- "C:\\Users\\admin\\Dropbox\\Academ_Enseignement\\AGRO0023-1-SmartFarming\\TP_CropModel_ClimateChange\\Results"


# ==============================
# ==  Load libraries  ==
# ==============================

 # install.packages ('ggplot2')
 # install.packages ('agricolae')
 # install.packages ('dgof')
 # install.packages ('kSamples')
 # install.packages ('GGally')
 # install.packages ('FactoMineR')
 # install.packages ('factoextra')
 # install.packages ('ggforce')
 # install.packages ('ggfortify')


library('ggplot2')
library('agricolae')
library('dgof')
library('kSamples')
library('GGally')
library('FactoMineR')
library('factoextra')
library('ggforce')
library('ggfortify')


# =======================
# ==  import mod_sUSM  ==
# =======================

# Import every daily simulation file (.sti) of the experiment and stack them
# into a single long data frame, `my_df`.
#
# Files are read from `import_folder` and are named
#   mod_sUSM_rcp_<rcp>_ht_<ht>_<year>.sti
# The reference climate (rcp "00") has a single time horizon ("00"); the two
# other RCPs are each simulated for the horizons "55" and "85".
setwd(import_folder)

RCP <- c("00", "45", "85")

# First value of the year counter of each time horizon.
first_year_by_ht <- c("00" = 1980, "55" = 2040, "85" = 2070)

# Number of consecutive years read for each climate series.
n_years_per_serie <- 29

# Read one .sti file and tag it with the climate metadata of its run.
#
# rcp, ht   : scenario and time-horizon codes (character).
# yy        : year counter of the run; the file read is the one named yy + 1.
# init_year : first year counter of the series, used to number the run
#             relative to the start of its series.
# Returns the file content as a data frame with all original columns coerced
# to numeric, plus DOY and the factor columns RCP, HT, RCP.HT, HarvYear,
# ClimSerie and RCP.HT.year.
read_sti_run <- function(rcp, ht, yy, init_year) {

  name_usm <- sprintf("mod_sUSM_rcp_%s_ht_%s_%d.sti", rcp, ht, yy + 1)

  # The .sti file is a ';'-separated table using ',' as decimal mark.
  tmp <- read.csv(name_usm,
                  header = TRUE,
                  sep = ";",
                  dec = ",",
                  encoding = "unknown")

  # Force every column to numeric (non-numeric content becomes NA).
  tmp[] <- lapply(tmp, as.numeric)

  # Continuous day counter starting at the first value of `jul`, so that it
  # keeps increasing over the whole file.
  tmp$DOY <- tmp$jul[1]:(tmp$jul[1] + nrow(tmp) - 1)

  # Metadata describing the climate of the run.
  # NOTE(review): the file is named with yy + 1 while HarvYear is labelled
  # with yy - confirm which of the two is the actual harvest year.
  year_rel <- yy - init_year + 1
  tmp$RCP <- as.factor(sprintf("rcp_%s", rcp))
  tmp$HT <- as.factor(sprintf("ht_%s", ht))
  tmp$RCP.HT <- as.factor(sprintf("rcp_%s_ht_%s", rcp, ht))
  tmp$HarvYear <- as.factor(sprintf("year_%d", yy))
  tmp$ClimSerie <- as.factor(sprintf("yearrel_%d", year_rel))
  tmp$RCP.HT.year <- as.factor(sprintf("rcp_%s_ht_%s_year_%d", rcp, ht, year_rel))

  tmp
}

# Collect the runs in a list and bind them once at the end: binding inside
# the loop would copy the whole accumulated data frame for every file.
sti_runs <- list()

for (rcp in RCP) {

  HT <- if (rcp == "00") "00" else c("55", "85")

  for (ht in HT) {

    init_year <- first_year_by_ht[[ht]]

    for (yy in init_year:(init_year + n_years_per_serie - 1)) {
      sti_runs[[length(sti_runs) + 1L]] <- read_sti_run(rcp, ht, yy, init_year)
    } # close yy

  } # close ht

}  # close rcp

# One row per simulated day, all runs stacked in reading order; factor levels
# of the metadata columns are merged by rbind.
my_df <- do.call(rbind, sti_runs)


# =====================================================
# ==  Preliminary Analysis of  simulations mod_sUSM  ==
# =====================================================

# From here on, work inside the export folder.
setwd(export_folder)


#  Plot some graphs
# ==================
# Every figure shows one line per run (RCP.HT.year) against the day counter
# DOY, coloured by climatic scenario (RCP.HT).

# Aboveground biomass
gg_lin_AGB <- ggplot(
  data = my_df,
  mapping = aes(x = DOY, y = masec.n., group = RCP.HT.year, colour = factor(RCP.HT))
) +
  geom_line() +
  xlab("Day of the year [DOY]") +
  ylab("Aboveground biomass [ton/ha]") +
  theme()
print(gg_lin_AGB)


# Yield
gg_lin_yield <- ggplot(
  data = my_df,
  mapping = aes(x = DOY, y = mafruit, group = RCP.HT.year, colour = factor(RCP.HT))
) +
  geom_line() +
  xlab("Day of the year [DOY]") +
  ylab("Yield [ton/ha]") +
  theme()
print(gg_lin_yield)


# Soil water reserve
gg_lin_resmes <- ggplot(
  data = my_df,
  mapping = aes(x = DOY, y = resmes, group = RCP.HT.year, colour = factor(RCP.HT))
) +
  geom_line() +
  xlab("Day of the year [DOY]") +
  ylab("Water reserve over profile [mm]") +
  theme()
print(gg_lin_resmes)


# Soil nitrogen content
gg_lin_azomes <- ggplot(
  data = my_df,
  mapping = aes(x = DOY, y = azomes, group = RCP.HT.year, colour = factor(RCP.HT))
) +
  geom_line() +
  xlab("Day of the year [DOY]") +
  ylab("Nitrogen content over profile [kgN/ha]") +
  theme()
print(gg_lin_azomes)




# ===============================
# ==  Synthesis at end season  ==
# ===============================

# Build `my_df_summary`: one row per run, holding the values of the LAST
# simulated day of that run (end-of-season state). Rows follow the order of
# `uniq_id`, which later code relies on when it fills columns by position.
uniq_id <- unique(my_df$RCP.HT.year)

Head_sti <- colnames(my_df)

# Row number, in my_df, of the last day of each run.
last_row_of_run <- vapply(
  seq_along(uniq_id),
  function(uu) {
    id_sum <- which(my_df$RCP.HT.year == uniq_id[uu])
    id_sum[length(id_sum)]
  },
  integer(1)
)

# A single subset keeps the column names and types of my_df, and avoids both
# the hand-built empty data frame and growing the result with rbind in a loop.
my_df_summary <- my_df[last_row_of_run, ]


# Extract specific data
# ======================

# Derive per-run indicators and store them in my_df_summary.
# Row uu of my_df_summary is assumed to describe run uniq_id[uu], which is how
# the summary table is built earlier in this script.

# Duration between the flowering (iflos) and maturity (imats) dates.
my_df_summary$GFilTime <- my_df_summary$imats - my_df_summary$iflos

# Columns filled below. They are created as numeric NA so that a run whose
# stage dates cannot be located keeps NA instead of stopping the script.
stress_cols <- c("SWFAC.juv", "SWFAC.veg", "SWFAC.rep",
                 "EWFAC.juv", "EWFAC.veg", "EWFAC.rep",
                 "INNFAC.juv", "INNFAC.veg", "INNFAC.rep",
                 "TPFAC.juv", "TPFAC.veg", "TPFAC.rep")
status_cols <- c("LAI.max",
                 "ABG.flo", "ABG.mat",
                 "Yield.tot", "Yield.rmb",
                 "QNPlant.flo", "QNPlant.mat", "QNGrain.mat")
my_df_summary[c(stress_cols, status_cols)] <- NA_real_

# Mean of `values` over rows from:to; NA when a bound was not found.
phase_mean <- function(values, from, to) {
  if (length(from) != 1L || length(to) != 1L) {
    return(NA_real_)
  }
  mean(values[from:to])
}

# Maximum of `values` over rows from:to; NA when a bound was not found.
phase_max <- function(values, from, to) {
  if (length(from) != 1L || length(to) != 1L) {
    return(NA_real_)
  }
  max(values[from:to])
}

# Value of `values` at row `at`; NA when the row was not found.
value_at <- function(values, at) {
  if (length(at) != 1L) {
    return(NA_real_)
  }
  values[at]
}

for (uu in seq_along(uniq_id)) {

  # Daily records of the current run.
  tmp_sum <- my_df[which(my_df$RCP.HT.year == uniq_id[uu]), ]
  n_day <- nrow(tmp_sum)

  # Row of the run whose DOY equals each stage date, the stage dates being
  # read on the last day of the run.
  id_lev <- which(tmp_sum$DOY == tmp_sum$ilevs[n_day])
  id_amf <- which(tmp_sum$DOY == tmp_sum$iamfs[n_day])
  id_flo <- which(tmp_sum$DOY == tmp_sum$iflos[n_day])
  id_mat <- which(tmp_sum$DOY == tmp_sum$imats[n_day])

  if (any(lengths(list(id_lev, id_amf, id_flo, id_mat)) != 1L)) {
    warning(sprintf("Run '%s': at least one stage date (ilevs, iamfs, iflos, imats) does not match a simulated day; the indicators depending on it are left NA.",
                    as.character(uniq_id[uu])),
            call. = FALSE)
  }

  # Mean stress indices by period:
  #   juv = ilevs -> iamfs, veg = iamfs -> iflos, rep = iflos -> imats
  my_df_summary$SWFAC.juv[uu] <- phase_mean(tmp_sum$swfac, id_lev, id_amf)
  my_df_summary$SWFAC.veg[uu] <- phase_mean(tmp_sum$swfac, id_amf, id_flo)
  my_df_summary$SWFAC.rep[uu] <- phase_mean(tmp_sum$swfac, id_flo, id_mat)

  # NOTE(review): EWFAC is computed from `exobiom` - presumably an
  # excess-water index; confirm against the STICS variable list.
  my_df_summary$EWFAC.juv[uu] <- phase_mean(tmp_sum$exobiom, id_lev, id_amf)
  my_df_summary$EWFAC.veg[uu] <- phase_mean(tmp_sum$exobiom, id_amf, id_flo)
  my_df_summary$EWFAC.rep[uu] <- phase_mean(tmp_sum$exobiom, id_flo, id_mat)

  my_df_summary$INNFAC.juv[uu] <- phase_mean(tmp_sum$inns, id_lev, id_amf)
  my_df_summary$INNFAC.veg[uu] <- phase_mean(tmp_sum$inns, id_amf, id_flo)
  my_df_summary$INNFAC.rep[uu] <- phase_mean(tmp_sum$inns, id_flo, id_mat)

  my_df_summary$TPFAC.juv[uu] <- phase_mean(tmp_sum$ftemp, id_lev, id_amf)
  my_df_summary$TPFAC.veg[uu] <- phase_mean(tmp_sum$ftemp, id_amf, id_flo)
  my_df_summary$TPFAC.rep[uu] <- phase_mean(tmp_sum$ftemp, id_flo, id_mat)

  # Biomass, yield and N uptake at flowering and at maturity.
  my_df_summary$LAI.max[uu] <- phase_max(tmp_sum$lai.n., id_lev, id_mat)

  my_df_summary$ABG.flo[uu] <- value_at(tmp_sum$masec.n., id_flo)
  my_df_summary$ABG.mat[uu] <- value_at(tmp_sum$masec.n., id_mat)

  my_df_summary$Yield.tot[uu] <- value_at(tmp_sum$mafruit, id_mat)
  # Aboveground biomass gained between flowering and maturity.
  my_df_summary$Yield.rmb[uu] <- my_df_summary$ABG.mat[uu] - my_df_summary$ABG.flo[uu]

  my_df_summary$QNPlant.flo[uu] <- value_at(tmp_sum$QNplante, id_flo)
  my_df_summary$QNPlant.mat[uu] <- value_at(tmp_sum$QNplante, id_mat)
  # NOTE(review): the factor 10 presumably converts (% N) x (t/ha) into
  # kg N/ha - confirm the units of CNgrain and mafruit.
  my_df_summary$QNGrain.mat[uu] <- value_at(tmp_sum$CNgrain, id_mat) * my_df_summary$Yield.tot[uu] * 10

}




#  Basic analysis
# ==================

# For each end-of-season variable: box plot by climatic scenario, one-way
# ANOVA on RCP.HT, then Student-Newman-Keuls grouping of the scenarios.
# Result_aov and SNK_result are overwritten by each analysis in turn.

# --- Final yield ---
gg_bxpt_Yield <- ggplot(data = my_df_summary, mapping = aes(x = RCP.HT, y = Yield.tot)) +
  geom_boxplot() +
  xlab("Climatic scenario") +
  ylab("Final Yield [ton/ha]") +
  theme()
print(gg_bxpt_Yield)

Result_aov <- aov(Yield.tot ~ RCP.HT, data = my_df_summary)
print(summary(Result_aov))

SNK_result <- SNK.test(Result_aov, "RCP.HT")
print(SNK_result$groups)


# --- Grain number ---
gg_bxpt_GNumber <- ggplot(data = my_df_summary, mapping = aes(x = RCP.HT, y = chargefruit)) +
  geom_boxplot() +
  xlab("Climatic scenario") +
  ylab("GrainNumber [#/m]") +
  theme()
print(gg_bxpt_GNumber)

Result_aov <- aov(chargefruit ~ RCP.HT, data = my_df_summary)
print(summary(Result_aov))

SNK_result <- SNK.test(Result_aov, "RCP.HT")
print(SNK_result$groups)


# --- Flowering date ---
gg_bxpt_iFlow <- ggplot(data = my_df_summary, mapping = aes(x = RCP.HT, y = iflos)) +
  geom_boxplot() +
  xlab("Climatic scenario") +
  ylab("Flowering [DOY]") +
  theme()
print(gg_bxpt_iFlow)

Result_aov <- aov(iflos ~ RCP.HT, data = my_df_summary)
print(summary(Result_aov))

SNK_result <- SNK.test(Result_aov, "RCP.HT")
print(SNK_result$groups)


# --- Maturity date ---
gg_bxpt_iMat <- ggplot(data = my_df_summary, mapping = aes(x = RCP.HT, y = imats)) +
  geom_boxplot() +
  xlab("Climatic scenario") +
  ylab("Maturity [DOY]") +
  theme()
print(gg_bxpt_iMat)

Result_aov <- aov(imats ~ RCP.HT, data = my_df_summary)
print(summary(Result_aov))

SNK_result <- SNK.test(Result_aov, "RCP.HT")
print(SNK_result$groups)


# --- Grain filling duration ---
gg_bxpt_GFilTime <- ggplot(data = my_df_summary, mapping = aes(x = RCP.HT, y = GFilTime)) +
  geom_boxplot() +
  xlab("Climatic scenario") +
  ylab("Grain filling period [days]") +
  theme()
print(gg_bxpt_GFilTime)

Result_aov <- aov(GFilTime ~ RCP.HT, data = my_df_summary)
print(summary(Result_aov))

SNK_result <- SNK.test(Result_aov, "RCP.HT")
print(SNK_result$groups)


# Water stress during the vegetative phase: box plot by climatic scenario,
# one-way ANOVA on RCP.HT, then Student-Newman-Keuls grouping.
# SWFAC.veg is the MEAN of the daily swfac index over the vegetative phase,
# i.e. a dimensionless index and not a number of days, hence the axis label.
gg_bxpt_stress <- ggplot()+
  geom_boxplot(data = my_df_summary, aes(x=RCP.HT,y=SWFAC.veg))+
  ylab("Mean water stress index (swfac) in vegetative phase [-]") + xlab("Climatic scenario")+
  theme()
print (gg_bxpt_stress)

Result_aov <- aov(SWFAC.veg ~ RCP.HT, data = my_df_summary)
print(summary(Result_aov))

SNK_result <- SNK.test(Result_aov, "RCP.HT")
print(SNK_result$groups)


# ================================================
# ==  Develop a generic code to analyse curves  ==
# ================================================

# Name of the my_df_summary column to analyse, and of the grouping column used
# for the plots and for the ANOVA / SNK test.
Var2Analyse <- 'Yield.tot'
x_colname <- 'RCP.HT'

{  # braces let the whole section (figures + exported statistical tests) run as one unit

# Box plot of the analysed variable for each group.
gg_bxpt_generic <- ggplot()+
  geom_boxplot(data = my_df_summary, aes_string(x=x_colname,y=Var2Analyse))+
  ylab(Var2Analyse) + xlab("Climatic scenario")+
  theme()
print (gg_bxpt_generic)

# bxpt_name <- sprintf('%s%s%s',Var2Analyse,'_boxplot','.tiff')
# tiff(bxpt_name, units="in", width=10, height=8, res=150, compression = 'lzw')
#   print(gg_bxpt_generic)
# dev.off()


# Estimated probability density of the analysed variable, one curve per group.
gg_pdf_generic <- ggplot()+
  geom_density(data = my_df_summary, aes_string(x=Var2Analyse, colour = x_colname), size =1)+
  xlab(Var2Analyse) + ylab("PDF")+
  theme()
print (gg_pdf_generic)

# pdf_name <- sprintf('%s%s%s',Var2Analyse,'_pdf','.tiff')
# tiff(pdf_name, units="in", width=10, height=8, res=150, compression = 'lzw')
#   print(gg_pdf_generic)
# dev.off()


# Empirical cumulative distribution of the analysed variable, one curve per group.
gg_cdf_generic <- ggplot()+
  stat_ecdf(data = my_df_summary, aes_string(x=Var2Analyse, colour = x_colname) , size =1)+
  xlab(Var2Analyse) + ylab("CDF")+
  theme()
print (gg_cdf_generic)

# cdf_name <- sprintf('%s%s%s',Var2Analyse,'_cdf','.tiff')
# tiff(cdf_name, units="in", width=10, height=8, res=150, compression = 'lzw')
#   print(gg_cdf_generic)
# dev.off()


# Values of Var2Analyse for the runs of one climatic scenario.
# The pairwise tests below compare fixed RCP.HT scenarios, so this helper
# always filters on RCP.HT, whatever x_colname is.
scenario_values <- function(scenario) {
  my_df_summary[which(my_df_summary$RCP.HT == scenario), Var2Analyse]
}

# Model formula "<Var2Analyse> ~ <x_colname>" built from the column names, so
# that the ANOVA table and the SNK groups are labelled with the real names.
aov_formula <- reformulate(x_colname, response = Var2Analyse)


# Everything printed from here goes to the report file. The tests run inside
# tryCatch() so that the diversion is always closed, even if one of them
# fails; otherwise the console would stay silent after an error.
sink(sprintf('%s%s',Var2Analyse,'_Statistical_analysis.txt'))

tryCatch({

  # Performing Anova test and SNK post-hoc test
  # --------------------------------------------

  print('=============')
  print('==  ANOVA  ==')
  print('=============')

  Result_aov_generic <- aov(aov_formula, data = my_df_summary)
  print(summary(Result_aov_generic))

  print('=========================')
  print('==  SNK post hoc test  ==')
  print('=========================')

  SNK_result <- SNK.test(Result_aov_generic, x_colname)
  print(SNK_result$groups)


  # Performing the Kolmogorov-Smirnov test on two samples
  # ------------------------------------------------------
  # The closer D is to zero, the more likely the two samples come from the
  # same distribution.
  # H0 : one distribution is equal to the other
  # p < 0.05 => reject null hypothesis => different populations

  print('===============================')
  print('==  Kolmogorov-Smirnof test  ==')
  print('===============================')

  ks.results1 <- ks.test(scenario_values('rcp_00_ht_00'),
                         scenario_values('rcp_85_ht_55'),
                         alternative = 'two.sided')

  ks.results2 <- ks.test(scenario_values('rcp_45_ht_55'),
                         scenario_values('rcp_85_ht_55'),
                         alternative = 'two.sided')

  print(ks.results1)
  print(ks.results2)


  # Performing the Wilcoxon test
  # ----------------------------
  # Compares two groups without assuming normality.
  # H0 : the distributions of x and y differ by a location shift of mu (= 0 here)
  # H1 : the true location shift is not equal to mu
  # p < 0.05 => conclude that results differ significantly between groups

  print('=====================')
  print('==  Wilcoxon test  ==')
  print('=====================')

  wc.results <- wilcox.test(scenario_values('rcp_00_ht_00'),
                            scenario_values('rcp_85_ht_55'),
                            paired = FALSE, mu = 0, alternative="two.sided")

  print(wc.results)

  wc.results <- wilcox.test(scenario_values('rcp_45_ht_55'),
                            scenario_values('rcp_85_ht_55'),
                            paired = FALSE, mu = 0, alternative="two.sided")

  print(wc.results)


  # Performing the Anderson-Darling test
  # -------------------------------------
  # H0 : the k independent samples arose from a common, unspecified
  #      distribution function F(x)
  # p < 0.05 => reject null hypothesis => different populations

  print('=============================')
  print('==  Anderson-Darling test  ==')
  print('=============================')

  ad.results <- ad.test(scenario_values('rcp_00_ht_00'),
                        scenario_values('rcp_45_ht_55'),
                        scenario_values('rcp_45_ht_85'),
                        scenario_values('rcp_85_ht_55'),
                        scenario_values('rcp_85_ht_85'))

  print(ad.results)

}, finally = sink())

}  # end of exportation



# =============================================
# ==  Integrated analysis between variables  ==
# =============================================

# Subset the data
# ----------------

# End-of-season indicators kept for the correlation analysis. The last two
# entries (HarvYear, RCP.HT) are descriptive columns, kept at the end.
cols_short <- c(
  "Yield.tot", "chargefruit", "GFilTime", "Yield.rmb",
  "ABG.flo", "ABG.mat",
  "QNPlant.flo", "QNPlant.mat", "QNGrain.mat",
  "resmes", "azomes",
  "SWFAC.veg", "SWFAC.rep",
  "EWFAC.veg", "EWFAC.rep",
  "INNFAC.veg", "INNFAC.rep",
  "TPFAC.veg", "TPFAC.rep",
  "HarvYear", "RCP.HT"
)

my_df_short <- my_df_summary[, cols_short]

# "SWFAC.juv" show no variation !


# Analyze correlations
# --------------------
#  Define fit function
# Panel function for pair plots: scatter plot of the two mapped variables
# overlaid with a linear-model smoother drawn in blue.
#
# data    : data frame holding the variables.
# mapping : aesthetic mapping selecting the x and y variables.
# ...     : further arguments forwarded to geom_smooth().
# Returns the ggplot object of the panel.
my_fn_pair <- function(data, mapping, ...) {
  ggplot(data = data, mapping = mapping) +
    geom_point() +
    geom_smooth(method = lm, fill = "blue", color = "blue", ...)
}

# GGpairs - All data
# Pair plot of every column of my_df_short except the last two (the
# descriptive columns): Pearson correlation in the upper panels, scatter plot
# with linear fit (my_fn_pair) in the lower panels.
g <- ggpairs(
  my_df_short,
  columns = seq_len(ncol(my_df_short) - 2),
  upper = list(continuous = wrap("cor", method = "pearson")),
  lower = list(continuous = my_fn_pair)
)
# Explicit print: a bare `g` is not displayed when the script is run through
# source(), unlike the other figures of this script which are all printed.
print(g)
# ggsave("GGpairs_full.tiff", device = "tiff", dpi = 150, height = 15, width = 15, units = c("in"))



# PCA Analysis
# --------------
# Principal component analysis of the end-of-season indicators.
# HarvYear and RCP.HT are kept in my_df_pca only to describe the runs on the
# plot; they are excluded from the PCA itself.
my_df_pca <- my_df_summary[,c("Yield.tot","chargefruit","GFilTime","Yield.rmb",
                                "ABG.flo","ABG.mat",
                                "QNPlant.flo","QNPlant.mat","QNGrain.mat",
                                "resmes","azomes",
                                "SWFAC.veg","SWFAC.rep",
                                "EWFAC.veg","EWFAC.rep",
                                "INNFAC.veg","INNFAC.rep",
                                "TPFAC.veg","TPFAC.rep",
                                "HarvYear","RCP.HT")]

# Positions of the descriptive columns to leave out of the PCA.
ind2remove <- which(colnames(my_df_pca) %in% c("RCP.HT", "HarvYear"))

# PCA on centred and scaled variables.
SumDyn.pca <- prcomp(my_df_pca[, -ind2remove],
                     center = TRUE,
                     scale. = TRUE)

# Explicit print: a bare summary() call shows nothing when the script is run
# through source().
print(summary(SumDyn.pca))

# Plot of the PCA result with variable loadings, runs coloured and framed by
# climatic scenario.
gg_PCA <- autoplot(SumDyn.pca,
                   data = my_df_pca, colour = 'RCP.HT',
                   loadings = TRUE, loadings.label = TRUE,
                   frame = TRUE, frame.colour = 'RCP.HT')
print(gg_PCA)

# tiff('PCA_Analysis.tiff', units="in", width=10, height=8, res=150, compression = 'lzw')
#   print(gg_PCA)
# dev.off()
