#' Formats bulletin object for Taylor Clarke's analysis.
#'
#' Converts a bulletin object into the analysis table used for Taylor Clarke's
#' research in 2018. The bulletin object is first passed through
#' formatBulletinObjForHzdAnalysis() (called with MissingAvProblemInWideFormat = 0,
#' AddDngRatingPrevDay = TRUE and AddDngRatingOtherElev = TRUE) and the result
#' is then filtered and extended.
#'
#' @details
#' Processing steps, in this order:
#' \enumerate{
#'   \item Remove the forecast regions Yukon, Whistler Blackcomb, South Coast,
#'         Northwest - BC, North Shore, North Rockies - BC, North Columbia and
#'         Little Yoho.
#'   \item Remove the seasons 2010 and 2011.
#'   \item Remove forecasters with fewer than NumForecastsThreshold records.
#'   \item Remove records whose DAY0 danger rating is "No Rating" or "Spring";
#'         the same two ratings are set to NA in DAYPREV, DAY0ALP, DAY0TL and DAY0BTL.
#'   \item Merge "High" and "Extreme" into "High/Extreme" in all five danger
#'         rating columns, which become ordered factors.
#'   \item Add the columns MONTH and MTNRANGE ("Rockies", "Coast" or "Columbia").
#'   \item For each entry of ListAvProblemTypesAbbrev add XXXX_PRES,
#'         XXXX_PROD_MIN/TYP/MAX, XXXX_SUM_MIN/TYP/MAX,
#'         XXXX_LIKELIHOOD_TYP_PLUS/MINUS, XXXX_SIZE_TYP_PLUS/MINUS and
#'         XXXX_ASP_NUM, and convert the likelihood and size columns to ordered
#'         factors (a value of 0 becomes NA).
#'   \item Add PROB_COMBINATION and PROB_NUM, then remove records with more than
#'         three problems and records of combinations that occur fewer than
#'         RareProbCombThreshold times.
#'   \item Remove records whose DAY0 danger rating is missing.
#' }
#' Records with a missing value in REGION, SEASON or DAY0 are removed by the
#' respective filter step. The number of eliminated records is reported with
#' print() after every filter step.
#'
#' @param BulletinObj Bulletin object created by getBulletins() function
#' @param RareProbCombThreshold Minimum number of cases of avalanche problem combination needed to be kept in dataframe. Default is 10.
#' @param NumForecastsThreshold Minimum number of forecasts a forecaster needs in the dataset to be included. Default is 50.
#'
#' @return The table produced by formatBulletinObjForHzdAnalysis(), filtered and
#'   extended with the derived columns described in the details section.
#'
#' @examples
#' library(SarpBulletinTools)
#'
#' load(url("http://data.avalancheresearch.ca/AllBulletins_2010To2017.RData"))
#' summary(Bulletins)
#'
#' TblAnalysis <- formatBulletinObjForHzdAnalysis_Taylor1(Bulletins)
#'
#' ## The formatting takes a bit of time due to computations for the additional danger rating columns.
#' ## To avoid the waiting time, you can load TblAnalysis directly with
#' ## load(url("http://data.avalancheresearch.ca/2018_Analysis_Taylor.RData"))
#'
#'
#' table(TblAnalysis$REGION, TblAnalysis$SEASON)
#'
#' table(TblAnalysis$PROB_COMBINATION, TblAnalysis$ELEV)
#' table(TblAnalysis$PROB_COMBINATION, TblAnalysis$MTNRANGE)
#'
#' @export

formatBulletinObjForHzdAnalysis_Taylor1 <- function(BulletinObj, RareProbCombThreshold=10, NumForecastsThreshold=50) {

  ## ---- Local helpers ----

  ## Column name of a problem type in the wide table: <Abbrev>_<part>_<part>...
  colName <- function(Abbrev, ...) paste(Abbrev, ..., sep = "_")

  ## Prints how many records a filter step removed
  reportEliminated <- function(NumEliminated, Reason) {
    print(paste0("Eliminated ", NumEliminated, Reason))
  }

  ## Merges High & Extreme and returns the rating as ordered factor.
  ## Any value outside the four levels (incl. "No Rating", "Spring") becomes NA.
  combineHighExtreme <- function(DangerRating) {
    Rating <- as.character(DangerRating)
    Rating[Rating %in% c("High", "Extreme")] <- "High/Extreme"
    ordered(Rating, levels = c("Low", "Moderate", "Considerable", "High/Extreme"))
  }

  ## Converts a numeric 1-9 scale into an ordered factor; 0 (= problem absent) becomes NA
  toOrderedScale <- function(Values, Labels) {
    Values[!is.na(Values) & Values == 0] <- NA
    ordered(Values, levels = 1:9, labels = Labels)
  }

  ExcludedRegions <- c("Yukon", "Whistler Blackcomb", "South Coast", "Northwest - BC",
                       "North Shore", "North Rockies - BC", "North Columbia", "Little Yoho")
  ExcludedSeasons <- c(2010, 2011)
  ExcludedRatings <- c("No Rating", "Spring")
  Rockies <- c("Banff, Yoho and Kootenay", "Jasper", "Kananaskis Country", "Lizard Range", "South Rockies", "Waterton Lakes")
  Coast <- c("Northwest Coastal", "Northwest Inland", "Sea-to-Sky", "South Coast Inland")
  Stats <- c("MIN", "TYP", "MAX")
  Aspects <- c("N", "NE", "E", "SE", "S", "SW", "W", "NW")


  ## General transformation
  Output <- formatBulletinObjForHzdAnalysis(BulletinObj,
                                            MissingAvProblemInWideFormat = 0,
                                            AddDngRatingPrevDay = TRUE,
                                            AddDngRatingOtherElev = TRUE)


  ## Eliminate unwanted forecast regions
  ## NOTE: filters are written as !is.na(x) & !(x %in% ...) because indexing rows
  ## with x != "value" turns every record with a missing x into an all-NA row
  ## instead of removing it.
  Before <- nrow(Output)
  Output <- Output[!is.na(Output$REGION) & !(Output$REGION %in% ExcludedRegions), , drop = FALSE]
  Output$REGION <- factor(Output$REGION)
  reportEliminated(Before - nrow(Output), " records from non-relevant forecast regions.")


  ## Eliminate unwanted seasons
  Before <- nrow(Output)
  Output <- Output[!is.na(Output$SEASON) & !(Output$SEASON %in% ExcludedSeasons), , drop = FALSE]
  Output$SEASON <- factor(Output$SEASON)
  reportEliminated(Before - nrow(Output), " records from non-relevant forecast seasons.")


  ## Eliminate assessments of forecasters with only few records
  Before <- nrow(Output)
  ForecasterCount <- table(Output$FORECASTER)
  ForecasterIncl <- names(ForecasterCount)[as.vector(ForecasterCount) >= NumForecastsThreshold]
  Output <- Output[Output$FORECASTER %in% ForecasterIncl, , drop = FALSE]
  Output$FORECASTER <- factor(Output$FORECASTER)
  reportEliminated(Before - nrow(Output), paste0(" records from forecasters with too few assessment (less than ", NumForecastsThreshold, ")"))


  ## Eliminate unwanted danger ratings
  ## Records with a missing DAY0 rating are removed here as well, so that they
  ## cannot be counted as problem-free records in the rare-combination step below.
  Before <- nrow(Output)
  Output <- Output[!is.na(Output$DAY0) & !(Output$DAY0 %in% ExcludedRatings), , drop = FALSE]
  Output$DAY0 <- factor(Output$DAY0)
  reportEliminated(Before - nrow(Output), " records from non-relevant danger ratings (No rating, Spring).")


  ## Combine High & Extreme danger ratings
  Output$DAY0 <- combineHighExtreme(Output$DAY0)

  ## Derived rating columns: delete the same unwanted ratings, then combine High & Extreme
  for (RatingCol in c("DAYPREV", "DAY0ALP", "DAY0TL", "DAY0BTL")) {
    Rating <- Output[[RatingCol]]
    Rating[Rating %in% ExcludedRatings] <- NA
    Output[[RatingCol]] <- combineHighExtreme(Rating)
  }


  ## Create month column
  Output$MONTH <- months(Output$PUBLISH_DATE)


  ## Create mountain range column (every region not listed as Rockies or Coast is Columbia)
  MtnRange <- rep("Columbia", nrow(Output))
  MtnRange[Output$REGION %in% Rockies] <- "Rockies"
  MtnRange[Output$REGION %in% Coast] <- "Coast"
  Output$MTNRANGE <- factor(MtnRange)

  ## Format agency
  Output$AGENCY <- factor(Output$AGENCY)


  ## Derived columns: binary, product, sum, diff
  ## (columns are created in a fixed order so that the column layout of the table is stable)
  for (Abbrev in ListAvProblemTypesAbbrev) {

    ## Problem is present if its maximum likelihood is above 0
    LikelihoodMax <- Output[[colName(Abbrev, "LIKELIHOOD", "MAX")]]
    Output[[colName(Abbrev, "PRES")]] <- as.numeric(!is.na(LikelihoodMax) & LikelihoodMax > 0)

    for (Stat in Stats) {
      Output[[colName(Abbrev, "PROD", Stat)]] <- Output[[colName(Abbrev, "LIKELIHOOD", Stat)]] * Output[[colName(Abbrev, "SIZE", Stat)]]
    }

    for (Stat in Stats) {
      Output[[colName(Abbrev, "SUM", Stat)]] <- Output[[colName(Abbrev, "LIKELIHOOD", Stat)]] + Output[[colName(Abbrev, "SIZE", Stat)]]
    }

    ## Distance of the typical value to the upper (PLUS) and lower (MINUS) end of the range
    for (Scale in c("LIKELIHOOD", "SIZE")) {
      Typical <- Output[[colName(Abbrev, Scale, "TYP")]]
      Output[[colName(Abbrev, Scale, "TYP_PLUS")]] <- Output[[colName(Abbrev, Scale, "MAX")]] - Typical
      Output[[colName(Abbrev, Scale, "TYP_MINUS")]] <- Typical - Output[[colName(Abbrev, Scale, "MIN")]]
    }

    ## Aspect columns as numeric with missing values counted as 0
    AspectCols <- colName(Abbrev, "ASP", Aspects)
    for (AspectCol in AspectCols) {
      AspectValue <- as.numeric(Output[[AspectCol]])
      AspectValue[is.na(AspectValue)] <- 0
      Output[[AspectCol]] <- AspectValue
    }
    Output[[colName(Abbrev, "ASP_NUM")]] <- Reduce("+", Output[AspectCols])

  }
  print("Created derived variables for each problem: XXXX_PRES (binary), XXXX_PROD_XXXX (product), XXXX_SUM_XXX (sum) and XXXX_TYP_PlUS/MINUS.")


  ## Properly format likelihood and size as ordered factors
  ## (must happen after the numeric derived columns above have been computed)
  for (Abbrev in ListAvProblemTypesAbbrev) {
    for (Stat in Stats) {
      LikelihoodCol <- colName(Abbrev, "LIKELIHOOD", Stat)
      Output[[LikelihoodCol]] <- toOrderedScale(Output[[LikelihoodCol]], LevelsLikelihood)
    }
    for (Stat in Stats) {
      SizeCol <- colName(Abbrev, "SIZE", Stat)
      Output[[SizeCol]] <- toOrderedScale(Output[[SizeCol]], LevelsSize)
    }
  }


  ## Create problem combinations: abbreviations of all present problems joined by "_"
  ProbCombination <- rep(NA_character_, nrow(Output))
  ProbNum <- numeric(nrow(Output))

  for (Abbrev in ListAvProblemTypesAbbrev) {
    Present <- Output[[colName(Abbrev, "PRES")]] == 1
    ProbCombination[Present] <- ifelse(is.na(ProbCombination[Present]),
                                       Abbrev,
                                       paste(ProbCombination[Present], Abbrev, sep = "_"))
    ProbNum[Present] <- ProbNum[Present] + 1
  }

  ProbCombination[ProbNum == 0] <- "NOPROB"

  Output$PROB_COMBINATION <- factor(ProbCombination)
  Output$PROB_NUM <- ProbNum
  print("Created variable PROB_COMBINATION to indicate combination of problems.")


  ## Eliminate rare combinations
  Before <- nrow(Output)
  Output <- Output[Output$PROB_NUM < 4, , drop = FALSE]
  Output$PROB_COMBINATION <- factor(Output$PROB_COMBINATION)
  reportEliminated(Before - nrow(Output), " records with more than three problems.")

  Before <- nrow(Output)
  ProbCombFreq <- table(Output$PROB_COMBINATION)
  RareProbComb <- names(ProbCombFreq)[as.vector(ProbCombFreq) < RareProbCombThreshold]
  Output <- Output[!(Output$PROB_COMBINATION %in% RareProbComb), , drop = FALSE]
  Output$PROB_COMBINATION <- factor(Output$PROB_COMBINATION)
  reportEliminated(Before - nrow(Output), paste0(" records with rare combinations (less than ", RareProbCombThreshold, ")."))


  ## Delete records with missing DAY0 danger rating
  ## (ratings that are not one of the four levels became NA in combineHighExtreme)
  Before <- nrow(Output)
  Output <- Output[!is.na(Output$DAY0), , drop = FALSE]
  reportEliminated(Before - nrow(Output), " records due to missing danger ratings.")


  ## Return output
  Output

}
