;+
; NAME:
;    regiondna_conf_resid.pro
;
; PURPOSE:
;    This function determines a change factor for the confidence metric used 
;    in the IPCC AR6 WGII detection and attribution assessments, based on the 
;    residuals from the multiple linear regression calculations.
;
; CATEGORY:
;    REGIONAL D-AND-A CONFIDENCE
;
; CALLING SEQUENCE:
;    result = regiondna_conf_resid( p_resid )
;
; INPUT:
;    P_RESID:  A required float vector or array specifying the p-value of the 
;        tests on the residuals for each of a series of multiple linear 
;        regression calculations, each using different data products.
;    P_LIMIT
;
; KEYWORD PARAMETERS:
;    METHOD:  A required scalar string specifying the method for evaluation. 
;        Possible values are:
;          * 'binomial':  The Stone and Hansen (2016) method, comparing the 
;            frequency that the P_LIMIT significance level is exceeded in the 
;            P_RESID output against a binomial distribution.
;          * 'k-s':  Compare the distribution of values in P_RESID against a 
;            uniform distribution using a Kolmogorov-Smirnov test.  Weightings 
;            can be included via the SOURCE_WEIGHT keyword input.
;          * 'weighted binomial':  Similar to the Stone and Hansen (2016) 
;            method, but including weightings on each of the samples as input 
;            via the SOURCE_WEIGHT keyword input.
;    P_LIMIT:  An optional scalar float specifying the p-value for statistical 
;        significance tests.  The default is 0.1.
;    SOURCE_WEIGHT:  An optional float vector of size N_SOURCE specifying 
;        weights for the sources when calculating bulk measures.  The default 
;        is 1.0/N_SOURCE for all sources.  The weighting is normalised, so it 
;        is okay if the sum of the weights is not equal to one.  This is only 
;        usable if METHOD='k-s'.
;
; OUTPUT:
;    RESULT:  A float scalar in the range [0,1] specifying the confidence 
;        change factor by which to multiply the confidence metric in 
;        ipccar6_conf.pro, according to the p-values of the residuals.
;
; USES:
;    ks_onesample.pro
;
; PROCEDURE:
;    See Stone and Hansen (2016) for more details.
;
; EXAMPLE:
;    See regiondna_conf.pro.
;
; MODIFICATION HISTORY:
;    Written by:  Daithi A. Stone (dastone@runbox.com), 2015-03-23 (As 
;        part of hanseng_conf_assess_up.pro)
;    Modified:  DAS, 2020-10-29 (Adapted from hanseng_conf_assess_up.pro)
;    Modified:  DAS, 2020-11-19 (Added SOURCE_WEIGHT, METHOD keyword inputs)
;    Modified:  DAS, 2020-12-11 (Added capability to handle NaN inputs for k-s 
;        option)
;    Modified:  DAS, 2021-02-10 (Added the 'weighted binomial' option to the 
;        METHOD keyword input)
;-

;***********************************************************************

FUNCTION REGIONDNA_CONF_RESID, $
    P_RESID, $
    METHOD=method, $
    P_LIMIT=p_limit, $
    SOURCE_WEIGHT=source_weight

;***********************************************************************
; Constants and options

; Ensure the method is specified
if n_elements( method ) ne 1 then stop

; The default p-value for assigning statistical significance
if not( keyword_set( p_limit ) ) then p_limit = 0.1

; The default number of Monte Carlo samples for determining the statistical 
; significance of the weighted binomial outcome
if method eq 'weighted binomial' then n_mc_binomial = 10000

; Determine the exponent on a cosine such that the 0.5-intercepts are at 
; [p_limit/1,1-p_limit/2].  This is used to convert p-values of tests into 
; confidence change factors.
factor_exp = alog( 0.5 ) / alog( 0.5 * ( 1. - cos( !pi * p_limit ) ) )

; Ensure consistent weighting vector
if keyword_set( source_weight ) then begin
  if n_elements( source_weight ) ne n_elements( p_resid ) then stop
  if method eq 'binomial' then stop
endif

;***********************************************************************
; Calculate the degradation factor

; Determine where we have defined p-values for the residuals
id_take = where( finite( p_resid ) eq 1, n_id_take )
; If there are no defined p-values
if n_id_take eq 0 then begin
  ; Then degrade to maximum
  result = 0.5
; If there are defined values
endif else begin
  ; If we are using the Stone and Hansen (2016) approach
  if method eq 'binomial' then begin
    ; Determine where residuals are significantly different from expected
    id_sig = where( ( p_resid[id_take] lt p_limit / 2. ) $
        or ( p_resid[id_take] gt 1. - p_limit / 2. ), n_id_sig )
    ; Degrade according to fraction of significantly different residuals
    result = binomial( n_id_sig, n_id_take, p_limit )
    result = ( 0.5 * ( 1. - cos( !pi * result ) ) ) ^ factor_exp
    result = 0.5 + result / 2.
  ; If we are using the weighted binomial approach
  endif else if method eq 'weighted binomial' then begin
    ; Identify cases of inconsistency with 1
    id_sig = where( ( p_resid[id_take] lt p_limit / 2. ) $
        or ( p_resid[id_take] gt 1. - p_limit / 2. ), n_id_sig )
    if n_id_sig gt 0 then n_id_sig = total( source_weight[id_take[id_sig]] )
    ; Calculate the p-value of this outcome 
    n_id_sig_mc = fltarr( n_mc_binomial )
    for i_mc = 0, n_mc_binomial - 1 do begin
      temp = randomu( seed, n_id_take )
      id_sig_temp = where( temp le p_limit, n_id_sig_temp )
      if n_id_sig_temp eq 0 then begin
        n_id_sig_mc[i_mc] = 0.
      endif else begin
        n_id_sig_mc[i_mc] = total( source_weight[id_take[id_sig_temp]] )
      endelse
    endfor
    id = where( n_id_sig_mc ge n_id_sig, n_id )
    result = float( n_id ) / float( n_mc_binomial )
    ; Calculate degradation factor
    result = ( 0.5 * ( 1. - cos( !pi * result ) ) ) ^ factor_exp
    result = 0.5 + result / 2.
  ; If we are using the K-S test approach
  endif else if method eq 'k-s' then begin
    ; Define the weights
    if keyword_set( source_weight ) then temp_weight = source_weight[id_take]
    ; If we just have one sample then take its p-value
    if n_id_take eq 1 then begin
      result = p_resid[id_take[0]]
    ; Otherwise determine the p-value of the fit of residuals to a uniform 
    ; distribution
    endif else begin
      result = ks_onesample( p_resid[id_take], method='monte carlo', $
          ref_name='uniform', weights=temp_weight )
    endelse
    ; Convert p-value to confidence factor
    result = ( 0.5 * ( 1. - cos( 2 * !pi * result ) ) ) ^ factor_exp
    result = 0.5 $
        + result / 2. * total( source_weight[id_take] ) $
        / total( source_weight )
  ; If the method is not implemented
  endif else begin
    stop
  endelse
endelse

;***********************************************************************
; The end

return, result
END