IndependenceTest.c

Go to the documentation of this file.
00001 
00009 #include "party.h"
00010 
00011 
00021 void C_TeststatPvalue(const SEXP linexpcov, const SEXP varctrl, 
00022                       double *ans_teststat, double *ans_pvalue) {
00023     
00024     double releps, abseps, tol;
00025     int maxpts;
00026     
00027     maxpts = get_maxpts(varctrl);
00028     tol = get_tol(varctrl);
00029     abseps = get_abseps(varctrl);
00030     releps = get_releps(varctrl);
00031     
00032     /* compute the test statistic */
00033     ans_teststat[0] = C_TestStatistic(linexpcov, get_teststat(varctrl), 
00034                                   get_tol(varctrl));
00035 
00036     /* compute the p-value if requested */                                  
00037     if (get_pvalue(varctrl))
00038         ans_pvalue[0] =  C_ConditionalPvalue(ans_teststat[0], linexpcov, 
00039                                          get_teststat(varctrl),
00040                                          tol, &maxpts, &releps, &abseps);
00041     else
00042         ans_pvalue[0] = 1.0;
00043 }
00044 
00053 void C_TeststatCriterion(const SEXP linexpcov, const SEXP varctrl, 
00054                          double *ans_teststat, double *ans_criterion) {
00055     
00056     C_TeststatPvalue(linexpcov, varctrl, ans_teststat, ans_criterion);
00057     
00058     /* the node criterion is to be MAXIMISED, 
00059        i.e. 1-pvalue or test statistic \in \[0, \infty\] */
00060     if (get_pvalue(varctrl))
00061         ans_criterion[0] = 1 - ans_criterion[0];
00062     else
00063         ans_criterion[0] = ans_teststat[0];
00064     
00065 }
00066 
00067 
00078 void C_IndependenceTest(const SEXP x, const SEXP y, const SEXP weights, 
00079                         SEXP linexpcov, SEXP varctrl, 
00080                         SEXP ans) {
00081     
00082     /* compute linear statistic and its conditional expectation and
00083        covariance
00084     */
00085     C_LinStatExpCov(REAL(x), ncol(x), REAL(y), ncol(y), 
00086                     REAL(weights), nrow(x), 1, 
00087                     GET_SLOT(linexpcov, PL2_expcovinfSym), linexpcov);
00088 
00089     /* compute test statistic */
00090     if (get_teststat(varctrl) == 2) 
00091         C_LinStatExpCovMPinv(linexpcov, get_tol(varctrl));
00092     C_TeststatPvalue(linexpcov, varctrl, &REAL(ans)[0], &REAL(ans)[1]);
00093 }
00094 
00095 
00105 SEXP R_IndependenceTest(SEXP x, SEXP y, SEXP weights, SEXP linexpcov, SEXP varctrl) {
00106                         
00107     SEXP ans;
00108     
00109     PROTECT(ans = allocVector(REALSXP, 2));
00110     C_IndependenceTest(x, y, weights, linexpcov, varctrl, ans);
00111     UNPROTECT(1);
00112     return(ans);
00113 }
00114 
00115 
00130 void C_GlobalTest(const SEXP learnsample, const SEXP weights, 
00131                   SEXP fitmem, const SEXP varctrl, 
00132                   const SEXP gtctrl, const double minsplit, 
00133                   double *ans_teststat, double *ans_criterion, int depth) {
00134 
00135     int ninputs, nobs, j, i, k, RECALC = 1, type;
00136     SEXP responses, inputs, y, x, xmem, expcovinf;
00137     SEXP thiswhichNA, Smtry;
00138     double *thisweights, *dweights, *pvaltmp, stweights = 0.0;
00139     int *ithiswhichNA, RANDOM, mtry, *randomvar, *index;
00140     int *dontuse, *dontusetmp;
00141     
00142     ninputs = get_ninputs(learnsample);
00143     nobs = get_nobs(learnsample);
00144     responses = GET_SLOT(learnsample, PL2_responsesSym);
00145     inputs = GET_SLOT(learnsample, PL2_inputsSym);
00146     dweights = REAL(weights);
00147     
00148     /* y = get_transformation(responses, 1); */
00149     y = get_test_trafo(responses);
00150     
00151     expcovinf = GET_SLOT(fitmem, PL2_expcovinfSym);
00152     C_ExpectCovarInfluence(REAL(y), ncol(y), REAL(weights), 
00153                            nobs, expcovinf);
00154     
00155     if (REAL(GET_SLOT(expcovinf, PL2_sumweightsSym))[0] < minsplit) {
00156         for (j = 0; j < ninputs; j++) {
00157             ans_teststat[j] = 0.0;
00158             ans_criterion[j] = 0.0;
00159         }
00160     } else {
00161 
00162         dontuse = INTEGER(get_dontuse(fitmem));
00163         dontusetmp = INTEGER(get_dontusetmp(fitmem));
00164     
00165         for (j = 0; j < ninputs; j++) dontusetmp[j] = !dontuse[j];
00166     
00167         /* random forest */
00168         RANDOM = get_randomsplits(gtctrl);
00169         Smtry = get_mtry(gtctrl);
00170         if (LENGTH(Smtry) == 1) {
00171             mtry = INTEGER(Smtry)[0];
00172         } else {
00173             /* mtry may vary with tree depth */
00174             depth = (depth <= LENGTH(Smtry)) ? depth : LENGTH(Smtry);
00175             mtry = INTEGER(get_mtry(gtctrl))[depth - 1];
00176             Rprintf("using mtry %d\n", mtry);
00177         }
00178         if (RANDOM & (mtry > ninputs)) {
00179             warning("mtry is larger than ninputs, using mtry = inputs");
00180             mtry = ninputs;
00181             RANDOM = 0;
00182         }
00183         if (RANDOM) {
00184             index = Calloc(ninputs, int);
00185             randomvar = Calloc(mtry, int);
00186             C_SampleNoReplace(index, ninputs, mtry, randomvar);
00187             j = 0;
00188             for (k = 0; k < mtry; k++) {
00189                 j = randomvar[k];
00190                 while(dontuse[j] && j < ninputs) j++;
00191                 if (j == ninputs) 
00192                     error("not enough variables to sample from");
00193                 dontusetmp[j] = 0;
00194             }
00195             Free(index);
00196             Free(randomvar);
00197         }
00198 
00199         for (j = 1; j <= ninputs; j++) {
00200 
00201             if ((RANDOM && dontusetmp[j - 1]) || dontuse[j - 1]) {
00202                 ans_teststat[j - 1] = 0.0;
00203                 ans_criterion[j - 1] = 0.0;
00204                 continue; 
00205             }
00206         
00207             x = get_transformation(inputs, j);
00208 
00209             xmem = get_varmemory(fitmem, j);
00210             if (!has_missings(inputs, j)) {
00211                 C_LinStatExpCov(REAL(x), ncol(x), REAL(y), ncol(y),
00212                                 REAL(weights), nrow(x), !RECALC, expcovinf,
00213                                 xmem);
00214             } else {
00215                 thisweights = C_tempweights(j, weights, fitmem, inputs);
00216 
00217                 /* check if minsplit criterion is still met 
00218                    in the presence of missing values
00219                    bug spotted by Han Lee <Han.Lee@geodecapital.com>
00220                        fixed 2006-08-31
00221                 */
00222                 stweights = 0.0;
00223                 for (i = 0; i < nobs; i++) stweights += thisweights[i];
00224                 if (stweights < minsplit) {
00225                     ans_teststat[j - 1] = 0.0;
00226                     ans_criterion[j - 1] = 0.0;
00227                     continue; 
00228                 }
00229 
00230                 C_LinStatExpCov(REAL(x), ncol(x), REAL(y), ncol(y),
00231                                 thisweights, nrow(x), RECALC, 
00232                                 GET_SLOT(xmem, PL2_expcovinfSym),
00233                                 xmem);
00234             }
00235 
00236             if (get_teststat(varctrl) == 2)
00237                 C_LinStatExpCovMPinv(xmem, get_tol(varctrl));
00238             C_TeststatCriterion(xmem, varctrl, &ans_teststat[j - 1], 
00239                                 &ans_criterion[j - 1]);
00240         }                
00241 
00242         type = get_testtype(gtctrl);
00243         switch(type) {
00244             /* Bonferroni: p_adj = 1 - (1 - p)^k */
00245             case BONFERRONI: 
00246                     for (j = 0; j < ninputs; j++)
00247                         ans_criterion[j] = R_pow_di(ans_criterion[j], ninputs);
00248                     break;
00249             /* Monte-Carlo */
00250             case MONTECARLO: 
00251                     pvaltmp = Calloc(ninputs, double);
00252                     C_MonteCarlo(ans_criterion, learnsample, weights, fitmem, 
00253                                  varctrl, gtctrl, pvaltmp);
00254                     for (j = 0; j < ninputs; j++)
00255                         ans_criterion[j] = 1 - pvaltmp[j];
00256                     Free(pvaltmp);
00257                     break;
00258             /* aggregated */
00259             case AGGREGATED: 
00260                     error("C_GlobalTest: aggregated global test not yet implemented");
00261                     break;
00262             /* raw */
00263             case UNIVARIATE: break;
00264             case TESTSTATISTIC: break;
00265             default: error("C_GlobalTest: undefined value for type argument");
00266                      break;
00267         }
00268     }
00269 }
00270 
00271 
00281 SEXP R_GlobalTest(SEXP learnsample, SEXP weights, SEXP fitmem, 
00282                   SEXP varctrl, SEXP gtctrl) {
00283 
00284     SEXP ans, teststat, criterion;
00285 
00286     GetRNGstate();
00287 
00288     PROTECT(ans = allocVector(VECSXP, 2));
00289     SET_VECTOR_ELT(ans, 0, 
00290         teststat = allocVector(REALSXP, get_ninputs(learnsample)));
00291     SET_VECTOR_ELT(ans, 1, 
00292         criterion = allocVector(REALSXP, get_ninputs(learnsample)));
00293 
00294     C_GlobalTest(learnsample, weights, fitmem, varctrl, gtctrl, 0, 
00295                  REAL(teststat), REAL(criterion), 1);
00296                  
00297     PutRNGstate();
00298     
00299     UNPROTECT(1);
00300     return(ans);
00301 }

Generated on Tue Jun 16 09:15:19 2009 for party by  doxygen 1.5.8