/*======================================================================
 | Wilcoxon test program.
 |----------------------------------------------------------------------
 | Calculates the Wilcoxon signed-rank test for paired data read from a
 | text file, or from standard input when the file name is "-".
 |----------------------------------------------------------------------
 | The input consists of two columns of floating point numbers: the
 | score of method 1 followed by the score of method 2, one pair per
 | test.  Reading stops at end of file or at the first token that is
 | not a number.  At most MAX_ELEMENT pairs are used.
 |----------------------------------------------------------------------
 | Originally compiled on the yallara machine.  Build with the maths
 | library, for example:
 |     cc -o <executable> <source file> -lm
 ======================================================================*/

#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* #define VERBOSE */

/* Values stored in CONFIDENCE.  Note that FALSE is -1, not 0. */
#define TRUE    1
#define FALSE   (-1)
#define UNKNOWN 0

/* Maximum number of input pairs the fixed-size work arrays can hold. */
#define MAX_ELEMENT 10000

/* Two absolute differences closer than this are treated as tied. */
#define TIE_TOLERANCE 0.00001

/* One input pair together with everything derived from it. */
typedef struct
{
    int   queryno;      /* 1-based position of the pair in the input  */
    float method1;      /* raw data of method 1                       */
    float method2;      /* raw data of method 2                       */
    float difference;   /* method1 - method2                          */
    float signedrank;   /* rank of |difference| carrying its sign     */
} WILCOXON;

/*
 * Work record used to derive the unsigned rank that belongs to each
 * absolute difference.
 */
typedef struct
{
    /* Absolute value of a difference copied from a WILCOXON record. */
    float difference;

    /* Unsigned rank associated with the value stored in difference. */
    float unsignedrank;

    /*
     * Serial number (1-based) given to the record once the array has been
     * sorted in increasing order of difference.  The unsigned rank of a
     * set of equal differences is the mean of their serial numbers.
     */
    int validqueryno;
} UNSIGNEDRANK;

/*
 * Outcome of the test: TRUE when method 1 is better than method 2, FALSE
 * when method 2 is better than method 1, UNKNOWN when the two methods
 * cannot be separated at the requested confidence level.
 */
int CONFIDENCE;

/*
 * Confidence level as text, e.g. "97.5".  It is taken from the command
 * line and replaced by the canonical spelling of the level actually used.
 */
char conf_level[80];

/* Index of the last valid element of WilWork[]; -1 when it is empty. */
int WilIdx = -1;

/* Index of the last valid element of resultUns[]; -1 when it is empty. */
int resultIdx = -1;

/* Critical value: z score of the confidence level times StdDev. */
float LevelOfConfi = 0.0;

/* Sum of the signed ranks. */
float SumOfSignedRank = 0.0;

/* Standard deviation of the signed-rank sum. */
float StdDev = 0.0;

/*
 * Absolute differences copied from WilWork[], sorted in increasing order,
 * with their serial numbers and unsigned ranks.
 */
UNSIGNEDRANK absof[MAX_ELEMENT];

/*
 * One record per group of tied absolute differences: a representative
 * difference and the unsigned rank shared by the group.  Where absof[]
 * may hold 1.0, 1.0, 1.0 this array holds 1.0 once.  It is the lookup
 * table used to give every WilWork[] element its signed rank.
 */
UNSIGNEDRANK resultUns[MAX_ELEMENT];

/*
 * The pairs that produce a non-zero difference, together with the signed
 * rank of that difference.  The sum of signed ranks is taken from here.
 */
WILCOXON WilWork[MAX_ELEMENT];

/* Input stream: the file named on the command line, or stdin. */
FILE *inptr;

/* Return non-zero when x is neither an infinity nor a NaN. */
static int Is_Finite(float x)
{
    /* x - x is 0 for every finite x and NaN for infinities and NaNs. */
    return (x - x) == 0.0f;
}

/* Return non-zero when a and b differ by less than TIE_TOLERANCE. */
static int Nearly_Equal(float a, float b)
{
    return fabs((double) a - (double) b) < TIE_TOLERANCE;
}

/* qsort comparator: increasing order of the difference member. */
static int Compare_Difference(const void *lhs, const void *rhs)
{
    const UNSIGNEDRANK *a = (const UNSIGNEDRANK *) lhs;
    const UNSIGNEDRANK *b = (const UNSIGNEDRANK *) rhs;

    return (a->difference > b->difference) - (a->difference < b->difference);
}

/*
 * Read pairs of numbers from inptr into WilWork[] and leave WilIdx at the
 * index of the last pair stored.  Reading stops at end of input, at the
 * first token that is not a number, or once MAX_ELEMENT pairs are stored.
 * A pair containing an infinity or a NaN terminates the program.
 */
void Load_Input(void)
{
    float m1, m2;
    int got;

    while (WilIdx + 1 < MAX_ELEMENT)
    {
        got = fscanf(inptr, "%f %f", &m1, &m2);
        if (got != 2)
        {
            /* got is EOF on a clean end of input; 0 or 1 means leftovers. */
            if (got == 0 || got == 1)
                fprintf(stderr, "Warning: input ends with incomplete or "
                                "non-numeric data, which was ignored.\n");
            break;
        }
        if (!Is_Finite(m1) || !Is_Finite(m2))
        {
            fprintf(stderr, "Input pair %d is not a finite number\n",
                    WilIdx + 2);
            exit(1);
        }

        WilIdx++;
        WilWork[WilIdx].queryno = WilIdx + 1;   /* serial = index + 1 */
        WilWork[WilIdx].method1 = m1;
        WilWork[WilIdx].method2 = m2;
    }

    /* The arrays are full: tell the user if data had to be left unread. */
    if (WilIdx + 1 == MAX_ELEMENT && fscanf(inptr, "%f", &m1) == 1)
        fprintf(stderr, "Warning: only the first %d pairs were used.\n",
                MAX_ELEMENT);

#ifdef VERBOSE
    printf("\n-------------------------------------------------------------------\n");
    printf("The number of data items is --> %d\n", WilIdx + 1);
    printf("\n-------------------------------------------------------------------\n");
#endif
}

/* Fill the difference member of every loaded WilWork[] element. */
void Calculate_Difference(void)
{
    int i;

    for (i = 0; i <= WilIdx; i++)
        WilWork[i].difference = WilWork[i].method1 - WilWork[i].method2;
}

/*
 * Remove the elements whose difference is zero from WilWork[], keeping
 * the remaining elements in their original order, and set WilIdx to the
 * index of the last element kept.
 */
void Eliminate_Zero_Difference(void)
{
    int src;
    int kept = 0;

    /* Compact in place; kept never runs ahead of src. */
    for (src = 0; src <= WilIdx; src++)
    {
        if (WilWork[src].difference != 0)
        {
            if (kept != src)
                WilWork[kept] = WilWork[src];
            kept++;
        }
    }
    WilIdx = kept - 1;
}

/*
 * Flip the sign of *f.  Called on a negative number it yields the
 * absolute value.  The program itself now uses fabs(); the function is
 * retained so that the set of functions in this file is unchanged.
 */
void Toggle(float *f)
{
    *f = -(*f);
}

/*
 * Swap the elements absof[i] and absof[j].  No longer called by
 * Sort_Difference(); retained so that the set of functions in this file
 * is unchanged.
 */
void swap(int i, int j)
{
    UNSIGNEDRANK u;

    u = absof[i];
    absof[i] = absof[j];
    absof[j] = u;
}

/*
 * Sort absof[left..right] (both bounds inclusive) in increasing order of
 * the difference member.  An empty or one-element range is left alone.
 */
void Sort_Difference(int left, int right)
{
    if (left >= right)
        return;

    qsort(&absof[left], (size_t) (right - left + 1), sizeof absof[0],
          Compare_Difference);
}

/* Copy the absolute value of every WilWork[] difference into absof[]. */
void Load_Absolute_Difference(void)
{
    int i;

    for (i = 0; i <= WilIdx; i++)
        absof[i].difference = (float) fabs(WilWork[i].difference);
}

/*
 * Calculate the unsigned rank of every absolute difference.
 *
 * absof[] is loaded, sorted and numbered 1..n.  Neighbouring elements
 * that differ by less than TIE_TOLERANCE form a group; the rank of a
 * group is the sum of its serial numbers divided by its size.  The rank
 * is stored in each absof[] element of the group and in one resultUns[]
 * record whose difference is that of the last element of the group.
 */
void Calculate_Unsigned_Rank(void)
{
    int i, k;
    int groupStart = 0;   /* index of the first element of the group  */
    long total = 0;       /* sum of the serial numbers in the group   */
    float rank;

    Load_Absolute_Difference();
    Sort_Difference(0, WilIdx);

    for (i = 0; i <= WilIdx; i++)
        absof[i].validqueryno = i + 1;

    resultIdx = -1;
    for (i = 0; i <= WilIdx; i++)
    {
        total += absof[i].validqueryno;

        /* The group continues while the next element is a tie.  The last
           element always closes its group, so absof[i + 1] is only read
           when it holds valid data. */
        if (i < WilIdx
            && Nearly_Equal(absof[i].difference, absof[i + 1].difference))
            continue;

        rank = (float) ((double) total / (double) (i - groupStart + 1));
        for (k = groupStart; k <= i; k++)
            absof[k].unsignedrank = rank;

        resultIdx++;
        resultUns[resultIdx].difference = absof[i].difference;
        resultUns[resultIdx].unsignedrank = rank;

        groupStart = i + 1;
        total = 0;
    }
}

/*
 * Give every WilWork[] element its signed rank: the unsigned rank of the
 * resultUns[] group whose difference matches the element's absolute
 * difference within TIE_TOLERANCE, negated when the difference is
 * negative.  If several groups match, the last one wins.
 */
void Assign_Signed_Rank(void)
{
    int g, j;
    float magnitude;

    for (j = 0; j <= WilIdx; j++)
    {
        magnitude = (float) fabs(WilWork[j].difference);

        for (g = 0; g <= resultIdx; g++)
        {
            if (!Nearly_Equal(magnitude, resultUns[g].difference))
                continue;

            if (WilWork[j].difference > 0.0)
                WilWork[j].signedrank = resultUns[g].unsignedrank;
            else if (WilWork[j].difference < 0.0)
                WilWork[j].signedrank = 0.0f - resultUns[g].unsignedrank;
        }
    }
}

/*
 * Calculate the sum of the signed ranks, the standard deviation
 * sqrt(n(n+1)(2n+1)/6) for n non-zero tests, and the critical value for
 * the confidence level named by conf_level, then set CONFIDENCE.
 *
 * conf_level is matched numerically, so "90" and "90.0" select the same
 * level.  Anything that is not one of the supported levels falls back to
 * 95.0 with a warning on stderr.  conf_level is rewritten to the
 * canonical spelling of the level used so that it can be displayed.
 */
void Calculate_Final_Data(void)
{
    static const struct
    {
        const char *label;   /* canonical spelling                 */
        double percent;      /* the same level as a number         */
        float z;             /* standard normal z score            */
    } levels[] = {
        { "90.0", 90.0, 1.282f },
        { "95.0", 95.0, 1.645f },
        { "97.5", 97.5, 1.960f },
        { "98.0", 98.0, 2.054f },
        { "99.0", 99.0, 2.326f },
        { "99.5", 99.5, 2.576f },
        { "99.9", 99.9, 3.090f }
    };
    const int levelCount = (int) (sizeof levels / sizeof levels[0]);

    double sum = 0.0;
    double n, requested;
    float cum_prob = 0.0f;
    char *end;
    int i;
    int chosen = -1;

    /* Accumulate in double: the sum can exceed what a float holds exactly. */
    for (i = 0; i <= WilIdx; i++)
        sum += WilWork[i].signedrank;
    SumOfSignedRank = (float) sum;

    n = WilIdx + 1;
    StdDev = (float) sqrt(n * (n + 1.0) * (2.0 * n + 1.0) / 6.0);

    requested = strtod(conf_level, &end);
    if (end != conf_level && *end == '\0')
    {
        for (i = 0; i < levelCount; i++)
        {
            if (fabs(requested - levels[i].percent) < 1e-9)
            {
                chosen = i;
                break;
            }
        }
    }

    if (chosen >= 0)
    {
        strcpy(conf_level, levels[chosen].label);
        cum_prob = levels[chosen].z;
    }
    else
    {
        fprintf(stderr,
                "Warning: unknown confidence level \"%s\"; using 95.0.\n",
                conf_level);
        strcpy(conf_level, "95.0");
        cum_prob = 1.645f;
    }

    LevelOfConfi = cum_prob * StdDev;

    if (fabs(SumOfSignedRank) > LevelOfConfi)
    {
        if (SumOfSignedRank > 0)
            CONFIDENCE = TRUE;      /* method1 is better than method2 */
        else
            CONFIDENCE = FALSE;     /* method2 is better than method1 */
    }
    else
        CONFIDENCE = UNKNOWN;       /* cannot distinguish between methods */
}

/*
 * Print the results on standard output: the number of non-zero tests,
 * the sum of the signed ranks, the critical value and the verdict.  With
 * VERBOSE defined the whole WilWork[] array is printed first.
 */
void Write_Result_To_File(void)
{
#ifdef VERBOSE
    int i;

    printf("QueryNo Method 1 Method 2 Difference Signed Rank\n");
    printf("--------------------------------------------------------------------\n");
    for (i = 0; i <= WilIdx; i++)
        printf("%3d %8.4f %8.4f %8.4f %8.1f\n",
               WilWork[i].queryno, WilWork[i].method1, WilWork[i].method2,
               WilWork[i].difference, WilWork[i].signedrank);
    printf("--------------------------------------------------------------------\n");
#endif

    printf("The number of nonzero tests is --> %d\n", WilIdx + 1);
    printf("The sum of the signed rank is --> %f\n", SumOfSignedRank);
    printf("The %s%% level of confidence is --> %f\n", conf_level, LevelOfConfi);

    if (CONFIDENCE == FALSE)
        printf("\nMethod 2 is better than method 1\n");
    else if (CONFIDENCE == TRUE)
        printf("\nMethod 1 is better than method 2\n");
    else
        printf("\nMethods cannot be separated.\n");
}

/*
 * Validate the command line and set conf_level.
 *
 * ComCount and List are argc and argv.  Without an input file name the
 * usage text is printed and the program exits with status 1.  The second
 * argument, when present, is copied (truncated to fit) into conf_level;
 * otherwise conf_level becomes "95.0".  Further arguments only produce a
 * warning.  All messages go to stderr so that redirected results stay
 * clean.
 */
void Check_Command_Line(int ComCount, char *List[])
{
    const char *prog =
        (ComCount > 0 && List[0] != NULL) ? List[0] : "wilcoxon";

    if (ComCount < 2)
    {
        fprintf(stderr, "\n\nPlease give me input file name next time you run me\n\n");
        fprintf(stderr, "Usage:\n");
        fprintf(stderr, "------\n");
        fprintf(stderr, "%s <input file name> [Confidence level]\n", prog);
        fprintf(stderr, "or %s <input file name> [Confidence level] > <output file name>\n\n", prog);
        fprintf(stderr, "Available Confidence level are: 90.0, 95.0, 97.5, 98.0, 99.0, 99.5, 99.9.\n\n");
        fprintf(stderr, "Please Note that:\n");
        fprintf(stderr, " + If you do not specify the confidence level, 95%% will be used.\n");
        fprintf(stderr, " + The exact confidence level number must be entered (i.e. 90.0, or 97.5).\n");
        fprintf(stderr, " + Use - as the input file name to read the data from standard input.\n\n\n");
        exit(1);
    }

    if (ComCount == 2)
    {
        strcpy(conf_level, "95.0");
        return;
    }

    /* Bounded copy: the argument may be longer than conf_level. */
    sprintf(conf_level, "%.*s", (int) (sizeof conf_level - 1), List[2]);

    if (ComCount > 3)
    {
        fprintf(stderr, "\n\n\n\nWarning\n");
        fprintf(stderr, "-------\n");
        fprintf(stderr, "Your command is to be executed. But I do not understand the\n");
        fprintf(stderr, "parameter(s) after the third one.\n\n\n");

        /* Do not pause when the data itself arrives on stdin: getchar()
           would swallow the first character of the input. */
        if (strcmp(List[1], "-") != 0)
        {
            fprintf(stderr, "\n\nPlease press <Enter> to continue.");
            getchar();
        }
    }
}

/*
 * Program entry point: check the command line, open the input, run the
 * test and print the result.  Returns 0 on success; exits with status 1
 * when the arguments are missing or the input cannot be opened.
 */
int main(int argc, char *argv[])
{
    Check_Command_Line(argc, argv);

    if (strcmp(argv[1], "-") == 0)      /* take stdin */
        inptr = stdin;
    else
        inptr = fopen(argv[1], "r");

    if (inptr == NULL)
    {
        fprintf(stderr, "File not found\n");
        exit(1);
    }

    Load_Input();
    Calculate_Difference();
    Eliminate_Zero_Difference();
    Calculate_Unsigned_Rank();
    Assign_Signed_Rank();
    Calculate_Final_Data();
    Write_Result_To_File();

    if (inptr != stdin)
        fclose(inptr);

    return 0;
}