#
#                            COPYRIGHT
#
#  xclass2kde.awk - Outputs statistics on the results of xclass2kde.
#  Copyright (C) 2012 Exstrom Laboratories LLC
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available on the internet at:
#  http://www.gnu.org/copyleft/gpl.html
#
#  or you can write to:
#
#  The Free Software Foundation, Inc.
#  675 Mass Ave
#  Cambridge, MA 02139, USA
#
#  Exstrom Laboratories LLC contact:
#  stefan(AT)exstrom.com
#
#  Exstrom Laboratories LLC
#  Longmont, CO 80503, USA
#
#
# xclass2kde.awk reads the output of xclass2kde as well as a
# classification file of what actually happened and
# a threshold probability, then outputs statistics.
# The output statistics are based on the calculation
# of 4 values: tp, fp, fn, tn.
# The meaning of these 4 values can be understood in terms
# of this table:
#
#                   Actual
#                 |  P |  N
#                 -------------
#   Predicted   P | TP | FP
#                 -------------
#               N | FN | TN
#
# If we predicted P and the actual was P, then TP is incremented.
# If we predicted N and the actual was N, then TN is incremented.
# If we predicted P and the actual was N, then FP is incremented.
# If we predicted N and the actual was P, then FN is incremented.
# A line of per-record predictions is output first (1 if the probability
# is greater than the threshold, 0 otherwise, separated by spaces).
# The following statistics are then output on 2 lines:
# *First line contains 4 integers: tp, fp, fn, tn
# *Second line contains 3 floating points:
#   1) True positive rate or sensitivity (TPR)
#      TPR = TP/(TP+FN) = probability of predicting P given that actual is P.
#   2) False positive rate or fall-out (FPR)
#      FPR = FP/(FP+TN) = probability of predicting P given that actual is N.
#   3) Accuracy
#      Accuracy = (TP+TN)/(TP+TN+FP+FN)
# A rate whose denominator is zero (for example, no actual positives in
# the input) is reported as 0.
# Example:
#   awk -f xclass2kde.awk -v thr=0.5 xclass2kde.out resp_spy2.dat

# Return num/den, or 0 when den is zero, so that an undefined rate does
# not abort the script with a division-by-zero error.
function ratio(num, den) {
  return (den != 0) ? num / den : 0
}

BEGIN {
  FS = "[ ,]"  # field separator is a single space or a comma
  tp = 0; fp = 0; fn = 0; tn = 0
}

# Current file is the 1st input file (output of xclass2kde):
# keep the first field of every record as that record's probability.
FILENAME == ARGV[1] {
  prob[FNR] = $1
}

# Current file is the 2nd input file (classification or response file):
# keep the first field of every record as that record's actual class.
FILENAME == ARGV[2] {
  class[FNR] = $1
}

END {
  # In END, FNR still holds the record count of the last file read,
  # so the loop covers one index per record of that file.
  for (i = 1; i <= FNR; i++) {
    # Predict P (1) when the probability exceeds the threshold thr,
    # which is expected to be supplied with -v thr=...
    predicted = (prob[i] > thr) ? 1 : 0
    printf("%d ", predicted)

    if (predicted && class[i] == 1)
      tp += 1
    else if (!predicted && class[i] == 0)
      tn += 1
    else if (predicted && class[i] == 0)
      fp += 1
    else
      fn += 1
  } # end for

  printf("\n%d %d %d %d\n", tp, fp, fn, tn)

  # Plain %f is used: the C length modifier "l" is not a valid awk
  # printf modifier and is rejected by gawk in --posix mode.
  printf("%1.5f %1.5f %1.5f\n",
         ratio(tp, tp + fn),
         ratio(fp, fp + tn),
         ratio(tp + tn, tp + tn + fp + fn))
} # end END