#
#                            COPYRIGHT
#
#  class2kde.awk - Outputs statistics on the results of class2kde.
#  Copyright (C) 2012 Exstrom Laboratories LLC
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
#  GNU General Public License for more details.
#
#  A copy of the GNU General Public License is available on the internet at:
#  http://www.gnu.org/copyleft/gpl.html
#
#  or you can write to:
#
#  The Free Software Foundation, Inc.
#  675 Mass Ave
#  Cambridge, MA 02139, USA
#
#  Exstrom Laboratories LLC contact:
#  stefan(AT)exstrom.com
#
#  Exstrom Laboratories LLC
#  Longmont, CO 80503, USA
#
#
# class2kde.awk reads unsorted output of class2kde,
# as well as a threshold probability value, and outputs
# statistics. The output statistics are based on the calculation
# of 4 values: TP, FP, FN, TN.
# The meaning of these 4 values can be understood in terms
# of this table:
#
#                    Actual
#                 |  P  |  N
#               --------------
# Predicted   P   | TP  | FP
#               --------------
#             N   | FN  | TN
#
# If we predicted P and the actual was P, then TP is incremented.
# If we predicted N and the actual was N, then TN is incremented.
# If we predicted P and the actual was N, then FP is incremented.
# If we predicted N and the actual was P, then FN is incremented.
# The following statistics in 2 lines are output:
# *First line contains 4 integers: TP, FP, FN, TN
# *Second line contains 3 floating point numbers:
#   1) True positive rate or sensitivity (TPR)
#      TPR = TP/(TP+FN) = probability of predicting P given that actual is P.
#   2) False positive rate or fall-out (FPR)
#      FPR = FP/(FP+TN) = probability of predicting P given that actual is N.
#   3) Accuracy
#      Accuracy = (TP+TN)/(TP+TN+FP+FN)
# Example:
# awk -f class2kde.awk -v thr=0.5 class2kde.out
#
# Notes:
#  * Field 1 of each record is the predicted probability, field 2 is the
#    actual class (1 = P, 0 = N).  A record is predicted P when field 1 is
#    greater than thr, and N otherwise.
#  * thr must be supplied with -v; an unset awk variable compares as 0.
#  * Records with a missing/empty field, or whose class is neither 0 nor 1,
#    are not counted.  Their number is reported on stderr.
#  * A rate whose denominator is zero (empty input, or no actual P / no
#    actual N records) is printed as "nan" instead of aborting the script.

BEGIN {
  FS="[ ,]" # field separator is space, or comma
  tp=0; fp=0; fn=0; tn=0
  skipped=0 # records that could not be classified
}

# Returns num/den formatted with 5 decimals, or "nan" when den is zero.
function rate(num, den)
{
  return (den == 0) ? "nan" : sprintf("%1.5f", num/den)
}

{
  # This code runs for all records

  # Blank or truncated records carry no usable (probability, class) pair.
  if (NF < 2 || $1 == "" || $2 == "") {
    skipped += 1
    next
  }

  predicted_pos = ($1 > thr)

  if ($2 == 1) {
    if (predicted_pos)
      tp += 1
    else
      fn += 1
  }
  else if ($2 == 0) {
    if (predicted_pos)
      fp += 1
    else
      tn += 1
  }
  else {
    # Unknown class label: do not let it leak into the FN count.
    skipped += 1
  }
}

END {
  printf("%d %d %d %d\n", tp, fp, fn, tn)
  printf("%s %s %s\n", rate(tp, tp+fn), rate(fp, fp+tn), rate(tp+tn, tp+tn+fp+fn))
  if (skipped > 0)
    printf("class2kde.awk: skipped %d malformed record(s)\n", skipped) > "/dev/stderr"
} # end END