#
# COPYRIGHT
#
# xclass2kde.awk - Outputs statistics on the results of xclass2kde.
# Copyright (C) 2012 Exstrom Laboratories LLC
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available on the internet at:
# http://www.gnu.org/copyleft/gpl.html
#
# or you can write to:
#
# The Free Software Foundation, Inc.
# 675 Mass Ave
# Cambridge, MA 02139, USA
#
# Exstrom Laboratories LLC contact:
# stefan(AT)exstrom.com
#
# Exstrom Laboratories LLC
# Longmont, CO 80503, USA
#
#
# xclass2kde.awk reads output of xclass2kde as well as a
# classification file of what actually happened and
# a threshold probability, then outputs statistics.
# The output statistics are based on the calculation
# of 4 values: tp, fp, fn, tn.
# The meaning of these 4 values can be understood in terms
# of this table:
#
#                    Actual
#                  | P  | N
#                -------------
#   Predicted  P | TP | FP
#                -------------
#              N | FN | TN
#
# If we predicted P and the actual was P, then TP is incremented.
# If we predicted N and the actual was N, then TN is incremented.
# If we predicted P and the actual was N, then FP is incremented.
# If we predicted N and the actual was P, then FN is incremented.
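# The following lines are output:
# *A line of predictions, one per record, separated by spaces:
#  1 if the probability is above the threshold (predicted P), else 0 (predicted N)
# *A line containing 4 integers: tp, fp, fn, tn
# *A line containing 3 floating point numbers: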
# 1) True positive rate or sensitivity (TPR)
# TPR = TP/(TP+FN) = probability of predicting P given that actual is P.
# 2) False positive rate or fall-out (FPR)
# FPR = FP/(FP+TN) = probability of predicting P given that actual is N.
# 3) Accuracy
# Accuracy = (TP+TN)/(TP+TN+FP+FN)
# Example:
# awk -f xclass2kde.awk -v thr=0.5 xclass2kde.out resp_spy2.dat
BEGIN {
	# Confusion-matrix tallies all start at zero.
	tp = fp = fn = tn = 0

	# Split each record on a single space or a single comma.
	FS = "[ ,]"
}
# Records of the first input file (the xclass2kde output): keep the first
# field of every record, indexed by its line number within that file.
ARGV[1] == FILENAME {
	prob[FNR] = $1
}
# Records of the second input file (the classification / response file):
# keep the first field of every record, indexed by its line number within
# that file.
ARGV[2] == FILENAME {
	class[FNR] = $1
}
# After all input has been read: threshold every stored probability against
# thr, tally the confusion matrix against the stored classes, and print
#   1) the 0/1 prediction for each record on one line,
#   2) tp fp fn tn,
#   3) TPR, FPR and accuracy.
END {
	# Count the records actually collected from each file instead of relying
	# on FNR, which only reflects the last file read.
	nprob = 0
	for (k in prob)
		nprob++
	nclass = 0
	for (k in class)
		nclass++

	# Only records present in both files can be scored; a missing
	# probability would otherwise silently compare as 0.
	n = (nprob < nclass) ? nprob : nclass
	if (nprob != nclass)
		printf("xclass2kde.awk: warning: %d probabilities but %d classes; scoring the first %d records\n", nprob, nclass, n) > "/dev/stderr"

	for (i = 1; i <= n; i++) {
		pred = (prob[i] > thr)	# 1 = predicted P, 0 = predicted N
		printf("%d ", pred)

		if (pred && class[i] == 1)
			tp += 1
		else if (!pred && class[i] == 0)
			tn += 1
		else if (pred && class[i] == 0)
			fp += 1
		else
			fn += 1		# also catches class values other than 0 or 1
	}
	printf("\n%d %d %d %d\n", tp, fp, fn, tn)

	# Division by zero is a fatal error in awk. A rate whose denominator is
	# zero (no actual positives, no actual negatives, or no records at all)
	# is reported as 0 so the output stays three numeric fields.
	pos = tp + fn
	neg = fp + tn
	total = pos + neg
	tpr = (pos > 0) ? tp / pos : 0
	fpr = (neg > 0) ? fp / neg : 0
	acc = (total > 0) ? (tp + tn) / total : 0

	# awk's printf takes no C length modifiers, so use %f rather than %lf.
	printf("%1.5f %1.5f %1.5f\n", tpr, fpr, acc)
} # end END