#
# COPYRIGHT
#
# class2kde.awk - Outputs statistics on the results of class2kde.
# Copyright (C) 2012 Exstrom Laboratories LLC
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# A copy of the GNU General Public License is available on the internet at:
# http://www.gnu.org/copyleft/gpl.html
#
# or you can write to:
#
# The Free Software Foundation, Inc.
# 675 Mass Ave
# Cambridge, MA 02139, USA
#
# Exstrom Laboratories LLC contact:
# stefan(AT)exstrom.com
#
# Exstrom Laboratories LLC
# Longmont, CO 80503, USA
#
#
# class2kde.awk reads unsorted output of class2kde,
# as well as a threshold probability value, and outputs
# statistics. The output statistics are based on the calculation
# of 4 values: TP, FP, FN, TN.
# The meaning of these 4 values can be understood in terms
# of this table:
#
#                      Actual
#                   |  P   |  N
#             ------+------+------
# Predicted     P   |  TP  |  FP
#             ------+------+------
#               N   |  FN  |  TN
#
# If we predicted P and the actual was P, then TP is incremented.
# If we predicted N and the actual was N, then TN is incremented.
# If we predicted P and the actual was N, then FP is incremented.
# If we predicted N and the actual was P, then FN is incremented.
# The following statistics in 2 lines are output:
# *First line contains 4 integers: TP, FP, FN, TN
# *Second line contains 3 floating point numbers:
# 1) True positive rate or sensitivity (TPR)
# TPR = TP/(TP+FN) = probability of predicting P given that actual is P.
# 2) False positive rate or fall-out (FPR), i.e. 1 - specificity
# FPR = FP/(FP+TN) = probability of predicting P given that actual is N.
# 3) Accuracy
# Accuracy = (TP+TN)/(TP+TN+FP+FN)
# Example:
# awk -f class2kde.awk -v thr=0.5 class2kde.out
# BEGIN rule: runs once before any input is read.
# Sets the field separator and zeroes the four confusion-matrix counters
# (tp, fp, fn, tn) that the per-record rule increments.
BEGIN {
# Fields are separated by a run of one or more spaces and/or commas, so
# "0.7 1", "0.7,1" and "0.7, 1" all split into exactly two fields.
# (A single-character class "[ ,]" would produce an empty field between
# a comma and a following space.)
FS="[ ,]+"
tp=0; fp=0; fn=0; tn=0
}
# Main rule: runs for every input record.
# $1 is the predicted probability, $2 is the actual class (0 or 1).
# A record is predicted positive when its probability is strictly greater
# than thr; the matching confusion-matrix counter is then incremented.
# Blank lines are ignored. Records with a missing class, or a class other
# than 0 or 1, are reported on stderr and not counted, instead of being
# silently tallied as false negatives.
{
if(NF>0)
{
if((NF<2)||(($2!=0)&&($2!=1)))
printf("%s:%d: skipping malformed record: %s\n",FILENAME,FNR,$0) > "/dev/stderr"
else if($1>thr)
{
# predicted positive
if($2==1)
tp+=1
else
fp+=1
}
else
{
# predicted negative
if($2==1)
fn+=1
else
tn+=1
}
}
}
# Return num/den formatted with five decimals, or the string "nan" when
# den is zero (dividing by zero is a fatal error in awk).
function safe_ratio(num, den) {
return (den==0) ? "nan" : sprintf("%1.5f",num/den)
}

# END rule: runs once after all input has been read.
# Line 1: the integer counts TP FP FN TN.
# Line 2: TPR = TP/(TP+FN), FPR = FP/(FP+TN) and
#         accuracy = (TP+TN)/(TP+TN+FP+FN).
# A rate whose denominator is zero (no input, no actual positives, or no
# actual negatives) is printed as "nan" rather than aborting the script.
END {
printf("%d %d %d %d\n",tp,fp,fn,tn)
# "%f" rather than "%lf": the C length modifier "l" is not valid in awk.
printf("%s %s %s\n",safe_ratio(tp,tp+fn),safe_ratio(fp,fp+tn),safe_ratio(tp+tn,tp+tn+fp+fn))
} # end END