#!/usr/bin/python

import numpy
from numpy import array
from numpy import random

def pr_2hist(Hpos, Hneg, n = None, normalize = True) :
    """Compute a precision/recall curve from two class score histograms

    The bins are consumed in the order given, each one adding its counts to
    the running true-positive / false-positive totals.  A bin that holds no
    negatives does not open a new curve point; its positives are folded into
    the most recent point instead.
    NOTE(review): the bins are presumably ordered from the most to the least
    "positive" score - confirm with the callers.

    :Parameters:
      - `Hpos`,`Hneg` - histograms of positive and negative class scores;
        they must have the same length and are paired bin by bin
      - `n` - accepted but not used by this function
      - `normalize` - accepted but not used by this function

    :Returns:
      a tuple (prec, rec) of numpy arrays with one entry per curve point,
      the first point being the one that starts at tp = fp = 0:

      - `prec` - precision, tp / (tp + fp)
      - `rec` - recall, tp / sum(Hpos); all zeros when `Hpos` holds no
        positives at all
    """
    assert len(Hpos) == len(Hneg), "Hpos and Hneg must have the same length"

    tp = [0.0]
    fp = [0.0]
    for hpos, hneg in zip(Hpos, Hneg):
        if hneg == 0:
            # no new false positives: extend the current point upwards
            tp[-1] += hpos
        else:
            tp.append(tp[-1] + hpos)
            fp.append(fp[-1] + hneg)

    tp = array(tp, float)
    fp = array(fp, float)
    numTP = float(sum(Hpos))

    # the small constant keeps a point with tp = fp = 0 from dividing by zero
    prec = tp / (tp + fp + 1e-10)

    if numTP > 0:
        rec = tp / numTP
    else:
        # without any positives recall is undefined; report 0 rather than nan,
        # the same convention roc_2hist uses for its true positive rate
        rec = numpy.zeros(len(tp))

    return prec, rec


from numpy import array
def roc_2hist(Hpos, Hneg, n = None, normalize = True) :
    """Compute the ROC curve and area under the curve from two class score histograms

    The bins are consumed in the order given, each one adding its counts to
    the running true-positive / false-positive totals.  A bin that holds no
    negatives does not open a new curve point; its positives are folded into
    the most recent point instead.
    NOTE(review): the bins are presumably ordered from the most to the least
    "positive" score - confirm with the callers.

    :Parameters:
      - `Hpos`,`Hneg` - histograms of positive and negative class scores;
        they must have the same length and are paired bin by bin
      - `n` - accepted but not used by this function
      - `normalize` whether to normalize the roc curve (default: True)
        when this is set to False, TP/FP counts are output rather than TP/FP rates

    :Returns:
      a tuple (tp, fp, area):

      - `tp`,`fp` - numpy arrays with one entry per curve point, the first
        point being the one that starts at tp = fp = 0.  A rate is left as
        a count when the corresponding histogram sums to zero
      - `area` - the area under the returned curve by the trapezoid rule
        (in count units when `normalize` is False)
    """
    assert len(Hpos) == len(Hneg), "Hpos and Hneg must have the same length"

    tp = [0.0]
    fp = [0.0]
    for hpos, hneg in zip(Hpos, Hneg):
        if hneg == 0:
            # no new false positives: extend the current point upwards
            tp[-1] += hpos
        else:
            tp.append(tp[-1] + hpos)
            fp.append(fp[-1] + hneg)

    tp = array(tp, float)
    fp = array(fp, float)

    if normalize:
        numTP = float(sum(Hpos))
        numFP = float(sum(Hneg))
        # an empty class has nothing to normalize by; leave its zeros alone
        if numTP > 0:
            tp /= numTP
        if numFP > 0:
            fp /= numFP

    # trapezoid rule over consecutive curve points
    area = float(numpy.sum((tp[1:] + tp[:-1]) / 2 * (fp[1:] - fp[:-1])))

    return tp, fp, area



def roc(Y, givenY, decisionFunc, n = None, targetClass = 1, normalize = True) :
    """Compute the ROC curve and area under the curve for a two class problem

    The patterns are visited in order of decreasing decision value.  Every
    negative pattern opens a new curve point; every positive pattern raises
    the true-positive value of the current point.

    :Parameters:
      - `Y` - the predicted labels (not used; can put None instead)
      - `givenY` - the true labels
      - `decisionFunc` - the values of the decision function
      - `n` - the number of false positives to take into account (roc_n);
        a value below 1 is taken as a fraction of the number of negative
        patterns.  None (the default) uses all the patterns
      - `targetClass` - the "positive" class
      - `normalize` whether to normalize the roc curve (default: True)
        when this is set to False, TP/FP counts are output rather than TP/FP rates

    :Returns:
      a tuple (tp, fp, area):

      - `tp`,`fp` - lists with one entry per curve point, starting with the
        point reached before the first false positive.  When normalized,
        `tp` is divided by the total number of positives in `givenY` and
        `fp` by the number of false positives actually visited
      - `area` - the mean of the true-positive rates over the curve points;
        0 when `givenY` contains no positive pattern
    """
    isPositive = numpy.equal(givenY, targetClass)
    numTP = numpy.sum(isPositive)

    if n is not None and n < 1 :
        n = int(n * numpy.sum(numpy.not_equal(givenY, targetClass)))

    # Randomly permute the patterns before ranking them - presumably so that
    # patterns with tied decision values are not ranked by their input order.
    # (random.shuffle cannot be used on a range object under Python 3.)
    order = random.permutation(len(decisionFunc))
    f = numpy.asarray(decisionFunc)[order]
    isPositive = isPositive[order]

    tp = [0.0]
    fp = [0.0]
    for patternIdx in numpy.argsort(-f) :
        if isPositive[patternIdx] :
            tp[-1] += 1
        else :
            tp.append(tp[-1])
            fp.append(fp[-1] + 1.0)
        if n is not None and fp[-1] >= n :
            break

    # NOTE(review): both branches below average the TP rate over *all* curve
    # points, including the last one.  That is close to, but not exactly, the
    # step-function area sum(tp[:-1]) / (len(tp) - 1); the formula is kept
    # unchanged so that existing results do not shift.
    if normalize :
        if tp[-1] > 0 :
            tp = [count / float(numTP) for count in tp]
        if fp[-1] > 0 :
            numFP = float(fp[-1])
            fp = [count / numFP for count in fp]
        area = numpy.sum(tp) / len(tp)
    elif numTP > 0 :
        area = numpy.sum(tp) / (len(tp) * float(numTP))
    else :
        # no positives at all: report 0, as the normalized branch does,
        # instead of dividing by zero
        area = 0.0

    return tp, fp, area


def precision_recall(Y, givenY, decisionFunc, n = None, targetClass = 1, normalize = True) :
    """Compute a ranked true-positive / false-positive curve and its area

    NOTE(review): despite its name this function does not compute precision
    or recall.  It performs the same computation as `roc` and returns ROC
    quantities; that behaviour is kept here so existing callers are not
    affected.  Decide whether it should become a real precision/recall
    curve or be retired in favour of `roc`.

    The patterns are visited in order of decreasing decision value.  Every
    negative pattern opens a new curve point; every positive pattern raises
    the true-positive value of the current point.

    :Parameters:
      - `Y` - the predicted labels (not used; can put None instead)
      - `givenY` - the true labels
      - `decisionFunc` - the values of the decision function
      - `n` - the number of false positives to take into account;
        a value below 1 is taken as a fraction of the number of negative
        patterns.  None (the default) uses all the patterns
      - `targetClass` - the "positive" class
      - `normalize` whether to normalize the curve (default: True)
        when this is set to False, TP/FP counts are output rather than TP/FP rates

    :Returns:
      a tuple (tp, fp, area):

      - `tp`,`fp` - lists with one entry per curve point, starting with the
        point reached before the first false positive.  When normalized,
        `tp` is divided by the total number of positives in `givenY` and
        `fp` by the number of false positives actually visited
      - `area` - the mean of the true-positive rates over the curve points;
        0 when `givenY` contains no positive pattern
    """
    positives = numpy.equal(givenY, targetClass)
    numTP = numpy.sum(positives)

    if n is not None and n < 1:
        n = int(n * numpy.sum(numpy.not_equal(givenY, targetClass)))

    # Randomly permute the patterns before ranking them - presumably so that
    # patterns with tied decision values are not ranked by their input order.
    # (random.shuffle cannot be used on a range object under Python 3.)
    order = random.permutation(len(decisionFunc))
    f = numpy.asarray(decisionFunc)[order]
    positives = positives[order]

    tp = [0.0]
    fp = [0.0]
    for patternIdx in numpy.argsort(-f) :
        if positives[patternIdx] :
            tp[-1] += 1
        else :
            tp.append(tp[-1])
            fp.append(fp[-1] + 1.0)
        if n is not None and fp[-1] >= n :
            break

    if normalize:
        if tp[-1] > 0 :
            tp = [count / float(numTP) for count in tp]
        if fp[-1] > 0 :
            numFP = float(fp[-1])
            fp = [count / numFP for count in fp]
        area = numpy.sum(tp) / len(tp)
    elif numTP > 0 :
        area = numpy.sum(tp) / (len(tp) * float(numTP))
    else :
        # no positives at all: report 0, as the normalized branch does,
        # instead of dividing by zero
        area = 0.0

    return tp, fp, area


if __name__ == "__main__":
    # Script use: load a numeric table from the file named by the first
    # command line argument (or from standard input when none is given),
    # take column 0 as the true labels and column 1 as the decision values,
    # and print the area returned by roc().
    from numpy import loadtxt
    import sys

    try:
        inputfile = sys.argv[1]
    except IndexError:
        inputfile = sys.stdin

    pred = loadtxt(inputfile)
    tp, fp, auc = roc(None, pred[:,0], pred[:,1])
    # A single pre-formatted string prints the same text whether print is the
    # Python 2 statement or the Python 3 function.
    print("AUC:  %s" % auc)
