Logistic (Log) Loss Function

(Redirected from Log Loss)
Jump to: navigation, search

A Logistic (Log) Loss Function is a convex loss function that scores a predicted probability by the negative logarithm of the probability assigned to the true label.



def logloss(true_label, predicted, eps=1e-15):
    """Return the log loss of a single binary prediction.

    Args:
        true_label: The actual class; a value equal to 1 is treated as the
            positive class, anything else as the negative class.
        predicted: Predicted probability of the positive class.
        eps: Clipping margin; the probability is confined to
            [eps, 1 - eps] before the logarithm is taken.

    Returns:
        -log(p) for a positive label and -log(1 - p) otherwise, where p is
        the clipped prediction.
    """
    # Clip so that neither log(p) nor log(1 - p) is ever evaluated at 0.
    p = np.clip(predicted, eps, 1 - eps)
    if true_label == 1:
        return -log(p)
    # Negative class: this return used to sit inside the `if` body, after the
    # first return, so it was unreachable and the function returned None.
    return -log(1 - p)




from math import log 
def log_loss(predicted, target):
    """Return the mean binary log loss of `predicted` against `target`.

    Args:
        predicted: Sequence of predicted probabilities for the positive class.
        target: Sequence of true labels (0 or 1), same length as `predicted`.

    Returns:
        The average of -(t*log(p) + (1-t)*log(1-p)) over all samples, or
        None (after printing a message) when the two sequences differ in
        length.

    Raises:
        ZeroDivisionError: If both sequences are empty.
    """
    if len(predicted) != len(target):
        print('lengths not equal!')
        return None

    eps = 1e-15
    total = 0.0
    for p, t in zip(predicted, target):
        t = float(t)  # make sure all float values
        p = min(max(p, eps), 1 - eps)  # keep p strictly inside (0, 1)
        total += t * log(p) + (1.0 - t) * log(1.0 - p)
    return -(1.0 / len(target)) * total
if __name__ == '__main__':
    # Runs only when the file is executed as a script, e.g. 'python utils.py'.
    actual = [0, 1, 1, 1, 1, 0, 0, 1, 0, 1]
    pred = [0.24160452, 0.41107934, 0.37063768, 0.48732519, 0.88929869,
            0.60626423, 0.09678324, 0.38135864, 0.20463064, 0.21945892]
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print(log_loss(pred, actual))


def log_loss(solution, prediction, task='binary.classification'):
    """Log loss for binary and multiclass.

    Args:
        solution: 2-D array of true labels; its shape is unpacked as
            (sample_num, label_num).
        prediction: Array of predicted scores matching `solution`.
        task: Task name. 'multiclass.classification' (with more than one
            label column) selects the multi-class branch; any other value is
            handled as a set of independent binary problems, one per column.

    Returns:
        Multi-class: the sum over columns of the positive-class log loss.
        Otherwise: positive-class plus negative-class log loss, as returned
        by `mvmean(..., axis=0)` (presumably one score per column -- confirm
        against `mvmean`); the scores are returned un-averaged so the caller
        can normalize them.
    """
    [sample_num, label_num] = solution.shape
    eps = 1e-15
    pred = np.copy(prediction)  # beware: changes in prediction occur through this
    sol = np.copy(solution)
    if (task == 'multiclass.classification') and (label_num > 1):
        # Make sure the lines add up to one for multi-class classification
        norma = np.sum(prediction, axis=1)
        pred /= np.maximum(norma, eps)[:, np.newaxis]
        # Make sure there is a single label active per line for multi-class classification
        sol = binarize_predictions(solution, task='multiclass.classification')
        # For the base prediction, this solution is ridiculous in the multi-label case
    # Bounding of predictions to avoid log(0),1/0,...
    pred = np.minimum(1 - eps, np.maximum(eps, pred))
    # Compute the log loss
    pos_class_log_loss = -mvmean(sol * np.log(pred), axis=0)
    if (task != 'multiclass.classification') or (label_num == 1):
        # The multi-label case is a bunch of binary problems.
        # The second class is the negative class for each column.
        neg_class_log_loss = -mvmean((1 - sol) * np.log(1 - pred), axis=0)
        loss = pos_class_log_loss + neg_class_log_loss
        # Each column is an independent problem and the probabilities in one
        # line do not add up to one. In the multilabel case the right thing
        # is to AVERAGE, not sum; we return all the scores so we can
        # normalize correctly later on.
    else:
        # For the multiclass case the probabilities in one line add up to one,
        # so we sum the contributions of the columns.
        loss = np.sum(pos_class_log_loss)
    return loss