/*
	Base class for tag recommenders

	Author:   Steffen Rendle, http://www.libfm.org/
	modified: 2010-12-10

	Copyright 2010 Steffen Rendle, see license.txt for more information
*/

#ifndef TAGRECOMMENDER_H_
#define TAGRECOMMENDER_H_

#include <vector>
#include <assert.h>
#include <math.h>

/**
 * A tag id together with the score a recommender assigned to it.
 *
 * Kept as a plain aggregate: the members are left uninitialized on
 * construction and are filled in by the code that owns the array.
 */
struct WeightedTag {
	int tag_id;     // id of the tag
	double weight;  // score of the tag; operator< orders by this value only
};

/**
 * Orders tags by ascending weight; tag_id does not take part in the comparison,
 * so two tags with equal weight compare equivalent.
 *
 * Declared inline because this definition lives in a header: without it every
 * translation unit that includes the header emits its own external definition
 * and the program fails to link.
 */
inline bool operator<(const WeightedTag& a, const WeightedTag& b) {
	return a.weight < b.weight;
}


/**
 * Abstract base class for tag recommenders.
 *
 * A concrete recommender implements train() and the per-tag predict(); the
 * evaluation and prediction-export helpers declared here are built on top of
 * those two methods. The remaining virtual methods have empty default bodies
 * and may be overridden where a model supports them.
 */
class TagRecommender {
	public:
		/** Sets the default cut-offs: N = 10 and HLU_N = 5. */
		TagRecommender() : N(10), HLU_N(5) {}

		/** Virtual so that a recommender can be destroyed safely through a TagRecommender pointer. */
		virtual ~TagRecommender() {}

		// evaluate() reports F-measure/precision/recall for the top-1 .. top-N tags
		int N;
		// evaluate() discounts the hit at (0-based) rank t by 2^(-t/(HLU_N-1)) in the HLU score
		int HLU_N;
		
		// abstract methods to be implemented in derived classes
		virtual void train(Dataset& dataset) = 0;
		// score of tag_id for the post (user_id, item_id); evaluate() and predict(posts, ...) rank higher scores first
		virtual double predict(int user_id, int item_id, int tag_id) = 0;

		// implemented methods by TagRecommender

		/** Same as evaluate(dataset, f5) for callers that do not need the top-5 F-measure. */
		double evaluate(Dataset* dataset) {
			double f5 = 0;
			return evaluate(dataset, f5);
		}
		/** Evaluates on dataset->test_data, prints the metrics to std::cout and returns the average AUC. */
		double evaluate(Dataset* dataset, double& f5);
		/** Fills tags[0..num_tags).weight for the post (user_id, item_id); tags[t].tag_id is set by the caller. */
		virtual void predictTopTags(int user_id, int item_id, WeightedTag* tags, int num_tags);
		// model persistence hooks; the default implementations do nothing
		virtual void saveModel(std::string filename) {}
		virtual void loadModel(std::string filename) {}
		/** Returns the highest-scored tags (at most max_tags_per_post_out) with their scores for every post in posts. */
		virtual SparseTensorDouble predict(SparseMatrixBoolean& posts, int num_tags, int max_tags_per_post_out);
		/** Runs predict(posts, num_tags, max_tags_per_post_out) and writes the result to filename. */
		void savePrediction(SparseMatrixBoolean& posts, const std::string& filename, int num_tags, int max_tags_per_post_out);
		// optional hooks; the default implementations do nothing
		virtual void learn(int user_id, int item_id, int tp_id, int tn_id) {}
		virtual void auto_save(int iteration) {}
};

/**
 * Evaluates the recommender on every (user, item) post of dataset->test_data.
 *
 * For each post all tags 0..dataset->max_tag_id are scored with
 * predictTopTags() and compared against the tags stored for the post:
 *   - AUC: share of (positive, negative) tag pairs in which the positive tag
 *     has the higher weight; a tie counts 0.5.
 *   - precision / recall / F-measure for the top-1 .. top-N tags.
 *   - HLU: sum of 2^(-t/(HLU_N-1)) over the (0-based) ranks t of the positive
 *     tags, divided by the value an ideal ranking would reach. HLU_N must be
 *     greater than 1, otherwise the exponent divides by zero.
 *
 * The averages are printed to std::cout as one line
 * "HLU/AUC/FPR1..N: hlu/auc/f/p/r/f/p/r...".
 *
 * Degenerate input is handled instead of producing NaN:
 *   - posts without any tag are skipped (recall and HLU are undefined),
 *   - posts without any negative tag do not contribute to the AUC average,
 *   - an F-measure with precision + recall == 0 is reported as 0,
 *   - an average over zero posts is reported as 0,
 *   - ranks beyond the number of available tags (N > number of tags) are never hits.
 *
 * @param dataset  provides test_data and max_tag_id
 * @param f5       receives the F-measure of the top-5 list; left untouched if N < 5
 * @return the average AUC
 */
inline double TagRecommender::evaluate(Dataset* dataset, double& f5) {

	const int num_tags = dataset->max_tag_id+1;
	const int top_n = (N > 0) ? N : 0;

	int num_post = 0;      // posts that entered the precision/recall/HLU averages
	int num_auc_post = 0;  // posts for which at least one (positive, negative) pair exists
	double sum_auc = 0;
	double sum_hlu = 0;
	std::vector<double> sum_precision(top_n, 0.0);
	std::vector<double> sum_recall(top_n, 0.0);

	// Work buffers reused for every post. Owned by std::vector so they are
	// released even if a predictor throws. char instead of bool avoids the
	// std::vector<bool> specialization.
	std::vector<char> is_positive(num_tags, 0);
	std::vector<WeightedTag> weighted_tag(num_tags);
	WeightedTag* const tag_buffer = weighted_tag.empty() ? 0 : &weighted_tag[0];

	for(SparseTensorBoolean::const_iterator u = dataset->test_data.begin(); u != dataset->test_data.end(); ++u) {
		for(SparseMatrixBoolean::const_iterator i = u->second.begin(); i != u->second.end(); ++i) {
			const int user_id = u->first;
			const int item_id = i->first;
			const int num_positive = static_cast<int>(i->second.size());
			if (num_positive == 0) {
				// nothing to compare against; recall and the HLU normalizer would be 0/0
				continue;
			}

			// score every tag for (user_id, item_id); after this call index == tag_id
			for (int t = 0; t < num_tags; t++) {
				weighted_tag[t].tag_id = t;
				weighted_tag[t].weight = 0;  // same starting state as predict(posts, ...)
			}
			predictTopTags(user_id, item_id, tag_buffer, num_tags);

			// mark the tags of this post
			is_positive.assign(is_positive.size(), 0);
			for(SparseVectorBoolean::const_iterator t = i->second.begin(); t != i->second.end(); ++t) {
				const int tag_positive = *t;
				assert(tag_positive >= 0 && tag_positive < num_tags);
				is_positive[tag_positive] = 1;
			}

			// compute auc (needs the unsorted array, where index == tag_id)
			int auc_pair_count = 0;
			double auc_pair_correct = 0;
			for(SparseVectorBoolean::const_iterator t = i->second.begin(); t != i->second.end(); ++t) {
				const double positive_weight = weighted_tag[*t].weight;
				for (int tag_negative = 0; tag_negative < num_tags; tag_negative++) {
					if (is_positive[tag_negative]) {
						continue;
					}
					auc_pair_count++;
					const double negative_weight = weighted_tag[tag_negative].weight;
					if (positive_weight > negative_weight) {
						auc_pair_correct += 1;
					} else if (positive_weight == negative_weight) {
						auc_pair_correct += 0.5;
					}
				}
			}
			if (auc_pair_count > 0) {
				sum_auc += auc_pair_correct / auc_pair_count;
				num_auc_post++;
			}

			// sort ascending by weight: the best tag is at index num_tags-1
			std::sort(weighted_tag.begin(), weighted_tag.end());

			// evaluate F1-Measure
			int t_p = 0;
			for (int t = 0; t < top_n; t++) {
				// look if this tag is in the users tag list; ranks past the last tag cannot be hits
				if (t < num_tags && is_positive[weighted_tag[num_tags-t-1].tag_id]) {
					t_p++;
				}
				sum_precision[t] += static_cast<double>(t_p) / (t+1);
				sum_recall[t] += static_cast<double>(t_p) / num_positive;
			}

			// evaluate HLU:
			double post_hlu = 0;
			for (int t = 0; t < num_tags; t++) {
				if (is_positive[weighted_tag[num_tags-t-1].tag_id]) {
					const double p = static_cast<double>(t) / (HLU_N-1);
					post_hlu += pow(2.0, -p);
				}
			}
			// best achievable value: all positive tags on the first ranks
			double normalizer = 0;
			for (int t = 0; t < num_positive; t++) {
				const double p = static_cast<double>(t) / (HLU_N-1);
				normalizer += pow(2.0, -p);
			}
			post_hlu /= normalizer;
			sum_hlu += post_hlu;

			num_post++;
		}
	}

	const double avg_auc = (num_auc_post > 0) ? sum_auc / num_auc_post : 0.0;
	const double avg_hlu = (num_post > 0) ? sum_hlu / num_post : 0.0;
	std::cout << "HLU/AUC/FPR1.." << N << ": " << avg_hlu << "/" << avg_auc;
	for (int i = 0; i < top_n; i++) {
		const double prec = (num_post > 0) ? sum_precision[i] / num_post : 0.0;
		const double recall = (num_post > 0) ? sum_recall[i] / num_post : 0.0;
		// F-measure is defined as 0 when there is no hit at all (avoids 0/0)
		const double fm = (prec + recall > 0) ? (2*prec*recall) / (prec+recall) : 0.0;
		std::cout << "/" << fm << "/" << prec << "/" << recall;
		if (i == 4) { f5 = fm; }
	}
	std::cout << std::endl;

	return avg_auc;
}


/**
 * Default scoring of a post: for each of the num_tags entries, stores
 * predict(user_id, item_id, tags[t].tag_id) in tags[t].weight.
 *
 * The caller fills tags[t].tag_id beforehand; the order of the array is not
 * changed. Declared inline because the definition lives in a header and would
 * otherwise be emitted once per including translation unit.
 */
inline void TagRecommender::predictTopTags(int user_id, int item_id, WeightedTag* tags, int num_tags) {
	for (int t = 0; t < num_tags; t++) {
		tags[t].weight = predict(user_id, item_id, tags[t].tag_id);
	}
}


/**
 * Predicts the best tags for every post in posts.
 *
 * For each (user, item) entry all tags 0..num_tags-1 are scored with
 * predictTopTags(); the min(num_tags, max_tags_per_post_out) highest-weighted
 * tags are stored as prediction[user][item][tag] = weight.
 *
 * @param posts                  posts to predict for (user -> items)
 * @param num_tags               number of tag ids to score; nothing is predicted if <= 0
 * @param max_tags_per_post_out  upper bound on the tags stored per post
 * @return the predictions; empty if there is nothing to rank
 */
inline SparseTensorDouble TagRecommender::predict(SparseMatrixBoolean& posts, int num_tags, int max_tags_per_post_out) {
	SparseTensorDouble prediction;
	if (num_tags <= 0) {
		return prediction;
	}

	// Reused for every post; owned by std::vector so it is released even if a predictor throws.
	std::vector<WeightedTag> weighted_tag(num_tags);
	const int num_out = std::min(num_tags, max_tags_per_post_out);

	for (SparseMatrixBoolean::const_iterator u = posts.begin(); u != posts.end(); ++u) {
		for (SparseVectorBoolean::const_iterator i = u->second.begin(); i != u->second.end(); ++i) {
			const int user_id = u->first;
			const int item_id = *i;
			for (int t = 0; t < num_tags; t++) {
				weighted_tag[t].tag_id = t;
				weighted_tag[t].weight = 0;
			}
			predictTopTags(user_id, item_id, &weighted_tag[0], num_tags);

			// ascending by weight: the best tag ends up at index num_tags-1
			std::sort(weighted_tag.begin(), weighted_tag.end());

			for (int t = 0; t < num_out; t++) {
				const WeightedTag& ranked = weighted_tag[num_tags-t-1];
				prediction[user_id][item_id][ranked.tag_id] = ranked.weight;
			}
		}
	}
	return prediction;
}


/**
 * Computes predict(posts, num_tags, max_tags_per_post_out) and writes the
 * result to filename via SparseTensorDouble::toFile().
 *
 * Declared inline because the definition lives in a header and would
 * otherwise be emitted once per including translation unit.
 */
inline void TagRecommender::savePrediction(SparseMatrixBoolean& posts, const std::string& filename, int num_tags, int max_tags_per_post_out) {
	SparseTensorDouble prediction = predict(posts, num_tags, max_tags_per_post_out);
	prediction.toFile(filename);
}
		


#endif /*TAGRECOMMENDER_H_*/
