/**
 * Ranks a fixed set of articles against free-text search terms using a
 * TF-IDF score computed over the text of each article's components.
 *
 * Scoring writes two numeric properties, `tf` and `score`, onto the article
 * objects. The constructor copies the array only, so those objects are the
 * same ones the caller passed in.
 */
export class ArticleRelevance {
	articles = [];
	lang = 'en';

	/**
	 * @param {Array<object>} articles - Articles to search. Each article is
	 *   read through its `components` array.
	 * @param {string} [lang] - Key used to pick a component's text out of its
	 *   `text_lang` map. Falls back to `'en'` when omitted or empty.
	 */
	constructor(articles, lang) {
		this.articles = [...(articles ?? [])];
		if (lang) {
			this.lang = lang;
		}
	}

	/**
	 * Returns the articles that contain at least one of the search terms,
	 * best match first.
	 *
	 * @param {string} terms - Whitespace-separated search terms.
	 * @returns {Array<object>} A new array holding the matching article
	 *   objects, ordered by `score` descending and then by `tf` descending.
	 *   Empty when `terms` contains no term.
	 */
	search(terms) {
		const searchTerms = String(terms ?? '')
			.split(/\s+/)
			.filter(Boolean);
		if (!searchTerms.length) {
			return [];
		}

		// The article objects outlive a single search, so clear what the
		// previous search left behind; otherwise old matches would leak into
		// this result and scores would keep growing.
		for (const article of this.articles) {
			article.tf = 0;
			article.score = 0;
		}

		for (const term of searchTerms) {
			this.calculateArticleTermScore(term, this.articles);
		}

		// `filter` returns a fresh array, so sorting never reorders
		// `this.articles`. When every score is 0 (the term occurs in all
		// articles, hence idf = 0) the raw term frequency decides the order.
		return this.articles
			.filter((article) => article.tf > 0)
			.sort((a, b) => b.score - a.score || b.tf - a.tf);
	}

	/**
	 * Adds the contribution of a single term to the `tf` and `score`
	 * properties of every article.
	 *
	 * @param {string} term - Search term; compared case-insensitively.
	 * @param {Array<object>} articles - Articles to score. Mutated in place.
	 * @returns {Array<object>} The same `articles` array.
	 */
	calculateArticleTermScore(term, articles) {
		const needle = term.toLowerCase();

		// Term frequency per article, kept by position so that articles are
		// not required to carry a unique `id`.
		const frequencies = articles.map((article) =>
			ArticleRelevance.getTermFrequency(
				needle,
				this.getArticleCorpus(article)
			)
		);

		// Number of articles in which the term appears at least once.
		const appearanceCounter = frequencies.filter((tf) => tf > 0).length;

		// Inverse document frequency: log(N / counter). A term found nowhere
		// would give log(N / 0) = Infinity and 0 * Infinity = NaN, so it is
		// made to contribute nothing instead.
		const idf =
			appearanceCounter > 0
				? Math.log(articles.length / appearanceCounter)
				: 0;

		articles.forEach((article, index) => {
			const tf = frequencies[index];
			// Increase the running totals; a missing value counts as 0.
			article.tf = (article.tf || 0) + tf;
			article.score = (article.score || 0) + tf * idf;
		});

		return articles;
	}

	/**
	 * Collects the lower-cased words of an article: the text of each
	 * top-level component and, for `columns` components, the text of the
	 * components inside every column.
	 *
	 * @param {object} article
	 * @returns {string[]} Words in document order; empty when the article has
	 *   no components.
	 */
	getArticleCorpus(article) {
		const words = [];

		for (const component of article.components ?? []) {
			const text = component['text_lang']?.[this.lang];
			if (text) {
				for (const word of ArticleRelevance.tokenize(text)) {
					words.push(word);
				}
				continue;
			}

			if (component.component === 'columns') {
				for (const column of component.columns ?? []) {
					for (const word of this.getColumnCorpus(column)) {
						words.push(word);
					}
				}
			}
		}

		return words;
	}

	/**
	 * Collects the lower-cased words of the components inside one column.
	 *
	 * @param {Array<object>} column - Components of the column.
	 * @returns {string[]}
	 */
	getColumnCorpus(column) {
		const words = [];
		for (const component of column ?? []) {
			const text = component['text_lang']?.[this.lang];
			if (text) {
				for (const word of ArticleRelevance.tokenize(text)) {
					words.push(word);
				}
			}
		}

		return words;
	}

	/**
	 * Splits a piece of component text into lower-cased words.
	 *
	 * HTML tags are removed, every character that is not a letter, mark,
	 * digit, underscore, whitespace, `!` or `?` becomes a space, and the
	 * result is split on any run of whitespace.
	 *
	 * @param {string} text
	 * @returns {string[]}
	 */
	static tokenize(text) {
		return (
			text
				.replace(/<[^>]*>?/g, '')
				// Unicode-aware on purpose: `\w` only knows ASCII and would
				// cut words such as "café" apart.
				.replace(/[^\p{L}\p{M}\p{N}_\s!?]/gu, ' ')
				.split(/\s+/)
				.filter(Boolean)
				.map((word) => word.toLowerCase())
		);
	}

	/**
	 * Share of `words` that are exactly equal to `term`.
	 *
	 * @param {string} term
	 * @param {string[]} words
	 * @returns {number} Occurrences divided by `words.length`; 0 for an empty
	 *   list.
	 */
	static getTermFrequency(term, words) {
		if (!words.length) {
			return 0;
		}

		let frequency = 0;
		for (const word of words) {
			if (word === term) {
				frequency += 1;
			}
		}
		return frequency / words.length;
	}
}

/**
 * Returns the articles that contain at least one of the search terms, best
 * match first.
 *
 * Scoring is delegated to `getTermScore`, which stores its running totals in
 * the `tf` and `score` properties of the article objects themselves.
 *
 * @param {string} terms - Whitespace-separated search terms.
 * @param {Array<object>} articles - Articles to search. The array is not
 *   reordered, but the `tf` and `score` properties of its items are rewritten.
 * @param {string} lang - Language key forwarded to `getTermScore`.
 * @returns {Array<object>} A new array with the matching article objects,
 *   ordered by `score` descending and then by `tf` descending. Empty when
 *   `terms` contains no term.
 */
export function getRelevance(terms, articles, lang) {
	const searchTerms = String(terms ?? '')
		.split(/\s+/)
		.filter(Boolean);
	if (!searchTerms.length) {
		return [];
	}

	// Callers typically search the same list again and again. Clear the
	// totals of the previous call so they neither pile up nor make articles
	// show up for terms they do not contain.
	for (const article of articles) {
		article.tf = 0;
		article.score = 0;
	}

	let scored = articles;
	for (const term of searchTerms) {
		scored = getTermScore(term, scored, lang);
	}

	// `filter` returns a fresh array, so sorting leaves the caller's array
	// order alone. `|| 0` keeps a non-finite score from derailing the
	// comparison; ties on score fall back to the raw term frequency.
	return scored
		.filter((article) => article.tf > 0)
		.sort(
			(a, b) => (b.score || 0) - (a.score || 0) || b.tf - a.tf
		);
}

/**
 * Adds the TF-IDF contribution of one term to every article.
 *
 * For each article the term frequency (tf) is taken from
 * `getTermFrequency` over the words returned by `getArticleCorpus`. The
 * article's `tf` property is increased by that tf and its `score` property by
 * tf * idf.
 *
 * @param {string} term - Search term; lower-cased before comparison.
 * @param {Array<object>} articles - Articles to score. Mutated in place.
 * @param {string} lang - Language key forwarded to `getArticleCorpus`.
 * @returns {Array<object>} The same `articles` array.
 */
function getTermScore(term, articles, lang) {
	const needle = term.toLowerCase();

	// Term frequency per article, kept by position rather than by
	// `article.id` so that missing or duplicate ids cannot overwrite each
	// other.
	const frequencies = articles.map((article) =>
		getTermFrequency(needle, getArticleCorpus(article, lang))
	);

	// Number of articles in which the term appears at least once.
	const appearanceCounter = frequencies.filter((tf) => tf > 0).length;

	/* Inverse document frequency (idf): log(N / counter)
		- N is the total number of articles
		- counter is the number of articles containing the term
		A term found nowhere would give log(N / 0) = Infinity, and
		0 * Infinity = NaN would wipe out every score, so such a term
		contributes nothing instead.
	*/
	const idf =
		appearanceCounter > 0
			? Math.log(articles.length / appearanceCounter)
			: 0;

	articles.forEach((article, index) => {
		const tf = frequencies[index];
		// Increase the running totals; a missing value counts as 0.
		article.tf = (article.tf || 0) + tf;
		article.score = (article.score || 0) + tf * idf;
	});

	return articles;
}

/**
 * Share of `words` that are strictly equal to `term`.
 *
 * @param {string} term - Word to look for.
 * @param {string[]} words - Words to scan.
 * @returns {number} Number of matches divided by `words.length`, or 0 when
 *   `words` has no length.
 */
function getTermFrequency(term, words) {
	const total = words.length;
	if (!total) {
		return 0;
	}

	const matches = [...words].reduce(
		(count, candidate) => (candidate === term ? count + 1 : count),
		0
	);

	return matches / total;
}

/**
 * Collects the lower-cased words of an article.
 *
 * A component that has text for `lang` in its `text_lang` map contributes
 * that text. Otherwise, a component whose `component` property is `'columns'`
 * contributes the words `getColumnCorpus` returns for each of its columns.
 *
 * @param {object} article - Article whose `components` array is read.
 * @param {string} lang - Key into each component's `text_lang` map.
 * @returns {string[]} Words in document order; empty when the article has no
 *   components.
 */
function getArticleCorpus(article, lang) {
	// Strips HTML tags, blanks out everything that is not a letter, mark,
	// digit, underscore, whitespace, `!` or `?`, then splits on any run of
	// whitespace. The character class is Unicode-aware because `\w` only
	// knows ASCII and would cut words such as "café" apart.
	const toWords = (text) =>
		text
			.replace(/<[^>]*>?/g, '')
			.replace(/[^\p{L}\p{M}\p{N}_\s!?]/gu, ' ')
			.split(/\s+/)
			.filter(Boolean)
			.map((word) => word.toLowerCase());

	const words = [];

	for (const component of article.components ?? []) {
		const text = component['text_lang']?.[lang];
		if (text) {
			for (const word of toWords(text)) {
				words.push(word);
			}
			continue;
		}

		if (component.component === 'columns') {
			// A columns component without a `columns` array adds nothing.
			for (const column of component.columns ?? []) {
				for (const word of getColumnCorpus(column, lang)) {
					words.push(word);
				}
			}
		}
	}

	return words;
}

/**
 * Collects the lower-cased words of the components inside one column.
 *
 * Only components that have text for `lang` in their `text_lang` map
 * contribute. HTML tags are removed, every character that is not a letter,
 * mark, digit, underscore, whitespace, `!` or `?` becomes a space, and the
 * text is split on any run of whitespace.
 *
 * @param {Array<object>} column - Components of the column.
 * @param {string} lang - Key into each component's `text_lang` map.
 * @returns {string[]} Words in document order.
 */
function getColumnCorpus(column, lang) {
	const words = [];

	for (const component of column ?? []) {
		const text = component['text_lang']?.[lang];
		if (!text) {
			continue;
		}

		const tokens = text
			.replace(/<[^>]*>?/g, '')
			// Unicode-aware on purpose: `\w` only knows ASCII and would cut
			// words such as "café" apart.
			.replace(/[^\p{L}\p{M}\p{N}_\s!?]/gu, ' ')
			// The text may contain newlines and tabs, not just spaces.
			.split(/\s+/);

		for (const token of tokens) {
			// Leading or trailing whitespace produces empty tokens.
			if (token) {
				words.push(token.toLowerCase());
			}
		}
	}

	return words;
}
