"""
@version: 0.1
@author: Blade He
@license: Morningstar 
@contact: blade.he@morningstar.com
@site: 
@software: PyCharm
@file: Similarity.py
@time: 2019/03/20
"""
from math import *
from decimal import Decimal
import math
import re


class Similarity:
    """Distance and similarity measures for numeric vectors, token lists and strings."""

    # Generic words that carry no discriminating information when matching
    # fund names; they are dropped before token overlap is computed.
    _IGNORED_WORDS = frozenset(("name", "fund", "funds"))

    def euclidean_distance(self, x, y):
        """Return the euclidean distance between two numeric sequences.

        Elements are paired with ``zip``, so extra trailing elements of the
        longer sequence are ignored.
        """
        return math.sqrt(sum((a - b) ** 2 for a, b in zip(x, y)))

    def manhattan_distance(self, x, y):
        """Return the manhattan (L1) distance between two numeric sequences."""
        return sum(abs(a - b) for a, b in zip(x, y))

    def minkowski_distance(self, x, y, p_value):
        """Return the minkowski distance of order ``p_value``.

        The result is a ``Decimal`` rounded to 3 decimal places (see
        ``nth_root``).
        """
        total = sum(abs(a - b) ** p_value for a, b in zip(x, y))
        return self.nth_root(total, p_value)

    def nth_root(self, value, n_root):
        """Return the ``n_root``-th root of ``value`` as a ``Decimal`` rounded to 3 places."""
        exponent = 1 / float(n_root)
        return round(Decimal(value) ** Decimal(exponent), 3)

    def cosine_similarity(self, x, y):
        """Return the cosine similarity of two numeric sequences, rounded to 3 places.

        The norms are computed at full precision; multiplying pre-rounded
        norms distorts the result for small-magnitude vectors (it could even
        exceed 1). Returns 0.0 when either vector has zero norm, because the
        cosine is undefined there.
        """
        numerator = sum(a * b for a, b in zip(x, y))
        norm_x = math.sqrt(sum(a * a for a in x))
        norm_y = math.sqrt(sum(b * b for b in y))
        denominator = norm_x * norm_y
        if denominator == 0:
            return 0.0
        return round(numerator / denominator, 3)

    def square_rooted(self, x):
        """Return the euclidean norm of ``x`` rounded to 3 decimal places."""
        return round(math.sqrt(sum(a * a for a in x)), 3)

    def jaccard_similarity(self, x: list, y: list):
        """Return the jaccard similarity ``|x & y| / |x | y|`` of two lists.

        Duplicates are ignored (both inputs are converted to sets). Returns 0
        when both inputs are empty.
        """
        left, right = set(x), set(y)
        union = left | right
        if not union:
            return 0
        return len(left & right) / float(len(union))

    def y_in_x_similarity(self, x: list, y: list):
        """Return the fraction of distinct items of ``y`` that also occur in ``x``.

        This is an asymmetric containment measure: ``|x & y| / |y|``.
        Returns 0 when ``y`` is empty.
        """
        left, right = set(x), set(y)
        if not right:
            return 0
        return len(left & right) / float(len(right))

    def _significant_words(self, text: str):
        """Return the set of lowercase words of ``text`` excluding generic words."""
        # Every non-word character becomes a separator; split() then takes
        # care of collapsing runs of whitespace.
        cleaned = re.sub(r'\W', ' ', text.lower())
        return {word for word in cleaned.split() if word not in self._IGNORED_WORDS}

    def _prefer_feeder_candidate(self, text: str, compare_text_list: list):
        """Return the best "feeder" candidate for a "feeder" text, else ``text``.

        Fix for matching feeder funds. Example:
            Raw fund name: Schroders Capital UK Real Estate Fund Feeder Trust
            Fund names in database:
                Schroder UK Real Estate Fund Feeder Trust
                Schroders Capital UK Real Estate Fund
        The match should be "Schroder UK Real Estate Fund Feeder Trust", but
        plain token overlap would pick "Schroders Capital UK Real Estate
        Fund". To avoid that, when ``text`` contains the word "feeder" it is
        replaced by the (lowercased) candidate containing "feeder" that
        overlaps with it the most.
        """
        text_words = text.split()
        if "feeder" not in text_words:
            return text
        best_similarity = 0
        best_candidate = text
        for candidate in compare_text_list:
            candidate = candidate.lower()
            candidate_words = candidate.split()
            if "feeder" not in candidate_words:
                continue
            similarity = self.y_in_x_similarity(text_words, candidate_words)
            if similarity > best_similarity:
                best_similarity = similarity
                best_candidate = candidate
        return best_candidate

    def compare_text_in_text_list_similarity(self, text: str, compare_text_list: list):
        """Find the entry of ``compare_text_list`` best contained in ``text``.

        Both sides are lowercased, stripped of non-word characters and of the
        generic words "name", "fund" and "funds". Each candidate is scored by
        the fraction of its words that occur in ``text``. On equal non-zero
        scores the candidate with more distinct words wins; otherwise the
        first one encountered is kept.

        Returns:
            A ``(similarity, matched_text)`` tuple, where ``matched_text`` is
            the original (un-normalised) candidate. ``(0, "")`` is returned
            when either input is empty or nothing matches.
        """
        # Always return a tuple so callers can unpack the result safely.
        if not text or not compare_text_list:
            return 0, ""

        text = self._prefer_feeder_candidate(text.lower(), compare_text_list)
        text_words = self._significant_words(text)
        if not text_words:
            return 0, ""

        best_similarity = 0
        best_text = ""
        best_words = set()
        for candidate in compare_text_list:
            candidate_words = self._significant_words(candidate)
            if not candidate_words:
                continue
            similarity = self.y_in_x_similarity(text_words, candidate_words)
            is_better = similarity > best_similarity
            # On a tie, prefer the more specific (longer) candidate.
            is_longer_tie = (similarity > 0
                             and similarity == best_similarity
                             and len(candidate_words) > len(best_words))
            if is_better or is_longer_tie:
                best_similarity = similarity
                best_text = candidate
                best_words = candidate_words

        return best_similarity, best_text

    def edit_distance_similarity(self, left: str, right: str):
        """Return ``1 - levenshtein(left, right) / max(len(left), len(right))``.

        The result is 1.0 for identical strings (including two empty strings)
        and 0.0 when every character has to be changed.
        """
        if not left and not right:
            # Two empty strings are identical; also avoids dividing by zero.
            return 1.0

        # Only the previous row of the dynamic-programming table is needed.
        previous_row = list(range(len(right) + 1))
        for i, left_char in enumerate(left, start=1):
            current_row = [i]
            for j, right_char in enumerate(right, start=1):
                cost = 0 if left_char == right_char else 1
                current_row.append(min(previous_row[j] + 1,          # deletion
                                       current_row[j - 1] + 1,       # insertion
                                       previous_row[j - 1] + cost))  # substitution
            previous_row = current_row

        distance = previous_row[-1]
        return 1 - distance / max(len(left), len(right))