kneeliverse.knee_ranking

The following module provides a set of methods used for ranking knees from multi-knees approaches.

  1# coding: utf-8
  2
  3'''
  4The following module provides a set of methods
  5used for ranking knees from multi-knees approaches.
  6'''
  7
  8__author__ = 'Mário Antunes'
  9__version__ = '0.1'
 10__email__ = 'mario.antunes@ua.pt'
 11__status__ = 'Development'
 12__license__ = 'MIT'
 13__copyright__ = '''
 14Copyright (c) 2021-2023 Stony Brook University
 15Copyright (c) 2021-2023 The Research Foundation of SUNY
 16'''
 17
 18import math
 19import enum
 20import logging
 21import numpy as np
 22import kneeliverse.linear_fit as lf
 23import kneeliverse.evaluation as ev
 24
 25
 26logger = logging.getLogger(__name__)
 27
 28
 29class ClusterRanking(enum.Enum):
 30    """
 31    Enum data type that represents the direction of the ranking within a cluster.
 32    """
 33    left = 'left'
 34    linear = 'linear'
 35    right = 'right'
 36    hull = 'hull'
 37
 38    def __str__(self):
 39        return self.value
 40
 41
 42def distances(point:np.ndarray, points:np.ndarray) -> np.ndarray:
 43    """
 44    Computes the euclidean distance from a single point to a vector of points.
 45
 46    Args:
 47        point (np.ndarray): the point
 48        points (np.ndarray): the vector of points
 49    
 50    Returns:
 51        np.ndarray: a vector with the distances from point to all the points. 
 52    """
 53    return np.sqrt(np.sum(np.power(points - point, 2), axis=1))
 54
 55
 56def rect_overlap(amin: np.ndarray, amax: np.ndarray, bmin: np.ndarray, bmax: np.ndarray) -> float:
 57    """
 58    Computes the percentage of the overlap for two rectangles.
 59
 60    Args:
 61        amin (np.ndarray): the low point in rectangle A
 62        amax (np.ndarray): the high point in rectangle A
 63        bmin (np.ndarray): the low point in rectangle B
 64        bmax (np.ndarray): the high point in rectangle B
 65
 66    Returns:
 67        float: percentage of the overlap of two rectangles
 68    """
 69    #logger.info('%s %s %s %s', amin, amax, bmin, bmax)
 70    dx = max(0.0, min(amax[0], bmax[0]) - max(amin[0], bmin[0]))
 71    dy = max(0.0, min(amax[1], bmax[1]) - max(amin[1], bmin[1]))
 72    #logger.info('dx %s dy %s', dx, dy)
 73    overlap = dx * dy
 74    #logger.info('overlap = %s', overlap)
 75    if overlap > 0.0:
 76        a = np.abs(amax-amin)
 77        b = np.abs(bmax-bmin)
 78        total_area = a[0]*a[1] + b[0]*b[1] - overlap
 79        #print(f'overlap area = {overlap} total area =  {total_area}')
 80        return overlap / total_area
 81    else:
 82        return 0.0
 83
 84
 85def rect(p1: np.ndarray, p2: np.ndarray) -> tuple:
 86    """
 87    Creates the low and high rectangle coordinates from 2 points.
 88
 89    Args:
 90        p1 (np.ndarray): one of the points in the rectangle
 91        p2 (np.ndarray): one of the points in the rectangle
 92
 93    Returns:
 94        tuple: tuple with two points (low and high)
 95    """
 96    p1x, p1y = p1
 97    p2x, p2y = p2
 98    return np.array([min(p1x, p2x), min(p1y, p2y)]), np.array([max(p1x, p2x), max(p1y, p2y)])
 99
100
101def distance_to_similarity(array: np.ndarray) -> np.ndarray:
102    """
103    Converts an array of distances into an array of similarities.
104
105    Args:
106        array (np.ndarray): array with distances values
107
108    Returns:
109        np.ndarray: an array with similarity values
110    """
111    return max(array) - array
112
113
114def rank(array: np.ndarray) -> np.ndarray:
115    """
116    Computes the rank of an array of values.
117
118    Args:
119        array (np.ndarray): array with values
120
121    Returns:
122        np.ndarray: an array with the ranks of each value
123    """
124    temp = array.argsort()
125    ranks = np.empty_like(temp)
126    ranks[temp] = np.arange(len(array))
127    return ranks
128
129
130def slope_ranking(points: np.ndarray, knees: np.ndarray, t: float = 0.8) -> np.ndarray:
131    """
132    Computes the rank of a set of knees in a curve.
133
134    The ranking is based on the slope of the left of the knee point.
135    The left neighbourhood is computed based on the R2 metric.
136    
137    Args:
138        points (np.ndarray): numpy array with the points (x, y)
139        knees (np.ndarray): knees indexes
140        t (float): the R2 threshold for the neighbourhood (default 0.8)
141
142    Returns:
143        np.ndarray: an array with the ranks of each value
144    """
145    # corner case
146    if len(knees) == 1.0:
147        rankings = np.array([1.0])
148    else:
149        rankings = []
150
151        x = points[:, 0]
152        y = points[:, 1]
153
154        _, _, slope = ev.get_neighbourhood(x, y, knees[0], 0, t)
155        rankings.append(math.fabs(slope))
156
157        for i in range(1, len(knees)):
158            _, _, slope = ev.get_neighbourhood(x, y, knees[i], knees[i-1], t)
159            rankings.append(math.fabs(slope))
160
161        rankings = np.array(rankings)
162        rankings = rank(rankings)
163        # Min Max normalization
164        if len(rankings) > 1:
165            rankings = (rankings - np.min(rankings))/np.ptp(rankings)
166        else:
167            rankings = np.array([1.0])
168
169    return rankings
170
171
172def smooth_ranking(points: np.ndarray, knees: np.ndarray, t: ClusterRanking) -> np.ndarray:
173    """
174    Computes the rank for a cluster of knees in a curve.
175
176    The ranking is a weighted raking based on the Y axis improvement and the
177    slope/smoothed of the curve.
178    This methods tries to find the best knee within a cluster of knees, this
179    means that the boundaries for the computation are based on the cluster dimention.
180
181    Args:
182        points (np.ndarray): numpy array with the points (x, y)
183        knees (np.ndarray): knees indexes
184        t (ClusterRanking): selects the direction where the curve must be smooth
185
186    Returns:
187        np.ndarray: an array with the ranks of each value
188    """
189
190    x = points[:, 0]
191    y = points[:, 1]
192
193    fit = []
194    weights = []
195    
196    j = knees[0]
197    peak = np.max(y[knees])
198
199    # TODO: find a better approximation, for example SMAPE
200    for i in range(0, len(knees)):
201        # R2 score
202        r2 = 0
203        if t is ClusterRanking.linear:
204            r2_left = lf.r2(x[j:knees[i]+1], y[j:knees[i]+1])
205            r2_right = lf.r2(x[knees[i]:knees[-1]], y[knees[i]:knees[-1]])
206            r2 = (r2_left + r2_right) / 2.0
207        elif t is ClusterRanking.left:
208            r2 = lf.r2(x[j:knees[i]+1], y[j:knees[i]+1])
209        else:
210            r2 = lf.r2(x[knees[i]:knees[-1]], y[knees[i]:knees[-1]])
211        fit.append(r2)
212
213        # height of the segment
214        d = math.fabs(peak - y[knees[i]])
215        weights.append(d)
216
217    #weights.append(0)
218    weights = np.array(weights)
219    #fit.append(0)
220    fit = np.array(fit)
221
222    #max_weights = np.max(weights)
223    # if max_weights != 0:
224    #    weights = weights / max_weights
225
226    sum_weights = np.sum(weights)
227    if sum_weights != 0:
228        weights = weights / sum_weights
229
230    #logger.info(f'Fit & Weights {fit} / {weights}')
231
232    rankings = fit * weights
233
234    #logger.info(f'Smooth Ranking {rankings}')
235
236    return rankings
logger = <Logger kneeliverse.knee_ranking (WARNING)>
class ClusterRanking(enum.Enum):
30class ClusterRanking(enum.Enum):
31    """
32    Enum data type that represents the direction of the ranking within a cluster.
33    """
34    left = 'left'
35    linear = 'linear'
36    right = 'right'
37    hull = 'hull'
38
39    def __str__(self):
40        return self.value

Enum data type that represents the direction of the ranking within a cluster.

left = <ClusterRanking.left: 'left'>
linear = <ClusterRanking.linear: 'linear'>
right = <ClusterRanking.right: 'right'>
hull = <ClusterRanking.hull: 'hull'>
Inherited Members
enum.Enum
name
value
def distances(point: numpy.ndarray, points: numpy.ndarray) -> numpy.ndarray:
43def distances(point:np.ndarray, points:np.ndarray) -> np.ndarray:
44    """
45    Computes the euclidean distance from a single point to a vector of points.
46
47    Args:
48        point (np.ndarray): the point
49        points (np.ndarray): the vector of points
50    
51    Returns:
52        np.ndarray: a vector with the distances from point to all the points. 
53    """
54    return np.sqrt(np.sum(np.power(points - point, 2), axis=1))

Computes the euclidean distance from a single point to a vector of points.

Arguments:
  • point (np.ndarray): the point
  • points (np.ndarray): the vector of points
Returns:

np.ndarray: a vector with the distances from point to all the points.

def rect_overlap( amin: numpy.ndarray, amax: numpy.ndarray, bmin: numpy.ndarray, bmax: numpy.ndarray) -> float:
57def rect_overlap(amin: np.ndarray, amax: np.ndarray, bmin: np.ndarray, bmax: np.ndarray) -> float:
58    """
59    Computes the percentage of the overlap for two rectangles.
60
61    Args:
62        amin (np.ndarray): the low point in rectangle A
63        amax (np.ndarray): the high point in rectangle A
64        bmin (np.ndarray): the low point in rectangle B
65        bmax (np.ndarray): the high point in rectangle B
66
67    Returns:
68        float: percentage of the overlap of two rectangles
69    """
70    #logger.info('%s %s %s %s', amin, amax, bmin, bmax)
71    dx = max(0.0, min(amax[0], bmax[0]) - max(amin[0], bmin[0]))
72    dy = max(0.0, min(amax[1], bmax[1]) - max(amin[1], bmin[1]))
73    #logger.info('dx %s dy %s', dx, dy)
74    overlap = dx * dy
75    #logger.info('overlap = %s', overlap)
76    if overlap > 0.0:
77        a = np.abs(amax-amin)
78        b = np.abs(bmax-bmin)
79        total_area = a[0]*a[1] + b[0]*b[1] - overlap
80        #print(f'overlap area = {overlap} total area =  {total_area}')
81        return overlap / total_area
82    else:
83        return 0.0

Computes the percentage of the overlap for two rectangles.

Arguments:
  • amin (np.ndarray): the low point in rectangle A
  • amax (np.ndarray): the high point in rectangle A
  • bmin (np.ndarray): the low point in rectangle B
  • bmax (np.ndarray): the high point in rectangle B
Returns:

float: percentage of the overlap of two rectangles

def rect(p1: numpy.ndarray, p2: numpy.ndarray) -> tuple:
86def rect(p1: np.ndarray, p2: np.ndarray) -> tuple:
87    """
88    Creates the low and high rectangle coordinates from 2 points.
89
90    Args:
91        p1 (np.ndarray): one of the points in the rectangle
92        p2 (np.ndarray): one of the points in the rectangle
93
94    Returns:
95        tuple: tuple with two points (low and high)
96    """
97    p1x, p1y = p1
98    p2x, p2y = p2
99    return np.array([min(p1x, p2x), min(p1y, p2y)]), np.array([max(p1x, p2x), max(p1y, p2y)])

Creates the low and high rectangle coordinates from 2 points.

Arguments:
  • p1 (np.ndarray): one of the points in the rectangle
  • p2 (np.ndarray): one of the points in the rectangle
Returns:

tuple: tuple with two points (low and high)

def distance_to_similarity(array: numpy.ndarray) -> numpy.ndarray:
102def distance_to_similarity(array: np.ndarray) -> np.ndarray:
103    """
104    Converts an array of distances into an array of similarities.
105
106    Args:
107        array (np.ndarray): array with distances values
108
109    Returns:
110        np.ndarray: an array with similarity values
111    """
112    return max(array) - array

Converts an array of distances into an array of similarities.

Arguments:
  • array (np.ndarray): array with distances values
Returns:

np.ndarray: an array with similarity values

def rank(array: numpy.ndarray) -> numpy.ndarray:
115def rank(array: np.ndarray) -> np.ndarray:
116    """
117    Computes the rank of an array of values.
118
119    Args:
120        array (np.ndarray): array with values
121
122    Returns:
123        np.ndarray: an array with the ranks of each value
124    """
125    temp = array.argsort()
126    ranks = np.empty_like(temp)
127    ranks[temp] = np.arange(len(array))
128    return ranks

Computes the rank of an array of values.

Arguments:
  • array (np.ndarray): array with values
Returns:

np.ndarray: an array with the ranks of each value

def slope_ranking( points: numpy.ndarray, knees: numpy.ndarray, t: float = 0.8) -> numpy.ndarray:
131def slope_ranking(points: np.ndarray, knees: np.ndarray, t: float = 0.8) -> np.ndarray:
132    """
133    Computes the rank of a set of knees in a curve.
134
135    The ranking is based on the slope of the left of the knee point.
136    The left neighbourhood is computed based on the R2 metric.
137    
138    Args:
139        points (np.ndarray): numpy array with the points (x, y)
140        knees (np.ndarray): knees indexes
141        t (float): the R2 threshold for the neighbourhood (default 0.8)
142
143    Returns:
144        np.ndarray: an array with the ranks of each value
145    """
146    # corner case
147    if len(knees) == 1.0:
148        rankings = np.array([1.0])
149    else:
150        rankings = []
151
152        x = points[:, 0]
153        y = points[:, 1]
154
155        _, _, slope = ev.get_neighbourhood(x, y, knees[0], 0, t)
156        rankings.append(math.fabs(slope))
157
158        for i in range(1, len(knees)):
159            _, _, slope = ev.get_neighbourhood(x, y, knees[i], knees[i-1], t)
160            rankings.append(math.fabs(slope))
161
162        rankings = np.array(rankings)
163        rankings = rank(rankings)
164        # Min Max normalization
165        if len(rankings) > 1:
166            rankings = (rankings - np.min(rankings))/np.ptp(rankings)
167        else:
168            rankings = np.array([1.0])
169
170    return rankings

Computes the rank of a set of knees in a curve.

The ranking is based on the slope of the left of the knee point. The left neighbourhood is computed based on the R2 metric.

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • knees (np.ndarray): knees indexes
  • t (float): the R2 threshold for the neighbourhood (default 0.8)
Returns:

np.ndarray: an array with the ranks of each value

def smooth_ranking( points: numpy.ndarray, knees: numpy.ndarray, t: ClusterRanking) -> numpy.ndarray:
173def smooth_ranking(points: np.ndarray, knees: np.ndarray, t: ClusterRanking) -> np.ndarray:
174    """
175    Computes the rank for a cluster of knees in a curve.
176
177    The ranking is a weighted raking based on the Y axis improvement and the
178    slope/smoothed of the curve.
179    This methods tries to find the best knee within a cluster of knees, this
180    means that the boundaries for the computation are based on the cluster dimention.
181
182    Args:
183        points (np.ndarray): numpy array with the points (x, y)
184        knees (np.ndarray): knees indexes
185        t (ClusterRanking): selects the direction where the curve must be smooth
186
187    Returns:
188        np.ndarray: an array with the ranks of each value
189    """
190
191    x = points[:, 0]
192    y = points[:, 1]
193
194    fit = []
195    weights = []
196    
197    j = knees[0]
198    peak = np.max(y[knees])
199
200    # TODO: find a better approximation, for example SMAPE
201    for i in range(0, len(knees)):
202        # R2 score
203        r2 = 0
204        if t is ClusterRanking.linear:
205            r2_left = lf.r2(x[j:knees[i]+1], y[j:knees[i]+1])
206            r2_right = lf.r2(x[knees[i]:knees[-1]], y[knees[i]:knees[-1]])
207            r2 = (r2_left + r2_right) / 2.0
208        elif t is ClusterRanking.left:
209            r2 = lf.r2(x[j:knees[i]+1], y[j:knees[i]+1])
210        else:
211            r2 = lf.r2(x[knees[i]:knees[-1]], y[knees[i]:knees[-1]])
212        fit.append(r2)
213
214        # height of the segment
215        d = math.fabs(peak - y[knees[i]])
216        weights.append(d)
217
218    #weights.append(0)
219    weights = np.array(weights)
220    #fit.append(0)
221    fit = np.array(fit)
222
223    #max_weights = np.max(weights)
224    # if max_weights != 0:
225    #    weights = weights / max_weights
226
227    sum_weights = np.sum(weights)
228    if sum_weights != 0:
229        weights = weights / sum_weights
230
231    #logger.info(f'Fit & Weights {fit} / {weights}')
232
233    rankings = fit * weights
234
235    #logger.info(f'Smooth Ranking {rankings}')
236
237    return rankings

Computes the rank for a cluster of knees in a curve.

The ranking is a weighted raking based on the Y axis improvement and the slope/smoothed of the curve. This methods tries to find the best knee within a cluster of knees, this means that the boundaries for the computation are based on the cluster dimention.

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • knees (np.ndarray): knees indexes
  • t (ClusterRanking): selects the direction where the curve must be smooth
Returns:

np.ndarray: an array with the ranks of each value