kneeliverse.knee_ranking
The following module provides a set of methods used for ranking knees from multi-knees approaches.
1# coding: utf-8 2 3''' 4The following module provides a set of methods 5used for ranking knees from multi-knees approaches. 6''' 7 8__author__ = 'Mário Antunes' 9__version__ = '0.1' 10__email__ = 'mario.antunes@ua.pt' 11__status__ = 'Development' 12__license__ = 'MIT' 13__copyright__ = ''' 14Copyright (c) 2021-2023 Stony Brook University 15Copyright (c) 2021-2023 The Research Foundation of SUNY 16''' 17 18import math 19import enum 20import logging 21import numpy as np 22import kneeliverse.linear_fit as lf 23import kneeliverse.evaluation as ev 24 25 26logger = logging.getLogger(__name__) 27 28 29class ClusterRanking(enum.Enum): 30 """ 31 Enum data type that represents the direction of the ranking within a cluster. 32 """ 33 left = 'left' 34 linear = 'linear' 35 right = 'right' 36 hull = 'hull' 37 38 def __str__(self): 39 return self.value 40 41 42def distances(point:np.ndarray, points:np.ndarray) -> np.ndarray: 43 """ 44 Computes the euclidean distance from a single point to a vector of points. 45 46 Args: 47 point (np.ndarray): the point 48 points (np.ndarray): the vector of points 49 50 Returns: 51 np.ndarray: a vector with the distances from point to all the points. 52 """ 53 return np.sqrt(np.sum(np.power(points - point, 2), axis=1)) 54 55 56def rect_overlap(amin: np.ndarray, amax: np.ndarray, bmin: np.ndarray, bmax: np.ndarray) -> float: 57 """ 58 Computes the percentage of the overlap for two rectangles. 59 60 Args: 61 amin (np.ndarray): the low point in rectangle A 62 amax (np.ndarray): the high point in rectangle A 63 bmin (np.ndarray): the low point in rectangle B 64 bmax (np.ndarray): the high point in rectangle B 65 66 Returns: 67 float: percentage of the overlap of two rectangles 68 """ 69 #logger.info('%s %s %s %s', amin, amax, bmin, bmax) 70 dx = max(0.0, min(amax[0], bmax[0]) - max(amin[0], bmin[0])) 71 dy = max(0.0, min(amax[1], bmax[1]) - max(amin[1], bmin[1])) 72 #logger.info('dx %s dy %s', dx, dy) 73 overlap = dx * dy 74 #logger.info('overlap = %s', overlap) 75 if overlap > 0.0: 76 a = np.abs(amax-amin) 77 b = np.abs(bmax-bmin) 78 total_area = a[0]*a[1] + b[0]*b[1] - overlap 79 #print(f'overlap area = {overlap} total area = {total_area}') 80 return overlap / total_area 81 else: 82 return 0.0 83 84 85def rect(p1: np.ndarray, p2: np.ndarray) -> tuple: 86 """ 87 Creates the low and high rectangle coordinates from 2 points. 88 89 Args: 90 p1 (np.ndarray): one of the points in the rectangle 91 p2 (np.ndarray): one of the points in the rectangle 92 93 Returns: 94 tuple: tuple with two points (low and high) 95 """ 96 p1x, p1y = p1 97 p2x, p2y = p2 98 return np.array([min(p1x, p2x), min(p1y, p2y)]), np.array([max(p1x, p2x), max(p1y, p2y)]) 99 100 101def distance_to_similarity(array: np.ndarray) -> np.ndarray: 102 """ 103 Converts an array of distances into an array of similarities. 104 105 Args: 106 array (np.ndarray): array with distances values 107 108 Returns: 109 np.ndarray: an array with similarity values 110 """ 111 return max(array) - array 112 113 114def rank(array: np.ndarray) -> np.ndarray: 115 """ 116 Computes the rank of an array of values. 117 118 Args: 119 array (np.ndarray): array with values 120 121 Returns: 122 np.ndarray: an array with the ranks of each value 123 """ 124 temp = array.argsort() 125 ranks = np.empty_like(temp) 126 ranks[temp] = np.arange(len(array)) 127 return ranks 128 129 130def slope_ranking(points: np.ndarray, knees: np.ndarray, t: float = 0.8) -> np.ndarray: 131 """ 132 Computes the rank of a set of knees in a curve. 133 134 The ranking is based on the slope of the left of the knee point. 135 The left neighbourhood is computed based on the R2 metric. 136 137 Args: 138 points (np.ndarray): numpy array with the points (x, y) 139 knees (np.ndarray): knees indexes 140 t (float): the R2 threshold for the neighbourhood (default 0.8) 141 142 Returns: 143 np.ndarray: an array with the ranks of each value 144 """ 145 # corner case 146 if len(knees) == 1.0: 147 rankings = np.array([1.0]) 148 else: 149 rankings = [] 150 151 x = points[:, 0] 152 y = points[:, 1] 153 154 _, _, slope = ev.get_neighbourhood(x, y, knees[0], 0, t) 155 rankings.append(math.fabs(slope)) 156 157 for i in range(1, len(knees)): 158 _, _, slope = ev.get_neighbourhood(x, y, knees[i], knees[i-1], t) 159 rankings.append(math.fabs(slope)) 160 161 rankings = np.array(rankings) 162 rankings = rank(rankings) 163 # Min Max normalization 164 if len(rankings) > 1: 165 rankings = (rankings - np.min(rankings))/np.ptp(rankings) 166 else: 167 rankings = np.array([1.0]) 168 169 return rankings 170 171 172def smooth_ranking(points: np.ndarray, knees: np.ndarray, t: ClusterRanking) -> np.ndarray: 173 """ 174 Computes the rank for a cluster of knees in a curve. 175 176 The ranking is a weighted raking based on the Y axis improvement and the 177 slope/smoothed of the curve. 178 This methods tries to find the best knee within a cluster of knees, this 179 means that the boundaries for the computation are based on the cluster dimention. 180 181 Args: 182 points (np.ndarray): numpy array with the points (x, y) 183 knees (np.ndarray): knees indexes 184 t (ClusterRanking): selects the direction where the curve must be smooth 185 186 Returns: 187 np.ndarray: an array with the ranks of each value 188 """ 189 190 x = points[:, 0] 191 y = points[:, 1] 192 193 fit = [] 194 weights = [] 195 196 j = knees[0] 197 peak = np.max(y[knees]) 198 199 # TODO: find a better approximation, for example SMAPE 200 for i in range(0, len(knees)): 201 # R2 score 202 r2 = 0 203 if t is ClusterRanking.linear: 204 r2_left = lf.r2(x[j:knees[i]+1], y[j:knees[i]+1]) 205 r2_right = lf.r2(x[knees[i]:knees[-1]], y[knees[i]:knees[-1]]) 206 r2 = (r2_left + r2_right) / 2.0 207 elif t is ClusterRanking.left: 208 r2 = lf.r2(x[j:knees[i]+1], y[j:knees[i]+1]) 209 else: 210 r2 = lf.r2(x[knees[i]:knees[-1]], y[knees[i]:knees[-1]]) 211 fit.append(r2) 212 213 # height of the segment 214 d = math.fabs(peak - y[knees[i]]) 215 weights.append(d) 216 217 #weights.append(0) 218 weights = np.array(weights) 219 #fit.append(0) 220 fit = np.array(fit) 221 222 #max_weights = np.max(weights) 223 # if max_weights != 0: 224 # weights = weights / max_weights 225 226 sum_weights = np.sum(weights) 227 if sum_weights != 0: 228 weights = weights / sum_weights 229 230 #logger.info(f'Fit & Weights {fit} / {weights}') 231 232 rankings = fit * weights 233 234 #logger.info(f'Smooth Ranking {rankings}') 235 236 return rankings
30class ClusterRanking(enum.Enum): 31 """ 32 Enum data type that represents the direction of the ranking within a cluster. 33 """ 34 left = 'left' 35 linear = 'linear' 36 right = 'right' 37 hull = 'hull' 38 39 def __str__(self): 40 return self.value
Enum data type that represents the direction of the ranking within a cluster.
Inherited Members
- enum.Enum
- name
- value
43def distances(point:np.ndarray, points:np.ndarray) -> np.ndarray: 44 """ 45 Computes the euclidean distance from a single point to a vector of points. 46 47 Args: 48 point (np.ndarray): the point 49 points (np.ndarray): the vector of points 50 51 Returns: 52 np.ndarray: a vector with the distances from point to all the points. 53 """ 54 return np.sqrt(np.sum(np.power(points - point, 2), axis=1))
Computes the euclidean distance from a single point to a vector of points.
Arguments:
- point (np.ndarray): the point
- points (np.ndarray): the vector of points
Returns:
np.ndarray: a vector with the distances from point to all the points.
57def rect_overlap(amin: np.ndarray, amax: np.ndarray, bmin: np.ndarray, bmax: np.ndarray) -> float: 58 """ 59 Computes the percentage of the overlap for two rectangles. 60 61 Args: 62 amin (np.ndarray): the low point in rectangle A 63 amax (np.ndarray): the high point in rectangle A 64 bmin (np.ndarray): the low point in rectangle B 65 bmax (np.ndarray): the high point in rectangle B 66 67 Returns: 68 float: percentage of the overlap of two rectangles 69 """ 70 #logger.info('%s %s %s %s', amin, amax, bmin, bmax) 71 dx = max(0.0, min(amax[0], bmax[0]) - max(amin[0], bmin[0])) 72 dy = max(0.0, min(amax[1], bmax[1]) - max(amin[1], bmin[1])) 73 #logger.info('dx %s dy %s', dx, dy) 74 overlap = dx * dy 75 #logger.info('overlap = %s', overlap) 76 if overlap > 0.0: 77 a = np.abs(amax-amin) 78 b = np.abs(bmax-bmin) 79 total_area = a[0]*a[1] + b[0]*b[1] - overlap 80 #print(f'overlap area = {overlap} total area = {total_area}') 81 return overlap / total_area 82 else: 83 return 0.0
Computes the percentage of the overlap for two rectangles.
Arguments:
- amin (np.ndarray): the low point in rectangle A
- amax (np.ndarray): the high point in rectangle A
- bmin (np.ndarray): the low point in rectangle B
- bmax (np.ndarray): the high point in rectangle B
Returns:
float: percentage of the overlap of two rectangles
86def rect(p1: np.ndarray, p2: np.ndarray) -> tuple: 87 """ 88 Creates the low and high rectangle coordinates from 2 points. 89 90 Args: 91 p1 (np.ndarray): one of the points in the rectangle 92 p2 (np.ndarray): one of the points in the rectangle 93 94 Returns: 95 tuple: tuple with two points (low and high) 96 """ 97 p1x, p1y = p1 98 p2x, p2y = p2 99 return np.array([min(p1x, p2x), min(p1y, p2y)]), np.array([max(p1x, p2x), max(p1y, p2y)])
Creates the low and high rectangle coordinates from 2 points.
Arguments:
- p1 (np.ndarray): one of the points in the rectangle
- p2 (np.ndarray): one of the points in the rectangle
Returns:
tuple: tuple with two points (low and high)
102def distance_to_similarity(array: np.ndarray) -> np.ndarray: 103 """ 104 Converts an array of distances into an array of similarities. 105 106 Args: 107 array (np.ndarray): array with distances values 108 109 Returns: 110 np.ndarray: an array with similarity values 111 """ 112 return max(array) - array
Converts an array of distances into an array of similarities.
Arguments:
- array (np.ndarray): array with distances values
Returns:
np.ndarray: an array with similarity values
115def rank(array: np.ndarray) -> np.ndarray: 116 """ 117 Computes the rank of an array of values. 118 119 Args: 120 array (np.ndarray): array with values 121 122 Returns: 123 np.ndarray: an array with the ranks of each value 124 """ 125 temp = array.argsort() 126 ranks = np.empty_like(temp) 127 ranks[temp] = np.arange(len(array)) 128 return ranks
Computes the rank of an array of values.
Arguments:
- array (np.ndarray): array with values
Returns:
np.ndarray: an array with the ranks of each value
131def slope_ranking(points: np.ndarray, knees: np.ndarray, t: float = 0.8) -> np.ndarray: 132 """ 133 Computes the rank of a set of knees in a curve. 134 135 The ranking is based on the slope of the left of the knee point. 136 The left neighbourhood is computed based on the R2 metric. 137 138 Args: 139 points (np.ndarray): numpy array with the points (x, y) 140 knees (np.ndarray): knees indexes 141 t (float): the R2 threshold for the neighbourhood (default 0.8) 142 143 Returns: 144 np.ndarray: an array with the ranks of each value 145 """ 146 # corner case 147 if len(knees) == 1.0: 148 rankings = np.array([1.0]) 149 else: 150 rankings = [] 151 152 x = points[:, 0] 153 y = points[:, 1] 154 155 _, _, slope = ev.get_neighbourhood(x, y, knees[0], 0, t) 156 rankings.append(math.fabs(slope)) 157 158 for i in range(1, len(knees)): 159 _, _, slope = ev.get_neighbourhood(x, y, knees[i], knees[i-1], t) 160 rankings.append(math.fabs(slope)) 161 162 rankings = np.array(rankings) 163 rankings = rank(rankings) 164 # Min Max normalization 165 if len(rankings) > 1: 166 rankings = (rankings - np.min(rankings))/np.ptp(rankings) 167 else: 168 rankings = np.array([1.0]) 169 170 return rankings
Computes the rank of a set of knees in a curve.
The ranking is based on the slope of the left of the knee point. The left neighbourhood is computed based on the R2 metric.
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- knees (np.ndarray): knees indexes
- t (float): the R2 threshold for the neighbourhood (default 0.8)
Returns:
np.ndarray: an array with the ranks of each value
173def smooth_ranking(points: np.ndarray, knees: np.ndarray, t: ClusterRanking) -> np.ndarray: 174 """ 175 Computes the rank for a cluster of knees in a curve. 176 177 The ranking is a weighted raking based on the Y axis improvement and the 178 slope/smoothed of the curve. 179 This methods tries to find the best knee within a cluster of knees, this 180 means that the boundaries for the computation are based on the cluster dimention. 181 182 Args: 183 points (np.ndarray): numpy array with the points (x, y) 184 knees (np.ndarray): knees indexes 185 t (ClusterRanking): selects the direction where the curve must be smooth 186 187 Returns: 188 np.ndarray: an array with the ranks of each value 189 """ 190 191 x = points[:, 0] 192 y = points[:, 1] 193 194 fit = [] 195 weights = [] 196 197 j = knees[0] 198 peak = np.max(y[knees]) 199 200 # TODO: find a better approximation, for example SMAPE 201 for i in range(0, len(knees)): 202 # R2 score 203 r2 = 0 204 if t is ClusterRanking.linear: 205 r2_left = lf.r2(x[j:knees[i]+1], y[j:knees[i]+1]) 206 r2_right = lf.r2(x[knees[i]:knees[-1]], y[knees[i]:knees[-1]]) 207 r2 = (r2_left + r2_right) / 2.0 208 elif t is ClusterRanking.left: 209 r2 = lf.r2(x[j:knees[i]+1], y[j:knees[i]+1]) 210 else: 211 r2 = lf.r2(x[knees[i]:knees[-1]], y[knees[i]:knees[-1]]) 212 fit.append(r2) 213 214 # height of the segment 215 d = math.fabs(peak - y[knees[i]]) 216 weights.append(d) 217 218 #weights.append(0) 219 weights = np.array(weights) 220 #fit.append(0) 221 fit = np.array(fit) 222 223 #max_weights = np.max(weights) 224 # if max_weights != 0: 225 # weights = weights / max_weights 226 227 sum_weights = np.sum(weights) 228 if sum_weights != 0: 229 weights = weights / sum_weights 230 231 #logger.info(f'Fit & Weights {fit} / {weights}') 232 233 rankings = fit * weights 234 235 #logger.info(f'Smooth Ranking {rankings}') 236 237 return rankings
Computes the rank for a cluster of knees in a curve.
The ranking is a weighted raking based on the Y axis improvement and the slope/smoothed of the curve. This methods tries to find the best knee within a cluster of knees, this means that the boundaries for the computation are based on the cluster dimention.
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- knees (np.ndarray): knees indexes
- t (ClusterRanking): selects the direction where the curve must be smooth
Returns:
np.ndarray: an array with the ranks of each value