kneeliverse.linear_fit

The following module provides a set of methods used for fast linear fitting. These methods are then used by the L-method knee algorithm.

  1# coding: utf-8
  2
  3'''
  4The following module provides a set of methods
  5used for fast linear fitting. These methods are
  6then used by the L-method knee algorithm.
  7'''
  8
  9__author__ = 'Mário Antunes'
 10__version__ = '1.0'
 11__email__ = 'mario.antunes@ua.pt'
 12__status__ = 'Development'
 13__license__ = 'MIT'
 14__copyright__ = '''
 15Copyright (c) 2021-2023 Stony Brook University
 16Copyright (c) 2021-2023 The Research Foundation of SUNY
 17'''
 18
 19import math
 20import logging
 21import numpy as np
 22import kneeliverse.metrics as metrics
 23
 24
 25from typing import Union
 26
 27
 28logger = logging.getLogger(__name__)
 29
 30
 31def linear_fit_points(points: np.ndarray) -> tuple:
 32    """
 33    Computes the linear fit for the points.
 34
 35    This methods approximates the linear fit using only the
 36    first and last points in a curve.
 37
 38    Args:
 39        points (np.ndarray): numpy array with the points (x, y)
 40
 41    Returns:
 42        tuple: (b, m)
 43    """
 44    x = points[:, 0]
 45    y = points[:, 1]
 46    return linear_fit(x, y)
 47
 48
 49def linear_fit(x: np.ndarray, y: np.ndarray) -> tuple:
 50    """
 51    Computes the linear fit for the points.
 52
 53    This methods approximates the linear fit using only the
 54    first and last points in a curve.
 55
 56    Args:
 57        x (np.ndarray): the value of the points in the x axis coordinates
 58        y (np.ndarray): the value of the points in the y axis coordinates
 59
 60    Returns:
 61        tuple: (b, m)
 62    """
 63
 64    d = x[0] - x[-1]
 65    if d != 0:
 66        m = (y[0] - y[-1])/(x[0] - x[-1])
 67        b = y[0] - (m*x[0])
 68        return (b, m)
 69    else:
 70        return (0, 0)
 71
 72
 73def linear_transform_points(points: np.ndarray, coef: tuple) -> np.ndarray:
 74    """
 75    Computes the y values for an x array and the given coefficients.
 76
 77    Args:
 78        points (np.ndarray): numpy array with the points (x, y)
 79        coef (tuple): the coefficients from the linear fit
 80
 81    Returns:
 82        np.ndarray: the corresponding y values
 83    """
 84    x = points[:, 0]
 85    return linear_transform(x, coef)
 86
 87
 88def linear_transform(x: np.ndarray, coef: tuple) -> np.ndarray:
 89    """
 90    Computes the y values for an x array and the given coefficients.
 91
 92    Args:
 93        x (np.ndarray): the value of the points in the x axis coordinates
 94        coef (tuple): the coefficients from the linear fit
 95
 96    Returns:
 97        np.ndarray: the corresponding y values
 98    """
 99    b, m = coef
100    y_hat = x * m + b
101    return y_hat
102
103
def linear_hv_residuals_points(points: np.ndarray) -> float:
    """
    Residual error of the better of two straight line fittings.

    Both a horizontal line (typical $f(x)=m \\times x + b$ equation)
    and a vertical line (non-typical $f(y) = m \\times y + b$ equation)
    are tried by `linear_hv_residuals`; the smaller residual error is returned.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        float: the residuals of the line fitting.
    """
    return linear_hv_residuals(points[:, 0], points[:, 1])
121
122
def linear_hv_residuals(x: np.ndarray, y: np.ndarray) -> float:
    """
    Residual error of the better of two straight line fittings.

    A horizontal line (typical $f(x)=m \\times x + b$ equation) and a
    vertical line (non-typical $f(y) = m \\times y + b$ equation) are
    both fitted; the smaller of the two residual errors is returned
    (the horizontal one on a tie).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates

    Returns:
        float: the residuals of the line fitting.
    """
    # typical y = m*x + b line
    horizontal_error = linear_residuals(x, y, linear_fit(x, y))

    # non-typical x = m*y + b line (the roles of x and y are swapped)
    vertical_error = linear_residuals(y, x, linear_fit(y, x))

    return horizontal_error if horizontal_error <= vertical_error else vertical_error
150
151
def linear_fit_transform_points(points: np.ndarray, vertical:bool=False) -> Union[np.ndarray, tuple]:
    """
    Fits a horizontal or vertical line to an array of points.

    A horizontal line fitting uses the typical $f(x)=m \\times x + b$ equation.
    The vertical line fitting uses the non-typical $f(y) = m \\times y + b$ equation.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        vertical (bool): if True the vertical line fitting is also tried (default: False)

    Returns:
        Union[np.ndarray, tuple]: the result of `linear_fit_transform`:
        only the fitted y_hat values when `vertical` is False,
        a tuple with two arrays when it is True.
    """
    xs, ys = points[:, 0], points[:, 1]
    return linear_fit_transform(xs, ys, vertical)
170
171
def linear_fit_transform(x: np.ndarray, y: np.ndarray, vertical=False) -> Union[np.ndarray, tuple]:
    """
    Computes the horizontal or vertical line fitting.

    A horizontal line fitting uses the typical $f(x)=m \\times x + b$ equation.
    The vertical line fitting uses the non-typical $f(y) = m \\times y + b$ equation.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        vertical (bool): if True the vertical line fitting is also tried (default: False)

    Returns:
        Union[np.ndarray, tuple]: when `vertical` is False, only the computed
        y_hat values. When `vertical` is True, the coordinates of the fitted
        line with the smaller residuals: (x, y_hat) for the horizontal fit
        (also chosen on a tie) or (x_hat, y) for the vertical fit.
    """
    # typical y = m*x + b line
    coef1 = linear_fit(x, y)
    y_hat = linear_transform(x, coef1)

    if not vertical:
        return y_hat

    y_residuals = linear_residuals(x, y, coef1)

    # non-typical x = m*y + b line
    coef2 = linear_fit(y, x)
    x_hat = linear_transform(y, coef2)
    x_residuals = linear_residuals(y, x, coef2)

    # return the points of the fitted line always in (x, y) order
    if y_residuals <= x_residuals:
        return x, y_hat
    return x_hat, y
190
191
def linear_r2_points(points: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R2.classic) -> float:
    """
    Coefficient of determination (R2) of a given line over an array of points.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        r2 (R2): select the type of coefficient of determination (default: R2.classic)

    Returns:
        float: coefficient of determination (R2)
    """
    return linear_r2(points[:, 0], points[:, 1], coef, r2)
207
208
def linear_r2(x: np.ndarray, y: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R2.classic) -> float:
    """
    Computes the coefficient of determination (R2).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        r2 (R2): select the type of coefficient of determination (default: R2.classic)

    Returns:
        float: coefficient of determination (R2). With exactly two points
        the adjusted variant is undefined (division by zero), so the
        unadjusted value is returned.
    """
    y_hat = linear_transform(x, coef)
    rss = np.sum((y - y_hat)**2)
    tss = np.sum((y - np.mean(y))**2)

    # a constant y has no variance to normalise with: fall back to 1 - RSS
    rv = 1.0 - rss if tss == 0 else 1.0 - (rss/tss)

    n = len(x)
    # the adjustment factor (n-1)/(n-2) does not exist for n == 2
    if r2 is metrics.R2.adjusted and n != 2:
        rv = 1.0 - (1.0 - rv)*((n - 1)/(n - 2))

    return rv
237
238
def rmspe_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Root Mean Squared Percentage Error (RMSPE) of a line over an array of points.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Root Mean Squared Percentage Error (RMSPE)
    """
    return rmspe(points[:, 0], points[:, 1], coef, eps)
254
255
def rmspe(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the Root Mean Squared Percentage Error (RMSPE).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Root Mean Squared Percentage Error (RMSPE)
    """
    y_hat = linear_transform(x, coef)
    # forward eps so the caller's value is honoured (as smape and rpd do)
    return metrics.rmspe(y, y_hat, eps)
271
272
def rmsle_points(points: np.ndarray, coef: tuple) -> float:
    """
    Root Mean Squared Log Error (RMSLE) of a line over an array of points:
    $$
    RMSLE(y, \\hat{y}) = \\sqrt{\\frac{\\sum_{i=1}^{n}(\\log (y_i+1) - \\log (\\hat{y_i}+1))^2}{n}}
    $$

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Log Error (RMSLE)
    """
    return rmsle(points[:, 0], points[:, 1], coef)
290
291
def rmsle(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Root Mean Squared Log Error (RMSLE) between y and the fitted line:
    $$
    RMSLE(y, \\hat{y}) = \\sqrt{\\frac{\\sum_{i=1}^{n}(\\log (y_i+1) - \\log (\\hat{y_i}+1))^2}{n}}
    $$

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Log Error (RMSLE)
    """
    return metrics.rmsle(y, linear_transform(x, coef))
309
310
def smape_points(points: np.ndarray, coef: tuple, eps: float = 1e-16)->float:
    """
    Computes the SMAPE metric of a line over an array of points.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value forwarded to the metric (default: 1E-16)

    Returns:
        float: the value computed by `smape`
    """
    return smape(points[:, 0], points[:, 1], coef, eps)
315
316
def smape(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the SMAPE metric between y and the fitted line.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value forwarded to `metrics.smape` (default: 1E-16)

    Returns:
        float: the value computed by `metrics.smape`
    """
    return metrics.smape(y, linear_transform(x, coef), eps)
320
321
def rpd_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Relative Percentage Difference (RPD) of a line over an array of points.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Relative Percentage Difference (RPD)
    """
    return rpd(points[:, 0], points[:, 1], coef, eps)
337
338
def rpd(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Relative Percentage Difference (RPD) between y and the fitted line.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Relative Percentage Difference (RPD)
    """
    return metrics.rpd(y, linear_transform(x, coef), eps)
354
355
def rmse_points(points: np.ndarray, coef: tuple) -> float:
    """
    Root Mean Squared Error (RMSE) of a line over an array of points.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Error (RMSE)
    """
    return rmse(points[:, 0], points[:, 1], coef)
370
371
def rmse(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Root Mean Squared Error (RMSE) between y and the fitted line.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Error (RMSE)
    """
    return metrics.rmse(y, linear_transform(x, coef))
386
387
def linear_residuals_points(points: np.ndarray, coef: tuple) -> float:
    """
    Residual error of a given line over an array of points.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: residual error of the linear fit
    """
    return linear_residuals(points[:, 0], points[:, 1], coef)
402
403
def linear_residuals(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Residual error between y and the line described by the coefficients.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: residual error of the linear fit
    """
    return metrics.residuals(y, linear_transform(x, coef))
418
419
def linear_fit_residuals_points(points: np.ndarray) -> float:
    """
    Fits a fast line to an array of points and computes its residual error.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        float: residual error of the fast linear fit
    """
    return linear_fit_residuals(points[:, 0], points[:, 1])
424
425
def linear_fit_residuals(x: np.ndarray, y: np.ndarray) -> float:
    """
    Fits a fast line (`linear_fit`) and computes its residual error.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates

    Returns:
        float: residual error of the fast linear fit
    """
    fitted = linear_transform(x, linear_fit(x, y))
    return metrics.residuals(y, fitted)
430
431
def r2_points(points: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
    """
    Coefficient of determination (R2) of the best fit for an array of points.

    The best fit is used (and not the fast point fit).
    With two points or fewer the result is always 1.0.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (R2): select the type of coefficient of determination

    Returns:
        float: coefficient of determination (R2)
    """
    if len(points) > 2:
        return r2(points[:, 0], points[:, 1], t)
    return 1.0
452
453
def r2(x: np.ndarray, y: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
    """Computes the coefficient of determination (R2).

    Computes the best fit (and not the fast point fit)
    and computes the corresponding R2, obtained as the
    squared correlation coefficient between x and y.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        t (R2): select the type of coefficient of determination

    Returns:
        float: coefficient of determination (R2); 1.0 when there are
        two points or fewer, for both the classic and adjusted types.
    """
    n = len(x)
    if n <= 2:
        # Return before the adjustment: its factor (n-1)/(n-2) divides
        # by zero for n == 2, and leaves 1.0 unchanged for n < 2.
        return 1.0

    rv = (np.corrcoef(x, y)[0, 1])**2.0

    if t is metrics.R2.adjusted:
        rv = 1.0 - (1-rv)*((n-1)/(n-2))

    return rv
478
479
def angle(coef1: tuple, coef2: tuple) -> float:
    """
    Computes the angle between two lines.

    The angle is obtained from the slopes with
    $\\tan(\\theta) = \\frac{m_1 - m_2}{1 + m_1 m_2}$ and is signed:
    it is negative when the first slope is smaller than the second one.

    Args:
        coef1 (tuple): the coefficients (b, m) from the first line
        coef2 (tuple): the coefficients (b, m) from the second line

    Returns:
        float: the signed angle between two lines
        $\\left[-\\frac{\\pi}{2},\\frac{\\pi}{2} \\right]$
    """
    _, m1 = coef1
    _, m2 = coef2

    numerator = m1 - m2
    denominator = 1.0 + m1*m2

    if denominator == 0:
        # perpendicular lines: the tangent is unbounded, so return the
        # limit value instead of dividing by zero
        return math.copysign(math.pi/2.0, numerator)

    return math.atan(numerator/denominator)
494
495
def shortest_distance_points(p: np.ndarray, a: np.ndarray, b: np.ndarray):
    """
    Computes the shortest distance from the points to the
    line segment delimited by the points a and b.

    The distance along the segment direction is clamped, so points
    that project outside the segment are measured to the nearest end.
    When a and b are the same point, the distance to that point is returned.

    Args:
        p (np.ndarray): numpy array with the points (x, y)
        a (np.ndarray): the first end point of the segment
        b (np.ndarray): the second end point of the segment

    Returns:
        np.ndarray: the shortest distances
    """

    # TODO for you: consider implementing @Eskapp's suggestions
    if np.all(a == b):
        return np.linalg.norm(p - a, axis=1)

    # unit vector pointing from a to b
    segment = b - a
    tangent = np.divide(segment, np.linalg.norm(segment))

    # signed distances, along the tangent, beyond each end of the segment
    before_start = np.dot(a - p, tangent)
    after_end = np.dot(p - b, tangent)

    # zero for points that project inside the segment
    along = np.maximum.reduce([before_start, after_end, np.zeros(len(p))])

    # component perpendicular to the segment
    across = np.cross(p - a, tangent)

    # combine both components (Pythagoras)
    return np.hypot(along, across)
530
531
def perpendicular_distance(points: np.ndarray) -> np.ndarray:
    """
    Perpendicular distance from every point to the straight
    line defined by the first and last point of the array.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        np.ndarray: the perpendicular distances

    """
    last = len(points) - 1
    return perpendicular_distance_index(points, 0, last)
545
546
def perpendicular_distance_index(points: np.ndarray, left: int, right: int) -> np.ndarray:
    """
    Computes the perpendicular distance from the points to the
    straight line defined by the left and right point.

    Only the points in the range [left, right] (both inclusive)
    are considered, so the result has right - left + 1 values.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        left (int): the index of the left point
        right (int): the index of the right point

    Returns:
        np.ndarray: the perpendicular distances
    """
    # the distances must not be offset by the index of the left point
    return perpendicular_distance_points(points[left:right+1], points[left], points[right])
561
562
def perpendicular_distance_points(pt: np.ndarray, start: np.ndarray, end: np.ndarray) -> np.ndarray:
    """
    Computes the perpendicular distance from the points to the
    straight line defined by the left and right point.

    When the left and right points are the same there is no line;
    the distance to that single point is returned instead.

    Args:
        pt (np.ndarray): numpy array with the points (x, y)
        start (np.ndarray): the left point
        end (np.ndarray): the right point

    Returns:
        np.ndarray: the perpendicular distances
    """
    direction = end - start
    length = np.linalg.norm(direction)

    if length == 0:
        # degenerate line: avoid the 0/0 division (NaN)
        return np.linalg.norm(pt - start, axis=-1)

    offset = pt - start
    # 2D cross product written explicitly (np.cross is deprecated
    # for 2-dimensional vectors since NumPy 2.0)
    cross = direction[0]*offset[..., 1] - direction[1]*offset[..., 0]
    return np.fabs(cross/length)
logger = <Logger kneeliverse.linear_fit (WARNING)>
def linear_fit_points(points: numpy.ndarray) -> tuple:
def linear_fit_points(points: np.ndarray) -> tuple:
    """
    Fast linear fit of a curve given as an array of (x, y) points.

    The array is split into its x (column 0) and y (column 1)
    coordinates, which are handed to `linear_fit`. That method builds
    the line from the first and last points of the curve only.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        tuple: (b, m), the intercept and the slope of the line
    """
    return linear_fit(points[:, 0], points[:, 1])

Computes the linear fit for the points.

This method approximates the linear fit using only the first and last points in a curve.

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
Returns:

tuple: (b, m)

def linear_fit(x: numpy.ndarray, y: numpy.ndarray) -> tuple:
def linear_fit(x: np.ndarray, y: np.ndarray) -> tuple:
    """
    Fast linear fit through the two end points of a curve.

    This is not a least-squares regression: the line simply connects
    the first and the last points. When both end points share the same
    x value no finite slope exists and (0, 0) is returned instead.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates

    Returns:
        tuple: (b, m), the intercept and the slope of the line
    """
    dx = x[0] - x[-1]
    if dx == 0:
        # both end points are vertically aligned
        return (0, 0)

    slope = (y[0] - y[-1])/dx
    intercept = y[0] - (slope*x[0])
    return (intercept, slope)

Computes the linear fit for the points.

This method approximates the linear fit using only the first and last points in a curve.

Arguments:
  • x (np.ndarray): the value of the points in the x axis coordinates
  • y (np.ndarray): the value of the points in the y axis coordinates
Returns:

tuple: (b, m)

def linear_transform_points(points: numpy.ndarray, coef: tuple) -> numpy.ndarray:
def linear_transform_points(points: np.ndarray, coef: tuple) -> np.ndarray:
    """
    Evaluates a fitted line on the x coordinates of an array of points.

    Only the first column (x) of the array is used; the y column is ignored.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients (b, m) from the linear fit

    Returns:
        np.ndarray: the corresponding y values
    """
    return linear_transform(points[:, 0], coef)

Computes the y values for an x array and the given coefficients.

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • coef (tuple): the coefficients from the linear fit
Returns:

np.ndarray: the corresponding y values

def linear_transform(x: numpy.ndarray, coef: tuple) -> numpy.ndarray:
 89def linear_transform(x: np.ndarray, coef: tuple) -> np.ndarray:
 90    """
 91    Computes the y values for an x array and the given coefficients.
 92
 93    Args:
 94        x (np.ndarray): the value of the points in the x axis coordinates
 95        coef (tuple): the coefficients from the linear fit
 96
 97    Returns:
 98        np.ndarray: the corresponding y values
 99    """
100    b, m = coef
101    y_hat = x * m + b
102    return y_hat

Computes the y values for an x array and the given coefficients.

Arguments:
  • x (np.ndarray): the value of the points in the x axis coordinates
  • coef (tuple): the coefficients from the linear fit
Returns:

np.ndarray: the corresponding y values

def linear_hv_residuals_points(points: numpy.ndarray) -> float:
def linear_hv_residuals_points(points: np.ndarray) -> float:
    """
    Residual error of the better of two straight line fittings.

    Both a horizontal line (typical $f(x)=m \\times x + b$ equation)
    and a vertical line (non-typical $f(y) = m \\times y + b$ equation)
    are tried by `linear_hv_residuals`; the smaller residual error is returned.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        float: the residuals of the line fitting.
    """
    return linear_hv_residuals(points[:, 0], points[:, 1])

Computes the residual error from a straight line fitting.

This method tries a horizontal line (the typical $f(x)=m \times x + b$ equation) and a vertical line (the non-typical $f(y) = m \times y + b$ equation). It returns the smaller residual error.

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
Returns:

float: the residuals of the line fitting.

def linear_hv_residuals(x: numpy.ndarray, y: numpy.ndarray) -> float:
def linear_hv_residuals(x: np.ndarray, y: np.ndarray) -> float:
    """
    Residual error of the better of two straight line fittings.

    A horizontal line (typical $f(x)=m \\times x + b$ equation) and a
    vertical line (non-typical $f(y) = m \\times y + b$ equation) are
    both fitted; the smaller of the two residual errors is returned
    (the horizontal one on a tie).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates

    Returns:
        float: the residuals of the line fitting.
    """
    # typical y = m*x + b line
    horizontal_error = linear_residuals(x, y, linear_fit(x, y))

    # non-typical x = m*y + b line (the roles of x and y are swapped)
    vertical_error = linear_residuals(y, x, linear_fit(y, x))

    return horizontal_error if horizontal_error <= vertical_error else vertical_error

Computes the residual error from a straight line fitting.

This method tries a horizontal line (the typical $f(x)=m \times x + b$ equation) and a vertical line (the non-typical $f(y) = m \times y + b$ equation). It returns the smaller residual error.

Arguments:
  • x (np.ndarray): the value of the points in the x axis coordinates
  • y (np.ndarray): the value of the points in the y axis coordinates
Returns:

float: the residuals of the line fitting.

def linear_fit_transform_points( points: numpy.ndarray, vertical: bool = False) -> Union[numpy.ndarray, tuple]:
def linear_fit_transform_points(points: np.ndarray, vertical:bool=False) -> Union[np.ndarray, tuple]:
    """
    Fits a horizontal or vertical line to an array of points.

    A horizontal line fitting uses the typical $f(x)=m \\times x + b$ equation.
    The vertical line fitting uses the non-typical $f(y) = m \\times y + b$ equation.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        vertical (bool): if True the vertical line fitting is also tried (default: False)

    Returns:
        Union[np.ndarray, tuple]: the result of `linear_fit_transform`:
        only the fitted y_hat values when `vertical` is False,
        a tuple with two arrays when it is True.
    """
    xs, ys = points[:, 0], points[:, 1]
    return linear_fit_transform(xs, ys, vertical)

Computes the horizontal or vertical line fitting.

A horizontal line fitting uses the typical $f(x)=m \times x + b$ equation. The vertical line fitting uses the non-typical $f(y) = m \times y + b$ equation.

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • vertical (bool): if True computes the vertical line fitting (default: False)
Returns:

Union[np.ndarray, tuple]: for the horizontal line fitting, returns only the computed y_hat values. For the vertical line fitting, returns the x points and the y_hat values.

def linear_fit_transform( x: numpy.ndarray, y: numpy.ndarray, vertical=False) -> Union[numpy.ndarray, tuple]:
def linear_fit_transform(x: np.ndarray, y: np.ndarray, vertical=False) -> Union[np.ndarray, tuple]:
    """
    Computes the horizontal or vertical line fitting.

    A horizontal line fitting uses the typical $f(x)=m \\times x + b$ equation.
    The vertical line fitting uses the non-typical $f(y) = m \\times y + b$ equation.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        vertical (bool): if True the vertical line fitting is also tried (default: False)

    Returns:
        Union[np.ndarray, tuple]: when `vertical` is False, only the computed
        y_hat values. When `vertical` is True, the coordinates of the fitted
        line with the smaller residuals: (x, y_hat) for the horizontal fit
        (also chosen on a tie) or (x_hat, y) for the vertical fit.
    """
    # typical y = m*x + b line
    coef1 = linear_fit(x, y)
    y_hat = linear_transform(x, coef1)

    if not vertical:
        return y_hat

    y_residuals = linear_residuals(x, y, coef1)

    # non-typical x = m*y + b line
    coef2 = linear_fit(y, x)
    x_hat = linear_transform(y, coef2)
    x_residuals = linear_residuals(y, x, coef2)

    # return the points of the fitted line always in (x, y) order
    if y_residuals <= x_residuals:
        return x, y_hat
    return x_hat, y
def linear_r2_points( points: numpy.ndarray, coef: tuple, r2: kneeliverse.metrics.R2 = <R2.classic: 'classic'>) -> float:
def linear_r2_points(points: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R2.classic) -> float:
    """
    Coefficient of determination (R2) of a given line over an array of points.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        r2 (R2): select the type of coefficient of determination (default: R2.classic)

    Returns:
        float: coefficient of determination (R2)
    """
    return linear_r2(points[:, 0], points[:, 1], coef, r2)

Computes the coefficient of determination (R2).

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • coef (tuple): the coefficients from the linear fit
  • r2 (R2): select the type of coefficient of determination (default: R2.classic)
Returns:

float: coefficient of determination (R2)

def linear_r2( x: numpy.ndarray, y: numpy.ndarray, coef: tuple, r2: kneeliverse.metrics.R2 = <R2.classic: 'classic'>) -> float:
def linear_r2(x: np.ndarray, y: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R2.classic) -> float:
    """
    Computes the coefficient of determination (R2).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        r2 (R2): select the type of coefficient of determination (default: R2.classic)

    Returns:
        float: coefficient of determination (R2). With exactly two points
        the adjusted variant is undefined (division by zero), so the
        unadjusted value is returned.
    """
    y_hat = linear_transform(x, coef)
    rss = np.sum((y - y_hat)**2)
    tss = np.sum((y - np.mean(y))**2)

    # a constant y has no variance to normalise with: fall back to 1 - RSS
    rv = 1.0 - rss if tss == 0 else 1.0 - (rss/tss)

    n = len(x)
    # the adjustment factor (n-1)/(n-2) does not exist for n == 2
    if r2 is metrics.R2.adjusted and n != 2:
        rv = 1.0 - (1.0 - rv)*((n - 1)/(n - 2))

    return rv

Computes the coefficient of determination (R2).

Arguments:
  • x (np.ndarray): the value of the points in the x axis coordinates
  • y (np.ndarray): the value of the points in the y axis coordinates
  • coef (tuple): the coefficients from the linear fit
  • r2 (R2): select the type of coefficient of determination (default: R2.classic)
Returns:

float: coefficient of determination (R2)

def rmspe_points(points: numpy.ndarray, coef: tuple, eps: float = 1e-16) -> float:
def rmspe_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Root Mean Squared Percentage Error (RMSPE) of a line over an array of points.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Root Mean Squared Percentage Error (RMSPE)
    """
    return rmspe(points[:, 0], points[:, 1], coef, eps)

Computes the Root Mean Squared Percentage Error (RMSPE).

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • coef (tuple): the coefficients from the linear fit
  • eps (float): eps value to prevent division by zero (default: 1E-16)
Returns:

float: Root Mean Squared Percentage Error (RMSPE)

def rmspe( x: numpy.ndarray, y: numpy.ndarray, coef: tuple, eps: float = 1e-16) -> float:
def rmspe(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the Root Mean Squared Percentage Error (RMSPE).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Root Mean Squared Percentage Error (RMSPE)
    """
    y_hat = linear_transform(x, coef)
    # forward eps so the caller's value is honoured (as smape and rpd do)
    return metrics.rmspe(y, y_hat, eps)

Computes the Root Mean Squared Percentage Error (RMSPE).

Arguments:
  • x (np.ndarray): the value of the points in the x axis coordinates
  • y (np.ndarray): the value of the points in the y axis coordinates
  • coef (tuple): the coefficients from the linear fit
  • eps (float): eps value to prevent division by zero (default: 1E-16)
Returns:

float: Root Mean Squared Percentage Error (RMSPE)

def rmsle_points(points: numpy.ndarray, coef: tuple) -> float:
def rmsle_points(points: np.ndarray, coef: tuple) -> float:
    """
    Root Mean Squared Log Error (RMSLE) of a line over an array of points:
    $$
    RMSLE(y, \\hat{y}) = \\sqrt{\\frac{\\sum_{i=1}^{n}(\\log (y_i+1) - \\log (\\hat{y_i}+1))^2}{n}}
    $$

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Log Error (RMSLE)
    """
    return rmsle(points[:, 0], points[:, 1], coef)

Computes the Root Mean Squared Log Error (RMSLE): $$ RMSLE(y, \hat{y}) = \sqrt{\frac{\sum_{i=1}^{n}(\log (y_i+1) - \log (\hat{y_i}+1))^2}{n}} $$

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • coef (tuple): the coefficients from the linear fit
Returns:

float: Root Mean Squared Log Error (RMSLE)

def rmsle(x: numpy.ndarray, y: numpy.ndarray, coef: tuple) -> float:
def rmsle(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Root Mean Squared Log Error (RMSLE) of a linear fit:
    $$
    RMSLE(y, \\hat{y}) = \\sqrt{\\frac{\\sum_{i=1}^{n}(\\log (y_i+1) - \\log (\\hat{y_i}+1))^2}{n}}
    $$

    The predictions come from applying the fit coefficients to x
    (linear_transform); the error is evaluated by metrics.rmsle.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Log Error (RMSLE)
    """
    predicted = linear_transform(x, coef)
    error = metrics.rmsle(y, predicted)
    return error

Computes the Root Mean Squared Log Error (RMSLE): $$ RMSLE(y, \hat{y}) = \sqrt{\frac{\sum_{i=1}^{n}(\log (y_i+1) - \log (\hat{y_i}+1))^2}{n}} $$

Arguments:
  • x (np.ndarray): the value of the points in the x axis coordinates
  • y (np.ndarray): the value of the points in the y axis coordinates
  • coef (tuple): the coefficients from the linear fit
Returns:

float: Root Mean Squared Log Error (RMSLE)

def smape_points(points: numpy.ndarray, coef: tuple, eps: float = 1e-16) -> float:
def smape_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the Symmetric Mean Absolute Percentage Error (SMAPE)
    of a linear fit, given the points as a single array.

    The first column of the array is used as x and the second as y;
    the computation itself is delegated to smape.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value forwarded to smape (default: 1E-16)

    Returns:
        float: the value returned by smape
    """
    xs, ys = points[:, 0], points[:, 1]
    return smape(xs, ys, coef, eps)
def smape( x: numpy.ndarray, y: numpy.ndarray, coef: tuple, eps: float = 1e-16) -> float:
def smape(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the Symmetric Mean Absolute Percentage Error (SMAPE)
    of a linear fit.

    The predictions come from applying the fit coefficients to x
    (linear_transform); the error is evaluated by metrics.smape.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value forwarded to metrics.smape (default: 1E-16)

    Returns:
        float: the value returned by metrics.smape
    """
    return metrics.smape(y, linear_transform(x, coef), eps)
def rpd_points(points: numpy.ndarray, coef: tuple, eps: float = 1e-16) -> float:
def rpd_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Relative Percentage Difference (RPD) of a linear fit, given the
    points as a single array.

    The first column of the array is used as x and the second as y;
    the computation itself is delegated to rpd.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Relative Percentage Difference (RPD)
    """
    return rpd(points[:, 0], points[:, 1], coef, eps)

Computes the Relative Percentage Difference (RPD).

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • coef (tuple): the coefficients from the linear fit
  • eps (float): eps value to prevent division by zero (default: 1E-16)
Returns:

float: Relative Percentage Difference (RPD)

def rpd( x: numpy.ndarray, y: numpy.ndarray, coef: tuple, eps: float = 1e-16) -> float:
def rpd(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Relative Percentage Difference (RPD) of a linear fit.

    The predictions come from applying the fit coefficients to x
    (linear_transform); the error is evaluated by metrics.rpd.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Relative Percentage Difference (RPD)
    """
    predicted = linear_transform(x, coef)
    difference = metrics.rpd(y, predicted, eps)
    return difference

Computes the Relative Percentage Difference (RPD).

Arguments:
  • x (np.ndarray): the value of the points in the x axis coordinates
  • y (np.ndarray): the value of the points in the y axis coordinates
  • coef (tuple): the coefficients from the linear fit
  • eps (float): eps value to prevent division by zero (default: 1E-16)
Returns:

float: Relative Percentage Difference (RPD)

def rmse_points(points: numpy.ndarray, coef: tuple) -> float:
def rmse_points(points: np.ndarray, coef: tuple) -> float:
    """
    Root Mean Squared Error (RMSE) of a linear fit, given the
    points as a single array.

    The first column of the array is used as x and the second as y;
    the computation itself is delegated to rmse.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Error (RMSE)
    """
    xs, ys = points[:, 0], points[:, 1]
    return rmse(xs, ys, coef)

Computes the Root Mean Squared Error (RMSE).

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • coef (tuple): the coefficients from the linear fit
Returns:

float: Root Mean Squared Error (RMSE)

def rmse(x: numpy.ndarray, y: numpy.ndarray, coef: tuple) -> float:
def rmse(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Root Mean Squared Error (RMSE) of a linear fit.

    The predictions come from applying the fit coefficients to x
    (linear_transform); the error is evaluated by metrics.rmse.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Error (RMSE)
    """
    return metrics.rmse(y, linear_transform(x, coef))

Computes the Root Mean Squared Error (RMSE).

Arguments:
  • x (np.ndarray): the value of the points in the x axis coordinates
  • y (np.ndarray): the value of the points in the y axis coordinates
  • coef (tuple): the coefficients from the linear fit
Returns:

float: Root Mean Squared Error (RMSE)

def linear_residuals_points(points: numpy.ndarray, coef: tuple) -> float:
def linear_residuals_points(points: np.ndarray, coef: tuple) -> float:
    """
    Residual error of a linear fit, given the points as a single array.

    The first column of the array is used as x and the second as y;
    the computation itself is delegated to linear_residuals.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: residual error of the linear fit
    """
    return linear_residuals(points[:, 0], points[:, 1], coef)

Computes the residual error of the linear fit.

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • coef (tuple): the coefficients from the linear fit
Returns:

float: residual error of the linear fit

def linear_residuals(x: numpy.ndarray, y: numpy.ndarray, coef: tuple) -> float:
def linear_residuals(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Residual error of a linear fit.

    The predictions come from applying the fit coefficients to x
    (linear_transform); the error is evaluated by metrics.residuals.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: residual error of the linear fit
    """
    predicted = linear_transform(x, coef)
    residual = metrics.residuals(y, predicted)
    return residual

Computes the residual error of the linear fit.

Arguments:
  • x (np.ndarray): the value of the points in the x axis coordinates
  • y (np.ndarray): the value of the points in the y axis coordinates
  • coef (tuple): the coefficients from the linear fit
Returns:

float: residual error of the linear fit

def linear_fit_residuals_points(points: numpy.ndarray) -> float:
def linear_fit_residuals_points(points: np.ndarray) -> float:
    """
    Fits a line to the points and computes the residual error of that fit.

    The first column of the array is used as x and the second as y;
    the computation itself is delegated to linear_fit_residuals.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        float: residual error of the linear fit
    """
    return linear_fit_residuals(points[:, 0], points[:, 1])
def linear_fit_residuals(x: numpy.ndarray, y: numpy.ndarray) -> float:
def linear_fit_residuals(x: np.ndarray, y: np.ndarray) -> float:
    """
    Fits a line to the points and computes the residual error of that fit.

    The coefficients come from linear_fit (the fast fit that only uses
    the first and last points); the predictions are produced by
    linear_transform and the error is evaluated by metrics.residuals.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates

    Returns:
        float: residual error of the linear fit
    """
    fitted = linear_transform(x, linear_fit(x, y))
    return metrics.residuals(y, fitted)
def r2_points( points: numpy.ndarray, t: kneeliverse.metrics.R2 = <R2.classic: 'classic'>) -> float:
def r2_points(points: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
    """
    Coefficient of determination (R2) for an array of points.

    With two points or fewer the result is 1.0 without further
    computation; otherwise the first column is used as x, the second
    as y, and the computation is delegated to r2.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (R2): select the type of coefficient of determination

    Returns:
        float: coefficient of determination (R2)
    """
    if len(points) > 2:
        return r2(points[:, 0], points[:, 1], t)
    return 1.0

Computes the coefficient of determination (R2).

Computes the best fit (and not the fast point fit) and computes the corresponding R2.

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • t (R2): select the type of coefficient of determination
Returns:

float: coefficient of determination (R2)

def r2( x: numpy.ndarray, y: numpy.ndarray, t: kneeliverse.metrics.R2 = <R2.classic: 'classic'>) -> float:
def r2(x: np.ndarray, y: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
    """Computes the coefficient of determination (R2).

    Computes the best fit (and not the fast point fit)
    and computes the corresponding R2. The value is obtained as the
    squared correlation coefficient returned by np.corrcoef(x, y).

    With two samples or fewer the result is 1.0, regardless of the
    selected type.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        t (R2): select the type of coefficient of determination

    Returns:
        float: coefficient of determination (R2)
    """
    n = len(x)

    # Return early for tiny inputs: the adjusted correction below divides by
    # (n - 2), which raised ZeroDivisionError for exactly two samples.
    if n <= 2:
        return 1.0

    rv = (np.corrcoef(x, y)[0, 1])**2.0

    if t is metrics.R2.adjusted:
        # adjusted R2 for a single explanatory variable
        rv = 1.0 - (1-rv)*((n-1)/(n-2))

    return rv

Computes the coefficient of determination (R2).

Computes the best fit (and not the fast point fit) and computes the corresponding R2.

Arguments:
  • x (np.ndarray): the value of the points in the x axis coordinates
  • y (np.ndarray): the value of the points in the y axis coordinates
  • t (R2): select the type of coefficient of determination
Returns:

float: coefficient of determination (R2)

def angle(coef1: tuple, coef2: tuple) -> float:
def angle(coef1: tuple, coef2: tuple) -> float:
    """
    Computes the angle between two lines.

    Only the slopes (second element of each coefficient tuple) are used.
    The result is signed: it is positive when the first slope is larger
    than the second and the denominator (1 + m1*m2) is positive.

    Args:
        coef1 (tuple): the coefficients from the first line
        coef2 (tuple): the coefficients from the second line

    Returns:
        float: the signed angle between two lines, in radians,
        $\\left[-\\frac{\\pi}{2},\\frac{\\pi}{2} \\right]$
    """
    _, m1 = coef1
    _, m2 = coef2

    num = m1 - m2
    den = 1.0 + m1*m2

    # Perpendicular lines (m1*m2 == -1) make the denominator vanish; the
    # limit of atan is +/- pi/2, so return it instead of dividing by zero.
    if den == 0:
        return math.copysign(math.pi/2.0, num)

    return math.atan(num/den)

Computes the angle between two lines.

Arguments:
  • coef1 (tuple): the coefficients from the first line
  • coef2 (tuple): the coefficients from the second line
Returns:

float: the signed angle between two lines, in radians, within $\left[-\frac{\pi}{2},\frac{\pi}{2} \right]$

def shortest_distance_points(p: numpy.ndarray, a: numpy.ndarray, b: numpy.ndarray):
def shortest_distance_points(p: np.ndarray, a: np.ndarray, b: np.ndarray):
    """
    Computes the shortest distance from the points to the
    line segment defined by the left and right point.

    The component parallel to the segment is clamped, so points that
    project outside the segment are measured against the nearest end
    point. When both end points coincide, the distance to that single
    point is returned.

    Args:
        p (np.ndarray): numpy array with the points (x, y)
        a (np.ndarray): the left point
        b (np.ndarray): the right point

    Returns:
        np.ndarray: the shortest distance from each point to the segment
    """

    # TODO: consider implementing @Eskapp's suggestions
    if np.all(a == b):
        return np.linalg.norm(p - a, axis=1)

    # normalized tangent vector
    d = np.divide(b - a, np.linalg.norm(b - a))

    # signed parallel distance components
    s = np.dot(a - p, d)
    t = np.dot(p - b, d)

    # clamped parallel distance
    h = np.maximum.reduce([s, t, np.zeros(len(p))])

    # perpendicular distance component (signed): the 2-D cross product is
    # written out because np.cross on 2-D vectors is deprecated (NumPy 2.0)
    r = p - a
    c = r[:, 0]*d[1] - r[:, 1]*d[0]

    # use hypot for Pythagoras to improve accuracy
    return np.hypot(h, c)

Computes the shortest distance from the points to the straight line defined by the left and right point.

Arguments:
  • p (np.ndarray): numpy array with the points (x, y)
  • a (np.ndarray): the left point
  • b (np.ndarray): the right point
Returns:

np.ndarray: the perpendicular distances

def perpendicular_distance(points: numpy.ndarray) -> numpy.ndarray:
def perpendicular_distance(points: np.ndarray) -> np.ndarray:
    """
    Perpendicular distance from every point to the straight line
    that goes through the first and the last point of the array.

    Delegates to perpendicular_distance_index using the first and
    last indices of the array.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        np.ndarray: the perpendicular distances

    """
    first = 0
    last = len(points) - 1
    return perpendicular_distance_index(points, first, last)

Computes the perpendicular distance from the points to the straight line defined by the first and last point.

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
Returns:

np.ndarray: the perpendicular distances

def perpendicular_distance_index(points: numpy.ndarray, left: int, right: int) -> numpy.ndarray:
def perpendicular_distance_index(points: np.ndarray, left: int, right: int) -> np.ndarray:
    """
    Computes the perpendicular distance from the points to the
    straight line defined by the left and right point.

    Only the points between the two indices (both included) are
    considered, so the result has right - left + 1 elements.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        left (int): the index of the left point
        right (int): the index of the right point

    Returns:
        np.ndarray: the perpendicular distances
    """
    # The index `left` used to be added to every distance, which shifted the
    # values whenever left != 0; return the plain distances as documented.
    return perpendicular_distance_points(points[left:right+1], points[left], points[right])

Computes the perpendicular distance from the points to the straight line defined by the left and right point.

Arguments:
  • points (np.ndarray): numpy array with the points (x, y)
  • left (int): the index of the left point
  • right (int): the index of the right point
Returns:

np.ndarray: the perpendicular distances

def perpendicular_distance_points( pt: numpy.ndarray, start: numpy.ndarray, end: numpy.ndarray) -> numpy.ndarray:
def perpendicular_distance_points(pt: np.ndarray, start: np.ndarray, end: np.ndarray) -> np.ndarray:
    """
    Computes the perpendicular distance from the points to the
    straight line defined by the left and right point.

    When the left and right point coincide no line is defined; in that
    case the distance from each point to that single point is returned.

    Args:
        pt (np.ndarray): numpy array with the points (x, y)
        start (np.ndarray): the left point
        end (np.ndarray): the right point

    Returns:
        np.ndarray: the perpendicular distances
    """
    direction = end - start
    offset = pt - start
    length = np.linalg.norm(direction)

    # degenerate line: avoid the 0/0 division that produced NaN values
    if length == 0:
        return np.linalg.norm(offset, axis=-1)

    # 2-D cross product written out, since np.cross on 2-D vectors is
    # deprecated (NumPy 2.0)
    cross = direction[0]*offset[..., 1] - direction[1]*offset[..., 0]
    return np.fabs(cross/length)

Computes the perpendicular distance from the points to the straight line defined by the left and right point.

Arguments:
  • pt (np.ndarray): numpy array with the points (x, y)
  • start (np.ndarray): the left point
  • end (np.ndarray): the right point
Returns:

np.ndarray: the perpendicular distances