kneeliverse.linear_fit
The following module provides a set of methods used for fast linear fitting. These methods are then used by the L-method knee algorithm.
# coding: utf-8

'''
The following module provides a set of methods
used for fast linear fitting. These methods are
then used on the L-methods knee algorithm.
'''

__author__ = 'Mário Antunes'
__version__ = '1.0'
__email__ = 'mario.antunes@ua.pt'
__status__ = 'Development'
__license__ = 'MIT'
__copyright__ = '''
Copyright (c) 2021-2023 Stony Brook University
Copyright (c) 2021-2023 The Research Foundation of SUNY
'''

import math
import logging
import numpy as np
import kneeliverse.metrics as metrics


from typing import Union


logger = logging.getLogger(__name__)


def linear_fit_points(points: np.ndarray) -> tuple:
    """
    Computes the linear fit for the points.

    This method approximates the linear fit using only the
    first and last points in a curve.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        tuple: (b, m)
    """
    x = points[:, 0]
    y = points[:, 1]
    return linear_fit(x, y)


def linear_fit(x: np.ndarray, y: np.ndarray) -> tuple:
    """
    Computes the linear fit for the points.

    This method approximates the linear fit using only the
    first and last points in a curve.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates

    Returns:
        tuple: (b, m); (0, 0) when the first and last x values are equal
    """
    d = x[0] - x[-1]
    if d != 0:
        m = (y[0] - y[-1])/d
        b = y[0] - (m*x[0])
        return (b, m)
    else:
        # the slope is undefined when both end points share the same x
        return (0, 0)


def linear_transform_points(points: np.ndarray, coef: tuple) -> np.ndarray:
    """
    Computes the y values for an x array and the given coefficients.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        np.ndarray: the corresponding y values
    """
    x = points[:, 0]
    return linear_transform(x, coef)


def linear_transform(x: np.ndarray, coef: tuple) -> np.ndarray:
    """
    Computes the y values for an x array and the given coefficients.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        coef (tuple): the coefficients from the linear fit, ordered (b, m)

    Returns:
        np.ndarray: the corresponding y values
    """
    b, m = coef
    y_hat = x * m + b
    return y_hat


def linear_hv_residuals_points(points: np.ndarray) -> float:
    """
    Computes the residual error from a straight line fitting.

    This method tries a horizontal (typical $f(x)=m \\times x + b$ equation)
    and a vertical line (a non typical $f(y) = m \\times y + b$ equation).
    It returns the smaller of the two residual errors.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        float: the residuals of the line fitting.
    """
    x = points[:, 0]
    y = points[:, 1]
    return linear_hv_residuals(x, y)


def linear_hv_residuals(x: np.ndarray, y: np.ndarray) -> float:
    """
    Computes the residual error from a straight line fitting.

    This method tries a horizontal (typical $f(x)=m \\times x + b$ equation)
    and a vertical line (a non typical $f(y) = m \\times y + b$ equation).
    It returns the smaller of the two residual errors.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates

    Returns:
        float: the residuals of the line fitting.
    """
    # try a typical y = mx + b line
    coef1 = linear_fit(x, y)
    y_residuals = linear_residuals(x, y, coef1)

    # try a non-typical x = my + b line
    coef2 = linear_fit(y, x)
    x_residuals = linear_residuals(y, x, coef2)

    if y_residuals <= x_residuals:
        return y_residuals
    else:
        return x_residuals


def linear_fit_transform_points(points: np.ndarray, vertical: bool = False) -> Union[np.ndarray, tuple]:
    """
    Computes the horizontal or vertical line fitting.

    A horizontal line fitting uses the typical $f(x)=m \\times x + b$ equation.
    The vertical line fitting uses the non typical $f(y) = m \\times y + b$ equation.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        vertical (bool): if True the vertical line fitting is also tried (default: False)

    Returns:
        Union[np.ndarray, tuple]: the result of `linear_fit_transform`
        for the x and y columns of the points.
    """
    x = points[:, 0]
    y = points[:, 1]
    return linear_fit_transform(x, y, vertical)


def linear_fit_transform(x: np.ndarray, y: np.ndarray, vertical=False) -> Union[np.ndarray, tuple]:
    """
    Computes the horizontal or vertical line fitting.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        vertical (bool): if True the vertical line fitting is also tried (default: False)

    Returns:
        Union[np.ndarray, tuple]: when vertical is False, only the estimated
        y values. When vertical is True, a tuple (values, estimates) taken
        from the fit with the smaller residual: (y, y_hat) for the
        horizontal fit or (x, x_hat) for the vertical fit.
    """
    # try a typical y = mx + b line
    coef1 = linear_fit(x, y)
    y_hat = linear_transform(x, coef1)

    if vertical:
        y_residuals = linear_residuals(x, y, coef1)
        # try a non-typical x = my + b line
        coef2 = linear_fit(y, x)
        x_hat = linear_transform(y, coef2)
        x_residuals = linear_residuals(y, x, coef2)

        if y_residuals <= x_residuals:
            return y, y_hat
        else:
            return x, x_hat
    else:
        return y_hat


def linear_r2_points(points: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R2.classic) -> float:
    """
    Computes the coefficient of determination (R2).

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        r2 (R2): select the type of coefficient of determination (default: R2.classic)

    Returns:
        float: coefficient of determination (R2)
    """
    x = points[:, 0]
    y = points[:, 1]
    return linear_r2(x, y, coef, r2)


def linear_r2(x: np.ndarray, y: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R2.classic) -> float:
    """
    Computes the coefficient of determination (R2).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        r2 (R2): select the type of coefficient of determination (default: R2.classic)

    Returns:
        float: coefficient of determination (R2). With exactly two points
        the adjusted variant is returned unadjusted, because its
        denominator (n - 2) would be zero.
    """
    y_hat = linear_transform(x, coef)
    y_mean = np.mean(y)
    rss = np.sum((y-y_hat)**2)
    tss = np.sum((y-y_mean)**2)
    rv = 0.0

    if tss == 0:
        # constant y: rss/tss is undefined, fall back to 1 - rss
        rv = 1.0 - rss
    else:
        rv = 1.0 - (rss/tss)

    if r2 is metrics.R2.adjusted:
        dof = len(x) - 2
        # skip the adjustment instead of dividing by zero
        if dof != 0:
            rv = 1.0 - (1.0 - rv)*((len(x)-1)/dof)

    return rv


def rmspe_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the Root Mean Squared Percentage Error (RMSPE).

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Root Mean Squared Percentage Error (RMSPE)
    """
    x = points[:, 0]
    y = points[:, 1]
    return rmspe(x, y, coef, eps)


def rmspe(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the Root Mean Squared Percentage Error (RMSPE).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Root Mean Squared Percentage Error (RMSPE)
    """
    y_hat = linear_transform(x, coef)
    # forward eps like smape() and rpd() do; it used to be silently ignored
    # NOTE(review): assumes metrics.rmspe accepts eps as third argument
    return metrics.rmspe(y, y_hat, eps)


def rmsle_points(points: np.ndarray, coef: tuple) -> float:
    """
    Computes the Root Mean Squared Log Error (RMSLE):
    $$
    RMSLE(y, \\hat{y}) = \\sqrt{\\frac{\\sum_{i=1}^{n}(\\log (y_i+1) - \\log (\\hat{y_i}+1))^2}{n}}
    $$

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Log Error (RMSLE)
    """
    x = points[:, 0]
    y = points[:, 1]
    return rmsle(x, y, coef)


def rmsle(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Computes the Root Mean Squared Log Error (RMSLE):
    $$
    RMSLE(y, \\hat{y}) = \\sqrt{\\frac{\\sum_{i=1}^{n}(\\log (y_i+1) - \\log (\\hat{y_i}+1))^2}{n}}
    $$

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Log Error (RMSLE)
    """
    y_hat = linear_transform(x, coef)
    return metrics.rmsle(y, y_hat)


def smape_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the error reported by `metrics.smape` for the linear fit
    (presumably the Symmetric Mean Absolute Percentage Error).

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value forwarded to `metrics.smape` (default: 1E-16)

    Returns:
        float: the value returned by `metrics.smape`
    """
    x = points[:, 0]
    y = points[:, 1]
    return smape(x, y, coef, eps)


def smape(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the error reported by `metrics.smape` for the linear fit
    (presumably the Symmetric Mean Absolute Percentage Error).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value forwarded to `metrics.smape` (default: 1E-16)

    Returns:
        float: the value returned by `metrics.smape`
    """
    y_hat = linear_transform(x, coef)
    return metrics.smape(y, y_hat, eps)


def rpd_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the Relative Percentage Difference (RPD).

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Relative Percentage Difference (RPD)
    """
    x = points[:, 0]
    y = points[:, 1]
    return rpd(x, y, coef, eps)


def rpd(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the Relative Percentage Difference (RPD).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Relative Percentage Difference (RPD)
    """
    y_hat = linear_transform(x, coef)
    return metrics.rpd(y, y_hat, eps)


def rmse_points(points: np.ndarray, coef: tuple) -> float:
    """
    Computes the Root Mean Squared Error (RMSE).

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Error (RMSE)
    """
    x = points[:, 0]
    y = points[:, 1]
    return rmse(x, y, coef)


def rmse(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Computes the Root Mean Squared Error (RMSE).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Error (RMSE)
    """
    y_hat = linear_transform(x, coef)
    return metrics.rmse(y, y_hat)


def linear_residuals_points(points: np.ndarray, coef: tuple) -> float:
    """
    Computes the residual error of the linear fit.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: residual error of the linear fit
    """
    x = points[:, 0]
    y = points[:, 1]
    return linear_residuals(x, y, coef)


def linear_residuals(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Computes the residual error of the linear fit.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: residual error of the linear fit
    """
    y_hat = linear_transform(x, coef)
    return metrics.residuals(y, y_hat)


def linear_fit_residuals_points(points: np.ndarray) -> float:
    """
    Fits a line through the first and last points and computes
    the residual error of that fit.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        float: residual error of the linear fit
    """
    x = points[:, 0]
    y = points[:, 1]
    return linear_fit_residuals(x, y)


def linear_fit_residuals(x: np.ndarray, y: np.ndarray) -> float:
    """
    Fits a line through the first and last points and computes
    the residual error of that fit.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates

    Returns:
        float: residual error of the linear fit
    """
    coef = linear_fit(x, y)
    y_hat = linear_transform(x, coef)
    return metrics.residuals(y, y_hat)


def r2_points(points: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
    """
    Computes the coefficient of determination (R2).

    Computes the best fit (and not the fast point fit)
    and computes the corresponding R2.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (R2): select the type of coefficient of determination

    Returns:
        float: coefficient of determination (R2); 1.0 for two or fewer points
    """
    if len(points) <= 2:
        return 1.0
    else:
        x = points[:, 0]
        y = points[:, 1]
        return r2(x, y, t)


def r2(x: np.ndarray, y: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
    """Computes the coefficient of determination (R2).

    Computes the best fit (and not the fast point fit)
    and computes the corresponding R2.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        t (R2): select the type of coefficient of determination

    Returns:
        float: coefficient of determination (R2); 1.0 for two or fewer points
    """
    if len(x) <= 2:
        # returning early also avoids the (n - 2) division by zero that the
        # adjusted variant would hit with exactly two points
        return 1.0

    rv = (np.corrcoef(x, y)[0, 1])**2.0

    if t is metrics.R2.adjusted:
        rv = 1.0 - (1-rv)*((len(x)-1)/(len(x)-2))

    return rv


def angle(coef1: tuple, coef2: tuple) -> float:
    """
    Computes the angle between two lines.

    Args:
        coef1 (tuple): the coefficients from the first line
        coef2 (tuple): the coefficients from the second line

    Returns:
        float: the signed angle between two lines
        $\\left[-\\frac{\\pi}{2},\\frac{\\pi}{2} \\right]$
    """
    _, m1 = coef1
    _, m2 = coef2
    denominator = 1.0 + m1*m2
    if denominator == 0:
        # perpendicular lines: the tangent is unbounded, so return the
        # limit value instead of dividing by zero
        return math.copysign(math.pi/2.0, m1 - m2)
    return math.atan((m1-m2)/denominator)


def shortest_distance_points(p: np.ndarray, a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """
    Computes the shortest distance from the points to the
    line segment defined by the points a and b.

    Args:
        p (np.ndarray): numpy array with the points (x, y)
        a (np.ndarray): the first end point of the segment
        b (np.ndarray): the second end point of the segment

    Returns:
        np.ndarray: the distances from each point to the segment
    """
    # degenerate segment: plain distance to the single point
    if np.all(a == b):
        return np.linalg.norm(p - a, axis=1)

    # normalized tangent vector
    d = np.divide(b - a, np.linalg.norm(b - a))

    # signed parallel distance components
    s = np.dot(a - p, d)
    t = np.dot(p - b, d)

    # clamped parallel distance
    h = np.maximum.reduce([s, t, np.zeros(len(p))])

    # perpendicular distance component
    # note that for the 3D case these will be vectors
    c = np.cross(p - a, d)

    # use hypot for Pythagoras to improve accuracy
    return np.hypot(h, c)


def perpendicular_distance(points: np.ndarray) -> np.ndarray:
    """
    Computes the perpendicular distance from the points to the
    straight line defined by the first and last point.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        np.ndarray: the perpendicular distances

    """
    return perpendicular_distance_index(points, 0, len(points) - 1)


def perpendicular_distance_index(points: np.ndarray, left: int, right: int) -> np.ndarray:
    """
    Computes the perpendicular distance from the points to the
    straight line defined by the left and right point.

    Only the points in the range [left, right] are considered.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        left (int): the index of the left point
        right (int): the index of the right point

    Returns:
        np.ndarray: the perpendicular distances
    """
    # the left index is not added to the result: the values are distances,
    # not positions, so offsetting them by an index would corrupt them
    return perpendicular_distance_points(points[left:right+1], points[left], points[right])


def perpendicular_distance_points(pt: np.ndarray, start: np.ndarray, end: np.ndarray) -> np.ndarray:
    """
    Computes the perpendicular distance from the points to the
    straight line defined by the left and right point.

    Args:
        pt (np.ndarray): numpy array with the points (x, y)
        start (np.ndarray): the left point
        end (np.ndarray): the right point

    Returns:
        np.ndarray: the perpendicular distances
    """
    direction = end - start
    length = np.linalg.norm(direction)
    if length == 0:
        # start and end coincide, so no line is defined; use the distance
        # to that single point instead of dividing by zero
        return np.linalg.norm(pt - start, axis=1)
    return np.fabs(np.cross(direction, pt - start)/length)
def linear_fit_points(points: np.ndarray) -> tuple:
    """
    Fit a straight line to a curve given as an array of (x, y) points.

    The array is split into its x (column 0) and y (column 1)
    coordinates and handed to `linear_fit`, which builds the line
    from the first and last point only.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        tuple: (b, m)
    """
    xs, ys = points[:, 0], points[:, 1]
    return linear_fit(xs, ys)
Computes the linear fit for the points.
This method approximates the linear fit using only the first and last points in a curve.
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
Returns:
tuple: (b, m)
def linear_fit(x: np.ndarray, y: np.ndarray) -> tuple:
    """
    Approximate a linear fit from the two end points of a curve.

    Only the first and last samples are used: the slope is the rise
    over the run between them, and the intercept is chosen so the line
    passes through the first point.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates

    Returns:
        tuple: (b, m); (0, 0) when the first and last x values are equal
    """
    x_first, x_last = x[0], x[-1]
    run = x_first - x_last

    # the slope is undefined when both end points share the same x
    if run == 0:
        return (0, 0)

    slope = (y[0] - y[-1]) / run
    intercept = y[0] - (slope * x_first)
    return (intercept, slope)
Computes the linear fit for the points.
This method approximates the linear fit using only the first and last points in a curve.
Arguments:
- x (np.ndarray): the value of the points in the x axis coordinates
- y (np.ndarray): the value of the points in the y axis coordinates
Returns:
tuple: (b, m)
def linear_transform_points(points: np.ndarray, coef: tuple) -> np.ndarray:
    """
    Evaluate a fitted line at the x coordinates of the given points.

    Only column 0 (the x values) of the array is used; the evaluation
    itself is done by `linear_transform`.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        np.ndarray: the corresponding y values
    """
    return linear_transform(points[:, 0], coef)
Computes the y values for an x array and the given coefficients.
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- coef (tuple): the coefficients from the linear fit
Returns:
np.ndarray: the corresponding y values
def linear_transform(x: np.ndarray, coef: tuple) -> np.ndarray:
    """
    Evaluate the line described by the coefficients at the given x values.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        coef (tuple): the coefficients from the linear fit, ordered (b, m)

    Returns:
        np.ndarray: the corresponding y values
    """
    intercept, slope = coef
    return x * slope + intercept
Computes the y values for an x array and the given coefficients.
Arguments:
- x (np.ndarray): the value of the points in the x axis coordinates
- coef (tuple): the coefficients from the linear fit
Returns:
np.ndarray: the corresponding y values
def linear_hv_residuals_points(points: np.ndarray) -> float:
    """
    Compute the residual error of the better of two straight-line fits.

    Both a horizontal fit (the typical $f(x)=m \\times x + b$ equation)
    and a vertical fit (the non typical $f(y) = m \\times y + b$
    equation) are evaluated by `linear_hv_residuals`; the smaller
    residual error is returned.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        float: the residuals of the line fitting.
    """
    xs, ys = points[:, 0], points[:, 1]
    return linear_hv_residuals(xs, ys)
Computes the residual error from a straight line fitting.
This method tries a horizontal line (the typical $f(x)=m \times x + b$ equation) and a vertical line (the non-typical $f(y) = m \times y + b$ equation). It returns the smaller of the two residual errors.
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
Returns:
float: the residuals of the line fitting.
def linear_hv_residuals(x: np.ndarray, y: np.ndarray) -> float:
    """
    Compute the residual error of the better of two straight-line fits.

    A horizontal fit (the typical $f(x)=m \\times x + b$ equation) and a
    vertical fit (the non typical $f(y) = m \\times y + b$ equation,
    obtained by swapping the roles of x and y) are both evaluated, and
    the smaller residual error is returned.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates

    Returns:
        float: the residuals of the line fitting.
    """
    # typical y = mx + b line
    horizontal_error = linear_residuals(x, y, linear_fit(x, y))

    # non-typical x = my + b line
    vertical_error = linear_residuals(y, x, linear_fit(y, x))

    # ties are resolved in favour of the horizontal fit
    return horizontal_error if horizontal_error <= vertical_error else vertical_error
Computes the residual error from a straight line fitting.
This method tries a horizontal line (the typical $f(x)=m \times x + b$ equation) and a vertical line (the non-typical $f(y) = m \times y + b$ equation). It returns the smaller of the two residual errors.
Arguments:
- x (np.ndarray): the value of the points in the x axis coordinates
- y (np.ndarray): the value of the points in the y axis coordinates
Returns:
float: the residuals of the line fitting.
def linear_fit_transform_points(points: np.ndarray, vertical:bool=False) -> Union[np.ndarray, tuple]:
    """
    Fit a line to the points and return the fitted values.

    A horizontal line fitting uses the typical $f(x)=m \\times x + b$ equation.
    The vertical line fitting uses the non typical $f(y) = m \\times y + b$ equation.
    The work is delegated to `linear_fit_transform` using the x (column 0)
    and y (column 1) coordinates of the points.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        vertical (bool): if True the vertical line fitting is also tried (default: False)

    Returns:
        Union[np.ndarray, tuple]: whatever `linear_fit_transform` returns
        for the given coordinates and `vertical` flag.
    """
    xs, ys = points[:, 0], points[:, 1]
    return linear_fit_transform(xs, ys, vertical)
Computes the horizontal or vertical line fitting.
A horizontal line fitting uses the typical $f(x)=m \times x + b$ equation. The vertical line fitting uses the non-typical $f(y) = m \times y + b$ equation.
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- vertical (bool): if True computes the vertical line fitting (default: False)
Returns:
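Union[np.ndarray, tuple]: for the horizontal line fitting, returns only the computed y_hat values. When vertical is True, returns a tuple with the observed values and their estimates from whichever fit has the smaller residual: (y, y_hat) for the horizontal fit or (x, x_hat) for the vertical fit.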
def linear_fit_transform(x: np.ndarray, y: np.ndarray, vertical=False) -> Union[np.ndarray, tuple]:
    """
    Fit a line to the coordinates and return the fitted values.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        vertical (bool): if True the vertical line fitting is also tried (default: False)

    Returns:
        Union[np.ndarray, tuple]: when vertical is False, only the estimated
        y values. When vertical is True, a tuple (values, estimates) taken
        from the fit with the smaller residual: (y, y_hat) for the
        horizontal fit or (x, x_hat) for the vertical fit.
    """
    # typical y = mx + b line
    horizontal_coef = linear_fit(x, y)
    horizontal_hat = linear_transform(x, horizontal_coef)

    if not vertical:
        return horizontal_hat

    # non-typical x = my + b line
    vertical_coef = linear_fit(y, x)
    vertical_hat = linear_transform(y, vertical_coef)

    horizontal_error = linear_residuals(x, y, horizontal_coef)
    vertical_error = linear_residuals(y, x, vertical_coef)

    # ties are resolved in favour of the horizontal fit
    if horizontal_error <= vertical_error:
        return y, horizontal_hat
    return x, vertical_hat
def linear_r2_points(points: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R2.classic) -> float:
    """
    Compute the coefficient of determination (R2) of a fit over the points.

    The x (column 0) and y (column 1) coordinates are extracted and
    passed on to `linear_r2`.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        r2 (R2): select the type of coefficient of determination (default: R2.classic)

    Returns:
        float: coefficient of determination (R2)
    """
    xs, ys = points[:, 0], points[:, 1]
    return linear_r2(xs, ys, coef, r2)
Computes the coefficient of determination (R2).
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- coef (tuple): the coefficients from the linear fit
- r2 (R2): select the type of coefficient of determination (default: R2.classic)
Returns:
float: coefficient of determination (R2)
def linear_r2(x: np.ndarray, y: np.ndarray, coef: tuple, r2: metrics.R2 = metrics.R2.classic) -> float:
    """
    Computes the coefficient of determination (R2).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        r2 (R2): select the type of coefficient of determination (default: R2.classic)

    Returns:
        float: coefficient of determination (R2). With exactly two points
        the adjusted variant is returned unadjusted, because its
        denominator (n - 2) would be zero.
    """
    y_hat = linear_transform(x, coef)
    y_mean = np.mean(y)
    rss = np.sum((y-y_hat)**2)
    tss = np.sum((y-y_mean)**2)

    if tss == 0:
        # constant y: rss/tss is undefined, fall back to 1 - rss
        rv = 1.0 - rss
    else:
        rv = 1.0 - (rss/tss)

    if r2 is metrics.R2.adjusted:
        dof = len(x) - 2
        # skip the adjustment instead of raising ZeroDivisionError
        if dof != 0:
            rv = 1.0 - (1.0 - rv)*((len(x)-1)/dof)

    return rv
Computes the coefficient of determination (R2).
Arguments:
- x (np.ndarray): the value of the points in the x axis coordinates
- y (np.ndarray): the value of the points in the y axis coordinates
- coef (tuple): the coefficients from the linear fit
- r2 (R2): select the type of coefficient of determination (default: R2.classic)
Returns:
float: coefficient of determination (R2)
def rmspe_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Compute the Root Mean Squared Percentage Error (RMSPE) over the points.

    The x (column 0) and y (column 1) coordinates are extracted and
    passed on to `rmspe`.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Root Mean Squared Percentage Error (RMSPE)
    """
    xs, ys = points[:, 0], points[:, 1]
    return rmspe(xs, ys, coef, eps)
Computes the Root Mean Squared Percentage Error (RMSPE).
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- coef (tuple): the coefficients from the linear fit
- eps (float): eps value to prevent division by zero (default: 1E-16)
Returns:
float: Root Mean Squared Percentage Error (RMSPE)
def rmspe(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Computes the Root Mean Squared Percentage Error (RMSPE).

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Root Mean Squared Percentage Error (RMSPE)
    """
    y_hat = linear_transform(x, coef)
    # forward eps like smape() and rpd() do; it used to be silently ignored
    # NOTE(review): assumes metrics.rmspe accepts eps as third argument
    return metrics.rmspe(y, y_hat, eps)
Computes the Root Mean Squared Percentage Error (RMSPE).
Arguments:
- x (np.ndarray): the value of the points in the x axis coordinates
- y (np.ndarray): the value of the points in the y axis coordinates
- coef (tuple): the coefficients from the linear fit
- eps (float): eps value to prevent division by zero (default: 1E-16)
Returns:
float: Root Mean Squared Percentage Error (RMSPE)
def rmsle_points(points: np.ndarray, coef: tuple) -> float:
    """
    Compute the Root Mean Squared Log Error (RMSLE) over the points:
    $$
    RMSLE(y, \\hat{y}) = \\sqrt{\\frac{\\sum_{i=1}^{n}(\\log (y_i+1) - \\log (\\hat{y_i}+1))^2}{n}}
    $$

    The x (column 0) and y (column 1) coordinates are extracted and
    passed on to `rmsle`.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Log Error (RMSLE)
    """
    xs, ys = points[:, 0], points[:, 1]
    return rmsle(xs, ys, coef)
Computes the Root Mean Squared Log Error (RMSLE): $$ RMSLE(y, \hat{y}) = \sqrt{\frac{\sum_{i=1}^{n}(\log (y_i+1) - \log (\hat{y_i}+1))^2}{n}} $$
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- coef (tuple): the coefficients from the linear fit
Returns:
float: Root Mean Squared Log Error (RMSLE)
def rmsle(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Compute the Root Mean Squared Log Error (RMSLE) of a linear fit:
    $$
    RMSLE(y, \\hat{y}) = \\sqrt{\\frac{\\sum_{i=1}^{n}(\\log (y_i+1) - \\log (\\hat{y_i}+1))^2}{n}}
    $$

    The line is evaluated at x and the estimates are compared with y
    by `metrics.rmsle`.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Log Error (RMSLE)
    """
    estimates = linear_transform(x, coef)
    return metrics.rmsle(y, estimates)
Computes the Root Mean Squared Log Error (RMSLE): $$ RMSLE(y, \hat{y}) = \sqrt{\frac{\sum_{i=1}^{n}(\log (y_i+1) - \log (\hat{y_i}+1))^2}{n}} $$
Arguments:
- x (np.ndarray): the value of the points in the x axis coordinates
- y (np.ndarray): the value of the points in the y axis coordinates
- coef (tuple): the coefficients from the linear fit
Returns:
float: Root Mean Squared Log Error (RMSLE)
def rpd_points(points: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Compute the Relative Percentage Difference (RPD) over the points.

    The x (column 0) and y (column 1) coordinates are extracted and
    passed on to `rpd`.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Relative Percentage Difference (RPD)
    """
    xs, ys = points[:, 0], points[:, 1]
    return rpd(xs, ys, coef, eps)
Computes the Relative Percentage Difference (RPD).
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- coef (tuple): the coefficients from the linear fit
- eps (float): eps value to prevent division by zero (default: 1E-16)
Returns:
float: Relative Percentage Difference (RPD)
def rpd(x: np.ndarray, y: np.ndarray, coef: tuple, eps: float = 1e-16) -> float:
    """
    Compute the Relative Percentage Difference (RPD) of a linear fit.

    The line is evaluated at x and the estimates are compared with y
    by `metrics.rpd`, which also receives `eps`.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit
        eps (float): eps value to prevent division by zero (default: 1E-16)

    Returns:
        float: Relative Percentage Difference (RPD)
    """
    estimates = linear_transform(x, coef)
    return metrics.rpd(y, estimates, eps)
Computes the Relative Percentage Difference (RPD).
Arguments:
- x (np.ndarray): the value of the points in the x axis coordinates
- y (np.ndarray): the value of the points in the y axis coordinates
- coef (tuple): the coefficients from the linear fit
- eps (float): eps value to prevent division by zero (default: 1E-16)
Returns:
float: Relative Percentage Difference (RPD)
def rmse_points(points: np.ndarray, coef: tuple) -> float:
    """
    Compute the Root Mean Squared Error (RMSE) over the points.

    The x (column 0) and y (column 1) coordinates are extracted and
    passed on to `rmse`.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Error (RMSE)
    """
    xs, ys = points[:, 0], points[:, 1]
    return rmse(xs, ys, coef)
Computes the Root Mean Squared Error (RMSE).
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- coef (tuple): the coefficients from the linear fit
Returns:
float: Root Mean Squared Error (RMSE)
def rmse(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Compute the Root Mean Squared Error (RMSE) of a linear fit.

    The line is evaluated at x and the estimates are compared with y
    by `metrics.rmse`.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: Root Mean Squared Error (RMSE)
    """
    estimates = linear_transform(x, coef)
    return metrics.rmse(y, estimates)
Computes the Root Mean Squared Error (RMSE).
Arguments:
- x (np.ndarray): the value of the points in the x axis coordinates
- y (np.ndarray): the value of the points in the y axis coordinates
- coef (tuple): the coefficients from the linear fit
Returns:
float: Root Mean Squared Error (RMSE)
def linear_residuals_points(points: np.ndarray, coef: tuple) -> float:
    """
    Residual error of a linear fit, for points given as a single
    (x, y) array.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: residual error of the linear fit
    """
    # Column 0 holds x, column 1 holds y; delegate to the coordinate version.
    xs, ys = points[:, 0], points[:, 1]
    return linear_residuals(xs, ys, coef)
Computes the residual error of the linear fit.
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- coef (tuple): the coefficients from the linear fit
Returns:
float: residual error of the linear fit
def linear_residuals(x: np.ndarray, y: np.ndarray, coef: tuple) -> float:
    """
    Residual error between the observed y values and the values
    predicted by the linear coefficients.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        coef (tuple): the coefficients from the linear fit

    Returns:
        float: residual error of the linear fit
    """
    # Evaluate the line at every x, then compare against the observations.
    return metrics.residuals(y, linear_transform(x, coef))
Computes the residual error of the linear fit.
Arguments:
- x (np.ndarray): the value of the points in the x axis coordinates
- y (np.ndarray): the value of the points in the y axis coordinates
- coef (tuple): the coefficients from the linear fit
Returns:
float: residual error of the linear fit
def r2_points(points: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
    """
    Coefficient of determination (R2) for points given as a single
    (x, y) array.

    The value is based on the best fit (and not the fast point fit).

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        t (R2): select the type of coefficient of determination

    Returns:
        float: coefficient of determination (R2)
    """
    # Guard clause: with at most two points the result is fixed at 1.0.
    if len(points) <= 2:
        return 1.0

    xs, ys = points[:, 0], points[:, 1]
    return r2(xs, ys, t)
Computes the coefficient of determination (R2).
Computes the best fit (and not the fast point fit) and computes the corresponding R2.
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- t (R2): select the type of coefficient of determination
Returns:
float: coefficient of determination (R2)
def r2(x: np.ndarray, y: np.ndarray, t: metrics.R2 = metrics.R2.classic) -> float:
    """
    Computes the coefficient of determination (R2).

    Computes the best fit (and not the fast point fit)
    and computes the corresponding R2.

    Args:
        x (np.ndarray): the value of the points in the x axis coordinates
        y (np.ndarray): the value of the points in the y axis coordinates
        t (R2): select the type of coefficient of determination

    Returns:
        float: coefficient of determination (R2); 1.0 when there are
        two points or fewer, for both the classic and the adjusted type
    """
    n = len(x)

    # With at most two points the result is fixed at 1.0. Returning here also
    # keeps the adjusted correction below from dividing by (n - 2) == 0.
    if n <= 2:
        return 1.0

    # Squared correlation coefficient between x and y.
    rv = (np.corrcoef(x, y)[0, 1])**2.0

    if t is metrics.R2.adjusted:
        # Adjusted R2 with a single predictor: 1 - (1 - R2) * (n - 1) / (n - 2)
        rv = 1.0 - (1 - rv) * ((n - 1) / (n - 2))

    return rv
Computes the coefficient of determination (R2).
Computes the best fit (and not the fast point fit) and computes the corresponding R2.
Arguments:
- x (np.ndarray): the value of the points in the x axis coordinates
- y (np.ndarray): the value of the points in the y axis coordinates
- t (R2): select the type of coefficient of determination
Returns:
float: coefficient of determination (R2)
def angle(coef1: tuple, coef2: tuple) -> float:
    """
    Computes the angle between two lines.

    The angle is obtained from the slopes with
    $\\tan(\\theta) = \\frac{m_1 - m_2}{1 + m_1 m_2}$ and is signed:
    it is positive when the first slope is larger than the second one
    (for a positive denominator).

    Args:
        coef1 (tuple): the coefficients (b, m) from the first line
        coef2 (tuple): the coefficients (b, m) from the second line

    Returns:
        float: the signed angle between the two lines, in radians,
        within $\\left[-\\frac{\\pi}{2},\\frac{\\pi}{2} \\right]$
    """
    _, m1 = coef1
    _, m2 = coef2

    numerator = m1 - m2
    denominator = 1.0 + m1 * m2

    # Perpendicular lines (m1 * m2 == -1) make the denominator zero; the
    # tangent is unbounded there, so return a right angle instead of raising
    # ZeroDivisionError. The sign follows the numerator.
    if denominator == 0:
        return math.copysign(math.pi / 2.0, numerator)

    return math.atan(numerator / denominator)
Computes the angle between two lines.
Arguments:
- coef1 (tuple): the coefficients from the first line
- coef2 (tuple): the coefficients from the second line
Returns:
float: the signed angle between the two lines, in radians, within $\left[-\frac{\pi}{2},\frac{\pi}{2} \right]$
def shortest_distance_points(p: np.ndarray, a: np.ndarray, b: np.ndarray) -> np.ndarray:
    """
    Computes the shortest distance from the points to the
    line segment defined by the left and right point.

    Points whose projection falls outside the segment are measured
    to the nearest end point. When both end points coincide, the
    distance to that single point is returned.

    Args:
        p (np.ndarray): numpy array with the points (x, y)
        a (np.ndarray): the left point
        b (np.ndarray): the right point

    Returns:
        np.ndarray: the shortest distance from each point to the segment
    """

    # TODO: consider implementing @Eskapp's suggestions
    # Degenerate segment: the tangent vector below would be undefined.
    if np.all(a == b):
        return np.linalg.norm(p - a, axis=1)

    # normalized tangent vector
    d = np.divide(b - a, np.linalg.norm(b - a))

    # signed parallel distance components
    s = np.dot(a - p, d)
    t = np.dot(p - b, d)

    # clamped parallel distance (zero when the projection lies on the segment)
    h = np.maximum.reduce([s, t, np.zeros(len(p))])

    # perpendicular distance component: the scalar 2-D cross product, written
    # out explicitly because np.cross on 2-D vectors is deprecated in NumPy 2.0
    offset = p - a
    c = offset[:, 0] * d[1] - offset[:, 1] * d[0]

    # use hypot for Pythagoras to improve accuracy
    return np.hypot(h, c)
Computes the shortest distance from the points to the line segment defined by the left and right point.
Arguments:
- p (np.ndarray): numpy array with the points (x, y)
- a (np.ndarray): the left point
- b (np.ndarray): the right point
Returns:
np.ndarray: the perpendicular distances
def perpendicular_distance(points: np.ndarray) -> np.ndarray:
    """
    Perpendicular distance from every point to the straight line
    that goes through the first and the last point of the array.

    Args:
        points (np.ndarray): numpy array with the points (x, y)

    Returns:
        np.ndarray: the perpendicular distances

    """
    # The reference line spans the whole array: index 0 to the last index.
    first, last = 0, len(points) - 1
    return perpendicular_distance_index(points, first, last)
Computes the perpendicular distance from the points to the straight line defined by the first and last point.
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
Returns:
np.ndarray: the perpendicular distances
def perpendicular_distance_index(points: np.ndarray, left: int, right: int) -> np.ndarray:
    """
    Computes the perpendicular distance from the points to the
    straight line defined by the left and right point.

    Only the points between the two indexes (both included) are
    considered, so the result has right - left + 1 elements.

    Args:
        points (np.ndarray): numpy array with the points (x, y)
        left (int): the index of the left point
        right (int): the index of the right point

    Returns:
        np.ndarray: the perpendicular distances
    """
    # The slice includes both end points of the reference line.
    segment = points[left:right + 1]

    # Return the distances as they are: the left index must not be added to
    # them, otherwise every value is shifted by `left`.
    return perpendicular_distance_points(segment, points[left], points[right])
Computes the perpendicular distance from the points to the straight line defined by the left and right point.
Arguments:
- points (np.ndarray): numpy array with the points (x, y)
- left (int): the index of the left point
- right (int): the index of the right point
Returns:
np.ndarray: the perpendicular distances
def perpendicular_distance_points(pt: np.ndarray, start: np.ndarray, end: np.ndarray) -> np.ndarray:
    """
    Computes the perpendicular distance from the points to the
    straight line defined by the left and right point.

    When the left and right point coincide no line is defined; in
    that case the distance to that single point is returned.

    Args:
        pt (np.ndarray): numpy array with the points (x, y)
        start (np.ndarray): the left point
        end (np.ndarray): the right point

    Returns:
        np.ndarray: the perpendicular distances
    """
    direction = end - start
    length = np.linalg.norm(direction)
    offset = pt - start

    # Degenerate line: dividing by a zero length would yield NaN values.
    if length == 0:
        return np.linalg.norm(offset, axis=-1)

    # Scalar 2-D cross product, written out explicitly because np.cross on
    # 2-D vectors is deprecated in NumPy 2.0.
    cross = direction[0] * offset[..., 1] - direction[1] * offset[..., 0]
    return np.fabs(cross / length)
Computes the perpendicular distance from the points to the straight line defined by the left and right point.
Arguments:
- pt (np.ndarray): numpy array with the points (x, y)
- start (np.ndarray): the left point
- end (np.ndarray): the right point
Returns:
np.ndarray: the perpendicular distances