How can I use gsl to calculate polynomial regression data points?

老子叫甜甜 提交于 2021-01-28 01:45:57

问题


(Disclaimer: I am terrible with math and am coming from JavaScript, so I apologize for any inaccuracies and will do my best to correct them.)

The example on Rosetta Code shows how to calculate coefficients using gsl. Here is the code:

polifitgsl.h:

#ifndef _POLIFITGSL_H
#define _POLIFITGSL_H
#include <gsl/gsl_multifit.h>
#include <stdbool.h>
#include <math.h>
bool polynomialfit(int obs, int degree, 
           double *dx, double *dy, double *store); /* n, p */
#endif

polifitgsl.cpp:

#include "polifitgsl.h"

bool polynomialfit(int obs, int degree, 
           double *dx, double *dy, double *store) /* n, p */
{
  gsl_multifit_linear_workspace *ws;
  gsl_matrix *cov, *X;
  gsl_vector *y, *c;
  double chisq;

  int i, j;

  X = gsl_matrix_alloc(obs, degree);
  y = gsl_vector_alloc(obs);
  c = gsl_vector_alloc(degree);
  cov = gsl_matrix_alloc(degree, degree);

  for(i=0; i < obs; i++) {
    for(j=0; j < degree; j++) {
      gsl_matrix_set(X, i, j, pow(dx[i], j));
    }
    gsl_vector_set(y, i, dy[i]);
  }

  ws = gsl_multifit_linear_alloc(obs, degree);
  gsl_multifit_linear(X, y, c, cov, &chisq, ws);

  /* store result ... */
  for(i=0; i < degree; i++)
  {
    store[i] = gsl_vector_get(c, i);
  }

  gsl_multifit_linear_free(ws);
  gsl_matrix_free(X);
  gsl_matrix_free(cov);
  gsl_vector_free(y);
  gsl_vector_free(c);
  return true; /* we do not "analyse" the result (cov matrix mainly)
          to know if the fit is "good" */
}

main.cpp (note I've replaced the sample numbers for x any y with my own):

#include <stdio.h>

#include "polifitgsl.h"

#define NP 11
double x[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = {98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9};

#define DEGREE 3
double coeff[DEGREE];

int main()
{
  int i;

  polynomialfit(NP, DEGREE, x, y, coeff);
  for(i=0; i < DEGREE; i++) {
    printf("%lf\n", coeff[i]);
  }
  return 0;
}

And here is the output:

98.030909
-0.016182
0.000909

So that gives me the coefficients. But what I really want is the actual fitted points. In JavaScript, I've used the regression package to calculate the points:

var regression = require('regression');

var calculateRegression = function(values, degree) {
    var data = [];
    var regressionOutput;
    var valuesCount = values.length;
    var i = 0;

    // Format the data in a way the regression library expects.
    for (i = 0; i < valuesCount; i++) {
        data[i] = [i, values[i]];
    }

    // Use the library to calculate the regression.
    regressionOutput = regression('polynomial', data, degree);

    return regressionOutput;
};

var y = [98.02, 98.01, 98.01, 98.02, 97.98, 97.97, 97.96, 97.94, 97.96, 97.96, 97.97, 97.97, 97.94, 97.94, 97.94, 97.92, 97.96, 97.9, 97.85, 97.9];

console.log(calculateRegression(y, 3));

Which produces:

{ equation: 
   [ 98.02987916431594,
     -0.017378390369880512,
     0.0015748071645344357,
     -0.00005721503635571101 ],
  points: 
   [ [ 0, 98.02987916431594 ],
     [ 1, 98.01401836607424 ],
     [ 2, 98.00096389194348 ],
     [ 3, 97.9903724517055 ],
     [ 4, 97.98190075514219 ],
     [ 5, 97.97520551203543 ],
     [ 6, 97.96994343216707 ],
     [ 7, 97.96577122531896 ],
     [ 8, 97.96234560127297 ],
     [ 9, 97.959323269811 ],
     [ 10, 97.95636094071487 ],
     [ 11, 97.95311532376647 ],
     [ 12, 97.94924312874768 ],
     [ 13, 97.94440106544033 ],
     [ 14, 97.93824584362629 ],
     [ 15, 97.93043417308745 ],
     [ 16, 97.92062276360569 ],
     [ 17, 97.90846832496283 ],
     [ 18, 97.89362756694074 ],
     [ 19, 97.87575719932133 ] ],
  string: 'y = 0x^3 + 0x^2 + -0.02x + 98.03' }

(Note there are floating point issues in JavaScript, so the numbers aren't perfectly exact.)

points here is what I'm wanting to generate using gsl. Is there a way I can do this?


回答1:


Chad, if I understand what you are needing, you simply need to compute the values based on the polynomial equation using the coefficients you found with your polynomialfit function. After you compute the coefficients, you can find the value y for any x (for DEGREE = 3) with the equation:

y = x^2*(coeff2) + x*(coeff1) + coeff0

or in C-syntax

y = x*x*coeff[2] + x*coeff[1] + coeff[0];

You can modify your main.cpp as follows (you should rename both main.cpp to main.c and polyfitgsl.cpp to polyfitgsl.c -- as they are both C-source files, not C++)

#include <stdio.h>

#include "polifitgsl.h"

#define NP 20
double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
               10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
               97.97, 97.96, 97.94, 97.96, 97.96,
               97.97, 97.97, 97.94, 97.94, 97.94,
               97.92, 97.96, 97.9,  97.85, 97.9 };

#define DEGREE 3
double coeff[DEGREE];

int main (void)
{
    int i;

    polynomialfit (NP, DEGREE, x, y, coeff);

    printf ("\n polynomial coefficients:\n\n");
    for (i = 0; i < DEGREE; i++) {
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    }
    putchar ('\n');

    printf (" computed values:\n\n   x    y\n");
    for (i = 0; i < (int)(sizeof x/sizeof *x); i++)
        printf ("  %2d, %11.7lf\n", i, 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    putchar ('\n');

    return 0;
}

Which provides the following output:

Example Use/Output

$ ./bin/main

 polynomial coefficients:

  coeff[0] :  98.0132468
  coeff[1] :  -0.0053003
  coeff[2] :  -0.0000558

 computed values:

   x    y
   0,  98.0132468
   1,  98.0078906
   2,  98.0024229
   3,  97.9968435
   4,  97.9911524
   5,  97.9853497
   6,  97.9794354
   7,  97.9734094
   8,  97.9672718
   9,  97.9610226
  10,  97.9546617
  11,  97.9481891
  12,  97.9416049
  13,  97.9349091
  14,  97.9281016
  15,  97.9211825
  16,  97.9141517
  17,  97.9070093
  18,  97.8997553
  19,  97.8923896

That seems like what you are asking for. Look it over, compare your values and let me know if you need additional help.


Cleaning Up A Bit Further

Just to clean the code up a bit further, since there is no need for global declaration of x, y, or coeff, and using an enum to define the constants DEGREE and NP, a tidier version would be:

#include <stdio.h>

#include "polifitgsl.h"

enum { DEGREE = 3, NP = 20 };

int main (void)
{
    int i;
    double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
                   10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
    double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
                   97.97, 97.96, 97.94, 97.96, 97.96,
                   97.97, 97.97, 97.94, 97.94, 97.94,
                   97.92, 97.96, 97.9,  97.85, 97.9 };
    double coeff[DEGREE] = {0};


    polynomialfit (NP, DEGREE, x, y, coeff);

    printf ("\n polynomial coefficients:\n\n");
    for (i = 0; i < DEGREE; i++)
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    putchar ('\n');

    printf (" computed values:\n\n   x    y\n");
    for (i = 0; i < (int)(sizeof x/sizeof *x); i++)
        printf ("  %2d, %11.7lf\n", i, 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    putchar ('\n');

    return 0;
}



回答2:


I had the same problem and have taken the answers above and added a solution for direct calculation (without gsl) taken from http://www.cplusplus.com/forum/general/181580/

Below, you find a standalone test program with a gsl-based and a direct calculation solution.

I have done some profiling runs and the performance of direct calculation is an impressive 65 times higher on my system, 5.22s vs. 0.08s for 1000 calls to the respective functions.

As a side note, the direct calculation uses Cramer's rule, so you should be careful with ill conditioned data. Normally I would avoid Cramer's but using a full linear system solver for a 3x3 system seems overkill to me.

/*
* =====================================================================================
*
*       Filename:  polyfit.cpp
*
*    Description:  Least squares fit of second order polynomials 
*                  Test program using gsl and direct calculation
*        Version:  1.0
*        Created:  2017-07-17 09:32:55
*       Compiler:  gcc
*
*         Author:  Bernhard Brunner, brb_blog@epr.ch
*     References:  This code was merged, adapted and optimized from these two sources:
*                  http://www.cplusplus.com/forum/general/181580/
*                  https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points
*                  http://brb.epr.ch/blog/blog:least_squares_regression_of_parabola
*          Build:  compile and link using 
*                  g++    -c -o polifitgsl.o polifitgsl.cpp
*                  gcc  polifitgsl.o -lgsl -lm -lblas -o polifitgsl
* 
*      Profiling:
*                  valgrind --tool=callgrind ./polifitgsl
*                  kcachegrind
*                  
*                  polynomialfit        takes 5.22s for 1000 calls
*                  findQuadCoefficients takes 0.08s for 1000 calls
*                   65x faster
* =====================================================================================
*/

#include <stdio.h>
#include <gsl/gsl_multifit.h>
#include <stdbool.h>
#include <math.h>

bool polynomialfit(int obs, int degree, 
        double *dx, double *dy, double *store); /* n, p */
double x[] = {  0,  1,  2,  3,  4,  5,  6,  7,  8, 9,
            10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
double y[] = { 98.02, 98.01, 98.01, 98.02, 97.98,
            97.97, 97.96, 97.94, 97.96, 97.96,
            97.97, 97.97, 97.94, 97.94, 97.94,
            97.92, 97.96, 97.9,  97.85, 97.9 };

#define NP (sizeof(x)/sizeof(double)) // 20

#define DEGREE 3
double coeff[DEGREE];

bool findQuadCoefficients(double timeArray[], double valueArray[], double *coef, double &critPoint, int PointsNum){
    const double S00=PointsNum;//points number
    double S40=0, S10=0, S20=0, S30=0, S01=0, S11=0, S21 = 0;
//    const double MINvalue = valueArray[0];
//    const double MINtime = timeArray[0];
    for (int i=0; i<PointsNum; i++ ){
        double value = valueArray[i]; //  - MINvalue); //normalizing
//      cout << "i=" << i << " index=" << index << " value=" << value << endl;

        int index = timeArray[i]; //  - MINtime;
        int index2 = index * index;
        int index3 = index2 * index;
        int index4 = index3 * index;

        S40+= index4;
        S30+= index3; 
        S20+= index2;
        S10+= index;

        S01 += value;
        S11 += value*index;
        S21 += value*index2;
    }

    double S20squared = S20*S20;

    //minors M_ij=M_ji
    double M11 = S20*S00 - S10*S10;
    double M21 = S30*S00 - S20*S10;
    double M22 = S40*S00 - S20squared;
    double M31 = S30*S10 - S20squared;

    double M32 = S40*S10 - S20*S30;
//  double M33 = S40*S20 - pow(S30,2);

    double discriminant = S40*M11 - S30*M21 + S20*M31;
//    printf("discriminant :%lf\n", discriminant);
    if (abs(discriminant) < .00000000001) return  false;

    double Da = S21*M11
               -S11*M21
               +S01*M31;
    coef[2] = Da/discriminant;
//  cout << "discriminant=" << discriminant;
//  cout << " Da=" << Da;

    double Db = -S21*M21
                +S11*M22
                -S01*M32;
    coef[1] = Db/discriminant;
//  cout << " Db=" << Db << endl;

    double Dc =   S40*(S20*S01 - S10*S11) 
                - S30*(S30*S01 - S10*S21) 
                + S20*(S30*S11 - S20*S21);
    coef[0] = Dc/discriminant;
//    printf("c=%lf\n", c);

    critPoint = -Db/(2*Da); // + MINtime; //-b/(2*a)= -Db/discriminant / (2*(Da/discriminant)) = -Db/(2*Da);

    return true;
}

bool polynomialfit(int obs, int degree, 
        double *dx, double *dy, double *store) /* n, p */
{
gsl_multifit_linear_workspace *ws;
gsl_matrix *cov, *X;
gsl_vector *y, *c;
double chisq;

int i, j;

X = gsl_matrix_alloc(obs, degree);
y = gsl_vector_alloc(obs);
c = gsl_vector_alloc(degree);
cov = gsl_matrix_alloc(degree, degree);

for(i=0; i < obs; i++) {
    for(j=0; j < degree; j++) {
        gsl_matrix_set(X, i, j, pow(dx[i], j));
    }
    gsl_vector_set(y, i, dy[i]);
}

ws = gsl_multifit_linear_alloc(obs, degree);
gsl_multifit_linear(X, y, c, cov, &chisq, ws);

/* store result ... */
for(i=0; i < degree; i++) {
    store[i] = gsl_vector_get(c, i);
}

gsl_multifit_linear_free(ws);
gsl_matrix_free(X);
gsl_matrix_free(cov);
gsl_vector_free(y);
gsl_vector_free(c);
return true; /* we do not "analyse" the result (cov matrix mainly)
        to know if the fit is "good" */
}

void testcoeff(double *coeff)
{
    printf ("\n polynomial coefficients\n");
    for (int i = 0; i < DEGREE; i++) {
        printf ("  coeff[%d] : %11.7lf\n", i, coeff[i]);
    }
    putchar ('\n');

    printf (" computed values:\n\n   x, yi, yip\n");
    for (unsigned i = 0; i < NP; i++) {
        printf ("%2u,%.7lf,%.7lf\n", i, 
                y[i], 
                i*i*coeff[2] + i*coeff[1] + coeff[0]);
    }
    putchar ('\n');
}

int main (void)
{
    #define ITER 1000
    for (int i=0; i< ITER; i++) {
        polynomialfit (NP, DEGREE, x, y, coeff);
    }
    testcoeff(coeff);

    double sx; 
    for (int i=0; i< ITER; i++) {
        findQuadCoefficients(x, y, coeff, sx, NP);
    }
    printf("critical point %lf\n", sx);
    testcoeff(coeff);
    return 0;
}

References:

  • http://www.cplusplus.com/forum/general/181580/
  • http://mathworld.wolfram.com/LeastSquaresFittingPolynomial.html


来源:https://stackoverflow.com/questions/36522882/how-can-i-use-gsl-to-calculate-polynomial-regression-data-points

易学教程内所有资源均来自网络或用户发布的内容,如有违反法律规定的内容欢迎反馈
该文章没有解决你所遇到的问题?点击提问,说说你的问题,让更多的人一起探讨吧!