test_pie/external/alglib-3.16.0/dataanalysis.h

10478 lines
425 KiB
C
Raw Permalink Normal View History

2023-09-14 11:12:02 +02:00
/*************************************************************************
ALGLIB 3.16.0 (source code generated 2019-12-19)
Copyright (c) Sergey Bochkanov (ALGLIB project).
>>> SOURCE LICENSE >>>
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation (www.fsf.org); either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
A copy of the GNU General Public License is available at
http://www.fsf.org/licensing/licenses
>>> END OF LICENSE >>>
*************************************************************************/
#ifndef _dataanalysis_pkg_h
#define _dataanalysis_pkg_h
#include "ap.h"
#include "alglibinternal.h"
#include "alglibmisc.h"
#include "linalg.h"
#include "statistics.h"
#include "specialfunctions.h"
#include "solvers.h"
#include "optimization.h"
/////////////////////////////////////////////////////////////////////////
//
// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (DATATYPES)
//
/////////////////////////////////////////////////////////////////////////
namespace alglib_impl
{
#if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)
#endif
#if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)
/*
 * Record of five error metrics, compiled with the BDSS unit.
 * The field set is identical to modelerrors; presumably the fields carry the
 * same meaning (misclassification fraction, average cross-entropy, RMS,
 * average and average relative error) - TODO confirm against the BDSS code.
 */
typedef struct
{
double relclserror;
double avgce;
double rmserror;
double avgerror;
double avgrelerror;
} cvreport;
#endif
#if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)
/*
 * Computational-core record of model error metrics. The C++ interface class
 * alglib::modelerrors declares reference members with the same names.
 * Per the interface documentation, relclserror/avgce are zero on regression
 * problems, and on classification problems the remaining three describe
 * errors in prediction of posterior probabilities.
 */
typedef struct
{
double relclserror; /* fraction of misclassified cases */
double avgce;       /* average cross-entropy */
double rmserror;    /* root-mean-square error */
double avgerror;    /* average error */
double avgrelerror; /* average relative error */
} modelerrors;
/*
 * Pairs a scalar f with a vector g.
 * NOTE(review): the name suggests a function value and its gradient used by
 * the MLP code - confirm against the implementation.
 */
typedef struct
{
double f;
ae_vector g;
} smlpgrad;
/*
 * Computational-core state of a multilayer perceptron. The C++ interface
 * reaches it through alglib::_multilayerperceptron_owner::p_struct.
 * The contents of the individual vectors/matrices are not described in this
 * header; they are filled and interpreted by the MLPBASE implementation.
 */
typedef struct
{
/* hl* fields - presumably a high-level description of the network; TODO confirm */
ae_int_t hlnetworktype;
ae_int_t hlnormtype;
ae_vector hllayersizes;
ae_vector hlconnections;
ae_vector hlneurons;
ae_vector structinfo;
ae_vector weights;
ae_vector columnmeans;
ae_vector columnsigmas;
ae_vector neurons;
ae_vector dfdnet;
ae_vector derror;
ae_vector x;
ae_vector y;
ae_matrix xy;
ae_vector xyrow;
ae_vector nwbuf;
ae_vector integerbuf;
modelerrors err; /* embedded error-metrics record (same type as declared above) */
ae_vector rndbuf;
ae_shared_pool buf;
ae_shared_pool gradbuf;
/* dummy* fields - presumably placeholders passed where a real dataset/pool is not needed; TODO confirm */
ae_matrix dummydxy;
sparsematrix dummysxy;
ae_vector dummyidx;
ae_shared_pool dummypool;
} multilayerperceptron;
#endif
#if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)
#endif
#if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)
/*
 * Computational-core state of the SSA model. The C++ interface wraps it as
 * alglib::ssamodel, which is documented as an object that should only be
 * manipulated through ALGLIB functions.
 * It embeds an eigsubspacestate solver with its report and an hqrndstate
 * random generator; the meaning of the remaining fields is defined by the
 * SSA implementation and is not visible in this header.
 */
typedef struct
{
ae_int_t nsequences;
ae_vector sequenceidx;
ae_vector sequencedata;
ae_int_t algotype;
ae_int_t windowwidth;
ae_int_t rtpowerup;
ae_int_t topk;
ae_int_t precomputedwidth;
ae_int_t precomputednbasis;
ae_matrix precomputedbasis;
ae_int_t defaultsubspaceits;
ae_int_t memorylimit;
ae_bool arebasisandsolvervalid;
ae_matrix basis;
ae_matrix basist;
ae_vector sv;
ae_vector forecasta;
ae_int_t nbasis;
eigsubspacestate solver;
ae_matrix xxt;
hqrndstate rs;
ae_int_t rngseed;
ae_vector rtqueue;
ae_int_t rtqueuecnt;
ae_int_t rtqueuechunk;
ae_int_t dbgcntevd;
ae_vector tmp0;
ae_vector tmp1;
eigsubspacereport solverrep;
ae_vector alongtrend;
ae_vector alongnoise;
ae_matrix aseqtrajectory;
ae_matrix aseqtbproduct;
ae_vector aseqcounts;
ae_vector fctrend;
ae_vector fcnoise;
ae_matrix fctrendm;
ae_matrix uxbatch;
ae_int_t uxbatchwidth;
ae_int_t uxbatchsize;
ae_int_t uxbatchlimit;
} ssamodel;
#endif
#if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)
/*
 * Computational-core linear model. Its only field is the vector w; the
 * layout of w is defined by the LINREG implementation and is not visible in
 * this header.
 */
typedef struct
{
ae_vector w;
} linearmodel;
/*
 * Computational-core report with additional information about a linear
 * model. Field meanings follow the LRReport description in the C++ interface
 * section of this header; fields not listed there are documented as intended
 * for internal use only.
 */
typedef struct
{
ae_matrix c;          /* covariance matrix, array[0..NVars,0..NVars], C[i,j] = Cov(A[i],A[j]) */
double rmserror;      /* root mean square error on a training set */
double avgerror;      /* average error on a training set */
double avgrelerror;   /* average relative error on a training set (zero-valued observations excluded) */
double cvrmserror;    /* leave-one-out cross-validation estimate of generalization error */
double cvavgerror;    /* cross-validation estimate of average error */
double cvavgrelerror; /* cross-validation estimate of average relative error */
ae_int_t ncvdefects;  /* internal use */
ae_vector cvdefects;  /* internal use */
} lrreport;
#endif
#if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)
#endif
#if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)
/*
 * Computational-core logit model. Its only field is the vector w; the layout
 * of w is defined by the LOGIT implementation and is not visible in this
 * header.
 */
typedef struct
{
ae_vector w;
} logitmodel;
/*
 * Internal scalar working state of the LOGIT unit: two flags, one integer
 * status and a set of double-valued variables.
 * NOTE(review): the field names (brackt, stage1, stx/sty, stmin/stmax, ...)
 * suggest line-search bookkeeping - confirm against the implementation.
 */
typedef struct
{
ae_bool brackt;
ae_bool stage1;
ae_int_t infoc;
double dg;
double dgm;
double dginit;
double dgtest;
double dgx;
double dgxm;
double dgy;
double dgym;
double finit;
double ftest1;
double fm;
double fx;
double fxm;
double fy;
double fym;
double stx;
double sty;
double stmin;
double stmax;
double width;
double width1;
double xtrapf;
} logitmcstate;
/*
 * Computational-core training report of the LOGIT unit; field meanings
 * follow the MNLReport description in the C++ interface section.
 */
typedef struct
{
ae_int_t ngrad; /* number of gradient calculations */
ae_int_t nhess; /* number of Hessian calculations */
} mnlreport;
#endif
#if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)
/*
 * Computational-core state of the MCPD (Markov Chains for Population Data)
 * solver; wrapped by alglib::mcpdstate. It embeds a MinBLEIC optimizer state
 * (bs) and its report (br).
 * NOTE(review): the rep* fields have the same names as the mcpdreport fields
 * and presumably feed that report - confirm against the implementation.
 */
typedef struct
{
ae_int_t n;
ae_vector states;
ae_int_t npairs;
ae_matrix data;
ae_matrix ec;
ae_matrix bndl;
ae_matrix bndu;
ae_matrix c;
ae_vector ct;
ae_int_t ccnt;
ae_vector pw;
ae_matrix priorp;
double regterm;
minbleicstate bs;
ae_int_t repinneriterationscount;
ae_int_t repouteriterationscount;
ae_int_t repnfev;
ae_int_t repterminationtype;
minbleicreport br;
ae_vector tmpp;
ae_vector effectivew;
ae_vector effectivebndl;
ae_vector effectivebndu;
ae_matrix effectivec;
ae_vector effectivect;
ae_vector h;
ae_matrix p;
} mcpdstate;
/*
 * Computational-core MCPD training report; field meanings follow the
 * description of alglib::mcpdreport in the C++ interface section.
 */
typedef struct
{
ae_int_t inneriterationscount; /* inner iterations of the underlying optimization algorithm */
ae_int_t outeriterationscount; /* outer iterations of the underlying optimization algorithm */
ae_int_t nfev;                 /* number of merit function evaluations */
ae_int_t terminationtype;      /* same as for MinBLEIC: positive = success, negative = failure */
} mcpdreport;
#endif
#if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)
/*
 * Computational-core state of a neural networks ensemble; wrapped by
 * alglib::mlpensemble. It stores a single multilayerperceptron instance
 * together with ensemble-level vectors.
 * NOTE(review): presumably `network` is a shared template and `weights`
 * holds the per-member weights - confirm against the MLPE implementation.
 */
typedef struct
{
ae_int_t ensemblesize;
ae_vector weights;
ae_vector columnmeans;
ae_vector columnsigmas;
multilayerperceptron network;
ae_vector y;
} mlpensemble;
#endif
#if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)
/*
 * Computational-core MLP training report; field meanings follow the
 * "Training report" description in the C++ interface section. That section
 * also notes that relclserror/avgce are zero on regression problems.
 */
typedef struct
{
double relclserror; /* fraction of misclassified cases */
double avgce;       /* average cross-entropy */
double rmserror;    /* root-mean-square error */
double avgerror;    /* average error */
double avgrelerror; /* average relative error */
ae_int_t ngrad;     /* number of gradient calculations */
ae_int_t nhess;     /* number of Hessian calculations */
ae_int_t ncholesky; /* number of Cholesky decompositions */
} mlpreport;
/*
 * Computational-core record of cross-validation estimates of generalization
 * error (see alglib::mlpcvreport). The field names match the first five
 * fields of mlpreport; presumably they carry the same metrics - TODO confirm.
 */
typedef struct
{
double relclserror;
double avgce;
double rmserror;
double avgerror;
double avgrelerror;
} mlpcvreport;
/*
 * Internal MLPTRAIN structure bundling a network, an L-BFGS optimizer state
 * with its report, a reverse-communication state (rstate), a random
 * generator and several work vectors.
 * NOTE(review): presumably the state of one network training session -
 * confirm against the MLPTRAIN implementation.
 */
typedef struct
{
ae_vector bestparameters;
double bestrmserror;
ae_bool randomizenetwork;
multilayerperceptron network;
minlbfgsstate optimizer;
minlbfgsreport optimizerrep;
ae_vector wbuf0;
ae_vector wbuf1;
ae_vector allminibatches;
ae_vector currentminibatch;
rcommstate rstate;
ae_int_t algoused;
ae_int_t minibatchsize;
hqrndstate generator;
} smlptrnsession;
/*
 * Internal MLPTRAIN structure holding two subset vectors, a shared pool
 * (mlpsessions), an mlpreport and a network.
 * NOTE(review): presumably per-worker state for ensemble training, with
 * trnsubset/valsubset being training/validation index sets - confirm.
 */
typedef struct
{
ae_vector trnsubset;
ae_vector valsubset;
ae_shared_pool mlpsessions;
mlpreport mlprep;
multilayerperceptron network;
} mlpetrnsession;
/*
 * Computational-core state of the neural network trainer; wrapped by
 * alglib::mlptrainer, whose documentation says fields must not be accessed
 * directly. It stores both a dense (densexy) and a sparse (sparsexy) dataset
 * slot plus an embedded smlptrnsession.
 * NOTE(review): presumably `datatype` selects between the dense and sparse
 * slot - confirm against the MLPTRAIN implementation.
 */
typedef struct
{
ae_int_t nin;
ae_int_t nout;
ae_bool rcpar;
ae_int_t lbfgsfactor;
double decay;
double wstep;
ae_int_t maxits;
ae_int_t datatype;
ae_int_t npoints;
ae_matrix densexy;
sparsematrix sparsexy;
smlptrnsession session;
ae_int_t ngradbatch;
ae_vector subset;
ae_int_t subsetsize;
ae_vector valsubset;
ae_int_t valsubsetsize;
ae_int_t algokind;
ae_int_t minibatchsize;
} mlptrainer;
/*
 * Internal MLPTRAIN structure holding a network, an mlpreport, a subset
 * vector with its size, two work vectors, a gradient counter and a shared
 * pool (trnpool).
 * NOTE(review): the name suggests per-worker state for parallel
 * cross-validation - confirm against the implementation.
 */
typedef struct
{
multilayerperceptron network;
mlpreport rep;
ae_vector subset;
ae_int_t subsetsize;
ae_vector xyrow;
ae_vector y;
ae_int_t ngrad;
ae_shared_pool trnpool;
} mlpparallelizationcv;
#endif
#if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)
/*
 * Internal buffers of the CLUSTERING unit; embedded in clusterizerstate as
 * `kmeanstmp`.
 * NOTE(review): presumably reusable temporaries for the k-means algorithm
 * (centers, best centers, point-to-cluster assignments) - confirm.
 */
typedef struct
{
ae_matrix ct;
ae_matrix ctbest;
ae_vector xycbest;
ae_vector xycprev;
ae_vector d2;
ae_vector csizes;
apbuffers initbuf;
ae_shared_pool updatepool;
} kmeansbuffers;
/*
 * Computational-core state of the clusterization engine; wrapped by
 * alglib::clusterizerstate, whose documentation says fields must not be
 * accessed directly. It embeds the k-means temporaries (kmeanstmp).
 * NOTE(review): presumably xy holds the dataset, d a distance matrix, and
 * the ahc and kmeans fields are algorithm settings - confirm.
 */
typedef struct
{
ae_int_t npoints;
ae_int_t nfeatures;
ae_int_t disttype;
ae_matrix xy;
ae_matrix d;
ae_int_t ahcalgo;
ae_int_t kmeansrestarts;
ae_int_t kmeansmaxits;
ae_int_t kmeansinitalgo;
ae_bool kmeansdbgnoits;
ae_int_t seed;
ae_matrix tmpd;
apbuffers distbuf;
kmeansbuffers kmeanstmp;
} clusterizerstate;
/*
 * Computational-core result of agglomerative hierarchical clustering (AHC).
 * Field meanings follow the description of alglib::ahcreport in the C++
 * interface section; when terminationtype is negative the other fields are
 * documented as invalid.
 */
typedef struct
{
ae_int_t terminationtype; /* completion code: 1 = success, -5 = inappropriate algorithm/distance combination */
ae_int_t npoints;         /* number of points in the original dataset */
ae_vector p;              /* array[NPoints], permutation reordering points for dendrogram construction */
ae_matrix z;              /* array[NPoints-1,2], merged cluster indexes (original, unsorted dataset) */
ae_matrix pz;             /* same as z, but for the permutation of points given by p */
ae_matrix pm;             /* array[NPoints-1,6], merge representation suited for dendrogram construction */
ae_vector mergedist;      /* array[NPoints-1], distances between clusters being merged */
} ahcreport;
/*
 * Computational-core result of the k-means clustering algorithm. Field
 * meanings follow the description of alglib::kmeansreport in the C++
 * interface section; nfeatures, c and cidx are documented as returned only
 * for positive terminationtype.
 */
typedef struct
{
ae_int_t npoints;         /* number of points, >=0 */
ae_int_t nfeatures;       /* number of variables, >=1 */
ae_int_t terminationtype; /* completion code: -5, -3 on failure, +1 on success */
ae_int_t iterationscount; /* iterations performed; total over all random restarts */
double energy;            /* merit function: sum of squared deviations from cluster centers */
ae_int_t k;               /* number of clusters */
ae_matrix c;              /* array[K,NFeatures], rows store centers */
ae_vector cidx;           /* array[NPoints], cluster index of each point */
} kmeansreport;
#endif
#if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)
/*
 * Computational-core state of the decision forest builder; wrapped by
 * alglib::decisionforestbuilder, which is documented as storing the dataset
 * and the decision forest training algorithm settings.
 * NOTE(review): presumably ds* fields hold the dataset and its statistics
 * and rdf* fields hold algorithm settings/progress - confirm against the
 * DFOREST implementation.
 */
typedef struct
{
ae_int_t dstype;
ae_int_t npoints;
ae_int_t nvars;
ae_int_t nclasses;
ae_vector dsdata;
ae_vector dsrval;
ae_vector dsival;
ae_int_t rdfalgo;
double rdfratio;
double rdfvars;
ae_int_t rdfglobalseed;
ae_int_t rdfsplitstrength;
ae_int_t rdfimportance;
ae_vector dsmin;
ae_vector dsmax;
ae_vector dsbinary;
double dsravg;
ae_vector dsctotals;
ae_int_t rdfprogress;
ae_int_t rdftotal;
ae_shared_pool workpool;
ae_shared_pool votepool;
ae_shared_pool treepool;
ae_shared_pool treefactory;
ae_bool neediobmatrix;
ae_matrix iobmatrix;
ae_vector varimpshuffle2;
} decisionforestbuilder;
/*
 * Internal work buffer of the DFOREST unit: a collection of vectors plus
 * three size counters (varpoolsize, trnsize, oobsize).
 * NOTE(review): presumably trn* and oob* fields describe the training and
 * out-of-bag subsets used while building one tree, and tmp* fields are
 * scratch space - confirm against the implementation.
 */
typedef struct
{
ae_vector classpriors;
ae_vector varpool;
ae_int_t varpoolsize;
ae_vector trnset;
ae_int_t trnsize;
ae_vector trnlabelsr;
ae_vector trnlabelsi;
ae_vector oobset;
ae_int_t oobsize;
ae_vector ooblabelsr;
ae_vector ooblabelsi;
ae_vector treebuf;
ae_vector curvals;
ae_vector bestvals;
ae_vector tmp0i;
ae_vector tmp1i;
ae_vector tmp0r;
ae_vector tmp1r;
ae_vector tmp2r;
ae_vector tmp3r;
ae_vector tmpnrms2;
ae_vector classtotals0;
ae_vector classtotals1;
ae_vector classtotals01;
} dfworkbuf;
/*
 * Internal DFOREST buffer made of five vectors.
 * NOTE(review): presumably accumulates per-point vote totals/counts for the
 * training and out-of-bag sets plus Gini-based variable importances -
 * confirm against the implementation.
 */
typedef struct
{
ae_vector trntotals;
ae_vector oobtotals;
ae_vector trncounts;
ae_vector oobcounts;
ae_vector giniimportances;
} dfvotebuf;
/*
 * Internal DFOREST buffer made of eight vectors.
 * NOTE(review): the name suggests scratch space for permutation-based
 * variable importance estimation - confirm against the implementation.
 */
typedef struct
{
ae_vector losses;
ae_vector xraw;
ae_vector xdist;
ae_vector xcur;
ae_vector y;
ae_vector yv;
ae_vector targety;
ae_vector startnodes;
} dfpermimpbuf;
/*
 * Internal DFOREST structure pairing a vector (treebuf) with an integer
 * index (treeidx).
 * NOTE(review): presumably one serialized tree and its position in the
 * forest - confirm against the implementation.
 */
typedef struct
{
ae_vector treebuf;
ae_int_t treeidx;
} dftreebuf;
/*
 * Computational-core buffer object; wrapped by alglib::decisionforestbuffer,
 * which is documented as a buffer for performing requests (usually model
 * inference) in multithreaded mode. It holds two vectors, x and y.
 * NOTE(review): presumably x/y are input/output scratch vectors - confirm.
 */
typedef struct
{
ae_vector x;
ae_vector y;
} decisionforestbuffer;
/*
 * Computational-core decision forest (random forest) model. It embeds a
 * decisionforestbuffer and keeps two tree storage vectors (trees, trees8).
 * NOTE(review): presumably forestformat/usemantissa8 select which of the two
 * storage vectors is in use - confirm against the DFOREST implementation.
 */
typedef struct
{
ae_int_t forestformat;
ae_bool usemantissa8;
ae_int_t nvars;
ae_int_t nclasses;
ae_int_t ntrees;
ae_int_t bufsize;
ae_vector trees;
decisionforestbuffer buffer;
ae_vector trees8;
} decisionforest;
/*
 * Computational-core decision forest report: five error metrics, five
 * metrics with the same names prefixed by "oob", and two vectors.
 * NOTE(review): presumably the first group is measured on the training set
 * and the oob* group on out-of-bag samples, with metric meanings matching
 * modelerrors - confirm against the DFOREST documentation.
 */
typedef struct
{
double relclserror;
double avgce;
double rmserror;
double avgerror;
double avgrelerror;
double oobrelclserror;
double oobavgce;
double oobrmserror;
double oobavgerror;
double oobavgrelerror;
ae_vector topvars;
ae_vector varimportances;
} dfreport;
/*
 * Internal DFOREST structure consisting solely of work vectors.
 * NOTE(review): presumably scratch buffers (tree storage, index, sort and
 * split-evaluation temporaries) reused across calls - confirm against the
 * implementation.
 */
typedef struct
{
ae_vector treebuf;
ae_vector idxbuf;
ae_vector tmpbufr;
ae_vector tmpbufr2;
ae_vector tmpbufi;
ae_vector classibuf;
ae_vector sortrbuf;
ae_vector sortrbuf2;
ae_vector sortibuf;
ae_vector varpool;
ae_vector evsbin;
ae_vector evssplits;
} dfinternalbuffers;
#endif
#if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)
/*
 * Internal KNN buffer; embedded in knnmodel as `buffer`. It combines a
 * kd-tree request buffer with three vectors and one matrix.
 * NOTE(review): presumably scratch space for one nearest-neighbor query -
 * confirm against the KNN implementation.
 */
typedef struct
{
kdtreerequestbuffer treebuf;
ae_vector x;
ae_vector y;
ae_vector tags;
ae_matrix xy;
} knnbuffer;
/*
 * Computational-core state of the KNN unit's builder object.
 * NOTE(review): presumably ds* fields hold the dataset, iscls tells
 * classification from regression, and knnnrm selects a norm - confirm
 * against the KNN implementation.
 */
typedef struct
{
ae_int_t dstype;
ae_int_t npoints;
ae_int_t nvars;
ae_bool iscls;
ae_int_t nout;
ae_matrix dsdata;
ae_vector dsrval;
ae_vector dsival;
ae_int_t knnnrm;
} knnbuilder;
/*
 * Computational-core KNN model: scalar settings, a kd-tree and an embedded
 * knnbuffer.
 * NOTE(review): presumably k is the neighbor count, eps an approximation
 * factor and isdummy marks a model without data - confirm against the KNN
 * implementation.
 */
typedef struct
{
ae_int_t nvars;
ae_int_t nout;
ae_int_t k;
double eps;
ae_bool iscls;
ae_bool isdummy;
kdtree tree;
knnbuffer buffer;
} knnmodel;
/*
 * Computational-core KNN report with five error metrics. The field set is
 * identical to modelerrors; presumably the fields carry the same meaning -
 * TODO confirm against the KNN documentation.
 */
typedef struct
{
double relclserror;
double avgce;
double rmserror;
double avgerror;
double avgrelerror;
} knnreport;
#endif
#if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)
#endif
}
/////////////////////////////////////////////////////////////////////////
//
// THIS SECTION CONTAINS C++ INTERFACE
//
/////////////////////////////////////////////////////////////////////////
namespace alglib
{
#if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)
#endif
#if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)
#endif
#if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Model's errors:
* RelCLSError - fraction of misclassified cases.
* AvgCE - average cross-entropy
* RMSError - root-mean-square error
* AvgError - average error
* AvgRelError - average relative error
NOTE 1: RelCLSError/AvgCE are zero on regression problems.
NOTE 2: on classification problems RMSError/AvgError/AvgRelError contain
errors in prediction of posterior probabilities
*************************************************************************/
/*
 * Base class of alglib::modelerrors. It holds a pointer (p_struct) to the
 * underlying alglib_impl::modelerrors record and exposes it via c_ptr().
 * Only declarations are visible here; how p_struct is allocated, copied and
 * released is defined in the implementation file.
 */
class _modelerrors_owner
{
public:
_modelerrors_owner();
_modelerrors_owner(const _modelerrors_owner &rhs);
_modelerrors_owner& operator=(const _modelerrors_owner &rhs);
virtual ~_modelerrors_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::modelerrors* c_ptr();
alglib_impl::modelerrors* c_ptr() const;
protected:
alglib_impl::modelerrors *p_struct;
};
/*
 * User-facing record of model error metrics. Every data member is a
 * reference.
 * NOTE(review): presumably the constructors (defined elsewhere) bind each
 * reference to the same-named field of the alglib_impl::modelerrors record
 * held by the base class - confirm against the implementation.
 */
class modelerrors : public _modelerrors_owner
{
public:
modelerrors();
modelerrors(const modelerrors &rhs);
modelerrors& operator=(const modelerrors &rhs);
virtual ~modelerrors();
double &relclserror;
double &avgce;
double &rmserror;
double &avgerror;
double &avgrelerror;
};
/*************************************************************************
*************************************************************************/
/*
 * Base class of alglib::multilayerperceptron. It holds a pointer (p_struct)
 * to the underlying alglib_impl::multilayerperceptron state and exposes it
 * via c_ptr(). Allocation, copy and release semantics are defined in the
 * implementation file and are not visible here.
 */
class _multilayerperceptron_owner
{
public:
_multilayerperceptron_owner();
_multilayerperceptron_owner(const _multilayerperceptron_owner &rhs);
_multilayerperceptron_owner& operator=(const _multilayerperceptron_owner &rhs);
virtual ~_multilayerperceptron_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::multilayerperceptron* c_ptr();
alglib_impl::multilayerperceptron* c_ptr() const;
protected:
alglib_impl::multilayerperceptron *p_struct;
};
/*
 * User-facing multilayer perceptron handle. It adds no data members to
 * _multilayerperceptron_owner; the underlying state is publicly reachable
 * only through the inherited c_ptr().
 */
class multilayerperceptron : public _multilayerperceptron_owner
{
public:
multilayerperceptron();
multilayerperceptron(const multilayerperceptron &rhs);
multilayerperceptron& operator=(const multilayerperceptron &rhs);
virtual ~multilayerperceptron();
};
#endif
#if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)
#endif
#if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This object stores state of the SSA model.
You should use ALGLIB functions to work with this object.
*************************************************************************/
/*
 * Base class of alglib::ssamodel. It holds a pointer (p_struct) to the
 * underlying alglib_impl::ssamodel state and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _ssamodel_owner
{
public:
_ssamodel_owner();
_ssamodel_owner(const _ssamodel_owner &rhs);
_ssamodel_owner& operator=(const _ssamodel_owner &rhs);
virtual ~_ssamodel_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::ssamodel* c_ptr();
alglib_impl::ssamodel* c_ptr() const;
protected:
alglib_impl::ssamodel *p_struct;
};
/*
 * User-facing SSA model handle. It adds no data members to _ssamodel_owner;
 * the underlying state is publicly reachable only through the inherited
 * c_ptr().
 */
class ssamodel : public _ssamodel_owner
{
public:
ssamodel();
ssamodel(const ssamodel &rhs);
ssamodel& operator=(const ssamodel &rhs);
virtual ~ssamodel();
};
#endif
#if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
*************************************************************************/
/*
 * Base class of alglib::linearmodel. It holds a pointer (p_struct) to the
 * underlying alglib_impl::linearmodel and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _linearmodel_owner
{
public:
_linearmodel_owner();
_linearmodel_owner(const _linearmodel_owner &rhs);
_linearmodel_owner& operator=(const _linearmodel_owner &rhs);
virtual ~_linearmodel_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::linearmodel* c_ptr();
alglib_impl::linearmodel* c_ptr() const;
protected:
alglib_impl::linearmodel *p_struct;
};
/*
 * User-facing linear model handle. It adds no data members to
 * _linearmodel_owner; the underlying model is publicly reachable only
 * through the inherited c_ptr().
 */
class linearmodel : public _linearmodel_owner
{
public:
linearmodel();
linearmodel(const linearmodel &rhs);
linearmodel& operator=(const linearmodel &rhs);
virtual ~linearmodel();
};
/*************************************************************************
LRReport structure contains additional information about linear model:
* C - covariance matrix, array[0..NVars,0..NVars].
C[i,j] = Cov(A[i],A[j])
* RMSError - root mean square error on a training set
* AvgError - average error on a training set
* AvgRelError - average relative error on a training set (excluding
observations with zero function value).
* CVRMSError - leave-one-out cross-validation estimate of
generalization error. Calculated using fast algorithm
with O(NVars*NPoints) complexity.
* CVAvgError - cross-validation estimate of average error
* CVAvgRelError - cross-validation estimate of average relative error
All other fields of the structure are intended for internal use and should
not be used outside ALGLIB.
*************************************************************************/
/*
 * Base class of alglib::lrreport. It holds a pointer (p_struct) to the
 * underlying alglib_impl::lrreport record and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _lrreport_owner
{
public:
_lrreport_owner();
_lrreport_owner(const _lrreport_owner &rhs);
_lrreport_owner& operator=(const _lrreport_owner &rhs);
virtual ~_lrreport_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::lrreport* c_ptr();
alglib_impl::lrreport* c_ptr() const;
protected:
alglib_impl::lrreport *p_struct;
};
/*
 * User-facing linear regression report. Scalar members are references,
 * while c and cvdefects are array wrapper objects.
 * NOTE(review): presumably the constructors (defined elsewhere) attach all
 * members to the same-named fields of the alglib_impl::lrreport record held
 * by the base class - confirm against the implementation.
 */
class lrreport : public _lrreport_owner
{
public:
lrreport();
lrreport(const lrreport &rhs);
lrreport& operator=(const lrreport &rhs);
virtual ~lrreport();
real_2d_array c;
double &rmserror;
double &avgerror;
double &avgrelerror;
double &cvrmserror;
double &cvavgerror;
double &cvavgrelerror;
/* The struct documentation marks fields other than C and the error metrics as internal. */
ae_int_t &ncvdefects;
integer_1d_array cvdefects;
};
#endif
#if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)
#endif
#if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
*************************************************************************/
/*
 * Base class of alglib::logitmodel. It holds a pointer (p_struct) to the
 * underlying alglib_impl::logitmodel and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _logitmodel_owner
{
public:
_logitmodel_owner();
_logitmodel_owner(const _logitmodel_owner &rhs);
_logitmodel_owner& operator=(const _logitmodel_owner &rhs);
virtual ~_logitmodel_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::logitmodel* c_ptr();
alglib_impl::logitmodel* c_ptr() const;
protected:
alglib_impl::logitmodel *p_struct;
};
/*
 * User-facing logit model handle. It adds no data members to
 * _logitmodel_owner; the underlying model is publicly reachable only through
 * the inherited c_ptr().
 */
class logitmodel : public _logitmodel_owner
{
public:
logitmodel();
logitmodel(const logitmodel &rhs);
logitmodel& operator=(const logitmodel &rhs);
virtual ~logitmodel();
};
/*************************************************************************
MNLReport structure contains information about training process:
* NGrad - number of gradient calculations
* NHess - number of Hessian calculations
*************************************************************************/
/*
 * Base class of alglib::mnlreport. It holds a pointer (p_struct) to the
 * underlying alglib_impl::mnlreport record and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _mnlreport_owner
{
public:
_mnlreport_owner();
_mnlreport_owner(const _mnlreport_owner &rhs);
_mnlreport_owner& operator=(const _mnlreport_owner &rhs);
virtual ~_mnlreport_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::mnlreport* c_ptr();
alglib_impl::mnlreport* c_ptr() const;
protected:
alglib_impl::mnlreport *p_struct;
};
/*
 * User-facing logit training report. Both data members are references.
 * NOTE(review): presumably the constructors (defined elsewhere) bind them to
 * the same-named fields of the alglib_impl::mnlreport record held by the
 * base class - confirm against the implementation.
 */
class mnlreport : public _mnlreport_owner
{
public:
mnlreport();
mnlreport(const mnlreport &rhs);
mnlreport& operator=(const mnlreport &rhs);
virtual ~mnlreport();
ae_int_t &ngrad;
ae_int_t &nhess;
};
#endif
#if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This structure is a MCPD (Markov Chains for Population Data) solver.
You should use ALGLIB functions in order to work with this object.
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
/*
 * Base class of alglib::mcpdstate. It holds a pointer (p_struct) to the
 * underlying alglib_impl::mcpdstate and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _mcpdstate_owner
{
public:
_mcpdstate_owner();
_mcpdstate_owner(const _mcpdstate_owner &rhs);
_mcpdstate_owner& operator=(const _mcpdstate_owner &rhs);
virtual ~_mcpdstate_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::mcpdstate* c_ptr();
alglib_impl::mcpdstate* c_ptr() const;
protected:
alglib_impl::mcpdstate *p_struct;
};
/*
 * User-facing MCPD solver handle. It adds no data members to
 * _mcpdstate_owner; the underlying state is publicly reachable only through
 * the inherited c_ptr().
 */
class mcpdstate : public _mcpdstate_owner
{
public:
mcpdstate();
mcpdstate(const mcpdstate &rhs);
mcpdstate& operator=(const mcpdstate &rhs);
virtual ~mcpdstate();
};
/*************************************************************************
This structure is a MCPD training report:
InnerIterationsCount - number of inner iterations of the
underlying optimization algorithm
OuterIterationsCount - number of outer iterations of the
underlying optimization algorithm
NFEV - number of merit function evaluations
TerminationType - termination type
(same as for MinBLEIC optimizer, positive
values denote success, negative ones -
failure)
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
/*
 * Base class of alglib::mcpdreport. It holds a pointer (p_struct) to the
 * underlying alglib_impl::mcpdreport record and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _mcpdreport_owner
{
public:
_mcpdreport_owner();
_mcpdreport_owner(const _mcpdreport_owner &rhs);
_mcpdreport_owner& operator=(const _mcpdreport_owner &rhs);
virtual ~_mcpdreport_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::mcpdreport* c_ptr();
alglib_impl::mcpdreport* c_ptr() const;
protected:
alglib_impl::mcpdreport *p_struct;
};
/*
 * User-facing MCPD training report. All data members are references.
 * NOTE(review): presumably the constructors (defined elsewhere) bind them to
 * the same-named fields of the alglib_impl::mcpdreport record held by the
 * base class - confirm against the implementation.
 */
class mcpdreport : public _mcpdreport_owner
{
public:
mcpdreport();
mcpdreport(const mcpdreport &rhs);
mcpdreport& operator=(const mcpdreport &rhs);
virtual ~mcpdreport();
ae_int_t &inneriterationscount;
ae_int_t &outeriterationscount;
ae_int_t &nfev;
ae_int_t &terminationtype;
};
#endif
#if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Neural networks ensemble
*************************************************************************/
/*
 * Base class of alglib::mlpensemble. It holds a pointer (p_struct) to the
 * underlying alglib_impl::mlpensemble and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _mlpensemble_owner
{
public:
_mlpensemble_owner();
_mlpensemble_owner(const _mlpensemble_owner &rhs);
_mlpensemble_owner& operator=(const _mlpensemble_owner &rhs);
virtual ~_mlpensemble_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::mlpensemble* c_ptr();
alglib_impl::mlpensemble* c_ptr() const;
protected:
alglib_impl::mlpensemble *p_struct;
};
/*
 * User-facing neural networks ensemble handle. It adds no data members to
 * _mlpensemble_owner; the underlying state is publicly reachable only
 * through the inherited c_ptr().
 */
class mlpensemble : public _mlpensemble_owner
{
public:
mlpensemble();
mlpensemble(const mlpensemble &rhs);
mlpensemble& operator=(const mlpensemble &rhs);
virtual ~mlpensemble();
};
#endif
#if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Training report:
* RelCLSError - fraction of misclassified cases.
* AvgCE - average cross-entropy
* RMSError - root-mean-square error
* AvgError - average error
* AvgRelError - average relative error
* NGrad - number of gradient calculations
* NHess - number of Hessian calculations
* NCholesky - number of Cholesky decompositions
NOTE 1: RelCLSError/AvgCE are zero on regression problems.
NOTE 2: on classification problems RMSError/AvgError/AvgRelError contain
errors in prediction of posterior probabilities
*************************************************************************/
/*
 * Base class of alglib::mlpreport. It holds a pointer (p_struct) to the
 * underlying alglib_impl::mlpreport record and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _mlpreport_owner
{
public:
_mlpreport_owner();
_mlpreport_owner(const _mlpreport_owner &rhs);
_mlpreport_owner& operator=(const _mlpreport_owner &rhs);
virtual ~_mlpreport_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::mlpreport* c_ptr();
alglib_impl::mlpreport* c_ptr() const;
protected:
alglib_impl::mlpreport *p_struct;
};
/*
 * User-facing MLP training report. All data members are references.
 * NOTE(review): presumably the constructors (defined elsewhere) bind them to
 * the same-named fields of the alglib_impl::mlpreport record held by the
 * base class - confirm against the implementation.
 */
class mlpreport : public _mlpreport_owner
{
public:
mlpreport();
mlpreport(const mlpreport &rhs);
mlpreport& operator=(const mlpreport &rhs);
virtual ~mlpreport();
double &relclserror;
double &avgce;
double &rmserror;
double &avgerror;
double &avgrelerror;
ae_int_t &ngrad;
ae_int_t &nhess;
ae_int_t &ncholesky;
};
/*************************************************************************
Cross-validation estimates of generalization error
*************************************************************************/
/*
 * Base class of alglib::mlpcvreport. It holds a pointer (p_struct) to the
 * underlying alglib_impl::mlpcvreport record and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _mlpcvreport_owner
{
public:
_mlpcvreport_owner();
_mlpcvreport_owner(const _mlpcvreport_owner &rhs);
_mlpcvreport_owner& operator=(const _mlpcvreport_owner &rhs);
virtual ~_mlpcvreport_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::mlpcvreport* c_ptr();
alglib_impl::mlpcvreport* c_ptr() const;
protected:
alglib_impl::mlpcvreport *p_struct;
};
/*
 * User-facing report with cross-validation estimates of generalization
 * error. All data members are references.
 * NOTE(review): presumably the constructors (defined elsewhere) bind them to
 * the same-named fields of the alglib_impl::mlpcvreport record held by the
 * base class - confirm against the implementation.
 */
class mlpcvreport : public _mlpcvreport_owner
{
public:
mlpcvreport();
mlpcvreport(const mlpcvreport &rhs);
mlpcvreport& operator=(const mlpcvreport &rhs);
virtual ~mlpcvreport();
double &relclserror;
double &avgce;
double &rmserror;
double &avgerror;
double &avgrelerror;
};
/*************************************************************************
Trainer object for neural network.
You should not try to access fields of this object directly - use ALGLIB
functions to work with this object.
*************************************************************************/
/*
 * Base class of alglib::mlptrainer. It holds a pointer (p_struct) to the
 * underlying alglib_impl::mlptrainer and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _mlptrainer_owner
{
public:
_mlptrainer_owner();
_mlptrainer_owner(const _mlptrainer_owner &rhs);
_mlptrainer_owner& operator=(const _mlptrainer_owner &rhs);
virtual ~_mlptrainer_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::mlptrainer* c_ptr();
alglib_impl::mlptrainer* c_ptr() const;
protected:
alglib_impl::mlptrainer *p_struct;
};
/*
 * User-facing neural network trainer handle. It adds no data members to
 * _mlptrainer_owner; the underlying state is publicly reachable only through
 * the inherited c_ptr().
 */
class mlptrainer : public _mlptrainer_owner
{
public:
mlptrainer();
mlptrainer(const mlptrainer &rhs);
mlptrainer& operator=(const mlptrainer &rhs);
virtual ~mlptrainer();
};
#endif
#if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This structure is a clusterization engine.
You should not try to access its fields directly.
Use ALGLIB functions in order to work with this object.
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
/*
 * Base class of alglib::clusterizerstate. It holds a pointer (p_struct) to
 * the underlying alglib_impl::clusterizerstate and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _clusterizerstate_owner
{
public:
_clusterizerstate_owner();
_clusterizerstate_owner(const _clusterizerstate_owner &rhs);
_clusterizerstate_owner& operator=(const _clusterizerstate_owner &rhs);
virtual ~_clusterizerstate_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::clusterizerstate* c_ptr();
alglib_impl::clusterizerstate* c_ptr() const;
protected:
alglib_impl::clusterizerstate *p_struct;
};
/*
 * User-facing clusterization engine handle. It adds no data members to
 * _clusterizerstate_owner; the underlying state is publicly reachable only
 * through the inherited c_ptr().
 */
class clusterizerstate : public _clusterizerstate_owner
{
public:
clusterizerstate();
clusterizerstate(const clusterizerstate &rhs);
clusterizerstate& operator=(const clusterizerstate &rhs);
virtual ~clusterizerstate();
};
/*************************************************************************
This structure is used to store results of the agglomerative hierarchical
clustering (AHC).
Following information is returned:
* TerminationType - completion code:
* 1 for successful completion of algorithm
* -5 inappropriate combination of clustering algorithm and distance
function was used. As for now, it is possible only when Ward's
method is called for dataset with non-Euclidean distance function.
In case negative completion code is returned, other fields of report
structure are invalid and should not be used.
* NPoints contains number of points in the original dataset
* Z contains information about merges performed (see below). Z contains
indexes from the original (unsorted) dataset and it can be used when you
need to know what points were merged. However, it is not convenient when
you want to build a dendrogram (see below).
* if you want to build dendrogram, you can use Z, but it is not good
option, because Z contains indexes from unsorted dataset. Dendrogram
built from such dataset is likely to have intersections. So, you have to
reorder your points before building dendrogram.
Permutation which reorders point is returned in P. Another representation
of merges, which is more convenient for dendrogram construction, is
returned in PM.
* more information on format of Z, P and PM can be found below and in the
examples from ALGLIB Reference Manual.
FORMAL DESCRIPTION OF FIELDS:
NPoints number of points
Z array[NPoints-1,2], contains indexes of clusters
linked in pairs to form clustering tree. I-th row
corresponds to I-th merge:
* Z[I,0] - index of the first cluster to merge
* Z[I,1] - index of the second cluster to merge
* Z[I,0]<Z[I,1]
* clusters are numbered from 0 to 2*NPoints-2, with
indexes from 0 to NPoints-1 corresponding to points
of the original dataset, and indexes from NPoints to
2*NPoints-2 correspond to clusters generated by
subsequent merges (I-th row of Z creates cluster
with index NPoints+I).
IMPORTANT: indexes in Z[] are indexes in the ORIGINAL,
unsorted dataset. In addition to Z algorithm outputs
permutation which rearranges points in such way that
subsequent merges are performed on adjacent points
(such order is needed if you want to build dendrogram).
However, indexes in Z are related to original,
unrearranged sequence of points.
P array[NPoints], permutation which reorders points for
dendrogram construction. P[i] contains index of the
position where we should move I-th point of the
original dataset in order to apply merges PZ/PM.
PZ same as Z, but for permutation of points given by P.
The only thing which changed are indexes of the
original points; indexes of clusters remained same.
MergeDist array[NPoints-1], contains distances between clusters
being merged (MergeDist[i] corresponds to the merge stored
in Z[i,...]):
* CLINK, SLINK and average linkage algorithms report
"raw", unmodified distance metric.
* Ward's method reports weighted intra-cluster
variance, which is equal to ||Ca-Cb||^2 * Sa*Sb/(Sa+Sb).
Here A and B are clusters being merged, Ca is a
center of A, Cb is a center of B, Sa is a size of A,
Sb is a size of B.
PM array[NPoints-1,6], another representation of merges,
which is suited for dendrogram construction. It deals
with rearranged points (permutation P is applied) and
represents merges in a form which is different from the one
used by Z.
For each I from 0 to NPoints-2, I-th row of PM represents
merge performed on two clusters C0 and C1. Here:
* C0 contains points with indexes PM[I,0]...PM[I,1]
* C1 contains points with indexes PM[I,2]...PM[I,3]
* indexes stored in PM are given for dataset sorted
according to permutation P
* PM[I,1]=PM[I,2]-1 (only adjacent clusters are merged)
* PM[I,0]<=PM[I,1], PM[I,2]<=PM[I,3], i.e. both
clusters contain at least one point
* heights of "subdendrograms" corresponding to C0/C1
are stored in PM[I,4] and PM[I,5]. Subdendrograms
corresponding to single-point clusters have
height=0. Dendrogram of the merge result has height
H=max(H0,H1)+1.
NOTE: there is one-to-one correspondence between merges described by Z and
PM. I-th row of Z describes same merge of clusters as I-th row of PM,
with "left" cluster from Z corresponding to the "left" one from PM.
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
/*
 * Base class of alglib::ahcreport. It holds a pointer (p_struct) to the
 * underlying alglib_impl::ahcreport record and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _ahcreport_owner
{
public:
_ahcreport_owner();
_ahcreport_owner(const _ahcreport_owner &rhs);
_ahcreport_owner& operator=(const _ahcreport_owner &rhs);
virtual ~_ahcreport_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::ahcreport* c_ptr();
alglib_impl::ahcreport* c_ptr() const;
protected:
alglib_impl::ahcreport *p_struct;
};
/*
 * User-facing AHC result. Scalar members are references, while p, z, pz, pm
 * and mergedist are array wrapper objects.
 * NOTE(review): presumably the constructors (defined elsewhere) attach all
 * members to the same-named fields of the alglib_impl::ahcreport record held
 * by the base class - confirm against the implementation.
 */
class ahcreport : public _ahcreport_owner
{
public:
ahcreport();
ahcreport(const ahcreport &rhs);
ahcreport& operator=(const ahcreport &rhs);
virtual ~ahcreport();
ae_int_t &terminationtype;
ae_int_t &npoints;
integer_1d_array p;
integer_2d_array z;
integer_2d_array pz;
integer_2d_array pm;
real_1d_array mergedist;
};
/*************************************************************************
This structure is used to store results of the k-means clustering
algorithm.
Following information is always returned:
* NPoints contains number of points in the original dataset
* TerminationType contains completion code, negative on failure, positive
on success
* K contains number of clusters
For positive TerminationType we return:
* NFeatures contains number of variables in the original dataset
* C, which contains centers found by algorithm
* CIdx, which maps points of the original dataset to clusters
FORMAL DESCRIPTION OF FIELDS:
NPoints number of points, >=0
NFeatures number of variables, >=1
TerminationType completion code:
* -5 if distance type is anything different from
Euclidean metric
* -3 for degenerate dataset: a) less than K distinct
points, b) K=0 for non-empty dataset.
* +1 for successful completion
K number of clusters
C array[K,NFeatures], rows of the array store centers
CIdx array[NPoints], which contains cluster indexes
IterationsCount actual number of iterations performed by clusterizer.
If algorithm performed more than one random restart,
total number of iterations is returned.
Energy merit function, "energy", sum of squared deviations
from cluster centers
-- ALGLIB --
Copyright 27.11.2012 by Bochkanov Sergey
*************************************************************************/
/*
 * Base class of alglib::kmeansreport. It holds a pointer (p_struct) to the
 * underlying alglib_impl::kmeansreport record and exposes it via c_ptr().
 * Allocation, copy and release semantics are defined in the implementation
 * file and are not visible here.
 */
class _kmeansreport_owner
{
public:
_kmeansreport_owner();
_kmeansreport_owner(const _kmeansreport_owner &rhs);
_kmeansreport_owner& operator=(const _kmeansreport_owner &rhs);
virtual ~_kmeansreport_owner();
/* Note: the const overload also returns a pointer to non-const data. */
alglib_impl::kmeansreport* c_ptr();
alglib_impl::kmeansreport* c_ptr() const;
protected:
alglib_impl::kmeansreport *p_struct;
};
/*
 * User-facing k-means result. Scalar members are references, while c and
 * cidx are array wrapper objects.
 * NOTE(review): presumably the constructors (defined elsewhere) attach all
 * members to the same-named fields of the alglib_impl::kmeansreport record
 * held by the base class - confirm against the implementation.
 */
class kmeansreport : public _kmeansreport_owner
{
public:
kmeansreport();
kmeansreport(const kmeansreport &rhs);
kmeansreport& operator=(const kmeansreport &rhs);
virtual ~kmeansreport();
ae_int_t &npoints;
ae_int_t &nfeatures;
ae_int_t &terminationtype;
ae_int_t &iterationscount;
double &energy;
ae_int_t &k;
real_2d_array c;
integer_1d_array cidx;
};
#endif
#if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
A random forest (decision forest) builder object.
Used to store dataset and specify decision forest training algorithm settings.
*************************************************************************/
class _decisionforestbuilder_owner
{
// Base class of decisionforestbuilder (declared immediately below). It
// holds p_struct, a pointer to the computational-core structure
// alglib_impl::decisionforestbuilder.
// NOTE(review): only declarations are visible in this header; how p_struct
// is allocated, copied and released is defined out of line - confirm in the
// implementation file.
public:
_decisionforestbuilder_owner();
// Copy constructor and copy assignment are user-declared.
_decisionforestbuilder_owner(const _decisionforestbuilder_owner &rhs);
_decisionforestbuilder_owner& operator=(const _decisionforestbuilder_owner &rhs);
// Virtual destructor: the class is used as a public base class.
virtual ~_decisionforestbuilder_owner();
// Both overloads return a non-const pointer (presumably p_struct), so the
// core structure stays writable through a const wrapper.
alglib_impl::decisionforestbuilder* c_ptr();
alglib_impl::decisionforestbuilder* c_ptr() const;
protected:
// Pointer to the wrapped computational-core structure.
alglib_impl::decisionforestbuilder *p_struct;
};
class decisionforestbuilder : public _decisionforestbuilder_owner
{
// User-facing decision forest builder object (stores the dataset and the
// training settings, per the comment above _decisionforestbuilder_owner).
// It declares no public data members: only construction, copying,
// assignment and destruction are exposed here; the underlying structure is
// reachable through the inherited c_ptr().
public:
decisionforestbuilder();
decisionforestbuilder(const decisionforestbuilder &rhs);
decisionforestbuilder& operator=(const decisionforestbuilder &rhs);
virtual ~decisionforestbuilder();
};
/*************************************************************************
Buffer object which is used to perform various requests (usually model
inference) in the multithreaded mode (multiple threads working with same
DF object).
This object should be created with DFCreateBuffer().
*************************************************************************/
class _decisionforestbuffer_owner
{
// Base class of decisionforestbuffer (declared immediately below). It
// holds p_struct, a pointer to the computational-core structure
// alglib_impl::decisionforestbuffer.
// NOTE(review): only declarations are visible in this header; how p_struct
// is allocated, copied and released is defined out of line - confirm in the
// implementation file.
public:
_decisionforestbuffer_owner();
// Copy constructor and copy assignment are user-declared.
_decisionforestbuffer_owner(const _decisionforestbuffer_owner &rhs);
_decisionforestbuffer_owner& operator=(const _decisionforestbuffer_owner &rhs);
// Virtual destructor: the class is used as a public base class.
virtual ~_decisionforestbuffer_owner();
// Both overloads return a non-const pointer (presumably p_struct), so the
// core structure stays writable through a const wrapper.
alglib_impl::decisionforestbuffer* c_ptr();
alglib_impl::decisionforestbuffer* c_ptr() const;
protected:
// Pointer to the wrapped computational-core structure.
alglib_impl::decisionforestbuffer *p_struct;
};
class decisionforestbuffer : public _decisionforestbuffer_owner
{
// User-facing buffer object for requests against a shared DF object in
// multithreaded mode (per the comment above _decisionforestbuffer_owner,
// which says it should be created with DFCreateBuffer()).
// It declares no public data members: only construction, copying,
// assignment and destruction are exposed here; the underlying structure is
// reachable through the inherited c_ptr().
public:
decisionforestbuffer();
decisionforestbuffer(const decisionforestbuffer &rhs);
decisionforestbuffer& operator=(const decisionforestbuffer &rhs);
virtual ~decisionforestbuffer();
};
/*************************************************************************
Decision forest (random forest) model.
*************************************************************************/
class _decisionforest_owner
{
// Base class of decisionforest (declared immediately below). It holds
// p_struct, a pointer to the computational-core structure
// alglib_impl::decisionforest.
// NOTE(review): only declarations are visible in this header; how p_struct
// is allocated, copied and released is defined out of line - confirm in the
// implementation file.
public:
_decisionforest_owner();
// Copy constructor and copy assignment are user-declared.
_decisionforest_owner(const _decisionforest_owner &rhs);
_decisionforest_owner& operator=(const _decisionforest_owner &rhs);
// Virtual destructor: the class is used as a public base class.
virtual ~_decisionforest_owner();
// Both overloads return a non-const pointer (presumably p_struct), so the
// core structure stays writable through a const wrapper.
alglib_impl::decisionforest* c_ptr();
alglib_impl::decisionforest* c_ptr() const;
protected:
// Pointer to the wrapped computational-core structure.
alglib_impl::decisionforest *p_struct;
};
class decisionforest : public _decisionforest_owner
{
// User-facing decision forest (random forest) model.
// It declares no public data members: only construction, copying,
// assignment and destruction are exposed here; the underlying structure is
// reachable through the inherited c_ptr().
public:
decisionforest();
decisionforest(const decisionforest &rhs);
decisionforest& operator=(const decisionforest &rhs);
virtual ~decisionforest();
};
/*************************************************************************
Decision forest training report.
=== training/oob errors ==================================================
Following fields store training set errors:
* relclserror - fraction of misclassified cases, [0,1]
* avgce - average cross-entropy in bits per symbol
* rmserror - root-mean-square error
* avgerror - average error
* avgrelerror - average relative error
Out-of-bag estimates are stored in fields with same names, but "oob" prefix.
For classification problems:
* RMS, AVG and AVGREL errors are calculated for posterior probabilities
For regression problems:
* RELCLS and AVGCE errors are zero
=== variable importance ==================================================
Following fields are used to store variable importance information:
* topvars - variables ordered from the most important to
less important ones (according to current
choice of importance rating).
For example, topvars[0] contains index of the
most important variable, and topvars[0:2] are
indexes of 3 most important ones and so on.
* varimportances - array[nvars], ratings (the larger, the more
important the variable is, always in [0,1]
range).
By default, filled by zeros (no importance
ratings are provided unless you explicitly
request them).
Zero rating means that variable is not important,
however you will rarely encounter such a thing,
in many cases unimportant variables produce
nearly-zero (but nonzero) ratings.
Variable importance report must be EXPLICITLY requested by calling:
* dfbuildersetimportancegini() function, if you need out-of-bag Gini-based
importance rating also known as MDI (fast to calculate, resistant to
overfitting issues, but has some bias towards continuous and
high-cardinality categorical variables)
* dfbuildersetimportancetrngini() function, if you need training set Gini-
-based importance rating (what other packages typically report).
* dfbuildersetimportancepermutation() function, if you need permutation-
based importance rating also known as MDA (slower to calculate, but less
biased)
* dfbuildersetimportancenone() function, if you do not need importance
ratings - ratings will be zero, topvars[] will be [0,1,2,...]
Different importance ratings (Gini or permutation) produce non-comparable
values. Although in all cases rating values lie in [0,1] range, there are
differences:
* informally speaking, Gini importance rating tends to divide "unit amount
of importance" between several important variables, i.e. it produces
estimates which roughly sum to 1.0 (or less than 1.0, if your task can
not be solved exactly). If all variables are equally important, they
will have same rating, roughly 1/NVars, even if every variable is
critically important.
* from the other side, permutation importance tells us what percentage of
the model predictive power will be ruined by permuting this specific
variable. It does not produce estimates which sum to one. Critically
important variable will have rating close to 1.0, and you may have
multiple variables with such a rating.
More information on variable importance ratings can be found in comments
on the dfbuildersetimportancegini() and dfbuildersetimportancepermutation()
functions.
*************************************************************************/
class _dfreport_owner
{
// Base class of dfreport (declared immediately below). It holds p_struct,
// a pointer to the computational-core structure alglib_impl::dfreport.
// NOTE(review): only declarations are visible in this header; how p_struct
// is allocated, copied and released is defined out of line - confirm in the
// implementation file.
public:
_dfreport_owner();
// Copy constructor and copy assignment are user-declared.
_dfreport_owner(const _dfreport_owner &rhs);
_dfreport_owner& operator=(const _dfreport_owner &rhs);
// Virtual destructor: the class is used as a public base of dfreport.
virtual ~_dfreport_owner();
// Both overloads return a non-const alglib_impl::dfreport* (presumably
// p_struct), so the core structure stays writable through a const wrapper.
alglib_impl::dfreport* c_ptr();
alglib_impl::dfreport* c_ptr() const;
protected:
// Pointer to the wrapped computational-core structure.
alglib_impl::dfreport *p_struct;
};
class dfreport : public _dfreport_owner
{
// User-facing decision forest training report. Field meanings below are
// taken from the "Decision forest training report" comment that precedes
// _dfreport_owner.
// NOTE(review): the scalar fields are C++ references - presumably bound by
// the constructors to members of the wrapped alglib_impl::dfreport; confirm
// in the implementation file.
public:
dfreport();
dfreport(const dfreport &rhs);
dfreport& operator=(const dfreport &rhs);
virtual ~dfreport();
// --- training set errors ---
// Fraction of misclassified cases, [0,1].
double &relclserror;
// Average cross-entropy in bits per symbol.
double &avgce;
// Root-mean-square error.
double &rmserror;
// Average error.
double &avgerror;
// Average relative error.
double &avgrelerror;
// --- out-of-bag estimates of the same quantities ("oob" prefix) ---
double &oobrelclserror;
double &oobavgce;
double &oobrmserror;
double &oobavgerror;
double &oobavgrelerror;
// Variables ordered from the most important to less important ones;
// topvars[0] is the index of the most important variable.
integer_1d_array topvars;
// array[nvars], importance ratings in [0,1]; filled by zeros unless an
// importance rating was explicitly requested.
real_1d_array varimportances;
};
#endif
#if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Buffer object which is used to perform various requests (usually model
inference) in the multithreaded mode (multiple threads working with same
KNN object).
This object should be created with KNNCreateBuffer().
*************************************************************************/
class _knnbuffer_owner
{
// Base class of knnbuffer (declared immediately below). It holds p_struct,
// a pointer to the computational-core structure alglib_impl::knnbuffer.
// NOTE(review): only declarations are visible in this header; how p_struct
// is allocated, copied and released is defined out of line - confirm in the
// implementation file.
public:
_knnbuffer_owner();
// Copy constructor and copy assignment are user-declared.
_knnbuffer_owner(const _knnbuffer_owner &rhs);
_knnbuffer_owner& operator=(const _knnbuffer_owner &rhs);
// Virtual destructor: the class is used as a public base of knnbuffer.
virtual ~_knnbuffer_owner();
// Both overloads return a non-const alglib_impl::knnbuffer* (presumably
// p_struct), so the core structure stays writable through a const wrapper.
alglib_impl::knnbuffer* c_ptr();
alglib_impl::knnbuffer* c_ptr() const;
protected:
// Pointer to the wrapped computational-core structure.
alglib_impl::knnbuffer *p_struct;
};
class knnbuffer : public _knnbuffer_owner
{
// User-facing buffer object for requests against a shared KNN object in
// multithreaded mode (per the comment above _knnbuffer_owner, which says it
// should be created with KNNCreateBuffer()).
// It declares no public data members: only construction, copying,
// assignment and destruction are exposed here; the underlying structure is
// reachable through the inherited c_ptr().
public:
knnbuffer();
knnbuffer(const knnbuffer &rhs);
knnbuffer& operator=(const knnbuffer &rhs);
virtual ~knnbuffer();
};
/*************************************************************************
A KNN builder object; this object encapsulates dataset and all related
settings, it is used to create an actual instance of KNN model.
*************************************************************************/
class _knnbuilder_owner
{
// Base class of knnbuilder (declared immediately below). It holds
// p_struct, a pointer to the computational-core structure
// alglib_impl::knnbuilder.
// NOTE(review): only declarations are visible in this header; how p_struct
// is allocated, copied and released is defined out of line - confirm in the
// implementation file.
public:
_knnbuilder_owner();
// Copy constructor and copy assignment are user-declared.
_knnbuilder_owner(const _knnbuilder_owner &rhs);
_knnbuilder_owner& operator=(const _knnbuilder_owner &rhs);
// Virtual destructor: the class is used as a public base of knnbuilder.
virtual ~_knnbuilder_owner();
// Both overloads return a non-const alglib_impl::knnbuilder* (presumably
// p_struct), so the core structure stays writable through a const wrapper.
alglib_impl::knnbuilder* c_ptr();
alglib_impl::knnbuilder* c_ptr() const;
protected:
// Pointer to the wrapped computational-core structure.
alglib_impl::knnbuilder *p_struct;
};
class knnbuilder : public _knnbuilder_owner
{
// User-facing KNN builder object (encapsulates the dataset and related
// settings, per the comment above _knnbuilder_owner).
// It declares no public data members: only construction, copying,
// assignment and destruction are exposed here; the underlying structure is
// reachable through the inherited c_ptr().
public:
knnbuilder();
knnbuilder(const knnbuilder &rhs);
knnbuilder& operator=(const knnbuilder &rhs);
virtual ~knnbuilder();
};
/*************************************************************************
KNN model, can be used for classification or regression
*************************************************************************/
class _knnmodel_owner
{
// Base class of knnmodel (declared immediately below). It holds p_struct,
// a pointer to the computational-core structure alglib_impl::knnmodel.
// NOTE(review): only declarations are visible in this header; how p_struct
// is allocated, copied and released is defined out of line - confirm in the
// implementation file.
public:
_knnmodel_owner();
// Copy constructor and copy assignment are user-declared.
_knnmodel_owner(const _knnmodel_owner &rhs);
_knnmodel_owner& operator=(const _knnmodel_owner &rhs);
// Virtual destructor: the class is used as a public base of knnmodel.
virtual ~_knnmodel_owner();
// Both overloads return a non-const alglib_impl::knnmodel* (presumably
// p_struct), so the core structure stays writable through a const wrapper.
alglib_impl::knnmodel* c_ptr();
alglib_impl::knnmodel* c_ptr() const;
protected:
// Pointer to the wrapped computational-core structure.
alglib_impl::knnmodel *p_struct;
};
class knnmodel : public _knnmodel_owner
{
// User-facing KNN model (usable for classification or regression, per the
// comment above _knnmodel_owner).
// It declares no public data members: only construction, copying,
// assignment and destruction are exposed here; the underlying structure is
// reachable through the inherited c_ptr().
public:
knnmodel();
knnmodel(const knnmodel &rhs);
knnmodel& operator=(const knnmodel &rhs);
virtual ~knnmodel();
};
/*************************************************************************
KNN training report.
Following fields store training set errors:
* relclserror - fraction of misclassified cases, [0,1]
* avgce - average cross-entropy in bits per symbol
* rmserror - root-mean-square error
* avgerror - average error
* avgrelerror - average relative error
For classification problems:
* RMS, AVG and AVGREL errors are calculated for posterior probabilities
For regression problems:
* RELCLS and AVGCE errors are zero
*************************************************************************/
class _knnreport_owner
{
// Base class of knnreport (declared immediately below). It holds p_struct,
// a pointer to the computational-core structure alglib_impl::knnreport.
// NOTE(review): only declarations are visible in this header; how p_struct
// is allocated, copied and released is defined out of line - confirm in the
// implementation file.
public:
_knnreport_owner();
// Copy constructor and copy assignment are user-declared.
_knnreport_owner(const _knnreport_owner &rhs);
_knnreport_owner& operator=(const _knnreport_owner &rhs);
// Virtual destructor: the class is used as a public base of knnreport.
virtual ~_knnreport_owner();
// Both overloads return a non-const alglib_impl::knnreport* (presumably
// p_struct), so the core structure stays writable through a const wrapper.
alglib_impl::knnreport* c_ptr();
alglib_impl::knnreport* c_ptr() const;
protected:
// Pointer to the wrapped computational-core structure.
alglib_impl::knnreport *p_struct;
};
class knnreport : public _knnreport_owner
{
// User-facing KNN training report. Field meanings below are taken from the
// "KNN training report" comment that precedes _knnreport_owner; all fields
// describe training set errors.
// NOTE(review): the fields are C++ references - presumably bound by the
// constructors to members of the wrapped alglib_impl::knnreport; confirm in
// the implementation file.
public:
knnreport();
knnreport(const knnreport &rhs);
knnreport& operator=(const knnreport &rhs);
virtual ~knnreport();
// Fraction of misclassified cases, [0,1]; zero for regression problems.
double &relclserror;
// Average cross-entropy in bits per symbol; zero for regression problems.
double &avgce;
// Root-mean-square error.
double &rmserror;
// Average error.
double &avgerror;
// Average relative error.
double &avgrelerror;
};
#endif
#if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)
#endif
#if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Principal components analysis
This function builds orthogonal basis where first axis corresponds to
direction with maximum variance, second axis maximizes variance in the
subspace orthogonal to first axis and so on.
This function builds FULL basis, i.e. returns N vectors corresponding to
ALL directions, no matter how informative. If you need just a few (say,
10 or 50) of the most important directions, you may find it faster to use
one of the reduced versions:
* pcatruncatedsubspace() - for subspace iteration based method
It should be noted that, unlike LDA, PCA does not use class labels.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
! (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
X - dataset, array[0..NPoints-1,0..NVars-1].
matrix contains ONLY INDEPENDENT VARIABLES.
NPoints - dataset size, NPoints>=0
NVars - number of independent variables, NVars>=1
OUTPUT PARAMETERS:
Info - return code:
* -4, if SVD subroutine hasn't converged
* -1, if wrong parameters have been passed (NPoints<0,
NVars<1)
* 1, if task is solved
S2 - array[0..NVars-1]. variance values corresponding
to basis vectors.
V - array[0..NVars-1,0..NVars-1]
matrix, whose columns store basis vectors.
NOTE: Info, S2 and V are returned through non-const reference arguments;
the function itself returns void.
-- ALGLIB --
Copyright 25.08.2008 by Bochkanov Sergey
*************************************************************************/
void pcabuildbasis(const real_2d_array &x, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, real_1d_array &s2, real_2d_array &v, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Principal components analysis
This function performs truncated PCA, i.e. returns just a few most important
directions.
Internally it uses iterative eigensolver which is very efficient when only
a minor fraction of full basis is required. Thus, if you need full basis,
it is better to use pcabuildbasis() function.
It should be noted that, unlike LDA, PCA does not use class labels.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
! (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
X - dataset, array[0..NPoints-1,0..NVars-1].
matrix contains ONLY INDEPENDENT VARIABLES.
NPoints - dataset size, NPoints>=0
NVars - number of independent variables, NVars>=1
NNeeded - number of requested components, in [1,NVars] range;
this function is efficient only for NNeeded<<NVars.
Eps - desired precision of vectors returned; underlying
solver will stop iterations as soon as absolute error
in corresponding singular values reduces to roughly
eps*MAX(lambda[]), with lambda[] being array of
eigenvalues.
Zero value means that algorithm performs number of
iterations specified by maxits parameter, without
paying attention to precision.
MaxIts - number of iterations performed by subspace iteration
method. Zero value means that no limit on iteration
count is placed (eps-based stopping condition is used).
OUTPUT PARAMETERS:
S2 - array[NNeeded]. Variance values corresponding
to basis vectors.
V - array[NVars,NNeeded]
matrix, whose columns store basis vectors.
NOTE: passing eps=0 and maxits=0 results in small eps being selected as
stopping condition. Exact value of automatically selected eps is version-
-dependent.
NOTE: unlike pcabuildbasis(), this function has no Info output argument;
S2 and V are returned through non-const reference arguments.
-- ALGLIB --
Copyright 10.01.2017 by Bochkanov Sergey
*************************************************************************/
void pcatruncatedsubspace(const real_2d_array &x, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nneeded, const double eps, const ae_int_t maxits, real_1d_array &s2, real_2d_array &v, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Sparse truncated principal components analysis
This function performs sparse truncated PCA, i.e. returns just a few most
important principal components for a sparse input X.
Internally it uses iterative eigensolver which is very efficient when only
a minor fraction of full basis is required.
It should be noted that, unlike LDA, PCA does not use class labels.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
! (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
X - sparse dataset, sparse npoints*nvars matrix. It is
recommended to use CRS sparse storage format; non-CRS
input will be internally converted to CRS.
Matrix contains ONLY INDEPENDENT VARIABLES, and must
be EXACTLY npoints*nvars.
NPoints - dataset size, NPoints>=0
NVars - number of independent variables, NVars>=1
NNeeded - number of requested components, in [1,NVars] range;
this function is efficient only for NNeeded<<NVars.
Eps - desired precision of vectors returned; underlying
solver will stop iterations as soon as absolute error
in corresponding singular values reduces to roughly
eps*MAX(lambda[]), with lambda[] being array of
eigenvalues.
Zero value means that algorithm performs number of
iterations specified by maxits parameter, without
paying attention to precision.
MaxIts - number of iterations performed by subspace iteration
method. Zero value means that no limit on iteration
count is placed (eps-based stopping condition is used).
OUTPUT PARAMETERS:
S2 - array[NNeeded]. Variance values corresponding
to basis vectors.
V - array[NVars,NNeeded]
matrix, whose columns store basis vectors.
NOTE: passing eps=0 and maxits=0 results in small eps being selected as
a stopping condition. Exact value of automatically selected eps is
version-dependent.
NOTE: zero MaxIts is silently replaced by some reasonable value which
prevents eternal loops (possible when inputs are degenerate and too
stringent stopping criteria are specified). In current version it
is 50+2*NVars.
NOTE: this function has no Info output argument; S2 and V are returned
through non-const reference arguments.
-- ALGLIB --
Copyright 10.01.2017 by Bochkanov Sergey
*************************************************************************/
void pcatruncatedsubspacesparse(const sparsematrix &x, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nneeded, const double eps, const ae_int_t maxits, real_1d_array &s2, real_2d_array &v, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Optimal binary classification
Algorithm finds optimal (=with minimal cross-entropy) binary partition.
Internal subroutine.
INPUT PARAMETERS:
A - array[0..N-1], variable
C - array[0..N-1], class numbers (0 or 1).
N - array size
OUTPUT PARAMETERS:
Info - completion code:
* -3, all values of A[] are same (partition is impossible)
* -2, one of C[] is incorrect (<0, >1)
* -1, incorrect parameters were passed (N<=0).
* 1, OK
Threshold- partition boundary. Left part contains values which are
strictly less than Threshold. Right part contains values
which are greater than or equal to Threshold.
PAL, PBL- probabilities P(0|v<Threshold) and P(1|v<Threshold)
PAR, PBR- probabilities P(0|v>=Threshold) and P(1|v>=Threshold)
CVE - cross-validation estimate of cross-entropy
-- ALGLIB --
Copyright 22.05.2008 by Bochkanov Sergey
*************************************************************************/
void dsoptimalsplit2(const real_1d_array &a, const integer_1d_array &c, const ae_int_t n, ae_int_t &info, double &threshold, double &pal, double &pbl, double &par, double &pbr, double &cve, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Optimal partition, internal subroutine. Fast version.
Accepts:
A array[0..N-1] array of attributes
C array[0..N-1] array of class labels
TiesBuf array[0..N] temporaries (ties)
CntBuf array[0..2*NC-1] temporaries (counts)
Alpha centering factor (0<=alpha<=1, recommended value - 0.05)
BufR array[0..N-1] temporaries
BufI array[0..N-1] temporaries
N not described by the original comment; presumably the
array size used in the bounds above - TODO confirm
NC not described by the original comment; presumably the
number of classes (CntBuf is sized 2*NC) - TODO confirm
Output:
Info error code (">0"=OK, "<0"=bad)
Threshold not described by the original comment; returned through
a reference argument, presumably the partition boundary
as in dsoptimalsplit2() - TODO confirm
RMS training set RMS error
CVRMS leave-one-out RMS error
Note:
content of all arrays is changed by subroutine;
it doesn't allocate temporaries.
-- ALGLIB --
Copyright 11.12.2008 by Bochkanov Sergey
*************************************************************************/
void dsoptimalsplit2fast(real_1d_array &a, integer_1d_array &c, integer_1d_array &tiesbuf, integer_1d_array &cntbuf, real_1d_array &bufr, integer_1d_array &bufi, const ae_int_t n, const ae_int_t nc, const double alpha, ae_int_t &info, double &threshold, double &rms, double &cvrms, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This function serializes data structure to string.
INPUT PARAMETERS:
obj - multilayerperceptron to serialize (taken by non-const
reference)
OUTPUT PARAMETERS:
s_out - string which receives the serialized representation
Important properties of s_out:
* it contains alphanumeric characters, dots, underscores, minus signs
* these symbols are grouped into words, which are separated by spaces
and Windows-style (CR+LF) newlines
* although serializer uses spaces and CR+LF as separators, you can
replace any separator character by arbitrary combination of spaces,
tabs, Windows or Unix newlines. It allows flexible reformatting of
the string in case you want to include it into text or XML file.
But you should not insert separators into the middle of the "words"
nor should you change case of letters.
* s_out can be freely moved between 32-bit and 64-bit systems, little
and big endian machines, and so on. You can serialize structure on
32-bit machine and unserialize it on 64-bit one (or vice versa), or
serialize it on SPARC and unserialize on x86. You can also
serialize it in C++ version of ALGLIB and unserialize in C# one,
and vice versa.
*************************************************************************/
void mlpserialize(multilayerperceptron &obj, std::string &s_out);
/*************************************************************************
This function unserializes data structure from string.
INPUT PARAMETERS:
s_in - string holding the serialized representation
OUTPUT PARAMETERS:
obj - multilayerperceptron which receives the unserialized
structure (returned through a reference argument)
*************************************************************************/
void mlpunserialize(const std::string &s_in, multilayerperceptron &obj);
/*************************************************************************
This function serializes data structure to C++ stream.
Data stream generated by this function is same as string representation
generated by string version of serializer - alphanumeric characters,
dots, underscores, minus signs, which are grouped into words separated by
spaces and CR+LF.
We recommend you to read comments on string version of serializer to find
out more about serialization of ALGLIB objects.
INPUT PARAMETERS:
obj - multilayerperceptron to serialize (taken by non-const
reference)
OUTPUT PARAMETERS:
s_out - output stream which receives the serialized data
*************************************************************************/
void mlpserialize(multilayerperceptron &obj, std::ostream &s_out);
/*************************************************************************
This function unserializes data structure from stream.
INPUT PARAMETERS:
s_in - input stream holding the serialized data
OUTPUT PARAMETERS:
obj - multilayerperceptron which receives the unserialized
structure (returned through a reference argument)
NOTE(review): s_in is declared as const std::istream&, although reading
from a stream normally requires a non-const stream. Presumably the
implementation casts constness away - confirm in the implementation file
before relying on it. The signature is kept as is because it must match
the out-of-line definition.
*************************************************************************/
void mlpunserialize(const std::istream &s_in, multilayerperceptron &obj);
/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers, with linear output layer. Network weights are filled with small
random values.
INPUT PARAMETERS:
NIn - number of inputs
NOut - number of outputs
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate0(const ae_int_t nin, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Same as MLPCreate0, but with one hidden layer (NHid neurons) with
non-linear activation function. Output layer is linear.
INPUT PARAMETERS:
NIn - number of inputs
NHid - number of neurons in the hidden layer
NOut - number of outputs
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Same as MLPCreate0, but with two hidden layers (NHid1 and NHid2 neurons)
with non-linear activation function. Output layer is linear.
INPUT PARAMETERS:
NIn - number of inputs
NHid1, NHid2- numbers of neurons in the two hidden layers
NOut - number of outputs
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
$ALL
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreate2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers with non-linear output layer. Network weights are filled with small
random values.
Activation function of the output layer takes values:
(B, +INF), if D>=0
or
(-INF, B), if D<0.
INPUT PARAMETERS:
NIn - number of inputs
NOut - number of outputs
B - bound of the output range (see above)
D - its sign selects which side of B the outputs lie on
(see above)
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
-- ALGLIB --
Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb0(const ae_int_t nin, const ae_int_t nout, const double b, const double d, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Same as MLPCreateB0 but with non-linear hidden layer.
INPUT PARAMETERS:
NIn, NOut, B, D - same as for MLPCreateB0
NHid - presumably number of neurons in the hidden layer (by
analogy with MLPCreate1) - TODO confirm
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
-- ALGLIB --
Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double b, const double d, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Same as MLPCreateB0 but with two non-linear hidden layers.
INPUT PARAMETERS:
NIn, NOut, B, D - same as for MLPCreateB0
NHid1, NHid2- presumably numbers of neurons in the two hidden layers
(by analogy with MLPCreate2) - TODO confirm
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
-- ALGLIB --
Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreateb2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double b, const double d, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Creates neural network with NIn inputs, NOut outputs, without hidden
layers with non-linear output layer. Network weights are filled with small
random values. Activation function of the output layer takes values [A,B].
INPUT PARAMETERS:
NIn - number of inputs
NOut - number of outputs
A, B - bounds of the output range [A,B]
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
-- ALGLIB --
Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater0(const ae_int_t nin, const ae_int_t nout, const double a, const double b, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Same as MLPCreateR0, but with non-linear hidden layer.
INPUT PARAMETERS:
NIn, NOut, A, B - same as for MLPCreateR0
NHid - presumably number of neurons in the hidden layer (by
analogy with MLPCreate1) - TODO confirm
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
-- ALGLIB --
Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double a, const double b, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Same as MLPCreateR0, but with two non-linear hidden layers.
INPUT PARAMETERS:
NIn, NOut, A, B - same as for MLPCreateR0
NHid1, NHid2- presumably numbers of neurons in the two hidden layers
(by analogy with MLPCreate2) - TODO confirm
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
-- ALGLIB --
Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpcreater2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double a, const double b, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Creates classifier network with NIn inputs and NOut possible classes.
Network contains no hidden layers and linear output layer with SOFTMAX-
normalization (so outputs sum up to 1.0 and converge to posterior
probabilities).
INPUT PARAMETERS:
NIn - number of inputs
NOut - number of possible classes
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec0(const ae_int_t nin, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Same as MLPCreateC0, but with one non-linear hidden layer.
INPUT PARAMETERS:
NIn, NOut - same as for MLPCreateC0
NHid - presumably number of neurons in the hidden layer (by
analogy with MLPCreate1) - TODO confirm
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Same as MLPCreateC0, but with two non-linear hidden layers.
INPUT PARAMETERS:
NIn, NOut - same as for MLPCreateC0
NHid1, NHid2- presumably numbers of neurons in the two hidden layers
(by analogy with MLPCreate2) - TODO confirm
OUTPUT PARAMETERS:
Network - created network (returned through a reference argument)
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcreatec2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Copying of neural network
INPUT PARAMETERS:
Network1 - original (taken by const reference)
OUTPUT PARAMETERS:
Network2 - copy (returned through a non-const reference argument)
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpcopy(const multilayerperceptron &network1, multilayerperceptron &network2, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function copies tunable parameters (weights/means/sigmas) from one
network to another with same architecture. It performs some rudimentary
checks that architectures are same, and throws exception if check fails.
It is intended for fast copying of states between two networks which are
known to have same geometry.
INPUT PARAMETERS:
Network1 - source, must be correctly initialized
Network2 - target, must have same architecture
OUTPUT PARAMETERS:
Network2 - network state is copied from source to target
NOTE(review): Network2 is documented as an output, yet it is declared as
a const reference. Presumably the wrapper modifies the underlying core
structure through a c_ptr()-style accessor - confirm in the
implementation file. The signature is kept as is because it must match
the out-of-line definition.
-- ALGLIB --
Copyright 20.06.2013 by Bochkanov Sergey
*************************************************************************/
void mlpcopytunableparameters(const multilayerperceptron &network1, const multilayerperceptron &network2, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Randomization of neural network weights
INPUT PARAMETERS:
Network - network whose weights are randomized
NOTE(review): Network is declared as a const reference although the
function is documented as changing its weights. Presumably the underlying
core structure is modified through a c_ptr()-style accessor - confirm in
the implementation file.
-- ALGLIB --
Copyright 06.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlprandomize(const multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Randomization of neural network weights and standardizer
INPUT PARAMETERS:
Network - network to randomize
NOTE(review): "standardizer" presumably refers to the means/sigmas used
to scale inputs and outputs - TODO confirm. Network is declared as a
const reference although the function is documented as changing it;
presumably the underlying core structure is modified through a
c_ptr()-style accessor - confirm in the implementation file.
-- ALGLIB --
Copyright 10.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlprandomizefull(const multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Internal subroutine.
INPUT PARAMETERS:
Network - network (declared as a const reference)
XY - presumably the dataset used to initialize the
preprocessor - TODO confirm
SSize - presumably the number of rows of XY to use - TODO confirm
NOTE(review): the parameters are not described by the original comment;
the descriptions above are inferred from names only and must be verified
against the implementation.
-- ALGLIB --
Copyright 30.03.2008 by Bochkanov Sergey
*************************************************************************/
void mlpinitpreprocessor(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Returns information about initialized network: number of inputs, outputs,
weights.
INPUT PARAMETERS:
Network - initialized network
OUTPUT PARAMETERS:
NIn - number of inputs
NOut - number of outputs
WCount - number of weights
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpproperties(const multilayerperceptron &network, ae_int_t &nin, ae_int_t &nout, ae_int_t &wcount, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Returns number of inputs.
INPUT PARAMETERS:
Network - network
RESULT:
number of inputs, returned as ae_int_t
-- ALGLIB --
Copyright 19.10.2011 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpgetinputscount(const multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Returns number of outputs.
INPUT PARAMETERS:
Network - network
RESULT:
number of outputs, returned as ae_int_t
-- ALGLIB --
Copyright 19.10.2011 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpgetoutputscount(const multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Returns number of weights.
INPUT PARAMETERS:
Network - network
RESULT:
number of weights, returned as ae_int_t
-- ALGLIB --
Copyright 19.10.2011 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpgetweightscount(const multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Tells whether network is SOFTMAX-normalized (i.e. classifier) or not.
INPUT PARAMETERS:
Network - network
RESULT:
true for a SOFTMAX-normalized (classifier) network, false otherwise
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
bool mlpissoftmax(const multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns total number of layers (including input, hidden and
output layers).

INPUT PARAMETERS:
    Network     -   network

RESULT:
    total number of layers; input, hidden and output layers are all counted

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpgetlayerscount(const multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns size of K-th layer.

K=0 corresponds to input layer, K=CNT-1 corresponds to output layer.

Size of the output layer is always equal to the number of outputs, although
when we have softmax-normalized network, last neuron doesn't have any
connections - it is just zero.

INPUT PARAMETERS:
    Network     -   network
    K           -   layer index (0 = input layer, CNT-1 = output layer)

RESULT:
    size of the K-th layer

NOTE(review): CNT is not defined in this comment; presumably it is the
              total layer count as returned by MLPGetLayersCount() -
              confirm against the implementation.

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpgetlayersize(const multilayerperceptron &network, const ae_int_t k, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns offset/scaling coefficients for I-th input of the
network.

INPUT PARAMETERS:
    Network     -   network
    I           -   input index

OUTPUT PARAMETERS:
    Mean        -   mean term
    Sigma       -   sigma term, guaranteed to be nonzero.

I-th input is passed through linear transformation
    IN[i] = (IN[i]-Mean)/Sigma
before feeding to the network

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpgetinputscaling(const multilayerperceptron &network, const ae_int_t i, double &mean, double &sigma, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns offset/scaling coefficients for I-th output of the
network.

INPUT PARAMETERS:
    Network     -   network
    I           -   output index

OUTPUT PARAMETERS:
    Mean        -   mean term
    Sigma       -   sigma term, guaranteed to be nonzero.

I-th output is passed through linear transformation
    OUT[i] = OUT[i]*Sigma+Mean
before returning it to user. In case we have SOFTMAX-normalized network,
we return (Mean,Sigma)=(0.0,1.0).

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpgetoutputscaling(const multilayerperceptron &network, const ae_int_t i, double &mean, double &sigma, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns information about I-th neuron of K-th layer.

INPUT PARAMETERS:
    Network     -   network
    K           -   layer index
    I           -   neuron index (within layer)

OUTPUT PARAMETERS:
    FKind       -   activation function type (used by MLPActivationFunction())
                    this value is zero for input or linear neurons
    Threshold   -   also called offset, bias
                    zero for input neurons

NOTE: this function throws exception if layer or neuron with given index
      does not exist.

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpgetneuroninfo(const multilayerperceptron &network, const ae_int_t k, const ae_int_t i, ae_int_t &fkind, double &threshold, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns information about connection from I0-th neuron of
K0-th layer to I1-th neuron of K1-th layer.

INPUT PARAMETERS:
    Network     -   network
    K0          -   layer index (source neuron)
    I0          -   neuron index (within layer K0)
    K1          -   layer index (destination neuron)
    I1          -   neuron index (within layer K1)

RESULT:
    connection weight (zero for non-existent connections)

This function:
1. throws exception if layer or neuron with given index does not exist.
2. returns zero if neurons exist, but there is no connection between them

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
double mlpgetweight(const multilayerperceptron &network, const ae_int_t k0, const ae_int_t i0, const ae_int_t k1, const ae_int_t i1, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets offset/scaling coefficients for I-th input of the
network.

INPUT PARAMETERS:
    Network     -   network
    I           -   input index
    Mean        -   mean term
    Sigma       -   sigma term (if zero, will be replaced by 1.0)

NOTE: I-th input is passed through linear transformation
          IN[i] = (IN[i]-Mean)/Sigma
      before feeding to the network. This function sets Mean and Sigma.

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpsetinputscaling(const multilayerperceptron &network, const ae_int_t i, const double mean, const double sigma, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets offset/scaling coefficients for I-th output of the
network.

INPUT PARAMETERS:
    Network     -   network
    I           -   output index
    Mean        -   mean term
    Sigma       -   sigma term (if zero, will be replaced by 1.0)

NOTE: I-th output is passed through linear transformation
          OUT[i] = OUT[i]*Sigma+Mean
      before returning it to user. This function sets Sigma/Mean. In case
      we have SOFTMAX-normalized network, you can not set (Mean,Sigma) to
      anything other than (0.0,1.0) - this function will throw exception.

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpsetoutputscaling(const multilayerperceptron &network, const ae_int_t i, const double mean, const double sigma, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function modifies information about I-th neuron of K-th layer.

INPUT PARAMETERS:
    Network     -   network
    K           -   layer index
    I           -   neuron index (within layer)
    FKind       -   activation function type (used by MLPActivationFunction())
                    this value must be zero for input neurons
                    (you can not set activation function for input neurons)
    Threshold   -   also called offset, bias
                    this value must be zero for input neurons
                    (you can not set threshold for input neurons)

NOTES:
1. this function throws exception if layer or neuron with given index does
   not exist.
2. this function also throws exception when you try to set non-linear
   activation function for input neurons (any kind of network) or for output
   neurons of classifier network.
3. this function throws exception when you try to set non-zero threshold for
   input neurons (any kind of network).

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpsetneuroninfo(const multilayerperceptron &network, const ae_int_t k, const ae_int_t i, const ae_int_t fkind, const double threshold, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function modifies information about connection from I0-th neuron of
K0-th layer to I1-th neuron of K1-th layer.

INPUT PARAMETERS:
    Network     -   network
    K0          -   layer index (source neuron)
    I0          -   neuron index (within layer K0)
    K1          -   layer index (destination neuron)
    I1          -   neuron index (within layer K1)
    W           -   connection weight (must be zero for non-existent
                    connections)

This function:
1. throws exception if layer or neuron with given index does not exist.
2. throws exception if you try to set non-zero weight for non-existent
   connection

  -- ALGLIB --
     Copyright 25.03.2011 by Bochkanov Sergey
*************************************************************************/
void mlpsetweight(const multilayerperceptron &network, const ae_int_t k0, const ae_int_t i0, const ae_int_t k1, const ae_int_t i1, const double w, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Neural network activation function

INPUT PARAMETERS:
    NET         -   neuron input
    K           -   function index (zero for linear function)

OUTPUT PARAMETERS:
    F           -   function
    DF          -   its derivative
    D2F         -   its second derivative

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpactivationfunction(const double net, const ae_int_t k, double &f, double &df, double &d2f, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Processing of an input vector by the neural network.

INPUT PARAMETERS:
    Network     -   neural network
    X           -   input vector, array[0..NIn-1].

OUTPUT PARAMETERS:
    Y           -   result. Regression estimate when solving regression
                    task, vector of posterior probabilities for
                    classification task.

See also MLPProcessI

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpprocess(const multilayerperceptron &network, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
'interactive'  variant  of  MLPProcess  for  languages  like  Python which
support constructs like "Y = MLPProcess(NN,X)" and interactive mode of the
interpreter

This function allocates new array on each call,  so  it  is  significantly
slower than its 'non-interactive' counterpart, but it is  more  convenient
when you call it from command line.

In this C++ interface the parameter list is the same as that of MLPProcess:
the result is delivered through the Y argument.

  -- ALGLIB --
     Copyright 21.09.2010 by Bochkanov Sergey
*************************************************************************/
void mlpprocessi(const multilayerperceptron &network, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Error of the neural network on dataset.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
double mlperror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Error of the neural network on dataset given by sparse matrix.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network
    XY          -   training set, see below for information on the
                    training set format. This function checks correctness
                    of the dataset (no NANs/INFs, class numbers are
                    correct) and throws exception when incorrect dataset
                    is passed. Sparse matrix must use CRS format for
                    storage.
    NPoints     -   points count, >=0

RESULT:
    sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
double mlperrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Natural error function for neural network, internal subroutine.

NOTE: this function is single-threaded. Unlike other error functions, it
      receives no speed-up from being executed in SMP mode.

NOTE(review): parameters are not documented in the original comment.
              Presumably XY is the dataset and SSize is the number of
              samples in it, following the conventions of MLPError() -
              confirm against the implementation.

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
double mlperrorn(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Classification error of the neural network on dataset.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    classification error (number of misclassified cases)

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
ae_int_t mlpclserror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Relative classification error on the test set.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    Percent of incorrectly classified cases. Works both for classifier
    networks and general purpose networks used as classifiers.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 25.12.2008 by Bochkanov Sergey
*************************************************************************/
double mlprelclserror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Relative classification error on the test set given by sparse matrix.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format. Sparse matrix must use CRS format
                    for storage.
    NPoints     -   points count, >=0.

RESULT:
    Percent of incorrectly classified cases. Works both for classifier
    networks and general purpose networks used as classifiers.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlprelclserrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average cross-entropy (in bits per element) on the test set.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    CrossEntropy/(NPoints*LN(2)).
    Zero if network solves regression task.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 08.01.2009 by Bochkanov Sergey
*************************************************************************/
double mlpavgce(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average cross-entropy (in bits per element) on the test set given by
sparse matrix.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format. This function checks correctness
                    of the dataset (no NANs/INFs, class numbers are
                    correct) and throws exception when incorrect dataset
                    is passed. Sparse matrix must use CRS format for
                    storage.
    NPoints     -   points count, >=0.

RESULT:
    CrossEntropy/(NPoints*LN(2)).
    Zero if network solves regression task.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 9.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlpavgcesparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
RMS error on the test set.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    Root mean square error. Its meaning for regression task is obvious. As
    for classification task, RMS error means error when estimating
    posterior probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
double mlprmserror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
RMS error on the test set given by sparse matrix.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format. This function checks correctness
                    of the dataset (no NANs/INFs, class numbers are
                    correct) and throws exception when incorrect dataset
                    is passed. Sparse matrix must use CRS format for
                    storage.
    NPoints     -   points count, >=0.

RESULT:
    Root mean square error. Its meaning for regression task is obvious. As
    for classification task, RMS error means error when estimating
    posterior probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlprmserrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average absolute error on the test set.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    Its meaning for regression task is obvious. As for classification
    task, it means average error when estimating posterior probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 11.03.2008 by Bochkanov Sergey
*************************************************************************/
double mlpavgerror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average absolute error on the test set given by sparse matrix.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format. This function checks correctness
                    of the dataset (no NANs/INFs, class numbers are
                    correct) and throws exception when incorrect dataset
                    is passed. Sparse matrix must use CRS format for
                    storage.
    NPoints     -   points count, >=0.

RESULT:
    Its meaning for regression task is obvious. As for classification
    task, it means average error when estimating posterior probabilities.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlpavgerrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average relative error on the test set.

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    Network     -   neural network;
    XY          -   training set, see below for information on the
                    training set format;
    NPoints     -   points count.

RESULT:
    Its meaning for regression task is obvious. As for classification
    task, it means average relative error when estimating posterior
    probability of belonging to the correct class.

DATASET FORMAT:

This function uses two different dataset formats - one for regression
networks, another one for classification networks.

For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs

For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
  NClasses-1).

  -- ALGLIB --
     Copyright 11.03.2008 by Bochkanov Sergey
*************************************************************************/
double mlpavgrelerror(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average relative error on the test set given by sparse matrix.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
Network - neural network;
XY - training set, see below for information on the
training set format. This function checks correctness
of the dataset (no NANs/INFs, class numbers are
correct) and throws exception when incorrect dataset
is passed. Sparse matrix must use CRS format for
storage.
NPoints - points count, >=0.
RESULT:
Its meaning for regression task is obvious. As for classification task, it
means average relative error when estimating posterior probability of
belonging to the correct class.
DATASET FORMAT:
This function uses two different dataset formats - one for regression
networks, another one for classification networks.
For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs
For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
NClasses-1).
-- ALGLIB --
Copyright 09.08.2012 by Bochkanov Sergey
*************************************************************************/
double mlpavgrelerrorsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Gradient calculation for a single input/output pair
INPUT PARAMETERS:
Network - network initialized with one of the network creation funcs
X - input vector, length of array must be at least NIn
DesiredY- desired outputs, length of array must be at least NOut
Grad - possibly preallocated array. If size of array is smaller
than WCount, it will be reallocated. It is recommended to
reuse previously allocated array to reduce allocation
overhead.
OUTPUT PARAMETERS:
E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
Grad - gradient of E with respect to weights of network, array[WCount]
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgrad(const multilayerperceptron &network, const real_1d_array &x, const real_1d_array &desiredy, double &e, real_1d_array &grad, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Gradient calculation for a single input/output pair
(natural error function is used)
INPUT PARAMETERS:
Network - network initialized with one of the network creation funcs
X - input vector, length of array must be at least NIn
DesiredY- desired outputs, length of array must be at least NOut
Grad - possibly preallocated array. If size of array is smaller
than WCount, it will be reallocated. It is recommended to
reuse previously allocated array to reduce allocation
overhead.
OUTPUT PARAMETERS:
E - error function, sum-of-squares for regression networks,
cross-entropy for classification networks.
Grad - gradient of E with respect to weights of network, array[WCount]
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgradn(const multilayerperceptron &network, const real_1d_array &x, const real_1d_array &desiredy, double &e, real_1d_array &grad, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Batch gradient calculation for a set of inputs/outputs
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
Network - network initialized with one of the network creation funcs
XY - original dataset in dense format; one sample = one row:
* first NIn columns contain inputs,
* for regression problem, next NOut columns store
desired outputs.
* for classification problem, next column (just one!)
stores class number.
SSize - number of samples (rows) in XY
Grad - possibly preallocated array. If size of array is smaller
than WCount, it will be reallocated. It is recommended to
reuse previously allocated array to reduce allocation
overhead.
OUTPUT PARAMETERS:
E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
Grad - gradient of E with respect to weights of network, array[WCount]
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Batch gradient calculation for a set of inputs/outputs given by sparse
matrices
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
Network - network initialized with one of the network creation funcs
XY - original dataset in sparse format; one sample = one row:
* MATRIX MUST BE STORED IN CRS FORMAT
* first NIn columns contain inputs.
* for regression problem, next NOut columns store
desired outputs.
* for classification problem, next column (just one!)
stores class number.
SSize - number of samples (rows) in XY
Grad - possibly preallocated array. If size of array is smaller
than WCount, it will be reallocated. It is recommended to
reuse previously allocated array to reduce allocation
overhead.
OUTPUT PARAMETERS:
E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
Grad - gradient of E with respect to weights of network, array[WCount]
-- ALGLIB --
Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatchsparse(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t ssize, double &e, real_1d_array &grad, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Batch gradient calculation for a subset of dataset
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
Network - network initialized with one of the network creation funcs
XY - original dataset in dense format; one sample = one row:
* first NIn columns contain inputs,
* for regression problem, next NOut columns store
desired outputs.
* for classification problem, next column (just one!)
stores class number.
SetSize - real size of XY, SetSize>=0;
Idx - subset of SubsetSize elements, array[SubsetSize]:
* Idx[I] stores row index in the original dataset which is
given by XY. Gradient is calculated with respect to rows
whose indexes are stored in Idx[].
* Idx[] must store correct indexes; this function throws
an exception in case an incorrect index (less than 0 or
larger than rows(XY)) is given
* Idx[] may store indexes in any order and even with
repetitions.
SubsetSize- number of elements in Idx[] array:
* positive value means that subset given by Idx[] is processed
* zero value results in zero gradient
* negative value means that full dataset is processed
Grad - possibly preallocated array. If size of array is smaller
than WCount, it will be reallocated. It is recommended to
reuse previously allocated array to reduce allocation
overhead.
OUTPUT PARAMETERS:
E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
Grad - gradient of E with respect to weights of network,
array[WCount]
-- ALGLIB --
Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatchsubset(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t setsize, const integer_1d_array &idx, const ae_int_t subsetsize, double &e, real_1d_array &grad, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Batch gradient calculation for a set of inputs/outputs for a subset of
dataset given by set of indexes.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
Network - network initialized with one of the network creation funcs
XY - original dataset in sparse format; one sample = one row:
* MATRIX MUST BE STORED IN CRS FORMAT
* first NIn columns contain inputs,
* for regression problem, next NOut columns store
desired outputs.
* for classification problem, next column (just one!)
stores class number.
SetSize - real size of XY, SetSize>=0;
Idx - subset of SubsetSize elements, array[SubsetSize]:
* Idx[I] stores row index in the original dataset which is
given by XY. Gradient is calculated with respect to rows
whose indexes are stored in Idx[].
* Idx[] must store correct indexes; this function throws
an exception in case an incorrect index (less than 0 or
larger than rows(XY)) is given
* Idx[] may store indexes in any order and even with
repetitions.
SubsetSize- number of elements in Idx[] array:
* positive value means that subset given by Idx[] is processed
* zero value results in zero gradient
* negative value means that full dataset is processed
Grad - possibly preallocated array. If size of array is smaller
than WCount, it will be reallocated. It is recommended to
reuse previously allocated array to reduce allocation
overhead.
OUTPUT PARAMETERS:
E - error function, SUM(sqr(y[i]-desiredy[i])/2,i)
Grad - gradient of E with respect to weights of network,
array[WCount]
NOTE: when SubsetSize<0, the full dataset is processed by calling the
MLPGradBatchSparse function.
-- ALGLIB --
Copyright 26.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpgradbatchsparsesubset(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t setsize, const integer_1d_array &idx, const ae_int_t subsetsize, double &e, real_1d_array &grad, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Batch gradient calculation for a set of inputs/outputs
(natural error function is used)
INPUT PARAMETERS:
Network - network initialized with one of the network creation funcs
XY - set of inputs/outputs; one sample = one row;
first NIn columns contain inputs,
next NOut columns - desired outputs.
SSize - number of samples (rows) in XY
Grad - possibly preallocated array. If size of array is smaller
than WCount, it will be reallocated. It is recommended to
reuse previously allocated array to reduce allocation
overhead.
OUTPUT PARAMETERS:
E - error function, sum-of-squares for regression networks,
cross-entropy for classification networks.
Grad - gradient of E with respect to weights of network, array[WCount]
-- ALGLIB --
Copyright 04.11.2007 by Bochkanov Sergey
*************************************************************************/
void mlpgradnbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Batch Hessian calculation (natural error function) using R-algorithm.
Internal subroutine.
NOTE(review): parameters are not documented here. Judging by the signature,
Network/XY/SSize are inputs, while E, Grad and H are passed by
non-const reference and are presumably outputs (error value, its
gradient and its Hessian) - confirm against the implementation.
-- ALGLIB --
Copyright 26.01.2008 by Bochkanov Sergey.
Hessian calculation based on R-algorithm described in
"Fast Exact Multiplication by the Hessian",
B. A. Pearlmutter,
Neural Computation, 1994.
*************************************************************************/
void mlphessiannbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, real_2d_array &h, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Batch Hessian calculation using R-algorithm.
Internal subroutine.
NOTE(review): parameters are not documented here. Judging by the signature,
Network/XY/SSize are inputs, while E, Grad and H are passed by
non-const reference and are presumably outputs (error value, its
gradient and its Hessian) - confirm against the implementation.
-- ALGLIB --
Copyright 26.01.2008 by Bochkanov Sergey.
Hessian calculation based on R-algorithm described in
"Fast Exact Multiplication by the Hessian",
B. A. Pearlmutter,
Neural Computation, 1994.
*************************************************************************/
void mlphessianbatch(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t ssize, double &e, real_1d_array &grad, real_2d_array &h, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Calculation of all types of errors on subset of dataset.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
Network - network initialized with one of the network creation funcs
XY - original dataset; one sample = one row;
first NIn columns contain inputs,
next NOut columns - desired outputs.
SetSize - real size of XY, SetSize>=0;
Subset - subset of SubsetSize elements, array[SubsetSize];
SubsetSize- number of elements in Subset[] array:
* if SubsetSize>0, rows of XY with indices Subset[0]...
...Subset[SubsetSize-1] are processed
* if SubsetSize=0, zeros are returned
* if SubsetSize<0, entire dataset is processed; Subset[]
array is ignored in this case.
OUTPUT PARAMETERS:
Rep - structure which contains all types of errors.
-- ALGLIB --
Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
void mlpallerrorssubset(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, modelerrors &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Calculation of all types of errors on subset of dataset given by sparse
matrix.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
Network - network initialized with one of the network creation funcs
XY - original dataset given by sparse matrix;
one sample = one row;
first NIn columns contain inputs,
next NOut columns - desired outputs.
SetSize - real size of XY, SetSize>=0;
Subset - subset of SubsetSize elements, array[SubsetSize];
SubsetSize- number of elements in Subset[] array:
* if SubsetSize>0, rows of XY with indices Subset[0]...
...Subset[SubsetSize-1] are processed
* if SubsetSize=0, zeros are returned
* if SubsetSize<0, entire dataset is processed; Subset[]
array is ignored in this case.
OUTPUT PARAMETERS:
Rep - structure which contains all types of errors.
-- ALGLIB --
Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
void mlpallerrorssparsesubset(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, modelerrors &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Error of the neural network on subset of dataset.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
Network - neural network;
XY - training set, see below for information on the
training set format;
SetSize - real size of XY, SetSize>=0;
Subset - subset of SubsetSize elements, array[SubsetSize];
SubsetSize- number of elements in Subset[] array:
* if SubsetSize>0, rows of XY with indices Subset[0]...
...Subset[SubsetSize-1] are processed
* if SubsetSize=0, zeros are returned
* if SubsetSize<0, entire dataset is processed; Subset[]
array is ignored in this case.
RESULT:
sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
DATASET FORMAT:
This function uses two different dataset formats - one for regression
networks, another one for classification networks.
For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs
For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
NClasses-1).
-- ALGLIB --
Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
double mlperrorsubset(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Error of the neural network on subset of sparse dataset.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
Network - neural network;
XY - training set, see below for information on the
training set format. This function checks correctness
of the dataset (no NANs/INFs, class numbers are
correct) and throws exception when incorrect dataset
is passed. Sparse matrix must use CRS format for
storage.
SetSize - real size of XY, SetSize>=0;
it is used when SubsetSize<0;
Subset - subset of SubsetSize elements, array[SubsetSize];
SubsetSize- number of elements in Subset[] array:
* if SubsetSize>0, rows of XY with indices Subset[0]...
...Subset[SubsetSize-1] are processed
* if SubsetSize=0, zeros are returned
* if SubsetSize<0, entire dataset is processed; Subset[]
array is ignored in this case.
RESULT:
sum-of-squares error, SUM(sqr(y[i]-desired_y[i])/2)
DATASET FORMAT:
This function uses two different dataset formats - one for regression
networks, another one for classification networks.
For regression networks with NIn inputs and NOut outputs following dataset
format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs
For classification networks with NIn inputs and NClasses classes following
dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
NClasses-1).
-- ALGLIB --
Copyright 04.09.2012 by Bochkanov Sergey
*************************************************************************/
double mlperrorsparsesubset(const multilayerperceptron &network, const sparsematrix &xy, const ae_int_t setsize, const integer_1d_array &subset, const ae_int_t subsetsize, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Multiclass Fisher LDA
Subroutine finds coefficients of linear combination which optimally separates
training set into classes.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial version of ALGLIB includes two important improvements of
! this function, which can be used from C++ and C#:
! * Intel MKL support (lightweight Intel MKL is shipped with ALGLIB)
! * multithreading support
!
! Intel MKL gives approximately constant (with respect to number of
! worker threads) acceleration factor which depends on CPU being used,
! problem size and "baseline" ALGLIB edition which is used for
! comparison. Best results are achieved for high-dimensional problems
! (NVars is at least 256).
!
! Multithreading is used to accelerate initial phase of LDA, which
! includes calculation of products of large matrices. Again, for best
! efficiency problem must be high-dimensional.
!
! Generally, commercial ALGLIB is several times faster than open-source
! generic C edition, and many times faster than open-source C# edition.
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
XY - training set, array[0..NPoints-1,0..NVars].
First NVars columns store values of independent
variables, next column stores number of class (from 0
to NClasses-1) which dataset element belongs to. Fractional
values are rounded to nearest integer.
NPoints - training set size, NPoints>=0
NVars - number of independent variables, NVars>=1
NClasses - number of classes, NClasses>=2
OUTPUT PARAMETERS:
Info - return code:
* -4, if internal EVD subroutine hasn't converged
* -2, if there is a point with class number
outside of [0..NClasses-1].
* -1, if incorrect parameters were passed (NPoints<0,
NVars<1, NClasses<2)
* 1, if task has been solved
* 2, if there was a multicollinearity in training set,
but task has been solved.
W - linear combination coefficients, array[0..NVars-1]
-- ALGLIB --
Copyright 31.05.2008 by Bochkanov Sergey
*************************************************************************/
void fisherlda(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, ae_int_t &info, real_1d_array &w, const xparams _xparams = alglib::xdefault);
/*************************************************************************
N-dimensional multiclass Fisher LDA
Subroutine finds coefficients of linear combinations which optimally separate
training set into classes. It returns N-dimensional basis whose vectors are
sorted by quality of training set separation (in descending order).
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
! (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
XY - training set, array[0..NPoints-1,0..NVars].
First NVars columns store values of independent
variables, next column stores number of class (from 0
to NClasses-1) which dataset element belongs to. Fractional
values are rounded to nearest integer.
NPoints - training set size, NPoints>=0
NVars - number of independent variables, NVars>=1
NClasses - number of classes, NClasses>=2
OUTPUT PARAMETERS:
Info - return code:
* -4, if internal EVD subroutine hasn't converged
* -2, if there is a point with class number
outside of [0..NClasses-1].
* -1, if incorrect parameters were passed (NPoints<0,
NVars<1, NClasses<2)
* 1, if task has been solved
* 2, if there was a multicollinearity in training set,
but task has been solved.
W - basis, array[0..NVars-1,0..NVars-1]
columns of matrix store basis vectors, sorted by
quality of training set separation (in descending order)
-- ALGLIB --
Copyright 31.05.2008 by Bochkanov Sergey
*************************************************************************/
void fisherldan(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, ae_int_t &info, real_2d_array &w, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This function creates SSA model object. Right after creation model is in
"dummy" mode - you can add data, but analyzing/prediction will return
just zeros (it assumes that basis is empty).
HOW TO USE SSA MODEL:
1. create model with ssacreate()
2. add data with one/many ssaaddsequence() calls
3. choose SSA algorithm with one of ssasetalgo...() functions:
* ssasetalgotopkdirect() for direct one-run analysis
* ssasetalgotopkrealtime() for algorithm optimized for many subsequent
runs with warm-start capabilities
* ssasetalgoprecomputed() for user-supplied basis
4. set window width with ssasetwindow()
5. perform one of the analysis-related activities:
a) call ssagetbasis() to get basis
b) call ssaanalyzelast(), ssaanalyzesequence() or ssaanalyzelastwindow()
to perform analysis (trend/noise separation)
c) call one of the forecasting functions (ssaforecastlast() or
ssaforecastsequence()) to perform prediction; alternatively, you can
extract linear recurrence coefficients with ssagetlrr().
SSA analysis will be performed during first call to analysis-related
function. SSA model is smart enough to track all changes in the dataset
and model settings, to cache previously computed basis and to
re-evaluate basis only when necessary.
Additionally, if your setting involves constant stream of incoming data,
you can perform quick update of already calculated model with one of the
incremental append-and-update functions: ssaappendpointandupdate() or
ssaappendsequenceandupdate().
NOTE: steps (2), (3), (4) can be performed in arbitrary order.
INPUT PARAMETERS:
none
OUTPUT PARAMETERS:
S - structure which stores model state
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssacreate(ssamodel &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets window width for SSA model. You should call it before
analysis phase. Default window width is 1 (not for real use).
Special notes:
* this function call can be performed at any moment before first call to
analysis-related functions
* changing window width invalidates internally stored basis; if you change
window width AFTER you call analysis-related function, next analysis
phase will require re-calculation of the basis according to current
algorithm.
* calling this function with exactly the same window width as the current
one has no effect
* if you specify window width larger than any data sequence stored in the
model, analysis will return zero basis.
INPUT PARAMETERS:
S - SSA model created with ssacreate()
WindowWidth - >=1, new window width
OUTPUT PARAMETERS:
S - SSA model, updated
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetwindow(const ssamodel &s, const ae_int_t windowwidth, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets seed which is used to initialize internal RNG when
we make pseudorandom decisions on model updates.
By default, deterministic seed is used - which results in same sequence of
pseudorandom decisions every time you run SSA model. If you specify non-
deterministic seed value, then SSA model may return slightly different
results after each run.
This function can be useful when you have several SSA models updated with
ssaappendpointandupdate() called with 0<UpdateIts<1 (fractional value) and
due to performance limitations want them to perform updates at different
moments.
INPUT PARAMETERS:
S - SSA model
Seed - seed:
* positive values = use deterministic seed for each run of
algorithms which depend on random initialization
* zero or negative values = use non-deterministic seed
-- ALGLIB --
Copyright 03.11.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetseed(const ssamodel &s, const ae_int_t seed, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets length of power-up cycle for real-time algorithm.
By default, this algorithm performs costly O(N*WindowWidth^2) init phase
followed by full run of truncated EVD. However, if you are ready to
live with a bit lower-quality basis during first few iterations, you can
split this O(N*WindowWidth^2) initialization between several subsequent
append-and-update rounds. It results in better latency of the algorithm.
This function invalidates basis/solver; next analysis call will result in
full recalculation of everything.
INPUT PARAMETERS:
S - SSA model
PWLen - length of the power-up stage:
* 0 means that no power-up is requested
* 1 is the same as 0
* >1 means that delayed power-up is performed
-- ALGLIB --
Copyright 03.11.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetpoweruplength(const ssamodel &s, const ae_int_t pwlen, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets memory limit of SSA analysis.
Straightforward SSA with sequence length T and window width W needs O(T*W)
memory. It is possible to reduce memory consumption by splitting task into
smaller chunks.
This function allows you to specify approximate memory limit (measured in
double precision numbers used for buffers). Actual memory consumption will
be comparable to the number specified by you.
Default memory limit is 50.000.000 (400Mbytes) in current version.
INPUT PARAMETERS:
S - SSA model
MemLimit- memory limit, >=0. Zero value means no limit.
-- ALGLIB --
Copyright 20.12.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetmemorylimit(const ssamodel &s, const ae_int_t memlimit, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function adds data sequence to SSA model. Only single-dimensional
sequences are supported.
What is a sequence? Following definitions/requirements apply:
* a sequence is an array of values measured in subsequent, equally
separated time moments (ticks).
* you may have many sequences in your dataset; say, one sequence may
correspond to one trading session.
* sequence length should be larger than current window length (shorter
sequences will be ignored during analysis).
* analysis is performed within a sequence; different sequences are NOT
stacked together to produce one large contiguous stream of data.
* analysis is performed for all sequences at once, i.e. same set of basis
vectors is computed for all sequences
INCREMENTAL ANALYSIS
This function is not intended for incremental updates of previously found
SSA basis. Calling it invalidates all previous analysis results (basis is
reset and will be recalculated from zero during next analysis).
If you want to perform incremental/real-time SSA, consider using
following functions:
* ssaappendpointandupdate() for appending one point
* ssaappendsequenceandupdate() for appending new sequence
INPUT PARAMETERS:
S - SSA model created with ssacreate()
X - array[N], data, can be larger (additional values
are ignored)
N - data length, can be automatically determined from
the array length. N>=0.
OUTPUT PARAMETERS:
S - SSA model, updated
NOTE: you can clear dataset with ssacleardata()
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaaddsequence(const ssamodel &s, const real_1d_array &x, const ae_int_t n, const xparams _xparams = alglib::xdefault);
void ssaaddsequence(const ssamodel &s, const real_1d_array &x, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function appends single point to last data sequence stored in the SSA
model and tries to update model in an incremental manner (if possible
with current algorithm).
If you want to add more than one point at once:
* if you want to add M points to the same sequence, perform M-1 calls with
UpdateIts parameter set to 0.0, and last call with non-zero UpdateIts.
* if you want to add new sequence, use ssaappendsequenceandupdate()
Running time of this function does NOT depend on dataset size, only on
window width and number of singular vectors. Depending on algorithm being
used, incremental update has complexity:
* for top-K real time - O(UpdateIts*K*Width^2), with fractional UpdateIts
* for top-K direct - O(Width^3) for any non-zero UpdateIts
* for precomputed basis - O(1), no update is performed
INPUT PARAMETERS:
S - SSA model created with ssacreate()
X - new point
UpdateIts - >=0, floating point (!) value, desired update
frequency:
* zero value means that point is stored, but no
update is performed
* integer part of the value means that specified
number of iterations is always performed
* fractional part of the value means that one
iteration is performed with this probability.
Recommended value: 0<UpdateIts<=1. Values larger
than 1 are VERY seldom needed. If your dataset
changes slowly, you can set it to 0.1 and skip
90% of updates.
In any case, no information is lost even with zero
value of UpdateIts! It will be incorporated into
model, sooner or later.
OUTPUT PARAMETERS:
S - SSA model, updated
NOTE: this function uses internal RNG to handle fractional values of
UpdateIts. By default it is initialized with fixed seed during
initial calculation of basis. Thus subsequent calls to this function
will result in the same sequence of pseudorandom decisions.
However, if you have several SSA models which are calculated
simultaneously, and if you want to reduce computational bottlenecks
by performing random updates at random moments, then fixed seed is
not an option - all updates will fire at the same moments.
You may change it with ssasetseed() function.
NOTE: this function throws an exception if called for empty dataset (there
is no "last" sequence to modify).
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaappendpointandupdate(const ssamodel &s, const double x, const double updateits, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function appends new sequence to dataset stored in the SSA model and
tries to update model in an incremental manner (if possible with current
algorithm).
Notes:
* if you want to add M sequences at once, perform M-1 calls with UpdateIts
parameter set to 0.0, and last call with non-zero UpdateIts.
* if you want to add just one point, use ssaappendpointandupdate()
Running time of this function does NOT depend on dataset size, only on
sequence length, window width and number of singular vectors. Depending on
algorithm being used, incremental update has complexity:
* for top-K real time - O(UpdateIts*K*Width^2+(NTicks-Width)*Width^2)
* for top-K direct - O(Width^3+(NTicks-Width)*Width^2)
* for precomputed basis - O(1), no update is performed
INPUT PARAMETERS:
S - SSA model created with ssacreate()
X - new sequence, array[NTicks] or larger
NTicks - >=1, number of ticks in the sequence
UpdateIts - >=0, floating point (!) value, desired update
frequency:
* zero value means that sequence is stored, but no
update is performed
* integer part of the value means that specified
number of iterations is always performed
* fractional part of the value means that one
iteration is performed with this probability.
Recommended value: 0<UpdateIts<=1. Values larger
than 1 are VERY seldom needed. If your dataset
changes slowly, you can set it to 0.1 and skip
90% of updates.
In any case, no information is lost even with zero
value of UpdateIts! It will be incorporated into
model, sooner or later.
OUTPUT PARAMETERS:
S - SSA model, updated
NOTE: this function uses internal RNG to handle fractional values of
UpdateIts. By default it is initialized with fixed seed during
initial calculation of basis. Thus subsequent calls to this function
will result in the same sequence of pseudorandom decisions.
However, if you have several SSA models which are calculated
simultaneously, and if you want to reduce computational bottlenecks
by performing random updates at random moments, then fixed seed is
not an option - all updates will fire at the same moments.
You may change it with ssasetseed() function.
NOTE(review): the overload without NTicks presumably determines the
sequence length from the size of X - confirm against the
implementation.
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaappendsequenceandupdate(const ssamodel &s, const real_1d_array &x, const ae_int_t nticks, const double updateits, const xparams _xparams = alglib::xdefault);
void ssaappendsequenceandupdate(const ssamodel &s, const real_1d_array &x, const double updateits, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets SSA algorithm to "precomputed vectors" algorithm.
This algorithm uses precomputed set of orthonormal (orthogonal AND
normalized) basis vectors supplied by user. Thus, basis calculation phase
is not performed - we already have our basis - and only analysis/
forecasting phase requires actual calculations.
This algorithm may handle "append" requests which add just one/few ticks
to the end of the last sequence in O(1) time.
NOTE: this algorithm accepts both basis and window width, because these
two parameters are naturally aligned. Calling this function sets
window width; if you call ssasetwindow() with other window width,
then during analysis stage algorithm will detect conflict and reset
to zero basis.
INPUT PARAMETERS:
S - SSA model
A - array[WindowWidth,NBasis], orthonormalized basis;
this function does NOT control orthogonality and
does NOT perform any kind of renormalization. It
is your responsibility to provide it with correct
basis.
WindowWidth - window width, >=1
NBasis - number of basis vectors, 1<=NBasis<=WindowWidth
OUTPUT PARAMETERS:
S - updated model
NOTE: calling this function invalidates basis in all cases.
NOTE(review): the overload without WindowWidth/NBasis presumably takes
both values from the dimensions of A (rows and columns) - confirm
against the implementation.
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetalgoprecomputed(const ssamodel &s, const real_2d_array &a, const ae_int_t windowwidth, const ae_int_t nbasis, const xparams _xparams = alglib::xdefault);
void ssasetalgoprecomputed(const ssamodel &s, const real_2d_array &a, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets SSA algorithm to "direct top-K" algorithm.
"Direct top-K" algorithm performs full SVD of the N*WINDOW trajectory
matrix (hence its name - direct solver is used), then extracts top K
components. Overall running time is O(N*WINDOW^2), where N is the number
of ticks in the dataset, WINDOW is the window width.
This algorithm may handle "append" requests which add just one/few ticks
to the end of the last sequence in O(WINDOW^3) time, which is ~N/WINDOW
times faster than re-computing everything from scratch.
INPUT PARAMETERS:
S - SSA model
TopK - number of components to analyze; TopK>=1.
OUTPUT PARAMETERS:
S - updated model
NOTE: TopK>WindowWidth is silently decreased to WindowWidth during analysis
phase
NOTE: calling this function invalidates basis, except for the situation
when this algorithm was already set with same parameters.
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetalgotopkdirect(const ssamodel &s, const ae_int_t topk, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets SSA algorithm to "top-K real time algorithm". This algo
extracts K components with largest singular values.
It is real-time version of top-K algorithm which is optimized for
incremental processing and fast start-up. Internally it uses subspace
eigensolver for truncated SVD. It results in ability to perform quick
updates of the basis when only a few points/sequences are added to dataset.
Performance profile of the algorithm is given below:
* O(K*WindowWidth^2) running time for incremental update of the dataset
with one of the "append-and-update" functions (ssaappendpointandupdate()
or ssaappendsequenceandupdate()).
* O(N*WindowWidth^2) running time for initial basis evaluation (N=size of
dataset)
* ability to split costly initialization across several incremental
updates of the basis (so called "Power-Up" functionality, activated by
ssasetpoweruplength() function)
INPUT PARAMETERS:
S - SSA model
TopK - number of components to analyze; TopK>=1.
OUTPUT PARAMETERS:
S - updated model
NOTE: this algorithm is optimized for large-scale tasks with large
datasets. On toy problems with just 5-10 points it can return basis
which is slightly different from that returned by direct algorithm
(ssasetalgotopkdirect() function). However, the difference becomes
negligible as dataset grows.
NOTE: TopK>WindowWidth is silently decreased to WindowWidth during analysis
phase
NOTE: calling this function invalidates basis, except for the situation
when this algorithm was already set with same parameters.
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssasetalgotopkrealtime(const ssamodel &s, const ae_int_t topk, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function clears all data stored in the model and invalidates all
basis components found so far.
Use it to discard sequences previously added with ssaaddsequence().
INPUT PARAMETERS:
S - SSA model created with ssacreate()
OUTPUT PARAMETERS:
S - SSA model, updated
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssacleardata(const ssamodel &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function executes SSA on internally stored dataset and returns basis
found by current method.
INPUT PARAMETERS:
S - SSA model
OUTPUT PARAMETERS:
A - array[WindowWidth,NBasis], basis; vectors are
stored in matrix columns, by decreasing variance
SV - array[NBasis]:
* zeros - for model initialized with SSASetAlgoPrecomputed()
* singular values - for other algorithms
WindowWidth - current window
NBasis - basis size
CACHING/REUSE OF THE BASIS
Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
a) ssasetwindow() with same window length
b) ssaappendpointandupdate()
c) ssaappendsequenceandupdate()
d) ssasetalgotopk...() with exactly same K
Calling these functions will result in reuse of previously found basis.
HANDLING OF DEGENERATE CASES
Calling this function in degenerate cases (no data or all data are
shorter than window size; no algorithm is specified) returns basis with
just one zero vector.
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssagetbasis(const ssamodel &s, real_2d_array &a, real_1d_array &sv, ae_int_t &windowwidth, ae_int_t &nbasis, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns linear recurrence relation (LRR) coefficients found
by current SSA algorithm.
INPUT PARAMETERS:
S - SSA model
OUTPUT PARAMETERS:
A - array[WindowWidth-1]. Coefficients of the
linear recurrence of the form (W=WindowWidth):
X[W-1] = X[W-2]*A[W-2] + X[W-3]*A[W-3] + ... + X[0]*A[0].
Empty array for WindowWidth=1.
WindowWidth - current window width
CACHING/REUSE OF THE BASIS
Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
a) ssasetwindow() with same window length
b) ssaappendpointandupdate()
c) ssaappendsequenceandupdate()
d) ssasetalgotopk...() with exactly same K
Calling these functions will result in reuse of previously found basis.
HANDLING OF DEGENERATE CASES
Calling this function in degenerate cases (no data or all data are
shorter than window size; no algorithm is specified) returns zeros.
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssagetlrr(const ssamodel &s, real_1d_array &a, ae_int_t &windowwidth, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function executes SSA on internally stored dataset and returns
analysis for the last window of the last sequence. Such analysis is
a lightweight alternative for full scale reconstruction (see below).
Typical use case for this function is real-time setting, when you are
interested in quick-and-dirty (very quick and very dirty) processing of
just a few last ticks of the trend.
IMPORTANT: full scale SSA involves analysis of the ENTIRE dataset,
with reconstruction being done for all positions of sliding
window with subsequent hankelization (diagonal averaging) of
the resulting matrix.
Such analysis requires O((DataLen-Window)*Window*NBasis) FLOPs
and can be quite costly. However, it has nice noise-canceling
effects due to averaging.
This function performs REDUCED analysis of the last window. It
is much faster - just O(Window*NBasis), but its results are
DIFFERENT from that of ssaanalyzelast(). In particular, first
few points of the trend are much more prone to noise.
INPUT PARAMETERS:
S - SSA model
OUTPUT PARAMETERS:
Trend - array[WindowWidth], reconstructed trend line
Noise - array[WindowWidth], the rest of the signal;
it holds that ActualData = Trend+Noise.
NTicks - current WindowWidth
CACHING/REUSE OF THE BASIS
Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
a) ssasetwindow() with same window length
b) ssaappendpointandupdate()
c) ssaappendsequenceandupdate()
d) ssasetalgotopk...() with exactly same K
Calling these functions will result in reuse of previously found basis.
In any case, only basis is reused. Reconstruction is performed from
scratch every time you call this function.
HANDLING OF DEGENERATE CASES
Following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* last sequence is shorter than the window length (analysis can be done,
but we can not perform reconstruction on the last sequence)
Calling this function in degenerate cases returns following result:
* in any case, WindowWidth ticks are returned
* trend is assumed to be zero
* noise is initialized by the last sequence; if last sequence is shorter
than the window size, it is moved to the end of the array, and the
beginning of the noise array is filled by zeros
No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is constructed).
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaanalyzelastwindow(const ssamodel &s, real_1d_array &trend, real_1d_array &noise, ae_int_t &nticks, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function:
* builds SSA basis using internally stored (entire) dataset
* returns reconstruction for the last NTicks of the last sequence
If you want to analyze some other sequence, use ssaanalyzesequence().
Reconstruction phase involves generation of NTicks-WindowWidth sliding
windows, their decomposition using empirical orthogonal functions found by
SSA, followed by averaging of each data point across several overlapping
windows. Thus, every point in the output trend is reconstructed using up
to WindowWidth overlapping windows (WindowWidth windows exactly in the
inner points, just one window at the extremal points).
IMPORTANT: due to averaging this function returns different results for
different values of NTicks. It is expected and not a bug.
For example:
* Trend[NTicks-1] is always same because it is not averaged in
any case (same applies to Trend[0]).
* Trend[NTicks-2] has different values for NTicks=WindowWidth
and NTicks=WindowWidth+1 because former case means that no
averaging is performed, and latter case means that averaging
using two sliding windows is performed. Larger values of
NTicks produce same results as NTicks=WindowWidth+1.
* ...and so on...
PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
running time. If you work in time-constrained setting and
have to analyze just a few last ticks, choosing NTicks equal
to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth
will result in good compromise between noise cancellation and
analysis speed.
INPUT PARAMETERS:
S - SSA model
NTicks - number of ticks to analyze, NTicks>=1.
* special case of NTicks<=WindowWidth is handled
by analyzing last window and returning NTicks
last ticks.
* special case NTicks>LastSequenceLen is handled
by prepending result with NTicks-LastSequenceLen
zeros.
OUTPUT PARAMETERS:
Trend - array[NTicks], reconstructed trend line
Noise - array[NTicks], the rest of the signal;
it holds that ActualData = Trend+Noise.
CACHING/REUSE OF THE BASIS
Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
a) ssasetwindow() with same window length
b) ssaappendpointandupdate()
c) ssaappendsequenceandupdate()
d) ssasetalgotopk...() with exactly same K
Calling these functions will result in reuse of previously found basis.
In any case, only basis is reused. Reconstruction is performed from
scratch every time you call this function.
HANDLING OF DEGENERATE CASES
Following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* last sequence is shorter than the window length (analysis can be done,
but we can not perform reconstruction on the last sequence)
Calling this function in degenerate cases returns following result:
* in any case, NTicks ticks are returned
* trend is assumed to be zero
* noise is initialized by the last sequence; if last sequence is shorter
than the window size, it is moved to the end of the array, and the
beginning of the noise array is filled by zeros
No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is constructed).
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaanalyzelast(const ssamodel &s, const ae_int_t nticks, real_1d_array &trend, real_1d_array &noise, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function:
* builds SSA basis using internally stored (entire) dataset
* returns reconstruction for the sequence being passed to this function
If you want to analyze last sequence stored in the model, use
ssaanalyzelast().
Reconstruction phase involves generation of NTicks-WindowWidth sliding
windows, their decomposition using empirical orthogonal functions found by
SSA, followed by averaging of each data point across several overlapping
windows. Thus, every point in the output trend is reconstructed using up
to WindowWidth overlapping windows (WindowWidth windows exactly in the
inner points, just one window at the extremal points).
PERFORMANCE: this function has O((NTicks-WindowWidth)*WindowWidth*NBasis)
running time. If you work in time-constrained setting and
have to analyze just a few last ticks, choosing NTicks equal
to WindowWidth+SmoothingLen, with SmoothingLen=1...WindowWidth
will result in good compromise between noise cancellation and
analysis speed.
INPUT PARAMETERS:
S - SSA model
Data - array[NTicks], can be larger (only NTicks leading
elements will be used)
NTicks - number of ticks to analyze, NTicks>=1.
* special case of NTicks<WindowWidth is handled
by returning zeros as trend, and signal as noise
OUTPUT PARAMETERS:
Trend - array[NTicks], reconstructed trend line
Noise - array[NTicks], the rest of the signal;
it holds that ActualData = Trend+Noise.
CACHING/REUSE OF THE BASIS
Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
a) ssasetwindow() with same window length
b) ssaappendpointandupdate()
c) ssaappendsequenceandupdate()
d) ssasetalgotopk...() with exactly same K
Calling these functions will result in reuse of previously found basis.
In any case, only basis is reused. Reconstruction is performed from
scratch every time you call this function.
HANDLING OF DEGENERATE CASES
Following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* sequence being passed is shorter than the window length
Calling this function in degenerate cases returns following result:
* in any case, NTicks ticks are returned
* trend is assumed to be zero
* noise is initialized by the sequence.
No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is constructed).
NOTE(review): the overload without NTicks presumably analyzes the whole
Data array (NTicks taken from its length) - confirm against
the implementation.
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaanalyzesequence(const ssamodel &s, const real_1d_array &data, const ae_int_t nticks, real_1d_array &trend, real_1d_array &noise, const xparams _xparams = alglib::xdefault);
void ssaanalyzesequence(const ssamodel &s, const real_1d_array &data, real_1d_array &trend, real_1d_array &noise, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function builds SSA basis and performs forecasting for a specified
number of ticks, returning value of trend.
Forecast is performed as follows:
* SSA trend extraction is applied to last WindowWidth elements of the
internally stored dataset; this step is basically a noise reduction.
* linear recurrence relation is applied to extracted trend
This function has following running time:
* O(NBasis*WindowWidth) for trend extraction phase (always performed)
* O(WindowWidth*NTicks) for forecast phase
NOTE: noise reduction is ALWAYS applied by this algorithm; if you want to
apply recurrence relation to raw unprocessed data, use another
function - ssaforecastsequence() which allows you to turn on and off
noise reduction phase.
NOTE: this algorithm performs prediction using only one - last - sliding
window. Predictions produced by such approach are smooth
continuations of the reconstructed trend line, but they can be
easily corrupted by noise. If you need noise-resistant prediction,
use ssaforecastavglast() function, which averages predictions built
using several sliding windows.
INPUT PARAMETERS:
S - SSA model
NTicks - number of ticks to forecast, NTicks>=1
OUTPUT PARAMETERS:
Trend - array[NTicks], predicted trend line
CACHING/REUSE OF THE BASIS
Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
a) ssasetwindow() with same window length
b) ssaappendpointandupdate()
c) ssaappendsequenceandupdate()
d) ssasetalgotopk...() with exactly same K
Calling these functions will result in reuse of previously found basis.
HANDLING OF DEGENERATE CASES
Following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* last sequence is shorter than the WindowWidth (analysis can be done,
but we can not perform forecasting on the last sequence)
* window length is 1 (impossible to use for forecasting)
* SSA analysis algorithm is configured to extract basis whose size is
equal to window length (impossible to use for forecasting; only basis
whose size is less than window length can be used).
Calling this function in degenerate cases returns following result:
* NTicks copies of the last value are returned for non-empty task with
large enough dataset, but with overcomplete basis (window width=1 or
basis size is equal to window width)
* zero trend with length=NTicks is returned for empty task
No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastlast(const ssamodel &s, const ae_int_t nticks, real_1d_array &trend, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function builds SSA basis and performs forecasting for a user-
specified sequence, returning value of trend.
Forecasting is done in two stages:
* first, we extract trend from the WindowWidth last elements of the
sequence. This stage is optional, you can turn it off if you pass
data which are already processed with SSA. Of course, you can turn it
off even for raw data, but it is not recommended - noise suppression is
very important for correct prediction.
* then, we apply LRR for last WindowWidth-1 elements of the extracted
trend.
This function has following running time:
* O(NBasis*WindowWidth) for trend extraction phase
* O(WindowWidth*NTicks) for forecast phase
NOTE: this algorithm performs prediction using only one - last - sliding
window. Predictions produced by such approach are smooth
continuations of the reconstructed trend line, but they can be
easily corrupted by noise. If you need noise-resistant prediction,
use ssaforecastavgsequence() function, which averages predictions
built using several sliding windows.
INPUT PARAMETERS:
S - SSA model
Data - array[DataLen], data to forecast
DataLen - number of ticks in the data, DataLen>=1
ForecastLen - number of ticks to predict, ForecastLen>=1
ApplySmoothing - whether to apply smoothing trend extraction or not;
if you do not know what to specify, pass True.
OUTPUT PARAMETERS:
Trend - array[ForecastLen], forecasted trend
CACHING/REUSE OF THE BASIS
Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
a) ssasetwindow() with same window length
b) ssaappendpointandupdate()
c) ssaappendsequenceandupdate()
d) ssasetalgotopk...() with exactly same K
Calling these functions will result in reuse of previously found basis.
HANDLING OF DEGENERATE CASES
Following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* data sequence is shorter than the WindowWidth (analysis can be done,
but we can not perform forecasting on the last sequence)
* window length is 1 (impossible to use for forecasting)
* SSA analysis algorithm is configured to extract basis whose size is
equal to window length (impossible to use for forecasting; only basis
whose size is less than window length can be used).
Calling this function in degenerate cases returns following result:
* ForecastLen copies of the last value are returned for non-empty task with
large enough dataset, but with overcomplete basis (window width=1 or
basis size is equal to window width)
* zero trend with length=ForecastLen is returned for empty task
No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).
NOTE(review): the overload without DataLen/ApplySmoothing presumably takes
DataLen from the length of Data and applies smoothing by
default - confirm against the implementation.
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t datalen, const ae_int_t forecastlen, const bool applysmoothing, real_1d_array &trend, const xparams _xparams = alglib::xdefault);
void ssaforecastsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t forecastlen, real_1d_array &trend, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function builds SSA basis and performs forecasting for a specified
number of ticks, returning value of trend.
Forecast is performed as follows:
* SSA trend extraction is applied to last M sliding windows of the
internally stored dataset
* for each of M sliding windows, M predictions are built
* average value of M predictions is returned
This function has following running time:
* O(NBasis*WindowWidth*M) for trend extraction phase (always performed)
* O(WindowWidth*NTicks*M) for forecast phase
NOTE: noise reduction is ALWAYS applied by this algorithm; if you want to
apply recurrence relation to raw unprocessed data, use another
function - ssaforecastsequence() which allows to turn on and off
noise reduction phase.
NOTE: combination of several predictions results in lesser sensitivity to
noise, but it may produce undesirable discontinuities between last
point of the trend and first point of the prediction. The reason is
that last point of the trend is usually corrupted by noise, but
average value of several predictions is less sensitive to noise,
thus discontinuity appears. It is not a bug.
INPUT PARAMETERS:
S - SSA model
M - number of sliding windows to combine, M>=1. If
your dataset has fewer than M sliding windows, this
parameter will be silently reduced.
NTicks - number of ticks to forecast, NTicks>=1
OUTPUT PARAMETERS:
Trend - array[NTicks], predicted trend line
CACHING/REUSE OF THE BASIS
Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
a) ssasetwindow() with same window length
b) ssaappendpointandupdate()
c) ssaappendsequenceandupdate()
d) ssasetalgotopk...() with exactly same K
Calling these functions will result in reuse of previously found basis.
HANDLING OF DEGENERATE CASES
Following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* last sequence is shorter than the WindowWidth (analysis can be done,
but we can not perform forecasting on the last sequence)
* window length is 1 (impossible to use for forecasting)
* SSA analysis algorithm is configured to extract basis whose size is
equal to window length (impossible to use for forecasting; only basis
whose size is less than window length can be used).
Calling this function in degenerate cases returns following result:
* NTicks copies of the last value are returned for non-empty task with
large enough dataset, but with overcomplete basis (window width=1 or
basis size is equal to window width)
* zero trend with length=NTicks is returned for empty task
No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastavglast(const ssamodel &s, const ae_int_t m, const ae_int_t nticks, real_1d_array &trend, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function builds SSA basis and performs forecasting for a user-
specified sequence, returning value of trend.
Forecasting is done in two stages:
* first, we extract trend from M last sliding windows of the sequence.
This stage is optional, you can turn it off if you pass data which
are already processed with SSA. Of course, you can turn it off even
for raw data, but it is not recommended - noise suppression is very
important for correct prediction.
* then, we apply LRR independently for M sliding windows
* average of M predictions is returned
This function has following running time:
* O(NBasis*WindowWidth*M) for trend extraction phase
* O(WindowWidth*ForecastLen*M) for forecast phase
NOTE: combination of several predictions results in lesser sensitivity to
noise, but it may produce undesirable discontinuities between last
point of the trend and first point of the prediction. The reason is
that last point of the trend is usually corrupted by noise, but
average value of several predictions is less sensitive to noise,
thus discontinuity appears. It is not a bug.
INPUT PARAMETERS:
S - SSA model
Data - array[DataLen], data to forecast
DataLen - number of ticks in the data, DataLen>=1
M - number of sliding windows to combine, M>=1. If
your dataset has fewer than M sliding windows, this
parameter will be silently reduced.
ForecastLen - number of ticks to predict, ForecastLen>=1
ApplySmoothing - whether to apply smoothing trend extraction or not.
if you do not know what to specify, pass true.
OUTPUT PARAMETERS:
Trend - array[ForecastLen], forecasted trend
NOTE(review): the second C++ overload omits DataLen and ApplySmoothing;
presumably DataLen is taken from the size of Data and
ApplySmoothing defaults to true - confirm against the
implementation.
CACHING/REUSE OF THE BASIS
Caching/reuse of previous results is performed:
* first call performs full run of SSA; basis is stored in the cache
* subsequent calls reuse previously cached basis
* if you call any function which changes model properties (window length,
algorithm, dataset), internal basis will be invalidated.
* the only calls which do NOT invalidate basis are listed below:
a) ssasetwindow() with same window length
b) ssaappendpointandupdate()
c) ssaappendsequenceandupdate()
d) ssasetalgotopk...() with exactly same K
Calling these functions will result in reuse of previously found basis.
HANDLING OF DEGENERATE CASES
Following degenerate cases may happen:
* dataset is empty (no analysis can be done)
* all sequences are shorter than the window length, no analysis can be done
* no algorithm is specified (no analysis can be done)
* data sequence is shorter than the WindowWidth (analysis can be done,
but we can not perform forecasting on the last sequence)
* window length is 1 (impossible to use for forecasting)
* SSA analysis algorithm is configured to extract basis whose size is
equal to window length (impossible to use for forecasting; only basis
whose size is less than window length can be used).
Calling this function in degenerate cases returns following result:
* ForecastLen copies of the last value are returned for non-empty task with
large enough dataset, but with overcomplete basis (window width=1 or
basis size is equal to window width)
* zero trend with length=ForecastLen is returned for empty task
No analysis is performed in degenerate cases (we immediately return dummy
values, no basis is ever constructed).
-- ALGLIB --
Copyright 30.10.2017 by Bochkanov Sergey
*************************************************************************/
void ssaforecastavgsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t datalen, const ae_int_t m, const ae_int_t forecastlen, const bool applysmoothing, real_1d_array &trend, const xparams _xparams = alglib::xdefault);
void ssaforecastavgsequence(const ssamodel &s, const real_1d_array &data, const ae_int_t m, const ae_int_t forecastlen, real_1d_array &trend, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Linear regression
Subroutine builds model:
Y = A(0)*X[0] + ... + A(N-1)*X[N-1] + A(N)
and returns the model found (in ALGLIB format), covariance matrix,
training set errors (rms, average, average relative) and leave-one-out
cross-validation estimate of the generalization error. CV estimate is
calculated using fast algorithm with O(NPoints*NVars) complexity.
When covariance matrix is calculated, standard deviations of function
values are assumed to be equal to RMS error on the training set.
INPUT PARAMETERS:
XY - training set, array [0..NPoints-1,0..NVars]:
* first NVars columns - independent variables
* last column - dependent variable
NPoints - training set size, NPoints>NVars+1
NVars - number of independent variables
OUTPUT PARAMETERS:
Info - return code:
* -255, in case of unknown internal error
* -4, if internal SVD subroutine hasn't converged
* -1, if incorrect parameters were passed (NPoints<NVars+2, NVars<1).
* 1, if subroutine successfully finished
LM - linear model in the ALGLIB format. Use subroutines of
this unit to work with the model.
AR - additional results
-- ALGLIB --
Copyright 02.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuild(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Linear regression
Variant of LRBuild which uses vector of standard deviations (errors in
function values).
INPUT PARAMETERS:
XY - training set, array [0..NPoints-1,0..NVars]:
* first NVars columns - independent variables
* last column - dependent variable
S - standard deviations (errors in function values)
array[0..NPoints-1], S[i]>0.
NPoints - training set size, NPoints>NVars+1
NVars - number of independent variables
OUTPUT PARAMETERS:
Info - return code:
* -255, in case of unknown internal error
* -4, if internal SVD subroutine hasn't converged
* -1, if incorrect parameters were passed (NPoints<NVars+2, NVars<1).
* -2, if S[I]<=0
* 1, if subroutine successfully finished
LM - linear model in the ALGLIB format. Use subroutines of
this unit to work with the model.
AR - additional results
-- ALGLIB --
Copyright 02.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuilds(const real_2d_array &xy, const real_1d_array &s, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like LRBuildS, but builds model
Y = A(0)*X[0] + ... + A(N-1)*X[N-1]
i.e. with zero constant term.
NOTE(review): the parameter list matches LRBuildS (XY, S, NPoints, NVars,
Info, LM, AR); presumably parameters and return codes have
the same meaning as in LRBuildS - confirm against the
implementation.
-- ALGLIB --
Copyright 30.10.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuildzs(const real_2d_array &xy, const real_1d_array &s, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like LRBuild, but builds model
Y = A(0)*X[0] + ... + A(N-1)*X[N-1]
i.e. with zero constant term.
NOTE(review): the parameter list matches LRBuild (XY, NPoints, NVars, Info,
LM, AR); presumably parameters and return codes have the same
meaning as in LRBuild - confirm against the implementation.
-- ALGLIB --
Copyright 30.10.2008 by Bochkanov Sergey
*************************************************************************/
void lrbuildz(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, ae_int_t &info, linearmodel &lm, lrreport &ar, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Unpacks coefficients of linear model.
INPUT PARAMETERS:
LM - linear model in ALGLIB format
OUTPUT PARAMETERS:
V - coefficients, array[0..NVars];
constant term (intercept) is stored in V[NVars].
NVars - number of independent variables (one less than the number
of coefficients)
-- ALGLIB --
Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrunpack(const linearmodel &lm, real_1d_array &v, ae_int_t &nvars, const xparams _xparams = alglib::xdefault);
/*************************************************************************
"Packs" coefficients and creates linear model in ALGLIB format (LRUnpack
reversed).
INPUT PARAMETERS:
V - coefficients, array[0..NVars]
NVars - number of independent variables
OUTPUT PARAMETERS:
LM - linear model.
-- ALGLIB --
Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
void lrpack(const real_1d_array &v, const ae_int_t nvars, linearmodel &lm, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Processing: calculates output of the linear model for a given input vector.
INPUT PARAMETERS:
LM - linear model
X - input vector, array[0..NVars-1].
RESULT:
value of linear model regression estimate
-- ALGLIB --
Copyright 03.09.2008 by Bochkanov Sergey
*************************************************************************/
double lrprocess(const linearmodel &lm, const real_1d_array &x, const xparams _xparams = alglib::xdefault);
/*************************************************************************
RMS error on the test set
INPUT PARAMETERS:
LM - linear model
XY - test set (presumably in the same layout as the training
set passed to LRBuild - TODO confirm)
NPoints - test set size
RESULT:
root mean square error.
-- ALGLIB --
Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double lrrmserror(const linearmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average error on the test set
INPUT PARAMETERS:
LM - linear model
XY - test set (presumably in the same layout as the training
set passed to LRBuild - TODO confirm)
NPoints - test set size
RESULT:
average error.
-- ALGLIB --
Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double lravgerror(const linearmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average relative error on the test set
INPUT PARAMETERS:
LM - linear model
XY - test set (presumably in the same layout as the training
set passed to LRBuild - TODO confirm)
NPoints - test set size
RESULT:
average relative error.
-- ALGLIB --
Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double lravgrelerror(const linearmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Filters: simple moving averages (unsymmetric).
This filter replaces array by results of SMA(K) filter. SMA(K) is defined
as filter which averages at most K previous points (previous - not points
AROUND central point) - or fewer, in case of the first K-1 points.
INPUT PARAMETERS:
X - array[N], array to process. It can be larger than N,
in this case only first N points are processed.
N - points count, N>=0
K - K>=1 (K can be larger than N, such cases will be
correctly handled). Window width. K=1 corresponds to
identity transformation (nothing changes).
OUTPUT PARAMETERS:
X - array, whose first N elements were processed with SMA(K)
NOTE 1: this function uses efficient in-place algorithm which does not
allocate temporary arrays.
NOTE 2: this algorithm makes only one pass through array and uses running
sum to speed-up calculation of the averages. Additional measures
are taken to ensure that running sum on a long sequence of zero
elements will be correctly reset to zero even in the presence of
round-off error.
NOTE 3: this is unsymmetric version of the algorithm, which does NOT
average points after the current one. Only X[i], X[i-1], ... are
used when calculating new value of X[i]. We should also note that
this algorithm uses BOTH previous points and current one, i.e.
new value of X[i] depends on BOTH previous point and X[i] itself.
NOTE(review): the second C++ overload omits N; presumably N is taken from
the size of X - confirm against the implementation.
-- ALGLIB --
Copyright 25.10.2011 by Bochkanov Sergey
*************************************************************************/
void filtersma(real_1d_array &x, const ae_int_t n, const ae_int_t k, const xparams _xparams = alglib::xdefault);
void filtersma(real_1d_array &x, const ae_int_t k, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Filters: exponential moving averages.
This filter replaces array by results of EMA(alpha) filter. EMA(alpha) is
defined as filter which replaces X[] by S[]:
S[0] = X[0]
S[t] = alpha*X[t] + (1-alpha)*S[t-1]
INPUT PARAMETERS:
X - array[N], array to process. It can be larger than N,
in this case only first N points are processed.
N - points count, N>=0
alpha - 0<alpha<=1, smoothing parameter.
OUTPUT PARAMETERS:
X - array, whose first N elements were processed
with EMA(alpha)
NOTE 1: this function uses efficient in-place algorithm which does not
allocate temporary arrays.
NOTE 2: this algorithm uses BOTH previous points and current one, i.e.
new value of X[i] depends on BOTH previous point and X[i] itself.
NOTE 3: technical analysis users quite often work with EMA coefficient
expressed in DAYS instead of fractions. If you want to calculate
EMA(N), where N is a number of days, you can use alpha=2/(N+1).
NOTE(review): the second C++ overload omits N; presumably N is taken from
the size of X - confirm against the implementation.
-- ALGLIB --
Copyright 25.10.2011 by Bochkanov Sergey
*************************************************************************/
void filterema(real_1d_array &x, const ae_int_t n, const double alpha, const xparams _xparams = alglib::xdefault);
void filterema(real_1d_array &x, const double alpha, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Filters: linear regression moving averages.
This filter replaces array by results of LRMA(K) filter.
LRMA(K) is defined as filter which, for each data point, builds linear
regression model using K previous points (point itself is included in
these K points) and calculates value of this linear model at the point in
question.
INPUT PARAMETERS:
X - array[N], array to process. It can be larger than N,
in this case only first N points are processed.
N - points count, N>=0
K - K>=1 (K can be larger than N, such cases will be
correctly handled). Window width. K=1 corresponds to
identity transformation (nothing changes).
OUTPUT PARAMETERS:
X - array, whose first N elements were processed with LRMA(K)
NOTE 1: this function uses efficient in-place algorithm which does not
allocate temporary arrays.
NOTE 2: this algorithm makes only one pass through array and uses running
sum to speed-up calculation of the averages. Additional measures
are taken to ensure that running sum on a long sequence of zero
elements will be correctly reset to zero even in the presence of
round-off error.
NOTE 3: this is unsymmetric version of the algorithm, which does NOT
use points after the current one. Only X[i], X[i-1], ... are
used when calculating new value of X[i]. We should also note that
this algorithm uses BOTH previous points and current one, i.e.
new value of X[i] depends on BOTH previous point and X[i] itself.
NOTE(review): the second C++ overload omits N; presumably N is taken from
the size of X - confirm against the implementation.
-- ALGLIB --
Copyright 25.10.2011 by Bochkanov Sergey
*************************************************************************/
void filterlrma(real_1d_array &x, const ae_int_t n, const ae_int_t k, const xparams _xparams = alglib::xdefault);
void filterlrma(real_1d_array &x, const ae_int_t k, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This subroutine trains logit model.
INPUT PARAMETERS:
XY - training set, array[0..NPoints-1,0..NVars]
First NVars columns store values of independent
variables, next column stores number of class (from 0
to NClasses-1) which dataset element belongs to. Fractional
values are rounded to nearest integer.
NPoints - training set size, NPoints>=1
NVars - number of independent variables, NVars>=1
NClasses - number of classes, NClasses>=2
OUTPUT PARAMETERS:
Info - return code:
* -2, if there is a point with class number
outside of [0..NClasses-1].
* -1, if incorrect parameters were passed
(NPoints<NVars+2, NVars<1, NClasses<2).
* 1, if task has been solved
LM - model built
Rep - training report
NOTE(review): the documented requirement NPoints>=1 is weaker than the
error condition NPoints<NVars+2 listed for Info=-1; one of
the two is presumably outdated - confirm against the
implementation.
-- ALGLIB --
Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnltrainh(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, ae_int_t &info, logitmodel &lm, mnlreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Processing: calculates posterior probabilities for a given input vector.
INPUT PARAMETERS:
LM - logit model, passed by non-constant reference
(some fields of structure are used as temporaries
when calculating model output).
NOTE(review): this C++ declaration takes LM by const
reference; the remark above presumably describes the
underlying computational core - confirm.
X - input vector, array[0..NVars-1].
Y - (possibly) preallocated buffer; if size of Y is less than
NClasses, it will be reallocated. If it is large enough, it
is NOT reallocated, so we can save some time on reallocation.
OUTPUT PARAMETERS:
Y - result, array[0..NClasses-1]
Vector of posterior probabilities for classification task.
-- ALGLIB --
Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlprocess(const logitmodel &lm, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
'interactive' variant of MNLProcess for languages like Python which
support constructs like "Y = MNLProcess(LM,X)" and interactive mode of the
interpreter.
This function allocates new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from command line.
The parameter list is the same as that of MNLProcess (LM, X, Y).
-- ALGLIB --
Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlprocessi(const logitmodel &lm, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Unpacks coefficients of logit model. Logit model has the form:
P(class=i) = S(i) / (S(0) + S(1) + ... +S(M-1))
S(i) = Exp(A[i,0]*X[0] + ... + A[i,N-1]*X[N-1] + A[i,N]), when i<M-1
S(M-1) = 1
where N is the number of independent variables (NVars) and M is the number
of classes (NClasses).
INPUT PARAMETERS:
LM - logit model in ALGLIB format
OUTPUT PARAMETERS:
A - coefficients, array[0..NClasses-2,0..NVars]
NVars - number of independent variables
NClasses - number of classes
-- ALGLIB --
Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlunpack(const logitmodel &lm, real_2d_array &a, ae_int_t &nvars, ae_int_t &nclasses, const xparams _xparams = alglib::xdefault);
/*************************************************************************
"Packs" coefficients and creates logit model in ALGLIB format (MNLUnpack
reversed).
INPUT PARAMETERS:
A - model coefficients (see MNLUnpack for the layout)
NVars - number of independent variables
NClasses - number of classes
OUTPUT PARAMETERS:
LM - logit model.
-- ALGLIB --
Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
void mnlpack(const real_2d_array &a, const ae_int_t nvars, const ae_int_t nclasses, logitmodel &lm, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average cross-entropy (in bits per element) on the test set
INPUT PARAMETERS:
LM - logit model
XY - test set (presumably in the same layout as the training
set passed to MNLTrainH - TODO confirm)
NPoints - test set size
RESULT:
CrossEntropy/(NPoints*ln(2)).
-- ALGLIB --
Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
double mnlavgce(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Relative classification error on the test set
INPUT PARAMETERS:
LM - logit model
XY - test set
NPoints - test set size
RESULT:
percent of incorrectly classified cases.
NOTE(review): MNLClsError is documented as MNLRelClsError*NPoints, so the
value returned here is presumably a fraction in [0,1] rather
than a percentage in [0,100] - confirm against the
implementation.
-- ALGLIB --
Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
double mnlrelclserror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
RMS error on the test set
INPUT PARAMETERS:
LM - logit model
XY - test set
NPoints - test set size
RESULT:
root mean square error (error when estimating posterior probabilities).
-- ALGLIB --
Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double mnlrmserror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average error on the test set
INPUT PARAMETERS:
LM - logit model
XY - test set
NPoints - test set size
RESULT:
average error (error when estimating posterior probabilities).
-- ALGLIB --
Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double mnlavgerror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average relative error on the test set
INPUT PARAMETERS:
LM - logit model
XY - test set
NPoints - test set size (this parameter is named ssize in the C++
declaration below)
RESULT:
average relative error (error when estimating posterior probabilities).
-- ALGLIB --
Copyright 30.08.2008 by Bochkanov Sergey
*************************************************************************/
double mnlavgrelerror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t ssize, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Classification error on test set = MNLRelClsError*NPoints
INPUT PARAMETERS:
LM - logit model
XY - test set
NPoints - test set size
RESULT:
classification error, returned as an integer.
-- ALGLIB --
Copyright 10.09.2008 by Bochkanov Sergey
*************************************************************************/
ae_int_t mnlclserror(const logitmodel &lm, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
DESCRIPTION:
This function creates MCPD (Markov Chains for Population Data) solver.
This solver can be used to find transition matrix P for N-dimensional
prediction problem where transition from X[i] to X[i+1] is modelled as
X[i+1] = P*X[i]
where X[i] and X[i+1] are N-dimensional population vectors (components of
each X are non-negative), and P is a N*N transition matrix (elements of P
are non-negative, each column sums to 1.0).
Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is constant, i.e. there are no new individuals and no one
leaves population
* you want to model transitions of individuals from one state into another
USAGE:
Here we give very brief outline of the MCPD. We strongly recommend you to
read examples in the ALGLIB Reference Manual and to read ALGLIB User Guide
on data analysis which is available at http://www.alglib.net/dataanalysis/
1. User initializes algorithm state with MCPDCreate() call
2. User adds one or more tracks - sequences of states which describe
evolution of a system being modelled from different starting conditions
3. User may add optional boundary, equality and/or linear constraints on
the coefficients of P by calling one of the following functions:
* MCPDSetEC() to set equality constraints
* MCPDSetBC() to set bound constraints
* MCPDSetLC() to set linear constraints
4. Optionally, user may set custom weights for prediction errors (by
default, algorithm assigns non-equal, automatically chosen weights for
errors in the prediction of different components of X). It can be done
with a call of MCPDSetPredictionWeights() function.
5. User calls MCPDSolve() function which takes algorithm state and
pointer (delegate, etc.) to callback function which calculates F/G.
6. User calls MCPDResults() to get solution
INPUT PARAMETERS:
N - problem dimension, N>=1
OUTPUT PARAMETERS:
State - structure which stores algorithm state (this parameter is
named s in the C++ declaration below)
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreate(const ae_int_t n, mcpdstate &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
DESCRIPTION:
This function is a specialized version of MCPDCreate() function, and we
recommend you to read comments for this function for general information
about MCPD solver.
This function creates MCPD (Markov Chains for Population Data) solver
for "Entry-state" model, i.e. model where transition from X[i] to X[i+1]
is modelled as
X[i+1] = P*X[i]
where
X[i] and X[i+1] are N-dimensional state vectors
P is a N*N transition matrix
and one selected component of X[] is called "entry" state and is treated
in a special way:
system state always transits from "entry" state to some other state
system state can not transit from any state into "entry" state
Such conditions basically mean that row of P which corresponds to "entry"
state is zero.
Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is NOT constant - at every moment of time there is some
(unpredictable) amount of "new" individuals, which can transit into one
of the states at the next turn, but still no one leaves population
* you want to model transitions of individuals from one state into another
* but you do NOT want to predict amount of "new" individuals because it
does not depend on individuals already present (hence system can not
transit INTO entry state - it can only transit FROM it).
This model is discussed in more detail in the ALGLIB User Guide (see
http://www.alglib.net/dataanalysis/ for more data).
INPUT PARAMETERS:
N - problem dimension, N>=2
EntryState- index of entry state, in 0..N-1
OUTPUT PARAMETERS:
State - structure which stores algorithm state (this parameter is
named s in the C++ declaration below)
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreateentry(const ae_int_t n, const ae_int_t entrystate, mcpdstate &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
DESCRIPTION:
This function is a specialized version of MCPDCreate() function, and we
recommend you to read comments for this function for general information
about MCPD solver.
This function creates MCPD (Markov Chains for Population Data) solver
for "Exit-state" model, i.e. model where transition from X[i] to X[i+1]
is modelled as
X[i+1] = P*X[i]
where
X[i] and X[i+1] are N-dimensional state vectors
P is a N*N transition matrix
and one selected component of X[] is called "exit" state and is treated
in a special way:
system state can transit from any state into "exit" state
system state can not transit from "exit" state into any other state
transition operator discards "exit" state (makes it zero at each turn)
Such conditions basically mean that column of P which corresponds to
"exit" state is zero. Multiplication by such P may decrease sum of vector
components.
Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is NOT constant - individuals can move into "exit" state
and leave population at the next turn, but there are no new individuals
* amount of individuals which leave population can be predicted
* you want to model transitions of individuals from one state into another
(including transitions into the "exit" state)
This model is discussed in more detail in the ALGLIB User Guide (see
http://www.alglib.net/dataanalysis/ for more data).
INPUT PARAMETERS:
N - problem dimension, N>=2
ExitState- index of exit state, in 0..N-1
OUTPUT PARAMETERS:
State - structure which stores algorithm state (this parameter is
named s in the C++ declaration below)
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreateexit(const ae_int_t n, const ae_int_t exitstate, mcpdstate &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
DESCRIPTION:
This function is a specialized version of MCPDCreate() function, and we
recommend you to read comments for this function for general information
about MCPD solver.
This function creates MCPD (Markov Chains for Population Data) solver
for "Entry-Exit-states" model, i.e. model where transition from X[i] to
X[i+1] is modelled as
X[i+1] = P*X[i]
where
X[i] and X[i+1] are N-dimensional state vectors
P is a N*N transition matrix
one selected component of X[] is called "entry" state and is treated in a
special way:
system state always transits from "entry" state to some another state
system state can not transit from any state into "entry" state
and another one component of X[] is called "exit" state and is treated in
a special way too:
system state can transit from any state into "exit" state
system state can not transit from "exit" state into any other state
transition operator discards "exit" state (makes it zero at each turn)
Such conditions basically mean that:
row of P which corresponds to "entry" state is zero
column of P which corresponds to "exit" state is zero
Multiplication by such P may decrease sum of vector components.
Such models arise when:
* there is some population of individuals
* individuals can have different states
* individuals can transit from one state to another
* population size is NOT constant
* at every moment of time there is some (unpredictable) number of "new"
individuals, which can transit into one of the states at the next turn
* some individuals can move (predictably) into "exit" state and leave
population at the next turn
* you want to model transitions of individuals from one state into another,
including transitions from the "entry" state and into the "exit" state.
* but you do NOT want to predict the number of "new" individuals because it
does not depend on individuals already present (hence system can not
transit INTO entry state - it can only transit FROM it).
This model is discussed in more detail in the ALGLIB User Guide (see
http://www.alglib.net/dataanalysis/ for more information).
INPUT PARAMETERS:
N - problem dimension, N>=2
EntryState- index of entry state, in 0..N-1
ExitState- index of exit state, in 0..N-1
OUTPUT PARAMETERS:
S - structure which stores algorithm state
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdcreateentryexit(const ae_int_t n, const ae_int_t entrystate, const ae_int_t exitstate, mcpdstate &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function is used to add a track - sequence of system states at the
different moments of its evolution.
You may add one or several tracks to the MCPD solver. In case you have
several tracks, they won't overwrite each other. For example, if you pass
two tracks, A1-A2-A3 (system at t=A+1, t=A+2 and t=A+3) and B1-B2-B3, then
solver will try to model transitions from t=A+1 to t=A+2, t=A+2 to t=A+3,
t=B+1 to t=B+2, t=B+2 to t=B+3. But it WON'T mix these two tracks - i.e. it
won't try to model transition from t=A+3 to t=B+1.
INPUT PARAMETERS:
S - solver
XY - track, array[K,N]:
* I-th row is a state at t=I
* elements of XY must be non-negative (exception will be
thrown on negative elements)
K - number of points in a track
* if given, only leading K rows of XY are used
* if not given, automatically determined from size of XY
NOTES:
1. Track may contain either proportional or population data:
* with proportional data all rows of XY must sum to 1.0, i.e. we have
proportions instead of absolute population values
* with population data rows of XY contain population counts and generally
do not sum to 1.0 (although they still must be non-negative)
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdaddtrack(const mcpdstate &s, const real_2d_array &xy, const ae_int_t k, const xparams _xparams = alglib::xdefault);
void mcpdaddtrack(const mcpdstate &s, const real_2d_array &xy, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function is used to set equality constraints on the elements of the
transition matrix P (any previously specified equality constraints are
replaced, see below).
MCPD solver has four types of constraints which can be placed on P:
* user-specified equality constraints (optional)
* user-specified bound constraints (optional)
* user-specified general linear constraints (optional)
* basic constraints (always present):
* non-negativity: P[i,j]>=0
* consistency: every column of P sums to 1.0
Final constraints which are passed to the underlying optimizer are
calculated as intersection of all present constraints. For example, you
may specify boundary constraint on P[0,0] and equality one:
0.1<=P[0,0]<=0.9
P[0,0]=0.5
Such combination of constraints will be silently reduced to their
intersection, which is P[0,0]=0.5.
This function can be used to place equality constraints on arbitrary
subset of elements of P. Set of constraints is specified by EC, which may
contain either NAN's or finite numbers from [0,1]. NAN denotes absence of
constraint, finite number denotes equality constraint on specific element
of P.
You can also use MCPDAddEC() function which allows you to ADD equality
constraint for one element of P without changing constraints for other
elements.
These functions (MCPDSetEC and MCPDAddEC) interact as follows:
* there is internal matrix of equality constraints which is stored in the
MCPD solver
* MCPDSetEC() replaces this matrix by another one (SET)
* MCPDAddEC() modifies one element of this matrix and leaves other ones
unchanged (ADD)
* thus MCPDAddEC() call preserves all modifications done by previous
calls, while MCPDSetEC() completely discards all changes done to the
equality constraints.
INPUT PARAMETERS:
S - solver
EC - equality constraints, array[N,N]. Elements of EC can be
either NAN's or finite numbers from [0,1]. NAN denotes
absence of constraints, while finite value denotes
equality constraint on the corresponding element of P.
NOTES:
1. infinite values of EC will lead to exception being thrown. Values less
than 0.0 or greater than 1.0 will lead to error code being returned after
call to MCPDSolve().
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetec(const mcpdstate &s, const real_2d_array &ec, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function is used to add equality constraints on the elements of the
transition matrix P.
MCPD solver has four types of constraints which can be placed on P:
* user-specified equality constraints (optional)
* user-specified bound constraints (optional)
* user-specified general linear constraints (optional)
* basic constraints (always present):
* non-negativity: P[i,j]>=0
* consistency: every column of P sums to 1.0
Final constraints which are passed to the underlying optimizer are
calculated as intersection of all present constraints. For example, you
may specify boundary constraint on P[0,0] and equality one:
0.1<=P[0,0]<=0.9
P[0,0]=0.5
Such combination of constraints will be silently reduced to their
intersection, which is P[0,0]=0.5.
This function can be used to ADD equality constraint for one element of P
without changing constraints for other elements.
You can also use MCPDSetEC() function which allows you to specify
arbitrary set of equality constraints in one call.
These functions (MCPDSetEC and MCPDAddEC) interact as follows:
* there is internal matrix of equality constraints which is stored in the
MCPD solver
* MCPDSetEC() replaces this matrix by another one (SET)
* MCPDAddEC() modifies one element of this matrix and leaves other ones
unchanged (ADD)
* thus MCPDAddEC() call preserves all modifications done by previous
calls, while MCPDSetEC() completely discards all changes done to the
equality constraints.
INPUT PARAMETERS:
S - solver
I - row index of element being constrained
J - column index of element being constrained
C - value (constraint for P[I,J]). Can be either NAN (no
constraint) or finite value from [0,1].
NOTES:
1. infinite values of C will lead to exception being thrown. Values less
than 0.0 or greater than 1.0 will lead to error code being returned after
call to MCPDSolve().
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdaddec(const mcpdstate &s, const ae_int_t i, const ae_int_t j, const double c, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function is used to set bound constraints on the elements of the
transition matrix P (any previously specified bound constraints are
replaced, see below).
MCPD solver has four types of constraints which can be placed on P:
* user-specified equality constraints (optional)
* user-specified bound constraints (optional)
* user-specified general linear constraints (optional)
* basic constraints (always present):
* non-negativity: P[i,j]>=0
* consistency: every column of P sums to 1.0
Final constraints which are passed to the underlying optimizer are
calculated as intersection of all present constraints. For example, you
may specify boundary constraint on P[0,0] and equality one:
0.1<=P[0,0]<=0.9
P[0,0]=0.5
Such combination of constraints will be silently reduced to their
intersection, which is P[0,0]=0.5.
This function can be used to place bound constraints on arbitrary
subset of elements of P. Set of constraints is specified by BndL/BndU
matrices, which may contain arbitrary combination of finite numbers or
infinities (like -INF<x<=0.5 or 0.1<=x<+INF).
You can also use MCPDAddBC() function which allows you to ADD bound
constraint for one element of P without changing constraints for other
elements.
These functions (MCPDSetBC and MCPDAddBC) interact as follows:
* there is internal matrix of bound constraints which is stored in the
MCPD solver
* MCPDSetBC() replaces this matrix by another one (SET)
* MCPDAddBC() modifies one element of this matrix and leaves other ones
unchanged (ADD)
* thus MCPDAddBC() call preserves all modifications done by previous
calls, while MCPDSetBC() completely discards all changes done to the
bound constraints.
INPUT PARAMETERS:
S - solver
BndL - lower bounds constraints, array[N,N]. Elements of BndL can
be finite numbers or -INF.
BndU - upper bounds constraints, array[N,N]. Elements of BndU can
be finite numbers or +INF.
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetbc(const mcpdstate &s, const real_2d_array &bndl, const real_2d_array &bndu, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function is used to add bound constraints on the elements of the
transition matrix P.
MCPD solver has four types of constraints which can be placed on P:
* user-specified equality constraints (optional)
* user-specified bound constraints (optional)
* user-specified general linear constraints (optional)
* basic constraints (always present):
* non-negativity: P[i,j]>=0
* consistency: every column of P sums to 1.0
Final constraints which are passed to the underlying optimizer are
calculated as intersection of all present constraints. For example, you
may specify boundary constraint on P[0,0] and equality one:
0.1<=P[0,0]<=0.9
P[0,0]=0.5
Such combination of constraints will be silently reduced to their
intersection, which is P[0,0]=0.5.
This function can be used to ADD bound constraint for one element of P
without changing constraints for other elements.
You can also use MCPDSetBC() function which allows you to place bound
constraints on arbitrary subset of elements of P. Set of constraints is
specified by BndL/BndU matrices, which may contain arbitrary combination
of finite numbers or infinities (like -INF<x<=0.5 or 0.1<=x<+INF).
These functions (MCPDSetBC and MCPDAddBC) interact as follows:
* there is internal matrix of bound constraints which is stored in the
MCPD solver
* MCPDSetBC() replaces this matrix by another one (SET)
* MCPDAddBC() modifies one element of this matrix and leaves other ones
unchanged (ADD)
* thus MCPDAddBC() call preserves all modifications done by previous
calls, while MCPDSetBC() completely discards all changes done to the
bound constraints.
INPUT PARAMETERS:
S - solver
I - row index of element being constrained
J - column index of element being constrained
BndL - lower bound
BndU - upper bound
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdaddbc(const mcpdstate &s, const ae_int_t i, const ae_int_t j, const double bndl, const double bndu, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function is used to set linear equality/inequality constraints on the
elements of the transition matrix P.
This function can be used to set one or several general linear constraints
on the elements of P. Two types of constraints are supported:
* equality constraints
* inequality constraints (both less-or-equal and greater-or-equal)
Coefficients of constraints are specified by matrix C (one of the
parameters). One row of C corresponds to one constraint. Because
transition matrix P has N*N elements, we need N*N columns to store all
coefficients (they are stored row by row), and one more column to store
right part - hence C has N*N+1 columns. Constraint kind is stored in the
CT array.
Thus, I-th linear constraint is
P[0,0]*C[I,0] + P[0,1]*C[I,1] + .. + P[0,N-1]*C[I,N-1] +
+ P[1,0]*C[I,N] + P[1,1]*C[I,N+1] + ... +
+ P[N-1,N-1]*C[I,N*N-1] ?=? C[I,N*N]
where ?=? can be either "=" (CT[i]=0), "<=" (CT[i]<0) or ">=" (CT[i]>0).
Your constraint may involve only some subset of P (less than N*N elements).
For example it can be something like
P[0,0] + P[0,1] = 0.5
In this case you still should pass matrix with N*N+1 columns, but all its
elements (except for the coefficients C[0,0], C[0,1] and the right part
C[0,N*N]) will be zero.
INPUT PARAMETERS:
S - solver
C - array[K,N*N+1] - coefficients of constraints
(see above for complete description)
CT - array[K] - constraint types
(see above for complete description)
K - number of equality/inequality constraints, K>=0:
* if given, only leading K elements of C/CT are used
* if not given, automatically determined from sizes of C/CT
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetlc(const mcpdstate &s, const real_2d_array &c, const integer_1d_array &ct, const ae_int_t k, const xparams _xparams = alglib::xdefault);
void mcpdsetlc(const mcpdstate &s, const real_2d_array &c, const integer_1d_array &ct, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function allows you to tune the amount of Tikhonov regularization
being applied to your problem.
By default, regularizing term is equal to r*||P-prior_P||^2, where r is a
small non-zero value, P is transition matrix, prior_P is identity matrix,
||X||^2 is a sum of squared elements of X.
This function allows you to change coefficient r. You can also change
prior values with MCPDSetPrior() function.
INPUT PARAMETERS:
S - solver
V - regularization coefficient, finite non-negative value. It
is not recommended to specify zero value unless you are
pretty sure that you want it.
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsettikhonovregularizer(const mcpdstate &s, const double v, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function allows you to set prior values used for regularization of
your problem.
By default, regularizing term is equal to r*||P-prior_P||^2, where r is a
small non-zero value, P is transition matrix, prior_P is identity matrix,
||X||^2 is a sum of squared elements of X.
This function allows you to change prior values prior_P. You can also
change r with MCPDSetTikhonovRegularizer() function.
INPUT PARAMETERS:
S - solver
PP - array[N,N], matrix of prior values:
1. elements must be real numbers from [0,1]
2. columns must sum to 1.0.
First property is checked (exception is thrown otherwise),
while second one is not checked/enforced.
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetprior(const mcpdstate &s, const real_2d_array &pp, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function is used to change prediction weights
MCPD solver scales prediction errors as follows
Error(P) = ||W*(y-P*x)||^2
where
x is a system state at time t
y is a system state at time t+1
P is a transition matrix
W is a diagonal scaling matrix
By default, weights are chosen in order to minimize relative prediction
error instead of absolute one. For example, if one component of state is
about 0.5 in magnitude and another one is about 0.05, then algorithm will
make corresponding weights equal to 2.0 and 20.0.
INPUT PARAMETERS:
S - solver
PW - array[N], weights:
* must be non-negative values (exception will be thrown otherwise)
* zero values will be replaced by automatically chosen values
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsetpredictionweights(const mcpdstate &s, const real_1d_array &pw, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function is used to start solution of the MCPD problem.
After return from this function, you can use MCPDResults() to get solution
and completion code.
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdsolve(const mcpdstate &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
MCPD results
INPUT PARAMETERS:
S - algorithm state
OUTPUT PARAMETERS:
P - array[N,N], transition matrix
Rep - optimization report. You should check Rep.TerminationType
in order to distinguish successful termination from
unsuccessful one. In short, positive values denote
success, negative ones are failures.
More information about fields of this structure can be
found in the comments on MCPDReport datatype.
-- ALGLIB --
Copyright 23.05.2010 by Bochkanov Sergey
*************************************************************************/
void mcpdresults(const mcpdstate &s, real_2d_array &p, mcpdreport &rep, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This function serializes data structure to string.
Important properties of s_out:
* it contains alphanumeric characters, dots, underscores, minus signs
* these symbols are grouped into words, which are separated by spaces
and Windows-style (CR+LF) newlines
* although serializer uses spaces and CR+LF as separators, you can
replace any separator character by arbitrary combination of spaces,
tabs, Windows or Unix newlines. It allows flexible reformatting of
the string in case you want to include it into text or XML file.
But you should not insert separators into the middle of the "words"
nor you should change case of letters.
* s_out can be freely moved between 32-bit and 64-bit systems, little
and big endian machines, and so on. You can serialize structure on
32-bit machine and unserialize it on 64-bit one (or vice versa), or
serialize it on SPARC and unserialize on x86. You can also
serialize it in C++ version of ALGLIB and unserialize in C# one,
and vice versa.
*************************************************************************/
void mlpeserialize(mlpensemble &obj, std::string &s_out);
/*************************************************************************
This function unserializes data structure from string.
*************************************************************************/
void mlpeunserialize(const std::string &s_in, mlpensemble &obj);
/*************************************************************************
This function serializes data structure to C++ stream.
Data stream generated by this function is same as string representation
generated by string version of serializer - alphanumeric characters,
dots, underscores, minus signs, which are grouped into words separated by
spaces and CR+LF.
We recommend you to read comments on string version of serializer to find
out more about serialization of ALGLIB objects.
*************************************************************************/
void mlpeserialize(mlpensemble &obj, std::ostream &s_out);
/*************************************************************************
This function unserializes data structure from stream.
*************************************************************************/
void mlpeunserialize(const std::istream &s_in, mlpensemble &obj);
/*************************************************************************
Like MLPCreate0, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreate0(const ae_int_t nin, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like MLPCreate1, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreate1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like MLPCreate2, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreate2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like MLPCreateB0, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreateb0(const ae_int_t nin, const ae_int_t nout, const double b, const double d, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like MLPCreateB1, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreateb1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double b, const double d, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like MLPCreateB2, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreateb2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double b, const double d, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like MLPCreateR0, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreater0(const ae_int_t nin, const ae_int_t nout, const double a, const double b, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like MLPCreateR1, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreater1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const double a, const double b, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like MLPCreateR2, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreater2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const double a, const double b, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like MLPCreateC0, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatec0(const ae_int_t nin, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like MLPCreateC1, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatec1(const ae_int_t nin, const ae_int_t nhid, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Like MLPCreateC2, but for ensembles.
-- ALGLIB --
Copyright 18.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatec2(const ae_int_t nin, const ae_int_t nhid1, const ae_int_t nhid2, const ae_int_t nout, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Creates ensemble from network. Only network geometry is copied.
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpecreatefromnetwork(const multilayerperceptron &network, const ae_int_t ensemblesize, mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Randomization of MLP ensemble
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlperandomize(const mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Return ensemble properties (number of inputs and outputs).
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpeproperties(const mlpensemble &ensemble, ae_int_t &nin, ae_int_t &nout, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Return normalization type (whether ensemble is SOFTMAX-normalized or not).
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
bool mlpeissoftmax(const mlpensemble &ensemble, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Processing
INPUT PARAMETERS:
Ensemble- neural networks ensemble
X - input vector, array[0..NIn-1].
Y - (possibly) preallocated buffer; if size of Y is less than
NOut, it will be reallocated. If it is large enough, it
is NOT reallocated, so we can save some time on reallocation.
OUTPUT PARAMETERS:
Y - result. Regression estimate when solving regression task,
vector of posterior probabilities for classification task.
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpeprocess(const mlpensemble &ensemble, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
'interactive' variant of MLPEProcess for languages like Python which
support constructs like "Y = MLPEProcess(LM,X)" and interactive mode of the
interpreter
This function allocates new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from command line.
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpeprocessi(const mlpensemble &ensemble, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Relative classification error on the test set
INPUT PARAMETERS:
Ensemble- ensemble
XY - test set
NPoints - test set size
RESULT:
percent of incorrectly classified cases.
Works both for classifier networks and for regression networks which
are used as classifiers.
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
double mlperelclserror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average cross-entropy (in bits per element) on the test set
INPUT PARAMETERS:
Ensemble- ensemble
XY - test set
NPoints - test set size
RESULT:
CrossEntropy/(NPoints*LN(2)).
Zero if ensemble solves regression task.
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
double mlpeavgce(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
RMS error on the test set
INPUT PARAMETERS:
Ensemble- ensemble
XY - test set
NPoints - test set size
RESULT:
root mean square error.
Its meaning for regression task is obvious. As for classification task
RMS error means error when estimating posterior probabilities.
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
double mlpermserror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average error on the test set
INPUT PARAMETERS:
Ensemble- ensemble
XY - test set
NPoints - test set size
RESULT:
average error.
Its meaning for regression task is obvious. As for classification task
it means average error when estimating posterior probabilities.
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
double mlpeavgerror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average relative error on the test set
INPUT PARAMETERS:
Ensemble - neural network ensemble
XY - test set
NPoints - test set size
RESULT:
average relative error.
Its meaning for a regression task is obvious. For a classification task
it is the average relative error made when estimating posterior
probabilities.
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
double mlpeavgrelerror(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
Neural network training using modified Levenberg-Marquardt with exact
Hessian calculation and regularization. Subroutine trains neural network
with restarts from random positions. Algorithm is well suited for small
and medium scale problems (hundreds of weights).
INPUT PARAMETERS:
Network - neural network with initialized geometry
XY - training set
NPoints - training set size
Decay - weight decay constant, >=0.001
Decay term 'Decay*||Weights||^2' is added to error
function.
If you don't know what Decay to choose, use 0.001.
Restarts - number of restarts from random position, >0.
If you don't know what Restarts to choose, use 2.
OUTPUT PARAMETERS:
Network - trained neural network.
Info - return code:
* -9, if internal matrix inverse subroutine failed
* -2, if there is a point with class number
outside of [0..NOut-1].
* -1, if wrong parameters specified
(NPoints<0, Restarts<1).
* 2, if task has been solved.
Rep - training report
NOTE(review): Network is declared as a const reference although it is
documented as an output. Presumably the object refers to internal state
which is updated in place - confirm against the implementation.
-- ALGLIB --
Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
void mlptrainlm(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Neural network training using L-BFGS algorithm with regularization.
Subroutine trains neural network with restarts from random positions.
Algorithm is well suited for problems of any dimensionality (memory
requirements and step complexity are linear in the number of weights).
INPUT PARAMETERS:
Network - neural network with initialized geometry
XY - training set
NPoints - training set size
Decay - weight decay constant, >=0.001
Decay term 'Decay*||Weights||^2' is added to error
function.
If you don't know what Decay to choose, use 0.001.
Restarts - number of restarts from random position, >0.
If you don't know what Restarts to choose, use 2.
WStep - stopping criterion. Algorithm stops if step size is
less than WStep. Recommended value - 0.01. Zero step
size means stopping after MaxIts iterations.
MaxIts - stopping criterion. Algorithm stops after MaxIts
iterations (NOT gradient calculations). Zero MaxIts
means stopping when step is sufficiently small.
OUTPUT PARAMETERS:
Network - trained neural network.
Info - return code:
* -8, if both WStep=0 and MaxIts=0
* -2, if there is a point with class number
outside of [0..NOut-1].
* -1, if wrong parameters specified
(NPoints<0, Restarts<1).
* 2, if task has been solved.
Rep - training report
-- ALGLIB --
Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
void mlptrainlbfgs(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const double wstep, const ae_int_t maxits, ae_int_t &info, mlpreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Neural network training using early stopping (base algorithm - L-BFGS with
regularization).
INPUT PARAMETERS:
Network - neural network with initialized geometry
TrnXY - training set
TrnSize - training set size, TrnSize>0
ValXY - validation set
ValSize - validation set size, ValSize>0
Decay - weight decay constant, >=0.001
Decay term 'Decay*||Weights||^2' is added to error
function.
If you don't know what Decay to choose, use 0.001.
Restarts - number of restarts, either:
* strictly positive number - algorithm makes the specified
number of restarts from random position.
* -1, in which case algorithm makes exactly one run
from the initial state of the network (no randomization).
If you don't know what Restarts to choose, choose one
of the following:
* -1 (deterministic start)
* +1 (one random restart)
* +5 (moderate amount of random restarts)
OUTPUT PARAMETERS:
Network - trained neural network.
Info - return code:
* -2, if there is a point with class number
outside of [0..NOut-1].
* -1, if wrong parameters specified
(NPoints<0, Restarts<1, ...).
* 2, task has been solved, stopping criterion met -
sufficiently small step size. Not expected (we
use EARLY stopping) but possible and not an
error.
* 6, task has been solved, stopping criterion met -
increasing of validation set error.
Rep - training report
NOTE:
Algorithm stops if validation set error increases for long enough or
step size is small enough (there are tasks where validation set error may
decrease forever). In any case the solution returned corresponds to the
minimum of validation set error.
NOTE(review): the description of Info=-1 mentions NPoints and Restarts<1,
while this function takes TrnSize/ValSize and documents Restarts=-1 as a
valid value. Exact validation rules should be confirmed against the
implementation.
-- ALGLIB --
Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
void mlptraines(const multilayerperceptron &network, const real_2d_array &trnxy, const ae_int_t trnsize, const real_2d_array &valxy, const ae_int_t valsize, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Cross-validation estimate of generalization error.
Base algorithm - L-BFGS.
INPUT PARAMETERS:
Network - neural network with initialized geometry. Network is
not changed during cross-validation - it is used only
as a representative of its architecture.
XY - training set.
NPoints - training set size (called SSize in earlier versions of
this comment; the parameter in the declaration is npoints)
Decay - weight decay, same as in MLPTrainLBFGS
Restarts - number of restarts, >0.
restarts are counted for each partition separately, so
total number of restarts will be Restarts*FoldsCount.
WStep - stopping criterion, same as in MLPTrainLBFGS
MaxIts - stopping criterion, same as in MLPTrainLBFGS
FoldsCount - number of folds in k-fold cross-validation,
2<=FoldsCount<=NPoints.
recommended value: 10.
OUTPUT PARAMETERS:
Info - return code, same as in MLPTrainLBFGS
Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
CVRep - generalization error estimates
-- ALGLIB --
Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
void mlpkfoldcvlbfgs(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const double wstep, const ae_int_t maxits, const ae_int_t foldscount, ae_int_t &info, mlpreport &rep, mlpcvreport &cvrep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Cross-validation estimate of generalization error.
Base algorithm - Levenberg-Marquardt.
INPUT PARAMETERS:
Network - neural network with initialized geometry. Network is
not changed during cross-validation - it is used only
as a representative of its architecture.
XY - training set.
NPoints - training set size (called SSize in earlier versions of
this comment; the parameter in the declaration is npoints)
Decay - weight decay, same as in MLPTrainLBFGS
Restarts - number of restarts, >0.
restarts are counted for each partition separately, so
total number of restarts will be Restarts*FoldsCount.
FoldsCount - number of folds in k-fold cross-validation,
2<=FoldsCount<=NPoints.
recommended value: 10.
OUTPUT PARAMETERS:
Info - return code, same as in MLPTrainLBFGS
Rep - report, same as in MLPTrainLM/MLPTrainLBFGS
CVRep - generalization error estimates
-- ALGLIB --
Copyright 09.12.2007 by Bochkanov Sergey
*************************************************************************/
void mlpkfoldcvlm(const multilayerperceptron &network, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const ae_int_t foldscount, ae_int_t &info, mlpreport &rep, mlpcvreport &cvrep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function estimates generalization error using cross-validation on the
current dataset with current training settings.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
S - trainer object
Network - neural network. It must have the same number of inputs and
outputs/classes as was specified during creation of the
trainer object. Network is not changed during cross-
validation and is not trained - it is used only as a
representative of its architecture. I.e., we estimate
generalization properties of ARCHITECTURE, not some
specific network.
NRestarts - number of restarts, >=0:
* NRestarts>0 means that for each cross-validation
round specified number of random restarts is
performed, with best network being chosen after
training.
* NRestarts=0 is same as NRestarts=1
FoldsCount - number of folds in k-fold cross-validation:
* 2<=FoldsCount<=size of dataset
* recommended value: 10.
* values larger than dataset size will be silently
truncated down to dataset size
OUTPUT PARAMETERS:
Rep - structure which contains cross-validation estimates:
* Rep.RelCLSError - fraction of misclassified cases.
* Rep.AvgCE - average cross-entropy
* Rep.RMSError - root-mean-square error
* Rep.AvgError - average error
* Rep.AvgRelError - average relative error
NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
or subset with only one point was given, zeros are returned as
estimates.
NOTE: this method performs FoldsCount cross-validation rounds, each one
with NRestarts random starts. Thus, FoldsCount*NRestarts networks
are trained in total.
NOTE: Rep.RelCLSError/Rep.AvgCE are zero on regression problems.
NOTE: on classification problems Rep.RMSError/Rep.AvgError/Rep.AvgRelError
contain errors in prediction of posterior probabilities.
-- ALGLIB --
Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpkfoldcv(const mlptrainer &s, const multilayerperceptron &network, const ae_int_t nrestarts, const ae_int_t foldscount, mlpreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Creation of the network trainer object for regression networks
INPUT PARAMETERS:
NIn - number of inputs, NIn>=1
NOut - number of outputs, NOut>=1
OUTPUT PARAMETERS:
S - neural network trainer object.
This structure can be used to train any regression
network with NIn inputs and NOut outputs.
See MLPCreateTrainerCls() for the classification counterpart.
-- ALGLIB --
Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpcreatetrainer(const ae_int_t nin, const ae_int_t nout, mlptrainer &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Creation of the network trainer object for classification networks
INPUT PARAMETERS:
NIn - number of inputs, NIn>=1
NClasses - number of classes, NClasses>=2
OUTPUT PARAMETERS:
S - neural network trainer object.
This structure can be used to train any classification
network with NIn inputs and NClasses classes.
See MLPCreateTrainer() for the regression counterpart.
-- ALGLIB --
Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpcreatetrainercls(const ae_int_t nin, const ae_int_t nclasses, mlptrainer &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets "current dataset" of the trainer object to one passed
by user.
INPUT PARAMETERS:
S - trainer object
XY - training set, see below for information on the
training set format. This function checks correctness
of the dataset (no NANs/INFs, class numbers are
correct) and throws exception when incorrect dataset
is passed.
NPoints - points count, >=0.
DATASET FORMAT:
This function uses two different dataset formats - one for regression
networks, another one for classification networks.
For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs
For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
NClasses-1).
-- ALGLIB --
Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpsetdataset(const mlptrainer &s, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets "current dataset" of the trainer object to one passed
by user (sparse matrix is used to store dataset).
INPUT PARAMETERS:
S - trainer object
XY - training set, see below for information on the
training set format. This function checks correctness
of the dataset (no NANs/INFs, class numbers are
correct) and throws exception when incorrect dataset
is passed. Any sparse storage format can be used:
Hash-table, CRS...
NPoints - points count, >=0
DATASET FORMAT:
This function uses two different dataset formats - one for regression
networks, another one for classification networks.
For regression networks with NIn inputs and NOut outputs the following
dataset format is used:
* dataset is given by NPoints*(NIn+NOut) matrix
* each row corresponds to one example
* first NIn columns are inputs, next NOut columns are outputs
For classification networks with NIn inputs and NClasses classes the
following dataset format is used:
* dataset is given by NPoints*(NIn+1) matrix
* each row corresponds to one example
* first NIn columns are inputs, last column stores class number (from 0 to
NClasses-1).
-- ALGLIB --
Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpsetsparsedataset(const mlptrainer &s, const sparsematrix &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets weight decay coefficient which is used for training.
INPUT PARAMETERS:
S - trainer object
Decay - weight decay coefficient, >=0. Weight decay term
'Decay*||Weights||^2' is added to error function. If
you don't know what Decay to choose, use 1.0E-3.
Weight decay can be set to zero; in this case the network
is trained without weight decay.
NOTE: by default the trainer uses some small nonzero value for weight
decay.
-- ALGLIB --
Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpsetdecay(const mlptrainer &s, const double decay, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets stopping criteria for the optimizer.
INPUT PARAMETERS:
S - trainer object
WStep - stopping criterion. Algorithm stops if step size is
less than WStep. Recommended value - 0.01. Zero step
size means stopping after MaxIts iterations.
WStep>=0.
MaxIts - stopping criterion. Algorithm stops after MaxIts
epochs (full passes over entire dataset). Zero MaxIts
means stopping when step is sufficiently small.
MaxIts>=0.
NOTE: by default, WStep=0.005 and MaxIts=0 are used. These values are also
used when MLPSetCond() is called with WStep=0 and MaxIts=0.
NOTE: these stopping criteria are used for all kinds of neural training -
from "conventional" networks to early stopping ensembles. When used
for "conventional" networks, they are used as the only stopping
criteria. When combined with early stopping, they are used as
ADDITIONAL stopping criteria which can terminate early stopping
algorithm.
-- ALGLIB --
Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpsetcond(const mlptrainer &s, const double wstep, const ae_int_t maxits, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets training algorithm: batch training using L-BFGS will be
used.
This algorithm:
* is the most robust for small-scale problems, but may be too slow for
large-scale ones
* performs full pass through the dataset before performing step
* uses conditions specified by MLPSetCond() for stopping
* is the default one used by trainer object
INPUT PARAMETERS:
S - trainer object
-- ALGLIB --
Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpsetalgobatch(const mlptrainer &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function trains neural network passed to this function, using current
dataset (one which was passed to MLPSetDataset() or MLPSetSparseDataset())
and current training settings. Training from NRestarts random starting
positions is performed, best network is chosen.
Training is performed using current training algorithm.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
S - trainer object
Network - neural network. It must have the same number of inputs and
outputs/classes as was specified during creation of the
trainer object.
NRestarts - number of restarts, >=0:
* NRestarts>0 means that specified number of random
restarts are performed, best network is chosen after
training
* NRestarts=0 means that current state of the network
is used for training.
OUTPUT PARAMETERS:
Network - trained network
Rep - training report (mlpreport structure)
NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
network is filled by zero values. Same behavior for functions
MLPStartTraining and MLPContinueTraining.
NOTE: this method uses sum-of-squares error function for training.
NOTE(review): Network is declared as a const reference although it is
documented as an output. Presumably the object refers to internal
state which is updated in place - confirm against the
implementation.
-- ALGLIB --
Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlptrainnetwork(const mlptrainer &s, const multilayerperceptron &network, const ae_int_t nrestarts, mlpreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
IMPORTANT: this is an "expert" version of the MLPTrain() function. We do
not recommend you to use it unless you are pretty sure that you
need ability to monitor training progress.
This function performs step-by-step training of the neural network. Here
"step-by-step" means that training starts with MLPStartTraining() call,
and then user subsequently calls MLPContinueTraining() to perform one more
iteration of the training.
After a call to this function the trainer object remembers the network and
is ready to train it. However, no training is performed until the first
call to MLPContinueTraining() function. Subsequent calls to
MLPContinueTraining() will advance training progress one iteration further.
EXAMPLE:
>
> ...initialize network and trainer object....
>
> MLPStartTraining(Trainer, Network, True)
> while MLPContinueTraining(Trainer, Network) do
> ...visualize training progress...
>
INPUT PARAMETERS:
S - trainer object
Network - neural network. It must have the same number of inputs and
outputs/classes as was specified during creation of the
trainer object.
RandomStart - randomize network before training or not:
* True means that network is randomized and its
initial state (one which was passed to the trainer
object) is lost.
* False means that training is started from the
current state of the network
OUTPUT PARAMETERS:
Network - neural network which is ready for training (weights are
initialized, preprocessor is initialized using current
training set)
NOTE: this method uses sum-of-squares error function for training.
NOTE: it is expected that trainer object settings are NOT changed during
step-by-step training, i.e. no one changes stopping criteria or
training set during training. It is possible and there is no defense
against such actions, but algorithm behavior in such cases is
undefined and can be unpredictable.
-- ALGLIB --
Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
void mlpstarttraining(const mlptrainer &s, const multilayerperceptron &network, const bool randomstart, const xparams _xparams = alglib::xdefault);
/*************************************************************************
IMPORTANT: this is an "expert" version of the MLPTrain() function. We do
not recommend you to use it unless you are pretty sure that you
need ability to monitor training progress.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
This function performs step-by-step training of the neural network. Here
"step-by-step" means that training starts with MLPStartTraining() call,
and then user subsequently calls MLPContinueTraining() to perform one more
iteration of the training.
This function performs one more iteration of the training and returns
either True (training continues) or False (training stopped). In case True
was returned, Network weights are updated according to the current state
of the optimization progress. In case False was returned, no additional
updates are performed (the previous update of the network weights moved us
to the final point, and no additional updates are needed).
EXAMPLE:
>
> [initialize network and trainer object]
>
> MLPStartTraining(Trainer, Network, True)
> while MLPContinueTraining(Trainer, Network) do
> [visualize training progress]
>
INPUT PARAMETERS:
S - trainer object
Network - neural network structure, which is used to store
current state of the training process.
OUTPUT PARAMETERS:
Network - weights of the neural network are rewritten by the
current approximation.
RESULT:
True if training continues, False if training has stopped.
NOTE: this method uses sum-of-squares error function for training.
NOTE: it is expected that trainer object settings are NOT changed during
step-by-step training, i.e. no one changes stopping criteria or
training set during training. It is possible and there is no defense
against such actions, but algorithm behavior in such cases is
undefined and can be unpredictable.
NOTE: It is expected that Network is the same one which was passed to
MLPStartTraining() function. However, THIS function checks only
the following:
* that number of network inputs is consistent with trainer object
settings
* that number of network outputs/classes is consistent with trainer
object settings
* that number of network weights is the same as number of weights in
the network passed to MLPStartTraining() function
Exception is thrown when these conditions are violated.
It is also expected that you do not change state of the network on
your own - the only party who has the right to change the network
during its training is the trainer object. Any attempt to interfere
with the trainer may lead to unpredictable results.
-- ALGLIB --
Copyright 23.07.2012 by Bochkanov Sergey
*************************************************************************/
bool mlpcontinuetraining(const mlptrainer &s, const multilayerperceptron &network, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Training neural networks ensemble using bootstrap aggregating (bagging).
Modified Levenberg-Marquardt algorithm is used as base training method.
INPUT PARAMETERS:
Ensemble - model with initialized geometry
XY - training set
NPoints - training set size
Decay - weight decay coefficient, >=0.001
Restarts - restarts, >0.
OUTPUT PARAMETERS:
Ensemble - trained model
Info - return code:
* -2, if there is a point with class number
outside of [0..NClasses-1].
* -1, if incorrect parameters were passed
(NPoints<0, Restarts<1).
* 2, if task has been solved.
Rep - training report.
OOBErrors - out-of-bag generalization error estimate
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpebagginglm(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, mlpcvreport &ooberrors, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Training neural networks ensemble using bootstrap aggregating (bagging).
L-BFGS algorithm is used as base training method.
INPUT PARAMETERS:
Ensemble - model with initialized geometry
XY - training set
NPoints - training set size
Decay - weight decay coefficient, >=0.001
Restarts - restarts, >0.
WStep - stopping criterion, same as in MLPTrainLBFGS
MaxIts - stopping criterion, same as in MLPTrainLBFGS
OUTPUT PARAMETERS:
Ensemble - trained model
Info - return code:
* -8, if both WStep=0 and MaxIts=0
* -2, if there is a point with class number
outside of [0..NClasses-1].
* -1, if incorrect parameters were passed
(NPoints<0, Restarts<1).
* 2, if task has been solved.
Rep - training report.
OOBErrors - out-of-bag generalization error estimate
-- ALGLIB --
Copyright 17.02.2009 by Bochkanov Sergey
*************************************************************************/
void mlpebagginglbfgs(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, const double wstep, const ae_int_t maxits, ae_int_t &info, mlpreport &rep, mlpcvreport &ooberrors, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Training neural networks ensemble using early stopping.
INPUT PARAMETERS:
Ensemble - model with initialized geometry
XY - training set
NPoints - training set size
Decay - weight decay coefficient, >=0.001
Restarts - restarts, >0.
OUTPUT PARAMETERS:
Ensemble - trained model
Info - return code:
* -2, if there is a point with class number
outside of [0..NClasses-1].
* -1, if incorrect parameters were passed
(NPoints<0, Restarts<1).
* 6, if task has been solved.
Rep - training report.
NOTE: earlier versions of this comment also listed an OOBErrors output
(out-of-bag generalization error estimate). The declaration below
has no such parameter, unlike MLPEBaggingLM/MLPEBaggingLBFGS.
-- ALGLIB --
Copyright 10.03.2009 by Bochkanov Sergey
*************************************************************************/
void mlpetraines(const mlpensemble &ensemble, const real_2d_array &xy, const ae_int_t npoints, const double decay, const ae_int_t restarts, ae_int_t &info, mlpreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function trains neural network ensemble passed to this function using
current dataset and early stopping training algorithm. Each early stopping
round performs NRestarts random restarts (thus, EnsembleSize*NRestarts
training rounds are performed in total).
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
S - trainer object;
Ensemble - neural network ensemble. It must have the same number of
inputs and outputs/classes as was specified during
creation of the trainer object.
NRestarts - number of restarts, >=0:
* NRestarts>0 means that specified number of random
restarts are performed during each ES round;
* NRestarts=0 is silently replaced by 1.
OUTPUT PARAMETERS:
Ensemble - trained ensemble;
Rep - it contains all types of errors.
NOTE: this training method uses BOTH early stopping and weight decay! So,
you should select weight decay before starting training just as you
select it before training "conventional" networks.
NOTE: when no dataset was specified with MLPSetDataset/SetSparseDataset(),
or single-point dataset was passed, ensemble is filled by zero
values.
NOTE: this method uses sum-of-squares error function for training.
-- ALGLIB --
Copyright 22.08.2012 by Bochkanov Sergey
*************************************************************************/
void mlptrainensemblees(const mlptrainer &s, const mlpensemble &ensemble, const ae_int_t nrestarts, mlpreport &rep, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This function initializes clusterizer object. Newly initialized object is
empty, i.e. it does not contain dataset. You should use it as follows:
1. creation
2. dataset is added with ClusterizerSetPoints()
3. additional parameters are set
4. clusterization is performed with one of the clustering functions
OUTPUT PARAMETERS:
S - clusterizer object
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizercreate(clusterizerstate &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function adds dataset to the clusterizer structure.
This function overrides all previous calls of ClusterizerSetPoints() or
ClusterizerSetDistances().
INPUT PARAMETERS:
S - clusterizer state, initialized by ClusterizerCreate()
XY - array[NPoints,NFeatures], dataset
NPoints - number of points, >=0
NFeatures- number of features, >=1
DistType- distance function:
* 0 Chebyshev distance (L-inf norm)
* 1 city block distance (L1 norm)
* 2 Euclidean distance (L2 norm), non-squared
* 10 Pearson correlation:
dist(a,b) = 1-corr(a,b)
* 11 Absolute Pearson correlation:
dist(a,b) = 1-|corr(a,b)|
* 12 Uncentered Pearson correlation (cosine of the angle):
dist(a,b) = a'*b/(|a|*|b|)
* 13 Absolute uncentered Pearson correlation
dist(a,b) = |a'*b|/(|a|*|b|)
* 20 Spearman rank correlation:
dist(a,b) = 1-rankcorr(a,b)
* 21 Absolute Spearman rank correlation
dist(a,b) = 1-|rankcorr(a,b)|
NOTE(review): the formulas given for types 12 and 13 are not of the
"1-x" form used by the other correlation-based distances; verify them
against the implementation before relying on them.
NOTE 1: different distance functions have different performance penalty:
* Euclidean or Pearson correlation distances are the fastest ones
* Spearman correlation distance function is a bit slower
* city block and Chebyshev distances are order of magnitude slower
The reason behind the difference in performance is that correlation-based
distance functions are computed using optimized linear algebra kernels,
while Chebyshev and city block distance functions are computed using
simple nested loops with two branches at each iteration.
NOTE 2: different clustering algorithms have different limitations:
* agglomerative hierarchical clustering algorithms may be used with
any kind of distance metric
* k-means++ clustering algorithm may be used only with Euclidean
distance function
Thus, list of specific clustering algorithms you may use depends
on distance function you specify when you set your dataset.
NOTE 3: the second overload below omits NPoints and NFeatures; presumably
they are taken from the dimensions of XY - confirm against the
implementation.
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetpoints(const clusterizerstate &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nfeatures, const ae_int_t disttype, const xparams _xparams = alglib::xdefault);
void clusterizersetpoints(const clusterizerstate &s, const real_2d_array &xy, const ae_int_t disttype, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function adds a dataset given by its distance matrix to the
clusterizer structure. It is important that the dataset is not given
explicitly - only the distance matrix is given.
This function overrides all previous calls of ClusterizerSetPoints() or
ClusterizerSetDistances().
INPUT PARAMETERS:
S - clusterizer state, initialized by ClusterizerCreate()
D - array[NPoints,NPoints], distance matrix given by its upper
or lower triangle (main diagonal is ignored because its
entries are expected to be zero).
NPoints - number of points
IsUpper - whether upper or lower triangle of D is given.
NOTE 1: different clustering algorithms have different limitations:
* agglomerative hierarchical clustering algorithms may be used with
any kind of distance metric, including one which is given by
distance matrix
* k-means++ clustering algorithm may be used only with Euclidean
distance function and explicitly given points - it can not be
used with a dataset given by distance matrix
Thus, if you call this function, you will be unable to use the k-means
clustering algorithm to process your problem.
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetdistances(const clusterizerstate &s, const real_2d_array &d, const ae_int_t npoints, const bool isupper, const xparams _xparams = alglib::xdefault);
void clusterizersetdistances(const clusterizerstate &s, const real_2d_array &d, const bool isupper, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets the agglomerative hierarchical clustering algorithm.
INPUT PARAMETERS:
S - clusterizer state, initialized by ClusterizerCreate()
Algo - algorithm type:
* 0 complete linkage (default algorithm)
* 1 single linkage
* 2 unweighted average linkage
* 3 weighted average linkage
* 4 Ward's method
NOTE: Ward's method works correctly only with Euclidean distance, that's
why the algorithm will return negative termination code (failure)
for any other distance type.
It is possible, however, to use this method with a user-supplied
distance matrix. It is your responsibility to pass one which was
calculated with the Euclidean distance function.
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetahcalgo(const clusterizerstate &s, const ae_int_t algo, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets k-means properties: number of restarts and maximum
number of iterations per run.
INPUT PARAMETERS:
S - clusterizer state, initialized by ClusterizerCreate()
Restarts- restarts count, >=1.
k-means++ algorithm performs several restarts and chooses
the best set of centers (one with minimum squared distance).
MaxIts - maximum number of k-means iterations performed during one
run. >=0, zero value means that the algorithm performs an
unlimited number of iterations.
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizersetkmeanslimits(const clusterizerstate &s, const ae_int_t restarts, const ae_int_t maxits, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets k-means initialization algorithm. Several different
algorithms can be chosen, including k-means++.
INPUT PARAMETERS:
S - clusterizer state, initialized by ClusterizerCreate()
InitAlgo- initialization algorithm:
* 0 automatic selection (different versions of ALGLIB
may select different algorithms)
* 1 random initialization
* 2 k-means++ initialization (best quality of initial
centers, but long non-parallelizable initialization
phase with bad cache locality)
* 3 "fast-greedy" algorithm with efficient, easy to
parallelize initialization. Quality of initial centers
is somewhat worse than that of k-means++. This
algorithm is the default one in the current version of
ALGLIB.
* -1 "debug" algorithm which always selects first K rows
of dataset; this algorithm is used for debug purposes
only. Do not use it in the industrial code!
-- ALGLIB --
Copyright 21.01.2015 by Bochkanov Sergey
*************************************************************************/
void clusterizersetkmeansinit(const clusterizerstate &s, const ae_int_t initalgo, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets the seed which is used to initialize internal RNG. By
default, a deterministic seed is used - same for each run of clusterizer.
If you specify a non-deterministic seed value, then some algorithms which
depend on random initialization (in current version: k-means) may return
slightly different results after each run.
INPUT PARAMETERS:
S - clusterizer state, initialized by ClusterizerCreate()
Seed - seed:
* positive values = use deterministic seed for each run of
algorithms which depend on random initialization
* zero or negative values = use non-deterministic seed
-- ALGLIB --
Copyright 08.06.2017 by Bochkanov Sergey
*************************************************************************/
void clusterizersetseed(const clusterizerstate &s, const ae_int_t seed, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function performs agglomerative hierarchical clustering.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
! (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
NOTE: Agglomerative hierarchical clustering algorithm has two phases:
distance matrix calculation and clustering itself. Only the first
phase (distance matrix calculation) is accelerated by Intel MKL and
multithreading. Thus, acceleration is significant only for medium or
high-dimensional problems.
Although activating multithreading gives some speedup over single-
threaded execution, you should not expect nearly-linear scaling
with respect to cores count.
INPUT PARAMETERS:
S - clusterizer state, initialized by ClusterizerCreate()
OUTPUT PARAMETERS:
Rep - clustering results; see description of AHCReport
structure for more information.
NOTE 1: hierarchical clustering algorithms require large amounts of memory.
In particular, this implementation needs sizeof(double)*NPoints^2
bytes, which are used to store the distance matrix. In case we work
with a user-supplied matrix, this amount is multiplied by 2 (we
have to store the original matrix and to work with its copy).
For example, a problem with 10000 points would require 800 MB of
RAM, even when working in a 1-dimensional space.
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerrunahc(const clusterizerstate &s, ahcreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function performs clustering by k-means++ algorithm.
You may change algorithm properties by calling:
* ClusterizerSetKMeansLimits() to change number of restarts or iterations
* ClusterizerSetKMeansInit() to change initialization algorithm
By default, one restart and unlimited number of iterations are used.
Initialization algorithm is chosen automatically.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
! (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
NOTE: k-means clustering algorithm has two phases: selection of initial
centers and clustering itself. ALGLIB parallelizes both phases.
Parallel version is optimized for the following scenario: medium or
high-dimensional problem (8 or more dimensions) with large number of
points and clusters. However, some speed-up can be obtained even
when assumptions above are violated.
INPUT PARAMETERS:
S - clusterizer state, initialized by ClusterizerCreate()
K - number of clusters, K>=0.
K can be zero only when algorithm is called for empty
dataset, in this case completion code is set to
success (+1).
If K=0 and dataset size is non-zero, we can not
meaningfully assign points to some center (there are no
centers because K=0) and return -3 as completion code
(failure).
OUTPUT PARAMETERS:
Rep - clustering results; see description of KMeansReport
structure for more information.
NOTE 1: k-means clustering can be performed only for datasets with
Euclidean distance function. Algorithm will return negative
completion code in Rep.TerminationType in case dataset was added
to clusterizer with DistType other than Euclidean (or dataset was
specified by distance matrix instead of explicitly given points).
NOTE 2: k-means uses RNG to select initial centers. The seed of this RNG
is controlled by ClusterizerSetSeed() function, whose description
states that a deterministic seed is used by default (same results
for each run). Pass zero or negative seed to ClusterizerSetSeed()
if you need non-deterministic behavior.
NOTE(review): earlier wording of this note claimed that the
default seed is non-deterministic, which contradicts description
of ClusterizerSetSeed() - confirm against the implementation of
ClusterizerCreate().
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerrunkmeans(const clusterizerstate &s, const ae_int_t k, kmeansreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns the distance matrix for a dataset.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
! * hardware vendor (Intel) implementations of linear algebra primitives
! (C++ and C# versions, x86/x64 platform)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
XY - array[NPoints,NFeatures], dataset
NPoints - number of points, >=0
NFeatures- number of features, >=1
DistType- distance function:
* 0 Chebyshev distance (L-inf norm)
* 1 city block distance (L1 norm)
* 2 Euclidean distance (L2 norm, non-squared)
* 10 Pearson correlation:
dist(a,b) = 1-corr(a,b)
* 11 Absolute Pearson correlation:
dist(a,b) = 1-|corr(a,b)|
* 12 Uncentered Pearson correlation (cosine of the angle):
dist(a,b) = a'*b/(|a|*|b|)
* 13 Absolute uncentered Pearson correlation
dist(a,b) = |a'*b|/(|a|*|b|)
* 20 Spearman rank correlation:
dist(a,b) = 1-rankcorr(a,b)
* 21 Absolute Spearman rank correlation
dist(a,b) = 1-|rankcorr(a,b)|
OUTPUT PARAMETERS:
D - array[NPoints,NPoints], distance matrix
(full matrix is returned, with lower and upper triangles)
NOTE: different distance functions have different performance penalty:
* Euclidean or Pearson correlation distances are the fastest ones
* Spearman correlation distance function is a bit slower
* city block and Chebyshev distances are an order of magnitude slower
The reason behind the difference in performance is that correlation-based
distance functions are computed using optimized linear algebra kernels,
while Chebyshev and city block distance functions are computed using
simple nested loops with two branches at each iteration.
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizergetdistances(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nfeatures, const ae_int_t disttype, real_2d_array &d, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function takes as input clusterization report Rep, desired clusters
count K, and builds top K clusters from hierarchical clusterization tree.
It returns assignment of points to clusters (array of cluster indexes).
INPUT PARAMETERS:
Rep - report from ClusterizerRunAHC() performed on XY
K - desired number of clusters, 1<=K<=NPoints.
K can be zero only when NPoints=0.
OUTPUT PARAMETERS:
CIdx - array[NPoints], I-th element contains cluster index (from
0 to K-1) for I-th point of the dataset.
CZ - array[K]. This array allows to convert cluster indexes
returned by this function to indexes used by Rep.Z. J-th
cluster returned by this function corresponds to CZ[J]-th
cluster stored in Rep.Z/PZ/PM.
It is guaranteed that CZ[I]<CZ[I+1].
NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
Although they were obtained by manipulation with top K nodes of
dendrogram (i.e. hierarchical decomposition of dataset), this
function does not return information about hierarchy. Each of the
clusters stands on its own.
NOTE: Cluster indexes returned by this function do not correspond to
indexes returned in Rep.Z/PZ/PM. Either you work with hierarchical
representation of the dataset (dendrogram), or you work with "flat"
representation returned by this function. Each of representations
has its own clusters indexing system (former uses [0..2*NPoints-2],
while latter uses [0..K-1]), although it is possible to perform
conversion from one system to another by means of CZ array, returned
by this function, which allows you to convert indexes stored in CIdx
to the numeration system used by Rep.Z.
NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
it will perform many times faster than for K=100. Its worst-case
performance is O(N*K), although in average case it performs better
(up to O(N*log(K))).
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizergetkclusters(const ahcreport &rep, const ae_int_t k, integer_1d_array &cidx, integer_1d_array &cz, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function accepts AHC report Rep, desired minimum intercluster
distance and returns top clusters from hierarchical clusterization tree
which are separated by distance R or HIGHER.
It returns assignment of points to clusters (array of cluster indexes).
There is one more function with similar name - ClusterizerSeparatedByCorr,
which returns clusters with intercluster correlation equal to R or LOWER
(note: higher for distance, lower for correlation).
INPUT PARAMETERS:
Rep - report from ClusterizerRunAHC() performed on XY
R - desired minimum intercluster distance, R>=0
OUTPUT PARAMETERS:
K - number of clusters, 1<=K<=NPoints
CIdx - array[NPoints], I-th element contains cluster index (from
0 to K-1) for I-th point of the dataset.
CZ - array[K]. This array allows to convert cluster indexes
returned by this function to indexes used by Rep.Z. J-th
cluster returned by this function corresponds to CZ[J]-th
cluster stored in Rep.Z/PZ/PM.
It is guaranteed that CZ[I]<CZ[I+1].
NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
Although they were obtained by manipulation with top K nodes of
dendrogram (i.e. hierarchical decomposition of dataset), this
function does not return information about hierarchy. Each of the
clusters stands on its own.
NOTE: Cluster indexes returned by this function do not correspond to
indexes returned in Rep.Z/PZ/PM. Either you work with hierarchical
representation of the dataset (dendrogram), or you work with "flat"
representation returned by this function. Each of representations
has its own clusters indexing system (former uses [0..2*NPoints-2],
while latter uses [0..K-1]), although it is possible to perform
conversion from one system to another by means of CZ array, returned
by this function, which allows you to convert indexes stored in CIdx
to the numeration system used by Rep.Z.
NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
it will perform many times faster than for K=100. Its worst-case
performance is O(N*K), although in average case it performs better
(up to O(N*log(K))).
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerseparatedbydist(const ahcreport &rep, const double r, ae_int_t &k, integer_1d_array &cidx, integer_1d_array &cz, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function accepts AHC report Rep, desired maximum intercluster
correlation and returns top clusters from hierarchical clusterization tree
which are separated by correlation R or LOWER.
It returns assignment of points to clusters (array of cluster indexes).
There is one more function with similar name - ClusterizerSeparatedByDist,
which returns clusters with intercluster distance equal to R or HIGHER
(note: higher for distance, lower for correlation).
INPUT PARAMETERS:
Rep - report from ClusterizerRunAHC() performed on XY
R - desired maximum intercluster correlation, -1<=R<=+1
OUTPUT PARAMETERS:
K - number of clusters, 1<=K<=NPoints
CIdx - array[NPoints], I-th element contains cluster index (from
0 to K-1) for I-th point of the dataset.
CZ - array[K]. This array allows to convert cluster indexes
returned by this function to indexes used by Rep.Z. J-th
cluster returned by this function corresponds to CZ[J]-th
cluster stored in Rep.Z/PZ/PM.
It is guaranteed that CZ[I]<CZ[I+1].
NOTE: K clusters built by this subroutine are assumed to have no hierarchy.
Although they were obtained by manipulation with top K nodes of
dendrogram (i.e. hierarchical decomposition of dataset), this
function does not return information about hierarchy. Each of the
clusters stands on its own.
NOTE: Cluster indexes returned by this function do not correspond to
indexes returned in Rep.Z/PZ/PM. Either you work with hierarchical
representation of the dataset (dendrogram), or you work with "flat"
representation returned by this function. Each of representations
has its own clusters indexing system (former uses [0..2*NPoints-2],
while latter uses [0..K-1]), although it is possible to perform
conversion from one system to another by means of CZ array, returned
by this function, which allows you to convert indexes stored in CIdx
to the numeration system used by Rep.Z.
NOTE: this subroutine is optimized for moderate values of K. Say, for K=5
it will perform many times faster than for K=100. Its worst-case
performance is O(N*K), although in average case it performs better
(up to O(N*log(K))).
-- ALGLIB --
Copyright 10.07.2012 by Bochkanov Sergey
*************************************************************************/
void clusterizerseparatedbycorr(const ahcreport &rep, const double r, ae_int_t &k, integer_1d_array &cidx, integer_1d_array &cz, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This function serializes data structure to string.
Important properties of s_out:
* it contains alphanumeric characters, dots, underscores, minus signs
* these symbols are grouped into words, which are separated by spaces
and Windows-style (CR+LF) newlines
* although serializer uses spaces and CR+LF as separators, you can
replace any separator character by arbitrary combination of spaces,
tabs, Windows or Unix newlines. It allows flexible reformatting of
the string in case you want to include it into text or XML file.
But you should not insert separators into the middle of the "words"
nor should you change case of letters.
* s_out can be freely moved between 32-bit and 64-bit systems, little
and big endian machines, and so on. You can serialize structure on
32-bit machine and unserialize it on 64-bit one (or vice versa), or
serialize it on SPARC and unserialize on x86. You can also
serialize it in C++ version of ALGLIB and unserialize in C# one,
and vice versa.
*************************************************************************/
void dfserialize(decisionforest &obj, std::string &s_out);
/*************************************************************************
This function unserializes data structure from string.
INPUT PARAMETERS:
s_in - string with serialized representation (presumably one
produced by the string version of dfserialize())
OUTPUT PARAMETERS:
obj - decision forest which receives unserialized data
*************************************************************************/
void dfunserialize(const std::string &s_in, decisionforest &obj);
/*************************************************************************
This function serializes data structure to C++ stream.
Data stream generated by this function is same as string representation
generated by string version of serializer - alphanumeric characters,
dots, underscores, minus signs, which are grouped into words separated by
spaces and CR+LF.
We recommend you to read comments on string version of serializer to find
out more about serialization of ALGLIB objects.
*************************************************************************/
void dfserialize(decisionforest &obj, std::ostream &s_out);
/*************************************************************************
This function unserializes data structure from stream.
INPUT PARAMETERS:
s_in - input stream with serialized representation (presumably
one produced by the stream version of dfserialize())
OUTPUT PARAMETERS:
obj - decision forest which receives unserialized data
NOTE(review): the stream is accepted by const reference, although reading
from std::istream normally changes its state - confirm how the
implementation reads from s_in before relying on stream
position/flags after the call.
*************************************************************************/
void dfunserialize(const std::istream &s_in, decisionforest &obj);
/*************************************************************************
This function creates buffer structure which can be used to perform
parallel inference requests.
DF subpackage provides two sets of computing functions - ones which use
internal buffer of DF model (these functions are single-threaded because
they use same buffer, which can not be shared between threads), and ones
which use external buffer.
This function is used to initialize external buffer.
INPUT PARAMETERS
Model - DF model which is associated with newly created buffer
OUTPUT PARAMETERS
Buf - external buffer.
IMPORTANT: buffer object should be used only with model which was used to
initialize buffer. Any attempt to use buffer with different
object is dangerous - you may get integrity check failure
(exception) because sizes of internal arrays do not fit to
dimensions of the model structure.
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void dfcreatebuffer(const decisionforest &model, decisionforestbuffer &buf, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This subroutine creates DecisionForestBuilder object which is used to
train decision forests.
By default, new builder stores empty dataset and some reasonable default
settings. At the very least, you should specify dataset prior to building
decision forest. You can also tweak settings of the forest construction
algorithm (recommended, although default settings should work well).
Following actions are mandatory:
* calling dfbuildersetdataset() to specify dataset
* calling dfbuilderbuildrandomforest() to build decision forest using
current dataset and default settings
Additionally, you may call:
* dfbuildersetrndvars() or dfbuildersetrndvarsratio() to specify number of
variables randomly chosen for each split
* dfbuildersetsubsampleratio() to specify fraction of the dataset randomly
subsampled to build each tree
* dfbuildersetseed() to control random seed chosen for tree construction
INPUT PARAMETERS:
none
OUTPUT PARAMETERS:
S - decision forest builder
-- ALGLIB --
Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildercreate(decisionforestbuilder &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This subroutine adds dense dataset to the internal storage of the builder
object. Specifying your dataset in the dense format means that the dense
version of the forest construction algorithm will be invoked.
INPUT PARAMETERS:
S - decision forest builder object
XY - array[NPoints,NVars+1] (minimum size; actual size can
be larger, only leading part is used anyway), dataset:
* first NVars elements of each row store values of the
independent variables
* last column stores class number (in 0...NClasses-1)
or real value of the dependent variable
NPoints - number of rows in the dataset, NPoints>=1
NVars - number of independent variables, NVars>=1
NClasses - indicates type of the problem being solved:
* NClasses>=2 means that classification problem is
solved (last column of the dataset stores class
number)
* NClasses=1 means that regression problem is solved
(last column of the dataset stores variable value)
OUTPUT PARAMETERS:
S - decision forest builder
-- ALGLIB --
Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetdataset(const decisionforestbuilder &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets the number of variables (in [1,NVars] range) used by
the decision forest construction algorithm.
The default option is to use roughly sqrt(NVars) variables.
INPUT PARAMETERS:
S - decision forest builder object
RndVars - number of randomly selected variables; values outside
of [1,NVars] range are silently clipped.
OUTPUT PARAMETERS:
S - decision forest builder
-- ALGLIB --
Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrndvars(const decisionforestbuilder &s, const ae_int_t rndvars, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets number of variables used by decision forest construction
algorithm as a fraction of total variable count, in (0,1) range.
The default option is to use roughly sqrt(NVars) variables.
INPUT PARAMETERS:
S - decision forest builder object
F - round(NVars*F) variables are selected
OUTPUT PARAMETERS:
S - decision forest builder
-- ALGLIB --
Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrndvarsratio(const decisionforestbuilder &s, const double f, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function tells the decision forest builder to automatically choose
the number of variables used by the decision forest construction
algorithm. Roughly sqrt(NVars) variables will be used.
INPUT PARAMETERS:
S - decision forest builder object
OUTPUT PARAMETERS:
S - decision forest builder
-- ALGLIB --
Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrndvarsauto(const decisionforestbuilder &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets size of dataset subsample generated by the decision
forest construction algorithm. Size is specified as a fraction of total
dataset size.
The default option is to use 50% of the dataset for training, 50% for the
OOB estimates. You can decrease fraction F down to 10%, 1% or even below
in order to reduce overfitting.
INPUT PARAMETERS:
S - decision forest builder object
F - fraction of the dataset to use, in (0,1] range. Values
outside of this range will be silently clipped. At
least one element is always selected for the training
set.
OUTPUT PARAMETERS:
S - decision forest builder
-- ALGLIB --
Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetsubsampleratio(const decisionforestbuilder &s, const double f, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets seed used by internal RNG for random subsampling and
random selection of variable subsets.
By default random seed is used, i.e. every time you build decision forest,
we seed generator with new value obtained from system-wide RNG. Thus,
decision forest builder returns non-deterministic results. You can change
such behavior by specifying fixed positive seed value.
INPUT PARAMETERS:
S - decision forest builder object
SeedVal - seed value:
* positive values are used for seeding RNG with fixed
seed, i.e. subsequent runs on same data will return
same decision forests
* non-positive seed means that random seed is used
for every run of builder, i.e. subsequent runs on
same datasets will return slightly different
decision forests
OUTPUT PARAMETERS:
S - decision forest builder
-- ALGLIB --
Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetseed(const decisionforestbuilder &s, const ae_int_t seedval, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets random decision forest construction algorithm.
As for now, only one decision forest construction algorithm is supported -
a dense "baseline" RDF algorithm.
INPUT PARAMETERS:
S - decision forest builder object
AlgoType - algorithm type:
* 0 = baseline dense RDF
OUTPUT PARAMETERS:
S - decision forest builder
-- ALGLIB --
Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrdfalgo(const decisionforestbuilder &s, const ae_int_t algotype, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets split selection algorithm used by decision forest
classifier. You may choose among several algorithms, with different speed
and quality of the results.
INPUT PARAMETERS:
S - decision forest builder object
SplitStrength- split type:
* 0 = split at the random position, fastest one
* 1 = split at the middle of the range
* 2 = strong split at the best point of the range (default)
OUTPUT PARAMETERS:
S - decision forest builder
-- ALGLIB --
Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetrdfsplitstrength(const decisionforestbuilder &s, const ae_int_t splitstrength, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function tells decision forest construction algorithm to use
Gini impurity based variable importance estimation (also known as MDI).

This version of importance estimation algorithm analyzes mean decrease in
impurity (MDI) on training sample during splits. The result is divided
by impurity at the root node in order to produce estimate in [0,1] range.

Such estimates are fast to calculate and beautifully normalized (sum to
one) but have following downsides:
* They ALWAYS sum to 1.0, even if output is completely unpredictable. I.e.
  MDI allows to order variables by importance, but does not tell us about
  "absolute" importances of variables
* there exists some bias towards continuous and high-cardinality
  categorical variables

NOTE: informally speaking, MDA (permutation importance) rating answers the
      question "what part of the model predictive power is ruined by
      permuting k-th variable?" while MDI tells us "what part of the model
      predictive power was achieved due to usage of k-th variable".

      Thus, MDA rates each variable independently at "0 to 1" scale while
      MDI (and OOB-MDI too) tends to divide "unit amount of importance"
      between several important variables.

      If all variables are equally important, they will have same
      MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
      However, roughly same picture will be produced for the "all
      variables provide information, no one is critical" situation and
      for the "all variables are critical, drop any one, everything is
      ruined" situation.

      Contrary to that, MDA will rate critical variable as ~1.0 important,
      and important but non-critical variable will have less than unit
      rating.

NOTE: quite often MDA and MDI return same results. It generally happens
      on problems with low test set error (a few percents at most) and
      large enough training set to avoid overfitting.

      The difference between MDA, MDI and OOB-MDI becomes important only
      on "hard" tasks with high test set error and/or small training set.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder object. Next call to the forest
                    construction function will produce:
                    * importance estimates in rep.varimportances field
                    * variable ranks in rep.topvars field

  -- ALGLIB --
     Copyright 29.07.2019 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetimportancetrngini(const decisionforestbuilder &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function tells decision forest construction algorithm to use
out-of-bag version of Gini variable importance estimation (also known as
OOB-MDI).

This version of importance estimation algorithm analyzes mean decrease in
impurity (MDI) on out-of-bag sample during splits. The result is divided
by impurity at the root node in order to produce estimate in [0,1] range.

Such estimates are fast to calculate and resistant to overfitting issues
(thanks to the out-of-bag estimates used). However, OOB Gini rating has
following downsides:
* there exists some bias towards continuous and high-cardinality
  categorical variables
* Gini rating allows us to order variables by importance, but it is hard
  to define importance of the variable by itself.

NOTE: informally speaking, MDA (permutation importance) rating answers the
      question "what part of the model predictive power is ruined by
      permuting k-th variable?" while MDI tells us "what part of the model
      predictive power was achieved due to usage of k-th variable".

      Thus, MDA rates each variable independently at "0 to 1" scale while
      MDI (and OOB-MDI too) tends to divide "unit amount of importance"
      between several important variables.

      If all variables are equally important, they will have same
      MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
      However, roughly same picture will be produced for the "all
      variables provide information, no one is critical" situation and
      for the "all variables are critical, drop any one, everything is
      ruined" situation.

      Contrary to that, MDA will rate critical variable as ~1.0 important,
      and important but non-critical variable will have less than unit
      rating.

NOTE: quite often MDA and MDI return same results. It generally happens
      on problems with low test set error (a few percents at most) and
      large enough training set to avoid overfitting.

      The difference between MDA, MDI and OOB-MDI becomes important only
      on "hard" tasks with high test set error and/or small training set.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder object. Next call to the forest
                    construction function will produce:
                    * importance estimates in rep.varimportances field
                    * variable ranks in rep.topvars field

  -- ALGLIB --
     Copyright 29.07.2019 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetimportanceoobgini(const decisionforestbuilder &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function tells decision forest construction algorithm to use
permutation variable importance estimator (also known as MDA).

This version of importance estimation algorithm analyzes mean increase in
out-of-bag sum of squared residuals after random permutation of J-th
variable. The result is divided by error computed with all variables being
perturbed in order to produce R-squared-like estimate in [0,1] range.

Such estimate is slower to calculate than Gini-based rating because it
needs multiple inference runs for each of variables being studied.

ALGLIB uses parallelized and highly optimized algorithm which analyzes
path through the decision tree and allows to handle most perturbations
in O(1) time; nevertheless, requesting MDA importances may increase forest
construction time from 10% to 200% (or more, if you have thousands of
variables).

However, MDA rating has following benefits over Gini-based ones:
* no bias towards specific variable types
* ability to directly evaluate "absolute" importance of some variable at
  "0 to 1" scale (contrary to Gini-based rating, which returns comparative
  importances).

NOTE: informally speaking, MDA (permutation importance) rating answers the
      question "what part of the model predictive power is ruined by
      permuting k-th variable?" while MDI tells us "what part of the model
      predictive power was achieved due to usage of k-th variable".

      Thus, MDA rates each variable independently at "0 to 1" scale while
      MDI (and OOB-MDI too) tends to divide "unit amount of importance"
      between several important variables.

      If all variables are equally important, they will have same
      MDI/OOB-MDI rating, equal (for OOB-MDI: roughly equal) to 1/NVars.
      However, roughly same picture will be produced for the "all
      variables provide information, no one is critical" situation and
      for the "all variables are critical, drop any one, everything is
      ruined" situation.

      Contrary to that, MDA will rate critical variable as ~1.0 important,
      and important but non-critical variable will have less than unit
      rating.

NOTE: quite often MDA and MDI return same results. It generally happens
      on problems with low test set error (a few percents at most) and
      large enough training set to avoid overfitting.

      The difference between MDA, MDI and OOB-MDI becomes important only
      on "hard" tasks with high test set error and/or small training set.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder object. Next call to the forest
                    construction function will produce:
                    * importance estimates in rep.varimportances field
                    * variable ranks in rep.topvars field

  -- ALGLIB --
     Copyright 29.07.2019 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetimportancepermutation(const decisionforestbuilder &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function tells decision forest construction algorithm to skip
variable importance estimation.

INPUT PARAMETERS:
    S           -   decision forest builder object

OUTPUT PARAMETERS:
    S           -   decision forest builder object. Next call to the forest
                    construction function will result in forest being built
                    without variable importance estimation.

  -- ALGLIB --
     Copyright 29.07.2019 by Bochkanov Sergey
*************************************************************************/
void dfbuildersetimportancenone(const decisionforestbuilder &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function is an alias for dfbuilderpeekprogress(), left in ALGLIB for
backward compatibility reasons.

See comments on dfbuilderpeekprogress() for the description of the
parameters and of the returned value.

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
double dfbuildergetprogress(const decisionforestbuilder &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function is used to peek into decision forest construction process
from some other thread and get current progress indicator.

It returns value in [0,1].

INPUT PARAMETERS:
    S           -   decision forest builder object used to build forest
                    in some other thread

RESULT:
    progress value, in [0,1]

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
double dfbuilderpeekprogress(const decisionforestbuilder &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This subroutine builds decision forest according to current settings using
dataset internally stored in the builder object. Dense algorithm is used.

NOTE: this function uses dense algorithm for forest construction
      independently from the dataset format (dense or sparse).

NOTE: forest built with this function is stored in-memory using 64-bit
      data structures for offsets/indexes/split values. It is possible to
      convert forest into more memory-efficient compressed binary
      representation. Depending on the problem properties, 3.7x-5.7x
      compression factors are possible.

      The downsides of compression are (a) slight reduction in the model
      accuracy and (b) ~1.5x reduction in the inference speed (due to
      increased complexity of the storage format).

      See comments on dfbinarycompression() for more info.

Default settings are used by the algorithm; you can tweak them with the
help of the following functions:
* dfbuildersetrfactor() - to control a fraction of the dataset used for
  subsampling
* dfbuildersetrandomvars() - to control number of variables randomly chosen
  for decision rule creation

  ! COMMERCIAL EDITION OF ALGLIB:
  !
  ! Commercial Edition of ALGLIB includes following important improvements
  ! of this function:
  ! * high-performance native backend with same C# interface (C# version)
  ! * multithreading support (C++ and C# versions)
  !
  ! We recommend you to read 'Working with commercial version' section of
  ! ALGLIB Reference Manual in order to find out how to use performance-
  ! related features provided by commercial edition of ALGLIB.

INPUT PARAMETERS:
    S           -   decision forest builder object
    NTrees      -   NTrees>=1, number of trees to train

OUTPUT PARAMETERS:
    DF          -   decision forest. You can compress this forest to more
                    compact 16-bit representation with dfbinarycompression()
    Rep         -   report, see below for information on its fields.

=== report information produced by forest construction function ==========

Decision forest training report includes following information:
* training set errors
* out-of-bag estimates of errors
* variable importance ratings

Following fields are used to store information:
* training set errors are stored in rep.relclserror, rep.avgce,
  rep.rmserror, rep.avgerror and rep.avgrelerror
* out-of-bag estimates of errors are stored in rep.oobrelclserror,
  rep.oobavgce, rep.oobrmserror, rep.oobavgerror and rep.oobavgrelerror

Variable importance reports, if requested by
dfbuildersetimportanceoobgini(), dfbuildersetimportancetrngini() or
dfbuildersetimportancepermutation() call, are stored in:
* rep.varimportances field stores importance ratings
* rep.topvars stores variable indexes ordered from the most important to
  less important ones

You can find more information about report fields in:
* comments on dfreport structure
* comments on dfbuildersetimportanceoobgini function
* comments on dfbuildersetimportancetrngini function
* comments on dfbuildersetimportancepermutation function

  -- ALGLIB --
     Copyright 21.05.2018 by Bochkanov Sergey
*************************************************************************/
void dfbuilderbuildrandomforest(const decisionforestbuilder &s, const ae_int_t ntrees, decisionforest &df, dfreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function performs binary compression of the decision forest.

Original decision forest produced by the forest builder is stored using
64-bit representation for all numbers - offsets, variable indexes, split
points.

It is possible to significantly reduce model size by means of:
* using compressed dynamic encoding for integers (offsets and variable
  indexes), which uses just 1 byte to store small ints (less than 128),
  just 2 bytes for larger values (less than 128^2) and so on
* storing floating point numbers using 8-bit exponent and 16-bit mantissa

As result, model needs significantly less memory (compression factor
depends on variable and class counts). In particular:
* NVars<128   and NClasses<128 result in 4.4x-5.7x model size reduction
* NVars<16384 and NClasses<128 result in 3.7x-4.5x model size reduction

Such storage format performs lossless compression of all integers, but
compression of floating point values (split values) is lossy, with roughly
0.01% relative error introduced during rounding. Thus, we recommend you to
re-evaluate model accuracy after compression.

Another downside of compression is ~1.5x reduction in the inference
speed due to necessity of dynamic decompression of the compressed model.

INPUT PARAMETERS:
    DF      -   decision forest built by forest builder

OUTPUT PARAMETERS:
    DF      -   replaced by compressed forest

RESULT:
    compression factor (in-RAM size of the compressed model vs that of the
    uncompressed one), positive number larger than 1.0

  -- ALGLIB --
     Copyright 22.07.2019 by Bochkanov Sergey
*************************************************************************/
double dfbinarycompression(const decisionforest &df, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Inference using decision forest

IMPORTANT: this  function  is  thread-unsafe  and  may   modify   internal
           structures of the model! You can not use same model  object for
           parallel evaluation from several threads.

           Use dftsprocess()  with  independent  thread-local  buffers  if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    DF      -   decision forest model
    X       -   input vector,  array[NVars]
    Y       -   possibly preallocated buffer, reallocated if too small

OUTPUT PARAMETERS:
    Y       -   result. Regression estimate when solving regression  task,
                vector of posterior probabilities for classification task.

See also DFProcessI.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
void dfprocess(const decisionforest &df, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
'interactive' variant of DFProcess for languages like Python which support
constructs like "Y = DFProcessI(DF,X)" and interactive mode of interpreter

This function allocates new array on each call,  so  it  is  significantly
slower than its 'non-interactive' counterpart, but it is  more  convenient
when you call it from command line.

The parameter list (DF, X, Y) is the same as that of dfprocess(); see
comments on dfprocess() for their description.

IMPORTANT: this  function  is  thread-unsafe  and  may   modify   internal
           structures of the model! You can not use same model  object for
           parallel evaluation from several threads.

           Use dftsprocess()  with  independent  thread-local  buffers  if
           you need thread-safe evaluation.

  -- ALGLIB --
     Copyright 28.02.2010 by Bochkanov Sergey
*************************************************************************/
void dfprocessi(const decisionforest &df, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns first component of the  inferred  vector  (i.e.  one
with index #0).

It is a convenience wrapper for dfprocess() intended for either:
* 1-dimensional regression problems
* 2-class classification problems

In the former case this function returns inference result as scalar, which
is definitely more convenient than wrapping it as vector.  In  the  latter
case it returns probability of object belonging to class #0.

If you call it for anything different from two cases above, it  will  work
as defined, i.e. return y[0], although it is of less use in such cases.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You can not use same model  object  for  parallel
           evaluation from several threads.

           Use dftsprocess() with  independent  thread-local  buffers,  if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model   -   DF model
    X       -   input vector,  array[0..NVars-1].

RESULT:
    Y[0]

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double dfprocess0(const decisionforest &model, const real_1d_array &x, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns most probable class number for an  input  X.  It  is
same as calling  dfprocess(model,x,y), then determining i=argmax(y[i]) and
returning i.

A class number in [0,NOut) range is returned for classification  problems,
-1 is returned when this function is called for regression problems.

IMPORTANT: this function is thread-unsafe and modifies internal structures
           of the model! You can not use same model  object  for  parallel
           evaluation from several threads.

           Use dftsprocess()  with independent  thread-local  buffers,  if
           you need thread-safe evaluation.

INPUT PARAMETERS:
    Model   -   decision forest model
    X       -   input vector,  array[0..NVars-1].

RESULT:
    class number, -1 for regression tasks

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
ae_int_t dfclassify(const decisionforest &model, const real_1d_array &x, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Inference using decision forest

Thread-safe processing using external buffer for temporaries.

This function is thread-safe (i.e. you  can  use  same  DF   model  from
multiple threads) as long as you use different buffer objects for different
threads.

INPUT PARAMETERS:
    DF      -   decision forest model
    Buf     -   buffer object, must be  allocated  specifically  for  this
                model with dfcreatebuffer().
    X       -   input vector,  array[NVars]
    Y       -   possibly preallocated buffer, reallocated if too small

OUTPUT PARAMETERS:
    Y       -   result. Regression estimate when solving regression  task,
                vector of posterior probabilities for classification task.

See also DFProcessI.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
void dftsprocess(const decisionforest &df, const decisionforestbuffer &buf, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Relative classification error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    percent of incorrectly classified cases.
    Zero if model solves regression task.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfrelclserror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average cross-entropy (in bits per element) on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    CrossEntropy/(NPoints*LN(2)).
    Zero if model solves regression task.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfavgce(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
RMS error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    root mean square error.
    Its meaning for regression task is obvious. As for
    classification task, RMS error means error when estimating posterior
    probabilities.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfrmserror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average error.
    Its meaning for regression task is obvious. As for
    classification task, it means average error when estimating posterior
    probabilities.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfavgerror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average relative error on the test set

INPUT PARAMETERS:
    DF      -   decision forest model
    XY      -   test set
    NPoints -   test set size

RESULT:
    average relative error.
    Its meaning for regression task is obvious. As for
    classification task, it means average relative error when estimating
    posterior probability of belonging to the correct class.

  -- ALGLIB --
     Copyright 16.02.2009 by Bochkanov Sergey
*************************************************************************/
double dfavgrelerror(const decisionforest &df, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This subroutine builds random decision forest.

--------- DEPRECATED VERSION! USE DECISION FOREST BUILDER OBJECT ---------

New code should use the builder-based interface instead, see comments on
dfbuilderbuildrandomforest().

  -- ALGLIB --
     Copyright 19.02.2009 by Bochkanov Sergey
*************************************************************************/
void dfbuildrandomdecisionforest(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const ae_int_t ntrees, const double r, ae_int_t &info, decisionforest &df, dfreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This subroutine builds random decision forest.

--------- DEPRECATED VERSION! USE DECISION FOREST BUILDER OBJECT ---------

New code should use the builder-based interface instead, see comments on
dfbuilderbuildrandomforest().

  -- ALGLIB --
     Copyright 19.02.2009 by Bochkanov Sergey
*************************************************************************/
void dfbuildrandomdecisionforestx1(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const ae_int_t ntrees, const ae_int_t nrndvars, const double r, ae_int_t &info, decisionforest &df, dfreport &rep, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
This function serializes data structure to string.

Important properties of s_out:
* it contains alphanumeric characters, dots, underscores, minus signs
* these symbols are grouped into words, which are separated by spaces
  and Windows-style (CR+LF) newlines
* although  serializer  uses  spaces and CR+LF as separators, you can
  replace any separator character by arbitrary combination of spaces,
  tabs, Windows or Unix newlines. It allows flexible reformatting  of
  the  string  in  case you want to include it into text or XML file.
  But you should not insert separators into the middle of the "words"
  nor should you change case of letters.
* s_out can be freely moved between 32-bit and 64-bit systems, little
  and big endian machines, and so on. You can serialize structure  on
  32-bit machine and unserialize it on 64-bit one (or vice versa), or
  serialize  it  on  SPARC  and  unserialize  on  x86.  You  can also
  serialize  it  in  C++ version of ALGLIB and unserialize in C# one,
  and vice versa.
*************************************************************************/
void knnserialize(knnmodel &obj, std::string &s_out);
/*************************************************************************
This function unserializes data structure from string.

The string is expected to be in the format produced by the string version
of knnserialize().
*************************************************************************/
void knnunserialize(const std::string &s_in, knnmodel &obj);
/*************************************************************************
This function serializes data structure to C++ stream.

Data stream generated by this function is same as  string  representation
generated  by  string  version  of  serializer - alphanumeric characters,
dots, underscores, minus signs, which are grouped into words separated by
spaces and CR+LF.

We recommend you to read comments on string version of serializer to find
out more about serialization of ALGLIB objects.
*************************************************************************/
void knnserialize(knnmodel &obj, std::ostream &s_out);
/*************************************************************************
This function unserializes data structure from stream.

The stream is expected to be in the format produced by the stream version
of knnserialize().
*************************************************************************/
void knnunserialize(const std::istream &s_in, knnmodel &obj);
/*************************************************************************
This function creates buffer  structure  which  can  be  used  to  perform
parallel KNN requests.

KNN subpackage provides two sets of computing functions - ones  which  use
internal buffer of KNN model (these  functions are single-threaded because
they use same buffer, which can not be shared between threads),  and  ones
which use external buffer.

This function is used to initialize external buffer.

INPUT PARAMETERS
    Model       -   KNN model which is associated with newly created buffer

OUTPUT PARAMETERS
    Buf         -   external buffer.

IMPORTANT: buffer object should be used only with model which was used  to
           initialize buffer. Any attempt to  use  buffer  with  different
           object is dangerous - you  may   get  integrity  check  failure
           (exception) because sizes of internal  arrays  do  not  fit  to
           dimensions of the model structure.

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knncreatebuffer(const knnmodel &model, knnbuffer &buf, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This subroutine creates KNNBuilder object which is used to train KNN models.

By default, new builder stores empty dataset and some  reasonable  default
settings. At the very least, you should specify dataset prior to  building
KNN model. You can also tweak settings of the model construction algorithm
(recommended, although default settings should work well).

Following actions are mandatory:
* calling knnbuildersetdatasetreg() or knnbuildersetdatasetcls() to specify
  dataset (regression or classification problem, respectively)
* calling knnbuilderbuildknnmodel() to build KNN model using current
  dataset and default settings

Additionally, you may call:
* knnbuildersetnorm() to change norm being used

INPUT PARAMETERS:
    none

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildercreate(knnbuilder &s, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Specifies regression problem (one or more continuous  output variables are
predicted). There also exists "classification" version of this function.

This subroutine adds dense dataset to the internal storage of the  builder
object. Specifying your dataset in the dense format means that  the  dense
version of the KNN construction algorithm will be invoked.

INPUT PARAMETERS:
    S           -   KNN builder object
    XY          -   array[NPoints,NVars+NOut] (note:   actual   size  can  be
                    larger, only leading part is used anyway), dataset:
                    * first NVars elements of each row store values of the
                      independent variables
                    * next NOut elements store  values  of  the  dependent
                      variables
    NPoints     -   number of rows in the dataset, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NOut        -   number of dependent variables, NOut>=1

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildersetdatasetreg(const knnbuilder &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nout, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Specifies classification problem (two or more classes are predicted).
There also exists "regression" version of this function.

This subroutine adds dense dataset to the internal storage of the  builder
object. Specifying your dataset in the dense format means that  the  dense
version of the KNN construction algorithm will be invoked.

INPUT PARAMETERS:
    S           -   KNN builder object
    XY          -   array[NPoints,NVars+1] (note:   actual   size  can  be
                    larger, only leading part is used anyway), dataset:
                    * first NVars elements of each row store values of the
                      independent variables
                    * next element stores class index, in [0,NClasses)
    NPoints     -   number of rows in the dataset, NPoints>=1
    NVars       -   number of independent variables, NVars>=1
    NClasses    -   number of classes, NClasses>=2

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildersetdatasetcls(const knnbuilder &s, const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t nclasses, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function sets norm type used for neighbor search.

INPUT PARAMETERS:
    S           -   KNN builder object
    NrmType     -   norm type:
                    * 0      inf-norm
                    * 1      1-norm
                    * 2      Euclidean norm (default)

OUTPUT PARAMETERS:
    S           -   KNN builder

  -- ALGLIB --
     Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuildersetnorm(const knnbuilder &s, const ae_int_t nrmtype, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This subroutine builds KNN model according to current settings, using
dataset internally stored in the builder object.
The model being built performs inference using Eps-approximate K nearest
neighbors search algorithm, with:
* K=1, Eps=0 corresponding to the "nearest neighbor algorithm"
* K>1, Eps=0 corresponding to the "K nearest neighbors algorithm"
* K>=1, Eps>0 corresponding to "approximate nearest neighbors algorithm"
An approximate KNN is a good option for high-dimensional datasets (exact
KNN works slowly when dimensions count grows).
An ALGLIB implementation of kd-trees is used to perform k-nn searches.
! COMMERCIAL EDITION OF ALGLIB:
!
! Commercial Edition of ALGLIB includes following important improvements
! of this function:
! * high-performance native backend with same C# interface (C# version)
! * multithreading support (C++ and C# versions)
!
! We recommend you to read 'Working with commercial version' section of
! ALGLIB Reference Manual in order to find out how to use performance-
! related features provided by commercial edition of ALGLIB.
INPUT PARAMETERS:
S - KNN builder object
K - number of neighbors to search for, K>=1
Eps - approximation factor:
* Eps=0 means that exact kNN search is performed
* Eps>0 means that (1+Eps)-approximate search is performed
OUTPUT PARAMETERS:
Model - KNN model
Rep - report
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnbuilderbuildknnmodel(const knnbuilder &s, const ae_int_t k, const double eps, knnmodel &model, knnreport &rep, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Changing search settings of KNN model.
K and EPS parameters of KNN (AKNN) search are specified during model
construction. However, plain KNN algorithm with Euclidean distance allows
you to change them at any moment.
NOTE: future versions of KNN model may support advanced versions of KNN,
such as NCA or LMNN. It is possible that such algorithms won't allow
you to change search settings on the fly. If you call this function
for an algorithm which does not support on-the-fly changes, it will
throw an exception.
INPUT PARAMETERS:
Model - KNN model
K - K>=1, neighbors count
EPS - accuracy of the EPS-approximate NN search. Set to 0.0, if
you want to perform "classic" KNN search. Specify larger
values if you need to speed-up high-dimensional KNN
queries.
OUTPUT PARAMETERS:
nothing on success, exception on failure
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnrewritekeps(const knnmodel &model, const ae_int_t k, const double eps, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Inference using KNN model.
See also knnprocess0(), knnprocessi() and knnclassify() for options with a
bit more convenient interface.
IMPORTANT: this function is thread-unsafe and modifies internal structures
of the model! You can not use same model object for parallel
evaluation from several threads.
Use knntsprocess() with independent thread-local buffers, if
you need thread-safe evaluation.
INPUT PARAMETERS:
Model - KNN model
X - input vector, array[0..NVars-1].
Y - possible preallocated buffer. Reused if long enough.
OUTPUT PARAMETERS:
Y - result. Regression estimate when solving regression task,
vector of posterior probabilities for classification task.
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnprocess(const knnmodel &model, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns first component of the inferred vector (i.e. one
with index #0).
It is a convenience wrapper for knnprocess() intended for either:
* 1-dimensional regression problems
* 2-class classification problems
In the former case this function returns inference result as scalar, which
is definitely more convenient than wrapping it as vector. In the latter
case it returns probability of object belonging to class #0.
If you call it for anything different from two cases above, it will work
as defined, i.e. return y[0], although it is of less use in such cases.
IMPORTANT: this function is thread-unsafe and modifies internal structures
of the model! You can not use same model object for parallel
evaluation from several threads.
Use knntsprocess() with independent thread-local buffers, if
you need thread-safe evaluation.
INPUT PARAMETERS:
Model - KNN model
X - input vector, array[0..NVars-1].
RESULT:
Y[0]
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnprocess0(const knnmodel &model, const real_1d_array &x, const xparams _xparams = alglib::xdefault);
/*************************************************************************
This function returns most probable class number for an input X. It is
same as calling knnprocess(model,x,y), then determining i=argmax(y[i]) and
returning i.
A class number in [0,NOut) range is returned for classification problems,
-1 is returned when this function is called for regression problems.
IMPORTANT: this function is thread-unsafe and modifies internal structures
of the model! You can not use same model object for parallel
evaluation from several threads.
Use knntsprocess() with independent thread-local buffers, if
you need thread-safe evaluation.
INPUT PARAMETERS:
Model - KNN model
X - input vector, array[0..NVars-1].
RESULT:
class number, -1 for regression tasks
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
ae_int_t knnclassify(const knnmodel &model, const real_1d_array &x, const xparams _xparams = alglib::xdefault);
/*************************************************************************
'interactive' variant of knnprocess() for languages like Python which
support constructs like "y = knnprocessi(model,x)" and interactive mode of
the interpreter.
This function allocates new array on each call, so it is significantly
slower than its 'non-interactive' counterpart, but it is more convenient
when you call it from command line.
IMPORTANT: this function is thread-unsafe and may modify internal
structures of the model! You can not use same model object for
parallel evaluation from several threads.
Use knntsprocess() with independent thread-local buffers if
you need thread-safe evaluation.
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnprocessi(const knnmodel &model, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Thread-safe processing using external buffer for temporaries.
This function is thread-safe (i.e. you can use same KNN model from
multiple threads) as long as you use different buffer objects for different
threads.
INPUT PARAMETERS:
Model - KNN model
Buf - buffer object, must be allocated specifically for this
model with knncreatebuffer().
X - input vector, array[NVars]
OUTPUT PARAMETERS:
Y - result, array[NOut]. Regression estimate when solving
regression task, vector of posterior probabilities for
a classification task.
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knntsprocess(const knnmodel &model, const knnbuffer &buf, const real_1d_array &x, real_1d_array &y, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Relative classification error on the test set
INPUT PARAMETERS:
Model - KNN model
XY - test set
NPoints - test set size
RESULT:
fraction of incorrectly classified cases, in [0,1] (not a percentage).
Zero if model solves regression task.
NOTE: if you need several different kinds of error metrics, it is better
to use knnallerrors() which computes all error metrics with just one
pass over dataset.
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnrelclserror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average cross-entropy (in bits per element) on the test set
INPUT PARAMETERS:
Model - KNN model
XY - test set
NPoints - test set size
RESULT:
CrossEntropy/NPoints.
Zero if model solves regression task.
NOTE: the cross-entropy metric is too unstable when used to evaluate KNN
models (such models can report exactly zero probabilities), so we
do not recommend using it.
NOTE: if you need several different kinds of error metrics, it is better
to use knnallerrors() which computes all error metrics with just one
pass over dataset.
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnavgce(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
RMS error on the test set.
Its meaning for regression task is obvious. As for classification problems,
RMS error means error when estimating posterior probabilities.
INPUT PARAMETERS:
Model - KNN model
XY - test set
NPoints - test set size
RESULT:
root mean square error.
NOTE: if you need several different kinds of error metrics, it is better
to use knnallerrors() which computes all error metrics with just one
pass over dataset.
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnrmserror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average error on the test set
Its meaning for regression task is obvious. As for classification problems,
average error means error when estimating posterior probabilities.
INPUT PARAMETERS:
Model - KNN model
XY - test set
NPoints - test set size
RESULT:
average error
NOTE: if you need several different kinds of error metrics, it is better
to use knnallerrors() which computes all error metrics with just one
pass over dataset.
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnavgerror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Average relative error on the test set
Its meaning for regression task is obvious. As for classification problems,
average relative error means error when estimating posterior probabilities.
INPUT PARAMETERS:
Model - KNN model
XY - test set
NPoints - test set size
RESULT:
average relative error
NOTE: if you need several different kinds of error metrics, it is better
to use knnallerrors() which computes all error metrics with just one
pass over dataset.
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
double knnavgrelerror(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, const xparams _xparams = alglib::xdefault);
/*************************************************************************
Calculates all kinds of errors for the model in one call.
INPUT PARAMETERS:
Model - KNN model
XY - test set:
* one row per point
* first NVars columns store independent variables
* depending on problem type:
* next column stores class number in [0,NClasses) - for
classification problems
* next NOut columns store dependent variables - for
regression problems
NPoints - test set size, NPoints>=0
OUTPUT PARAMETERS:
Rep - following fields are loaded with errors for both regression
and classification models:
* rep.rmserror - RMS error for the output
* rep.avgerror - average error
* rep.avgrelerror - average relative error
following fields are set only for classification models,
zero for regression ones:
* rep.relclserror - relative classification error, in [0,1]
* rep.avgce - average cross-entropy in bits per dataset entry
NOTE: the cross-entropy metric is too unstable when used to evaluate KNN
models (such models can report exactly zero probabilities), so we
do not recommend using it.
-- ALGLIB --
Copyright 15.02.2019 by Bochkanov Sergey
*************************************************************************/
void knnallerrors(const knnmodel &model, const real_2d_array &xy, const ae_int_t npoints, knnreport &rep, const xparams _xparams = alglib::xdefault);
#endif
#if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)
/*************************************************************************
k-means++ clusterization.
Backward compatibility function; we recommend using the CLUSTERING
subpackage as a better replacement.
-- ALGLIB --
Copyright 21.03.2009 by Bochkanov Sergey
*************************************************************************/
void kmeansgenerate(const real_2d_array &xy, const ae_int_t npoints, const ae_int_t nvars, const ae_int_t k, const ae_int_t restarts, ae_int_t &info, real_2d_array &c, integer_1d_array &xyc, const xparams _xparams = alglib::xdefault);
#endif
}
/////////////////////////////////////////////////////////////////////////
//
// THIS SECTION CONTAINS COMPUTATIONAL CORE DECLARATIONS (FUNCTIONS)
//
/////////////////////////////////////////////////////////////////////////
namespace alglib_impl
{
#if defined(AE_COMPILE_PCA) || !defined(AE_PARTIAL_BUILD)
void pcabuildbasis(/* Real */ ae_matrix* x,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t* info,
/* Real */ ae_vector* s2,
/* Real */ ae_matrix* v,
ae_state *_state);
void pcatruncatedsubspace(/* Real */ ae_matrix* x,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t nneeded,
double eps,
ae_int_t maxits,
/* Real */ ae_vector* s2,
/* Real */ ae_matrix* v,
ae_state *_state);
void pcatruncatedsubspacesparse(sparsematrix* x,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t nneeded,
double eps,
ae_int_t maxits,
/* Real */ ae_vector* s2,
/* Real */ ae_matrix* v,
ae_state *_state);
#endif
#if defined(AE_COMPILE_BDSS) || !defined(AE_PARTIAL_BUILD)
void dserrallocate(ae_int_t nclasses,
/* Real */ ae_vector* buf,
ae_state *_state);
void dserraccumulate(/* Real */ ae_vector* buf,
/* Real */ ae_vector* y,
/* Real */ ae_vector* desiredy,
ae_state *_state);
void dserrfinish(/* Real */ ae_vector* buf, ae_state *_state);
void dsnormalize(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t* info,
/* Real */ ae_vector* means,
/* Real */ ae_vector* sigmas,
ae_state *_state);
void dsnormalizec(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t* info,
/* Real */ ae_vector* means,
/* Real */ ae_vector* sigmas,
ae_state *_state);
double dsgetmeanmindistance(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_state *_state);
void dstie(/* Real */ ae_vector* a,
ae_int_t n,
/* Integer */ ae_vector* ties,
ae_int_t* tiecount,
/* Integer */ ae_vector* p1,
/* Integer */ ae_vector* p2,
ae_state *_state);
void dstiefasti(/* Real */ ae_vector* a,
/* Integer */ ae_vector* b,
ae_int_t n,
/* Integer */ ae_vector* ties,
ae_int_t* tiecount,
/* Real */ ae_vector* bufr,
/* Integer */ ae_vector* bufi,
ae_state *_state);
void dsoptimalsplit2(/* Real */ ae_vector* a,
/* Integer */ ae_vector* c,
ae_int_t n,
ae_int_t* info,
double* threshold,
double* pal,
double* pbl,
double* par,
double* pbr,
double* cve,
ae_state *_state);
void dsoptimalsplit2fast(/* Real */ ae_vector* a,
/* Integer */ ae_vector* c,
/* Integer */ ae_vector* tiesbuf,
/* Integer */ ae_vector* cntbuf,
/* Real */ ae_vector* bufr,
/* Integer */ ae_vector* bufi,
ae_int_t n,
ae_int_t nc,
double alpha,
ae_int_t* info,
double* threshold,
double* rms,
double* cvrms,
ae_state *_state);
void dssplitk(/* Real */ ae_vector* a,
/* Integer */ ae_vector* c,
ae_int_t n,
ae_int_t nc,
ae_int_t kmax,
ae_int_t* info,
/* Real */ ae_vector* thresholds,
ae_int_t* ni,
double* cve,
ae_state *_state);
void dsoptimalsplitk(/* Real */ ae_vector* a,
/* Integer */ ae_vector* c,
ae_int_t n,
ae_int_t nc,
ae_int_t kmax,
ae_int_t* info,
/* Real */ ae_vector* thresholds,
ae_int_t* ni,
double* cve,
ae_state *_state);
void _cvreport_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _cvreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _cvreport_clear(void* _p);
void _cvreport_destroy(void* _p);
#endif
#if defined(AE_COMPILE_MLPBASE) || !defined(AE_PARTIAL_BUILD)
ae_int_t mlpgradsplitcost(ae_state *_state);
ae_int_t mlpgradsplitsize(ae_state *_state);
void mlpcreate0(ae_int_t nin,
ae_int_t nout,
multilayerperceptron* network,
ae_state *_state);
void mlpcreate1(ae_int_t nin,
ae_int_t nhid,
ae_int_t nout,
multilayerperceptron* network,
ae_state *_state);
void mlpcreate2(ae_int_t nin,
ae_int_t nhid1,
ae_int_t nhid2,
ae_int_t nout,
multilayerperceptron* network,
ae_state *_state);
void mlpcreateb0(ae_int_t nin,
ae_int_t nout,
double b,
double d,
multilayerperceptron* network,
ae_state *_state);
void mlpcreateb1(ae_int_t nin,
ae_int_t nhid,
ae_int_t nout,
double b,
double d,
multilayerperceptron* network,
ae_state *_state);
void mlpcreateb2(ae_int_t nin,
ae_int_t nhid1,
ae_int_t nhid2,
ae_int_t nout,
double b,
double d,
multilayerperceptron* network,
ae_state *_state);
void mlpcreater0(ae_int_t nin,
ae_int_t nout,
double a,
double b,
multilayerperceptron* network,
ae_state *_state);
void mlpcreater1(ae_int_t nin,
ae_int_t nhid,
ae_int_t nout,
double a,
double b,
multilayerperceptron* network,
ae_state *_state);
void mlpcreater2(ae_int_t nin,
ae_int_t nhid1,
ae_int_t nhid2,
ae_int_t nout,
double a,
double b,
multilayerperceptron* network,
ae_state *_state);
void mlpcreatec0(ae_int_t nin,
ae_int_t nout,
multilayerperceptron* network,
ae_state *_state);
void mlpcreatec1(ae_int_t nin,
ae_int_t nhid,
ae_int_t nout,
multilayerperceptron* network,
ae_state *_state);
void mlpcreatec2(ae_int_t nin,
ae_int_t nhid1,
ae_int_t nhid2,
ae_int_t nout,
multilayerperceptron* network,
ae_state *_state);
void mlpcopy(multilayerperceptron* network1,
multilayerperceptron* network2,
ae_state *_state);
void mlpcopyshared(multilayerperceptron* network1,
multilayerperceptron* network2,
ae_state *_state);
ae_bool mlpsamearchitecture(multilayerperceptron* network1,
multilayerperceptron* network2,
ae_state *_state);
void mlpcopytunableparameters(multilayerperceptron* network1,
multilayerperceptron* network2,
ae_state *_state);
void mlpexporttunableparameters(multilayerperceptron* network,
/* Real */ ae_vector* p,
ae_int_t* pcount,
ae_state *_state);
void mlpimporttunableparameters(multilayerperceptron* network,
/* Real */ ae_vector* p,
ae_state *_state);
void mlpserializeold(multilayerperceptron* network,
/* Real */ ae_vector* ra,
ae_int_t* rlen,
ae_state *_state);
void mlpunserializeold(/* Real */ ae_vector* ra,
multilayerperceptron* network,
ae_state *_state);
void mlprandomize(multilayerperceptron* network, ae_state *_state);
void mlprandomizefull(multilayerperceptron* network, ae_state *_state);
void mlpinitpreprocessor(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t ssize,
ae_state *_state);
void mlpinitpreprocessorsparse(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t ssize,
ae_state *_state);
void mlpinitpreprocessorsubset(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t setsize,
/* Integer */ ae_vector* idx,
ae_int_t subsetsize,
ae_state *_state);
void mlpinitpreprocessorsparsesubset(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t setsize,
/* Integer */ ae_vector* idx,
ae_int_t subsetsize,
ae_state *_state);
void mlpproperties(multilayerperceptron* network,
ae_int_t* nin,
ae_int_t* nout,
ae_int_t* wcount,
ae_state *_state);
ae_int_t mlpntotal(multilayerperceptron* network, ae_state *_state);
ae_int_t mlpgetinputscount(multilayerperceptron* network,
ae_state *_state);
ae_int_t mlpgetoutputscount(multilayerperceptron* network,
ae_state *_state);
ae_int_t mlpgetweightscount(multilayerperceptron* network,
ae_state *_state);
ae_bool mlpissoftmax(multilayerperceptron* network, ae_state *_state);
ae_int_t mlpgetlayerscount(multilayerperceptron* network,
ae_state *_state);
ae_int_t mlpgetlayersize(multilayerperceptron* network,
ae_int_t k,
ae_state *_state);
void mlpgetinputscaling(multilayerperceptron* network,
ae_int_t i,
double* mean,
double* sigma,
ae_state *_state);
void mlpgetoutputscaling(multilayerperceptron* network,
ae_int_t i,
double* mean,
double* sigma,
ae_state *_state);
void mlpgetneuroninfo(multilayerperceptron* network,
ae_int_t k,
ae_int_t i,
ae_int_t* fkind,
double* threshold,
ae_state *_state);
double mlpgetweight(multilayerperceptron* network,
ae_int_t k0,
ae_int_t i0,
ae_int_t k1,
ae_int_t i1,
ae_state *_state);
void mlpsetinputscaling(multilayerperceptron* network,
ae_int_t i,
double mean,
double sigma,
ae_state *_state);
void mlpsetoutputscaling(multilayerperceptron* network,
ae_int_t i,
double mean,
double sigma,
ae_state *_state);
void mlpsetneuroninfo(multilayerperceptron* network,
ae_int_t k,
ae_int_t i,
ae_int_t fkind,
double threshold,
ae_state *_state);
void mlpsetweight(multilayerperceptron* network,
ae_int_t k0,
ae_int_t i0,
ae_int_t k1,
ae_int_t i1,
double w,
ae_state *_state);
void mlpactivationfunction(double net,
ae_int_t k,
double* f,
double* df,
double* d2f,
ae_state *_state);
void mlpprocess(multilayerperceptron* network,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
void mlpprocessi(multilayerperceptron* network,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
double mlperror(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlperrorsparse(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlperrorn(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t ssize,
ae_state *_state);
ae_int_t mlpclserror(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlprelclserror(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlprelclserrorsparse(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlpavgce(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlpavgcesparse(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlprmserror(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlprmserrorsparse(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlpavgerror(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlpavgerrorsparse(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlpavgrelerror(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlpavgrelerrorsparse(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t npoints,
ae_state *_state);
void mlpgrad(multilayerperceptron* network,
/* Real */ ae_vector* x,
/* Real */ ae_vector* desiredy,
double* e,
/* Real */ ae_vector* grad,
ae_state *_state);
void mlpgradn(multilayerperceptron* network,
/* Real */ ae_vector* x,
/* Real */ ae_vector* desiredy,
double* e,
/* Real */ ae_vector* grad,
ae_state *_state);
void mlpgradbatch(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t ssize,
double* e,
/* Real */ ae_vector* grad,
ae_state *_state);
void mlpgradbatchsparse(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t ssize,
double* e,
/* Real */ ae_vector* grad,
ae_state *_state);
void mlpgradbatchsubset(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t setsize,
/* Integer */ ae_vector* idx,
ae_int_t subsetsize,
double* e,
/* Real */ ae_vector* grad,
ae_state *_state);
void mlpgradbatchsparsesubset(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t setsize,
/* Integer */ ae_vector* idx,
ae_int_t subsetsize,
double* e,
/* Real */ ae_vector* grad,
ae_state *_state);
void mlpgradbatchx(multilayerperceptron* network,
/* Real */ ae_matrix* densexy,
sparsematrix* sparsexy,
ae_int_t datasetsize,
ae_int_t datasettype,
/* Integer */ ae_vector* idx,
ae_int_t subset0,
ae_int_t subset1,
ae_int_t subsettype,
ae_shared_pool* buf,
ae_shared_pool* gradbuf,
ae_state *_state);
/*
 * Companion declaration to mlpgradbatchx(): it takes exactly the same
 * parameter list, but returns ae_bool instead of void.
 * NOTE(review): the "_trypexec_" prefix suggests a "try parallel execution"
 * entry point whose result tells the caller whether the work was actually
 * dispatched - confirm against the implementation in dataanalysis.cpp.
 */
ae_bool _trypexec_mlpgradbatchx(multilayerperceptron* network,
/* Real */ ae_matrix* densexy,
sparsematrix* sparsexy,
ae_int_t datasetsize,
ae_int_t datasettype,
/* Integer */ ae_vector* idx,
ae_int_t subset0,
ae_int_t subset1,
ae_int_t subsettype,
ae_shared_pool* buf,
ae_shared_pool* gradbuf, ae_state *_state);
void mlpgradnbatch(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t ssize,
double* e,
/* Real */ ae_vector* grad,
ae_state *_state);
void mlphessiannbatch(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t ssize,
double* e,
/* Real */ ae_vector* grad,
/* Real */ ae_matrix* h,
ae_state *_state);
void mlphessianbatch(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t ssize,
double* e,
/* Real */ ae_vector* grad,
/* Real */ ae_matrix* h,
ae_state *_state);
void mlpinternalprocessvector(/* Integer */ ae_vector* structinfo,
/* Real */ ae_vector* weights,
/* Real */ ae_vector* columnmeans,
/* Real */ ae_vector* columnsigmas,
/* Real */ ae_vector* neurons,
/* Real */ ae_vector* dfdnet,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
void mlpalloc(ae_serializer* s,
multilayerperceptron* network,
ae_state *_state);
void mlpserialize(ae_serializer* s,
multilayerperceptron* network,
ae_state *_state);
void mlpunserialize(ae_serializer* s,
multilayerperceptron* network,
ae_state *_state);
void mlpallerrorssubset(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t setsize,
/* Integer */ ae_vector* subset,
ae_int_t subsetsize,
modelerrors* rep,
ae_state *_state);
void mlpallerrorssparsesubset(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t setsize,
/* Integer */ ae_vector* subset,
ae_int_t subsetsize,
modelerrors* rep,
ae_state *_state);
double mlperrorsubset(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t setsize,
/* Integer */ ae_vector* subset,
ae_int_t subsetsize,
ae_state *_state);
double mlperrorsparsesubset(multilayerperceptron* network,
sparsematrix* xy,
ae_int_t setsize,
/* Integer */ ae_vector* subset,
ae_int_t subsetsize,
ae_state *_state);
void mlpallerrorsx(multilayerperceptron* network,
/* Real */ ae_matrix* densexy,
sparsematrix* sparsexy,
ae_int_t datasetsize,
ae_int_t datasettype,
/* Integer */ ae_vector* idx,
ae_int_t subset0,
ae_int_t subset1,
ae_int_t subsettype,
ae_shared_pool* buf,
modelerrors* rep,
ae_state *_state);
/*
 * Companion declaration to mlpallerrorsx(): it takes exactly the same
 * parameter list, but returns ae_bool instead of void.
 * NOTE(review): the "_trypexec_" prefix suggests a "try parallel execution"
 * entry point whose result tells the caller whether the work was actually
 * dispatched - confirm against the implementation in dataanalysis.cpp.
 */
ae_bool _trypexec_mlpallerrorsx(multilayerperceptron* network,
/* Real */ ae_matrix* densexy,
sparsematrix* sparsexy,
ae_int_t datasetsize,
ae_int_t datasettype,
/* Integer */ ae_vector* idx,
ae_int_t subset0,
ae_int_t subset1,
ae_int_t subsettype,
ae_shared_pool* buf,
modelerrors* rep, ae_state *_state);
void _modelerrors_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _modelerrors_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _modelerrors_clear(void* _p);
void _modelerrors_destroy(void* _p);
void _smlpgrad_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _smlpgrad_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _smlpgrad_clear(void* _p);
void _smlpgrad_destroy(void* _p);
void _multilayerperceptron_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _multilayerperceptron_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _multilayerperceptron_clear(void* _p);
void _multilayerperceptron_destroy(void* _p);
#endif
#if defined(AE_COMPILE_LDA) || !defined(AE_PARTIAL_BUILD)
void fisherlda(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t nclasses,
ae_int_t* info,
/* Real */ ae_vector* w,
ae_state *_state);
void fisherldan(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t nclasses,
ae_int_t* info,
/* Real */ ae_matrix* w,
ae_state *_state);
#endif
#if defined(AE_COMPILE_SSA) || !defined(AE_PARTIAL_BUILD)
void ssacreate(ssamodel* s, ae_state *_state);
void ssasetwindow(ssamodel* s, ae_int_t windowwidth, ae_state *_state);
void ssasetseed(ssamodel* s, ae_int_t seed, ae_state *_state);
void ssasetpoweruplength(ssamodel* s, ae_int_t pwlen, ae_state *_state);
void ssasetmemorylimit(ssamodel* s, ae_int_t memlimit, ae_state *_state);
void ssaaddsequence(ssamodel* s,
/* Real */ ae_vector* x,
ae_int_t n,
ae_state *_state);
void ssaappendpointandupdate(ssamodel* s,
double x,
double updateits,
ae_state *_state);
void ssaappendsequenceandupdate(ssamodel* s,
/* Real */ ae_vector* x,
ae_int_t nticks,
double updateits,
ae_state *_state);
void ssasetalgoprecomputed(ssamodel* s,
/* Real */ ae_matrix* a,
ae_int_t windowwidth,
ae_int_t nbasis,
ae_state *_state);
void ssasetalgotopkdirect(ssamodel* s, ae_int_t topk, ae_state *_state);
void ssasetalgotopkrealtime(ssamodel* s, ae_int_t topk, ae_state *_state);
void ssacleardata(ssamodel* s, ae_state *_state);
void ssagetbasis(ssamodel* s,
/* Real */ ae_matrix* a,
/* Real */ ae_vector* sv,
ae_int_t* windowwidth,
ae_int_t* nbasis,
ae_state *_state);
void ssagetlrr(ssamodel* s,
/* Real */ ae_vector* a,
ae_int_t* windowwidth,
ae_state *_state);
void ssaanalyzelastwindow(ssamodel* s,
/* Real */ ae_vector* trend,
/* Real */ ae_vector* noise,
ae_int_t* nticks,
ae_state *_state);
void ssaanalyzelast(ssamodel* s,
ae_int_t nticks,
/* Real */ ae_vector* trend,
/* Real */ ae_vector* noise,
ae_state *_state);
void ssaanalyzesequence(ssamodel* s,
/* Real */ ae_vector* data,
ae_int_t nticks,
/* Real */ ae_vector* trend,
/* Real */ ae_vector* noise,
ae_state *_state);
void ssaforecastlast(ssamodel* s,
ae_int_t nticks,
/* Real */ ae_vector* trend,
ae_state *_state);
void ssaforecastsequence(ssamodel* s,
/* Real */ ae_vector* data,
ae_int_t datalen,
ae_int_t forecastlen,
ae_bool applysmoothing,
/* Real */ ae_vector* trend,
ae_state *_state);
void ssaforecastavglast(ssamodel* s,
ae_int_t m,
ae_int_t nticks,
/* Real */ ae_vector* trend,
ae_state *_state);
void ssaforecastavgsequence(ssamodel* s,
/* Real */ ae_vector* data,
ae_int_t datalen,
ae_int_t m,
ae_int_t forecastlen,
ae_bool applysmoothing,
/* Real */ ae_vector* trend,
ae_state *_state);
void _ssamodel_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _ssamodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _ssamodel_clear(void* _p);
void _ssamodel_destroy(void* _p);
#endif
#if defined(AE_COMPILE_LINREG) || !defined(AE_PARTIAL_BUILD)
/*
 * LINREG unit: prototypes for the lr-prefixed routines and the helpers for
 * the linearmodel and lrreport types.
 * Declared when AE_COMPILE_LINREG is defined or AE_PARTIAL_BUILD is not.
 * NOTE(review): the group remarks below are inferred from names and
 * signatures only - confirm against the implementation.
 */
/* Builders: all four take xy, npoints, nvars plus info / lm / ar pointers
   (presumably outputs); lrbuilds and lrbuildzs additionally take a vector s. */
void lrbuild(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t* info,
linearmodel* lm,
lrreport* ar,
ae_state *_state);
void lrbuilds(/* Real */ ae_matrix* xy,
/* Real */ ae_vector* s,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t* info,
linearmodel* lm,
lrreport* ar,
ae_state *_state);
void lrbuildzs(/* Real */ ae_matrix* xy,
/* Real */ ae_vector* s,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t* info,
linearmodel* lm,
lrreport* ar,
ae_state *_state);
void lrbuildz(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t* info,
linearmodel* lm,
lrreport* ar,
ae_state *_state);
/* Conversion between a linearmodel and a plain vector v with nvars
   (nvars is a pointer in lrunpack, a value in lrpack). */
void lrunpack(linearmodel* lm,
/* Real */ ae_vector* v,
ae_int_t* nvars,
ae_state *_state);
void lrpack(/* Real */ ae_vector* v,
ae_int_t nvars,
linearmodel* lm,
ae_state *_state);
/* Evaluation: lrprocess takes a single vector x; the error routines take a
   matrix xy with npoints. All return double. */
double lrprocess(linearmodel* lm,
/* Real */ ae_vector* x,
ae_state *_state);
double lrrmserror(linearmodel* lm,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double lravgerror(linearmodel* lm,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double lravgrelerror(linearmodel* lm,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
/* presumably copies lm1 into lm2 - confirm direction against the
   implementation. */
void lrcopy(linearmodel* lm1, linearmodel* lm2, ae_state *_state);
/* Line-fitting routines that report scalars through double* arguments;
   lrlines takes an extra vector s and more result pointers than lrline. */
void lrlines(/* Real */ ae_matrix* xy,
/* Real */ ae_vector* s,
ae_int_t n,
ae_int_t* info,
double* a,
double* b,
double* vara,
double* varb,
double* covab,
double* corrab,
double* p,
ae_state *_state);
void lrline(/* Real */ ae_matrix* xy,
ae_int_t n,
ae_int_t* info,
double* a,
double* b,
ae_state *_state);
/* init / init_copy / clear / destroy helpers for linearmodel and lrreport;
   they take untyped void* object pointers. */
void _linearmodel_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _linearmodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _linearmodel_clear(void* _p);
void _linearmodel_destroy(void* _p);
void _lrreport_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _lrreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _lrreport_clear(void* _p);
void _lrreport_destroy(void* _p);
#endif
#if defined(AE_COMPILE_FILTERS) || !defined(AE_PARTIAL_BUILD)
/*
 * FILTERS unit: three filter routines, declared when AE_COMPILE_FILTERS is
 * defined or AE_PARTIAL_BUILD is not.
 * Each takes a vector x and a count n; filtersma and filterlrma take an
 * integer k, filterema takes a double alpha.
 * NOTE(review): there is no separate output argument, so x is presumably
 * modified in place - confirm against the implementation.
 */
void filtersma(/* Real */ ae_vector* x,
ae_int_t n,
ae_int_t k,
ae_state *_state);
void filterema(/* Real */ ae_vector* x,
ae_int_t n,
double alpha,
ae_state *_state);
void filterlrma(/* Real */ ae_vector* x,
ae_int_t n,
ae_int_t k,
ae_state *_state);
#endif
#if defined(AE_COMPILE_LOGIT) || !defined(AE_PARTIAL_BUILD)
/*
 * LOGIT unit: prototypes for the mnl-prefixed routines and the helpers for
 * the logitmodel, logitmcstate and mnlreport types.
 * Declared when AE_COMPILE_LOGIT is defined or AE_PARTIAL_BUILD is not.
 * NOTE(review): the group remarks below are inferred from names and
 * signatures only - confirm against the implementation.
 */
/* Training entry point: takes xy, npoints, nvars, nclasses plus info / lm /
   rep pointers (presumably outputs). */
void mnltrainh(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t nclasses,
ae_int_t* info,
logitmodel* lm,
mnlreport* rep,
ae_state *_state);
/* Processing: both variants share the same parameter list (model, x, y). */
void mnlprocess(logitmodel* lm,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
void mnlprocessi(logitmodel* lm,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
/* Conversion between a logitmodel and a plain matrix a with nvars / nclasses
   (pointers in mnlunpack, values in mnlpack). */
void mnlunpack(logitmodel* lm,
/* Real */ ae_matrix* a,
ae_int_t* nvars,
ae_int_t* nclasses,
ae_state *_state);
void mnlpack(/* Real */ ae_matrix* a,
ae_int_t nvars,
ae_int_t nclasses,
logitmodel* lm,
ae_state *_state);
/* presumably copies lm1 into lm2 - confirm direction against the
   implementation. */
void mnlcopy(logitmodel* lm1, logitmodel* lm2, ae_state *_state);
/* Error metrics over a matrix xy: the double-returning routines and the
   ae_int_t-returning mnlclserror. */
double mnlavgce(logitmodel* lm,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mnlrelclserror(logitmodel* lm,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mnlrmserror(logitmodel* lm,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mnlavgerror(logitmodel* lm,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
/* NOTE(review): this prototype names its count parameter `ssize` whereas the
   sibling error routines use `npoints`; the name in a declaration does not
   affect callers. */
double mnlavgrelerror(logitmodel* lm,
/* Real */ ae_matrix* xy,
ae_int_t ssize,
ae_state *_state);
ae_int_t mnlclserror(logitmodel* lm,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
/* init / init_copy / clear / destroy helpers for logitmodel, logitmcstate
   and mnlreport; they take untyped void* object pointers. */
void _logitmodel_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _logitmodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _logitmodel_clear(void* _p);
void _logitmodel_destroy(void* _p);
void _logitmcstate_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _logitmcstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _logitmcstate_clear(void* _p);
void _logitmcstate_destroy(void* _p);
void _mnlreport_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _mnlreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _mnlreport_clear(void* _p);
void _mnlreport_destroy(void* _p);
#endif
#if defined(AE_COMPILE_MCPD) || !defined(AE_PARTIAL_BUILD)
/*
 * MCPD unit: prototypes for the mcpd-prefixed routines and the helpers for
 * the mcpdstate and mcpdreport types.
 * Declared when AE_COMPILE_MCPD is defined or AE_PARTIAL_BUILD is not.
 * NOTE(review): the group remarks below are inferred from names and
 * signatures only - confirm against the implementation.
 */
/* Creation: four variants taking n and an mcpdstate*; the entry / exit
   variants additionally take entrystate and/or exitstate indices. */
void mcpdcreate(ae_int_t n, mcpdstate* s, ae_state *_state);
void mcpdcreateentry(ae_int_t n,
ae_int_t entrystate,
mcpdstate* s,
ae_state *_state);
void mcpdcreateexit(ae_int_t n,
ae_int_t exitstate,
mcpdstate* s,
ae_state *_state);
void mcpdcreateentryexit(ae_int_t n,
ae_int_t entrystate,
ae_int_t exitstate,
mcpdstate* s,
ae_state *_state);
/* Takes a matrix xy and an integer k for an existing state. */
void mcpdaddtrack(mcpdstate* s,
/* Real */ ae_matrix* xy,
ae_int_t k,
ae_state *_state);
/* Constraint setters. The "set" forms take whole matrices; the "add" forms
   take a single (i, j) index pair with scalar values. mcpdsetlc takes a
   real matrix c, an integer vector ct and a count k. */
void mcpdsetec(mcpdstate* s,
/* Real */ ae_matrix* ec,
ae_state *_state);
void mcpdaddec(mcpdstate* s,
ae_int_t i,
ae_int_t j,
double c,
ae_state *_state);
void mcpdsetbc(mcpdstate* s,
/* Real */ ae_matrix* bndl,
/* Real */ ae_matrix* bndu,
ae_state *_state);
void mcpdaddbc(mcpdstate* s,
ae_int_t i,
ae_int_t j,
double bndl,
double bndu,
ae_state *_state);
void mcpdsetlc(mcpdstate* s,
/* Real */ ae_matrix* c,
/* Integer */ ae_vector* ct,
ae_int_t k,
ae_state *_state);
/* Further settings on the state: a scalar v, a matrix pp, a vector pw. */
void mcpdsettikhonovregularizer(mcpdstate* s, double v, ae_state *_state);
void mcpdsetprior(mcpdstate* s,
/* Real */ ae_matrix* pp,
ae_state *_state);
void mcpdsetpredictionweights(mcpdstate* s,
/* Real */ ae_vector* pw,
ae_state *_state);
/* mcpdsolve takes only the state; mcpdresults takes the state plus a matrix
   p and an mcpdreport* (presumably outputs). */
void mcpdsolve(mcpdstate* s, ae_state *_state);
void mcpdresults(mcpdstate* s,
/* Real */ ae_matrix* p,
mcpdreport* rep,
ae_state *_state);
/* init / init_copy / clear / destroy helpers for mcpdstate and mcpdreport;
   they take untyped void* object pointers. */
void _mcpdstate_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _mcpdstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _mcpdstate_clear(void* _p);
void _mcpdstate_destroy(void* _p);
void _mcpdreport_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _mcpdreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _mcpdreport_clear(void* _p);
void _mcpdreport_destroy(void* _p);
#endif
#if defined(AE_COMPILE_MLPE) || !defined(AE_PARTIAL_BUILD)
/*
 * MLPE unit: prototypes for the mlpe-prefixed routines operating on an
 * mlpensemble, plus the helpers for that type.
 * Declared when AE_COMPILE_MLPE is defined or AE_PARTIAL_BUILD is not.
 * NOTE(review): the group remarks below are inferred from names and
 * signatures only - confirm against the implementation.
 */
/* Creation family. The numeric suffix tracks the hidden-size parameters in
   the signature: 0 has none, 1 has nhid, 2 has nhid1 and nhid2. Every
   variant takes nin, nout, ensemblesize and the mlpensemble* to fill. */
void mlpecreate0(ae_int_t nin,
ae_int_t nout,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
void mlpecreate1(ae_int_t nin,
ae_int_t nhid,
ae_int_t nout,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
void mlpecreate2(ae_int_t nin,
ae_int_t nhid1,
ae_int_t nhid2,
ae_int_t nout,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
/* "b" variants: same as above with two extra doubles, b and d. */
void mlpecreateb0(ae_int_t nin,
ae_int_t nout,
double b,
double d,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
void mlpecreateb1(ae_int_t nin,
ae_int_t nhid,
ae_int_t nout,
double b,
double d,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
void mlpecreateb2(ae_int_t nin,
ae_int_t nhid1,
ae_int_t nhid2,
ae_int_t nout,
double b,
double d,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
/* "r" variants: same as the plain ones with two extra doubles, a and b. */
void mlpecreater0(ae_int_t nin,
ae_int_t nout,
double a,
double b,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
void mlpecreater1(ae_int_t nin,
ae_int_t nhid,
ae_int_t nout,
double a,
double b,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
void mlpecreater2(ae_int_t nin,
ae_int_t nhid1,
ae_int_t nhid2,
ae_int_t nout,
double a,
double b,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
/* "c" variants: parameter lists identical to the plain 0/1/2 variants. */
void mlpecreatec0(ae_int_t nin,
ae_int_t nout,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
void mlpecreatec1(ae_int_t nin,
ae_int_t nhid,
ae_int_t nout,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
void mlpecreatec2(ae_int_t nin,
ae_int_t nhid1,
ae_int_t nhid2,
ae_int_t nout,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
/* Takes an existing multilayerperceptron instead of layer sizes. */
void mlpecreatefromnetwork(multilayerperceptron* network,
ae_int_t ensemblesize,
mlpensemble* ensemble,
ae_state *_state);
/* presumably copies ensemble1 into ensemble2 - confirm direction against
   the implementation. */
void mlpecopy(mlpensemble* ensemble1,
mlpensemble* ensemble2,
ae_state *_state);
void mlperandomize(mlpensemble* ensemble, ae_state *_state);
/* Queries: mlpeproperties reports through nin / nout pointers;
   mlpeissoftmax returns ae_bool. */
void mlpeproperties(mlpensemble* ensemble,
ae_int_t* nin,
ae_int_t* nout,
ae_state *_state);
ae_bool mlpeissoftmax(mlpensemble* ensemble, ae_state *_state);
/* Processing: both variants share the same parameter list (ensemble, x, y). */
void mlpeprocess(mlpensemble* ensemble,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
void mlpeprocessi(mlpensemble* ensemble,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
/* Error evaluation. mlpeallerrorsx accepts both a dense and a sparse
   dataset argument together with datasettype, an index vector, a subset
   range/type, a shared pool and a modelerrors* result. */
void mlpeallerrorsx(mlpensemble* ensemble,
/* Real */ ae_matrix* densexy,
sparsematrix* sparsexy,
ae_int_t datasetsize,
ae_int_t datasettype,
/* Integer */ ae_vector* idx,
ae_int_t subset0,
ae_int_t subset1,
ae_int_t subsettype,
ae_shared_pool* buf,
modelerrors* rep,
ae_state *_state);
/* Sparse dataset in, five metrics reported through double* arguments. */
void mlpeallerrorssparse(mlpensemble* ensemble,
sparsematrix* xy,
ae_int_t npoints,
double* relcls,
double* avgce,
double* rms,
double* avg,
double* avgrel,
ae_state *_state);
/* Single-metric routines over a dense matrix xy; each returns double. */
double mlperelclserror(mlpensemble* ensemble,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlpeavgce(mlpensemble* ensemble,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlpermserror(mlpensemble* ensemble,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlpeavgerror(mlpensemble* ensemble,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double mlpeavgrelerror(mlpensemble* ensemble,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
/* Serialization triple; all three take an ae_serializer* and the ensemble. */
void mlpealloc(ae_serializer* s, mlpensemble* ensemble, ae_state *_state);
void mlpeserialize(ae_serializer* s,
mlpensemble* ensemble,
ae_state *_state);
void mlpeunserialize(ae_serializer* s,
mlpensemble* ensemble,
ae_state *_state);
/* init / init_copy / clear / destroy helpers for mlpensemble; they take
   untyped void* object pointers. */
void _mlpensemble_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _mlpensemble_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _mlpensemble_clear(void* _p);
void _mlpensemble_destroy(void* _p);
#endif
#if defined(AE_COMPILE_MLPTRAIN) || !defined(AE_PARTIAL_BUILD)
/*
 * MLPTRAIN unit: prototypes for the network / ensemble training routines and
 * the helpers for mlpreport, mlpcvreport, smlptrnsession, mlpetrnsession,
 * mlptrainer and mlpparallelizationcv.
 * Declared when AE_COMPILE_MLPTRAIN is defined or AE_PARTIAL_BUILD is not.
 * NOTE(review): the group remarks below are inferred from names and
 * signatures only - confirm against the implementation.
 */
/* Direct training of a multilayerperceptron from a dense dataset. All take
   decay and restarts and report through info / rep pointers; mlptrainlbfgs
   adds wstep and maxits, mlptraines takes separate trn / val datasets. */
void mlptrainlm(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
double decay,
ae_int_t restarts,
ae_int_t* info,
mlpreport* rep,
ae_state *_state);
void mlptrainlbfgs(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
double decay,
ae_int_t restarts,
double wstep,
ae_int_t maxits,
ae_int_t* info,
mlpreport* rep,
ae_state *_state);
void mlptraines(multilayerperceptron* network,
/* Real */ ae_matrix* trnxy,
ae_int_t trnsize,
/* Real */ ae_matrix* valxy,
ae_int_t valsize,
double decay,
ae_int_t restarts,
ae_int_t* info,
mlpreport* rep,
ae_state *_state);
/* Cross-validation variants: take foldscount and an mlpcvreport* in
   addition to the parameters of the matching trainer above. */
void mlpkfoldcvlbfgs(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
double decay,
ae_int_t restarts,
double wstep,
ae_int_t maxits,
ae_int_t foldscount,
ae_int_t* info,
mlpreport* rep,
mlpcvreport* cvrep,
ae_state *_state);
void mlpkfoldcvlm(multilayerperceptron* network,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
double decay,
ae_int_t restarts,
ae_int_t foldscount,
ae_int_t* info,
mlpreport* rep,
mlpcvreport* cvrep,
ae_state *_state);
/* Trainer-object interface: routines below take an mlptrainer* rather than
   a raw dataset; datasets and settings are supplied via the mlpset-prefixed
   calls. */
void mlpkfoldcv(mlptrainer* s,
multilayerperceptron* network,
ae_int_t nrestarts,
ae_int_t foldscount,
mlpreport* rep,
ae_state *_state);
void mlpcreatetrainer(ae_int_t nin,
ae_int_t nout,
mlptrainer* s,
ae_state *_state);
void mlpcreatetrainercls(ae_int_t nin,
ae_int_t nclasses,
mlptrainer* s,
ae_state *_state);
void mlpsetdataset(mlptrainer* s,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
void mlpsetsparsedataset(mlptrainer* s,
sparsematrix* xy,
ae_int_t npoints,
ae_state *_state);
void mlpsetdecay(mlptrainer* s, double decay, ae_state *_state);
void mlpsetcond(mlptrainer* s,
double wstep,
ae_int_t maxits,
ae_state *_state);
void mlpsetalgobatch(mlptrainer* s, ae_state *_state);
void mlptrainnetwork(mlptrainer* s,
multilayerperceptron* network,
ae_int_t nrestarts,
mlpreport* rep,
ae_state *_state);
/* Start / continue pair; mlpcontinuetraining returns ae_bool (presumably
   "more work remains" - confirm). */
void mlpstarttraining(mlptrainer* s,
multilayerperceptron* network,
ae_bool randomstart,
ae_state *_state);
ae_bool mlpcontinuetraining(mlptrainer* s,
multilayerperceptron* network,
ae_state *_state);
/* Ensemble training from a dense dataset; the bagging variants also take an
   mlpcvreport* named ooberrors. */
void mlpebagginglm(mlpensemble* ensemble,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
double decay,
ae_int_t restarts,
ae_int_t* info,
mlpreport* rep,
mlpcvreport* ooberrors,
ae_state *_state);
void mlpebagginglbfgs(mlpensemble* ensemble,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
double decay,
ae_int_t restarts,
double wstep,
ae_int_t maxits,
ae_int_t* info,
mlpreport* rep,
mlpcvreport* ooberrors,
ae_state *_state);
void mlpetraines(mlpensemble* ensemble,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
double decay,
ae_int_t restarts,
ae_int_t* info,
mlpreport* rep,
ae_state *_state);
/* Ensemble training through the trainer object. */
void mlptrainensemblees(mlptrainer* s,
mlpensemble* ensemble,
ae_int_t nrestarts,
mlpreport* rep,
ae_state *_state);
/* init / init_copy / clear / destroy helpers for the types of this unit;
   they take untyped void* object pointers. */
void _mlpreport_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _mlpreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _mlpreport_clear(void* _p);
void _mlpreport_destroy(void* _p);
void _mlpcvreport_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _mlpcvreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _mlpcvreport_clear(void* _p);
void _mlpcvreport_destroy(void* _p);
void _smlptrnsession_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _smlptrnsession_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _smlptrnsession_clear(void* _p);
void _smlptrnsession_destroy(void* _p);
void _mlpetrnsession_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _mlpetrnsession_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _mlpetrnsession_clear(void* _p);
void _mlpetrnsession_destroy(void* _p);
void _mlptrainer_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _mlptrainer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _mlptrainer_clear(void* _p);
void _mlptrainer_destroy(void* _p);
void _mlpparallelizationcv_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _mlpparallelizationcv_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _mlpparallelizationcv_clear(void* _p);
void _mlpparallelizationcv_destroy(void* _p);
#endif
#if defined(AE_COMPILE_CLUSTERING) || !defined(AE_PARTIAL_BUILD)
/*
 * CLUSTERING unit: prototypes for the clusterizer-prefixed routines, the
 * kmeans-prefixed internals, and the helpers for kmeansbuffers,
 * clusterizerstate, ahcreport and kmeansreport.
 * Declared when AE_COMPILE_CLUSTERING is defined or AE_PARTIAL_BUILD is not.
 * NOTE(review): the group remarks below are inferred from names and
 * signatures only - confirm against the implementation.
 */
/* State creation and configuration; every setter takes the
   clusterizerstate* first. */
void clusterizercreate(clusterizerstate* s, ae_state *_state);
void clusterizersetpoints(clusterizerstate* s,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nfeatures,
ae_int_t disttype,
ae_state *_state);
void clusterizersetdistances(clusterizerstate* s,
/* Real */ ae_matrix* d,
ae_int_t npoints,
ae_bool isupper,
ae_state *_state);
void clusterizersetahcalgo(clusterizerstate* s,
ae_int_t algo,
ae_state *_state);
void clusterizersetkmeanslimits(clusterizerstate* s,
ae_int_t restarts,
ae_int_t maxits,
ae_state *_state);
void clusterizersetkmeansinit(clusterizerstate* s,
ae_int_t initalgo,
ae_state *_state);
void clusterizersetseed(clusterizerstate* s,
ae_int_t seed,
ae_state *_state);
/* Run routines: one fills an ahcreport, the other takes k and fills a
   kmeansreport (report pointers presumably outputs). */
void clusterizerrunahc(clusterizerstate* s,
ahcreport* rep,
ae_state *_state);
void clusterizerrunkmeans(clusterizerstate* s,
ae_int_t k,
kmeansreport* rep,
ae_state *_state);
/* Distance-matrix routines that work on a raw xy matrix instead of a state;
   the "buf" variant additionally takes an apbuffers*. */
void clusterizergetdistances(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nfeatures,
ae_int_t disttype,
/* Real */ ae_matrix* d,
ae_state *_state);
void clusterizergetdistancesbuf(apbuffers* buf,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nfeatures,
ae_int_t disttype,
/* Real */ ae_matrix* d,
ae_state *_state);
/* Post-processing of an ahcreport into integer vectors cidx / cz. k is an
   input value in clusterizergetkclusters and a pointer in the
   "separatedby" variants, which take a double r instead. */
void clusterizergetkclusters(ahcreport* rep,
ae_int_t k,
/* Integer */ ae_vector* cidx,
/* Integer */ ae_vector* cz,
ae_state *_state);
void clusterizerseparatedbydist(ahcreport* rep,
double r,
ae_int_t* k,
/* Integer */ ae_vector* cidx,
/* Integer */ ae_vector* cz,
ae_state *_state);
void clusterizerseparatedbycorr(ahcreport* rep,
double r,
ae_int_t* k,
/* Integer */ ae_vector* cidx,
/* Integer */ ae_vector* cz,
ae_state *_state);
/* k-means internals working on raw matrices and a kmeansbuffers*. */
void kmeansinitbuf(kmeansbuffers* buf, ae_state *_state);
/* needccol / needcrow are flags paired with the ccol / crow matrices
   (presumably selecting which of the two is filled - confirm). */
void kmeansgenerateinternal(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t k,
ae_int_t initalgo,
ae_int_t seed,
ae_int_t maxits,
ae_int_t restarts,
ae_bool kmeansdbgnoits,
ae_int_t* info,
ae_int_t* iterationscount,
/* Real */ ae_matrix* ccol,
ae_bool needccol,
/* Real */ ae_matrix* crow,
ae_bool needcrow,
/* Integer */ ae_vector* xyc,
double* energy,
kmeansbuffers* buf,
ae_state *_state);
/* Takes index ranges idx0..idx1 over xy and cidx0..cidx1 over ct, plus the
   xyc / xydist2 vectors and a shared buffer pool. */
void kmeansupdatedistances(/* Real */ ae_matrix* xy,
ae_int_t idx0,
ae_int_t idx1,
ae_int_t nvars,
/* Real */ ae_matrix* ct,
ae_int_t cidx0,
ae_int_t cidx1,
/* Integer */ ae_vector* xyc,
/* Real */ ae_vector* xydist2,
ae_shared_pool* bufferpool,
ae_state *_state);
/* Same parameter list as kmeansupdatedistances but returns ae_bool;
   presumably a parallel-execution attempt wrapper - confirm. */
ae_bool _trypexec_kmeansupdatedistances(/* Real */ ae_matrix* xy,
ae_int_t idx0,
ae_int_t idx1,
ae_int_t nvars,
/* Real */ ae_matrix* ct,
ae_int_t cidx0,
ae_int_t cidx1,
/* Integer */ ae_vector* xyc,
/* Real */ ae_vector* xydist2,
ae_shared_pool* bufferpool, ae_state *_state);
/* init / init_copy / clear / destroy helpers for the types of this unit;
   they take untyped void* object pointers. */
void _kmeansbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _kmeansbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _kmeansbuffers_clear(void* _p);
void _kmeansbuffers_destroy(void* _p);
void _clusterizerstate_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _clusterizerstate_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _clusterizerstate_clear(void* _p);
void _clusterizerstate_destroy(void* _p);
void _ahcreport_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _ahcreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _ahcreport_clear(void* _p);
void _ahcreport_destroy(void* _p);
void _kmeansreport_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _kmeansreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _kmeansreport_clear(void* _p);
void _kmeansreport_destroy(void* _p);
#endif
#if defined(AE_COMPILE_DFOREST) || !defined(AE_PARTIAL_BUILD)
/*
 * DFOREST unit: prototypes for the df-prefixed routines and the helpers for
 * the builder, buffer, forest and report types of this unit.
 * Declared when AE_COMPILE_DFOREST is defined or AE_PARTIAL_BUILD is not.
 * NOTE(review): the group remarks below are inferred from names and
 * signatures only - confirm against the implementation.
 */
/* Takes a decisionforest* and a decisionforestbuffer*. */
void dfcreatebuffer(decisionforest* model,
decisionforestbuffer* buf,
ae_state *_state);
/* Builder creation and configuration; every setter takes the
   decisionforestbuilder* first. */
void dfbuildercreate(decisionforestbuilder* s, ae_state *_state);
void dfbuildersetdataset(decisionforestbuilder* s,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t nclasses,
ae_state *_state);
void dfbuildersetrndvars(decisionforestbuilder* s,
ae_int_t rndvars,
ae_state *_state);
void dfbuildersetrndvarsratio(decisionforestbuilder* s,
double f,
ae_state *_state);
void dfbuildersetrndvarsauto(decisionforestbuilder* s, ae_state *_state);
void dfbuildersetsubsampleratio(decisionforestbuilder* s,
double f,
ae_state *_state);
void dfbuildersetseed(decisionforestbuilder* s,
ae_int_t seedval,
ae_state *_state);
void dfbuildersetrdfalgo(decisionforestbuilder* s,
ae_int_t algotype,
ae_state *_state);
void dfbuildersetrdfsplitstrength(decisionforestbuilder* s,
ae_int_t splitstrength,
ae_state *_state);
/* Four parameterless "importance" setters on the builder. */
void dfbuildersetimportancetrngini(decisionforestbuilder* s,
ae_state *_state);
void dfbuildersetimportanceoobgini(decisionforestbuilder* s,
ae_state *_state);
void dfbuildersetimportancepermutation(decisionforestbuilder* s,
ae_state *_state);
void dfbuildersetimportancenone(decisionforestbuilder* s,
ae_state *_state);
/* Progress queries; both return double. */
double dfbuildergetprogress(decisionforestbuilder* s, ae_state *_state);
double dfbuilderpeekprogress(decisionforestbuilder* s, ae_state *_state);
/* Build entry point: takes ntrees plus decisionforest* / dfreport*
   (presumably outputs). */
void dfbuilderbuildrandomforest(decisionforestbuilder* s,
ae_int_t ntrees,
decisionforest* df,
dfreport* rep,
ae_state *_state);
/* Compression routines on an existing forest; both return double. */
double dfbinarycompression(decisionforest* df, ae_state *_state);
double dfbinarycompression8(decisionforest* df, ae_state *_state);
/* Processing: dfprocess / dfprocessi take x and y vectors; dfprocess0
   returns double and dfclassify returns ae_int_t from x alone; dftsprocess
   additionally takes a decisionforestbuffer*. */
void dfprocess(decisionforest* df,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
void dfprocessi(decisionforest* df,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
double dfprocess0(decisionforest* model,
/* Real */ ae_vector* x,
ae_state *_state);
ae_int_t dfclassify(decisionforest* model,
/* Real */ ae_vector* x,
ae_state *_state);
void dftsprocess(decisionforest* df,
decisionforestbuffer* buf,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
/* Single-metric routines over a dense matrix xy; each returns double. */
double dfrelclserror(decisionforest* df,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double dfavgce(decisionforest* df,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double dfrmserror(decisionforest* df,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double dfavgerror(decisionforest* df,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double dfavgrelerror(decisionforest* df,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
/* presumably copies df1 into df2 - confirm direction against the
   implementation. */
void dfcopy(decisionforest* df1, decisionforest* df2, ae_state *_state);
/* Serialization triple; all three take an ae_serializer* and the forest. */
void dfalloc(ae_serializer* s, decisionforest* forest, ae_state *_state);
void dfserialize(ae_serializer* s,
decisionforest* forest,
ae_state *_state);
void dfunserialize(ae_serializer* s,
decisionforest* forest,
ae_state *_state);
/* Builders that take the dataset directly (no decisionforestbuilder) and
   report through info / df / rep pointers. */
void dfbuildrandomdecisionforest(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t nclasses,
ae_int_t ntrees,
double r,
ae_int_t* info,
decisionforest* df,
dfreport* rep,
ae_state *_state);
void dfbuildrandomdecisionforestx1(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t nclasses,
ae_int_t ntrees,
ae_int_t nrndvars,
double r,
ae_int_t* info,
decisionforest* df,
dfreport* rep,
ae_state *_state);
void dfbuildinternal(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t nclasses,
ae_int_t ntrees,
ae_int_t samplesize,
ae_int_t nfeatures,
ae_int_t flags,
ae_int_t* info,
decisionforest* df,
dfreport* rep,
ae_state *_state);
/* init / init_copy / clear / destroy helpers for the types of this unit;
   they take untyped void* object pointers. */
void _decisionforestbuilder_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _decisionforestbuilder_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _decisionforestbuilder_clear(void* _p);
void _decisionforestbuilder_destroy(void* _p);
void _dfworkbuf_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _dfworkbuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _dfworkbuf_clear(void* _p);
void _dfworkbuf_destroy(void* _p);
void _dfvotebuf_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _dfvotebuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _dfvotebuf_clear(void* _p);
void _dfvotebuf_destroy(void* _p);
void _dfpermimpbuf_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _dfpermimpbuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _dfpermimpbuf_clear(void* _p);
void _dfpermimpbuf_destroy(void* _p);
void _dftreebuf_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _dftreebuf_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _dftreebuf_clear(void* _p);
void _dftreebuf_destroy(void* _p);
void _decisionforestbuffer_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _decisionforestbuffer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _decisionforestbuffer_clear(void* _p);
void _decisionforestbuffer_destroy(void* _p);
void _decisionforest_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _decisionforest_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _decisionforest_clear(void* _p);
void _decisionforest_destroy(void* _p);
void _dfreport_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _dfreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _dfreport_clear(void* _p);
void _dfreport_destroy(void* _p);
void _dfinternalbuffers_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _dfinternalbuffers_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _dfinternalbuffers_clear(void* _p);
void _dfinternalbuffers_destroy(void* _p);
#endif
#if defined(AE_COMPILE_KNN) || !defined(AE_PARTIAL_BUILD)
/*
 * KNN unit: prototypes for the knn-prefixed routines and the helpers for the
 * knnbuffer, knnbuilder, knnmodel and knnreport types.
 * Declared when AE_COMPILE_KNN is defined or AE_PARTIAL_BUILD is not.
 * NOTE(review): the group remarks below are inferred from names and
 * signatures only - confirm against the implementation.
 */
/* Takes a knnmodel* and a knnbuffer*. */
void knncreatebuffer(knnmodel* model, knnbuffer* buf, ae_state *_state);
/* Builder creation and configuration. The two dataset setters differ only
   in their last count parameter: nout versus nclasses. */
void knnbuildercreate(knnbuilder* s, ae_state *_state);
void knnbuildersetdatasetreg(knnbuilder* s,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t nout,
ae_state *_state);
void knnbuildersetdatasetcls(knnbuilder* s,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t nclasses,
ae_state *_state);
void knnbuildersetnorm(knnbuilder* s, ae_int_t nrmtype, ae_state *_state);
/* Build entry point: takes k and eps plus knnmodel* / knnreport*
   (presumably outputs). */
void knnbuilderbuildknnmodel(knnbuilder* s,
ae_int_t k,
double eps,
knnmodel* model,
knnreport* rep,
ae_state *_state);
/* Takes the same k / eps pair for an already existing model. */
void knnrewritekeps(knnmodel* model,
ae_int_t k,
double eps,
ae_state *_state);
/* Processing: knnprocess / knnprocessi take x and y vectors; knnprocess0
   returns double and knnclassify returns ae_int_t from x alone;
   knntsprocess additionally takes a knnbuffer*. */
void knnprocess(knnmodel* model,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
double knnprocess0(knnmodel* model,
/* Real */ ae_vector* x,
ae_state *_state);
ae_int_t knnclassify(knnmodel* model,
/* Real */ ae_vector* x,
ae_state *_state);
void knnprocessi(knnmodel* model,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
void knntsprocess(knnmodel* model,
knnbuffer* buf,
/* Real */ ae_vector* x,
/* Real */ ae_vector* y,
ae_state *_state);
/* Single-metric routines over a dense matrix xy; each returns double. */
double knnrelclserror(knnmodel* model,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double knnavgce(knnmodel* model,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double knnrmserror(knnmodel* model,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double knnavgerror(knnmodel* model,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
double knnavgrelerror(knnmodel* model,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_state *_state);
/* Same dataset arguments, but reports through a knnreport* instead of a
   return value. */
void knnallerrors(knnmodel* model,
/* Real */ ae_matrix* xy,
ae_int_t npoints,
knnreport* rep,
ae_state *_state);
/* Serialization triple; all three take an ae_serializer* and the model. */
void knnalloc(ae_serializer* s, knnmodel* model, ae_state *_state);
void knnserialize(ae_serializer* s, knnmodel* model, ae_state *_state);
void knnunserialize(ae_serializer* s, knnmodel* model, ae_state *_state);
/* init / init_copy / clear / destroy helpers for the types of this unit;
   they take untyped void* object pointers. */
void _knnbuffer_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _knnbuffer_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _knnbuffer_clear(void* _p);
void _knnbuffer_destroy(void* _p);
void _knnbuilder_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _knnbuilder_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _knnbuilder_clear(void* _p);
void _knnbuilder_destroy(void* _p);
void _knnmodel_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _knnmodel_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _knnmodel_clear(void* _p);
void _knnmodel_destroy(void* _p);
void _knnreport_init(void* _p, ae_state *_state, ae_bool make_automatic);
void _knnreport_init_copy(void* _dst, void* _src, ae_state *_state, ae_bool make_automatic);
void _knnreport_clear(void* _p);
void _knnreport_destroy(void* _p);
#endif
#if defined(AE_COMPILE_DATACOMP) || !defined(AE_PARTIAL_BUILD)
/*
 * DATACOMP unit: a single prototype, declared when AE_COMPILE_DATACOMP is
 * defined or AE_PARTIAL_BUILD is not.
 * kmeansgenerate takes a dataset xy with npoints / nvars, a cluster count k
 * and restarts, and has pointer arguments info, c (real matrix) and xyc
 * (integer vector).
 * NOTE(review): info, c and xyc are presumably outputs - confirm against
 * the implementation.
 */
void kmeansgenerate(/* Real */ ae_matrix* xy,
ae_int_t npoints,
ae_int_t nvars,
ae_int_t k,
ae_int_t restarts,
ae_int_t* info,
/* Real */ ae_matrix* c,
/* Integer */ ae_vector* xyc,
ae_state *_state);
#endif
}
#endif