#ifndef multiDS_h #define multiDS_h #include #include "../Basics/basic.h" #include "dataSet.h" #include "pattern.h" #include "cutPtsSet.h" /* ******************** class multiDS ******************** This class is a collection of objects of type dataSet. */ tcT class cutPtsSet; // forward tcT class multiDS // a collection of dataSets { public: dataSet* ds; Matrix label; ////////////////// // Constructors // ////////////////// multiDS (); // Default constructor. Create 0 dataSets. multiDS (int nbCategories); // Create nbCategories empty dataSets. multiDS (const multiDS& from, float rate); // Extract a random subset of data from FROM. If rate==1, this is the // copy constructor. multiDS (const multiDS& from, const Array& varIndex); // Constructs a new multiDS by keeping only variables with given indices // from multiDS FROM. // Indices out of {0,...,from.nbVars()-1} are ignored. multiDS (const dataSet& from); multiDS (const dataSet& from, const Matrix& catIndex); // Create several data sets from one. In the first procedure, one set // is created for each different value of attribute indexed ATTRINDEX. // While in the second, only values in CATINDEX are considered. ~multiDS (); // Default destructor. /////////////// // Operators // /////////////// int nbCats() const; // Gives the number of data sets. int nbPairs() const; // # pairs of points from different classes. int distinct() const;// Sum of distinct data in each set. int card() const; // Total number of data. int dim() const; // Dimension of each data. char monotone(const int attr) const; // Return the monotonicity contraint of ATTR. multiDS& checkMult(); // Suppresses repetitions in all data sets. multiDS& sort(); // Sorts all data sets. multiDS& reorganize(); // Reorganizes all data sets. void split(multiDS& first, multiDS& second, float rate) const; // Split the dataset into two datasets, the size of the first one // is given by rate. void partition(Matrix& part, int K) const; // Constructs a partition of K equal parts of the dataset. void setFold(multiDS& data, const Matrix& part, const int fold) const; // If fold>0, set DATA to the fold^th partition of *this, // if fold<0, set DATA to everything but the fold^th partition of *this, // fold \in {-K,...,-1} \union {1,...,K}. boolean isConsistant(const boolean signalIfNot=true) const; // Returns false iff there is two identical data in two different sets or // if there are two data D1 and D2 in sets S1 and S2, S1D2(V) && V!=positiveAttr. Array listOfPairs(boolean compress=false, boolean recreate=false); // Return the list of pairs of points from two different classes. // The result is a 4 columns matrix, each row represents a pair // as follows: in row (c1,p1,c2,p2), ci is the index of the class, and // pi is the index of the point. If compress, the point-in-a-box idea // is applied to reduce the number of pairs. // N.B. each dataset is assumed *sorted*. multiDS& binarize (const cutPtsSet& cps); multiDS& binarize (const cutPtsSet& cps, float safety); // Binarize the data, using one binary attribute per cut point in cp. // If safety is given, each coefficient is either -1, 0 or +1. void mapCube (Matrix& net, const int expectedDim = 10); // Map a hypercube of size expectedDim into the space of data. // The size of net will be 2^expectedDim x this.dim(). multiDS& binarize (const Matrix net); // Binarize the data according to an organized map represented by NET. // NET is a matrix of size 2^p x this.dim(), and after the binarization, // this.dim() will be equal to p. dataSet merge(const boolean class_first=true); // here we merge all the data sets into one data set. multiDS& separate(const dataSet& from); // here we split a dataSet into a multiDS. void smallPatterns(patterns& patList, int& nbNonCovered, const Array& posClass, const Array& negClass, const boolean orientation, int maxSize=4, int minTrue=1, int coverNeeded=10, int gap2F=1, float tolerance=0.0 /*float rateLimit=0.3 -??*/) const; void patchPatterns(patterns& patList, const Array& posClass, const Array& negClass, const boolean orientation, int& puc) const; void printPatFile (patterns& posList, patterns& negList, const Array& posClass, const Array& negClass,ostream& s, int getPat, int pos_neg, Array& stat) const; void cleanPatterns(patterns& patList, const Array& posClass) const; void reducePatterns(patterns& patList, const Array& posClass, int minCover=1) const; void weightPatterns(patterns& patList, const Array& posClass, const Array& negClass, int weighting=1, boolean normalize=true, boolean delZWP=true) const; void balanceWeightPatterns(patterns& posList, patterns& negList, const Array& posClass, const Array& negClass, int method=1) const; int nbErrors(patterns& posList, patterns& negList, float errorTolerance, int& wronglyOn, int& wronglyOff, int& undecidable, char* fileName=NULL) const; int boundOnErrors(const multiDS& basis) const; friend ostream& operator << (ostream& s, const multiDS& myself); boolean normalized; ///////////////////////////// private: int nbSets; Matrix list; }; tcT ostream& operator << (ostream& s, const multiDS& myself); // Outputs a set of data sets. #endif