/* data structure of class cutPtsSet: private: Matrix origin; Matrix weight; Matrix var; Matrix sigmaCP; Matrix span; - *this is a row vector of nbCP() T, value of each cutPtsSet ***times 2***. the cutPtsSets are organized in an increasing order of the variables, and for each variable, in an increasing size. - weight is a row vector of nbCP() float, weight of each cutPtsSet. - var is a row vector of nbCP() int. var(cpIndex) is the index of the attribute associate to the cpIndex-th cutPtsSet. - sigmaCP is a row vector of nbVars() int. sigmaCP(var) := nbCP(var) + sigmaCP(var-1) if var > 0 nbCP(var) if var == 0 - span is a row vector of nbVars() float, span(var) is propotional to the maximal value along var, minus the minimal value along var. - origin is a row vector of nbCP() T, with the original value of each cutPtsSet. This is used only in case of normalization (by a call to normalize), otherwise this is an empty vector. */ #include "cutPtsSet.h" #include #include #include "../Matrices/spareM.h" #include "../Matrices/setCovering.h" #include "main_def.h" #define _CHECK_INDEX // if T is char, short or int #if 1 #define Plus1(x) ((x)+1) #define Div2(x) ((x)>>1) #define Tim2(x) ((x)<<1) // else if T is float or double #else #define Plus1(x) (x) #define Div2(x) ((x)/2.0) #define Tim2(x) ((x)*2.0) #endif //////////////////////////// class cutPtsSet ////////////////////////////////// ////////////////// // Constructors // ////////////////// tcT cutPtsSet::cutPtsSet (int nbCPts, int nbVars) : Matrix(nbCPts), origin(0), weight(nbCPts), var(nbCPts), sigmaCP(nbVars), span(nbVars) { } tcT cutPtsSet& cutPtsSet::resize(int nbCPts, int nbVars) { Matrix::resize(nbCPts); weight.resize(nbCPts); var.resize(nbCPts); sigmaCP.resize(nbVars); span.resize(nbVars); if ( origin.n() ) origin.resize(nbCPts); return *this; } tcT /*inline*/ cutPtsSet::cutPtsSet () : Matrix(0), origin(0), weight(0), var(0), sigmaCP(0), span(0) { } tcT /*inline*/ cutPtsSet::~cutPtsSet () { } tcT cutPtsSet::cutPtsSet (multiDS& d, float& confidence, const char method/**=0**/, const char weighting/**=0**/) : Matrix(0), origin(0), weight(0), var(0), sigmaCP(0), span(0) { if ( method==0 ) // insert a cut point at every change of class along the axis { resize(d.ds[0].dim()*d.distinct(), d.ds[0].dim()); weight=0.0; Matrix valcat(d.distinct(),2); int nbTh=0; for (int attr=0; attr::operator()(nbTh) = val1 + valcat(i,0); if ( weighting==2 ) weight(nbTh) = float(valcat(i,0)-val1)/(2.0*span(attr)); var(nbTh) = attr; sigmaCP(attr)++; nbTh++; prevThresh=valcat(i,0); } } } resize_(nbTh); weight.resize_(nbTh); var.resize_(nbTh); } else if ( method==1 ) // insert a cut point for every pairs of points of different classes { resize(d.ds[0].dim()*d.nbPairs(), d.ds[0].dim()); Matrix my_cut_pts(d.nbPairs()); int nbTh=0; for (int attr=0; attr singleton(1,1,attr); multiDS proj(d,singleton); proj.sort().checkMult(); // introduce one cutPtsSet for each pairs of points from two // different classes int ptr=0; T ll,rr; for (int clL=0; clL merged; merged = proj.merge(false); merged.sort(); for(int i=0; i=0 && unknown(merged(i,0)); i--); maxi=merged(i,0); span(attr) = maxi - mini; } Array::operator()(nbTh) = my_cut_pts(0); var(nbTh) = attr; sigmaCP(attr)++; nbTh++; for (int pth=1; pth::operator()(nbTh) = my_cut_pts(pth); var(nbTh) = attr; sigmaCP(attr)++; nbTh++; } } else span(attr)=0; } resize_(nbTh); weight.resize_(nbTh); var.resize_(nbTh); } // Check whether confidence is not too high Array list(d.listOfPairs()); float minConfi=1.0; for (int pair=0; pair0.0 ) maximize(maxGap, float ((minimum(operator()(pcp)-Tim2(val1), Tim2(val2)-operator()(pcp))) / (2.0 * span(vari)))); } minimize(minConfi,maxGap); } minConfi-=1E-5; if ( confidence > minConfi ) flog<< endl << "The required confidence of " << setprecision(3) << setw(6) << confidence << " has been reset to " << setw(6) << (confidence=minConfi) << endl << flush; // compute the weights if ( weighting==1 ) // correlation with output { for (int pCP=0; pCP cardi(2,d.nbCats(),0.0); for (int set=0; set0, it might happen in some rare occasions weight(pCP) = -1E10; else { cardi /= cardi.sum().t(); double entropyPos=0.0; double entropyNeg=0.0; for (set=0; set0.0 ? cardi(1,set)*log(cardi(1,set)) : 0.0 ); entropyNeg += ( cardi(0,set)>0.0 ? cardi(0,set)*log(cardi(0,set)) : 0.0 ); } weight(pCP) = float(maximum(entropyPos,entropyNeg)); } } } else if ( weighting==2 && method ) { weight=1E10; for (int attr=0; attr singleton(1,1,attr); multiDS proj(d,singleton); proj.sort().checkMult(); for (int clL=0; clL=0 ) for (int pth=first; pth<=last; pth++) minimize(weight(pth),float(minimum( absolute(Tim2(val1)-operator()(pth)), absolute(Tim2(val2)-operator()(pth))))* float(proj.ds[clL].multiplicity(indL))* float(proj.ds[clR].multiplicity(indR))); } } for (int pth=firstIndex(attr); pth singleton(1,1,attr); multiDS proj(d,singleton); proj.sort().checkMult(); for (int clL=0; clL=0 ) for (int pth=first; pth<=last; pth++) weight(pth) += float(minimum( absolute(Tim2(val1)-operator()(pth)), absolute(Tim2(val2)-operator()(pth))))* float(proj.ds[clL].multiplicity(indL))* float(proj.ds[clR].multiplicity(indR)); } } weight.s(0,1,firstIndex(attr),nbCP(attr))/=(2.0*span(attr)); } } } tcT cutPtsSet::cutPtsSet (cutPtsSet& from, const Matrix& index) : Matrix(index.n()), origin(0), weight(index.n()), var(index.n()), sigmaCP(1,from.nbVars(),0), span(from.span) { if ( from.origin.n() ) origin.resize(index.n()); Matrix ind(index); ind.sort(); for (int th=0; th::operator()(th) = from(ind(th)); if ( origin.n() ) origin(th) = from.origin(ind(th)); weight(th) = from.weight(ind(th)); var(th) = from.var(ind(th)); if ( th && var(th)!=var(th-1) ) for (int j=var(th-1)+1; j<=var(th); j++) sigmaCP(j) = sigmaCP(j-1); sigmaCP(var(th))++; } for (int j=var(th-1)+1; j::nbVars () const { return sigmaCP.n(); } tcT /*inline*/ int cutPtsSet::nbCP () const { return sigmaCP(nbVars()-1); } tcT /*inline*/ int cutPtsSet::nbCP (int vari) const { #ifdef _CHECK_INDEX if ( vari<0 || vari>=nbVars() ) throw whereOutOfRange( "int cutPtsSet::nbCP(int vari) const", "vari", vari, 0, nbVars()-1); #endif if ( !vari ) return sigmaCP(0); else return sigmaCP(vari)-sigmaCP(vari-1); } tcT /*inline*/ int cutPtsSet::firstIndex (int vari) const { #ifdef _CHECK_INDEX if ( vari<0 || vari>=nbVars() ) throw whereOutOfRange( "int cutPtsSet::firstIndex(int vari) const", "vari", vari, 0, nbVars()-1); #endif return (vari ? sigmaCP(vari-1) : 0); } tcT /*inline*/ int cutPtsSet::lastIndex (int vari) const { #ifdef _CHECK_INDEX if ( vari<0 || vari>=nbVars() ) throw whereOutOfRange( "int cutPtsSet::lastIndex(int vari) const", "vari", vari, 0, nbVars()-1); #endif return sigmaCP(vari)-1; } tcT /**inline**/ int cutPtsSet::attribute (int cpIndex) const { #ifdef _CHECK_INDEX if ( cpIndex<0 || cpIndex>=nbCP() ) throw whereOutOfRange( "int cutPtsSet::attribute(int thInd) const", "thInd", cpIndex, 0, nbCP()-1); #endif return var(cpIndex); } tcT /*inline*/ int cutPtsSet::index (int cpIndex) const { #ifdef _CHECK_INDEX if ( cpIndex<0 || cpIndex>=nbCP() ) throw whereOutOfRange( "int cutPtsSet::index(int cpIndex) const", "cpIndex", cpIndex, 0, nbCP()-1); #endif return (var(cpIndex) ? cpIndex - sigmaCP(var(cpIndex)-1) : cpIndex); } tcT /*inline*/ T cutPtsSet::operator () (int cpIndex) const { #ifdef _CHECK_INDEX if ( cpIndex<0 || cpIndex>=nbCP() ) throw whereOutOfRange( "T cutPtsSet::operator()(int cpIndex) const", "cpIndex", cpIndex, 0, nbCP()-1); #endif return Array::operator()(cpIndex); } tcT /**inline**/ boolean cutPtsSet::above (T val, int cpIndex) const { if(unknown(val)) return(false); #if _val_GEQ_cp return ( Tim2(val) >= operator()(cpIndex) ); #else return ( Tim2(val) > operator()(cpIndex) ); #endif } tcT int cutPtsSet::compare (T val, int cpIndex, float confidence/**=0.0**/) const { if(unknown(val)) return(-1); confidence *= span(attribute(cpIndex)); #if _val_GEQ_cp if ( float(val) >= float(operator()(cpIndex))/2.0 + confidence ) return +1; else if ( float(val) < float(operator()(cpIndex))/2.0 - confidence ) return 0; else return -1; #else if ( float(val) > float(operator()(cpIndex))/2.0 + confidence ) return +1; else if ( float(val) <= float(operator()(cpIndex))/2.0 - confidence ) return 0; else return -1; #endif } tcT boolean cutPtsSet::between (T val1, T val2, int cpIndex, float confidence/**=0.0**/) const { if ( val1==val2 || unknown(val1) || unknown(val2)) return false; confidence *= span(attribute(cpIndex)); #if _val_GEQ_cp if ( val1 < val2 ) return float(val1)+confidence < float(operator()(cpIndex))/2.0 && float(val2)-confidence >= float(operator()(cpIndex))/2.0; else return float(val2)+confidence < float(operator()(cpIndex))/2.0 && float(val1)-confidence >= float(operator()(cpIndex))/2.0; #else if ( val1 < val2 ) return float(val1)+confidence <= float(operator()(cpIndex))/2.0 && float(val2)-confidence > float(operator()(cpIndex))/2.0; else return float(val2)+confidence <= float(operator()(cpIndex))/2.0 && float(val1)-confidence > float(operator()(cpIndex))/2.0; #endif } tcT /*inline*/ T cutPtsSet::operator () (int cpInV, int vari) const { #ifdef _CHECK_INDEX if ( vari<0 || vari>=nbVars() ) throw whereOutOfRange( "T cutPtsSet::operator()(int cpInV, int vari) const", "vari", vari, 0, nbVars()-1); if ( cpInV<0 || cpInV>=nbCP(vari) ) throw whereOutOfRange( "T cutPtsSet::operator()(int cpInV, int vari) const", "cpInV", cpInV, 0, nbCP(vari)-1); #endif return Array::operator()(firstIndex(vari)+cpInV); } tcT /*inline*/ boolean cutPtsSet::above (T val, int cpInV, int vari) const { if(unknown(val)) return(false); #if _val_GEQ_cp return ( Tim2(val)>=operator()(cpInV,vari) ); #else return ( Tim2(val)> operator()(cpInV,vari) ); #endif } tcT int cutPtsSet::compare (T val, int cpInV, int vari, float confidence/**=0.0**/) const { if(unknown(val)) return(-1); confidence *= span(vari); #if _val_GEQ_cp if ( float(val) >= float(operator()(cpInV,vari))/2.0 + confidence ) return +1; else if ( float(val) < float(operator()(cpInV,vari))/2.0 - confidence ) return -1; else return 0; #else if ( float(val) > float(operator()(cpInV,vari))/2.0 + confidence ) return +1; else if ( float(val) <= float(operator()(cpInV,vari))/2.0 - confidence ) return -1; else return 0; #endif } tcT boolean cutPtsSet::between (T val1, T val2, int cpInV, int v, float confidence/**=0.0**/) const { if ( val1==val2 || unknown(val1) || unknown(val2) ) return false; #if _val_GEQ_cp if ( val1 < val2 ) return float(val1)+confidence < float(operator()(cpInV,v))/2.0 && float(val2)-confidence >= float(operator()(cpInV,v))/2.0; else return float(val2)+confidence < float(operator()(cpInV,v))/2.0 && float(val1)-confidence >= float(operator()(cpInV,v))/2.0; #else if ( val1 < val2 ) return float(val1)+confidence <= float(operator()(cpInV,v))/2.0 && float(val2)-confidence > float(operator()(cpInV,v))/2.0; else return float(val2)+confidence <= float(operator()(cpInV,v))/2.0 && float(val1)-confidence > float(operator()(cpInV,v))/2.0; #endif } tcT /*inline*/ T cutPtsSet::value (int cpInV, int vari) const { #ifdef _CHECK_INDEX if ( vari<0 || vari>=nbVars() ) throw whereOutOfRange( "T cutPtsSet::operator()(int cpInV, int vari) const", "vari", vari, 0, nbVars()-1); if ( cpInV<0 || cpInV>=nbCP(vari) ) throw whereOutOfRange( "T cutPtsSet::operator()(int cpInV, int vari) const", "cpInV", cpInV, 0, nbCP(vari)-1); #endif #if _val_GEQ_cp return Div2(Plus1(Array::operator()(firstIndex(vari)+cpInV))); #else return Div2(Array::operator()(firstIndex(vari)+cpInV)); #endif } tcT /*inline*/ float cutPtsSet::original (int cpInV, int vari) const { #ifdef _CHECK_INDEX if ( vari<0 || vari>=nbVars() ) throw whereOutOfRange( "T cutPtsSet::original(int cpInV, int vari) const", "vari", vari, 0, nbVars()-1); if ( cpInV<0 || cpInV>=nbCP(vari) ) throw whereOutOfRange( "T cutPtsSet::original(int cpInV, int vari) const", "cpInV", cpInV, 0, nbCP(vari)-1); #endif return ( origin.n() ? float(origin(firstIndex(vari)+cpInV))/2.0 : float(Array::operator()(firstIndex(vari)+cpInV))/2.0); } tcT int cutPtsSet::firstCPG (T val, int vari, float confidence/**=0.0**/) const { if(unknown(val)) return -1; int last = lastIndex(vari); for (int index=firstIndex(vari); index<=last; index++) if ( compare(val,index,confidence)==0 ) return index; return -1; } tcT int cutPtsSet::lastCPL (T val, int vari, float confidence/**=0.0**/) const { if(unknown(val)) return -1; int first = firstIndex(vari); for (int index=lastIndex(vari); index>= first; index--) if ( compare(val,index,confidence)==1 ) return index; return -1; } /////////////// // Operators // /////////////// tcT cutPtsSet& cutPtsSet::normalize (multiDS& d) { if ( d.normalized ) return *this; for (int set=0; set nd(d.ds[set].distinct(),d.ds[set].dim()); nd = d.ds[set].matrix(); for (int vari=0; vari=nbCP(vari) ) { nd(row,vari) = (T)(tI); break; } d.ds[set] = dataSet(nd,d.ds[set]); } if ( !origin.n() ) origin.resize(*this); for (int row=0; row::operator()(firstIndex(row)+tI) = (T)(2*tI+1); d.normalized=true; return *this; } tcT cutPtsSet& cutPtsSet::reduce (multiDS& d, int method, float& separability, float confidence/**=0.0**/) { // Determine the set of pairs of points from two different classes. Array list(d.listOfPairs()); // The indices of the final cut points will be ssind(0),...,ssind(ssSize-1). Matrix ssind(0); int ssSize=0; if ( method<0 ) cout<< endl << "There are X cut points of weight>=Y, of min sep Z\n"; if ( method<=1 ) { ssind.resize(1,nbCP()); for (int i=0; i separabil(1,list.m()); separabil=0.0; float sepMin; for (int curW=nbCP()-1; curW>=0; curW--) { int vari=attribute(ssind(curW)); for (int pair=0; pair= separability ) { while ( curW && weight(ssind(curW))==weight(ssind(--curW)) ) ssSize++; break; } } if ( ssSize < nbCP() ) ssind.s(0,1,0,ssSize) = ssind.s(0,1,nbCP()-ssSize,ssSize); } if ( method==2 ) { binMatrix constraint(list.m(), nbCP(), false); for (int row=0; row=0 && last>=0 && last>=first ) { constraint.s(row,1,first,last-first+1)=true; totSep+=last-first+1; } } } if ( totSep < separability ) flog<< endl << "### Pair (" << d.ds[list(row,0)].label(list(row,1)) << "," << d.ds[list(row,2)].label(list(row,3)) << ") has a total separability of" << setw(3) << totSep << ", strictly less than " << setw(3) << separability << endl << flush; else if ( !totSep ) throw whereWhich( "cutPtsSet::cutPtsSet (const multiDS& d, float& SEP, int)", "Unseparable pair of points"); } setCover sc(constraint); sc.greedy(flog, 1,int(separability),false); if ( int(separability)==1 ) sc.reduce(); ssind.resize(sc.solution()); ssSize=ssind.n(); } if ( method==3 ) { // flog << "\n Hello, I am entering the greedy GSCP\n" << flush; // Construct a matrix SCP with one column per pair of points from // differents classes, and one row per current cut points, such that // SCP(k,(i,j)) gives a non-negative value expressing the quality of the // separation of the points i and j using cutPtsSet k: SCP(k,(i,j)) = 0 // if k does not separate i and j, and than SCP(k,(i,j)) is proportional // to the minimum gap between i and k and j and k. // In this implementation, SCP(k,(i,j)) is in {0,...,60}. 60 corresponds // to a cutPtsSet of value 0.5 for a pair of boolean points (0,1). // Row 0 and columns 0-1 of SCP will be used later. const int hOff=2; const int vOff=1; // col and row offsets int bound = int(separability * 120.0); // Thus separability==0.5 implies bound==60. if ( bound > 240 ) // this will lead to results of sums above 255 throw whereWhich( "cutPtsSet::cutPtsSet (const multiDS& d, float& SEP, int)", "SEP can not be bigger than 2"); // flog << "There are " << list.m() << " pairs and " << nbCP() << " cut points\n"; // flog << "I try to declare SCP\n" << flush; // Count the number of non zero coefficient of the matrix int nbNZ=0; int pair; for (pair=0; pair SCP(nbCP(),nbNZ); // setup the constraint matrix and compute the total sum for each pair Matrix sumTot(list.m()); unsigned char aux; for (int th=0; th criticalPairs(list.m()); int nbcriticalPairs=0; for (pair=0; pair= bound, Xi in {0,1}. // row 0 of SCP contains the sum due to the current solution // col 0 of SCP contains the index of the cutPtsSet ssind.resize(nbCP()); ssSize=0; int nbR=nbCP(); int nbC=list.m(); int row,col; unsigned char v; int bestRow; int minToFill; int toFill; Matrix auxsum(1,list.m(),0); Matrix partsum(1,list.m(),0); Matrix activeR(1,nbCP(),true); Matrix activeC(1,list.m(),true); Matrix lweight(weight); // if there are some pairs with separability<=required_separability // then we first select all the cut points that separate these pairs if ( nbcriticalPairs ) { // Introduce all the required rows in the solution, suppressed them // from the table, and mark all the columns sufficiently covered for (int pairInd=0; pairInd=bound ) { activeC(cc)=false; nbC--; } } // flog << "I am starting solving the problem\n" << flush; // main loop while (nbC && nbR) { // find the best row minToFill=nbC*bound; bestRow=0; for (row=0; rowlweight(bestRow)) || ((lweight(row)==lweight(bestRow)) && flipAtail) ) ) ) { minToFill=toFill; bestRow=row; } } // add bestRow into solution, update the sum and suppress bestRow ssind(ssSize++) = bestRow; activeR(bestRow)=false; nbR--; if ( SCP.first(bestRow,col,v) ) do if ( activeC(col) ) if ( partsum(col)+v < bound ) partsum(col) += v; else { activeC(col)=false; nbC--; } while ( SCP.next(bestRow,col,v) ); } } // flog << "I did it!\n" << flush; // Decode the solution, and construct the final set of cut points. ssind.s(0,1,0,ssSize).sort(); // Since ssind represents a subset and the indices a orders, one can // assume that ssind(i)>=i for all i=0,...,ssSize-1. sigmaCP = 0; for (int s=0; s::operator()(s) = Array::operator()(ssind(s)); var(s) = var(ssind(s)); weight(s) = weight(ssind(s)); if ( origin.n() ) origin(s) = origin(ssind(s)); if ( s && var(s)!=var(s-1) ) sigmaCP.s(0,1,var(s-1)+1,var(s)-var(s-1)) = sigmaCP(var(s-1)); sigmaCP(var(s))++; } if ( var(s-1)+1 < sigmaCP.n() ) sigmaCP.s(0,1,var(s-1)+1) = sigmaCP(var(s-1)); resize_(ssSize); weight.resize_(ssSize); var.resize_(ssSize); if ( origin.n() ) origin.resize_(ssSize); return *this; } tcT ostream& operator << (ostream& s, const cutPtsSet& myself) { s.setf(ios::fixed | ios::showpoint); s << endl; int curCP=0; for (int v = 0; v < myself.nbVars(); v++) if ( myself.nbCP(v) ) { s << "v" << setw(2) << v+1 << ":"; for (int ptr=0; ptr