#include "multiDS.h" #include "../Matrices/binMatrix.h" #include "../Matrices/setCovering.h" #include "../LP/linearProgram.h" #include "main_def.h" #define _CHECK_INDEX ////////////////// // Constructors // ////////////////// tcT multiDS::multiDS () : ds(0), label(), nbSets(0), list(), normalized(false) {} tcT multiDS::multiDS (int nbCategories) : ds(new dataSet [nbCategories]), label(nbCategories), nbSets(nbCategories), list(), normalized(false) {} tcT multiDS::multiDS (const multiDS& from, float rate) : ds(new dataSet [from.nbCats()]), label(from.label), nbSets(from.nbSets), list(), normalized(from.normalized) { for (int set=0; set(from.ds[set], rate); } tcT multiDS::multiDS (const multiDS& from, const Array& varIndex) : ds(new dataSet [from.nbCats()]), label(from.label), nbSets(from.nbSets), list(), normalized(false) { for (int set=0; set(from.ds[set], varIndex); } tcT multiDS::multiDS (const dataSet& from) : ds(0), label(), nbSets(0), list(), normalized(false) { #ifdef _CHECK_INDEX if ( from.classCol()<0 || from.classCol()>=from.dim() ) throw whereOutOfRange( "multiDS::multiDS(const dataSet&, int from.classCol())", "from.classCol()",from.classCol(),0,from.dim()-1); #endif // determine the number of distinct categories Matrix singleton(1,1,from.classCol()); int garbage; dataSet categories(from,singleton); categories.sort().checkMult(garbage); Matrix catIndex(categories.matrix().ta()); ds = new dataSet [catIndex.n()]; label.resize(catIndex.n()); for (int cI=0; cI cm(from.distinct(),from.dim()-1); Matrix multiple(from.distinct()); Matrix labelle(from.distinct()); int cmPtr=0; for (int dPtr=0; dPtr 0 ) cm.s(cmPtr,1,0,from.classCol()) = from(dPtr).s(0,1,0,from.classCol()); if ( from.classCol() < from.dim()-1 ) cm.s(cmPtr,1,from.classCol()) = from(dPtr).s(0,1,from.classCol()+1); multiple(cmPtr) = from.multiplicity(dPtr); labelle(cmPtr) = from.label(dPtr); cmPtr++; } if ( cmPtr ) { ds[nbSets] = dataSet( cm.resize_(cmPtr,cm.n()), multiple.resize_(cmPtr), labelle.resize_(cmPtr)); label(nbSets) = catIndex(cI); nbSets++; } } if ( nbSets < catIndex.n() ) // some categories were inexistant { label.resize_(nbSets); dataSet* aux = ds; ds = new dataSet [nbSets]; for (int i=0; i::multiDS ( const dataSet& from, const Matrix& catIndex) : ds(new dataSet [catIndex.n()]), label(catIndex.n()), nbSets(0), list(), normalized(false) { #ifdef _CHECK_INDEX if ( from.classCol()<0 || from.classCol()>=from.dim() ) throw whereOutOfRange( "multiDS::multiDS(const dataSet&, int, const Matrix&)", "from.classCol()",from.classCol(),0,from.dim()); #endif for (int cI=0; cI cm(from.distinct(),from.dim()-1); Matrix multiple(from.distinct()); Matrix labelle(from.distinct()); int cmPtr=0; for (int dPtr=0; dPtr 0 ) cm.s(cmPtr,1,0,from.classCol()) = from(dPtr).s(0,1,0,from.classCol()); if ( from.classCol() < from.dim()-1 ) cm.s(cmPtr,1,from.classCol()) = from(dPtr).s(0,1,from.classCol()+1); multiple(cmPtr) = from.multiplicity(dPtr); labelle(cmPtr) = from.label(dPtr); cmPtr++; } if ( cmPtr ) { ds[nbSets] = dataSet( cm.resize_(cmPtr,cm.n()), multiple.resize_(cmPtr), labelle.resize_(cmPtr)); label(nbSets) = catIndex(cI); nbSets++; } } if ( nbSets++ < catIndex.n() ) // some categories were inexistant { label.resize_(nbSets); dataSet* aux = ds; ds = new dataSet [nbSets]; for (int i=0; i::~multiDS () { delete [] ds; } /////////////// // Operators // /////////////// tcT /*inline*/ int multiDS::nbCats() const { return nbSets; } tcT /*inline*/ int multiDS::dim() const { return ds[0].dim(); } tcT int multiDS::nbPairs() const { int nbConstrs=0; for (int clL=0; clL::distinct() const { int res=0; for (int cl=0; cl::card() const { int res=0; for (int cl=0; cl::monotone(const int attr) const { if ( !ds[0].monotone.n() ) return nonMonotonicAttr; else return ds[0].monotone(attr); } tcT multiDS& multiDS::checkMult() { int saved; for (int set=0; set& multiDS::sort() { for (int set=0; set& multiDS::reorganize() { for (int set=0; set ostream& operator << (ostream& s, const multiDS& myself) { for (int set=0; set::split(multiDS& first, multiDS& second, float rate) const { if ( first.nbCats()!=nbCats() ) { delete first.ds; first.ds = new dataSet [nbCats()]; first.nbSets=nbCats(); first.list.resize(); } if ( second.nbCats()!=nbCats()) { delete second.ds; second.ds = new dataSet [nbCats()]; second.nbSets=nbCats(); second.list.resize(); } first.label.resize(label); first.normalized=normalized; second.label.resize(label); second.normalized=normalized; for (int set=0; set::partition(Matrix& part, int K) const // part is a maxCard+K+1 x nbCats() matrix. // part(0,s)=K for all s. // part(k,s)=cardinality of fold k in set s, k=1,...,K. // part(K+i,s)=1,...,K, label of the i^th el. of set s, i=1,...,ds[s].card(). { // compute the maximum card among all datasets int maxCard=0; for (int set=0; set smallerPart(1,K,1); int nbSmaller = K; // For the purpose of balancing all the parts, the number of elements // in two different parts will never differ from more than 1. smallerPart(k) // indicates those parts with a currently smaller number of elements and // nbSmaller = smallerPart.sum(). for (set=0; set0 && nbSmaller<=toAdd ) { part.s(1,K,set,1) += smallerPart.t(); toAdd -= nbSmaller; nbSmaller = K; smallerPart = 1; } if ( toAdd>0 ) { // choose at random toAdd parts among the nbSmaller ones for which // smallerPart is 1. int skipped=0; for (int k=0; k::setFold(multiDS& data, const Matrix& part, const int fold) const { if ( data.nbCats()!=nbCats() ) { delete data.ds; data.ds = new dataSet [nbCats()]; data.nbSets=nbCats(); data.list.resize(); } const int K = part(0,0); data.label.resize(label); data.normalized=normalized; for (int set=0; set0 ? part(fold,set) : ds[set].card()-part(-fold,set)); Matrix d(cardin,dim()); Matrix multiple(1,cardin,1); Matrix labelle(1,cardin); int ptrD=0; int ptrDS=0; int mu=1; for (int ptrP=0; ptrP ds[set].multiplicity(ptrDS) ) { mu=1; ptrDS++; } if ( fold>0 ? part(K+1+ptrP,set)==fold : part(K+1+ptrP,set)!=(-fold) ) { d.s(ptrD)=ds[set](ptrDS); labelle(ptrD)=ds[set].label(ptrDS); ptrD++; } } data.ds[set] = dataSet(d,multiple,labelle,ds[set].monotone); } } tcT boolean multiDS::isConsistant(const boolean signalIfNot/**=true**/) const { boolean res=true; for (int set1=0; set1ds[set2](data2,attr) && monotone(attr)!=positiveAttr ) ) { different=true; break; } if ( !different ) if ( signalIfNot ) { res=false; flog<<"\n### Data" << setw(6) << ds[set1].label(data1) << " cannot be distinguished from data" << setw(6) << ds[set2].label(data2) << endl << ds[set1](data1) << ds[set2](data2) ; } else return false; } return res; } tcT boolean isInBox (Array& corner1, Array& corner2, Array& candidate) { // Check whether candidate[0] is in the box delimitated by the two for (int v = 1; v < corner1.n(); v++) // v start at one since 0 is already OK due to the sorting if ( corner1(v) < corner2(v) ) { if ( candidate(v) < corner1(v) || candidate(v) > corner2(v) ) return false; } else if ( candidate(v) < corner2(v) || candidate(v) > corner1(v) ) return false; return true; } tcT Array multiDS::listOfPairs( boolean compress/**=false**/, boolean recreate/**=false**/) { if ( !recreate && list.n() ) return list; int clL, clR; list.resize(nbPairs(),4); int ptr=0; boolean addThis=true; for (clL=0; clL2 ) cout<< endl << "point " << ds[clL].label(l) << " in class " << clL << " and point " << ds[clR].label(r) << " in class " << clR << "suppressed by point-in-a-box"; } } if ( trace>1 && ptr& multiDS::binarize(const cutPtsSet& cps) { int garbage; Matrix oldMono(ds[0].monotone); for (int set=0; set bd(ds[set].distinct(),cps.nbCP()); for (int row=0; row(bd,ds[set]); ds[set].checkMult(garbage).sort().reorganize(); normalized=true; } if ( oldMono.n() ) { Matrix newMono(cps.nbCP()); for (int cp=0; cp& multiDS::binarize(const cutPtsSet& cps, float confidence) { int garbage; Matrix oldMono(ds[0].monotone); for (int set=0; set bd(ds[set].distinct(),cps.nbCP()); for (int row=0; row(bd,ds[set]); ds[set].checkMult(garbage).sort().reorganize(); normalized=true; } if ( oldMono.n() ) { Matrix newMono(cps.nbCP()); for (int cp=0; cp::mapCube (Matrix& net, const int expectedDim/**=10**/) { // define 2 to the power expectDim int twoToDim=1; for (int i=0; i index(card(),2), ind(card(),2); Matrix meanData(1,dim()); meanData=0.0; int pos=0; for (int set=0; set> eta0; float decr0;cout<< "Give the decr factor for rate 0:"; cin>> decr0; float eta1; cout<< "Give the initial learning rate 1:"; cin>> eta1; float decr1;cout<< "Give the decr factor for rate 1:"; cin>> decr1; float eta2; cout<< "Give the initial learning rate 2:"; cin>> eta2; float decr2;cout<< "Give the decr factor for rate 2:"; cin>> decr2; float lastEta=0.1; // Main loop constructing the network Matrix point(dim()); int minDist,dist,winner; Matrix clas(twoToDim); clas=0; int clasW=50; cout<<"Class weight:"; cin>> clasW; #if _graphic const int xmin=0; const int xmax=40; const int ymin=0; const int ymax=40; const int offs=3; const int xr1=xmin-offs/2; const int yr1=ymin-offs/2; const int xr2=xmax+offs/2; const int yr2=ymax+offs/2; const int xtext=xmin-offs+1; const int ytext=ymin-offs+1; const int sca=100; char symb[4]; symb[0]='o'; symb[1]='\0'; symb[2]='+'; symb[3]='\0'; openpl(); erase(); space(sca*(xmin-offs),sca*(ymin-offs),sca*(xmax+offs),sca*(ymax+offs)); box(sca*xr1,sca*yr1,sca*xr2,sca*yr2); closepl(); #endif do { #if _graphic if ( dim()==2 ) { erase(); box(sca*xr1,sca*yr1,sca*xr2,sca*yr2); for (int j=0; j0; nbpt--) { // pick one point at random int temp = randomI(nbpt); set = ind(temp,0); int pt = ind(temp,1); ind.s(temp) = ind.s(nbpt-1); for (int d=0; d dist) { minDist=dist; winner=i; } } // modify the winner net.s(winner) += eta0 * (point - net.s(winner)); clas(winner) += eta0 * (2*set-1 - clas(winner)); // modify the neighbours int neighbour; for (int bit=1; bit lastEta); } tcT multiDS& multiDS::binarize(const Matrix net) { int garbage; int oldDim=dim(); for (int set=0; set=30 ) throw whereWhich( "multiDS& multiDS::binarize(const Matrix net)", "Number of columns of net is not a power of 2"); Matrix bd(ds[set].distinct(),newDim); for (int row=0; row dist) { minDist=dist; winner=i; } } // Binarize the point indexed by row for (int d=0; d>=1) bd(row,d) = winner & 1; } ds[set] = dataSet(bd,ds[set]); ds[set].checkMult(garbage).sort().reorganize(); normalized=true; } return *this; } /////////////////////// int patInd (int n, int d, binArray& wordP, const Matrix& binomial) { if ( n==d || !d ) return 0; int x1; for (x1=0; x1& binomial) { if ( !d ) wordP = false; else if ( d==1 ) { wordP = false; wordP.setOn(index); } else if ( n==d ) wordP = true; else { int nextN = n-1; while ( index >= binomial(d-1,nextN) ) index-=binomial(d-1,nextN--); if ( nextN < n-1 ) wordP.s(0,1,0,n-1-nextN)=false; wordP.setOn(n-1-nextN); indPat(nextN,d-1,index,wordP.s(0,1,n-nextN),binomial); } } void nextWord (binMatrix& w) { int l=w.n()-1; if ( w(l) ) { while ( l && w(l-1) ) l--; int nb1=w.n()-l; while ( l && !w(l-1) ) l--; w.s(0,1,l+1,nb1) = true; if ( l+1+nb1 < w.n() ) w.s(0,1,l+1+nb1) = false; } else while ( l && !w(l-1) ) l--; if ( !l ) throw whereWhich("void nextWord(binMatrix&)","No next word"); w.setOn(l); w.setOff(l-1); } boolean coverOne(const Matrix& rowInd, const binArray& table) { unsigned char conjunction; for (int col=0; col& rowInd, const binArray& table, const Array& mult, const int minCover) { char unsigned conjunction; int nbCover=0; for (int col=0; col=minCover ) return true; } return false; } int nbCover(const Matrix& rowInd, const binArray& table, const Array& mult) { char unsigned conjunction; int res=0; for (int col=0; col& rowInd, const binArray& table, Matrix& coverage) { binMatrix conjunction(table.n()); coverage.resize(table.n()); int nbC=0; conjunction = table.s(rowInd(0)); for (int row=1; row& rowInd, const binArray& table, const int minDist, int toler) { for (int col=0; col= minDist ) break; if ( dist& sopMult, const Array& posMult, const Array negMult, const int nbPos, const int nbNeg, Matrix& posCover, int gap2Neg, float tolerance, binMatrix& might, int& nbCandidates, int minPosCoverage, patterns& pList, int& nbPatterns) { Matrix indices(size); int ic=0; int bars1I=barsI; for (int pp=0; pp= minPosCoverage ) { int toler=(int)(float(nbCov)*tolerance*float(nbNeg)/float(nbPos)); if ( coverAtLeast(indices,neg,negMult,toler+1) || (gap2Neg>1 && !minDistAtLeast(indices,neg,gap2Neg,toler)) ) { if ( size covered; whoIsCovered(indices,pos,covered); for (int Iam=0; Iam::patchPatterns (patterns& patList, const Array& posClass, const Array& negClass, const boolean orientation, int& puc) const { int i,j,row,set,record; int posDistinct=0, max_dist=0; int n = dim(); for (i=0; i pos_uncover(posClass.n(),max_dist); for (set=0; set thisData(ds[posClass(set)](record)); pos_uncover(set,record)=1; for (patList.first(); !patList.eol(); patList.next()) if ( patList.fire(thisData) ) { pos_uncover(set,record)=0; break; } } int cur_row=-1, cur_set=-1; for(set=0; set=0) // all this is for the positive patterns { Matrix candidate(1,2*n,0); for(j=0;j::patchPattern) !\n"; // get this point out of the list of uncovered points pos_uncover(cur_set,cur_row)=0; } else { do { literal=0, maxMinDistance=0; for(j=0;j<2*n; j++) // here we find the literal to be dropped next if(candidate(j)) { int minDistance=n+1; candidate(j)=0; // here we momentarily drop the candidate literal for(set=0; set maxMinDistance) {literal=j; maxMinDistance= minDistance;} candidate(j)=1; // put the current literal back to try with the next } if (maxMinDistance>0) candidate(literal)=0; // here we drop the chosen literal } while(maxMinDistance>0); // keep dropping literals while possible int size=0,next=0; // here we measure the size of the pattern to be included for(i=0;i index(size); // here we put the pattern in the index matrix for(i=0;i<2*n;i++) if(candidate(i)==1) index(next++)=i; int cover=0,fire; // here we find the coverage of the new pattern for (set=0; set thisData(ds[posClass(set)](record)); ++puc; for (patList.first(); !patList.eol(); patList.next()) if ( patList.fire(thisData) ) { pos_uncover(set,record)=0; --puc; break; } } } cur_row=-1, cur_set=-1; for(set=0; set::printPatFile (patterns& posList, patterns& negList, const Array& posClass, const Array& negClass, ostream& s, int getPat, int pos_neg, Array& stat) const { int set, row, c_pos, e_pos, c_neg, e_neg, counter=0; int max_dist=0, i; int pos_count[2], neg_count[2]; double total_pos, total_neg; pos_count[0]=pos_count[1]=neg_count[0]=neg_count[1]=0; for (i=0; i pos_error(posClass.n(),max_dist); max_dist=0; for (i=0; i neg_error(negClass.n(),max_dist); for(set=0;set thisData(ds[posClass(set)](row)); pos_error(set,row)=0; pos_count[1]+=ds[posClass(set)].multiplicity(row); for (posList.first(); !posList.eol(); posList.next()) if ( posList.fire(thisData) ) { total_pos += posList.weight(); } for (negList.first(); !negList.eol(); negList.next()) if ( negList.fire(thisData) ) { total_neg += negList.weight(); } if(total_pos <= total_neg) { pos_error(set,row)=1; pos_count[1]-=ds[posClass(set)].multiplicity(row); pos_count[0]+=ds[posClass(set)].multiplicity(row); } } for(set=0;set thisData(ds[negClass(set)](row)); neg_error(set,row)=0; neg_count[1]+=ds[negClass(set)].multiplicity(row); for (posList.first(); !posList.eol(); posList.next()) if ( posList.fire(thisData) ) { total_pos += posList.weight(); } for (negList.first(); !negList.eol(); negList.next()) if ( negList.fire(thisData) ) { total_neg += negList.weight(); } if(total_neg <= total_pos) { neg_error(set,row)=1; neg_count[1]-=ds[negClass(set)].multiplicity(row); neg_count[0]+=ds[negClass(set)].multiplicity(row); } } if(!getPat) { s << " Training Testing Patterns \n" << " c_pos e_pos c_neg e_neg c_pos e_pos c_neg e_neg "; if(pos_neg) s << " positive\n\n"; else s << " negative\n\n"; } if (pos_neg) { stat(0,0)= pos_count[1]; stat(0,1)= pos_count[0]; stat(0,2)= neg_count[1]; stat(0,3)= neg_count[0]; } else { stat(0,0)= neg_count[1]; stat(0,1)= neg_count[0]; stat(0,2)= pos_count[1]; stat(0,3)= pos_count[0]; } for (posList.first(); !posList.eol(); posList.next()) { counter++; c_pos=e_pos=c_neg=e_neg=0; for(set=0; set< posClass.n(); set++) for (row=0; row< ds[posClass(set)].distinct(); row++) { Array thisData(ds[posClass(set)](row)); if(posList.fire(thisData) && pos_error(set,row)==0) c_pos+=ds[posClass(set)].multiplicity(row); else if(posList.fire(thisData) && pos_error(set,row)==1) e_pos+=ds[posClass(set)].multiplicity(row); } for(set=0; set< negClass.n(); set++) for (row=0; row< ds[negClass(set)].distinct(); row++) { Array thisData(ds[negClass(set)](row)); if(posList.fire(thisData) && neg_error(set,row)==0) c_neg+=ds[negClass(set)].multiplicity(row); else if(posList.fire(thisData) && neg_error(set,row)==1) e_neg+=ds[negClass(set)].multiplicity(row); } if(pos_neg) { stat(counter,0)= c_pos; stat(counter,1)= e_pos; stat(counter,2)= c_neg; stat(counter,3)= e_neg; } else { stat(counter,0)= c_neg; stat(counter,1)= e_neg; stat(counter,2)= c_pos; stat(counter,3)= e_pos; } } } // End function printpatfile tcT void multiDS::smallPatterns ( patterns& patList, int& nbNonCovered, const Array& posClass, const Array& negClass, const boolean orientation, int maxSize/**=4**/, int minTrue/**=1**/, int coverNeeded/**=10**/, int gap2F/**=1**/, float tolerance/**=0.0**/ //, float rateLimit/**=0.3**/ ) const { chrono smallPatGen; int n = dim(); int i,j,record; int posCard=0; int posDistinct=0; int negCard=0; int negDistinct=0; for (i=0; i posM(posDistinct); Matrix sopM(posDistinct); Matrix negM(negDistinct); for (i=0; i posCover(1,posDistinct,0); Matrix sop2pos(posDistinct); for (i=0; i binomial(maxSize+1,n+1,1); Matrix power2(maxSize+1); power2(0)=1; for (i=1; i<=maxSize; i++) { power2(i) = 2*power2(i-1); for (j=i+1; j<=n; j++) binomial(i,j)=binomial(i-1,j-1)+binomial(i,j-1); } // Declaration of the global variables containing the patterns // And the potential patterns binMatrix mightbcp[2]; int current,previous; if ( maxSize < 2 ) { mightbcp[0].resize(1,1); mightbcp[1].resize(1,1); current = 0; } else if ( 2*maxSize -4 < n ) { mightbcp[0].resize(binomial(maxSize-2,n),power2(maxSize-2)); mightbcp[1].resize(binomial(maxSize-1,n),power2(maxSize-1)); current = maxSize%2; } else { mightbcp[0].resize(binomial(n/2,n),power2(maxSize-2)); mightbcp[1].resize(binomial(n/2,n),power2(maxSize-1)); current = maxSize%2; } mightbcp[current].s(0,1,0,1) = true; patList.empty(); float candidateRate = 1.0; if ( trace>2 ) { cout << "\n time #Po dg #patt #candid #terms |"; for (i=0; i<21; i++) cout << setw(3) << i; cout << " >\n"; cout.precision(1); cout.setf(ios::showpoint | ios::fixed); cout<< setw(6) << smallPatGen.time() << setw(4) << nbSop << setw(3) << 0 << setw(6) << 0 << setw(8) << 1 << setw(10) << 1 << " |\n"; } // Main loop on the size of the terms for (int size=1; size<=maxSize; size++) { previous = current; current = 1-current; if ( size < maxSize ) { // initialization of the table with patterns of size SIZE // This table is initialized empty mightbcp[current].s(0,binomial(size,n),0,power2(size)) = false; } // declaration of some working variables int nbPatterns=0; int nbCandidates=0; if ( size > 1 && false ) // candidateRate <= rateLimit ) { boolean foundOne = false; binMatrix wordP(1,n); binMatrix word1P(1,n); int fromVar; int nbBlocks = ( size < 5 ) ? 1 : power2(size-4); // loop on the word (sequence of variables) of size SIZE-1 for (int wordI=0; wordI::smallPatterns(...)", "patInd(indPat(x))!=x"); if ( wordP(n-1) ) goto nextWordI; if ( size==1 ) fromVar=0; else { fromVar=n-1; while ( !wordP(fromVar-1) ) fromVar--; } } // at this point, foundOne=true // and wordP and fromVar are defined // loop on the possible bars within block barsB int stopAt=minimum(packetSize*(barsB+1),power2(size-1)); for (int barsI=packetSize*barsB; barsI rateLimit ) { binMatrix wordP(1,n,false); wordP.s(0,1,0,size) = true; binMatrix word1P(1,n,false); Matrix subWordIndex(1,n); // loop on the word (sequence of variables) of size SIZE for (int wordI=0; wordI>=1; if ( orientation ? ( ( negated && monotone(v)==positiveAttr ) || ( !negated && monotone(v)==negativeAttr ) ) : ( ( !negated && monotone(v)==positiveAttr ) || ( negated && monotone(v)==negativeAttr ) ) ) { monotonicityOK=false; break; } } if ( !monotonicityOK ) continue; } // Check whether all its subterms are in mightbcp[previous] word1P = wordP; int rank=1; int bars1I, rem; boolean isCandid=true; // loop on all the subterms of the current word for (int sub=0; sub 0.5 ) if ( trace>2 ) { cout<< setw(6) << smallPatGen.time() << setw(4) << nbSop << setw(3) << size << setw(6) << nbPatterns << setw(8) << nbCandidates << setw(10) << power2(size)*binomial(size,n) << " |"; Matrix distr(1,22,0); for (i=0; i= coverNeeded ) { sop.s(0,sop.m(),i,1) = sop.s(0,sop.m(),nbSop-1,1); sopM(i) = sopM(nbSop-1); sop2pos(i--) = sop2pos(--nbSop); } if ( !nbSop ) goto terminate; if ( prevNbPos > nbSop ) { if ( trace>2 ) cout<< setw(6) << smallPatGen.time() << setw(4) << nbSop << flush; for (int wordI=0; wordI indices(size); int ic=0; int bars1I=barsI; for (int pp=0; pp2 ) cout<< setw(17) << nbCandidates << endl; } // if ( prevNbPos > nbSop ) } // if ( reduceTrue && size::weightPatterns (patterns& patList, const Array& posClass, const Array& negClass, int weighting/**=1**/, boolean normalize/**=true**/, boolean delZWP/**=true**/) const { switch(weighting) { case 0: for (patList.first(); !patList.eol(); patList.next()) patList.weight()=1.0; break; case 1: for (patList.first(); !patList.eol(); patList.next()) patList.weight()=float(patList.coverage()); break; case 2: case 3: { int power2[10]; power2[0]=1; for (int i=1; i<10; i++) power2[i]=2*power2[i-1]; if ( weighting==2 ) for (patList.first(); !patList.eol(); patList.next()) patList.weight()= float(power2[patList.size()]*patList.coverage()); else // ( weighting==3 ) for (patList.first(); !patList.eol(); patList.next()) patList.weight()=float(power2[10-patList.size()]); } break; case 6: for (patList.first(); !patList.eol(); patList.next()) patList.weight()=Sqr(float(patList.coverage())); break; case 7: // here the weight is the cube of the coverage Mario for (patList.first(); !patList.eol(); patList.next()) patList.weight()= Sqr(float(patList.coverage()))*(float(patList.coverage())); break; case 8: // here the weight is the exponentiation of the coverage. Mario for (patList.first(); !patList.eol(); patList.next()) { float base; // base for expontiation. int i; base=1.0; // value is arbritrary, small enough to prevent // overflow (hopefully) for(i=1;i<=patList.coverage();i++) base*=1.2; // so the weight is 1.2^coverage patList.weight()=base; } break; case 4: { // count the number of points int nbPoints=0; for (int set=0; set thisData(ds[posClass(set)](pt)); for (patList.first(); !patList.eol(); patList.next()) if ( patList.fire(thisData) ) { covered.setOn(finger); nbPoints++; break; } } // construct the LP Matrix c(1, patList.nbPatterns()+1, 0); c(patList.nbPatterns())=+1; Matrix b(1, nbPoints+1, 0); b(nbPoints)=+1; Matrix R(1, nbPoints+1, 'G'); R(nbPoints)='E'; Matrix A(nbPoints+1, patList.nbPatterns()+1, 0); A.s(nbPoints,1,0,patList.nbPatterns())=+1; A.s(0,nbPoints,patList.nbPatterns(),1)=-1; finger=0; int row=0; for (set=0; set thisData(ds[posClass(set)](pt)); int col=0; for (patList.first(); !patList.eol(); patList.next(), col++) if ( patList.fire(thisData) ) A(row,col)=1; row++; } // create and solve the LP linearProgram myLP (MAXIMIZE, c, A, R, b); double optValue; Matrix optPoint(patList.nbPatterns()+1); int status = myLP.solve(optValue,optPoint); if ( status ) { flog<< "\n### lp status is " << status << "!!!\n" << flush; return; } int col=0; for (patList.first(); !patList.eol(); patList.next()) patList.weight()=optPoint(col++); normalize=false; // it is already normalized } break; } // switch (weighting) if ( delZWP ) { float totWeight=0.0; for (patList.first(); !patList.eol(); patList.next()) totWeight+=patList.weight(); float thr = (totWeight/float(patList.nbPatterns()))/10000.0; patList.first(); while ( !patList.eol() ) if ( patList.weight() < thr ) patList.delPattern(); else patList.next(); } if ( normalize ) // normalize weights so that sum is 1.0 { float totWeight=0.0; for (patList.first(); !patList.eol(); patList.next()) totWeight+=patList.weight(); for (patList.first(); !patList.eol(); patList.next()) patList.weight()/=totWeight; } if ( trace>2 ) { float mean=0.0; float min=1000.0; float max=-1000.0; for (patList.first(); !patList.eol(); patList.next()) { mean+=patList.weight(); minimize(min,patList.weight()); maximize(max,patList.weight()); } mean /= float(patList.nbPatterns()); cout<< "mean=" << setw(6) << setprecision(2) << mean << ", span=" << setw(6) << (mean==0.0 ? 0.0 : (max-min)/mean) << " for weights of patterns covering class " << posClass; } } tcT void multiDS::balanceWeightPatterns (patterns& posList, patterns& negList, const Array& posClass, const Array& negClass, int method/**=1**/) const { // First, associate to each point (positive and negative) their multipli- // city as well as a triplet a = (f-, f+, s), where f- (resp. f+) is the // result of the pseudo-boolean function f+ (resp. f-), and s is +1 for // the positive points and -1 for the negative points. int distinctPos=0; int distinctNeg=0; int cardPos=0; for (int set=0; set a(distinctPos+distinctNeg,3); a = 0.0; Matrix m(1,distinctPos+distinctNeg); int point=0; for (set=0; set thisData(ds[negClass(set)](example)); for (negList.first(); !negList.eol(); negList.next()) if ( negList.fire(thisData) ) a(point,0)-=negList.weight(); for (posList.first(); !posList.eol(); posList.next()) if ( posList.fire(thisData) ) a(point,1)+=posList.weight(); a(point,2)=-1; } for (set=0; set thisData(ds[posClass(set)](example)); for (negList.first(); !negList.eol(); negList.next()) if ( negList.fire(thisData) ) a(point,0)-=negList.weight(); for (posList.first(); !posList.eol(); posList.next()) if ( posList.fire(thisData) ) a(point,1)+=posList.weight(); a(point,2)=1; } // Try to find a triplet x = (x+, x-, x0) such that a.x > 0 for all points // a. This is done by minimizing the criterion of the perceptron, using a // full gradient descent. Matrix x(1,3); x(0)=-1.0; x(1)=1.0; x(2)=0.0; x /= sqrt((x % x)(0)); Matrix bestx(x); int nbErrors=1; float objVal; float optVal=1E10; Matrix grad(1,3); float step; for (step=0.2; step>0.00001; step*=0.99) { nbErrors=0; grad=0.0; for (int pt=0; pt0.00001; step*=0.99) { min=1.1*((x % a).min()(0)); grad=0.0; for (int pt=0; pt optVal ) { optVal = min; bestx = x; } //grad /= sqrt(grad % grad); x += (step * grad); x /= sqrt((x % x)(0)); } } x = bestx; // translate the solution x into an appropriate modification of the // weights of the patterns if ( x(0) >= 0.0) throw whereWhich( "void multiDS::balanceWeightPatterns(...) const", "Strang solution x(0)>0"); x /= -x(0); if ( x(1) != 1.0 ) for (posList.first(); !posList.eol(); posList.next()) posList.weight() *= x(1); if ( x(2) != 0.0 ) { Matrix emptyPattern(0); posList.add(dim(),emptyPattern,cardPos,x(2)); } } tcT void multiDS::cleanPatterns (patterns& patList, const Array& posClass) const { int nbDistinct=0; for (int set=0; set thisData(ds[posClass(set)](point)); int row=0; for (patList.first(); !patList.eol(); patList.next(), row++) if ( patList.fire(thisData) ) { A.setOn(row,col); pointCovered=true; } if ( pointCovered ) col++; } A.resize_(A.m(),col); Matrix info(patList.nbPatterns(),2); int i; for (i=0, patList.first(); i deleted(patList.nbPatterns()); int nbPat=A.m(); int comp; for (int upper=0; upper 0 ) // comp=a+b, a= 1 if A.s(upper) included in A.s(lower) // a= 0 if A.s(upper) == A.s(lower) // a=-1 if A.s(upper) contains A.s(lower) // b= 1 if degree upper > degree lower // b= 0 if degree upper == degree lower // b=-1 if degree upper < degree lower // Thus, if comp==+1,+2, upper is deleted, if comp==-1,-2, // lower is deleted, while if comp==0, nobody is deleted. { A.s(upper) = A.s(--nbPat); deleted(nbDel++)=info(upper,0); info.s(upper--) = info.s(nbPat); lower = nbPat; } else if ( comp < 0 ) { A.s(lower) = A.s(--nbPat); deleted(nbDel++)=info(lower,0); info.s(lower--) = info.s(nbPat); } if ( nbDel ) { deleted.resize_(nbDel); deleted.sort(); // delete the furthest in the list first, since delPattern // place the last element of the list at the place of the one // deleted. for (int ptr=nbDel-1; ptr>=0; ptr--) patList.delPattern(deleted(ptr)); patList.do_sort(); } } tcT void multiDS::reducePatterns ( patterns& patList, const Array& posClass, int minCover/**=1**/) const { int nbDistinct=0; for (int set=0; set thisData(ds[posClass(set)](point)); int col=0; int nbCover=0; for (patList.first(); !patList.eol(); patList.next(), col++) if ( patList.fire(thisData) ) { A.setOn(row,col); nbCover++; } if ( nbCover ) { row++; minimize(minCoverage,nbCover); } if ( nbCover < minCover ) flog<< "\n### data " << setw(3) << ds[posClass(set)].label(point) << " is covered by " << nbCover << " pattern" << (nbCover ? (nbCover==1 ? " only" : "s only") : "") << endl << flush; } A.resize_(row,A.n()); setCover sc(A); sc.greedy(flog,1,-minCover,false); if ( minCover==1 ) sc.reduce(); Matrix sol(sc.solution()); if ( sol.n() < A.n() ) { sol.sort(); // delete the furthest missing from the list first, since delPattern // place the last element of the list at the place of the one // deleted. int pos=sol.n()-1; for (int ptr=A.n()-1; ptr>=0; ptr--) if (pos>=0 && ptr==sol(pos)) pos--; else patList.delPattern(ptr); patList.do_sort(); } } tcT int multiDS::nbErrors( patterns& posList, patterns& negList, float errorTolerance, int& wronglyOn, int& wronglyOff, int& undecidable, char* fileName/**=NULL**/) const { int res=0; wronglyOn=0; wronglyOff=0; undecidable=0; ofstream out, out_err; if ( fileName ) { out.open(fileName); out.setf(ios::fixed); out.precision(5); char fne[80]; sprintf(fne,"%s.error",fileName); out_err.open(fne); out_err.setf(ios::fixed); out_err.precision(5); } for (int set=0; set<2; set++) for (int example=0; example thisData(ds[set](example)); // evaluate the Pseudo-boolean function for (posList.first(); !posList.eol(); posList.next()) if ( posList.fire(thisData) ) totalPos+=posList.weight(); for (negList.first(); !negList.eol(); negList.next()) if ( negList.fire(thisData) ) totalNeg+=negList.weight(); if ( fileName ) out << setw(1) << set << setw(4) << ds[set].label(example) << setw(4) << ds[set].multiplicity(example) << setw(10) << totalPos << setw(10) << totalNeg << setw(10) << totalPos-totalNeg << endl; if ( ( set && totalPos-totalNeg<-errorTolerance ) || ( !set && totalPos-totalNeg>+errorTolerance ) ) { res+=ds[set].multiplicity(example); if (set) wronglyOff+=ds[set].multiplicity(example); else wronglyOn +=ds[set].multiplicity(example); if ( fileName ) { out_err << "\n\npoint " << ds[set].label(example) << ", from class " << set << setw(10) << totalPos << setw(10) << totalNeg << setw(10) << totalPos-totalNeg; int nbPinL=0; for (posList.first(); !posList.eol(); posList.next()) if ( posList.fire(thisData) ) { if (!nbPinL) out_err << "\npositive firing patterns:\n"; else out_err << ", "; if (!(++nbPinL % 5)) out_err << endl; out_err << "("; posList.printPattern(out_err); out_err << ")"; } nbPinL=0; for (negList.first(); !negList.eol(); negList.next()) if ( negList.fire(thisData) ) { if (!nbPinL) out_err << "\nnegative firing patterns:\n"; else out_err << ", "; if (!(++nbPinL % 5)) out_err << endl; out_err << "("; negList.printPattern(out_err); out_err << ")"; } } } else if ( absolute(totalPos-totalNeg)<=errorTolerance ) { undecidable+=ds[set].multiplicity(example); res+=ds[set].multiplicity(example); } } return res; } tcT int multiDS::boundOnErrors(const multiDS& basis) const // In this form, this routine works only if basis is consistant. // In case of inconsistant basis its a mess to decide what will be // a mistake. // We also assume that multiplicities are checked in basis and in *this. { int res=0; if ( !basis.isConsistant() ) return -1; binMatrix error(1,distinct(),false); int cumul=0; for (int set=0; set multiDS::merge(const boolean class_first/**=true**/) { Matrix d(distinct(),dim()+1);// data set matrix Matrix m(distinct()); // multiplicity vector Matrix l(distinct()); // label vector int position=0; for(int set=0;set< nbCats();position+=ds[set++].distinct()) { d.s(position,ds[set].distinct(),class_first,dim())= ds[set].matrix(); if(class_first) d.s(position,ds[set].distinct(),0,1)= (T) (set); else d.s(position,ds[set].distinct(),dim(),1)= (T) (set); for(int i=0; i(d,m,l,ds[0].monotone)); } tcT multiDS& multiDS::separate (const dataSet& from) { #ifdef _CHECK_INDEX if ( from.classCol()<0 || from.classCol()>=from.dim() ) throw whereOutOfRange( "multiDS::multiDS(const dataSet&, int from.classCol())", "from.classCol()",from.classCol(),0,from.dim()-1); #endif // determine the number of distinct categories Matrix singleton(1,1,from.classCol()); int garbage; dataSet categories(from,singleton); categories.sort().checkMult(garbage); Matrix catIndex(categories.matrix().ta()); ds = new dataSet [catIndex.n()]; label.resize(catIndex.n()); for (int cI=0; cI cm(from.distinct(),from.dim()-1); Matrix multiple(from.distinct()); Matrix labelle(from.distinct()); int cmPtr=0; for (int dPtr=0; dPtr 0 ) cm.s(cmPtr,1,0,from.classCol()) = from(dPtr).s(0,1,0,from.classCol()); if ( from.classCol() < from.dim()-1 ) cm.s(cmPtr,1,from.classCol()) = from(dPtr).s(0,1,from.classCol()+1); multiple(cmPtr) = from.multiplicity(dPtr); labelle(cmPtr) = from.label(dPtr); cmPtr++; } if ( cmPtr ) { ds[nbSets] = dataSet( cm.resize_(cmPtr,cm.n()), multiple.resize_(cmPtr), labelle.resize_(cmPtr)); label(nbSets) = catIndex(cI); nbSets++; } } if ( nbSets < catIndex.n() ) // some categories were inexistant { label.resize_(nbSets); dataSet* aux = ds; ds = new dataSet [nbSets]; for (int i=0; i