ltraitement_point_apprentissage.c

Go to the documentation of this file.
00001 /*
00002 *    Copyright (c) 2007. The BATI team. All right reserved.
00003 *
00004 *    This file is part of BATI library.
00005 *
00006 *    BATI library is free software: you can redistribute it and/or modify
00007 *    it under the terms of the GNU General Public License as published by
00008 *    the Free Software Foundation, either version 3 of the License, or
00009 *    (at your option) any later version.
00010 *
00011 *    BATI library  is distributed in the hope that it will be useful,
00012 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 *    GNU General Public License for more details.
00015 *
00016 *    You should have received a copy of the GNU General Public License
00017 *    along with BATI library.  If not, see <http://www.gnu.org/licenses/>.
00018 */
00019 
00020  /* ********* traitement de données d'apprentissages  **************************/
00021 /*                                                                            */
00022 /*                                                                            */
00023 /*                                                                            */
00024 /*  Derniere modif : Lavinia DARLEA, 27/07/06                                 */
00025 /* ****************************************************************************/
00026 
00027 
00028 
00043 
00044 
#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include "image.h"
#include "proto2D.h"
#include "ff_symb.h"
#include "classifier.h"
#include "traitement_point_apprentissage.h"
00053 
/*
 * Exchanges the two doubles designated by a and b.
 * Always returns 0.
 */
int intersch (double *a, double *b)
{
  const double first = *a;
  const double second = *b;

  *a = second;
  *b = first;
  return 0;
}
00061 
00062 int sum (int i, lgr_d id)
00063 {
00064   int j, a = 0;
00065   for (j = 0; j < i; j++)
00066     a = a + id.nb_pts[j];
00067   return (a);
00068 }
00069 
/* Returns a when a is strictly smaller than b, otherwise b. */
double min(double a, double b)
{
  return (a < b) ? a : b;
}
00076 
/* Returns a when a is strictly greater than b, otherwise b. */
double max(double a, double b)
{
  return (a > b) ? a : b;
}
00083 
00084 /* *************************  LECTURE  *******************************/
00098 param *traitement_donnees_lect(lgr_d *a, param *ptp, char *debq){
00099 
00100   char question[500];
00101   int i;
00102 
00103 //file that is to be processed
00104   sprintf(question, "%s Data file containing the learning points", debq);
00105   lec_param(question, ptp);
00106   a->fisin = ptp->rep;
00107   ptp = ptp->next;
00108 
00109   sprintf(question, "%s Number of classes in the current file", debq);
00110   lec_param(question, ptp);
00111   a->nb_class = atoi(ptp->rep);
00112   ptp = ptp->next;
00113 
00114   a->nb_pts = (int*)malloc(a->nb_class*sizeof(int));
00115 
00116   for (i = 0; i < a->nb_class; i++){
00117     sprintf(question, "%s Number of learning points in class %d", debq, i); 
00118     lec_param(question, ptp);
00119     a->nb_pts[i] = atoi(ptp->rep);
00120     ptp = ptp->next;
00121   }
00122 
00123   sprintf(question, "%s Number of attributes in the curent file", debq); 
00124   lec_param(question, ptp);
00125   a->nb_attr = atoi(ptp->rep);
00126   ptp = ptp->next;
00127 
00128 //output file, it will contain the processed data
00129   sprintf(question, "%s Data file containing the processed learning set", debq);
00130   lec_param(question, ptp);
00131   a->fisout1 = ptp->rep;
00132   ptp = ptp->next;
00133 
00134   return(ptp);
00135 }
00136 
00137 /* *************************  INITIALISATION  ***************************/
00149 int traitement_donnees_init(lgr_d a, learning_set *ls)
00150 {
00151   int nb_points, i, j, m;
00152   FILE * myfile;
00153 
00154 //initialization of the learning_set variable
00155  if ((myfile=fopen(a.fisin, "r")) == NULL) {
00156    printf("ERREUR: impossible d'ouvrir le fichier %s\n",a.fisin);
00157         exit(1);
00158  }
00159 
00160   nb_points = 0;
00161   for (i = 0; i < a.nb_class; i++)
00162     nb_points = nb_points + a.nb_pts[i];
00163 
00164 
00165 
00166   ls->input_ref.nb_attr = a.nb_attr;
00167   ls->input_ref.nb_pts = nb_points;
00168   ls->output_ref.nb_class = a.nb_class;
00169   ls->output_ref.nb_pts = nb_points;
00170   ls->output_ref.type = CRISP_CHOICE;
00171 
00172   if ((m = alloc_classifier_data_input_completely(&ls->input_ref)) != 0)
00173     printf("Probleme d'allocation memoire pour les donnees d'entree\n");
00174 
00175   m = alloc_classifier_data_output(&ls->output_ref);
00176 
00177 
00178  for (i=0; i<nb_points; i++)
00179    { for (j = 0; j < a.nb_attr; j++)
00180        fscanf(myfile,"%f ", &(ls->input_ref.attributes[j][i]));
00181 /*      printf("%f %f\n", ls->input_ref.attributes[0][i], ls->input_ref.attributes[1][i]); */
00182        fscanf(myfile, "%d", &a);
00183    }
00184 
00185  fclose(myfile);
00186 
00187  return 1;
00188 }
00189 
/* Single-precision minimum: yields b when a is strictly greater, else a. */
float minim(float a, float b)
{
  return (a > b) ? b : a;
}
00198 
/* Single-precision maximum: yields b when a is strictly smaller, else a. */
float maxim(float a, float b)
{
  return (a < b) ? b : a;
}
00207 
00208 float dmin(learning_set ls, int pos_start, int pos_stop, int att)
00209 {
00210   int i, j;
00211   float dm;
00212 
00213 
00214 //intermediary function, computes the minimum distance between 2 points in a cloud that corresponds to a class
00215   for (i = pos_start; i < pos_stop - 1; i++)
00216     for (j = i + 1; j < pos_stop; j++)
00217       if (fabs(ls.input_ref.attributes[att][i] - ls.input_ref.attributes[att][j]) != 0.0)
00218         {
00219           dm = fabs(ls.input_ref.attributes[att][i] - ls.input_ref.attributes[att][j]);
00220           i = pos_stop;
00221           j = pos_stop;
00222         }
00223 
00224   for (i = pos_start; i < pos_stop - 1; i++)
00225     for (j = i + 1; j < pos_stop; j++)
00226       if (fabs(ls.input_ref.attributes[att][i] - ls.input_ref.attributes[att][j]) != 0.0)
00227         {
00228           dm = minim(dm, fabs(ls.input_ref.attributes[att][i] - ls.input_ref.attributes[att][j]));
00229           //printf("%f %f \n", dm, fabs(ls.input_ref.attributes[att][i] - ls.input_ref.attributes[att][j]));
00230         }
00231 
00232   return (dm);
00233 }
00234 
00235 
00236 //reorganization of the learning set: adding information on the frequency, reduction of the dimensions by eliminating the multiple points
00237 int *clean_learning_set(learning_set *iris, lgr_d id)
00238 {
00239 
00240   int i, j, k = -1, a, att, p, n, m;
00241   int *sim, *verif;
00242   FILE *myfile;
00243 
00244 // The function reduces the dimension of the learning set by eliminating the points that repeat themselves. A recall of the number of appearances for every point that is kept is also available in the variable sim. The number of the vector corresponds to the point in the learning set, for the order has been kept
00245 
00246 
00247   myfile = fopen("learn_clean.data", "a");
00248 
00249   sim = (int*)malloc(iris->input_ref.nb_pts*sizeof(int));
00250   verif = (int*)malloc(iris->input_ref.nb_pts*sizeof(int));
00251 
00252     for (j = 0; j < iris->input_ref.nb_pts; j++)
00253       {
00254         sim[j] = 0;
00255         verif[j] = 1;
00256       }
00257 
00258     n = 0;
00259 
00260 
00261   for (i = 0; i < iris->output_ref.nb_class; i++)
00262     {
00263       n = n + id.nb_pts[i];
00264       for (j = 0; j < id.nb_pts[i]; j++)
00265         {
00266           k++;
00267           if (verif[k])
00268             {
00269               sim[k] = 1;
00270               for (m = k+1; m < n; m++)
00271                 {
00272                   a = 0;
00273                   for (att = 0; att < iris->input_ref.nb_attr; att++)
00274                     if (iris->input_ref.attributes[att][m] != iris->input_ref.attributes[att][k])
00275                       a = 1;
00276                   if (!a)
00277                     {
00278                       sim[k]++;
00279                       verif[m] = 0;
00280                     }
00281                 }
00282             }
00283         }
00284     }
00285 
00286 
00287   n = 0;
00288   m = 0; k = -1;
00289   for (i = 0; i < iris->output_ref.nb_class; i++)
00290     {
00291       p = 0;
00292       for (j = 0; j < id.nb_pts[i]; j++)
00293         {
00294           k++;
00295           if (sim[k])
00296             {
00297               for (att = 0; att < iris->input_ref.nb_attr; att++)
00298                 {
00299                   iris->input_ref.attributes[att][m] = iris->input_ref.attributes[att][k];
00300                   fprintf(myfile, "%f ", iris->input_ref.attributes[att][m]);
00301                 }
00302               fprintf(myfile, "%d\n", sim[k]);
00303               verif[m] = sim[k];
00304               m++;
00305               p++;
00306             }
00307         }
00308       fprintf(myfile, "\n");
00309       id.nb_pts[i] = p;
00310     }
00311 
00312   iris->input_ref.nb_pts = 0;
00313   for (i = 0; i < iris->output_ref.nb_class; i++)
00314     iris->input_ref.nb_pts = iris->input_ref.nb_pts + id.nb_pts[i];
00315 
00316   free(sim);
00317 
00318   sim = (int*)malloc(iris->input_ref.nb_pts * sizeof(int));
00319 
00320   for (i = 0; i < iris->input_ref.nb_pts; i++)
00321     sim[i] = verif[i];
00322 
00323   free(verif);
00324   fclose(myfile);
00325 
00326   return(sim);
00327 }
00328 
00329 float val_min(learning_set ls, int pos_start, int pos_stop, int att)
00330 {
00331   float m;
00332   int i;
00333 
00334   m = ls.input_ref.attributes[att][pos_start];
00335 
00336   for (i = pos_start+1; i < pos_stop; i++)
00337     m = minim(m, ls.input_ref.attributes[att][i]);
00338 
00339   return m;
00340 
00341 }
00342 
00343 float val_max(learning_set ls, int pos_start, int pos_stop, int att)
00344 {
00345   float m;
00346   int i;
00347 
00348   m = ls.input_ref.attributes[att][pos_start];
00349   for (i = pos_start+1; i < pos_stop; i++)
00350     m = maxim(m, ls.input_ref.attributes[att][i]);
00351 
00352   return m;
00353 
00354 }
00355 
00356 
00357 // Connected components
00358 int competitive_agglomeration(learning_set *iris, int start, int stop, int att1, int att2, int classe)
00359 {
00360   const nb_pts = stop - start;
00361   const C = min(10, max(2,nb_pts/10));
00362   double *exemple[2], card[C], *prot[2], d, dmin, *d_intr[2];
00363   int i, j, l, k = -1, n, m = 0, *class, cl = C, flag = 1, t, s;
00364   FILE *myfile;
00365 
00366 
00367    if ((myfile = fopen("compet_agg.txt", "a")) == NULL) 
00368      printf("Error in opening the text file for the unsupervised clustering\n"); 
00369 
00370 
00371   exemple[0] = (double*)malloc(nb_pts * sizeof(double));
00372   exemple[1] = (double*)malloc(nb_pts * sizeof(double));
00373   class = (int*)malloc(nb_pts * sizeof(int));
00374   prot[0] = (double*)malloc(C * sizeof(double));
00375   prot[1] = (double*)malloc(C * sizeof(double));
00376 
00377 
00378   for (i = 0; i < nb_pts; i++)
00379     {
00380       exemple[0][i] = iris->input_ref.attributes[att1][i+start];
00381       exemple[1][i] = iris->input_ref.attributes[att2][i+start];
00382     }
00383 
00384 //the initial partition and the initial prototypes
00385   n = nb_pts / C;
00386   k = -1;
00387   for (l = 0; l < C-1; l++)
00388     {
00389       prot[0][l] = prot[1][l] = card[l] = 0.0;
00390       for (j = 0; j < n; j++)
00391         {
00392           k++;
00393           class[k] = l;
00394           prot[0][l] = prot[0][l] + exemple[0][k];
00395           prot[1][l] = prot[1][l] + exemple[1][k];
00396           card[l]++;
00397         }
00398       prot[0][l] = prot[0][l] / card[l];
00399       prot[1][l] = prot[1][l] / card[l];
00400     }
00401   prot[0][l] = prot[1][l] = card[l] = 0.0;
00402   for (j = k+1; j < nb_pts; j++)
00403     {
00404       class[j] = C - 1;
00405       card[l]++;
00406       prot[0][l] = prot[0][l] + exemple[0][j];
00407       prot[1][l] = prot[1][l] + exemple[1][j];
00408     }
00409   prot[0][l] = prot[0][l] / card[l];
00410   prot[1][l] = prot[1][l] / card[l];
00411 
00412 
00413   //basic isodata
00414   k = 1; j = 0;
00415   while(k && j < 1000)
00416     {
00417       k = 0; j++;
00418       for (i = 0; i < nb_pts; i++)
00419         {
00420           dmin = sqrt(pow(exemple[0][i] - prot[0][class[i]], 2) + pow(exemple[1][i] - prot[1][class[i]], 2));
00421           for (l = 0; l < C; l++)
00422             {
00423               d = sqrt(pow(exemple[0][i] - prot[0][l], 2) + pow(exemple[1][i] - prot[1][l], 2));
00424               if (d < dmin)
00425                 {
00426                   dmin = d;
00427                   class[i] = l;
00428                   k = 1;
00429                 }
00430             }
00431         }
00432       for (l = 0; l < C; l++)
00433         prot[0][l] = prot[1][l] = card[l] = 0;
00434       for (i = 0; i < nb_pts; i++)
00435         {
00436           card[class[i]]++;
00437           prot[0][class[i]] = prot[0][class[i]] + exemple[0][i];
00438           prot[1][class[i]] = prot[1][class[i]] + exemple[1][i];
00439         }
00440       for (l = 0; l < C; l++)
00441         {
00442           prot[0][l] = prot[0][l] / card[l];
00443           prot[1][l] = prot[1][l] / card[l];
00444         }
00445     }
00446 
00447 
00448 //"cluster fusion" - the points that belong to neighbour clusters are allocated to the same cluster if they are really close. They will be allocated to the cluster whose cardinality is greatest
00449  k = 0;
00450  m = nb_pts;
00451      d_intr[0] = (double*)malloc(cl*sizeof(double));
00452      d_intr[1] = (double*)malloc(cl*sizeof(double));
00453      flag = 0;
00454      for (j = 0; j < nb_pts; j++)
00455        {
00456          for (n = j+1; n < nb_pts; n++)
00457            if (class[j] == class[n])
00458              {
00459                d = fabs(exemple[0][j] - exemple[0][n]);
00460                dmin = fabs(exemple[1][j] - exemple[1][n]);
00461                n = m;
00462              }
00463          for (n = j+1; n < nb_pts; n++)
00464            if (class[j] == class[n])
00465              {
00466                d = minim(fabs(exemple[0][j] - exemple[0][n]), d);
00467                dmin = minim(fabs(exemple[1][j] - exemple[1][n]), dmin);
00468                n = m;
00469              }
00470          d_intr[0][class[j]] = d_intr[0][class[j]] + d;
00471          d_intr[1][class[j]] = d_intr[1][class[j]] + dmin;
00472        }
00473      for (l = 0; l < cl; l++)
00474        {
00475          d_intr[0][l] = d_intr[0][l] / card[l];
00476          d_intr[1][l] = d_intr[1][l] / card[l];
00477        }
00478        while(!flag)
00479          {
00480            flag = 1;
00481            for (j = k; j < m - 1; j++)
00482              for (n = k; n < m; n++)
00483                  if (class[j] != class[n])
00484                    {
00485                      d = fabs(exemple[0][j] - exemple[0][n]);
00486                      dmin =  fabs(exemple[1][j] - exemple[1][n]);
00487                      if ((d < 1.5 * minim(d_intr[0][class[j]], d_intr[0][class[n]])) && (dmin < 1.5 * minim(d_intr[1][class[j]], d_intr[0][class[n]])))
00488                        {
00489                          flag = 0;
00490                          if (card [class[j]] > card[class[n]])
00491                            {
00492                              card[class[n]]--;
00493                              card[class[j]]++;
00494                              class[n] = class[j];
00495                            }
00496                          else
00497                            {
00498                              card[class[j]]--;
00499                              card[class[n]]++;
00500                              class[j] = class[n];
00501                            }
00502                        }
00503                    }
00504          }
00505 /*      free(d_intr[0]); */
00506 /*      free(d_intr[1]); */
00507        k =0;
00508      for (l = 0; l < cl; l++)
00509        if (card[l])
00510          {
00511            for (j = 0; j < nb_pts; j++)
00512              if (class[j] == l)
00513                class[j] = k;
00514            k++;
00515          }
00516      cl = k;
00517 
00518 
00519 
00520 //Modification of the learning set as to separate the learning set into the connex parts that were identified
00521   t = 0;
00522   if (cl > 1)
00523     {
00524       for (l = 0; l < cl-1; l++)
00525         {
00526           card[l] = 0;
00527           for (j = 0; j < nb_pts; j++)
00528             if (class[j] == l)
00529               {
00530                 intersch(&exemple[0][j], &exemple[0][t]);
00531                 intersch(&exemple[1][j], &exemple[1][t]);
00532                 n = class[j];
00533                 class[j] = class[t];
00534                 class[t] = n;
00535                 t++;
00536                 card[l]++;
00537               }
00538         }
00539     }
00540 
00541  for (i = 0; i < nb_pts; i++)
00542    {
00543      iris->input_ref.attributes[att1][i+start] = exemple[0][i];
00544      iris->input_ref.attributes[att2][i+start] = exemple[1][i];
00545   }
00546 
00547    for (j = 0; j < nb_pts; j++)
00548      fprintf(myfile, "%f %f %d \n", exemple[0][j], exemple[1][j], class[j]);
00549   fprintf(myfile, "\n");
00550 
00551 
00552   fclose(myfile);
00553 
00554   myfile = fopen("learning_set_processing.txt", "a");
00555   fprintf(myfile, "%d\n", cl);
00556   fclose(myfile);
00557 
00558   m = start;
00559   if (cl > 1)
00560     {
00561       for (l = 0; l < cl-1; l++)
00562         {
00563           t = m;
00564           m = m + card[l];
00565           convex_verif(*iris, t, m, att1, att2);
00566         }
00567       convex_verif(*iris, m, stop, att1, att2);
00568     }
00569 
00570 /*   free(exemple[0]); */
00571 /*   free(exemple[1]);  */
00572 /*   free(class);  */
00573 /*   free(u); */
00574 /*   free(prot[0]); */
00575 /*   free(prot[1]); */
00576 
00577 
00578   return(cl);
00579 
00580 }
00581 
00582 
00583 
00584 
00585 
00586 elem_contour *contour_extraction(learning_set iris, int start, int stop, int att1, int att2)
00587 {
00588   elem_contour *first, *current, *new;
00589   elem_contour *firstm;
00590   int i, k, j, m, cont = 1;
00591 //  FILE *myfile, *myfile1;
00592 
00593 /*   myfile = fopen("contour.txt", "w"); */
00594 /*   myfile1 = fopen("convex_hull.txt", "a"); */
00595 
00596 
00597 //The contour on taking the vertical extreme points
00598 
00599 
00600   current = (elem_contour*)malloc(sizeof(elem_contour));
00601   current->x = iris.input_ref.attributes[att1][start];
00602   current->y = iris.input_ref.attributes[att2][start];
00603   current->next = current;
00604   current->ant = current;
00605   first = current;
00606 
00607   cont = 1;
00608 
00609 
00610   for (j = start; j < stop; j++)
00611     {
00612       k = 1; m = 0;
00613       current = first->next;
00614       while (k)
00615         {
00616           if (current == first)
00617           k = 0;
00618           if (current->y == iris.input_ref.attributes[att2][j])
00619             {
00620               m = 1;
00621               if (current->x < iris.input_ref.attributes[att1][j])
00622                 current->x = iris.input_ref.attributes[att1][j];
00623             }
00624           current = current->next;
00625         }
00626       if(!m)
00627         {
00628           cont++;
00629           new = (elem_contour*)malloc(sizeof(elem_contour));
00630           new->x = iris.input_ref.attributes[att1][j];
00631           new->y = iris.input_ref.attributes[att2][j];
00632           new->next = current;
00633           new->ant = current->ant;
00634           current->ant->next = new;
00635           current->ant = new;
00636         }
00637     }
00638 
00639 
00640   current = (elem_contour*)malloc(sizeof(elem_contour));
00641   current->x = iris.input_ref.attributes[att1][start];
00642   current->y = iris.input_ref.attributes[att2][start];
00643   current->next = current;
00644   current->ant = current;
00645   firstm = current;
00646 
00647   for (j = start; j < stop; j++)
00648     {
00649       k = 1; m = 0;
00650       current = firstm->next;
00651       while (k)
00652         {
00653           if (current == firstm)
00654             k = 0;
00655           if (current->y == iris.input_ref.attributes[att2][j])
00656             {
00657               m = 1;
00658               if (current->x > iris.input_ref.attributes[att1][j])
00659                 current->x = iris.input_ref.attributes[att1][j];
00660             }
00661           current = current->next;
00662         }
00663       if(!m)
00664         {
00665           new = (elem_contour*)malloc(sizeof(elem_contour));
00666           new->x = iris.input_ref.attributes[att1][j];
00667           new->y = iris.input_ref.attributes[att2][j];
00668           new->next = current;
00669           new->ant = current->ant;
00670           current->ant->next = new;
00671           current->ant = new;
00672         }
00673     }
00674 
00675 
00676   current = first->ant;
00677   new = firstm->ant;
00678   first->ant = new;
00679   firstm->ant = current;
00680   current->next = firstm;
00681   new->next = first;
00682 
00683 //  firstm = conv_hull(firstm);
00684 
00685 
00686 /*  k = 1; */
00687 /*   while (k) */
00688 /*     { */
00689 /*       fprintf(myfile1, "%f %f\n", current->x, current->y); */
00690 /*       current = current->next; */
00691 /*       if (current == firstm) */
00692 /*      k = 0; */
00693 /*     } */
00694 
00695 /*   fprintf(myfile1, "\n"); */
00696 
00697 /*   fclose(myfile); */
00698 /*   fclose(myfile1); */
00699 
00700   return(firstm);
00701 }
00702 
00703 
00704 //is the point cloud convex?
00705 int convex_verif(learning_set ls, int start, int stop, int att1, int att2)
00706 {
00707   elem_contour *first, *current;
00708   double d_minx, d_miny, stp_x, stp_y, Nx, Ny, m, n;
00709   double xmin, xmax, ymin, ymax;
00710   int k, l, i, j, t, indic, **density, *cadran, card[4] = {0, 0, 0, 0};
00711   int x1, y1, x2, y2, x, y;
00712   int S, ind1, ind2, s_ls, ns;
00713   FILE *myfile;
00714 
00715 //  matrice = fopen("matrice.txt", "w");
00716 
00717 
00718   if ((stop - start) > 3)
00719     {
00720       first = contour_extraction(ls, start, stop, att1, att2);
00721 
00722 
00723 
00724       d_minx = dmin(ls, start, stop, att1);
00725       d_miny = dmin(ls, start, stop, att2);
00726       stp_x = 3 * d_minx;
00727       stp_y = 3 * d_miny;
00728       xmin = val_min(ls, start, stop, att1 );
00729       ymin = val_min(ls, start, stop, att2);
00730       xmax = val_max(ls, start, stop, att1) + stp_x;
00731       ymax = val_max(ls, start, stop, att2) + stp_y;
00732       Nx = ceil((xmax - xmin)/stp_x);
00733       Ny = ceil((ymax - ymin)/stp_y);
00734       //In case we want an equal fixed nomber of intervals on both axes
00735       /*              xmax = val_max(ls, n, m, at1); */
00736       /*              ymax = val_max(ls, n, m, at2); */
00737       /*              stp_x = (xmax - xmin) / 30.0; */
00738       /*              stp_y = (ymax - ymin) / 30.0; */
00739       /*              Nx = 31; */
00740       /*              Ny = 31; */
00741       density = (int**)malloc((Nx + 2) * sizeof(int));
00742       for (j = 0; j < Nx + 2; j++)
00743         density[j] = (int*)malloc((Ny + 2) * sizeof(int));
00744       for (k = 0; k < Nx + 2; k++)
00745         for (l = 0; l < Ny + 2; l++)
00746           density[k][l] = 0;
00747       current = first;
00748       k = 1;
00749       indic = 1;
00750       while (k)
00751         {
00752           i = floor((current->x - xmin)/stp_x) +1;
00753           j = floor((current->y - ymin)/stp_y) +1;
00754 //        printf("%d %d, date de %f %f\n", i, j, current->x, current->y);
00755           if (!density[i][j])
00756             {
00757               density[i][j] = indic;
00758               indic++;
00759             }
00760           current = current->next;
00761           if (current == first)
00762             k = 0;
00763         }
00764 
00765       for (i = 1; i < indic; i++)
00766         {
00767           m =0;
00768           for (k = 0; k < Nx + 2; k++)
00769             for (l = 0; l < Ny + 2; l++)
00770               {
00771                 if (density[k][l] == i)
00772                   {
00773                     x1 = k;
00774                     y1 = l;
00775                     m++;
00776                   }
00777                 if (density[k][l] == i % (indic-1) + 1)
00778                   {
00779                     x2 = k;
00780                     y2 = l;
00781                     m++;
00782                   }
00783                 if (m == 2)
00784                   {
00785                     k = Nx+3;
00786                     l = Ny+3;
00787                   }
00788               }
00789           if (fabs(x1 - x2) > 1 || fabs(y1 - y2) > 1)
00790             {
00791               if (x2 - x1)
00792                 m = (y2 - y1)/(x2 - x1+0.0);
00793               else
00794                 m = (y2 - y1)/0.001;
00795               n = y1 - (float)m * x1;
00796               if (fabs(x1 - x2) > fabs(y1 - y2))
00797                 for (k = min(x1, x2) + 1; k < max(x1, x2); k++)
00798                   {
00799                     y = floor(m * k + n);
00800                     if (!density[k][y])
00801                       density[k][y] = -1;
00802                   }
00803               else
00804                 for (l = min(y1, y2) + 1; l < max(y1, y2); l++)
00805                   {
00806                     x = floor((l - n)/m);
00807                     if (!density[x][l])
00808                       density[x][l] = -1;
00809                   }
00810             }
00811         }
00812 
00813 
00814       S = 0;
00815       for (j = 0; j < Ny+2; j++)
00816         {
00817           m = 0;
00818           for (k = 0; k < Nx+2; k++)
00819             if(density[k][j])
00820               {
00821                 ind1 = k;
00822                 k = Nx+2;
00823                 m = 1;
00824               }
00825           for (k = Nx+1; k > -1; k--)
00826             if(density[k][j])
00827               {
00828                 ind2 = k;
00829                 k = -1;
00830               }
00831           if (m)
00832             S = S + (ind2 - ind1) + 1;
00833         }
00834 
00835       s_ls = 0;
00836 
00837       for (k = 0; k < Nx + 2; k++)
00838         for (l = 0; l < Ny + 2; l++)
00839           density[k][l] = 0;
00840       for (i = start; i < stop; i++)
00841         {
00842           k = floor((ls.input_ref.attributes[att1][i] - xmin)/stp_x) + 1;
00843           l = floor((ls.input_ref.attributes[att2][i] - ymin)/stp_y) + 1;
00844           if (!density[k][l])
00845             {
00846               density[k][l] = 1;
00847               s_ls++;
00848             }
00849         }
00850       //  printf("suprafata acoperita: %d\n", s_ls);
00851 
00852       /*  for (k = 0; k < Nx + 2; k++) */
00853       /*    { */
00854       /*     for (l = 0; l < Ny + 2; l++) */
00855       /*       fprintf(matrice, "%d ", density[k][l]); */
00856       /*     fprintf(matrice, "\n"); */
00857       /*    } */
00858 
00859       card[0] = stop - start;
00860 
00861       if ((s_ls / (float)S) < 0.7)
00862         {
00863           cadran = (int*)malloc((stop - start + 1)*sizeof(int));
00864           for (i = start; i < stop; i++)
00865             if (ls.input_ref.attributes[att1][i] > (xmin + xmax)/2)
00866               if (ls.input_ref.attributes[att2][i] > (ymin + ymax)/2)
00867                 cadran[i-start] = 0;
00868               else
00869                 cadran[i-start] = 3;
00870             else
00871               if (ls.input_ref.attributes[att2][i] > (ymin + ymax)/2)
00872                 cadran[i-start] = 1;
00873               else
00874                 cadran[i-start] = 2;
00875           t = 0;    
00876           for (l = 0; l < 3; l++)
00877             {
00878               card[l] = 0;
00879               for (j = 0; j < stop - start; j++)
00880                 if (cadran[j] == l)
00881                   {
00882                     m = ls.input_ref.attributes[0][j+start];
00883                     ls.input_ref.attributes[att1][j+start] = ls.input_ref.attributes[att1][t+start];
00884                     ls.input_ref.attributes[att1][t+start] = m;
00885                     m = ls.input_ref.attributes[1][j+start];
00886                     ls.input_ref.attributes[att2][j+start] = ls.input_ref.attributes[att2][t+start];
00887                     ls.input_ref.attributes[att2][t+start] = m;
00888                     n = cadran[j];
00889                     cadran[j] = cadran[t];
00890                     cadran[t] = n;
00891                     t++;
00892                     card[l]++;
00893                   }
00894             }
00895           card[3] = stop - start - card[0] - card[1] - card[2];
00896         }
00897     }
00898   else
00899     card[0] = stop - start;
00900 
00901   ns = 0;
00902   for (l = 0; l < 4; l++)
00903     if (card[l])
00904       ns ++;
00905 
00906 
00907 
00908 //Writting in the file that will contain the final processed learning set, in the fix format required by the rules learning operator
00909  if ((myfile = fopen("learning_set_processing.txt", "a")) == NULL)
00910    printf("Impossible to open the learning_set_processing.txt file\n");
00911 
00912  fprintf(myfile, "%d\n", ns);
00913  for (i = 0; i < ns; i++)
00914    fprintf(myfile, "%d ", card[i]);
00915  fprintf(myfile, "\n");
00916 
00917  t = start - 1;
00918  for (i = 0; i < ns; i++)
00919    for (j = 0; j < card[i]; j++)
00920      {
00921        t++;
00922        fprintf(myfile, "%f %f \n", ls.input_ref.attributes[att1][t], ls.input_ref.attributes[att2][t]);
00923      }
00924 
00925 
00926 
00927  //free(cadran);
00928 // fclose(matrice);
00929  fclose(myfile);
00930 
00931 
00932   return(0);
00933 }
00934 
00935 //filtering and chaining all the 3 processings
00936 int process_chain(learning_set ls, int att1, int att2, lgr_d a)
00937 {
00938   int i, j, k, l, m, n, at1, at2, ns, nb_pts = 0;
00939   int Nx, Ny;
00940   int sum, card, av;
00941   float d_minx, d_miny, xmin, ymin, xmax, ymax;
00942   float stp_x, stp_y;
00943   int *freq, **density;
00944   short flag;
00945   FILE * myfile1, *myfile;
00946 
00947 
00948   freq = clean_learning_set(&ls, a);
00949 
00950 
00951 //the filtering of the learning set resulted after the cleaning: establish the grid, determines the points that are to be eliminated and eliminates them...
00952 
00953 
00954   m = 0;
00955   for (i = 0; i < ls.output_ref.nb_class; i++)
00956     {
00957       n = m;
00958       m = m + a.nb_pts[i];
00959       if (i == 0)
00960         {
00961           myfile1 = fopen("learning_set_processing.txt", "a");
00962           fprintf(myfile1, "%d %d\n", att1, att2);
00963           fclose(myfile1);
00964         }
00965       d_minx = dmin(ls, n, m, 0);
00966       d_miny = dmin(ls, n, m, 1);
00967       if (!d_minx)
00968         d_minx = 0.001;
00969       if (!d_miny)
00970         d_miny = 0.001;
00971       stp_x = 3 * d_minx;
00972       stp_y = 3 * d_miny;
00973       xmin = val_min(ls, n, m, 0);
00974       ymin = val_min(ls, n, m, 1);
00975       xmax = val_max(ls, n, m, 0) + stp_x;
00976       ymax = val_max(ls, n, m, 1) + stp_y;
00977       Nx = ceil((xmax - xmin)/stp_x);
00978       Ny = ceil((ymax - ymin)/stp_y);
00979       //In case we want an equal fixed nomber of intervals on both axes
00980       /*              xmax = val_max(ls, n, m, at1); */
00981       /*              ymax = val_max(ls, n, m, at2); */
00982       /*              stp_x = (xmax - xmin) / 30.0; */
00983       /*              stp_y = (ymax - ymin) / 30.0; */
00984       /*              Nx = 31; */
00985       /*              Ny = 31; */
00986       density = (int**)malloc((Nx + 2) * sizeof(int));
00987       for (j = 0; j < Nx + 2; j++)
00988         density[j] = (int*)malloc((Ny + 2) * sizeof(int));
00989       for (k = 0; k < Nx + 2; k++)
00990         for (l = 0; l < Ny + 2; l++)
00991           density[k][l] = 0;
00992       for (j = n; j < m; j++)
00993         {
00994           k = floor((ls.input_ref.attributes[0][j] - xmin) / stp_x) + 1;
00995           l = floor((ls.input_ref.attributes[1][j] - ymin) / stp_y) + 1;
00996           density[k][l] = density[k][l] + freq[j];
00997         }
00998       sum = card =0;
00999       for (k = 0; k < Nx + 2; k++)
01000         for (l = 0; l < Ny + 2; l++)
01001           if (density[k][l])
01002             {
01003               sum = sum + density[k][l];
01004               card++;
01005             }
01006       av = sum/card;
01007       flag = 0;
01008       while (!flag)
01009         {
01010           flag = 1;
01011           for (k = 1; k < Nx + 1; k++)
01012             for (l = 1; l < Ny + 1; l++)
01013               {
01014                 if (density[k][l])
01015                   if (density[k][l] < (av/2))
01016                     {
01017                       density[k][l] = 0;
01018                       flag = 0;
01019                     }
01020               }
01021         }
01022       flag = n;
01023       for (j = n; j < m; j++)
01024         {
01025           k = floor((ls.input_ref.attributes[0][j] - xmin) / stp_x) + 1;
01026           l = floor((ls.input_ref.attributes[1][j] - ymin) / stp_y) + 1;
01027           if (density[k][l])
01028             {
01029               ls.input_ref.attributes[0][flag] = ls.input_ref.attributes[0][j];
01030               ls.input_ref.attributes[1][flag] = ls.input_ref.attributes[1][j];
01031               flag++;
01032             }
01033         }
01034 
01035       /*            fprintf(myfile, "%d %d %d %d\n", i, at1, at2, flag - n); */
01036       /*            for (j = n; j < flag; j++) */
01037       /*              fprintf(myfile, "%f %f\n", ls.input_ref.attributes[at1][j], ls.input_ref.attributes[at2][j]); */
01038       for (k = 0; k < Nx; k++)
01039         free(density[k]);
01040       if(competitive_agglomeration(&ls, n ,flag, 0, 1, i) == 1)
01041         convex_verif(ls, n, flag, 0, 1);
01042     }
01043 
01044       
01045 
01046 }
01047 
01048 
01049 /* *************************  CALCUL  ***************************/
01061 int traitement_donnees_calc(learning_set ls1, lgr_d a)
01062 {
01063 
01064   int i, j, k, l, sum, at1, at2, *freq, **density;
01065   int ns, nb_pts, m, n, *nb_pct;
01066   FILE *myfile, *myfile1;
01067   learning_set ls;
01068 
01069   nb_pct = (int*)malloc(a.nb_class * sizeof(int));
01070 
01071   myfile = fopen("learn_clean.data", "w");
01072 
01073   fclose(myfile);
01074 
01075   myfile = fopen("compet_agg.txt", "w");
01076 
01077   fclose(myfile);
01078 
01079 /*   myfile = fopen("convex_hull.txt", "w"); */
01080 
01081 /*   fclose(myfile); */
01082 
01083   myfile1 = fopen("learning_set_processing.txt", "w");
01084 
01085   fprintf(myfile1, "%d\n", a.nb_class);
01086 
01087   fclose(myfile1);
01088 
01089   myfile = fopen(a.fisout1, "w");
01090 
01091   for (i = 0; i < a.nb_class; i++)
01092     nb_pct[i] = a.nb_pts[i];
01093 
01094   if (ls1.input_ref.nb_attr == 2)
01095     {
01096       process_chain(ls1, 0, 1, a);
01097       ls = ls1;
01098     }
01099   else 
01100     if (ls1.input_ref.nb_attr < 2)
01101       {
01102         printf("The number of attributes is not enough to construct the classification rules\n");
01103         exit(1);
01104       }
01105     else 
01106       for (at1 = 0; at1 < a.nb_attr - 1; at1++)
01107         for (at2 = at1 + 1; at2 < a.nb_attr; at2++)
01108           {
01109             ls.input_ref.nb_attr = 2;
01110             ls.input_ref.nb_pts = ls1.input_ref.nb_pts;
01111             ls.output_ref.nb_class = ls1.output_ref.nb_class;
01112             ls.output_ref.nb_pts = ls1.output_ref.nb_pts;
01113             ls.output_ref.type = CRISP_CHOICE;
01114 
01115             if ((m = alloc_classifier_data_input_completely(&ls.input_ref)) != 0)
01116               printf("Probleme d'allocation memoire pour les donnees d'entree\n");
01117 
01118             m = alloc_classifier_data_output(&ls.output_ref);
01119 
01120             for (i = 0; i < ls.input_ref.nb_pts; i++)
01121               {
01122                 ls.input_ref.attributes[0][i] = ls1.input_ref.attributes[at1][i];
01123                 ls.input_ref.attributes[1][i] = ls1.input_ref.attributes[at2][i];
01124               }
01125 
01126             process_chain(ls, 0, 1, a);
01127 
01128             free_data_input(&ls.input_ref);
01129             free_data_output(&ls.output_ref);
01130 
01131             for (i = 0; i < a.nb_class; i++)
01132               a.nb_pts[i] = nb_pct[i];
01133           }
01134 
01135 
01136   if ((myfile1 = fopen("learning_set_processing.txt", "r")) == NULL)
01137     printf("Impossible to open the learning_set.processing.txt file for reading ");
01138 
01139   fscanf(myfile1, "%d", &k);
01140   fprintf(myfile, "%d\n", k);
01141 
01142 
01143   for (at1 = 0; at1 < a.nb_attr - 1; at1++)
01144     for (at2 = at1 + 1; at2 < a.nb_attr; at2++)
01145       {
01146         fprintf(myfile, "%d %d\n", at1, at2);
01147         fscanf(myfile1, "%d %d", &i, &i);
01148         for (i = 0; i < a.nb_class; i++)
01149           {
01150             sum = 0;
01151             fscanf(myfile1, "%d", &ns);
01152             density = (int**)malloc(ns*sizeof(int));
01153             freq = (int*)malloc(ns*sizeof(int));
01154             nb_pts = 0;
01155             for (j = 0; j < ns; j++)
01156               {
01157                 fscanf(myfile1, "%d", &freq[j]);
01158                 density[j] = (int*)malloc(freq[j]*sizeof(int));
01159                 sum = sum + freq[j];
01160                 n = nb_pts;
01161                 for (l = 0; l < freq[j]; l++)
01162                   {
01163                     fscanf(myfile1, "%d", &density[j][l]);
01164                     nb_pts = nb_pts + density[j][l];
01165                   }
01166                 for (m = n; m < nb_pts; m++)
01167                   fscanf(myfile1, "%f %f", &ls1.input_ref.attributes[0][m], &ls1.input_ref.attributes[1][m]);
01168               }
01169             fprintf(myfile, "%d\n", sum);
01170             for (k = 0; k < ns; k++)
01171               for (j = 0; j < freq[k]; j++)
01172                 fprintf(myfile, "%d ", density[k][j]);
01173             fprintf(myfile, "\n");
01174             for (m = 0; m < nb_pts; m++)
01175               fprintf(myfile, "%f %f\n", ls1.input_ref.attributes[0][m], ls1.input_ref.attributes[1][m]);
01176           }
01177       }
01178 
01179   free(nb_pct);
01180   fclose(myfile1);
01181   fclose(myfile);
01182 
01183 }
01184 
01185 
01186 
01187 

Generated on Tue Apr 22 13:31:02 2008 for donnee1D by  doxygen 1.5.3