00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00043
00044
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <math.h>
#include "image.h"
#include "proto2D.h"
#include "ff_symb.h"
#include "classifier.h"
#include "traitement_point_apprentissage.h"
00053
/*
 * Exchange the two doubles designated by a and b.
 * Always returns 0.
 */
int intersch (double *a, double *b)
{
    const double held = *a;

    *a = *b;
    *b = held;

    return 0;
}
00061
00062 int sum (int i, lgr_d id)
00063 {
00064 int j, a = 0;
00065 for (j = 0; j < i; j++)
00066 a = a + id.nb_pts[j];
00067 return (a);
00068 }
00069
/* Return a when a is strictly smaller than b, otherwise b. */
double min(double a, double b)
{
    return (a < b) ? a : b;
}
00076
/* Return a when a is strictly greater than b, otherwise b. */
double max(double a, double b)
{
    return (a > b) ? a : b;
}
00083
00084
00098 param *traitement_donnees_lect(lgr_d *a, param *ptp, char *debq){
00099
00100 char question[500];
00101 int i;
00102
00103
00104 sprintf(question, "%s Data file containing the learning points", debq);
00105 lec_param(question, ptp);
00106 a->fisin = ptp->rep;
00107 ptp = ptp->next;
00108
00109 sprintf(question, "%s Number of classes in the current file", debq);
00110 lec_param(question, ptp);
00111 a->nb_class = atoi(ptp->rep);
00112 ptp = ptp->next;
00113
00114 a->nb_pts = (int*)malloc(a->nb_class*sizeof(int));
00115
00116 for (i = 0; i < a->nb_class; i++){
00117 sprintf(question, "%s Number of learning points in class %d", debq, i);
00118 lec_param(question, ptp);
00119 a->nb_pts[i] = atoi(ptp->rep);
00120 ptp = ptp->next;
00121 }
00122
00123 sprintf(question, "%s Number of attributes in the curent file", debq);
00124 lec_param(question, ptp);
00125 a->nb_attr = atoi(ptp->rep);
00126 ptp = ptp->next;
00127
00128
00129 sprintf(question, "%s Data file containing the processed learning set", debq);
00130 lec_param(question, ptp);
00131 a->fisout1 = ptp->rep;
00132 ptp = ptp->next;
00133
00134 return(ptp);
00135 }
00136
00137
00149 int traitement_donnees_init(lgr_d a, learning_set *ls)
00150 {
00151 int nb_points, i, j, m;
00152 FILE * myfile;
00153
00154
00155 if ((myfile=fopen(a.fisin, "r")) == NULL) {
00156 printf("ERREUR: impossible d'ouvrir le fichier %s\n",a.fisin);
00157 exit(1);
00158 }
00159
00160 nb_points = 0;
00161 for (i = 0; i < a.nb_class; i++)
00162 nb_points = nb_points + a.nb_pts[i];
00163
00164
00165
00166 ls->input_ref.nb_attr = a.nb_attr;
00167 ls->input_ref.nb_pts = nb_points;
00168 ls->output_ref.nb_class = a.nb_class;
00169 ls->output_ref.nb_pts = nb_points;
00170 ls->output_ref.type = CRISP_CHOICE;
00171
00172 if ((m = alloc_classifier_data_input_completely(&ls->input_ref)) != 0)
00173 printf("Probleme d'allocation memoire pour les donnees d'entree\n");
00174
00175 m = alloc_classifier_data_output(&ls->output_ref);
00176
00177
00178 for (i=0; i<nb_points; i++)
00179 { for (j = 0; j < a.nb_attr; j++)
00180 fscanf(myfile,"%f ", &(ls->input_ref.attributes[j][i]));
00181
00182 fscanf(myfile, "%d", &a);
00183 }
00184
00185 fclose(myfile);
00186
00187 return 1;
00188 }
00189
/* Single-precision minimum: yields b when a is strictly greater, else a. */
float minim(float a, float b)
{
    return (a > b) ? b : a;
}
00198
/* Single-precision maximum: yields b when a is strictly smaller, else a. */
float maxim(float a, float b)
{
    return (a < b) ? b : a;
}
00207
00208 float dmin(learning_set ls, int pos_start, int pos_stop, int att)
00209 {
00210 int i, j;
00211 float dm;
00212
00213
00214
00215 for (i = pos_start; i < pos_stop - 1; i++)
00216 for (j = i + 1; j < pos_stop; j++)
00217 if (fabs(ls.input_ref.attributes[att][i] - ls.input_ref.attributes[att][j]) != 0.0)
00218 {
00219 dm = fabs(ls.input_ref.attributes[att][i] - ls.input_ref.attributes[att][j]);
00220 i = pos_stop;
00221 j = pos_stop;
00222 }
00223
00224 for (i = pos_start; i < pos_stop - 1; i++)
00225 for (j = i + 1; j < pos_stop; j++)
00226 if (fabs(ls.input_ref.attributes[att][i] - ls.input_ref.attributes[att][j]) != 0.0)
00227 {
00228 dm = minim(dm, fabs(ls.input_ref.attributes[att][i] - ls.input_ref.attributes[att][j]));
00229
00230 }
00231
00232 return (dm);
00233 }
00234
00235
00236
00237 int *clean_learning_set(learning_set *iris, lgr_d id)
00238 {
00239
00240 int i, j, k = -1, a, att, p, n, m;
00241 int *sim, *verif;
00242 FILE *myfile;
00243
00244
00245
00246
00247 myfile = fopen("learn_clean.data", "a");
00248
00249 sim = (int*)malloc(iris->input_ref.nb_pts*sizeof(int));
00250 verif = (int*)malloc(iris->input_ref.nb_pts*sizeof(int));
00251
00252 for (j = 0; j < iris->input_ref.nb_pts; j++)
00253 {
00254 sim[j] = 0;
00255 verif[j] = 1;
00256 }
00257
00258 n = 0;
00259
00260
00261 for (i = 0; i < iris->output_ref.nb_class; i++)
00262 {
00263 n = n + id.nb_pts[i];
00264 for (j = 0; j < id.nb_pts[i]; j++)
00265 {
00266 k++;
00267 if (verif[k])
00268 {
00269 sim[k] = 1;
00270 for (m = k+1; m < n; m++)
00271 {
00272 a = 0;
00273 for (att = 0; att < iris->input_ref.nb_attr; att++)
00274 if (iris->input_ref.attributes[att][m] != iris->input_ref.attributes[att][k])
00275 a = 1;
00276 if (!a)
00277 {
00278 sim[k]++;
00279 verif[m] = 0;
00280 }
00281 }
00282 }
00283 }
00284 }
00285
00286
00287 n = 0;
00288 m = 0; k = -1;
00289 for (i = 0; i < iris->output_ref.nb_class; i++)
00290 {
00291 p = 0;
00292 for (j = 0; j < id.nb_pts[i]; j++)
00293 {
00294 k++;
00295 if (sim[k])
00296 {
00297 for (att = 0; att < iris->input_ref.nb_attr; att++)
00298 {
00299 iris->input_ref.attributes[att][m] = iris->input_ref.attributes[att][k];
00300 fprintf(myfile, "%f ", iris->input_ref.attributes[att][m]);
00301 }
00302 fprintf(myfile, "%d\n", sim[k]);
00303 verif[m] = sim[k];
00304 m++;
00305 p++;
00306 }
00307 }
00308 fprintf(myfile, "\n");
00309 id.nb_pts[i] = p;
00310 }
00311
00312 iris->input_ref.nb_pts = 0;
00313 for (i = 0; i < iris->output_ref.nb_class; i++)
00314 iris->input_ref.nb_pts = iris->input_ref.nb_pts + id.nb_pts[i];
00315
00316 free(sim);
00317
00318 sim = (int*)malloc(iris->input_ref.nb_pts * sizeof(int));
00319
00320 for (i = 0; i < iris->input_ref.nb_pts; i++)
00321 sim[i] = verif[i];
00322
00323 free(verif);
00324 fclose(myfile);
00325
00326 return(sim);
00327 }
00328
00329 float val_min(learning_set ls, int pos_start, int pos_stop, int att)
00330 {
00331 float m;
00332 int i;
00333
00334 m = ls.input_ref.attributes[att][pos_start];
00335
00336 for (i = pos_start+1; i < pos_stop; i++)
00337 m = minim(m, ls.input_ref.attributes[att][i]);
00338
00339 return m;
00340
00341 }
00342
00343 float val_max(learning_set ls, int pos_start, int pos_stop, int att)
00344 {
00345 float m;
00346 int i;
00347
00348 m = ls.input_ref.attributes[att][pos_start];
00349 for (i = pos_start+1; i < pos_stop; i++)
00350 m = maxim(m, ls.input_ref.attributes[att][i]);
00351
00352 return m;
00353
00354 }
00355
00356
00357
00358 int competitive_agglomeration(learning_set *iris, int start, int stop, int att1, int att2, int classe)
00359 {
00360 const nb_pts = stop - start;
00361 const C = min(10, max(2,nb_pts/10));
00362 double *exemple[2], card[C], *prot[2], d, dmin, *d_intr[2];
00363 int i, j, l, k = -1, n, m = 0, *class, cl = C, flag = 1, t, s;
00364 FILE *myfile;
00365
00366
00367 if ((myfile = fopen("compet_agg.txt", "a")) == NULL)
00368 printf("Error in opening the text file for the unsupervised clustering\n");
00369
00370
00371 exemple[0] = (double*)malloc(nb_pts * sizeof(double));
00372 exemple[1] = (double*)malloc(nb_pts * sizeof(double));
00373 class = (int*)malloc(nb_pts * sizeof(int));
00374 prot[0] = (double*)malloc(C * sizeof(double));
00375 prot[1] = (double*)malloc(C * sizeof(double));
00376
00377
00378 for (i = 0; i < nb_pts; i++)
00379 {
00380 exemple[0][i] = iris->input_ref.attributes[att1][i+start];
00381 exemple[1][i] = iris->input_ref.attributes[att2][i+start];
00382 }
00383
00384
00385 n = nb_pts / C;
00386 k = -1;
00387 for (l = 0; l < C-1; l++)
00388 {
00389 prot[0][l] = prot[1][l] = card[l] = 0.0;
00390 for (j = 0; j < n; j++)
00391 {
00392 k++;
00393 class[k] = l;
00394 prot[0][l] = prot[0][l] + exemple[0][k];
00395 prot[1][l] = prot[1][l] + exemple[1][k];
00396 card[l]++;
00397 }
00398 prot[0][l] = prot[0][l] / card[l];
00399 prot[1][l] = prot[1][l] / card[l];
00400 }
00401 prot[0][l] = prot[1][l] = card[l] = 0.0;
00402 for (j = k+1; j < nb_pts; j++)
00403 {
00404 class[j] = C - 1;
00405 card[l]++;
00406 prot[0][l] = prot[0][l] + exemple[0][j];
00407 prot[1][l] = prot[1][l] + exemple[1][j];
00408 }
00409 prot[0][l] = prot[0][l] / card[l];
00410 prot[1][l] = prot[1][l] / card[l];
00411
00412
00413
00414 k = 1; j = 0;
00415 while(k && j < 1000)
00416 {
00417 k = 0; j++;
00418 for (i = 0; i < nb_pts; i++)
00419 {
00420 dmin = sqrt(pow(exemple[0][i] - prot[0][class[i]], 2) + pow(exemple[1][i] - prot[1][class[i]], 2));
00421 for (l = 0; l < C; l++)
00422 {
00423 d = sqrt(pow(exemple[0][i] - prot[0][l], 2) + pow(exemple[1][i] - prot[1][l], 2));
00424 if (d < dmin)
00425 {
00426 dmin = d;
00427 class[i] = l;
00428 k = 1;
00429 }
00430 }
00431 }
00432 for (l = 0; l < C; l++)
00433 prot[0][l] = prot[1][l] = card[l] = 0;
00434 for (i = 0; i < nb_pts; i++)
00435 {
00436 card[class[i]]++;
00437 prot[0][class[i]] = prot[0][class[i]] + exemple[0][i];
00438 prot[1][class[i]] = prot[1][class[i]] + exemple[1][i];
00439 }
00440 for (l = 0; l < C; l++)
00441 {
00442 prot[0][l] = prot[0][l] / card[l];
00443 prot[1][l] = prot[1][l] / card[l];
00444 }
00445 }
00446
00447
00448
00449 k = 0;
00450 m = nb_pts;
00451 d_intr[0] = (double*)malloc(cl*sizeof(double));
00452 d_intr[1] = (double*)malloc(cl*sizeof(double));
00453 flag = 0;
00454 for (j = 0; j < nb_pts; j++)
00455 {
00456 for (n = j+1; n < nb_pts; n++)
00457 if (class[j] == class[n])
00458 {
00459 d = fabs(exemple[0][j] - exemple[0][n]);
00460 dmin = fabs(exemple[1][j] - exemple[1][n]);
00461 n = m;
00462 }
00463 for (n = j+1; n < nb_pts; n++)
00464 if (class[j] == class[n])
00465 {
00466 d = minim(fabs(exemple[0][j] - exemple[0][n]), d);
00467 dmin = minim(fabs(exemple[1][j] - exemple[1][n]), dmin);
00468 n = m;
00469 }
00470 d_intr[0][class[j]] = d_intr[0][class[j]] + d;
00471 d_intr[1][class[j]] = d_intr[1][class[j]] + dmin;
00472 }
00473 for (l = 0; l < cl; l++)
00474 {
00475 d_intr[0][l] = d_intr[0][l] / card[l];
00476 d_intr[1][l] = d_intr[1][l] / card[l];
00477 }
00478 while(!flag)
00479 {
00480 flag = 1;
00481 for (j = k; j < m - 1; j++)
00482 for (n = k; n < m; n++)
00483 if (class[j] != class[n])
00484 {
00485 d = fabs(exemple[0][j] - exemple[0][n]);
00486 dmin = fabs(exemple[1][j] - exemple[1][n]);
00487 if ((d < 1.5 * minim(d_intr[0][class[j]], d_intr[0][class[n]])) && (dmin < 1.5 * minim(d_intr[1][class[j]], d_intr[0][class[n]])))
00488 {
00489 flag = 0;
00490 if (card [class[j]] > card[class[n]])
00491 {
00492 card[class[n]]--;
00493 card[class[j]]++;
00494 class[n] = class[j];
00495 }
00496 else
00497 {
00498 card[class[j]]--;
00499 card[class[n]]++;
00500 class[j] = class[n];
00501 }
00502 }
00503 }
00504 }
00505
00506
00507 k =0;
00508 for (l = 0; l < cl; l++)
00509 if (card[l])
00510 {
00511 for (j = 0; j < nb_pts; j++)
00512 if (class[j] == l)
00513 class[j] = k;
00514 k++;
00515 }
00516 cl = k;
00517
00518
00519
00520
00521 t = 0;
00522 if (cl > 1)
00523 {
00524 for (l = 0; l < cl-1; l++)
00525 {
00526 card[l] = 0;
00527 for (j = 0; j < nb_pts; j++)
00528 if (class[j] == l)
00529 {
00530 intersch(&exemple[0][j], &exemple[0][t]);
00531 intersch(&exemple[1][j], &exemple[1][t]);
00532 n = class[j];
00533 class[j] = class[t];
00534 class[t] = n;
00535 t++;
00536 card[l]++;
00537 }
00538 }
00539 }
00540
00541 for (i = 0; i < nb_pts; i++)
00542 {
00543 iris->input_ref.attributes[att1][i+start] = exemple[0][i];
00544 iris->input_ref.attributes[att2][i+start] = exemple[1][i];
00545 }
00546
00547 for (j = 0; j < nb_pts; j++)
00548 fprintf(myfile, "%f %f %d \n", exemple[0][j], exemple[1][j], class[j]);
00549 fprintf(myfile, "\n");
00550
00551
00552 fclose(myfile);
00553
00554 myfile = fopen("learning_set_processing.txt", "a");
00555 fprintf(myfile, "%d\n", cl);
00556 fclose(myfile);
00557
00558 m = start;
00559 if (cl > 1)
00560 {
00561 for (l = 0; l < cl-1; l++)
00562 {
00563 t = m;
00564 m = m + card[l];
00565 convex_verif(*iris, t, m, att1, att2);
00566 }
00567 convex_verif(*iris, m, stop, att1, att2);
00568 }
00569
00570
00571
00572
00573
00574
00575
00576
00577
00578 return(cl);
00579
00580 }
00581
00582
00583
00584
00585
00586 elem_contour *contour_extraction(learning_set iris, int start, int stop, int att1, int att2)
00587 {
00588 elem_contour *first, *current, *new;
00589 elem_contour *firstm;
00590 int i, k, j, m, cont = 1;
00591
00592
00593
00594
00595
00596
00597
00598
00599
00600 current = (elem_contour*)malloc(sizeof(elem_contour));
00601 current->x = iris.input_ref.attributes[att1][start];
00602 current->y = iris.input_ref.attributes[att2][start];
00603 current->next = current;
00604 current->ant = current;
00605 first = current;
00606
00607 cont = 1;
00608
00609
00610 for (j = start; j < stop; j++)
00611 {
00612 k = 1; m = 0;
00613 current = first->next;
00614 while (k)
00615 {
00616 if (current == first)
00617 k = 0;
00618 if (current->y == iris.input_ref.attributes[att2][j])
00619 {
00620 m = 1;
00621 if (current->x < iris.input_ref.attributes[att1][j])
00622 current->x = iris.input_ref.attributes[att1][j];
00623 }
00624 current = current->next;
00625 }
00626 if(!m)
00627 {
00628 cont++;
00629 new = (elem_contour*)malloc(sizeof(elem_contour));
00630 new->x = iris.input_ref.attributes[att1][j];
00631 new->y = iris.input_ref.attributes[att2][j];
00632 new->next = current;
00633 new->ant = current->ant;
00634 current->ant->next = new;
00635 current->ant = new;
00636 }
00637 }
00638
00639
00640 current = (elem_contour*)malloc(sizeof(elem_contour));
00641 current->x = iris.input_ref.attributes[att1][start];
00642 current->y = iris.input_ref.attributes[att2][start];
00643 current->next = current;
00644 current->ant = current;
00645 firstm = current;
00646
00647 for (j = start; j < stop; j++)
00648 {
00649 k = 1; m = 0;
00650 current = firstm->next;
00651 while (k)
00652 {
00653 if (current == firstm)
00654 k = 0;
00655 if (current->y == iris.input_ref.attributes[att2][j])
00656 {
00657 m = 1;
00658 if (current->x > iris.input_ref.attributes[att1][j])
00659 current->x = iris.input_ref.attributes[att1][j];
00660 }
00661 current = current->next;
00662 }
00663 if(!m)
00664 {
00665 new = (elem_contour*)malloc(sizeof(elem_contour));
00666 new->x = iris.input_ref.attributes[att1][j];
00667 new->y = iris.input_ref.attributes[att2][j];
00668 new->next = current;
00669 new->ant = current->ant;
00670 current->ant->next = new;
00671 current->ant = new;
00672 }
00673 }
00674
00675
00676 current = first->ant;
00677 new = firstm->ant;
00678 first->ant = new;
00679 firstm->ant = current;
00680 current->next = firstm;
00681 new->next = first;
00682
00683
00684
00685
00686
00687
00688
00689
00690
00691
00692
00693
00694
00695
00696
00697
00698
00699
00700 return(firstm);
00701 }
00702
00703
00704
00705 int convex_verif(learning_set ls, int start, int stop, int att1, int att2)
00706 {
00707 elem_contour *first, *current;
00708 double d_minx, d_miny, stp_x, stp_y, Nx, Ny, m, n;
00709 double xmin, xmax, ymin, ymax;
00710 int k, l, i, j, t, indic, **density, *cadran, card[4] = {0, 0, 0, 0};
00711 int x1, y1, x2, y2, x, y;
00712 int S, ind1, ind2, s_ls, ns;
00713 FILE *myfile;
00714
00715
00716
00717
00718 if ((stop - start) > 3)
00719 {
00720 first = contour_extraction(ls, start, stop, att1, att2);
00721
00722
00723
00724 d_minx = dmin(ls, start, stop, att1);
00725 d_miny = dmin(ls, start, stop, att2);
00726 stp_x = 3 * d_minx;
00727 stp_y = 3 * d_miny;
00728 xmin = val_min(ls, start, stop, att1 );
00729 ymin = val_min(ls, start, stop, att2);
00730 xmax = val_max(ls, start, stop, att1) + stp_x;
00731 ymax = val_max(ls, start, stop, att2) + stp_y;
00732 Nx = ceil((xmax - xmin)/stp_x);
00733 Ny = ceil((ymax - ymin)/stp_y);
00734
00735
00736
00737
00738
00739
00740
00741 density = (int**)malloc((Nx + 2) * sizeof(int));
00742 for (j = 0; j < Nx + 2; j++)
00743 density[j] = (int*)malloc((Ny + 2) * sizeof(int));
00744 for (k = 0; k < Nx + 2; k++)
00745 for (l = 0; l < Ny + 2; l++)
00746 density[k][l] = 0;
00747 current = first;
00748 k = 1;
00749 indic = 1;
00750 while (k)
00751 {
00752 i = floor((current->x - xmin)/stp_x) +1;
00753 j = floor((current->y - ymin)/stp_y) +1;
00754
00755 if (!density[i][j])
00756 {
00757 density[i][j] = indic;
00758 indic++;
00759 }
00760 current = current->next;
00761 if (current == first)
00762 k = 0;
00763 }
00764
00765 for (i = 1; i < indic; i++)
00766 {
00767 m =0;
00768 for (k = 0; k < Nx + 2; k++)
00769 for (l = 0; l < Ny + 2; l++)
00770 {
00771 if (density[k][l] == i)
00772 {
00773 x1 = k;
00774 y1 = l;
00775 m++;
00776 }
00777 if (density[k][l] == i % (indic-1) + 1)
00778 {
00779 x2 = k;
00780 y2 = l;
00781 m++;
00782 }
00783 if (m == 2)
00784 {
00785 k = Nx+3;
00786 l = Ny+3;
00787 }
00788 }
00789 if (fabs(x1 - x2) > 1 || fabs(y1 - y2) > 1)
00790 {
00791 if (x2 - x1)
00792 m = (y2 - y1)/(x2 - x1+0.0);
00793 else
00794 m = (y2 - y1)/0.001;
00795 n = y1 - (float)m * x1;
00796 if (fabs(x1 - x2) > fabs(y1 - y2))
00797 for (k = min(x1, x2) + 1; k < max(x1, x2); k++)
00798 {
00799 y = floor(m * k + n);
00800 if (!density[k][y])
00801 density[k][y] = -1;
00802 }
00803 else
00804 for (l = min(y1, y2) + 1; l < max(y1, y2); l++)
00805 {
00806 x = floor((l - n)/m);
00807 if (!density[x][l])
00808 density[x][l] = -1;
00809 }
00810 }
00811 }
00812
00813
00814 S = 0;
00815 for (j = 0; j < Ny+2; j++)
00816 {
00817 m = 0;
00818 for (k = 0; k < Nx+2; k++)
00819 if(density[k][j])
00820 {
00821 ind1 = k;
00822 k = Nx+2;
00823 m = 1;
00824 }
00825 for (k = Nx+1; k > -1; k--)
00826 if(density[k][j])
00827 {
00828 ind2 = k;
00829 k = -1;
00830 }
00831 if (m)
00832 S = S + (ind2 - ind1) + 1;
00833 }
00834
00835 s_ls = 0;
00836
00837 for (k = 0; k < Nx + 2; k++)
00838 for (l = 0; l < Ny + 2; l++)
00839 density[k][l] = 0;
00840 for (i = start; i < stop; i++)
00841 {
00842 k = floor((ls.input_ref.attributes[att1][i] - xmin)/stp_x) + 1;
00843 l = floor((ls.input_ref.attributes[att2][i] - ymin)/stp_y) + 1;
00844 if (!density[k][l])
00845 {
00846 density[k][l] = 1;
00847 s_ls++;
00848 }
00849 }
00850
00851
00852
00853
00854
00855
00856
00857
00858
00859 card[0] = stop - start;
00860
00861 if ((s_ls / (float)S) < 0.7)
00862 {
00863 cadran = (int*)malloc((stop - start + 1)*sizeof(int));
00864 for (i = start; i < stop; i++)
00865 if (ls.input_ref.attributes[att1][i] > (xmin + xmax)/2)
00866 if (ls.input_ref.attributes[att2][i] > (ymin + ymax)/2)
00867 cadran[i-start] = 0;
00868 else
00869 cadran[i-start] = 3;
00870 else
00871 if (ls.input_ref.attributes[att2][i] > (ymin + ymax)/2)
00872 cadran[i-start] = 1;
00873 else
00874 cadran[i-start] = 2;
00875 t = 0;
00876 for (l = 0; l < 3; l++)
00877 {
00878 card[l] = 0;
00879 for (j = 0; j < stop - start; j++)
00880 if (cadran[j] == l)
00881 {
00882 m = ls.input_ref.attributes[0][j+start];
00883 ls.input_ref.attributes[att1][j+start] = ls.input_ref.attributes[att1][t+start];
00884 ls.input_ref.attributes[att1][t+start] = m;
00885 m = ls.input_ref.attributes[1][j+start];
00886 ls.input_ref.attributes[att2][j+start] = ls.input_ref.attributes[att2][t+start];
00887 ls.input_ref.attributes[att2][t+start] = m;
00888 n = cadran[j];
00889 cadran[j] = cadran[t];
00890 cadran[t] = n;
00891 t++;
00892 card[l]++;
00893 }
00894 }
00895 card[3] = stop - start - card[0] - card[1] - card[2];
00896 }
00897 }
00898 else
00899 card[0] = stop - start;
00900
00901 ns = 0;
00902 for (l = 0; l < 4; l++)
00903 if (card[l])
00904 ns ++;
00905
00906
00907
00908
00909 if ((myfile = fopen("learning_set_processing.txt", "a")) == NULL)
00910 printf("Impossible to open the learning_set_processing.txt file\n");
00911
00912 fprintf(myfile, "%d\n", ns);
00913 for (i = 0; i < ns; i++)
00914 fprintf(myfile, "%d ", card[i]);
00915 fprintf(myfile, "\n");
00916
00917 t = start - 1;
00918 for (i = 0; i < ns; i++)
00919 for (j = 0; j < card[i]; j++)
00920 {
00921 t++;
00922 fprintf(myfile, "%f %f \n", ls.input_ref.attributes[att1][t], ls.input_ref.attributes[att2][t]);
00923 }
00924
00925
00926
00927
00928
00929 fclose(myfile);
00930
00931
00932 return(0);
00933 }
00934
00935
00936 int process_chain(learning_set ls, int att1, int att2, lgr_d a)
00937 {
00938 int i, j, k, l, m, n, at1, at2, ns, nb_pts = 0;
00939 int Nx, Ny;
00940 int sum, card, av;
00941 float d_minx, d_miny, xmin, ymin, xmax, ymax;
00942 float stp_x, stp_y;
00943 int *freq, **density;
00944 short flag;
00945 FILE * myfile1, *myfile;
00946
00947
00948 freq = clean_learning_set(&ls, a);
00949
00950
00951
00952
00953
00954 m = 0;
00955 for (i = 0; i < ls.output_ref.nb_class; i++)
00956 {
00957 n = m;
00958 m = m + a.nb_pts[i];
00959 if (i == 0)
00960 {
00961 myfile1 = fopen("learning_set_processing.txt", "a");
00962 fprintf(myfile1, "%d %d\n", att1, att2);
00963 fclose(myfile1);
00964 }
00965 d_minx = dmin(ls, n, m, 0);
00966 d_miny = dmin(ls, n, m, 1);
00967 if (!d_minx)
00968 d_minx = 0.001;
00969 if (!d_miny)
00970 d_miny = 0.001;
00971 stp_x = 3 * d_minx;
00972 stp_y = 3 * d_miny;
00973 xmin = val_min(ls, n, m, 0);
00974 ymin = val_min(ls, n, m, 1);
00975 xmax = val_max(ls, n, m, 0) + stp_x;
00976 ymax = val_max(ls, n, m, 1) + stp_y;
00977 Nx = ceil((xmax - xmin)/stp_x);
00978 Ny = ceil((ymax - ymin)/stp_y);
00979
00980
00981
00982
00983
00984
00985
00986 density = (int**)malloc((Nx + 2) * sizeof(int));
00987 for (j = 0; j < Nx + 2; j++)
00988 density[j] = (int*)malloc((Ny + 2) * sizeof(int));
00989 for (k = 0; k < Nx + 2; k++)
00990 for (l = 0; l < Ny + 2; l++)
00991 density[k][l] = 0;
00992 for (j = n; j < m; j++)
00993 {
00994 k = floor((ls.input_ref.attributes[0][j] - xmin) / stp_x) + 1;
00995 l = floor((ls.input_ref.attributes[1][j] - ymin) / stp_y) + 1;
00996 density[k][l] = density[k][l] + freq[j];
00997 }
00998 sum = card =0;
00999 for (k = 0; k < Nx + 2; k++)
01000 for (l = 0; l < Ny + 2; l++)
01001 if (density[k][l])
01002 {
01003 sum = sum + density[k][l];
01004 card++;
01005 }
01006 av = sum/card;
01007 flag = 0;
01008 while (!flag)
01009 {
01010 flag = 1;
01011 for (k = 1; k < Nx + 1; k++)
01012 for (l = 1; l < Ny + 1; l++)
01013 {
01014 if (density[k][l])
01015 if (density[k][l] < (av/2))
01016 {
01017 density[k][l] = 0;
01018 flag = 0;
01019 }
01020 }
01021 }
01022 flag = n;
01023 for (j = n; j < m; j++)
01024 {
01025 k = floor((ls.input_ref.attributes[0][j] - xmin) / stp_x) + 1;
01026 l = floor((ls.input_ref.attributes[1][j] - ymin) / stp_y) + 1;
01027 if (density[k][l])
01028 {
01029 ls.input_ref.attributes[0][flag] = ls.input_ref.attributes[0][j];
01030 ls.input_ref.attributes[1][flag] = ls.input_ref.attributes[1][j];
01031 flag++;
01032 }
01033 }
01034
01035
01036
01037
01038 for (k = 0; k < Nx; k++)
01039 free(density[k]);
01040 if(competitive_agglomeration(&ls, n ,flag, 0, 1, i) == 1)
01041 convex_verif(ls, n, flag, 0, 1);
01042 }
01043
01044
01045
01046 }
01047
01048
01049
01061 int traitement_donnees_calc(learning_set ls1, lgr_d a)
01062 {
01063
01064 int i, j, k, l, sum, at1, at2, *freq, **density;
01065 int ns, nb_pts, m, n, *nb_pct;
01066 FILE *myfile, *myfile1;
01067 learning_set ls;
01068
01069 nb_pct = (int*)malloc(a.nb_class * sizeof(int));
01070
01071 myfile = fopen("learn_clean.data", "w");
01072
01073 fclose(myfile);
01074
01075 myfile = fopen("compet_agg.txt", "w");
01076
01077 fclose(myfile);
01078
01079
01080
01081
01082
01083 myfile1 = fopen("learning_set_processing.txt", "w");
01084
01085 fprintf(myfile1, "%d\n", a.nb_class);
01086
01087 fclose(myfile1);
01088
01089 myfile = fopen(a.fisout1, "w");
01090
01091 for (i = 0; i < a.nb_class; i++)
01092 nb_pct[i] = a.nb_pts[i];
01093
01094 if (ls1.input_ref.nb_attr == 2)
01095 {
01096 process_chain(ls1, 0, 1, a);
01097 ls = ls1;
01098 }
01099 else
01100 if (ls1.input_ref.nb_attr < 2)
01101 {
01102 printf("The number of attributes is not enough to construct the classification rules\n");
01103 exit(1);
01104 }
01105 else
01106 for (at1 = 0; at1 < a.nb_attr - 1; at1++)
01107 for (at2 = at1 + 1; at2 < a.nb_attr; at2++)
01108 {
01109 ls.input_ref.nb_attr = 2;
01110 ls.input_ref.nb_pts = ls1.input_ref.nb_pts;
01111 ls.output_ref.nb_class = ls1.output_ref.nb_class;
01112 ls.output_ref.nb_pts = ls1.output_ref.nb_pts;
01113 ls.output_ref.type = CRISP_CHOICE;
01114
01115 if ((m = alloc_classifier_data_input_completely(&ls.input_ref)) != 0)
01116 printf("Probleme d'allocation memoire pour les donnees d'entree\n");
01117
01118 m = alloc_classifier_data_output(&ls.output_ref);
01119
01120 for (i = 0; i < ls.input_ref.nb_pts; i++)
01121 {
01122 ls.input_ref.attributes[0][i] = ls1.input_ref.attributes[at1][i];
01123 ls.input_ref.attributes[1][i] = ls1.input_ref.attributes[at2][i];
01124 }
01125
01126 process_chain(ls, 0, 1, a);
01127
01128 free_data_input(&ls.input_ref);
01129 free_data_output(&ls.output_ref);
01130
01131 for (i = 0; i < a.nb_class; i++)
01132 a.nb_pts[i] = nb_pct[i];
01133 }
01134
01135
01136 if ((myfile1 = fopen("learning_set_processing.txt", "r")) == NULL)
01137 printf("Impossible to open the learning_set.processing.txt file for reading ");
01138
01139 fscanf(myfile1, "%d", &k);
01140 fprintf(myfile, "%d\n", k);
01141
01142
01143 for (at1 = 0; at1 < a.nb_attr - 1; at1++)
01144 for (at2 = at1 + 1; at2 < a.nb_attr; at2++)
01145 {
01146 fprintf(myfile, "%d %d\n", at1, at2);
01147 fscanf(myfile1, "%d %d", &i, &i);
01148 for (i = 0; i < a.nb_class; i++)
01149 {
01150 sum = 0;
01151 fscanf(myfile1, "%d", &ns);
01152 density = (int**)malloc(ns*sizeof(int));
01153 freq = (int*)malloc(ns*sizeof(int));
01154 nb_pts = 0;
01155 for (j = 0; j < ns; j++)
01156 {
01157 fscanf(myfile1, "%d", &freq[j]);
01158 density[j] = (int*)malloc(freq[j]*sizeof(int));
01159 sum = sum + freq[j];
01160 n = nb_pts;
01161 for (l = 0; l < freq[j]; l++)
01162 {
01163 fscanf(myfile1, "%d", &density[j][l]);
01164 nb_pts = nb_pts + density[j][l];
01165 }
01166 for (m = n; m < nb_pts; m++)
01167 fscanf(myfile1, "%f %f", &ls1.input_ref.attributes[0][m], &ls1.input_ref.attributes[1][m]);
01168 }
01169 fprintf(myfile, "%d\n", sum);
01170 for (k = 0; k < ns; k++)
01171 for (j = 0; j < freq[k]; j++)
01172 fprintf(myfile, "%d ", density[k][j]);
01173 fprintf(myfile, "\n");
01174 for (m = 0; m < nb_pts; m++)
01175 fprintf(myfile, "%f %f\n", ls1.input_ref.attributes[0][m], ls1.input_ref.attributes[1][m]);
01176 }
01177 }
01178
01179 free(nb_pct);
01180 fclose(myfile1);
01181 fclose(myfile);
01182
01183 }
01184
01185
01186
01187