Edinburgh Speech Tools 2.4-release
 
Loading...
Searching...
No Matches
EST_cluster.cc
1/*************************************************************************/
2/* */
3/* Centre for Speech Technology Research */
4/* University of Edinburgh, UK */
5/* Copyright (c) 1995,1996 */
6/* All Rights Reserved. */
7/* */
8/* Permission is hereby granted, free of charge, to use and distribute */
9/* this software and its documentation without restriction, including */
10/* without limitation the rights to use, copy, modify, merge, publish, */
11/* distribute, sublicense, and/or sell copies of this work, and to */
12/* permit persons to whom this work is furnished to do so, subject to */
13/* the following conditions: */
14/* 1. The code must retain the above copyright notice, this list of */
15/* conditions and the following disclaimer. */
16/* 2. Any modifications must be clearly marked as such. */
17/* 3. Original authors' names are not deleted. */
18/* 4. The authors' names are not used to endorse or promote products */
19/* derived from this software without specific prior written */
20/* permission. */
21/* */
22/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30/* THIS SOFTWARE. */
31/* */
32/*************************************************************************/
33/* Author : Paul Taylor */
34/* Date : July 1995 */
35/*-----------------------------------------------------------------------*/
36/* Event RFC labelling */
37/* */
38/*=======================================================================*/
39
40#include <cstdlib>
41#include "EST_system.h"
42#include "EST_FMatrix.h"
43#include "EST_cluster.h"
44#include <fstream>
45#include "EST_string_aux.h"
46#include <cfloat>
47
48int fn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d);
49int nn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d);
50int nn_cluster2(EST_FMatrix &m, EST_CBK &cbk, float d);
51float lval(EST_FMatrix &a, float floor, int &row, int &col);
52float nn_cluster3(EST_FMatrix &m, EST_CBK &cbk, EST_String method);
53
54void init_cluster(EST_CBK &cbk, int n)
55{
56 int i;
58
59 for (i = 0; i < n; ++i)
60 {
61 tmp.clear();
62 tmp.append(i);
63 cbk.append(tmp);
64 }
65}
66
69{
70 float dist;
71 while (cbk.length() > 1)
72 {
73 dist = nn_cluster3(m, cbk, method);
74 ans.append(print_codebook(cbk, dist, names));
75 }
76 return 0;
77}
78
79// return true if list l contains integer n
80int contains(EST_TList<int> &l, int n)
81{
82 EST_Litem *p;
83
84 for (p = l.head(); p != 0; p = p->next())
85 if (l(p) == n)
86 return 1;
87
88 return 0;
89}
90
91void remove_distances(EST_FMatrix &d , EST_TList<int> &group)
92{
93 EST_Litem *pi, *pj;
94 int i, j;
95
96 for (i = 0, pi = group.head(); pi != 0; pi = pi->next(), ++i)
97 for (j = 0, pj = group.head(); pj != 0; pj = pj->next(), ++j)
98 d(group(pi), group(pj)) = 0.0;
99}
100
101/*EST_FMatrix remove_line(Fmatrix a, int r)
102{
103 int n;
104 n = a.num_rows() - 1;
105
106 Fmatrix b(n, n);
107
108 int i, j;
109
110 for (i = i2 = 0; i < n; ++i. ++i2)
111 for (j = j2 = 0; j < n; ++j, ++j2)
112 if (i == r)
113 ;
114
115*/
116
117void collapse(EST_FMatrix &d, EST_CBK &cbk, int row, int col)
118{
119 EST_Litem *pi, *pj;
120
121 for (pi = cbk.head(); pi != 0; pi = pi->next())
122 if (contains(cbk(pi), row))
123 break;
124
125 for (pj = cbk.head(); pj != 0; pj = pj->next())
126 if (contains(cbk(pj), col))
127 break;
128
129 cbk(pi) += cbk(pj);
130 remove_distances(d, cbk(pi));
131 cbk.remove(pj);
132}
133
134float min(float a, float b)
135{
136 return (a < b) ? a: b;
137}
138
139float max(float a, float b)
140{
141 return (a > b) ? a: b;
142}
143
144// combine codebook groups "row" and "col" into one group and calculate a
145// new distance matrix
146void collapse3(EST_FMatrix &d, EST_CBK &cbk, int row, int col, EST_String method)
147{
148 int i;
149 EST_Litem *pi;
151 float fm;
152
153 cout << "Removing row/column " << col << endl;
154 cout << "row " <<cbk.nth(row) << endl;
155 cout << "col " <<cbk.nth(col) << endl;
156 cbk.nth(row) += cbk.nth(col);
157 cout << "row " <<cbk.nth(row) << endl;
158
159 for (i = 0; i < d.num_rows(); ++i)
160 {
161 if ((i != row) && (i != col))
162 v.append(i);
163 }
164
165 cout << "row " << row << " col " << col << " left out " << v;
166
167 for (pi = v.head(); pi != 0; pi = pi->next())
168 {
169 if (method == "nearest")
170 fm = min(d(row,v(pi)),d(col,v(pi)));
171 else if (method == "furthest")
172 fm = max(d(row,v(pi)),d(col,v(pi)));
173 else
174 fm = min(d(row,v(pi)),d(col,v(pi)));
175
176 cout << "writing values to " << v(pi) << ", " << row << " min "
177 << fm << endl;
178 d(v(pi), row) = fm;
179 d(row, v(pi)) = fm;
180 }
181
182 d = sub(d, col, col);
183 cbk.remove_nth(col);
184}
185
186int nn_cluster2(EST_FMatrix &m, EST_CBK &cbk, float d)
187{
188 static float smallest = 0.0;
189 int row=0, col=0;
190 (void)d;
191
192// Change so that all values aprt from lowest in codebook get set to
193// Nan (or whatever)
194
195 smallest = lval(m, smallest, row, col);
196 cout << "smallest = " << smallest << endl;
197 cout << "row = " << row << " col " << col << endl;
198 collapse(m, cbk, row, col);
199
200 for (EST_Litem *p = cbk.head(); p != 0; p = p->next())
201 cout << cbk(p);
202 cout << "New matrix\n" << m;
203
204 // cout << cbk;
205 return 1;
206}
207
208float nn_cluster3(EST_FMatrix &m, EST_CBK &cbk, EST_String method)
209{
210 static float smallest = 0.0;
211 int row=0, col=0;
212
213// Change so that all values aprt from lowest in codebook get set to
214// Nan (or whatever)
215
216 cout << "analysing matrix\n" << m;
217 smallest = lval(m, smallest, row, col);
218 cout << "smallest = " << smallest << endl;
219 cout << "row = " << row << " col " << col << endl;
220 collapse3(m, cbk, row, col, method);
221
222 for (EST_Litem *p = cbk.head(); p != 0; p = p->next())
223 cout << cbk(p);
224 cout << "New matrix\n" << m << endl << endl;
225
226 // cout << cbk;
227 return smallest;
228}
229
230int nn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d)
231{
232 int i;
233 EST_Litem *pi, *pj;
234 float smallest;
235 int c = 0;
236
237 i = 0;
238 for (pi = cbk.head(); pi != 0; pi = pi->next(), ++i)
239 {
240 for (pj = pi->next(); pj != 0; pj = pj->next())
241 {
242 smallest = lowestval(m, cbk(pj), cbk(pi));
243 if (smallest < d)
244 {
245 cbk(pi) += cbk(pj);
246 cbk(pj).clear();
247 }
248 }
249 }
250
251 for (pi = cbk.head(); pi != 0; pi = pi->next())
252 {
253 if (cbk(pi).empty())
254 {
255 cout << "Empty entry\n";
256 pi = cbk.remove(pi);
257 c = 1;
258 }
259 else
260 cout << cbk(pi);
261 }
262 return c;
263}
264
265int fn_cluster(EST_FMatrix &m, EST_CBK &cbk, float d)
266{
267 int i;
268 EST_Litem *pi, *pj;
269 float smallest;
270 int c = 0;
271
272 i = 0;
273 for (pi = cbk.head(); pi != 0; pi = pi->next(), ++i)
274 {
275 for (pj = pi->next(); pj != 0; pj = pj->next())
276 {
277 smallest = highestval(m, cbk(pj), cbk(pi));
278 if (smallest < d)
279 {
280 cbk(pi) += cbk(pj);
281 cbk(pj).clear();
282 }
283 }
284 }
285
286 for (pi = cbk.head(); pi != 0; pi = pi->next())
287 {
288 if (cbk(pi).empty())
289 {
290 cout << "Empty entry\n";
291 pi = cbk.remove(pi);
292 c = 1;
293 }
294 else
295 cout << cbk(pi);
296 }
297 return c;
298}
299
300
301static int sorttest(const void *a, const void *b)
302{ // for use with qsort C library function.
303 float *c = (float *)a;
304 float *d = (float *)b;
305 float res = (*c - *d);
306 if (res == 0.0)
307 return 0;
308 return (res < 0.0) ? -1 : 1;
309}
310
311EST_FVector sort_matrix(EST_FMatrix &m)
312{
313 int i, j, k;
314 float *v;
315 int n_vals;
316
317 // determine size of triangular part of matrix, excluding diagonal
318 int size = m.num_rows() - 1;
319
320 n_vals = 0;
321 for (i = 0; i < size; ++i)
322 n_vals+=(i + 1);
323
324 cout<<"number of values in EST_FMatrix:" << n_vals << " size " << size << endl;
325
326 v = new float[n_vals];
327
328 for (i = k = 0; i < m.num_rows(); ++i)
329 for (j = i + 1; j < m.num_columns(); ++j, ++k)
330 {
331 cout << i << " " << j << " " << k << " " << (i * size) + k << endl;
332 v[k] = m(j, i);
333 }
334
335 for (i = 0; i < n_vals; ++i)
336 cout << "v[" << i << "] = " << v[i] << endl;
337
338 qsort(v, n_vals, sizeof(float), sorttest);
339
341 for (i = 0; i < n_vals; ++i)
342 vsort[i] = v[i];
343
344 return vsort;
345}
346
347EST_String print_codebook(EST_CBK &cbk, float d, EST_TList<EST_String> &names)
348{
349 EST_Litem *pi;
350 EST_Litem *pj;
351 EST_String s;
352
353 s = ftoString(d) + " ";
354 for (pi = cbk.head(); pi != 0; pi = pi->next())
355 {
356 s += "(";
357 for (pj = cbk(pi).head(); pj != 0; pj = pj->next())
358 {
359 if (names.empty())
360 s += itoString(cbk.item(pi).item(pj));
361 else
362 s += names.nth(cbk.item(pi).item(pj));
363 if (pj->next() != 0)
364 s += " ";
365 }
366 s += ") ";
367 }
368 return s;
369}
370
371void cluster3(EST_FMatrix &m, float d)
372{
373 int n = m.num_rows();
375
376 int i, j;
377 float smallest;
378
379 for (i = 0; i < n; ++i)
380 oldcbk[i].append(i);
381
382 for (i = 0; i < n; ++i)
383 cout << "n: " << i << " " << oldcbk[i] << endl;
384
385
386 for (i = 0; i < n; ++i)
387 for (j = i + 1; j < n; ++j)
388 {
389 smallest = lowestval(m, oldcbk[j], oldcbk[i]);
390 cout << "smallest = " << smallest << " d= " << d << endl << endl;
391 if (smallest < d)
392 {
393 cout << "merging " << i << " " << j << endl << endl;
394 merge(oldcbk, i, j);
395 n--;
396 }
397 }
398
399 for (i = 0; i < n; ++i)
400 cout << "n: " << i << " " << oldcbk[i] << endl;
401}
402/*
403 int cluster2(EST_FMatrix &dist, float d)
404 {
405 int n = dist.num_frames();
406 EST_TList<int> oldcbk[12];
407 EST_TList<int> newcbk[12];
408 float sortval = {2.0, 3.0, 4.0, 5.0, 6.0};
409 int i, j, n, m;
410 EST_Litem *p;
411
412 for (i = 0; i < n; ++i)
413 oldcbk[i].append(i);
414
415 i = 0;
416 while (n > 2)
417 {
418 s = findval(dist, m, n, sortval[i++]);
419 merge9u
420 }
421
422 }
423
424 float findval(EST_FMatrix &dist, int &n, int &m, float val)
425 {
426 int i, j;
427
428 for (i = 0; i < m.num_frames(); ++i)
429 for (j = 0; j < m.order(); ++j)
430 if ((m.x[i][j] < (val + 0.001)) && (m.x[i][j] > (val - 0.001)))
431 return;
432
433 cerr << "Couldn't find value " << val << endl;
434 }
435 */
436float lowestval(EST_FMatrix &m, EST_TList<int> &a, EST_TList<int> &b)
437{
438 EST_Litem *pa, *pb;
439 float lowest = 100000.0;
440
441 cout << "list a:" << a << "list b:" << b;
442
443 for (pa = a.head(); pa != 0; pa = pa->next())
444 for (pb = b.head(); pb != 0; pb = pb->next())
445 {
446 // cout << "m:" << a(pa) << " " << b(pb) << " " << m.x[a(pa)][b(pb)] << endl;
447 if (m(a(pa), b(pb)) < lowest)
448 lowest = m(a(pa), b(pb));
449 }
450 // cout << "lowest " << lowest << endl;
451 return lowest;
452}
453
454// find the lowest value in matrix a above floor, and return it. Also
455// set row and column to be the indices of it.
456float lval(EST_FMatrix &a, float floor, int &row, int &col)
457{
458 int i, j;
459 float lowest = FLT_MAX;
460
461 for (i = 0; i < a.num_rows(); ++i)
462 for (j = 0; j < a.num_rows(); ++j)
463 if ((a(i, j) < lowest) && (a(i, j) > floor))
464 {
465 lowest = a(i, j);
466 row = i;
467 col = j;
468 }
469 return lowest;
470}
471
472float highestval(EST_FMatrix &m, EST_TList<int> &a, EST_TList<int> &b)
473{
474 EST_Litem *pa, *pb;
475 float h = 0.0;
476
477 cout << "list a:" << a << "list b:" << b;
478
479 for (pa = a.head(); pa != 0; pa = pa->next())
480 for (pb = b.head(); pb != 0; pb = pb->next())
481 {
482 if (m(a(pa), b(pb)) > h)
483 h = m(a(pa), b(pb));
484 }
485 // cout << "lowest " << lowest << endl;
486 return h;
487}
488/*
489 float nearest(EST_FMatrix &m, EST_TList<int> &cbk)
490 {
491 EST_Litem *p;
492 float lowest = 100000.0;
493
494 for (p = cbk.head(); p != 0; p = p->next())
495 {
496 cout << "cbk(p) " << cbk(p) << endl;
497 if (cbk(p) < lowest)
498 lowest = cbk(p);
499 }
500
501 cout << "lowest = " << lowest << endl;
502 return lowest;
503 }
504 */
505void merge(EST_TList<int> cbk[], int i, int j)
506{
507 EST_Litem *p;
508
509 for (p = cbk[j].head(); p != 0; p = p->next())
510 cbk[i].append(cbk[j].item(p));
511
512 cbk[j].clear();
513}
514
515int load_names(EST_String file, EST_TList<EST_String> &names)
516{
517 char inbuf[1000];
519
521 if (!inf) cerr << "Can't open names file " << file << endl;
522
523 while(inf.getline(inbuf, 1000))
524 {
525 tmpstr = inbuf;
526 names.append(tmpstr);
527 }
528 return 0;
529}
530
531/*int merge(EST_TList<int> &a, EST_TList<int> &b)
532 {
533 EST_TList<int> newgroup;
534 EST_Litem *p;
535
536 for (p = cbk[j].head(); p != 0; p = p->next())
537 cbk[i].append(cbk[j].item(p));
538
539 cbk[j].clear();
540 }
541 */
542
int num_rows() const
return number of rows