VirtualBox

source: vbox/trunk/src/libs/libvorbis-1.3.7/vq/vqgen.c@ 103131

Last change on this file since 103131 was 96468, checked in by vboxsync, 2 years ago

libs/libvorbis-1.3.7: Re-exporting, hopefully this time everything is there. bugref:10275

File size: 15.6 KB
Line 
1/********************************************************************
2 * *
3 * THIS FILE IS PART OF THE OggVorbis SOFTWARE CODEC SOURCE CODE. *
4 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS *
5 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
6 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. *
7 * *
8 * THE OggVorbis SOURCE CODE IS (C) COPYRIGHT 1994-2001 *
9 * by the Xiph.Org Foundation https://xiph.org/ *
10 * *
11 ********************************************************************
12
13 function: train a VQ codebook
14
15 ********************************************************************/
16
17/* This code is *not* part of libvorbis. It is used to generate
18 trained codebooks offline and then spit the results into a
19 pregenerated codebook that is compiled into libvorbis. It is an
20 expensive (but good) algorithm. Run it on big iron. */
21
22/* There are so many optimizations to explore in *both* stages that
23 considering the undertaking is almost withering. For now, we brute
24 force it all */
25
26#include <stdlib.h>
27#include <stdio.h>
28#include <math.h>
29#include <string.h>
30
31#include "vqgen.h"
32#include "bookutil.h"
33
34/* Codebook generation happens in two steps:
35
36 1) Train the codebook with data collected from the encoder: We use
37 one of a few error metrics (which represent the distance between a
38 given data point and a candidate point in the training set) to
39 divide the training set up into cells representing roughly equal
40 probability of occurring.
41
42 2) Generate the codebook and auxiliary data from the trained data set
43*/
44
45/* Codebook training ****************************************************
46 *
47 * The basic idea here is that a VQ codebook is like an m-dimensional
48 * foam with n bubbles. The bubbles compete for space/volume and are
49 * 'pressurized' [biased] according to some metric. The basic alg
50 * iterates through allowing the bubbles to compete for space until
51 * they converge (if the damping is dome properly) on a steady-state
52 * solution. Individual input points, collected from libvorbis, are
53 * used to train the algorithm monte-carlo style. */
54
55/* internal helpers *****************************************************/
56#define vN(data,i) (data+v->elements*i)
57
58/* default metric; squared 'distance' from desired value. */
59float _dist(vqgen *v,float *a, float *b){
60 int i;
61 int el=v->elements;
62 float acc=0.f;
63 for(i=0;i<el;i++){
64 float val=(a[i]-b[i]);
65 acc+=val*val;
66 }
67 return sqrt(acc);
68}
69
70float *_weight_null(vqgen *v,float *a){
71 return a;
72}
73
74/* *must* be beefed up. */
75void _vqgen_seed(vqgen *v){
76 long i;
77 for(i=0;i<v->entries;i++)
78 memcpy(_now(v,i),_point(v,i),sizeof(float)*v->elements);
79 v->seeded=1;
80}
81
82int directdsort(const void *a, const void *b){
83 float av=*((float *)a);
84 float bv=*((float *)b);
85 return (av<bv)-(av>bv);
86}
87
88void vqgen_cellmetric(vqgen *v){
89 int j,k;
90 float min=-1.f,max=-1.f,mean=0.f,acc=0.f;
91 long dup=0,unused=0;
92 #ifdef NOISY
93 int i;
94 char buff[80];
95 float spacings[v->entries];
96 int count=0;
97 FILE *cells;
98 sprintf(buff,"cellspace%d.m",v->it);
99 cells=fopen(buff,"w");
100#endif
101
102 /* minimum, maximum, cell spacing */
103 for(j=0;j<v->entries;j++){
104 float localmin=-1.;
105
106 for(k=0;k<v->entries;k++){
107 if(j!=k){
108 float this=_dist(v,_now(v,j),_now(v,k));
109 if(this>0){
110 if(v->assigned[k] && (localmin==-1 || this<localmin))
111 localmin=this;
112 }else{
113 if(k<j){
114 dup++;
115 break;
116 }
117 }
118 }
119 }
120 if(k<v->entries)continue;
121
122 if(v->assigned[j]==0){
123 unused++;
124 continue;
125 }
126
127 localmin=v->max[j]+localmin/2; /* this gives us rough diameter */
128 if(min==-1 || localmin<min)min=localmin;
129 if(max==-1 || localmin>max)max=localmin;
130 mean+=localmin;
131 acc++;
132#ifdef NOISY
133 spacings[count++]=localmin;
134#endif
135 }
136
137 fprintf(stderr,"cell diameter: %.03g::%.03g::%.03g (%ld unused/%ld dup)\n",
138 min,mean/acc,max,unused,dup);
139
140#ifdef NOISY
141 qsort(spacings,count,sizeof(float),directdsort);
142 for(i=0;i<count;i++)
143 fprintf(cells,"%g\n",spacings[i]);
144 fclose(cells);
145#endif
146
147}
148
149/* External calls *******************************************************/
150
151/* We have two forms of quantization; in the first, each vector
152 element in the codebook entry is orthogonal. Residues would use this
153 quantization for example.
154
155 In the second, we have a sequence of monotonically increasing
156 values that we wish to quantize as deltas (to save space). We
157 still need to quantize so that absolute values are accurate. For
158 example, LSP quantizes all absolute values, but the book encodes
159 distance between values because each successive value is larger
160 than the preceeding value. Thus the desired quantibits apply to
161 the encoded (delta) values, not abs positions. This requires minor
162 additional encode-side trickery. */
163
164void vqgen_quantize(vqgen *v,quant_meta *q){
165
166 float maxdel;
167 float mindel;
168
169 float delta;
170 float maxquant=((1<<q->quant)-1);
171
172 int j,k;
173
174 mindel=maxdel=_now(v,0)[0];
175
176 for(j=0;j<v->entries;j++){
177 float last=0.f;
178 for(k=0;k<v->elements;k++){
179 if(mindel>_now(v,j)[k]-last)mindel=_now(v,j)[k]-last;
180 if(maxdel<_now(v,j)[k]-last)maxdel=_now(v,j)[k]-last;
181 if(q->sequencep)last=_now(v,j)[k];
182 }
183 }
184
185
186 /* first find the basic delta amount from the maximum span to be
187 encoded. Loosen the delta slightly to allow for additional error
188 during sequence quantization */
189
190 delta=(maxdel-mindel)/((1<<q->quant)-1.5f);
191
192 q->min=_float32_pack(mindel);
193 q->delta=_float32_pack(delta);
194
195 mindel=_float32_unpack(q->min);
196 delta=_float32_unpack(q->delta);
197
198 for(j=0;j<v->entries;j++){
199 float last=0;
200 for(k=0;k<v->elements;k++){
201 float val=_now(v,j)[k];
202 float now=rint((val-last-mindel)/delta);
203
204 _now(v,j)[k]=now;
205 if(now<0){
206 /* be paranoid; this should be impossible */
207 fprintf(stderr,"fault; quantized value<0\n");
208 exit(1);
209 }
210
211 if(now>maxquant){
212 /* be paranoid; this should be impossible */
213 fprintf(stderr,"fault; quantized value>max\n");
214 exit(1);
215 }
216 if(q->sequencep)last=(now*delta)+mindel+last;
217 }
218 }
219}
220
221/* much easier :-). Unlike in the codebook, we don't un-log log
222 scales; we just make sure they're properly offset. */
223void vqgen_unquantize(vqgen *v,quant_meta *q){
224 long j,k;
225 float mindel=_float32_unpack(q->min);
226 float delta=_float32_unpack(q->delta);
227
228 for(j=0;j<v->entries;j++){
229 float last=0.f;
230 for(k=0;k<v->elements;k++){
231 float now=_now(v,j)[k];
232 now=fabs(now)*delta+last+mindel;
233 if(q->sequencep)last=now;
234 _now(v,j)[k]=now;
235 }
236 }
237}
238
239void vqgen_init(vqgen *v,int elements,int aux,int entries,float mindist,
240 float (*metric)(vqgen *,float *, float *),
241 float *(*weight)(vqgen *,float *),int centroid){
242 memset(v,0,sizeof(vqgen));
243
244 v->centroid=centroid;
245 v->elements=elements;
246 v->aux=aux;
247 v->mindist=mindist;
248 v->allocated=32768;
249 v->pointlist=_ogg_malloc(v->allocated*(v->elements+v->aux)*sizeof(float));
250
251 v->entries=entries;
252 v->entrylist=_ogg_malloc(v->entries*v->elements*sizeof(float));
253 v->assigned=_ogg_malloc(v->entries*sizeof(long));
254 v->bias=_ogg_calloc(v->entries,sizeof(float));
255 v->max=_ogg_calloc(v->entries,sizeof(float));
256 if(metric)
257 v->metric_func=metric;
258 else
259 v->metric_func=_dist;
260 if(weight)
261 v->weight_func=weight;
262 else
263 v->weight_func=_weight_null;
264
265 v->asciipoints=tmpfile();
266
267}
268
269void vqgen_addpoint(vqgen *v, float *p,float *a){
270 int k;
271 for(k=0;k<v->elements;k++)
272 fprintf(v->asciipoints,"%.12g\n",p[k]);
273 for(k=0;k<v->aux;k++)
274 fprintf(v->asciipoints,"%.12g\n",a[k]);
275
276 if(v->points>=v->allocated){
277 v->allocated*=2;
278 v->pointlist=_ogg_realloc(v->pointlist,v->allocated*(v->elements+v->aux)*
279 sizeof(float));
280 }
281
282 memcpy(_point(v,v->points),p,sizeof(float)*v->elements);
283 if(v->aux)memcpy(_point(v,v->points)+v->elements,a,sizeof(float)*v->aux);
284
285 /* quantize to the density mesh if it's selected */
286 if(v->mindist>0.f){
287 /* quantize to the mesh */
288 for(k=0;k<v->elements+v->aux;k++)
289 _point(v,v->points)[k]=
290 rint(_point(v,v->points)[k]/v->mindist)*v->mindist;
291 }
292 v->points++;
293 if(!(v->points&0xff))spinnit("loading... ",v->points);
294}
295
296/* yes, not threadsafe. These utils aren't */
297static int sortit=0;
298static int sortsize=0;
299static int meshcomp(const void *a,const void *b){
300 if(((sortit++)&0xfff)==0)spinnit("sorting mesh...",sortit);
301 return(memcmp(a,b,sortsize));
302}
303
304void vqgen_sortmesh(vqgen *v){
305 sortit=0;
306 if(v->mindist>0.f){
307 long i,march=1;
308
309 /* sort to make uniqueness detection trivial */
310 sortsize=(v->elements+v->aux)*sizeof(float);
311 qsort(v->pointlist,v->points,sortsize,meshcomp);
312
313 /* now march through and eliminate dupes */
314 for(i=1;i<v->points;i++){
315 if(memcmp(_point(v,i),_point(v,i-1),sortsize)){
316 /* a new, unique entry. march it down */
317 if(i>march)memcpy(_point(v,march),_point(v,i),sortsize);
318 march++;
319 }
320 spinnit("eliminating density... ",v->points-i);
321 }
322
323 /* we're done */
324 fprintf(stderr,"\r%ld training points remining out of %ld"
325 " after density mesh (%ld%%)\n",march,v->points,march*100/v->points);
326 v->points=march;
327
328 }
329 v->sorted=1;
330}
331
332float vqgen_iterate(vqgen *v,int biasp){
333 long i,j,k;
334
335 float fdesired;
336 long desired;
337 long desired2;
338
339 float asserror=0.f;
340 float meterror=0.f;
341 float *new;
342 float *new2;
343 long *nearcount;
344 float *nearbias;
345 #ifdef NOISY
346 char buff[80];
347 FILE *assig;
348 FILE *bias;
349 FILE *cells;
350 sprintf(buff,"cells%d.m",v->it);
351 cells=fopen(buff,"w");
352 sprintf(buff,"assig%d.m",v->it);
353 assig=fopen(buff,"w");
354 sprintf(buff,"bias%d.m",v->it);
355 bias=fopen(buff,"w");
356 #endif
357
358
359 if(v->entries<2){
360 fprintf(stderr,"generation requires at least two entries\n");
361 exit(1);
362 }
363
364 if(!v->sorted)vqgen_sortmesh(v);
365 if(!v->seeded)_vqgen_seed(v);
366
367 fdesired=(float)v->points/v->entries;
368 desired=fdesired;
369 desired2=desired*2;
370 new=_ogg_malloc(sizeof(float)*v->entries*v->elements);
371 new2=_ogg_malloc(sizeof(float)*v->entries*v->elements);
372 nearcount=_ogg_malloc(v->entries*sizeof(long));
373 nearbias=_ogg_malloc(v->entries*desired2*sizeof(float));
374
375 /* fill in nearest points for entry biasing */
376 /*memset(v->bias,0,sizeof(float)*v->entries);*/
377 memset(nearcount,0,sizeof(long)*v->entries);
378 memset(v->assigned,0,sizeof(long)*v->entries);
379 if(biasp){
380 for(i=0;i<v->points;i++){
381 float *ppt=v->weight_func(v,_point(v,i));
382 float firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
383 float secondmetric=v->metric_func(v,_now(v,1),ppt)+v->bias[1];
384 long firstentry=0;
385 long secondentry=1;
386
387 if(!(i&0xff))spinnit("biasing... ",v->points+v->points+v->entries-i);
388
389 if(firstmetric>secondmetric){
390 float temp=firstmetric;
391 firstmetric=secondmetric;
392 secondmetric=temp;
393 firstentry=1;
394 secondentry=0;
395 }
396
397 for(j=2;j<v->entries;j++){
398 float thismetric=v->metric_func(v,_now(v,j),ppt)+v->bias[j];
399 if(thismetric<secondmetric){
400 if(thismetric<firstmetric){
401 secondmetric=firstmetric;
402 secondentry=firstentry;
403 firstmetric=thismetric;
404 firstentry=j;
405 }else{
406 secondmetric=thismetric;
407 secondentry=j;
408 }
409 }
410 }
411
412 j=firstentry;
413 for(j=0;j<v->entries;j++){
414
415 float thismetric,localmetric;
416 float *nearbiasptr=nearbias+desired2*j;
417 long k=nearcount[j];
418
419 localmetric=v->metric_func(v,_now(v,j),ppt);
420 /* 'thismetric' is to be the bias value necessary in the current
421 arrangement for entry j to capture point i */
422 if(firstentry==j){
423 /* use the secondary entry as the threshhold */
424 thismetric=secondmetric-localmetric;
425 }else{
426 /* use the primary entry as the threshhold */
427 thismetric=firstmetric-localmetric;
428 }
429
430 /* support the idea of 'minimum distance'... if we want the
431 cells in a codebook to be roughly some minimum size (as with
432 the low resolution residue books) */
433
434 /* a cute two-stage delayed sorting hack */
435 if(k<desired){
436 nearbiasptr[k]=thismetric;
437 k++;
438 if(k==desired){
439 spinnit("biasing... ",v->points+v->points+v->entries-i);
440 qsort(nearbiasptr,desired,sizeof(float),directdsort);
441 }
442
443 }else if(thismetric>nearbiasptr[desired-1]){
444 nearbiasptr[k]=thismetric;
445 k++;
446 if(k==desired2){
447 spinnit("biasing... ",v->points+v->points+v->entries-i);
448 qsort(nearbiasptr,desired2,sizeof(float),directdsort);
449 k=desired;
450 }
451 }
452 nearcount[j]=k;
453 }
454 }
455
456 /* inflate/deflate */
457
458 for(i=0;i<v->entries;i++){
459 float *nearbiasptr=nearbias+desired2*i;
460
461 spinnit("biasing... ",v->points+v->entries-i);
462
463 /* due to the delayed sorting, we likely need to finish it off....*/
464 if(nearcount[i]>desired)
465 qsort(nearbiasptr,nearcount[i],sizeof(float),directdsort);
466
467 v->bias[i]=nearbiasptr[desired-1];
468
469 }
470 }else{
471 memset(v->bias,0,v->entries*sizeof(float));
472 }
473
474 /* Now assign with new bias and find new midpoints */
475 for(i=0;i<v->points;i++){
476 float *ppt=v->weight_func(v,_point(v,i));
477 float firstmetric=v->metric_func(v,_now(v,0),ppt)+v->bias[0];
478 long firstentry=0;
479
480 if(!(i&0xff))spinnit("centering... ",v->points-i);
481
482 for(j=0;j<v->entries;j++){
483 float thismetric=v->metric_func(v,_now(v,j),ppt)+v->bias[j];
484 if(thismetric<firstmetric){
485 firstmetric=thismetric;
486 firstentry=j;
487 }
488 }
489
490 j=firstentry;
491
492#ifdef NOISY
493 fprintf(cells,"%g %g\n%g %g\n\n",
494 _now(v,j)[0],_now(v,j)[1],
495 ppt[0],ppt[1]);
496#endif
497
498 firstmetric-=v->bias[j];
499 meterror+=firstmetric;
500
501 if(v->centroid==0){
502 /* set up midpoints for next iter */
503 if(v->assigned[j]++){
504 for(k=0;k<v->elements;k++)
505 vN(new,j)[k]+=ppt[k];
506 if(firstmetric>v->max[j])v->max[j]=firstmetric;
507 }else{
508 for(k=0;k<v->elements;k++)
509 vN(new,j)[k]=ppt[k];
510 v->max[j]=firstmetric;
511 }
512 }else{
513 /* centroid */
514 if(v->assigned[j]++){
515 for(k=0;k<v->elements;k++){
516 if(vN(new,j)[k]>ppt[k])vN(new,j)[k]=ppt[k];
517 if(vN(new2,j)[k]<ppt[k])vN(new2,j)[k]=ppt[k];
518 }
519 if(firstmetric>v->max[firstentry])v->max[j]=firstmetric;
520 }else{
521 for(k=0;k<v->elements;k++){
522 vN(new,j)[k]=ppt[k];
523 vN(new2,j)[k]=ppt[k];
524 }
525 v->max[firstentry]=firstmetric;
526 }
527 }
528 }
529
530 /* assign midpoints */
531
532 for(j=0;j<v->entries;j++){
533#ifdef NOISY
534 fprintf(assig,"%ld\n",v->assigned[j]);
535 fprintf(bias,"%g\n",v->bias[j]);
536#endif
537 asserror+=fabs(v->assigned[j]-fdesired);
538 if(v->assigned[j]){
539 if(v->centroid==0){
540 for(k=0;k<v->elements;k++)
541 _now(v,j)[k]=vN(new,j)[k]/v->assigned[j];
542 }else{
543 for(k=0;k<v->elements;k++)
544 _now(v,j)[k]=(vN(new,j)[k]+vN(new2,j)[k])/2.f;
545 }
546 }
547 }
548
549 asserror/=(v->entries*fdesired);
550
551 fprintf(stderr,"Pass #%d... ",v->it);
552 fprintf(stderr,": dist %g(%g) metric error=%g \n",
553 asserror,fdesired,meterror/v->points);
554 v->it++;
555
556 free(new);
557 free(nearcount);
558 free(nearbias);
559#ifdef NOISY
560 fclose(assig);
561 fclose(bias);
562 fclose(cells);
563#endif
564 return(asserror);
565}
566
Note: See TracBrowser for help on using the repository browser.

© 2024 Oracle Support Privacy / Do Not Sell My Info Terms of Use Trademark Policy Automated Access Etiquette