PocketSphinx 5prealpha
fsg_lextree.c
Go to the documentation of this file.
1/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
2/* ====================================================================
3 * Copyright (c) 1999-2010 Carnegie Mellon University. All rights
4 * reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 *
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in
15 * the documentation and/or other materials provided with the
16 * distribution.
17 *
18 *
19 * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
20 * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
21 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
23 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 *
31 * ====================================================================
32 *
33 */
41/* System headers. */
42#include <stdio.h>
43#include <string.h>
44#include <assert.h>
45
46/* SphinxBase headers. */
47#include <sphinxbase/ckd_alloc.h>
48#include <sphinxbase/err.h>
49
50/* Local headers. */
51#include "fsg_lextree.h"
52
53#define __FSG_DBG__ 0
54
55/* A linklist structure that is actually used to build local lextrees at grammar nodes */
56typedef struct fsg_glist_linklist_t {
57 int32 ci, rc;
58 glist_t glist;
59 struct fsg_glist_linklist_t *next;
61
68static fsg_pnode_t *fsg_psubtree_init(fsg_lextree_t *tree,
69 fsg_model_t *fsg,
70 int32 from_state,
71 fsg_pnode_t **alloc_head);
72
77static void fsg_psubtree_free(fsg_pnode_t *alloc_head);
78
83static void fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *root, FILE *fp);
84
88static void
89fsg_lextree_lc_rc(fsg_lextree_t *lextree)
90{
91 int32 s, i, j;
92 int32 n_ci;
93 fsg_model_t *fsg;
94 int32 silcipid;
95 int32 len;
96
97 silcipid = bin_mdef_silphone(lextree->mdef);
98 assert(silcipid >= 0);
99 n_ci = bin_mdef_n_ciphone(lextree->mdef);
100
101 fsg = lextree->fsg;
102 /*
103 * lextree->lc[s] = set of left context CIphones for state s. Similarly, rc[s]
104 * for right context CIphones.
105 */
106 lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc));
107 lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc));
108 E_INFO("Allocated %d bytes (%d KiB) for left and right context phones\n",
109 fsg->n_state * (n_ci + 1) * 2,
110 fsg->n_state * (n_ci + 1) * 2 / 1024);
111
112
113 for (s = 0; s < fsg->n_state; s++) {
114 fsg_arciter_t *itor;
115 for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) {
116 fsg_link_t *l = fsg_arciter_get(itor);
117 int32 dictwid;
119 if (fsg_link_wid(l) >= 0) {
120 dictwid = dict_wordid(lextree->dict,
121 fsg_model_word_str(lextree->fsg, l->wid));
122
123 /*
124 * Add the first CIphone of l->wid to the rclist of state s, and
125 * the last CIphone to lclist of state d.
126 * (Filler phones are a pain to deal with. There is no direct
127 * marking of a filler phone; but only filler words are supposed to
128 * use such phones, so we use that fact. HACK!! FRAGILE!!)
129 *
130 * UPD: tests carsh here if .fsg model used with wrong hmm and
131 * dictionary
132 */
133 if (fsg_model_is_filler(fsg, fsg_link_wid(l))) {
134 /* Filler phone; use silence phone as context */
135 lextree->rc[fsg_link_from_state(l)][silcipid] = 1;
136 lextree->lc[fsg_link_to_state(l)][silcipid] = 1;
137 }
138 else {
139 len = dict_pronlen(lextree->dict, dictwid);
140 lextree->rc[fsg_link_from_state(l)][dict_pron(lextree->dict, dictwid, 0)] = 1;
141 lextree->lc[fsg_link_to_state(l)][dict_pron(lextree->dict, dictwid, len - 1)] = 1;
142 }
143 }
144 }
145 }
146
147 for (s = 0; s < fsg->n_state; s++) {
148 /*
149 * Add SIL phone to the lclist and rclist of each state. Strictly
150 * speaking, only needed at start and final states, respectively, but
151 * all states considered since the user may change the start and final
152 * states. In any case, most applications would have a silence self
153 * loop at each state, hence these would be needed anyway.
154 */
155 lextree->lc[s][silcipid] = 1;
156 lextree->rc[s][silcipid] = 1;
157 }
158
159 /*
160 * Propagate lc and rc lists past null transitions. (Since FSG contains
161 * null transitions closure, no need to worry about a chain of successive
162 * null transitions. Right??)
163 *
164 * This can't be joined with the previous loop because we first calculate
165 * contexts and only then we can propagate them.
166 */
167 for (s = 0; s < fsg->n_state; s++) {
168 fsg_arciter_t *itor;
169 for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) {
170 fsg_link_t *l = fsg_arciter_get(itor);
171 if (fsg_link_wid(l) < 0) {
172 /*
173 * lclist(d) |= lclist(s), because all the words ending up at s, can
174 * now also end at d, becoming the left context for words leaving d.
175 */
176 for (i = 0; i < n_ci; i++)
177 lextree->lc[fsg_link_to_state(l)][i] |= lextree->lc[fsg_link_from_state(l)][i];
178 /*
179 * Similarly, rclist(s) |= rclist(d), because all the words leaving d
180 * can equivalently leave s, becoming the right context for words
181 * ending up at s.
182 */
183 for (i = 0; i < n_ci; i++)
184 lextree->rc[fsg_link_from_state(l)][i] |= lextree->rc[fsg_link_to_state(l)][i];
185 }
186 }
187 }
188
189 /* Convert the bit-vector representation into a list */
190 for (s = 0; s < fsg->n_state; s++) {
191 j = 0;
192 for (i = 0; i < n_ci; i++) {
193 if (lextree->lc[s][i]) {
194 lextree->lc[s][j] = i;
195 j++;
196 }
197 }
198 lextree->lc[s][j] = -1; /* Terminate the list */
199
200 j = 0;
201 for (i = 0; i < n_ci; i++) {
202 if (lextree->rc[s][i]) {
203 lextree->rc[s][j] = i;
204 j++;
205 }
206 }
207 lextree->rc[s][j] = -1; /* Terminate the list */
208 }
209}
210
211/*
212 * For now, allocate the entire lextree statically.
213 */
215fsg_lextree_init(fsg_model_t * fsg, dict_t *dict, dict2pid_t *d2p,
216 bin_mdef_t *mdef, hmm_context_t *ctx,
217 int32 wip, int32 pip)
218{
219 int32 s, n_leaves;
220 fsg_lextree_t *lextree;
221 fsg_pnode_t *pn;
222
223 lextree = ckd_calloc(1, sizeof(fsg_lextree_t));
224 lextree->fsg = fsg;
225 lextree->root = ckd_calloc(fsg_model_n_state(fsg),
226 sizeof(fsg_pnode_t *));
227 lextree->alloc_head = ckd_calloc(fsg_model_n_state(fsg),
228 sizeof(fsg_pnode_t *));
229 lextree->ctx = ctx;
230 lextree->dict = dict;
231 lextree->d2p = d2p;
232 lextree->mdef = mdef;
233 lextree->wip = wip;
234 lextree->pip = pip;
235
236 /* Compute lc and rc for fsg. */
237 fsg_lextree_lc_rc(lextree);
238
239 /* Create lextree for each state, i.e. an HMM network that
240 * represents words for all arcs exiting that state. Note that
241 * for a dense grammar such as an N-gram model, this will
242 * rapidly exhaust all available memory. */
243 lextree->n_pnode = 0;
244 n_leaves = 0;
245 for (s = 0; s < fsg_model_n_state(fsg); s++) {
246 lextree->root[s] =
247 fsg_psubtree_init(lextree, fsg, s, &(lextree->alloc_head[s]));
248
249 for (pn = lextree->alloc_head[s]; pn; pn = pn->alloc_next) {
250 lextree->n_pnode++;
251 if (pn->leaf)
252 ++n_leaves;
253 }
254 }
255 E_INFO("%d HMM nodes in lextree (%d leaves)\n",
256 lextree->n_pnode, n_leaves);
257 E_INFO("Allocated %d bytes (%d KiB) for all lextree nodes\n",
258 lextree->n_pnode * sizeof(fsg_pnode_t),
259 lextree->n_pnode * sizeof(fsg_pnode_t) / 1024);
260 E_INFO("Allocated %d bytes (%d KiB) for lextree leafnodes\n",
261 n_leaves * sizeof(fsg_pnode_t),
262 n_leaves * sizeof(fsg_pnode_t) / 1024);
263
264#if __FSG_DBG__
265 fsg_lextree_dump(lextree, stdout);
266#endif
267
268 return lextree;
269}
270
271
272void
273fsg_lextree_dump(fsg_lextree_t * lextree, FILE * fp)
274{
275 int32 s;
276
277 for (s = 0; s < fsg_model_n_state(lextree->fsg); s++) {
278 fprintf(fp, "State %5d root %p\n", s, lextree->root[s]);
279 fsg_psubtree_dump(lextree, lextree->root[s], fp);
280 }
281 fflush(fp);
282}
283
284
285void
287{
288 int32 s;
289
290 if (lextree == NULL)
291 return;
292
293 if (lextree->fsg)
294 for (s = 0; s < fsg_model_n_state(lextree->fsg); s++)
295 fsg_psubtree_free(lextree->alloc_head[s]);
296
297 ckd_free_2d(lextree->lc);
298 ckd_free_2d(lextree->rc);
299 ckd_free(lextree->root);
300 ckd_free(lextree->alloc_head);
301 ckd_free(lextree);
302}
303
304/******************************
305 * psubtree stuff starts here *
306 ******************************/
307
308void fsg_glist_linklist_free(fsg_glist_linklist_t *glist)
309{
310 if (glist) {
311 fsg_glist_linklist_t *nxtglist;
312 if (glist->glist)
313 glist_free(glist->glist);
314 nxtglist = glist->next;
315 while (nxtglist) {
316 ckd_free(glist);
317 glist = nxtglist;
318 if (glist->glist)
319 glist_free(glist->glist);
320 nxtglist = glist->next;
321 }
322 ckd_free(glist);
323 }
324 return;
325}
326
327void
329{
330 int32 i;
331
332 for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
333 ctxt->bv[i] = 0xffffffff;
334}
335
337{
338 int32 i;
339 uint32 res = 0;
340
341 for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
342 res |= (src->bv[i] = ~(sub->bv[i]) & src->bv[i]);
343 return res;
344}
345
346
347/*
348 * fsg_pnode_ctxt_sub(fsg_pnode_ctxt_t * src, fsg_pnode_ctxt_t * sub)
349 * This has been moved into a macro in fsg_psubtree.h
350 * because it is called so frequently!
351 */
352
353
354/*
355 * Add the word emitted by the given transition (fsglink) to the given lextree
356 * (rooted at root), and return the new lextree root. (There may actually be
357 * several root nodes, maintained in a linked list via fsg_pnode_t.sibling.
358 * "root" is the head of this list.)
359 * lclist, rclist: sets of left and right context phones for this link.
360 * alloc_head: head of a linear list of all allocated pnodes for the parent
361 * FSG state, kept elsewhere and updated by this routine.
362 */
363static fsg_pnode_t *
364psubtree_add_trans(fsg_lextree_t *lextree,
365 fsg_pnode_t * root,
366 fsg_glist_linklist_t **curglist,
367 fsg_link_t * fsglink,
368 int16 *lclist, int16 *rclist,
369 fsg_pnode_t ** alloc_head)
370{
371 int32 silcipid; /* Silence CI phone ID */
372 int32 pronlen; /* Pronunciation length */
373 int32 wid; /* FSG (not dictionary!!) word ID */
374 int32 dictwid; /* Dictionary (not FSG!!) word ID */
375 int32 ssid; /* Senone Sequence ID */
376 int32 tmatid;
377 gnode_t *gn;
378 fsg_pnode_t *pnode, *pred, *head;
379 int32 n_ci, p, lc, rc;
380 glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */
381 glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */
382 int32 i, j;
383 int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0;
384
385 silcipid = bin_mdef_silphone(lextree->mdef);
386 n_ci = bin_mdef_n_ciphone(lextree->mdef);
387
388 wid = fsg_link_wid(fsglink);
389 assert(wid >= 0); /* Cannot be a null transition */
390 dictwid = dict_wordid(lextree->dict,
391 fsg_model_word_str(lextree->fsg, wid));
392 pronlen = dict_pronlen(lextree->dict, dictwid);
393 assert(pronlen >= 1);
394
395 assert(lclist[0] >= 0); /* At least one phonetic context provided */
396 assert(rclist[0] >= 0);
397
398 head = *alloc_head;
399 pred = NULL;
400
401 if (pronlen == 1) { /* Single-phone word */
402 int ci = dict_first_phone(lextree->dict, dictwid);
403 /* Only non-filler words are mpx */
404 if (!dict_filler_word(lextree->dict, dictwid)) {
405 /*
406 * Left diphone ID for single-phone words already assumes SIL is right
407 * context; only left contexts need to be handled.
408 */
409 lc_pnodelist = NULL;
410
411 for (i = 0; lclist[i] >= 0; i++) {
412 lc = lclist[i];
413 ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid);
414 tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid));
415 /* Check if this ssid already allocated for some other context */
416 for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
417 pnode = (fsg_pnode_t *) gnode_ptr(gn);
418
419 if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) {
420 /* already allocated; share it for this context phone */
421 fsg_pnode_add_ctxt(pnode, lc);
422 break;
423 }
424 }
425
426 if (!gn) { /* ssid not already allocated */
427 pnode =
428 (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
429 pnode->ctx = lextree->ctx;
430 pnode->next.fsglink = fsglink;
431 pnode->logs2prob =
432 (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
433 + lextree->wip + lextree->pip;
434 pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
435 pnode->ppos = 0;
436 pnode->leaf = TRUE;
437 pnode->sibling = root; /* All root nodes linked together */
438 fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */
439 pnode->alloc_next = head;
440 head = pnode;
441 root = pnode;
442 ++n_lc_alloc;
443
444 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
445
446 lc_pnodelist =
447 glist_add_ptr(lc_pnodelist, (void *) pnode);
448 }
449 }
450
451 glist_free(lc_pnodelist);
452 }
453 else { /* Filler word; no context modelled */
454 ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... */
455 tmatid = bin_mdef_pid2tmatid(lextree->mdef, ci);
456
457 pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
458 pnode->ctx = lextree->ctx;
459 pnode->next.fsglink = fsglink;
460 pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
461 + lextree->wip + lextree->pip;
462 pnode->ci_ext = silcipid; /* Presents SIL as context to neighbors */
463 pnode->ppos = 0;
464 pnode->leaf = TRUE;
465 pnode->sibling = root;
466 fsg_pnode_add_all_ctxt(&(pnode->ctxt));
467 pnode->alloc_next = head;
468 head = pnode;
469 root = pnode;
470 ++n_int_alloc;
471
472 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
473 }
474 }
475 else { /* Multi-phone word */
476 fsg_pnode_t **ssid_pnode_map; /* Temp array of ssid->pnode mapping */
477 ssid_pnode_map =
478 (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *));
479 lc_pnodelist = NULL;
480 rc_pnodelist = NULL;
481
482 for (p = 0; p < pronlen; p++) {
483 int ci = dict_pron(lextree->dict, dictwid, p);
484 if (p == 0) { /* Root phone, handle required left contexts */
485 /* Find if we already have an lc_pnodelist for the first phone of this word */
487
488 rc = dict_pron(lextree->dict, dictwid, 1);
489 for (glist = *curglist;
490 glist && glist->glist;
491 glist = glist->next) {
492 if (glist->ci == ci && glist->rc == rc)
493 break;
494 }
495 if (glist && glist->glist) {
496 assert(glist->ci == ci && glist->rc == rc);
497 /* We've found a valid glist. Hook to it and move to next phoneme */
498 E_DEBUG(2,("Found match for (%d,%d)\n", ci, rc));
499 lc_pnodelist = glist->glist;
500 /* Set the predecessor node for the future tree first */
501 pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist);
502 continue;
503 }
504 else {
505 /* Two cases that can bring us here
506 * a. glist == NULL, i.e. end of current list. Create new entry.
507 * b. glist->glist == NULL, i.e. first entry into list.
508 */
509 if (glist == NULL) { /* Case a; reduce it to case b by allocing glist */
510 glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t));
511 glist->next = *curglist;
512 *curglist = glist;
513 }
514 glist->ci = ci;
515 glist->rc = rc;
516 lc_pnodelist = glist->glist = NULL; /* Gets created below */
517 }
518
519 for (i = 0; lclist[i] >= 0; i++) {
520 lc = lclist[i];
521 ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc);
522 tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid));
523 /* Compression is not done by d2p, so we do it
524 * here. This might be slow, but it might not
525 * be... we'll see. */
526 pnode = ssid_pnode_map[0];
527 for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) {
528 pnode = ssid_pnode_map[j];
529 if (hmm_nonmpx_ssid(&pnode->hmm) == ssid)
530 break;
531 }
532 assert(j < n_ci);
533 if (!pnode) { /* Allocate pnode for this new ssid */
534 pnode =
535 (fsg_pnode_t *) ckd_calloc(1,
536 sizeof
537 (fsg_pnode_t));
538 pnode->ctx = lextree->ctx;
539 /* This bit is tricky! For now we'll put the prob in the final link only */
540 /* pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
541 + lextree->wip + lextree->pip; */
542 pnode->logs2prob = lextree->wip + lextree->pip;
543 pnode->ci_ext = dict_first_phone(lextree->dict, dictwid);
544 pnode->ppos = 0;
545 pnode->leaf = FALSE;
546 pnode->sibling = root; /* All root nodes linked together */
547 pnode->alloc_next = head;
548 head = pnode;
549 root = pnode;
550 ++n_lc_alloc;
551
552 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
553
554 lc_pnodelist =
555 glist_add_ptr(lc_pnodelist, (void *) pnode);
556 ssid_pnode_map[j] = pnode;
557 }
558 fsg_pnode_add_ctxt(pnode, lc);
559 }
560 /* Put the lc_pnodelist back into glist */
561 glist->glist = lc_pnodelist;
562
563 /* The predecessor node for the future tree is the root */
564 pred = root;
565 }
566 else if (p != pronlen - 1) { /* Word internal phone */
567 fsg_pnode_t *pnodeyoungest;
568
569 ssid = dict2pid_internal(lextree->d2p, dictwid, p);
570 tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p));
571 /* First check if we already have this ssid in our tree */
572 pnode = pred->next.succ;
573 pnodeyoungest = pnode; /* The youngest sibling */
574 while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) {
575 pnode = pnode->sibling;
576 }
577 if (pnode && (hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) {
578 /* Found the ssid; go to next phoneme */
579 E_DEBUG(2,("Found match for %d\n", ci));
580 pred = pnode;
581 continue;
582 }
583
584 /* pnode not found, allocate it */
585 pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t));
586 pnode->ctx = lextree->ctx;
587 pnode->logs2prob = lextree->pip;
588 pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
589 pnode->ppos = p;
590 pnode->leaf = FALSE;
591 pnode->sibling = pnodeyoungest; /* May be NULL */
592 if (p == 1) { /* Predecessor = set of root nodes for left ctxts */
593 for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
594 pred = (fsg_pnode_t *) gnode_ptr(gn);
595 pred->next.succ = pnode;
596 }
597 }
598 else { /* Predecessor = word internal node */
599 pred->next.succ = pnode;
600 }
601 pnode->alloc_next = head;
602 head = pnode;
603 ++n_int_alloc;
604
605 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
606
607 pred = pnode;
608 }
609 else { /* Leaf phone, handle required right contexts */
610 /* Note, leaf phones are not part of the tree */
611 xwdssid_t *rssid;
612 memset((void *) ssid_pnode_map, 0,
613 n_ci * sizeof(fsg_pnode_t *));
614 lc = dict_pron(lextree->dict, dictwid, p-1);
615 rssid = dict2pid_rssid(lextree->d2p, ci, lc);
616 tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p));
617
618 for (i = 0; rclist[i] >= 0; i++) {
619 rc = rclist[i];
620
621 j = rssid->cimap[rc];
622 ssid = rssid->ssid[j];
623 pnode = ssid_pnode_map[j];
624
625 if (!pnode) { /* Allocate pnode for this new ssid */
626 pnode =
627 (fsg_pnode_t *) ckd_calloc(1,
628 sizeof
629 (fsg_pnode_t));
630 pnode->ctx = lextree->ctx;
631 /* We are plugging the word prob here. Ugly */
632 /* pnode->logs2prob = lextree->pip; */
633 pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT)
634 + lextree->pip;
635 pnode->ci_ext = dict_pron(lextree->dict, dictwid, p);
636 pnode->ppos = p;
637 pnode->leaf = TRUE;
638 pnode->sibling = rc_pnodelist ?
639 (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL;
640 pnode->next.fsglink = fsglink;
641 pnode->alloc_next = head;
642 head = pnode;
643 ++n_rc_alloc;
644
645 hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid);
646
647 rc_pnodelist =
648 glist_add_ptr(rc_pnodelist, (void *) pnode);
649 ssid_pnode_map[j] = pnode;
650 }
651 else {
652 assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid);
653 }
654 fsg_pnode_add_ctxt(pnode, rc);
655 }
656
657 if (p == 1) { /* Predecessor = set of root nodes for left ctxts */
658 for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) {
659 pred = (fsg_pnode_t *) gnode_ptr(gn);
660 if (!pred->next.succ)
661 pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
662 else {
663 /* Link to the end of the sibling chain */
664 fsg_pnode_t *succ = pred->next.succ;
665 while (succ->sibling) succ = succ->sibling;
666 succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist);
667 /* Since all entries of lc_pnodelist point
668 to the same array, sufficient to update it once */
669 break;
670 }
671 }
672 }
673 else { /* Predecessor = word internal node */
674 if (!pred->next.succ)
675 pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
676 else {
677 /* Link to the end of the sibling chain */
678 fsg_pnode_t *succ = pred->next.succ;
679 while (succ->sibling) succ = succ->sibling;
680 succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist);
681 }
682 }
683 }
684 }
685
686 ckd_free((void *) ssid_pnode_map);
687 /* glist_free(lc_pnodelist); Nope; this gets freed outside */
688 glist_free(rc_pnodelist);
689 }
690
691 E_DEBUG(2,("Allocated %d HMMs (%d lc, %d rc, %d internal)\n",
692 n_lc_alloc + n_rc_alloc + n_int_alloc,
693 n_lc_alloc, n_rc_alloc, n_int_alloc));
694 *alloc_head = head;
695
696 return root;
697}
698
699
700static fsg_pnode_t *
701fsg_psubtree_init(fsg_lextree_t *lextree,
702 fsg_model_t * fsg, int32 from_state,
703 fsg_pnode_t ** alloc_head)
704{
705 fsg_arciter_t *itor;
706 fsg_link_t *fsglink;
707 fsg_pnode_t *root;
708 int32 n_ci, n_arc;
709 fsg_glist_linklist_t *glist = NULL;
710
711 root = NULL;
712 assert(*alloc_head == NULL);
713
714 n_ci = bin_mdef_n_ciphone(lextree->mdef);
715 if (n_ci > (FSG_PNODE_CTXT_BVSZ * 32)) {
716 E_FATAL
717 ("#phones > %d; increase FSG_PNODE_CTXT_BVSZ and recompile\n",
718 FSG_PNODE_CTXT_BVSZ * 32);
719 }
720
721 n_arc = 0;
722 for (itor = fsg_model_arcs(fsg, from_state); itor;
723 itor = fsg_arciter_next(itor)) {
724 int32 dst;
725 fsglink = fsg_arciter_get(itor);
726 dst = fsglink->to_state;
727
728 if (fsg_link_wid(fsglink) < 0)
729 continue;
730
731 E_DEBUG(2,("Building lextree for arc from %d to %d: %s\n",
732 from_state, dst, fsg_model_word_str(fsg, fsg_link_wid(fsglink))));
733 root = psubtree_add_trans(lextree, root, &glist, fsglink,
734 lextree->lc[from_state],
735 lextree->rc[dst],
736 alloc_head);
737 ++n_arc;
738 }
739 E_DEBUG(2,("State %d has %d outgoing arcs\n", from_state, n_arc));
740
741 fsg_glist_linklist_free(glist);
742
743 return root;
744}
745
746
747static void
748fsg_psubtree_free(fsg_pnode_t * head)
749{
750 fsg_pnode_t *next;
751
752 while (head) {
753 next = head->alloc_next;
754 hmm_deinit(&head->hmm);
755 ckd_free(head);
756 head = next;
757 }
758}
759
760void fsg_psubtree_dump_node(fsg_lextree_t *tree, fsg_pnode_t *node, FILE *fp)
761{
762 int32 i;
763 fsg_link_t *tl;
764
765 /* Indentation */
766 for (i = 0; i <= node->ppos; i++)
767 fprintf(fp, " ");
768
769 fprintf(fp, "%p.@", node); /* Pointer used as node
770 * ID */
771 fprintf(fp, " %5d.SS", hmm_nonmpx_ssid(&node->hmm));
772 fprintf(fp, " %10d.LP", node->logs2prob);
773 fprintf(fp, " %p.SIB", node->sibling);
774 fprintf(fp, " %s.%d", bin_mdef_ciphone_str(tree->mdef, node->ci_ext), node->ppos);
775 if ((node->ppos == 0) || node->leaf) {
776 fprintf(fp, " [");
777 for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++)
778 fprintf(fp, "%08x", node->ctxt.bv[i]);
779 fprintf(fp, "]");
780 }
781 if (node->leaf) {
782 tl = node->next.fsglink;
783 fprintf(fp, " {%s[%d->%d](%d)}",
784 fsg_model_word_str(tree->fsg, tl->wid),
785 tl->from_state, tl->to_state, tl->logs2prob);
786 } else {
787 fprintf(fp, " %p.NXT", node->next.succ);
788 }
789 fprintf(fp, "\n");
790
791 return;
792}
793
794void
795fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *root, FILE * fp)
796{
797 fsg_pnode_t *succ;
798
799 if (root == NULL) return;
800 if (root->ppos == 0) {
801 while(root->sibling && root->sibling->next.succ == root->next.succ) {
802 fsg_psubtree_dump_node(tree, root, fp);
803 root = root->sibling;
804 }
805 fflush(fp);
806 }
807
808 fsg_psubtree_dump_node(tree, root, fp);
809
810 if (root->leaf) {
811 if (root->ppos == 0 && root->sibling) { /* For single-phone words */
812 fsg_psubtree_dump(tree, root->sibling,fp);
813 }
814 return;
815 }
816
817 succ = root->next.succ;
818 while(succ) {
819 fsg_psubtree_dump(tree, succ,fp);
820 succ = succ->sibling;
821 }
822
823 if (root->ppos == 0) {
824 fsg_psubtree_dump(tree, root->sibling,fp);
825 fflush(fp);
826 }
827
828 return;
829}
830
831void
833{
834 hmm_clear(&pnode->hmm);
835}
s3ssid_t dict2pid_internal(dict2pid_t *d2p, int32 wid, int pos)
Return the senone sequence ID for the given word position.
Definition dict2pid.c:367
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
Definition dict2pid.h:115
#define dict_pron(d, w, p)
The CI phones of the word w at position p.
Definition dict.h:165
void fsg_lextree_free(fsg_lextree_t *lextree)
Free lextrees for an FSG.
void fsg_lextree_dump(fsg_lextree_t *lextree, FILE *fp)
Print an FSG lextree to a file for debugging.
void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode)
Mark the given pnode as inactive (for search).
void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt)
Set all flags on in the given context bitvector.
fsg_lextree_t * fsg_lextree_init(fsg_model_t *fsg, dict_t *dict, dict2pid_t *d2p, bin_mdef_t *mdef, hmm_context_t *ctx, int32 wip, int32 pip)
Create, initialize, and return a new phonetic lextree for the given FSG.
uint32 fsg_pnode_ctxt_sub_generic(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub)
Generic variant for arbitrary size.
#define SENSCR_SHIFT
Shift count for senone scores.
Definition hmm.h:73
Building composite triphone (as well as word internal triphones) with the dictionary.
Definition dict2pid.h:84
a structure for a dictionary.
Definition dict.h:76
Collection of lextrees for an FSG.
int16 ** lc
Left context triphone mappings for FSG.
fsg_model_t * fsg
The fsg for which this lextree is built.
int16 ** rc
Right context triphone mappings for FSG.
dict_t * dict
Pronunciation dictionary for this FSG.
dict2pid_t * d2p
Context-dependent phone mappings for this FSG.
bin_mdef_t * mdef
Model definition (triphone mappings).
hmm_context_t * ctx
HMM context structure.
Shared information between a set of HMMs.
cross word triphone model structure
Definition dict2pid.h:73
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
Definition dict2pid.h:75
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.
Definition dict2pid.h:74