#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "segStdLib.h"
#include "SEGRec.h"
#include "segV3Compat.h"
Functions | |
int | ReadGrammar (char *gramFName, SEG_MODEL_SET models, double **initProb, double ***biProb, double **exitProb) |
Read bigram grammar (bigram posterior probability table). | |
int | MLCheckModelSet (SEG_MODEL_SET *modelSet) |
Check the consistency of model parameter set. | |
int | MLReadModelSet (char *modelList, SEG_MODEL_SET *modelSet, int *longestDur) |
Read a set of multi-level segmental-HMMs listed in the model-name list file. | |
int | MLWriteModelSet (SEG_MODEL_SET *modelSet, char *dirName) |
Write a set of models to a file. | |
int | MakeStrictLRTopology (SEG_MODEL_SET *modelSet) |
Force models to maintain strict left-to-right topology. | |
int | WriteDump (SEG_MODEL_SET *modelSet, char *fName) |
Write dump file contents. | |
double | ViterbiDecoder (double **spData, int sT, int eT, SEG_SYNTAX synt, SEG_MODEL_SET hmm, int maxDur, SEG_DP_ITEM ***dpMat, double *segProb, int *breakPt) |
Generic Viterbi-type decoder for segmental HMMs. | |
int | FreeDPMat (SEG_DP_ITEM **dpMat, int numStates) |
Free memory space storing all the paths in the trellis. | |
int * | RecoverStateSeq (SEG_SYNTAX syn, SEG_DP_ITEM **dpHist, int numVecs, int *numLabs) |
Recover the optimal state sequence from the path record of node. | |
SEG_STATE ** | RecoverStateIds (SEG_SYNTAX syn, int *stateSeq, int numVecs) |
Recover the sequence of state identities from a state sequence. | |
SEG_ANN_DATA * | RecoverRecogSeq (SEG_SYNTAX syn, SEG_DP_ITEM **dpMat, int numVecs, int numLabs) |
Recover the model labels and their start and end times. | |
int | FreeStateSeq (int *stateSeq) |
Free memory space of state sequence. | |
int | WriteMLFData (SEG_ANN_DATA recOp, int nLabs, int winSize, FILE *mlfFile, char *labFileName) |
Generate and write new annotations from the classification/recognition output. | |
int | GetDurationLimits (SEG_MODEL_SET models, SEG_SYNTAX synt, int eStates, int *minUttDur, int *maxUttDur) |
Find the minimum and maximum possible durations for a given syntax. | |
void | segSetDecoderParams (SEG_DEC_PARAM dec_param) |
Set decoding parameters. | |
void | segGetDecoderParams (SEG_DEC_PARAM *dec_param) |
Get decoding parameters. | |
int | MLIncrementLinearHMMAccumulators (SEG_STATE **stateId, int *stateSeq, SEG_SYNTAX syn, double **spData, int sT, int eT, int maxDur) |
Increment the reestimation bins, according to the current utterance alignment. | |
int | MLInitialiseHMMAccumulators (SEG_MODEL_SET *hmmSet) |
Initialise reestimation accumulators for the entire set of models. | |
int | MLReestSegModels (SEG_MODEL_SET *hmmSet, double minVar, double slopeRate, double varRate, double durRegParam) |
Reestimate linear segmental-HMM parameters. | |
int | GetSegVitConfig (char *cFile, char *hmmListFile, char *trnFile, int *maxIt, double *stopDiff, char *logFile, char *mlfFile, int *embedTrain, int *maxDur, double *minVar, char *hmmOpDir, int *runType, double *slopeRate, double *varRate, int *verbose, char *aMatFile, char *iMatFile, char *classFile, int *opMats, char *grammarFile, int *LMScale, char **dumpPtr) |
Read SEGRec configuration. | |
int | MakeTrainingSyntax (SEG_UTT_LIST_ITEM utt, SEG_MODEL_SET models, SEG_SYNTAX *synt, int *eStates) |
Create 'forced alignment' syntax (for supervised training and classification). | |
int | MakeRecognitionSyntax (SEG_MODEL_SET models, SEG_SYNTAX *synt, int recogType, double *initProb, double *exitProb, double **biProb, double lmff) |
Create recognition syntax. | |
int | CheckSyn (SEG_MODEL_SET models, SEG_SYNTAX *synt) |
Display the contents of syntax for debugging. | |
int | FreeSyntaxMemory (SEG_SYNTAX *synt, int totStates) |
Free memory allocated to training/recognition syntax. | |
int | CheckMasterList (SEG_UTT_LIST uL, int detail) |
Display details of the utterance list for debugging. | |
int | Time2FrameNum (SEG_UTT_LIST uL, int winSize, int numVecs) |
Convert annotation times. | |
int | FreeBreakPoints (int uttVecs, int **breakPt) |
Free memory allocated to break points. | |
int | FreeMasterList (int totLabs, SEG_UTT_LIST masterList) |
Free memory space allocated to the master utterance list. | |
int | ReadSpData (int numVecs, int vecSize, FILE *curSpFile, double ***spData) |
Read in the acoustic feature vectors. | |
int | FreeSpData (int numVecs, double ***spData) |
Free memory space allocated to the current speech data. | |
int | CheckSpData (int sT, int eT, int vecSize, double **spData) |
Display the speech vectors for debugging. | |
int | JoinLabels (int *startT, int *endT, char ***labList, FILE *curLabFile, int nLabs) |
Extract overall start and end times of the utterance and load label names. | |
int | GetBreakPoints (int startT, int endT, FILE *curLabFile, int **breakPt, int emb, int winSize, int nLabs, int verbose) |
Identify the frames that correspond to the start of a new utterance. | |
int | CountLabels (FILE *labFile, int numVecs, int winSize) |
Count the number of labels in an annotation file. | |
int | WriteAlignedVectors (FILE *aMatFptr, FILE *iMatFptr, SEG_STATE **stateId, int *stateSeq, SEG_SYNTAX syn, double **spData, int sT, int eT) |
Write out aligned sets of articulatory trajectory values and acoustic vectors. |
int CheckMasterList | ( | SEG_UTT_LIST | uL, | |
int | detail | |||
) |
Display details of the utterance list for debugging.
[in] | uL | Utterance list |
[in] | detail | Level of debugging |
NO_ERROR | Successful execution |
int CheckSpData | ( | int | sT, | |
int | eT, | |||
int | vecSize, | |||
double ** | spData | |||
) |
Display the speech vectors for debugging.
[in] | sT | Start time of the utterance |
[in] | eT | End time of the utterance |
[in] | vecSize | Length of acoustic feature vector in bytes |
[in] | spData | Acoustic feature vectors of speech |
NO_ERROR | Successful execution |
int CheckSyn | ( | SEG_MODEL_SET | models, | |
SEG_SYNTAX * | synt | |||
) |
Display the contents of syntax for debugging.
[in] | models | Model set (pointer to a variable of struct modelSet) |
[in] | synt | Syntax (pointer to a variable of struct syntax) |
int CountLabels | ( | FILE * | labFile, | |
int | numVecs, | |||
int | winSize | |||
) |
Count the number of labels in an annotation file.
[in] | labFile | File pointer to the annotation file |
[in] | numVecs | Number of frames |
[in] | winSize | Frame shift size |
int FreeBreakPoints | ( | int | uttVecs, | |
int ** | breakPt | |||
) |
Free memory allocated to break points.
[in] | uttVecs | Number of utterances |
[in] | breakPt | Pointer to the break point array |
NO_ERROR | Successful execution |
int FreeDPMat | ( | SEG_DP_ITEM ** | dpMat, | |
int | numStates | |||
) |
Free memory space storing all the paths in the trellis.
[in] | dpMat | Paths in the trellis for back-tracing |
[in] | numStates | Number of states |
NO_ERROR | Successful execution |
int FreeMasterList | ( | int | totLabs, | |
SEG_UTT_LIST | masterList | |||
) |
Free memory space allocated to the master utterance list.
[in] | totLabs | Number of labels in the utterance |
[in] | masterList | Master utterance list |
NO_ERROR | Successful execution |
int FreeSpData | ( | int | numVecs, | |
double *** | spData | |||
) |
Free memory space allocated to the current speech data.
[in] | numVecs | Number of frames in the utterance |
[in] | spData | Pointer to memory storing speech data |
NO_ERROR | Successful execution |
int FreeStateSeq | ( | int * | stateSeq | ) |
Free memory space of state sequence.
[in] | stateSeq | State (index) sequence |
NO_ERROR | Successful execution |
int FreeSyntaxMemory | ( | SEG_SYNTAX * | synt, | |
int | totStates | |||
) |
Free memory allocated to training/recognition syntax.
[in] | synt | Syntax |
[in] | totStates | Number of states in the syntax |
NO_ERROR | Successful execution |
int GetBreakPoints | ( | int | startT, | |
int | endT, | |||
FILE * | curLabFile, | |||
int ** | breakPt, | |||
int | emb, | |||
int | winSize, | |||
int | nLabs, | |||
int | verbose | |||
) |
Identify the frames that correspond to the start of a new utterance.
[in] | startT | Start time of the utterance |
[in] | endT | End time of the utterance |
[in] | curLabFile | File pointer to the current label file |
[in,out] | breakPt | Array for the break points |
[in] | emb | Training type (supervised/embedded) |
[in] | winSize | Frame shift size |
[in] | nLabs | Number of labels in the utterance |
[in] | verbose | Level of reporting during execution |
RTN_ERROR | Memory allocation error / File read error | |
otherwise | Number of elements in the break point array |
int GetDurationLimits | ( | SEG_MODEL_SET | models, | |
SEG_SYNTAX | synt, | |||
int | eStates, | |||
int * | minUttDur, | |||
int * | maxUttDur | |||
) |
Find the minimum and maximum possible durations for a given syntax.
[in] | models | Model set |
[in] | synt | Syntax |
[in] | eStates | Number of emitting states in the syntax |
[out] | minUttDur | Minimum utterance duration |
[out] | maxUttDur | Maximum utterance duration |
RTN_ERROR | Inconsistency in the models | |
NO_ERROR | Successfully executed |
int GetSegVitConfig | ( | char * | cFile, | |
char * | hmmListFile, | |||
char * | trnFile, | |||
int * | maxIt, | |||
double * | stopDiff, | |||
char * | logFile, | |||
char * | mlfFile, | |||
int * | embedTrain, | |||
int * | maxDur, | |||
double * | minVar, | |||
char * | hmmOpDir, | |||
int * | runType, | |||
double * | slopeRate, | |||
double * | varRate, | |||
int * | verbose, | |||
char * | aMatFile, | |||
char * | iMatFile, | |||
char * | classFile, | |||
int * | opMats, | |||
char * | grammarFile, | |||
int * | LMScale, | |||
char ** | dumpPtr | |||
) |
Read SEGRec configuration.
[in] | cFile | Name of configuration file |
[in,out] | hmmListFile | Name of HMM list file |
[in,out] | trnFile | Name of speech list file |
[in,out] | maxIt | Maximum number of iterations for training |
[in,out] | stopDiff | Fractional threshold to stop iterations |
[in,out] | logFile | Name of log file |
[in,out] | mlfFile | Name of MLF file |
[in,out] | embedTrain | Flag switching supervised or embedded decoding |
[in,out] | maxDur | Maximal segment-duration |
[in,out] | minVar | Variance floor |
[in,out] | hmmOpDir | Name of directory where models are stored |
[in,out] | runType | Flag switching recognition or training |
[in,out] | slopeRate | Update rate for slope parameter |
[in,out] | varRate | Update rate for variance parameter |
[in,out] | verbose | Level of reporting displayed during execution |
[in,out] | aMatFile | Name of file storing acoustic features |
[in,out] | iMatFile | Name of file storing articulatory features |
[in,out] | classFile | Name of class file |
[in,out] | opMats | Variable specifying additional debug info to be output to certain files |
[in,out] | grammarFile | Name of grammar file |
[in,out] | LMScale | Weight for prob from language model against acoustic probabilities |
[in,out] | dumpPtr | Name of dump file |
ERROR_FOPEN | File open error | |
ERROR_ALLOC | Memory allocation error | |
RTN_ERROR | Other errors | |
otherwise | Number of parameters read |
int JoinLabels | ( | int * | startT, | |
int * | endT, | |||
char *** | labList, | |||
FILE * | curLabFile, | |||
int | nLabs | |||
) |
Extract overall start and end times of the utterance and load label names.
[in,out] | startT | Start time of the utterance |
[in,out] | endT | End time of the utterance |
[in,out] | labList | Pointer to the label name list |
[in] | curLabFile | File pointer to the current label file |
[in] | nLabs | Number of labels in the utterance |
RTN_ERROR | Memory allocation error / File read error | |
otherwise | Index of the last label in the file |
int MakeRecognitionSyntax | ( | SEG_MODEL_SET | models, | |
SEG_SYNTAX * | synt, | |||
int | recogType, | |||
double * | initProb, | |||
double * | exitProb, | |||
double ** | biProb, | |||
double | lmff | |||
) |
Create recognition syntax.
[in] | models | Set of models with model parameters |
[in,out] | synt | Syntax to be built |
[in] | recogType | Type of testing (RECOGNITION or CLASSIFICATION) |
[in] | initProb | Entry costs for the models |
[in] | exitProb | Exit costs for the models |
[in] | biProb | N-gram posterior probabilities table |
[in] | lmff | Scale factor for the language model |
RTN_ERROR | Memory allocation error / Inconsistency between models | |
otherwise | Number of nodes in the syntax |
int MakeStrictLRTopology | ( | SEG_MODEL_SET * | modelSet | ) |
Force models to maintain strict left-to-right topology.
[in,out] | modelSet | Pointer to a struct variable for the model set |
NO_ERROR | Successful execution |
int MakeTrainingSyntax | ( | SEG_UTT_LIST_ITEM | utt, | |
SEG_MODEL_SET | models, | |||
SEG_SYNTAX * | synt, | |||
int * | eStates | |||
) |
Create 'forced alignment' syntax (for supervised training and classification).
[in] | utt | Utterance |
[in] | models | Set of models with model parameters |
[in,out] | synt | Syntax to be built |
[in,out] | eStates | Number of emitting states |
RTN_ERROR | Memory allocation error / Inconsistency between models | |
otherwise | Number of nodes in the syntax |
int MLCheckModelSet | ( | SEG_MODEL_SET * | modelSet | ) |
Check the consistency of model parameter set.
[in] | modelSet | Pointer to a struct variable for the model set |
NO_ERROR | Successful execution | |
RTN_ERROR | Inconsistency found in the set |
int MLIncrementLinearHMMAccumulators | ( | SEG_STATE ** | stateId, | |
int * | stateSeq, | |||
SEG_SYNTAX | syn, | |||
double ** | spData, | |||
int | sT, | |||
int | eT, | |||
int | maxDur | |||
) |
Increment the reestimation bins, according to the current utterance alignment.
[in,out] | stateId | Array of state IDs of the best state-sequence |
[in] | stateSeq | The best state sequence |
[in] | syn | Syntax |
[in] | spData | Acoustic feature vectors of speech data |
[in] | sT | Start time of the utterance |
[in] | eT | End time of the utterance |
[in] | maxDur | Maximum segment duration |
RTN_ERROR | Memory allocation error | |
NO_ERROR | Successfully executed |
int MLInitialiseHMMAccumulators | ( | SEG_MODEL_SET * | hmmSet | ) |
Initialise reestimation accumulators for the entire set of models.
[in,out] | hmmSet | Model set |
int MLReadModelSet | ( | char * | modelList, | |
SEG_MODEL_SET * | modelSet, | |||
int * | longestDur | |||
) |
Read a set of multi-level segmental-HMMs listed in the model-name list file.
[in] | modelList | Name of model-name list file |
[in,out] | modelSet | Model set |
[in] | longestDur | Maximal segment-duration |
RTN_ERROR | Error | |
otherwise | Dimension of the intermediate features |
int MLReestSegModels | ( | SEG_MODEL_SET * | hmmSet, | |
double | minVar, | |||
double | slopeRate, | |||
double | varRate, | |||
double | durRegParam | |||
) |
Reestimate linear segmental-HMM parameters.
[in,out] | hmmSet | Model set |
[in] | minVar | Variance floor |
[in] | slopeRate | Update rate of slope in comparison to the mean |
[in] | varRate | Update rate of variance in comparison to the mean |
[in] | durRegParam | Regularisation parameter for the durational model |
NO_ERROR | Successfully executed | |
RTN_ERROR | Too small number of samples for the model |
int MLWriteModelSet | ( | SEG_MODEL_SET * | modelSet, | |
char * | dirName | |||
) |
Write a set of models to a file.
[in] | modelSet | Pointer to the model-set structure variable |
[in] | dirName | Name of directory where the models are saved |
RTN_ERROR | Memory allocation error / File write error | |
NO_ERROR | Successful execution |
int ReadGrammar | ( | char * | gramFName, | |
SEG_MODEL_SET | models, | |||
double ** | initProb, | |||
double *** | biProb, | |||
double ** | exitProb | |||
) |
Read bigram grammar (bigram posterior probability table).
[in] | gramFName | Name of the grammar file |
[in] | models | Model set |
[in,out] | initProb | Entry costs for the models |
[in,out] | biProb | N-gram posterior probabilities table |
[in,out] | exitProb | Exit costs for the models |
RTN_ERROR | File open/read error / Memory allocation error | |
NO_ERROR | Successfully executed |
int ReadSpData | ( | int | numVecs, | |
int | vecSize, | |||
FILE * | curSpFile, | |||
double *** | spData | |||
) |
Read in the acoustic feature vectors.
[in] | numVecs | Number of frames in the utterance |
[in] | vecSize | Sample size |
[in,out] | curSpFile | File pointer to the speech file |
[in,out] | spData | Pointer to a 2D array used to store the vectors |
RTN_ERROR | Memory allocation error | |
totRead | Total number of items in the speech file that are read |
SEG_ANN_DATA* RecoverRecogSeq | ( | SEG_SYNTAX | syn, | |
SEG_DP_ITEM ** | dpMat, | |||
int | numVecs, | |||
int | numLabs | |||
) |
Recover the model labels and their start and end times.
[in] | syn | Syntax |
[in] | dpMat | Paths in the trellis for back-tracing |
[in] | numVecs | Number of frames in the sequence |
[in] | numLabs | Number of models in the sequence |
NULL | Memory allocation error | |
otherwise | Pointer to the array of struct segAnnData variables each of which contains a label, and start and end times. |
SEG_STATE** RecoverStateIds | ( | SEG_SYNTAX | syn, | |
int * | stateSeq, | |||
int | numVecs | |||
) |
Recover the sequence of state identities from a state sequence.
[in] | syn | Syntax |
[in] | stateSeq | State sequence |
[in] | numVecs | Number of frames in the utterance |
NULL | Memory allocation error | |
otherwise | Pointer to the array of struct segStateInfo pointers |
int* RecoverStateSeq | ( | SEG_SYNTAX | syn, | |
SEG_DP_ITEM ** | dpHist, | |||
int | numVecs, | |||
int * | numLabs | |||
) |
Recover the optimal state sequence from the path record of node.
[in] | syn | Syntax |
[in] | dpHist | Paths in the trellis for back-tracing |
[in] | numVecs | Number of frames in the utterance |
[in,out] | numLabs | Number of models in the sequence recovered |
NULL | Memory allocation error | |
Otherwise | Pointer to the array of state sequence recovered |
void segGetDecoderParams | ( | SEG_DEC_PARAM * | dec_param | ) |
Get decoding parameters.
[out] | dec_param | Struct variable of decoding parameters |
void segSetDecoderParams | ( | SEG_DEC_PARAM | dec_param | ) |
Set decoding parameters.
[in] | dec_param | Struct variable of decoding parameters |
int Time2FrameNum | ( | SEG_UTT_LIST | uL, | |
int | winSize, | |||
int | numVecs | |||
) |
Convert annotation times.
[in,out] | uL | Utterance list |
[in] | winSize | Size of window (ms) used in processing speech data |
[in] | numVecs | Number of frames in the utterance |
NO_ERROR | Successful execution |
double ViterbiDecoder | ( | double ** | spData, | |
int | sT, | |||
int | eT, | |||
SEG_SYNTAX | synt, | |||
SEG_MODEL_SET | hmm, | |||
int | maxDur, | |||
SEG_DP_ITEM *** | dpMat, | |||
double * | segProb, | |||
int * | breakPt | |||
) |
Generic Viterbi-type decoder for segmental HMMs.
[in] | spData | Acoustic feature vectors |
[in] | sT | Utterance start time (frame No) |
[in] | eT | Utterance end time (frame No) |
[in] | synt | Syntax |
[in] | hmm | Model set |
[in] | maxDur | Maximum segment-duration |
[out] | dpMat | Paths in the trellis for back-tracing |
[out] | segProb | Output probabilities for SHMMs |
[in] | breakPt | Break point attributes |
RTN_ERROR | Memory allocation error | |
Otherwise | The best cost (min probability) computed |
int WriteAlignedVectors | ( | FILE * | aMatFptr, | |
FILE * | iMatFptr, | |||
SEG_STATE ** | stateId, | |||
int * | stateSeq, | |||
SEG_SYNTAX | syn, | |||
double ** | spData, | |||
int | sT, | |||
int | eT | |||
) |
Write out aligned sets of articulatory trajectory values and acoustic vectors.
[in] | aMatFptr | File pointer to the acoustic feature file |
[in] | iMatFptr | File pointer to the articulatory feature file |
[in] | stateId | Array of state identities of the best state sequence |
[in] | stateSeq | Best state-number sequence |
[in] | syn | Syntax |
[in] | spData | Feature vectors of actual speech |
[in] | sT | Start time (frame) of the utterance |
[in] | eT | End time (frame) of the utterance |
RTN_ERROR | Memory allocation error | |
NO_ERROR | Successfully executed |
int WriteDump | ( | SEG_MODEL_SET * | modelSet, | |
char * | fName | |||
) |
Write dump file contents.
[in] | modelSet | Pointer to the model-set structure variable |
[in] | fName | Name of dump file |
RTN_ERROR | File open error | |
NO_ERROR | Successful execution |
int WriteMLFData | ( | SEG_ANN_DATA | recOp, | |
int | nLabs, | |||
int | winSize, | |||
FILE * | mlfFile, | |||
char * | labFileName | |||
) |
Generate and write new annotations from the classification/recognition output.
[in] | recOp | Array of struct annData variables |
[in] | nLabs | Number of labels in the utterance |
[in] | winSize | Size of the window (ms) used in processing speech data |
[in] | mlfFile | File pointer to a MLF file |
[in] | labFileName | Name of the label file |
RTN_ERROR | File write error | |
NO_ERROR | Successful execution |