54bool (*local_pruning_function)(
const int i,
62 local_cost_function
lcf,
63 local_pruning_function
lpf,
69 local_cost_function
lcf,
73bool local_prune(
const int i,
const int j,
79static bool show_cost=FALSE;
80static int prune_width = 100;
95float insertion_cost = 1;
96float deletion_cost = 1;
97float substitution_cost = 1;
166 null_sym->set_name(
"<null>");
170 "dp <options> \"pattern 1\" \"pattern 2\"\n"+
171 "Find the best alignment of a pair of symbol sequences (e.g. word pronuciations).\n"+
172 "-vocab <string> file containing vocabulary\n"+
173 "-place_holder <string> which vocab item is the place holder (default is " + null_sym->name() +
" )\n"+
174 "-show_cost show cost of matching path\n"+
175 "-o <string> output file\n"+
176 "-p <int> 'beam' width\n"+
178 "-i <float> insertion cost\n"+
179 "-d <float> deletion cost\n"+
180 "-s <float> substitution cost\n"+
182 "-cost_matrix <string> file containing cost matrix\n",
187 if (
al.present(
"-vocab"))
188 load_vocab(
al.val(
"-vocab"));
195 if (
al.present(
"-p"))
196 prune_width =
al.ival(
"-p");
198 if (
al.present(
"-cost_matrix"))
200 if(
al.present(
"-i") ||
al.present(
"-d") ||
al.present(
"-s") )
202 cerr <<
"Can't have ins/del/subs costs as well as matrix !" <<
endl;
205 distance_measure=
"matrix";
206 cost_matrix.
load(
al.val(
"-cost_matrix"));
208 if(
al.present(
"-place_holder"))
209 null_sym->set_name(
al.val(
"-place_holder"));
211 if(StrVector_index(vocab,null_sym->name()) < 0)
213 cerr <<
"The place holder symbol '" << null_sym->name();
214 cerr <<
"' is not in the vocbulary !" <<
endl;
220 cerr <<
"Cost matrix number of columns must match vocabulary size !" <<
endl;
225 cerr <<
"Cost matrix number of rows must match vocabulary size !" <<
endl;
230 else if(
al.present(
"-i") &&
al.present(
"-d") &&
al.present(
"-s") )
232 insertion_cost =
al.fval(
"-i");
233 deletion_cost =
al.fval(
"-d");
234 substitution_cost =
al.fval(
"-s");
238 cerr <<
"Must give either ins/del/subs costs or cost matrix !" <<
endl;
243 if(
al.present(
"-show_cost"))
246 if(
files.length() != 2)
248 cerr <<
"Must give 2 patterns !" <<
endl;
252 StringtoStrList(
files(
files.head()),pattern1_l,
" ");
253 StringtoStrList(
files(
files.head()->next()),pattern2_l,
" ");
261 for(p=pattern1_l.head();p != 0; p=p->next())
263 if( StrVector_index(vocab,pattern1_l(p)) < 0)
265 cerr << pattern1_l(p) <<
" is not in the vocabulary !" <<
endl;
273 for(p=pattern2_l.head();p != 0; p=p->next())
275 if( StrVector_index(vocab,pattern2_l(p)) < 0)
277 cerr << pattern2_l(p) <<
" is not in the vocabulary !" <<
endl;
291 local_cost,local_prune,null_sym))
294 cerr <<
"No match could be found." <<
endl;
321 StrList_to_StrVector(vocab_l,vocab);
332 if(distance_measure ==
"simple")
334 if(s1->name() == s2->name())
339 return insertion_cost;
340 else if(s2 == null_sym)
341 return deletion_cost;
343 return substitution_cost;
350 return cost_matrix(StrVector_index(vocab,s1->name()),
351 StrVector_index(vocab,s2->name()));
355bool local_prune(
const int i,
const int j,
375 (
abs((
int)(
near_j - (
float)
j)) > prune_width) )
EST_read_status load(const EST_String &filename)
Load from file (ascii or binary as defined in file)
void append(const T &item)
add item onto end of list
int num_columns() const
return number of columns
int num_rows() const
return number of rows
INLINE int length() const
number of items in vector.
EST_write_status save(const EST_String &filename, const EST_String &type="est_ascii") const
EST_Relation * create_relation(const EST_String &relname)
create a new relation called <parameter>relname</parameter>.