diff options
Diffstat (limited to 'sorsyl/lib/ipa_table.ml')
-rw-r--r-- | sorsyl/lib/ipa_table.ml | 85 |
1 files changed, 85 insertions, 0 deletions
(* Source file: sorsyl/lib/ipa_table.ml *)

(* Segments are represented as [Feature.segment]. [parse_row] below builds one
   as a list of (feature, value) pairs, i.e. an association list, so the
   [List.assoc] family of functions can be used to query it. *)

(** Decision tree for computing sonority values *)
type bool_tree =
  | Leaf of int  (** Terminal node with sonority value *)
  | Node of {
      test : Feature.segment -> bool;  (** Test function *)
      t_branch : bool_tree;  (** Branch to follow if test is true *)
      f_branch : bool_tree;  (** Branch to follow if test is false *)
    }

type ipa_entry = { ipa : string; features : Feature.segment }
(** Type representing a row from the IPA CSV file *)

(** Storage for loaded IPA data: maps the [ipa] cell of a row to the features
    parsed from the rest of that row. Filled by [load_csv], read by
    [lookup_segment]. *)
let ipa_table : (string, Feature.segment) Hashtbl.t = Hashtbl.create 1000

(** Feature column names, in the order [parse_row] pairs them with the cells
    that follow the leading [ipa] cell of a row. *)
let feature_names =
  [
    "syl";
    "son";
    "cons";
    "cont";
    "delrel";
    "lat";
    "nas";
    "strid";
    "voi";
    "sg";
    "cg";
    "ant";
    "cor";
    "distr";
    "lab";
    "hi";
    "lo";
    "back";
    "round";
    "velaric";
    "tense";
    "long";
    "hitone";
    "hireg";
  ]

(** [parse_row row] parses a single row from the CSV file.

    Returns [None] for an empty row, for the header row (first cell equal to
    ["ipa"]), and for a row whose number of cells after the first differs from
    the length of [feature_names]. Otherwise returns [Some entry] where
    [entry.ipa] is the first cell and [entry.features] pairs every feature
    name with the value parsed from the corresponding cell.

    Any exception raised by [Feature.value_of_string] or
    [Feature.string_of_feature] is propagated unchanged. *)
let parse_row (row : string list) : ipa_entry option =
  (* Walk names and cells in lockstep; the accumulator is built in reverse and
     flipped once at the end so the result keeps column order. *)
  let rec build_features names values acc =
    match (names, values) with
    | [], [] -> Some (List.rev acc)
    | name :: ns, value :: vs ->
        let fval = Feature.value_of_string value in
        (* NOTE(review): despite its name, [Feature.string_of_feature] is
           applied to a column-name string here; confirm its direction against
           the Feature module. *)
        let fname = Feature.string_of_feature name in
        build_features ns vs ((fname, fval) :: acc)
    | [], _ :: _ | _ :: _, [] -> None (* Mismatched lengths *)
  in
  match row with
  | [] -> None
  | "ipa" :: _ -> None (* Skip the header row *)
  | ipa :: values ->
      build_features feature_names values []
      |> Option.map (fun features -> { ipa; features })

(** [load_csv filename] reads the CSV file [filename] and stores every row
    accepted by [parse_row] in [ipa_table]; rows for which [parse_row] returns
    [None] are skipped.

    A row whose [ipa] key is already present replaces the previous binding, so
    loading the same file twice does not pile up shadowed entries.

    The input channel is closed whether loading succeeds or an exception
    escapes; the exception is then re-raised to the caller.

    @raise Sys_error if [filename] cannot be opened. *)
let load_csv filename =
  let ic = open_in filename in
  Fun.protect
    ~finally:(fun () -> close_in_noerr ic)
    (fun () ->
      (* [Csv.of_channel] runs inside the protected region so that a failure
         while setting up the reader cannot leak [ic]. *)
      Csv.of_channel ic
      |> Csv.iter ~f:(fun row ->
             match parse_row row with
             | Some { ipa; features } -> Hashtbl.replace ipa_table ipa features
             | None -> ()))

(** [lookup_segment ipa] looks up the features stored for the IPA segment
    [ipa] in [ipa_table], returning [None] when no entry has been loaded for
    it. *)
let lookup_segment ipa = Hashtbl.find_opt ipa_table ipa