(** Functional IPA table implementation without global state *) open Base module StringTrie = Trie.Of_string type ipa_entry = { ipa : string; features : Feature.segment } (** Type representing a row from the IPA CSV file *) type ipa_table = (string, Feature.segment, String.comparator_witness) Map.t (** Type representing the IPA table *) (* type t = (string, Feature.segment, String.comparator_witness) Map.t *) type t = { table : ipa_table; trie : (string, Feature.segment, StringTrie.Keychain.keychain_description) Trie.t; } (** Parse a single row from the CSV file *) let parse_row (row : string list) : ipa_entry option = match row with | [] -> None | ipa :: features -> ( let feature_names = [ "syl"; "son"; "cons"; "cont"; "delrel"; "lat"; "nas"; "strid"; "voi"; "sg"; "cg"; "ant"; "cor"; "distr"; "lab"; "hi"; "lo"; "back"; "round"; "velaric"; "tense"; "long"; "hitone"; "hireg"; ] in (* Skip the header row *) if String.equal ipa "ipa" then None else let rec build_features names values acc = match (names, values) with | [], [] -> Some (List.rev acc) | name :: ns, value :: vs -> let fval = Feature.value_of_string value in let fname = Feature.feature_of_string name in build_features ns vs ((fname, fval) :: acc) | _ -> None (* Mismatched lengths *) in match build_features feature_names features [] with | Some feature_list -> Some { ipa; features = feature_list } | None -> None) (** Load IPA data from CSV file and return the table *) let load_csv (data_dir : string) : t = let filename = Stdlib.Filename.concat data_dir "ipa_all.csv" in let ic = Stdio.In_channel.create filename in let csv = Csv.of_channel ic in let result = try let entries = Csv.fold_left csv ~init:[] ~f:(fun acc row -> match parse_row row with Some entry -> entry :: acc | None -> acc) in let table = List.fold entries ~init:(Map.empty (module String)) ~f:(fun acc entry -> Map.set acc ~key:entry.ipa ~data:entry.features) in let alist = Map.to_alist table in let trie = Trie.of_alist_exn StringTrie.Keychain.keychainable alist in Stdio.In_channel.close ic; { table; trie } with e -> Stdio.In_channel.close ic; raise e in result (** Look up features for an IPA segment *) let lookup_segment table (ipa : string) : Feature.segment option = Map.find table ipa (** Get all segments in the table *) let all_segments table : (string * Feature.segment) list = Map.to_alist table (** Check if a segment exists in the table *) let mem table (ipa : string) : bool = Map.mem table ipa (** Get the number of segments in the table *) let length table : int = Map.length table let fts ?(_normalize = true) table ipa = Map.find table ipa let longest_one_seg_prefix trie word = let rec aux trie remaining = if String.is_empty remaining then None else let res = Trie.find trie remaining in match res with | None -> aux trie (String.drop_suffix remaining 1) | Some _data -> Some remaining in aux trie word let ipa_segs ?(_normalize = true) data word = let rec aux acc remaining = match longest_one_seg_prefix data.trie remaining with | None -> acc | Some seg -> aux (seg :: acc) (String.drop_prefix remaining (String.length seg)) in List.rev (aux [] word)