summaryrefslogtreecommitdiff
path: root/sorsyl/lib/ipa_table.ml
diff options
context:
space:
mode:
Diffstat (limited to 'sorsyl/lib/ipa_table.ml')
-rw-r--r--sorsyl/lib/ipa_table.ml102
1 files changed, 73 insertions, 29 deletions
diff --git a/sorsyl/lib/ipa_table.ml b/sorsyl/lib/ipa_table.ml
index bee027a..295100d 100644
--- a/sorsyl/lib/ipa_table.ml
+++ b/sorsyl/lib/ipa_table.ml
@@ -1,20 +1,21 @@
-(** Type representing a segment as a set of feature specifications *)
-(* an association list I guess . Use List.assoc to handle*)
+(** Functional IPA table implementation without global state *)
-(** Decision tree for computing sonority values *)
-type bool_tree =
- | Leaf of int (** Terminal node with sonority value *)
- | Node of {
- test : Feature.segment -> bool; (** Test function *)
- t_branch : bool_tree; (** Branch to follow if test is true *)
- f_branch : bool_tree; (** Branch to follow if test is false *)
- }
+open Base
+module StringTrie = Trie.Of_string
type ipa_entry = { ipa : string; features : Feature.segment }
(** Type representing a row from the IPA CSV file *)
-(** Storage for loaded IPA data *)
-let ipa_table : (string, Feature.segment) Hashtbl.t = Hashtbl.create 1000
+type ipa_table = (string, Feature.segment, String.comparator_witness) Map.t
+(** Type representing the IPA table *)
+
+(* type t = (string, Feature.segment, String.comparator_witness) Map.t *)
+
+type t = {
+ table : ipa_table;
+ trie :
+ (string, Feature.segment, StringTrie.Keychain.keychain_description) Trie.t;
+}
(** Parse a single row from the CSV file *)
let parse_row (row : string list) : ipa_entry option =
@@ -50,14 +51,14 @@ let parse_row (row : string list) : ipa_entry option =
]
in
(* Skip the header row *)
- if ipa = "ipa" then None
+ if String.equal ipa "ipa" then None
else
let rec build_features names values acc =
match (names, values) with
| [], [] -> Some (List.rev acc)
| name :: ns, value :: vs ->
let fval = Feature.value_of_string value in
- let fname = Feature.string_of_feature name in
+ let fname = Feature.feature_of_string name in
build_features ns vs ((fname, fval) :: acc)
| _ -> None (* Mismatched lengths *)
in
@@ -65,21 +66,64 @@ let parse_row (row : string list) : ipa_entry option =
| Some feature_list -> Some { ipa; features = feature_list }
| None -> None)
-(** Load IPA data from CSV file *)
-let load_csv filename =
- let ic = open_in filename in
+(** Load IPA data from CSV file and return the table *)
+let load_csv (data_dir : string) : t =
+ let filename = Stdlib.Filename.concat data_dir "ipa_all.csv" in
+ let ic = Stdio.In_channel.create filename in
let csv = Csv.of_channel ic in
- try
- Csv.iter
- ~f:(fun row ->
- match parse_row row with
- | Some entry -> Hashtbl.add ipa_table entry.ipa entry.features
- | None -> ())
- csv;
- close_in ic
- with e ->
- close_in ic;
- raise e
+
+ let result =
+ try
+ let entries =
+ Csv.fold_left csv ~init:[] ~f:(fun acc row ->
+ match parse_row row with Some entry -> entry :: acc | None -> acc)
+ in
+ let table =
+ List.fold entries
+ ~init:(Map.empty (module String))
+ ~f:(fun acc entry -> Map.set acc ~key:entry.ipa ~data:entry.features)
+ in
+ let alist = Map.to_alist table in
+ let trie = Trie.of_alist_exn StringTrie.Keychain.keychainable alist in
+ Stdio.In_channel.close ic;
+ { table; trie }
+ with e ->
+ Stdio.In_channel.close ic;
+ raise e
+ in
+ result
(** Look up features for an IPA segment *)
-let lookup_segment ipa = Hashtbl.find_opt ipa_table ipa
+let lookup_segment table (ipa : string) : Feature.segment option =
+ Map.find table ipa
+
+(** Get all segments in the table *)
+let all_segments table : (string * Feature.segment) list = Map.to_alist table
+
+(** Check if a segment exists in the table *)
+let mem table (ipa : string) : bool = Map.mem table ipa
+
+(** Get the number of segments in the table *)
+let length table : int = Map.length table
+
+let fts ?(_normalize = true) table ipa = Map.find table ipa
+
+let longest_one_seg_prefix trie word =
+ let rec aux trie remaining =
+ if String.is_empty remaining then None
+ else
+ let res = Trie.find trie remaining in
+ match res with
+ | None -> aux trie (String.drop_suffix remaining 1)
+ | Some _data -> Some remaining
+ in
+ aux trie word
+
+let ipa_segs ?(_normalize = true) data word =
+ let rec aux acc remaining =
+ match longest_one_seg_prefix data.trie remaining with
+ | None -> acc
+ | Some seg ->
+ aux (seg :: acc) (String.drop_prefix remaining (String.length seg))
+ in
+ List.rev (aux [] word)