summaryrefslogtreecommitdiff
path: root/sorsyl/lib/ipa_tableold.ml
blob: eb7d3fcdddbaa56498d1bded151e4c9d5530c740 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
(* Segments are values of type [Feature.segment]. The rows parsed in this
   file are built as association lists of (feature, value) pairs, so an
   individual feature can be looked up with [List.assoc]. *)

(** Decision tree for computing sonority values.

    A tree is meant to be read from the root: at each [Node] the [test]
    predicate is applied to a segment and evaluation continues in
    [t_branch] when it holds, in [f_branch] otherwise; a [Leaf] carries the
    resulting sonority value. The traversal function itself is not defined
    in this part of the file. *)
type bool_tree =
  | Leaf of int  (** Terminal node holding the sonority value *)
  | Node of {
      test : Feature.segment -> bool;  (** Predicate applied to the segment *)
      t_branch : bool_tree;  (** Subtree to follow when [test] is true *)
      f_branch : bool_tree;  (** Subtree to follow when [test] is false *)
    }

type ipa_entry = { ipa : string; features : Feature.segment }
(** One data row of the IPA CSV file: [ipa] is the text of the row's first
    column and [features] holds the feature specifications parsed from the
    remaining columns. *)

(** Storage for loaded IPA data: a module-level mutable table mapping the
    IPA string of a row to the feature specifications parsed from that row.
    It starts out empty; [1000] is only the initial size hint passed to
    [Hashtbl.create], and the table grows as needed. *)
let ipa_table : (string, Feature.segment) Hashtbl.t = Hashtbl.create 1000

(** [parse_row row] converts one CSV row into an {!ipa_entry}.

    The first cell is taken as the IPA string; each of the remaining cells
    is paired, in order, with one of the 24 feature column names listed
    below and converted with [Feature.value_of_string] /
    [Feature.feature_of_string].

    Returns [None] for an empty row, for the header row (first cell equal to
    ["ipa"]), and when the number of feature cells differs from the number
    of feature names. Any exception raised by the [Feature] conversion
    functions is not caught here. *)
let parse_row (row : string list) : ipa_entry option =
  (* Feature column names, in the order the cells are expected to follow
     the IPA column. *)
  let columns =
    [
      "syl";
      "son";
      "cons";
      "cont";
      "delrel";
      "lat";
      "nas";
      "strid";
      "voi";
      "sg";
      "cg";
      "ant";
      "cor";
      "distr";
      "lab";
      "hi";
      "lo";
      "back";
      "round";
      "velaric";
      "tense";
      "long";
      "hitone";
      "hireg";
    ]
  in
  (* Walk names and cells in lockstep, accumulating pairs in reverse; both
     lists must run out at the same time for the row to be accepted. *)
  let rec pair_up remaining_names remaining_cells paired =
    match (remaining_names, remaining_cells) with
    | [], [] -> Some (List.rev paired)
    | [], _ :: _ | _ :: _, [] -> None
    | name :: names_tl, cell :: cells_tl ->
        let fval = Feature.value_of_string cell in
        let fname = Feature.feature_of_string name in
        pair_up names_tl cells_tl ((fname, fval) :: paired)
  in
  match row with
  | [] -> None
  | "ipa" :: _ -> None (* header row *)
  | ipa :: cells -> (
      match pair_up columns cells [] with
      | None -> None
      | Some features -> Some { ipa; features })

(** [load_csv filename] reads the CSV file [filename] and stores every row
    accepted by [parse_row] in [ipa_table], keyed by the row's IPA string.
    Rows for which [parse_row] returns [None] are skipped.

    If an IPA string is already present in the table (a repeated row, or a
    second call to [load_csv]), its features are overwritten with the newest
    ones instead of piling up as shadowed duplicate bindings.

    The input channel is closed on both normal and exceptional exit. Any
    exception raised while reading or parsing is re-raised, with its
    original backtrace, after the channel has been closed.

    @raise Sys_error if [filename] cannot be opened. *)
let load_csv filename =
  let ic = open_in filename in
  let store row =
    match parse_row row with
    | Some entry -> Hashtbl.replace ipa_table entry.ipa entry.features
    | None -> ()
  in
  match Csv.iter ~f:store (Csv.of_channel ic) with
  | () -> close_in ic
  | exception e ->
      (* Capture the backtrace before doing anything that could clobber it,
         and close without raising so the original error is not masked. *)
      let bt = Printexc.get_raw_backtrace () in
      close_in_noerr ic;
      Printexc.raise_with_backtrace e bt

(** [lookup_segment ipa] returns [Some features] when [ipa] has a binding in
    [ipa_table], and [None] otherwise. It never raises on a missing key. *)
let lookup_segment (ipa : string) : Feature.segment option =
  Hashtbl.find_opt ipa_table ipa