From b43fe0d51da9a247bf94af27898d63f79d424073 Mon Sep 17 00:00:00 2001
From: polwex
Date: Sun, 22 Jun 2025 09:21:58 +0700
Subject: getting there

---
Review notes (changes relative to commit b43fe0d; the blob ids on the
"index" lines below are those of the original commit):
- test_sonority.ml: test_unknown_segment checks the message length before
  calling String.sub, so a short Failure message can no longer raise
  Invalid_argument from inside the guard.
- test_table.ml: get_data takes unit, so the data is loaded when the runner
  calls it rather than at module initialisation; test_segs reports its own
  name instead of "test_fts"; one-line doc comments added.

 sorsyl/test/dune                |   8 ++
 sorsyl/test/test_sonority.ml    |  62 ++++++++----
 sorsyl/test/test_sonorityold.ml | 133 +++++++++++++++++++++++++
 sorsyl/test/test_table.ml       | 211 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 392 insertions(+), 22 deletions(-)
 create mode 100644 sorsyl/test/test_sonorityold.ml
 create mode 100644 sorsyl/test/test_table.ml

(limited to 'sorsyl/test')

diff --git a/sorsyl/test/dune b/sorsyl/test/dune
index 701e92d..e79f200 100644
--- a/sorsyl/test/dune
+++ b/sorsyl/test/dune
@@ -1,3 +1,11 @@
+; (test
+;  (name test_sonorityold)
+;  (libraries sorsyl))
+
 (test
  (name test_sonority)
  (libraries sorsyl))
+
+(test
+ (name test_table)
+ (libraries sorsyl))
diff --git a/sorsyl/test/test_sonority.ml b/sorsyl/test/test_sonority.ml
index 70845a6..c24f4b4 100644
--- a/sorsyl/test/test_sonority.ml
+++ b/sorsyl/test/test_sonority.ml
@@ -1,11 +1,9 @@
-(** Tests for the Sonority module *)
+(** Tests for the functional Sonority module *)
 
 open Sorsyl
 
-(** Test fixture - initialize the module once *)
-let () =
-  (* Initialize with the data directory *)
-  (* When run with dune test, the working directory is _build/default/test *)
+(** Test fixture - create the sonority calculator once *)
+let sonority_calc =
   let data_dir =
     if Sys.file_exists "./data" then "./data"
     else if Sys.file_exists "../data" then "../data"
@@ -14,13 +12,13 @@ let () =
       Printf.eprintf "Current directory: %s\n" (Sys.getcwd ());
       failwith "Cannot find data directory")
   in
-  Sonority.init data_dir
+  Sonority.create data_dir
 
 (** Test sonority value 9 - Low vowels *)
 let test_sonority_nine () =
   let segments = [ "a"; "ɑ"; "æ"; "ɒ"; "e"; "o̥" ] in
   let expected = [ 9; 9; 9; 9; 9; 9 ] in
-  let results = List.map Sonority.sonority segments in
+  let results = List.map (Sonority.sonority sonority_calc) segments in
   assert (results = expected);
   Printf.printf "test_sonority_nine: PASSED\n"
 
@@ -28,7 +26,7 @@ let test_sonority_nine () =
 let test_sonority_eight () =
   let segments = [ "i"; "y"; "ɨ"; "ʉ"; "ɯ"; "u" ] in
   let expected = [ 8; 8; 8; 8; 8; 8 ] in
-  let results = List.map Sonority.sonority segments in
+  let results = List.map (Sonority.sonority sonority_calc) segments in
   assert (results = expected);
   Printf.printf "test_sonority_eight: PASSED\n"
 
@@ -36,7 +34,7 @@ let test_sonority_eight () =
 let test_sonority_seven () =
   let segments = [ "j"; "w"; "ʋ"; "ɰ"; "ɹ"; "e̯" ] in
   let expected = [ 7; 7; 7; 7; 7; 7 ] in
-  let results = List.map Sonority.sonority segments in
+  let results = List.map (Sonority.sonority sonority_calc) segments in
   assert (results = expected);
   Printf.printf "test_sonority_seven: PASSED\n"
 
@@ -44,7 +42,7 @@ let test_sonority_seven () =
 let test_sonority_six () =
   let segments = [ "l"; "ɭ"; "r"; "ɾ" ] in
   let expected = [ 6; 6; 6; 6 ] in
-  let results = List.map Sonority.sonority segments in
+  let results = List.map (Sonority.sonority sonority_calc) segments in
   assert (results = expected);
   Printf.printf "test_sonority_six: PASSED\n"
 
@@ -52,7 +50,7 @@ let test_sonority_six () =
 let test_sonority_five () =
   let segments = [ "n"; "m"; "ŋ"; "ɴ" ] in
   let expected = [ 5; 5; 5; 5 ] in
-  let results = List.map Sonority.sonority segments in
+  let results = List.map (Sonority.sonority sonority_calc) segments in
   assert (results = expected);
   Printf.printf "test_sonority_five: PASSED\n"
 
@@ -60,15 +58,18 @@ let test_sonority_five () =
 let test_sonority_four () =
   let segments = [ "v"; "z"; "ʒ"; "ɣ" ] in
   let expected = [ 4; 4; 4; 4 ] in
-  let results = List.map Sonority.sonority segments in
+  let results = List.map (Sonority.sonority sonority_calc) segments in
+  let results_string =
+    List.fold_left (fun acc item -> Printf.sprintf "%s-%d" acc item) "" results
+  in
   assert (results = expected);
-  Printf.printf "test_sonority_four: PASSED\n"
+  Printf.printf "test_sonority_four: %s\nPASSED\n" results_string
 
 (** Test sonority value 3 - Voiceless fricatives *)
 let test_sonority_three () =
   let segments = [ "f"; "s"; "x"; "ħ"; "ʃ" ] in
   let expected = [ 3; 3; 3; 3; 3 ] in
-  let results = List.map Sonority.sonority segments in
+  let results = List.map (Sonority.sonority sonority_calc) segments in
   assert (results = expected);
   Printf.printf "test_sonority_three: PASSED\n"
 
@@ -76,7 +77,7 @@ let test_sonority_three () =
 let test_sonority_two () =
   let segments = [ "b"; "ɡ"; "d"; "ɢ" ] in
   let expected = [ 2; 2; 2; 2 ] in
-  let results = List.map Sonority.sonority segments in
+  let results = List.map (Sonority.sonority sonority_calc) segments in
   assert (results = expected);
   Printf.printf "test_sonority_two: PASSED\n"
 
@@ -84,14 +85,14 @@ let test_sonority_two () =
 let test_sonority_one () =
   let segments = [ "p"; "k"; "c"; "q" ] in
   let expected = [ 1; 1; 1; 1 ] in
-  let results = List.map Sonority.sonority segments in
+  let results = List.map (Sonority.sonority sonority_calc) segments in
   assert (results = expected);
   Printf.printf "test_sonority_one: PASSED\n"
 
 (** Test unknown segment handling *)
 let test_unknown_segment () =
   try
-    let _ = Sonority.sonority "🦆" in
+    let _ = Sonority.sonority sonority_calc "🦆" in
     assert false (* Should not reach here *)
   with
-  | Failure msg when String.sub msg 0 20 = "Unknown IPA segment:" ->
+  | Failure msg when String.length msg >= 20 && String.sub msg 0 20 = "Unknown IPA segment:" ->
@@ -110,14 +111,30 @@ let test_sonority_from_features () =
       (Feature.Voiced, Feature.Minus);
     ]
   in
-  let result = Sonority.sonority_from_features segment in
+  let result = Sonority.sonority_from_features sonority_calc segment in
   assert (result = 1);
   Printf.printf "test_sonority_from_features: PASSED\n"
 
+(** Test that we can create multiple calculators (no global state) *)
+let test_multiple_calculators () =
+  let data_dir =
+    if Sys.file_exists "./data" then "./data"
+    else if Sys.file_exists "../data" then "../data"
+    else if Sys.file_exists "../../../data" then "../../../data"
+    else failwith "Cannot find data directory"
+  in
+  let calc1 = Sonority.create data_dir in
+  let calc2 = Sonority.create data_dir in
+
+  (* Both should work independently *)
+  assert (Sonority.sonority calc1 "a" = 9);
+  assert (Sonority.sonority calc2 "a" = 9);
+  Printf.printf "test_multiple_calculators: PASSED\n"
+
 (** Run all tests *)
 let () =
   Printf.printf "Running Sonority module tests...\n";
-  Printf.printf "================================\n";
+  Printf.printf "===================================\n";
   test_sonority_nine ();
   test_sonority_eight ();
   test_sonority_seven ();
@@ -129,5 +146,6 @@ let () =
   test_sonority_one ();
   test_unknown_segment ();
   test_sonority_from_features ();
-  Printf.printf "================================\n";
-  Printf.printf "All tests passed!\n"
+  test_multiple_calculators ();
+  Printf.printf "===================================\n";
+  Printf.printf "All Sonority tests passed!\n"
diff --git a/sorsyl/test/test_sonorityold.ml b/sorsyl/test/test_sonorityold.ml
new file mode 100644
index 0000000..70845a6
--- /dev/null
+++ b/sorsyl/test/test_sonorityold.ml
@@ -0,0 +1,133 @@
+(** Tests for the Sonority module *)
+
+open Sorsyl
+
+(** Test fixture - initialize the module once *)
+let () =
+  (* Initialize with the data directory *)
+  (* When run with dune test, the working directory is _build/default/test *)
+  let data_dir =
+    if Sys.file_exists "./data" then "./data"
+    else if Sys.file_exists "../data" then "../data"
+    else if Sys.file_exists "../../../data" then "../../../data"
+    else (
+      Printf.eprintf "Current directory: %s\n" (Sys.getcwd ());
+      failwith "Cannot find data directory")
+  in
+  Sonority.init data_dir
+
+(** Test sonority value 9 - Low vowels *)
+let test_sonority_nine () =
+  let segments = [ "a"; "ɑ"; "æ"; "ɒ"; "e"; "o̥" ] in
+  let expected = [ 9; 9; 9; 9; 9; 9 ] in
+  let results = List.map Sonority.sonority segments in
+  assert (results = expected);
+  Printf.printf "test_sonority_nine: PASSED\n"
+
+(** Test sonority value 8 - High vowels *)
+let test_sonority_eight () =
+  let segments = [ "i"; "y"; "ɨ"; "ʉ"; "ɯ"; "u" ] in
+  let expected = [ 8; 8; 8; 8; 8; 8 ] in
+  let results = List.map Sonority.sonority segments in
+  assert (results = expected);
+  Printf.printf "test_sonority_eight: PASSED\n"
+
+(** Test sonority value 7 - Glides/approximants *)
+let test_sonority_seven () =
+  let segments = [ "j"; "w"; "ʋ"; "ɰ"; "ɹ"; "e̯" ] in
+  let expected = [ 7; 7; 7; 7; 7; 7 ] in
+  let results = List.map Sonority.sonority segments in
+  assert (results = expected);
+  Printf.printf "test_sonority_seven: PASSED\n"
+
+(** Test sonority value 6 - Liquids *)
+let test_sonority_six () =
+  let segments = [ "l"; "ɭ"; "r"; "ɾ" ] in
+  let expected = [ 6; 6; 6; 6 ] in
+  let results = List.map Sonority.sonority segments in
+  assert (results = expected);
+  Printf.printf "test_sonority_six: PASSED\n"
+
+(** Test sonority value 5 - Nasals *)
+let test_sonority_five () =
+  let segments = [ "n"; "m"; "ŋ"; "ɴ" ] in
+  let expected = [ 5; 5; 5; 5 ] in
+  let results = List.map Sonority.sonority segments in
+  assert (results = expected);
+  Printf.printf "test_sonority_five: PASSED\n"
+
+(** Test sonority value 4 - Voiced fricatives *)
+let test_sonority_four () =
+  let segments = [ "v"; "z"; "ʒ"; "ɣ" ] in
+  let expected = [ 4; 4; 4; 4 ] in
+  let results = List.map Sonority.sonority segments in
+  assert (results = expected);
+  Printf.printf "test_sonority_four: PASSED\n"
+
+(** Test sonority value 3 - Voiceless fricatives *)
+let test_sonority_three () =
+  let segments = [ "f"; "s"; "x"; "ħ"; "ʃ" ] in
+  let expected = [ 3; 3; 3; 3; 3 ] in
+  let results = List.map Sonority.sonority segments in
+  assert (results = expected);
+  Printf.printf "test_sonority_three: PASSED\n"
+
+(** Test sonority value 2 - Voiced stops *)
+let test_sonority_two () =
+  let segments = [ "b"; "ɡ"; "d"; "ɢ" ] in
+  let expected = [ 2; 2; 2; 2 ] in
+  let results = List.map Sonority.sonority segments in
+  assert (results = expected);
+  Printf.printf "test_sonority_two: PASSED\n"
+
+(** Test sonority value 1 - Voiceless stops *)
+let test_sonority_one () =
+  let segments = [ "p"; "k"; "c"; "q" ] in
+  let expected = [ 1; 1; 1; 1 ] in
+  let results = List.map Sonority.sonority segments in
+  assert (results = expected);
+  Printf.printf "test_sonority_one: PASSED\n"
+
+(** Test unknown segment handling *)
+let test_unknown_segment () =
+  try
+    let _ = Sonority.sonority "🦆" in
+    assert false (* Should not reach here *)
+  with
+  | Failure msg when String.sub msg 0 20 = "Unknown IPA segment:" ->
+      Printf.printf "test_unknown_segment: PASSED\n"
+  | _ -> assert false
+
+(** Test feature-based sonority calculation *)
+let test_sonority_from_features () =
+  (* Test a simple voiceless stop: -syl, +cons, -son, -cont, -voi *)
+  let segment =
+    [
+      (Feature.Syllabic, Feature.Minus);
+      (Feature.Consonantal, Feature.Plus);
+      (Feature.Sonorant, Feature.Minus);
+      (Feature.Continuant, Feature.Minus);
+      (Feature.Voiced, Feature.Minus);
+    ]
+  in
+  let result = Sonority.sonority_from_features segment in
+  assert (result = 1);
+  Printf.printf "test_sonority_from_features: PASSED\n"
+
+(** Run all tests *)
+let () =
+  Printf.printf "Running Sonority module tests...\n";
+  Printf.printf "================================\n";
+  test_sonority_nine ();
+  test_sonority_eight ();
+  test_sonority_seven ();
+  test_sonority_six ();
+  test_sonority_five ();
+  test_sonority_four ();
+  test_sonority_three ();
+  test_sonority_two ();
+  test_sonority_one ();
+  test_unknown_segment ();
+  test_sonority_from_features ();
+  Printf.printf "================================\n";
+  Printf.printf "All tests passed!\n"
diff --git a/sorsyl/test/test_table.ml b/sorsyl/test/test_table.ml
new file mode 100644
index 0000000..89cb4a2
--- /dev/null
+++ b/sorsyl/test/test_table.ml
@@ -0,0 +1,211 @@
+open Sorsyl
+
+(* let es = *)
+(* String.split_on_char ' ' *)
+(* "la ˌinteɾnˌaθjonˌaliθaθjˈon del kˌoɾaθˈon i el ðˌikθjonˈaɾjo" *)
+
+(* let de = *)
+(* [ *)
+(* "kɔmˈpjuːtɐ"; *)
+(* "teleˈfoːn"; *)
+(* "fɑˈmiːliːjə"; *)
+(* "ˈʔɑːpɔtekə"; *)
+(* "ˈʃoːkolɑdə"; *)
+(* "ˈtoːmɑtən"; *)
+(* "ˈbananə"; *)
+(* "ˈpoːlitsae"; *)
+(* "ˈmuːzɔøm"; *)
+(* "ˈbʏçɐ̯ae"; *)
+(* "mediˈt͡siːn"; *)
+(* "ˈpɾoːfɛszoɾ"; *)
+(* "ʔeleˈfɑnt"; *)
+(* "dokumɛnt"; *)
+(* "ˈʔɪntɛɾnət"; *)
+(* "ˈʔʊnʔivɛɾziˈtɛːt"; *)
+(* "ˈkaɾtɔffɛln"; *)
+(* "ˈmatemɑtik"; *)
+(* "gəˈbʊɾtstɑk"; *)
+(* "ˈvœʁtɐˌbuːx"; *)
+(* "ˈbɪbli̯oːtək"; *)
+(* "demɔkɾɑˈtiːjə"; *)
+(* "fotɔgɾɑˈfiːjə"; *)
+(* "tɛçnoloˈgiːjə"; *)
+(* "bioloˈgiːjə"; *)
+(* "psʏçoloˈgiːjə"; *)
+(* "filozɔfiːjə"; *)
+(* "ˈʃoːkolɑdə"; *)
+(* "ˈmaɾmelɑdə"; *)
+(* "ˈzɛkɾetɛɾɪn"; *)
+(* "ʔɛntˈʃʊldɪgʊŋ"; *)
+(* "ˈkɾaŋkɛnvɛɾzɪçɐ̯ʊŋ"; *)
+(* "gəˈbʊɾtstagspaɾty"; *)
+(* "kɔmmunikɑˈtsĭoːn"; *)
+(* "ʔɔɾgɑnizɑˈtsĭoːn"; *)
+(* "bʏɾgɛɾˈməɪstɐ̯"; *)
+(* "zeɛnsvʏɾdɪkˈkəiːt"; *)
+(* "ˈmiːneɾalvaszɐ̯"; *)
+(* "ˈtsuːzammenaɾˈbəiːt"; *)
+(* "mœglɪçˈkəiːtən"; *)
+(* "gəlegɛnˈəiːtən"; *)
+(* "naxˈmɪtˌtɑːk"; *)
+(* "ʔɪnfɔɾmɑˈtsĭoːn"; *)
+(* "televiˈzĭoːn"; *)
+(* "gəʃvɪndɪkkaetsbɛgɾentsʊŋ"; *)
+(* "ˈkɾaŋkɛnaozaofɛntalt"; *)
+(* "ʔaɾbaetslozɪkˈkəiːt"; *)
+(* "fɛːɐ̯ˈʔantvɔɾtlɪçˈkəiːt"; *)
+(* "zeɛnsvʏɾdɪkˈkəiːtən"; *)
+(* "ˈzɛlpstvɛɾstɛntlɪç"; *)
+(* "ˈvaenaxtsgɛʃɛŋkə"; *)
+(* "gəˈbʊɾtstaksgɛʃɛŋk"; *)
+(* "tuɾɪstenɪnfɔɾmɑˈtsĭoːn"; *)
+(* "ˈʔʊnˌʔiːvɛɾzitɛtspɾofɛszoɾ"; *)
+(* "ˈleːbɛnsmɪtɛlgɛʃɛft"; *)
+(* "ˈfɑɾɾatvɛɾlae"; *)
+(* "ˈbʊndɛstakzapgeɔɾdnətɐ̯"; *)
+(* "ˈʃtɾaeçɔltsʃɛçtɛlçən"; *)
+(* "ˈfɾɔøntʃaftsbetsiːʊŋən"; *)
+(* "ˈɾɛçtsʃʊtsvɛɾzɪçɐ̯ʊŋ"; *)
+(* "nɑɾʊŋsmɪtelʊnvɛɾtɾɛglɪçˈkəiːt"; *)
+(* ] *)
+
+(* let en1 = *)
+(* [ *)
+(* "ˈæpəɫ"; *)
+(* "ˈðɪs"; *)
+(* "ˈɪs"; *)
+(* "ˈeɪ"; *)
+(* "ɫɪŋˈɡwɪstɪks"; *)
+(* "kəˈtæstɹəfi"; *)
+(* "wɪˈθaʊt"; *)
+(* "ˈpɹɛsədənt"; *)
+(* "ˈtu"; *)
+(* "ədˈmɪt"; *)
+(* "ˈænd"; *)
+(* "ˈɪts"; *)
+(* "ˌɪnstəˈɡeɪʃən"; *)
+(* "ˈʃʊd"; *)
+(* "ˈbi"; *)
+(* "ˈpənɪʃt"; *)
+(* ] *)
+
+let en2 =
+  [
+    "d͡ʒɹ̩mən";
+    "ˈpɹɛzənt";
+    "ˈɑɹtɪkəɫ";
+    "pɹəˈvaɪdz";
+    "ˌɹiəˈnæɫəsəs";
+    "kˈɔːɹɑːnəl";
+    "kɝˈoʊnəɫ";
+    "ˈɑptɪks";
+    "ˈɛksəɫəns";
+    "əbˈstɹuənts";
+    "ˈdʒɝmən";
+    "ˈɛŋɡɫɪʃ";
+    "ˈkɑmənɫi";
+    "əˈsumd";
+    "ˈdʒɪps";
+    "ˈpɫæstɝ";
+    "ˈɛŋɡɫɪʃ";
+    (* "ˈɫæps"; *)
+    "ˈɑɹɡjud";
+    "bɪˈɫoʊ";
+    "ˌɛkstɹəsɪˈɫæbɪk";
+    "ˈkɑnsənənts";
+    "ˌdɛɹəˈveɪʃənəɫ";
+    "ˈsteɪdʒ";
+    "ˌɛkstɹəsɪɫəˈbɪsɪti";
+    "ˈaɪðɝ";
+    "ˈɫæŋɡwɪdʒɪz";
+    "ɪɡˈzɪsts";
+    "ˈɛvədəns";
+    "ˈkɑmənɫi";
+    "pɹɪˈzɛntɪd";
+    "səˈpɔɹt";
+    "ˌɛkstɹəsɪˈɫæbɪk";
+    "ˈkɑnsənənts";
+    "kəmˈpætəbəɫ";
+    "ˈfʊɫi";
+    "ˈsɝfəs";
+    "ˌɑptəˈmæɫəti";
+    "ˌθiɝˈɛtɪk";
+    "ˈtɹitmənt";
+    "kənˈstɹeɪnts";
+    "ɹɪˈfɝɪŋ";
+    "ˈfʊɫi";
+    "səˈɫæbəˌfaɪd";
+    "ˈaʊtˌpʊt";
+    "ˌɹɛpɹəzɛnˈteɪʃənz";
+    "pɹəˈpoʊzd";
+  ]
+
+(* let zh = [ "/t͡ɕi⁵¹ ti⁵¹ pʰi⁵¹/" ] *)
+
+(* let ws = *)
+(* [ *)
+(* ( "/nuˌmɑ.noʊ.ʌl.tɹə.maɪ.kɹoʊˈskɑ.pɪkˌsɪ.lɪ.koʊ.vɑl.keɪ.noʊ.koʊ.niˈoʊ.sɪs/", *)
+(* "/əˈbæn.dn̩.əd.li/", *)
+(* " /əˈbæn.dn̩.əd.li/", *)
+(* "/əˈbæn.dn̩.mn̩t/", *)
+(* "/-ˌbiːə-/", *)
+(* "/əˈbluː.ʃn̩/xx" ); *)
+(* ] *)
+
+(** Find the data directory and load it with [Ipa_table.load_csv]. *)
+let get_data () =
+  Printf.printf "Getting data\n";
+  let data_dir =
+    if Sys.file_exists "./data" then "./data"
+    else if Sys.file_exists "../data" then "../data"
+    else if Sys.file_exists "../../../data" then "../../../data"
+    else (
+      Printf.eprintf "Current directory: %s\n" (Sys.getcwd ());
+      failwith "Cannot find data directory")
+  in
+  Ipa_table.load_csv data_dir
+
+(** Smoke test: print what [Ipa_table.fts] returns for "s", if anything. *)
+let test_fts table =
+  let result = Ipa_table.fts table "s" in
+  match result with
+  | None -> ()
+  | Some seg -> Printf.printf "fts\n %s\n" (Feature.string_of_segment seg)
+(* let expected = None in *)
+(* let results = Ipa_table.fts *)
+(* assert (result = expected); *)
+(* Printf.printf "test_fts: PASSED\n" *)
+
+(** Smoke test: run [Ipa_table.ipa_segs] on each word of [en2] and print. *)
+let test_segs data =
+  let words = en2 in
+  (* let expected = [] in *)
+  let _results =
+    Base.List.map words ~f:(fun word ->
+        let res = Ipa_table.ipa_segs data word in
+        Printf.printf "%s\n" word;
+        let xl =
+          Base.List.fold res ~init:"" ~f:(fun acc char ->
+              Printf.sprintf "%s-%s" acc char)
+        in
+        Printf.printf "%s\n" xl;
+        res)
+  in
+  (* Base.List.iter results ~f:(fun x -> *)
+  (* let xl = *)
+  (* Base.List.fold x ~init:"" ~f:(fun acc char -> *)
+  (* Printf.sprintf "%s-%s" acc char) *)
+  (* in *)
+  (* Printf.printf "%s\n" xl) *)
+  (* assert (result = expected) *)
+  Printf.printf "test_segs: PASSED\n"
+
+(** Run all tests *)
+let () =
+  let data = get_data () in
+  Printf.printf "Running IPA Table module tests...\n";
+  Printf.printf "===================================\n";
+  test_fts data.table;
+  test_segs data;
+  Printf.printf "===================================\n";
+  Printf.printf "All IPA Table tests passed!\n"
-- 
cgit v1.2.3