open Sorsyl

(* Disabled fixtures: Spanish (es), German (de) and a first English word list
   (en1). Kept for reference; none of them is used by the tests below. *)
(* let es = *)
(* String.split_on_char ' ' *)
(* "la ˌinteɾnˌaθjonˌaliθaθjˈon del kˌoɾaθˈon i el ðˌikθjonˈaɾjo" *)
(* let de = *)
(* [ *)
(* "kɔmˈpjuːtɐ"; *)
(* "teleˈfoːn"; *)
(* "fɑˈmiːliːjə"; *)
(* "ˈʔɑːpɔtekə"; *)
(* "ˈʃoːkolɑdə"; *)
(* "ˈtoːmɑtən"; *)
(* "ˈbananə"; *)
(* "ˈpoːlitsae"; *)
(* "ˈmuːzɔøm"; *)
(* "ˈbʏçɐ̯ae"; *)
(* "mediˈt͡siːn"; *)
(* "ˈpɾoːfɛszoɾ"; *)
(* "ʔeleˈfɑnt"; *)
(* "dokumɛnt"; *)
(* "ˈʔɪntɛɾnət"; *)
(* "ˈʔʊnʔivɛɾziˈtɛːt"; *)
(* "ˈkaɾtɔffɛln"; *)
(* "ˈmatemɑtik"; *)
(* "gəˈbʊɾtstɑk"; *)
(* "ˈvœʁtɐˌbuːx"; *)
(* "ˈbɪbli̯oːtək"; *)
(* "demɔkɾɑˈtiːjə"; *)
(* "fotɔgɾɑˈfiːjə"; *)
(* "tɛçnoloˈgiːjə"; *)
(* "bioloˈgiːjə"; *)
(* "psʏçoloˈgiːjə"; *)
(* "filozɔfiːjə"; *)
(* "ˈʃoːkolɑdə"; *)
(* "ˈmaɾmelɑdə"; *)
(* "ˈzɛkɾetɛɾɪn"; *)
(* "ʔɛntˈʃʊldɪgʊŋ"; *)
(* "ˈkɾaŋkɛnvɛɾzɪçɐ̯ʊŋ"; *)
(* "gəˈbʊɾtstagspaɾty"; *)
(* "kɔmmunikɑˈtsĭoːn"; *)
(* "ʔɔɾgɑnizɑˈtsĭoːn"; *)
(* "bʏɾgɛɾˈməɪstɐ̯"; *)
(* "zeɛnsvʏɾdɪkˈkəiːt"; *)
(* "ˈmiːneɾalvaszɐ̯"; *)
(* "ˈtsuːzammenaɾˈbəiːt"; *)
(* "mœglɪçˈkəiːtən"; *)
(* "gəlegɛnˈəiːtən"; *)
(* "naxˈmɪtˌtɑːk"; *)
(* "ʔɪnfɔɾmɑˈtsĭoːn"; *)
(* "televiˈzĭoːn"; *)
(* "gəʃvɪndɪkkaetsbɛgɾentsʊŋ"; *)
(* "ˈkɾaŋkɛnaozaofɛntalt"; *)
(* "ʔaɾbaetslozɪkˈkəiːt"; *)
(* "fɛːɐ̯ˈʔantvɔɾtlɪçˈkəiːt"; *)
(* "zeɛnsvʏɾdɪkˈkəiːtən"; *)
(* "ˈzɛlpstvɛɾstɛntlɪç"; *)
(* "ˈvaenaxtsgɛʃɛŋkə"; *)
(* "gəˈbʊɾtstaksgɛʃɛŋk"; *)
(* "tuɾɪstenɪnfɔɾmɑˈtsĭoːn"; *)
(* "ˈʔʊnˌʔiːvɛɾzitɛtspɾofɛszoɾ"; *)
(* "ˈleːbɛnsmɪtɛlgɛʃɛft"; *)
(* "ˈfɑɾɾatvɛɾlae"; *)
(* "ˈbʊndɛstakzapgeɔɾdnətɐ̯"; *)
(* "ˈʃtɾaeçɔltsʃɛçtɛlçən"; *)
(* "ˈfɾɔøntʃaftsbetsiːʊŋən"; *)
(* "ˈɾɛçtsʃʊtsvɛɾzɪçɐ̯ʊŋ"; *)
(* "nɑɾʊŋsmɪtelʊnvɛɾtɾɛglɪçˈkəiːt"; *)
(* ] *)
(* let en1 = *)
(* [ *)
(* "ˈæpəɫ"; *)
(* "ˈðɪs"; *)
(* "ˈɪs"; *)
(* "ˈeɪ"; *)
(* "ɫɪŋˈɡwɪstɪks"; *)
(* "kəˈtæstɹəfi"; *)
(* "wɪˈθaʊt"; *)
(* "ˈpɹɛsədənt"; *)
(* "ˈtu"; *)
(* "ədˈmɪt"; *)
(* "ˈænd"; *)
(* "ˈɪts"; *)
(* "ˌɪnstəˈɡeɪʃən"; *)
(* "ˈʃʊd"; *)
(* "ˈbi"; *)
(* "ˈpənɪʃt"; *)
(* ] *)

(** English IPA transcriptions fed to {!test_segs}. Some words appear more
    than once; the list is used as-is, in order. *)
let en2 =
  [
    "d͡ʒɹ̩mən";
    "ˈpɹɛzənt";
    "ˈɑɹtɪkəɫ";
    "pɹəˈvaɪdz";
    "ˌɹiəˈnæɫəsəs";
    "kˈɔːɹɑːnəl";
    "kɝˈoʊnəɫ";
    "ˈɑptɪks";
    "ˈɛksəɫəns";
    "əbˈstɹuənts";
    "ˈdʒɝmən";
    "ˈɛŋɡɫɪʃ";
    "ˈkɑmənɫi";
    "əˈsumd";
    "ˈdʒɪps";
    "ˈpɫæstɝ";
    "ˈɛŋɡɫɪʃ";
    (* "ˈɫæps"; *)
    "ˈɑɹɡjud";
    "bɪˈɫoʊ";
    "ˌɛkstɹəsɪˈɫæbɪk";
    "ˈkɑnsənənts";
    "ˌdɛɹəˈveɪʃənəɫ";
    "ˈsteɪdʒ";
    "ˌɛkstɹəsɪɫəˈbɪsɪti";
    "ˈaɪðɝ";
    "ˈɫæŋɡwɪdʒɪz";
    "ɪɡˈzɪsts";
    "ˈɛvədəns";
    "ˈkɑmənɫi";
    "pɹɪˈzɛntɪd";
    "səˈpɔɹt";
    "ˌɛkstɹəsɪˈɫæbɪk";
    "ˈkɑnsənənts";
    "kəmˈpætəbəɫ";
    "ˈfʊɫi";
    "ˈsɝfəs";
    "ˌɑptəˈmæɫəti";
    "ˌθiɝˈɛtɪk";
    "ˈtɹitmənt";
    "kənˈstɹeɪnts";
    "ɹɪˈfɝɪŋ";
    "ˈfʊɫi";
    "səˈɫæbəˌfaɪd";
    "ˈaʊtˌpʊt";
    "ˌɹɛpɹəzɛnˈteɪʃənz";
    "pɹəˈpoʊzd";
  ]

(* let zh = [ "/t͡ɕi⁵¹ ti⁵¹ pʰi⁵¹/" ] *)
(* let ws = *)
(* [ *)
(* ( "/nuˌmɑ.noʊ.ʌl.tɹə.maɪ.kɹoʊˈskɑ.pɪkˌsɪ.lɪ.koʊ.vɑl.keɪ.noʊ.koʊ.niˈoʊ.sɪs/", *)
(* "/əˈbæn.dn̩.əd.li/", *)
(* " /əˈbæn.dn̩.əd.li/", *)
(* "/əˈbæn.dn̩.mn̩t/", *)
(* "/-ˌbiːə-/", *)
(* "/əˈbluː.ʃn̩/xx" ); *)
(* ] *)

(** The data set loaded by [Ipa_table.load_csv] from the first existing
    directory among [./data], [../data] and [../../../data] (relative to the
    current working directory).

    This is a value, not a function: the lookup, the ["Getting data"] message
    and the load all happen once, when this module is initialised.

    @raise Failure
      if none of the candidate directories exists; the current working
      directory is printed to stderr first. *)
let get_data =
  Printf.printf "Getting data\n";
  (* Probed in order; the first hit wins. *)
  let candidates = [ "./data"; "../data"; "../../../data" ] in
  match Stdlib.List.find_opt Sys.file_exists candidates with
  | Some data_dir -> Ipa_table.load_csv data_dir
  | None ->
      Printf.eprintf "Current directory: %s\n" (Sys.getcwd ());
      failwith "Cannot find data directory"

(** [test_fts table] looks up the segment ["s"] with [Ipa_table.fts] and, when
    one is found, prints it using [Feature.string_of_segment]. Nothing is
    printed and nothing fails when the lookup returns [None]. *)
let test_fts table =
  match Ipa_table.fts table "s" with
  | None -> ()
  | Some seg -> Printf.printf "fts\n %s\n" (Feature.string_of_segment seg)

(* let expected = None in *)
(* let results = Ipa_table.fts *)
(* assert (result = expected); *)
(* Printf.printf "test_fts: PASSED\n" *)

(** [test_segs data] runs [Ipa_table.ipa_segs data] on every word of {!en2}.
    For each word it prints the word on one line, then its segments on the
    next line, each segment preceded by ['-'] (e.g. [-a-b-c]).

    This is a smoke test: the output is not compared against expected
    values. *)
let test_segs data =
  Base.List.iter en2 ~f:(fun word ->
      (* Segment before printing the word, so a failure in [ipa_segs] is not
         preceded by a misleading line of output. *)
      let segs = Ipa_table.ipa_segs data word in
      Printf.printf "%s\n" word;
      Base.List.iter segs ~f:(fun seg -> Printf.printf "-%s" seg);
      Printf.printf "\n");
  (* let expected = [] in *)
  (* assert (result = expected) *)
  Printf.printf "test_segs: PASSED\n"

(** Run all tests *)
let () =
  let data = get_data in
  Printf.printf "Running IPA Table module tests...\n";
  Printf.printf "===================================\n";
  test_fts data.table;
  test_segs data;
  Printf.printf "===================================\n";
  Printf.printf "All IPA Table tests passed!\n"