summaryrefslogtreecommitdiff
path: root/packages/prosody-ui/src/logic/utils.ts
diff options
context:
space:
mode:
Diffstat (limited to 'packages/prosody-ui/src/logic/utils.ts')
-rw-r--r--packages/prosody-ui/src/logic/utils.ts66
1 files changed, 66 insertions, 0 deletions
diff --git a/packages/prosody-ui/src/logic/utils.ts b/packages/prosody-ui/src/logic/utils.ts
new file mode 100644
index 0000000..737a6ec
--- /dev/null
+++ b/packages/prosody-ui/src/logic/utils.ts
@@ -0,0 +1,66 @@
+import type { Result } from "sortug";
+
/**
 * Character classes for every script `detectScript` can report, in priority
 * order: when two scripts match the same number of characters, the one listed
 * first wins.
 *
 * All patterns carry the `u` flag so that matching is done per code point and
 * `\u{…}` escapes are available for code points above U+FFFF.
 *
 * NOTE(review): the Latin class starts at U+0000, so ASCII whitespace, digits
 * and punctuation are counted as Latin — confirm this is intended.
 */
const SCRIPT_PATTERNS: ReadonlyArray<readonly [string, RegExp]> = [
  ["Latin", /[\u0000-\u007F\u00A0-\u00FF\u0100-\u017F\u0180-\u024F]/gu],
  ["Cyrillic", /[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F]/gu],
  ["Greek", /[\u0370-\u03FF\u1F00-\u1FFF]/gu],
  ["Hebrew", /[\u0590-\u05FF]/gu],
  ["Arabic", /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF]/gu],
  ["Devanagari", /[\u0900-\u097F]/gu], // Hindi, Sanskrit, etc.
  ["Bengali", /[\u0980-\u09FF]/gu],
  ["Thai", /[\u0E00-\u0E7F]/gu],
  [
    "Chinese",
    // The supplementary-plane ranges need `\u{…}`: a plain `\u20000` is read
    // as `\u2000` followed by a literal "0", which silently turns the class
    // into a huge range covering most of the Basic Multilingual Plane.
    /[\u4E00-\u9FFF\u3400-\u4DBF\u{20000}-\u{2A6DF}\u{2A700}-\u{2B73F}\u{2B740}-\u{2B81F}]/gu,
  ],
  // Hiragana, Katakana, full-width forms and Han; the Han overlap with
  // "Chinese" lets mixed kana + kanji text out-score the Chinese class.
  ["Japanese", /[\u3040-\u309F\u30A0-\u30FF\uFF00-\uFFEF\u4E00-\u9FAF]/gu],
  ["Korean", /[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F]/gu], // Includes Hangul
  ["Armenian", /[\u0530-\u058F]/gu],
  ["Georgian", /[\u10A0-\u10FF]/gu],
  ["Khmer", /[\u1780-\u17FF]/gu], // Cambodian
  ["Myanmar", /[\u1000-\u109F]/gu], // Burmese
  ["Tamil", /[\u0B80-\u0BFF]/gu],
  ["Telugu", /[\u0C00-\u0C7F]/gu],
  ["Amharic", /[\u1200-\u137F]/gu], // Ethiopian
];

/**
 * Guesses the dominant writing system of `text`.
 *
 * Each known script's character class is matched against the whole string and
 * the script with the highest number of matching code points is chosen. Ties
 * are resolved in favour of the script declared first in the pattern table.
 *
 * @param text - The text to inspect.
 * @returns `{ ok: scriptName }` for the best-matching script, or
 *   `{ error: "Not detected" }` when no character matches any script
 *   (for example, an empty string).
 */
export function detectScript(text: string): Result<string> {
  let dominantScript: string | undefined;
  let maxCount = 0;

  for (const [scriptName, pattern] of SCRIPT_PATTERNS) {
    // `match` with a global pattern returns every hit, or null when there is none.
    const count = text.match(pattern)?.length ?? 0;
    // Strict comparison keeps the earliest script on ties.
    if (count > maxCount) {
      maxCount = count;
      dominantScript = scriptName;
    }
  }

  if (dominantScript === undefined) return { error: "Not detected" };
  return { ok: dominantScript };
}
+
/**
 * Maps a script name to a two-letter language code for the scripts this
 * function treats as identifying a single language.
 *
 * The comparison is exact (case-sensitive).
 *
 * @param script - Script name, e.g. `"Thai"` or `"Korean"`.
 * @returns `{ ok: code }` for Thai, Japanese, Chinese and Korean;
 *   `{ error: "too generic" }` for every other value.
 */
export function langFromScript(script: string): Result<string> {
  switch (script) {
    case "Thai":
      return { ok: "th" };
    case "Japanese":
      return { ok: "ja" };
    case "Chinese":
      return { ok: "zh" };
    case "Korean":
      return { ok: "ko" };
    default:
      return { error: "too generic" };
  }
}
/**
 * Fixed language-code → script-label table consulted by `scriptFromLang`.
 *
 * NOTE(review): the labels mix a script name ("Thai"), four-letter tags
 * ("Hant", "Japn") and language-like tags ("Engl", "Span"). They are kept
 * verbatim because callers may depend on the exact strings — confirm the
 * intended vocabulary.
 */
const SCRIPT_BY_LANG: ReadonlyMap<string, string> = new Map([
  ["th", "Thai"],
  ["tha", "Thai"],
  ["en", "Engl"],
  ["es", "Span"],
  ["cn", "Hant"],
  ["zh", "Hant"],
  ["ja", "Japn"],
]);

/**
 * Resolves the script label for a language code, falling back to detection
 * on the text itself when the code is not in the fixed table.
 *
 * The code is matched exactly (case-sensitive, no trimming).
 *
 * @param lang - Language code such as `"th"`, `"en"` or `"zh"`.
 * @param text - Sample text, only inspected when `lang` is not a known code.
 * @returns The mapped label for a known code; otherwise the script name
 *   reported by `detectScript(text)`, or `""` when detection fails.
 */
export function scriptFromLang(lang: string, text: string): string {
  const known = SCRIPT_BY_LANG.get(lang);
  if (known !== undefined) return known;

  const detected = detectScript(text);
  return "ok" in detected ? detected.ok : "";
}