diff options
Diffstat (limited to 'packages/prosody-ui/src/logic/utils.ts')
| -rw-r--r-- | packages/prosody-ui/src/logic/utils.ts | 66 |
1 files changed, 66 insertions, 0 deletions
// packages/prosody-ui/src/logic/utils.ts
import type { Result } from "sortug";

/**
 * Script name → character-class pattern, in tie-break priority order
 * (earlier entries win when two scripts match the same number of characters).
 *
 * Notes:
 * - The Latin pattern covers the whole Basic Latin block, so ASCII whitespace,
 *   digits and punctuation are counted as Latin.
 * - The Chinese pattern needs the `u` flag and `\u{...}` escapes: the
 *   supplementary-plane ranges have 5 hex digits, and a plain `\uXXXX` escape
 *   only consumes 4 of them, which silently turns the class into a huge
 *   unrelated range.
 */
const SCRIPT_PATTERNS: ReadonlyArray<readonly [string, RegExp]> = [
  ["Latin", /[\u0000-\u007F\u00A0-\u00FF\u0100-\u017F\u0180-\u024F]/g],
  ["Cyrillic", /[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F]/g],
  ["Greek", /[\u0370-\u03FF\u1F00-\u1FFF]/g],
  ["Hebrew", /[\u0590-\u05FF]/g],
  ["Arabic", /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF]/g],
  ["Devanagari", /[\u0900-\u097F]/g], // Hindi, Sanskrit, etc.
  ["Bengali", /[\u0980-\u09FF]/g],
  ["Thai", /[\u0E00-\u0E7F]/g],
  [
    "Chinese",
    /[\u4E00-\u9FFF\u3400-\u4DBF\u{20000}-\u{2A6DF}\u{2A700}-\u{2B73F}\u{2B740}-\u{2B81F}]/gu,
  ],
  ["Japanese", /[\u3040-\u309F\u30A0-\u30FF\uFF00-\uFFEF\u4E00-\u9FAF]/g], // Includes Hiragana, Katakana
  ["Korean", /[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F]/g], // Includes Hangul
  ["Armenian", /[\u0530-\u058F]/g],
  ["Georgian", /[\u10A0-\u10FF]/g],
  ["Khmer", /[\u1780-\u17FF]/g], // Cambodian
  ["Myanmar", /[\u1000-\u109F]/g], // Burmese
  ["Tamil", /[\u0B80-\u0BFF]/g],
  ["Telugu", /[\u0C00-\u0C7F]/g],
  ["Amharic", /[\u1200-\u137F]/g], // Ethiopian
];

/**
 * Guesses the dominant writing system of `text` by counting, for each known
 * script, how many characters fall in that script's Unicode ranges.
 *
 * @param text - The text to inspect.
 * @returns `{ ok: scriptName }` for the script with the highest count (the
 *   earliest entry in the table wins ties), or `{ error: "Not detected" }`
 *   when no character matches any script (including the empty string).
 */
export function detectScript(text: string): Result<string> {
  let bestCount = 0;
  let bestScript: string | undefined;

  for (const [scriptName, pattern] of SCRIPT_PATTERNS) {
    // `match` with a global regex returns every match, or null when none.
    const hits = text.match(pattern)?.length ?? 0;
    // Strict `>` keeps the earlier script on ties.
    if (hits > bestCount) {
      bestCount = hits;
      bestScript = scriptName;
    }
  }

  if (bestScript === undefined) return { error: "Not detected" };
  return { ok: bestScript };
}

/**
 * Maps a script name (as produced by `detectScript`) to a language code, for
 * the scripts this function treats as identifying a single language.
 *
 * @param script - Script name such as `"Thai"` or `"Japanese"`.
 * @returns `{ ok: code }` for Thai, Japanese, Chinese and Korean; otherwise
 *   `{ error: "too generic" }`.
 */
export function langFromScript(script: string): Result<string> {
  switch (script) {
    case "Thai":
      return { ok: "th" };
    case "Japanese":
      return { ok: "ja" };
    case "Chinese":
      return { ok: "zh" };
    case "Korean":
      return { ok: "ko" };
    default:
      return { error: "too generic" };
  }
}

/**
 * Returns a script tag for a language code, falling back to script detection
 * on `text` when the language is not one of the hard-coded ones.
 *
 * Note that the two paths use different naming schemes: known languages yield
 * short tags (`"Thai"`, `"Engl"`, `"Span"`, `"Hant"`, `"Japn"`), while the
 * fallback yields `detectScript` names (e.g. `"Latin"`, `"Cyrillic"`).
 *
 * @param lang - Language code, e.g. `"th"`, `"en"`, `"zh"`.
 * @param text - Sample text used only when `lang` is not recognised.
 * @returns The script tag, or `""` when `lang` is unknown and no script could
 *   be detected in `text`.
 */
export function scriptFromLang(lang: string, text: string): string {
  switch (lang) {
    case "th":
    case "tha":
      return "Thai";
    case "en":
      return "Engl";
    case "es":
      return "Span";
    case "cn":
    case "zh":
      return "Hant";
    case "ja":
      return "Japn";
    default: {
      const detected = detectScript(text);
      return "ok" in detected ? detected.ok : "";
    }
  }
}
