import type { Result } from "@sortug/lib";

/**
 * Ordered table of script name -> character class used by `detectScript`.
 *
 * Order matters: when two scripts match the same number of characters, the one
 * listed first wins (e.g. text made only of CJK ideographs is reported as
 * "Chinese" rather than "Japanese", because both classes cover U+4E00-U+9FAF).
 *
 * Note that the Latin class starts at U+0000, so ASCII whitespace, digits and
 * punctuation all count towards "Latin".
 */
const SCRIPT_PATTERNS: ReadonlyArray<readonly [string, RegExp]> = [
  ["Latin", /[\u0000-\u007F\u00A0-\u00FF\u0100-\u017F\u0180-\u024F]/g],
  ["Cyrillic", /[\u0400-\u04FF\u0500-\u052F\u2DE0-\u2DFF\uA640-\uA69F]/g],
  ["Greek", /[\u0370-\u03FF\u1F00-\u1FFF]/g],
  ["Hebrew", /[\u0590-\u05FF]/g],
  ["Arabic", /[\u0600-\u06FF\u0750-\u077F\u08A0-\u08FF]/g],
  ["Devanagari", /[\u0900-\u097F]/g], // Hindi, Sanskrit, etc.
  ["Bengali", /[\u0980-\u09FF]/g],
  ["Thai", /[\u0E00-\u0E7F]/g],
  // Code points above U+FFFF need `\u{...}` escapes together with the `u`
  // flag. Written as `\u20000-\u2A6DF` without it, the class is parsed as
  // `\u2000`, the range `0`-`\u2A6D`, and a literal `F`, which matches almost
  // every BMP character and makes scripts listed after this entry lose ties.
  [
    "Chinese",
    /[\u4E00-\u9FFF\u3400-\u4DBF\u{20000}-\u{2A6DF}\u{2A700}-\u{2B73F}\u{2B740}-\u{2B81F}]/gu,
  ],
  // Hiragana, Katakana, half/full-width forms, and CJK ideographs.
  ["Japanese", /[\u3040-\u309F\u30A0-\u30FF\uFF00-\uFFEF\u4E00-\u9FAF]/g],
  // Hangul syllables, Hangul Jamo, and Hangul compatibility Jamo.
  ["Korean", /[\uAC00-\uD7AF\u1100-\u11FF\u3130-\u318F]/g],
  ["Armenian", /[\u0530-\u058F]/g],
  ["Georgian", /[\u10A0-\u10FF]/g],
  ["Khmer", /[\u1780-\u17FF]/g], // Cambodian
  ["Myanmar", /[\u1000-\u109F]/g], // Burmese
  ["Tamil", /[\u0B80-\u0BFF]/g],
  ["Telugu", /[\u0C00-\u0C7F]/g],
  ["Amharic", /[\u1200-\u137F]/g], // Ethiopian
];

/**
 * Determines which script accounts for the most characters in `text`.
 *
 * Every entry of `SCRIPT_PATTERNS` is matched against the whole string and the
 * script with the strictly highest match count is returned; ties go to the
 * script listed first in the table.
 *
 * @param text - The text to inspect.
 * @returns `{ ok: scriptName }` for the dominant script, or
 *   `{ error: "Not detected" }` when no character matches any script
 *   (including the empty string).
 */
export function detectScript(text: string): Result {
  let dominantScript: string | null = null;
  let maxCount = 0;

  for (const [scriptName, pattern] of SCRIPT_PATTERNS) {
    // `match` with a global regex returns every match, or null when none.
    const count = text.match(pattern)?.length ?? 0;
    // Strict `>` keeps the earliest script on ties.
    if (count > maxCount) {
      maxCount = count;
      dominantScript = scriptName;
    }
  }

  if (dominantScript === null) return { error: "Not detected" };
  return { ok: dominantScript };
}

/**
 * Maps a script name, as produced by `detectScript`, to a language code for
 * the scripts handled here: Thai, Japanese, Chinese and Korean.
 *
 * @param script - Script name such as "Thai" or "Korean".
 * @returns `{ ok: code }` with "th", "ja", "zh" or "ko", or
 *   `{ error: "too generic" }` for any other script name.
 */
export function langFromScript(script: string): Result {
  switch (script) {
    case "Thai":
      return { ok: "th" };
    case "Japanese":
      return { ok: "ja" };
    case "Chinese":
      return { ok: "zh" };
    case "Korean":
      return { ok: "ko" };
    default:
      return { error: "too generic" };
  }
}

/**
 * Resolves a script label for a language code, falling back to detecting the
 * script from `text` when the code is not one of the hard-coded ones.
 *
 * NOTE(review): the hard-coded labels ("Engl", "Span", "Hant", "Japn") are not
 * the names `detectScript` produces ("Latin", "Chinese", "Japanese", ...), so
 * the two paths return values from different vocabularies. They are kept
 * exactly as they were because callers may match on them - confirm intent.
 *
 * @param lang - Language code, e.g. "th", "en", "zh".
 * @param text - Sample text, used only when `lang` is not recognised.
 * @returns The script label, or an empty string when `lang` is not recognised
 *   and no script could be detected in `text`.
 */
export function scriptFromLang(lang: string, text: string): string {
  switch (lang) {
    case "th":
    case "tha":
      return "Thai";
    case "en":
      return "Engl";
    case "es":
      return "Span";
    case "cn":
    case "zh":
      return "Hant";
    case "ja":
      return "Japn";
    default: {
      const res = detectScript(text);
      if ("ok" in res) return res.ok;
      return "";
    }
  }
}