/*
 * src/nlp/nlp.ts — display helpers for NLP tags: punctuation detection plus
 * colour and human-readable label lookups.
 *
 * Recovered from the patch for commit 42dd99bfac9777a4ecc6700b87edf26a5c984de6
 * ("checkpoint", polwex, Wed, 23 Jul 2025 02:37:15 +0700; rendered by cgit v1.2.3).
 */

/** Colour returned when a tag has no entry in a colour table. */
const DEFAULT_COLOR = "#666666";

/**
 * Matches strings made up exclusively of punctuation characters.
 * Typographic quotes are written as escapes so they survive quote-normalising
 * tools; `-` sits last in the class so it is a literal hyphen, not a range.
 */
const PUNCTUATION_ONLY = /^[.,;:!?()[\]{}'"«»\u201C\u201D\u2018\u2019…-]+$/;

/**
 * Turns a literal table into a Map so that lookups only ever see the listed
 * keys. Indexing a plain object would also hit inherited members such as
 * "constructor" or "toString" and hand back a function instead of a string.
 */
function toTable(entries: Record<string, string>): ReadonlyMap<string, string> {
  return new Map(Object.entries(entries));
}

/** Colours for part-of-speech tags, shared by both colour tables below. */
const POS_TAG_COLORS: Record<string, string> = {
  NN: "#FFA07A", // Noun - light salmon
  NNS: "#FFA07A", // Plural noun - light salmon
  NNP: "#FFA07A", // Proper noun - light salmon
  VB: "#90EE90", // Verb - light green
  VBP: "#90EE90", // Present tense verb - light green
  VBG: "#90EE90", // Gerund verb - light green
  VBZ: "#90EE90", // 3rd person singular present verb - light green
  VBD: "#90EE90", // Past tense verb - light green
  VBN: "#90EE90", // Past participle verb - light green
  JJ: "#F0E68C", // Adjective - khaki
  RB: "#DDA0DD", // Adverb - plum
  IN: "#87CEFA", // Preposition - light sky blue
  DT: "#D3D3D3", // Determiner - light gray
  PRP: "#D8BFD8", // Personal pronoun - thistle
  CC: "#A9A9A9", // Coordinating conjunction - dark gray
};

/** Colours for syntactic categories (phrasal categories and POS tags). */
const SYNTACTIC_COLORS = toTable({
  // Phrasal categories
  S: "#6495ED", // Sentence - cornflower blue
  NP: "#FF7F50", // Noun phrase - coral
  VP: "#32CD32", // Verb phrase - lime green
  PP: "#9370DB", // Prepositional phrase - medium purple
  ADJP: "#FFD700", // Adjective phrase - gold
  ADVP: "#FF69B4", // Adverb phrase - hot pink

  // Part-of-speech tags
  ...POS_TAG_COLORS,

  // Structural nodes
  ROOT: "#000000", // Root - black
  LEAF: "#666666", // Leaf nodes - dark gray
});

/**
 * Colours used by `deprelColors`. Keys are case-sensitive, so `root` (gold)
 * and `ROOT` (black) are distinct entries. Unlike the syntactic table there is
 * no `ADJP` entry, so `ADJP` resolves to the default colour here.
 */
const DEPREL_COLORS = toTable({
  // Lower-case labels
  // NOTE(review): `s` and `p` are not Universal Dependencies relation names;
  // presumably custom labels supplied by the caller — confirm.
  s: "#6495ED", // cornflower blue
  nsubj: "#6495ED", // Nominal subject - cornflower blue
  root: "#FFD700", // Root relation - gold
  p: "#FFD700", // gold

  // Phrasal categories
  NP: "#FF7F50", // Noun phrase - coral
  VP: "#32CD32", // Verb phrase - lime green
  PP: "#9370DB", // Prepositional phrase - medium purple
  ADVP: "#FF69B4", // Adverb phrase - hot pink

  // Part-of-speech tags
  ...POS_TAG_COLORS,

  // Structural nodes
  ROOT: "#000000", // Root - black
  LEAF: "#666666", // Leaf nodes - dark gray
});

/** Human-readable names for constituency and part-of-speech tags. */
const NODE_DESCRIPTIONS = toTable({
  S: "Sentence",
  SBAR: "Subordinating conjunction clause",
  SBARQ: "Direct question",
  SINV: "Declarative sentence with subject-aux inversion",
  SQ: "Subconstituent of SBARQ excluding wh-word",
  WHADVP: "wh-adverb phrase",
  WHNP: "wh-noun phrase",
  WHPP: "wh-prepositional phrase",
  WDT: "wh-determiner",
  WP: "wh-pronoun",
  WRB: "wh-adverb",
  WP$: "possessive wh-pronoun",
  MD: "modal",
  X: "Unknown",
  NP: "Noun Phrase",
  VP: "Verb Phrase",
  PP: "Prepositional Phrase",
  ADJP: "Adjective Phrase",
  ADVP: "Adverb Phrase",
  LS: "List item marker",
  SYM: "Symbol",
  NN: "Noun",
  NNS: "Plural Noun",
  NNP: "Proper Noun",
  NNPS: "Proper Noun, Plural",
  VB: "Verb (base form)",
  VBP: "Verb (present tense)",
  VBG: "Verb (gerund/present participle)",
  VBZ: "Verb (3rd person singular present)",
  VBD: "Verb (past tense)",
  VBN: "Verb (past participle)",
  JJ: "Adjective",
  JJR: "Adjective, comparative",
  JJS: "Adjective, superlative",
  EX: "Existential there",
  RB: "Adverb",
  RBR: "Adverb, comparative",
  RBS: "Adverb, superlative",
  RP: "Particle",
  IN: "Preposition",
  TO: "to",
  DT: "Determiner",
  PDT: "Predeterminer",
  PRP: "Personal Pronoun",
  PP$: "Possessive Pronoun",
  PRP$: "Possessive Pronoun",
  POS: "Possessive ending",
  FW: "Foreign Word",
  CC: "Coordinating Conjunction",
  CD: "Cardinal number",
  UH: "interjection",
  ROOT: "Root Node",
  CLR: "figurative motion",
  FRAG: "fragment",
  ":": "Colon/Semicolon",
  ",": "Comma",
  ".": "Period",
});

/**
 * Names of the Universal Dependencies relations.
 * See https://universaldependencies.org/u/dep/index.html
 */
const DEPREL_DESCRIPTIONS = toTable({
  nsubj: "nominal subject",
  obj: "object",
  iobj: "indirect object",
  csubj: "clausal subject",
  ccomp: "clausal complement",
  xcomp: "open clausal complement",
  obl: "oblique nominal",
  vocative: "vocative",
  expl: "expletive",
  dislocated: "dislocated",
  nmod: "nominal modifier",
  appos: "appositional modifier",
  nummod: "numeric modifier",
  advcl: "adverbial clause modifier",
  acl: "adnominal clause",
  advmod: "adverbial modifier",
  discourse: "discourse element",
  aux: "auxiliary",
  cop: "copula",
  mark: "marker",
  amod: "adjectival modifier",
  det: "determiner",
  clf: "classifier",
  case: "case marker",
  conj: "conjunction",
  cc: "coordinating conjunction",
  fixed: "fixed multiword expression",
  flat: "flat expression",
  list: "list",
  parataxis: "parataxis",
  compound: "compound",
  orphan: "orphan",
  goeswith: "goes with",
  reparandum: "overridden disfluency",
  punct: "punctuation",
  root: "root",
  dep: "unspecified dependency",
});

/** Expansions for abbreviated part-of-speech codes. */
const POS_NAMES = toTable({
  adj: "adjective",
  adv: "adverb",
  adv_phrase: "adverbial phrase",
  combining_form: "combining form",
  conj: "conjunction",
  det: "determiner",
  intj: "interjection",
  num: "number",
  prep: "preposition",
  prep_phrase: "prepositional phrase",
  pron: "pronoun",
  punct: "punctuation",
});

/**
 * Tells whether `text` consists solely of punctuation characters.
 *
 * @param text - Token to test.
 * @returns `true` when every character is punctuation (straight or
 *   typographic quotes, brackets, guillemets, ellipsis, hyphen, …); `false`
 *   for the empty string or when any other character is present.
 */
export const isPunctuation = (text: string): boolean => {
  return PUNCTUATION_ONLY.test(text);
};

/**
 * Get the display colour for a syntactic category.
 *
 * @param type - Phrasal category or part-of-speech tag (case-sensitive).
 * @returns The hex colour for the tag, or `#666666` when the tag is unknown.
 */
export function getColorForType(type: string): string {
  return SYNTACTIC_COLORS.get(type) ?? DEFAULT_COLOR;
}

/**
 * Get a human-readable description for a node type.
 *
 * @param type - Constituency or part-of-speech tag (case-sensitive).
 * @returns The description, or `type` itself when no description is known.
 */
export function getDescription(type: string): string {
  return NODE_DESCRIPTIONS.get(type) ?? type;
}

/**
 * Expand a dependency relation label into its full name.
 *
 * Unknown labels are reported on the console and returned unchanged. That
 * includes subtyped relations written as `base:subtype`, because only the bare
 * relation names are listed.
 *
 * @param type - Dependency relation label (case-sensitive).
 * @returns The relation's name, or `type` itself when it is not listed.
 */
export function unpackDeprel(type: string): string {
  const name = DEPREL_DESCRIPTIONS.get(type);
  if (name === undefined) {
    console.log("tag not found!!", type);
    return type;
  }
  return name;
}

/**
 * Get the display colour for a dependency relation or tag.
 *
 * @param type - Label to look up (case-sensitive).
 * @returns The hex colour for the label, or `#666666` when it is unknown.
 */
export function deprelColors(type: string): string {
  return DEPREL_COLORS.get(type) ?? DEFAULT_COLOR;
}

/**
 * Expand an abbreviated part-of-speech code into its full name.
 *
 * @param pos - Part-of-speech code such as `adj` or `prep_phrase`.
 * @returns The full name, or `pos` itself when the code is not listed.
 */
export function unpackPos(pos: string): string {
  return POS_NAMES.get(pos) ?? pos;
}