summaryrefslogtreecommitdiff
path: root/src/nlp/nlp.ts
diff options
context:
space:
mode:
Diffstat (limited to 'src/nlp/nlp.ts')
-rw-r--r--src/nlp/nlp.ts208
1 files changed, 208 insertions, 0 deletions
diff --git a/src/nlp/nlp.ts b/src/nlp/nlp.ts
new file mode 100644
index 0000000..3b1e3a7
--- /dev/null
+++ b/src/nlp/nlp.ts
@@ -0,0 +1,208 @@
/**
 * Matches a non-empty string made up exclusively of punctuation characters:
 * `. , ; : ! ?`, round/square/curly brackets, straight quotes, guillemets,
 * typographic (curly) double and single quotes, the ellipsis and the hyphen.
 *
 * The curly quotes are written as `\u` escapes so they cannot be silently
 * flattened into straight quotes by an editor or a copy/paste step.
 * Compiled once at module load instead of on every call.
 */
const PUNCTUATION_ONLY = /^[.,;:!?()[\]{}'"«»\u201C\u201D\u2018\u2019…-]+$/;

/**
 * Reports whether `text` consists solely of punctuation characters.
 *
 * @param text - The token to classify.
 * @returns `true` when every character of `text` is in the punctuation set;
 *   `false` for the empty string or when any other character is present.
 */
export const isPunctuation = (text: string): boolean =>
  PUNCTUATION_ONLY.test(text);
+
/** Colour returned for any tag that has no entry in the colour table. */
const DEFAULT_CATEGORY_COLOR = "#666666";

/**
 * Hex colours keyed by syntactic category tag.
 *
 * Stored as a `Map` built from the literal's own entries so that lookups
 * never resolve inherited `Object.prototype` members such as `constructor`
 * or `toString`.
 */
const CATEGORY_COLORS: ReadonlyMap<string, string> = new Map(
  Object.entries({
    // Phrasal categories
    S: "#6495ED", // Sentence - cornflower blue
    NP: "#FF7F50", // Noun Phrase - coral
    VP: "#32CD32", // Verb Phrase - lime green
    PP: "#9370DB", // Prepositional Phrase - medium purple
    ADJP: "#FFD700", // Adjective Phrase - gold
    ADVP: "#FF69B4", // Adverb Phrase - hot pink

    // Part-of-speech tags
    NN: "#FFA07A", // Noun - light salmon
    NNS: "#FFA07A", // Plural Noun - light salmon
    NNP: "#FFA07A", // Proper Noun - light salmon
    VB: "#90EE90", // Verb - light green
    VBP: "#90EE90", // Present tense verb - light green
    VBG: "#90EE90", // Gerund verb - light green
    VBZ: "#90EE90", // 3rd person singular present verb - light green
    VBD: "#90EE90", // Past tense verb - light green
    VBN: "#90EE90", // Past participle verb - light green
    JJ: "#F0E68C", // Adjective - khaki
    RB: "#DDA0DD", // Adverb - plum
    IN: "#87CEFA", // Preposition - light sky blue
    DT: "#D3D3D3", // Determiner - light gray
    PRP: "#D8BFD8", // Personal pronoun - thistle
    CC: "#A9A9A9", // Coordinating conjunction - dark gray

    // Structural nodes
    ROOT: "#000000", // Root - black
    LEAF: "#666666", // Leaf nodes - dark gray
  }),
);

/**
 * Get the display colour for a syntactic category.
 *
 * @param type - Phrasal category or part-of-speech tag (case-sensitive).
 * @returns The hex colour registered for `type`, or `"#666666"` when the tag
 *   has no entry.
 */
export function getColorForType(type: string): string {
  return CATEGORY_COLORS.get(type) ?? DEFAULT_CATEGORY_COLOR;
}
+
/**
 * Human-readable descriptions keyed by constituency / part-of-speech tag.
 *
 * Stored as a `Map` built from the literal's own entries so that lookups
 * never resolve inherited `Object.prototype` members such as `constructor`
 * or `toString`.
 */
const TAG_DESCRIPTIONS: ReadonlyMap<string, string> = new Map(
  Object.entries({
    S: "Sentence",
    SBAR: "Subordinating conjunction clause",
    SBARQ: "Direct question",
    SINV: "Declarative sentence with subject-aux inversion",
    SQ: "Subconstituent of SBARQ excluding wh-word",
    WHADVP: "wh-adverb phrase",
    WHNP: "wh-noun phrase",
    WHPP: "wh-prepositional phrase",
    WDT: "wh-determiner",
    WP: "wh-pronoun",
    WRB: "wh-adverb",
    WP$: "possessive wh-pronoun",
    MD: "modal",
    X: "Unknown",
    NP: "Noun Phrase",
    VP: "Verb Phrase",
    PP: "Prepositional Phrase",
    ADJP: "Adjective Phrase",
    ADVP: "Adverb Phrase",
    LS: "List item marker",
    SYM: "Symbol",
    NN: "Noun",
    NNS: "Plural Noun",
    NNP: "Proper Noun",
    NNPS: "Proper Noun, Plural",
    VB: "Verb (base form)",
    VBP: "Verb (present tense)",
    VBG: "Verb (gerund/present participle)",
    VBZ: "Verb (3rd person singular present)",
    VBD: "Verb (past tense)",
    VBN: "Verb (past participle)",
    JJ: "Adjective",
    JJR: "Adjective, comparative",
    JJS: "Adjective, superlative",
    EX: "Existential there",
    RB: "Adverb",
    RBR: "Adverb, comparative",
    RBS: "Adverb, superlative",
    RP: "Particle",
    IN: "Preposition",
    TO: "to",
    DT: "Determiner",
    PDT: "Predeterminer",
    PRP: "Personal Pronoun",
    PP$: "Possessive Pronoun",
    PRP$: "Possessive Pronoun",
    POS: "Possessive ending",
    FW: "Foreign Word",
    CC: "Coordinating Conjunction",
    CD: "Cardinal number",
    UH: "interjection",
    ROOT: "Root Node",
    // NOTE(review): the wording for CLR is kept as found; confirm it against
    // the tagset the parser actually emits.
    CLR: "figurative motion",
    FRAG: "fragment",
    ":": "Colon/Semicolon",
    ",": "Comma",
    ".": "Period",
  }),
);

/**
 * Get a description for a parse-tree node type.
 *
 * @param type - Constituency or part-of-speech tag (case-sensitive).
 * @returns The description registered for `type`; when the tag has no entry
 *   the tag itself is returned unchanged.
 */
export function getDescription(type: string): string {
  return TAG_DESCRIPTIONS.get(type) ?? type;
}
+
/**
 * Human-readable names keyed by dependency relation label.
 *
 * Reference for the labels: https://universaldependencies.org/u/dep/
 *
 * Stored as a `Map` built from the literal's own entries so that lookups
 * never resolve inherited `Object.prototype` members such as `constructor`
 * or `toString`.
 */
const DEPREL_DESCRIPTIONS: ReadonlyMap<string, string> = new Map(
  Object.entries({
    nsubj: "nominal subject",
    obj: "object",
    iobj: "indirect object",
    csubj: "clausal subject",
    ccomp: "clausal complement",
    xcomp: "open clausal complement",
    obl: "oblique nominal",
    vocative: "vocative",
    expl: "expletive",
    dislocated: "dislocated",
    nmod: "nominal modifier",
    appos: "appositional modifier",
    nummod: "numeric modifier",
    advcl: "adverbial clause modifier",
    acl: "adnominal clause",
    advmod: "adverbial modifier",
    discourse: "discourse element",
    aux: "auxiliary",
    cop: "copula",
    mark: "marker",
    amod: "adjectival modifier",
    det: "determiner",
    clf: "classifier",
    case: "case marker",
    conj: "conjunction",
    cc: "coordinating conjunction",
    fixed: "fixed multiword expression",
    flat: "flat expression",
    list: "list",
    parataxis: "parataxis",
    compound: "compound",
    orphan: "orphan",
    goeswith: "goes with",
    reparandum: "overridden disfluency",
    punct: "punctuation",
    root: "root",
    dep: "unspecified dependency",
  }),
);

/**
 * Expand a dependency relation label into its human-readable name.
 *
 * Labels are matched exactly; a label that is not in the table is logged to
 * the console and returned unchanged.
 *
 * @param type - Dependency relation label, e.g. `"nsubj"`.
 * @returns The registered name, or `type` itself when there is no entry.
 */
export function unpackDeprel(type: string): string {
  const description = DEPREL_DESCRIPTIONS.get(type);
  if (description === undefined) {
    console.log("tag not found!!", type);
    return type;
  }
  return description;
}
+
/** Colour returned for any label that has no entry in the colour table. */
const DEFAULT_DEPREL_COLOR = "#666666";

/**
 * Hex colours keyed by label. The table holds a few lower-case labels
 * (`s`, `nsubj`, `root`, `p`) alongside upper-case phrasal and
 * part-of-speech tags; keys are case-sensitive, so `root` and `ROOT` are
 * distinct entries with different colours.
 *
 * Stored as a `Map` built from the literal's own entries so that lookups
 * never resolve inherited `Object.prototype` members such as `constructor`
 * or `toString`.
 */
const DEPREL_COLORS: ReadonlyMap<string, string> = new Map(
  Object.entries({
    // Lower-case labels
    s: "#6495ED", // cornflower blue
    nsubj: "#6495ED", // nominal subject - cornflower blue
    root: "#FFD700", // root - gold
    p: "#FFD700", // gold

    // Phrasal categories
    NP: "#FF7F50", // Noun Phrase - coral
    VP: "#32CD32", // Verb Phrase - lime green
    PP: "#9370DB", // Prepositional Phrase - medium purple
    ADVP: "#FF69B4", // Adverb Phrase - hot pink

    // Part-of-speech tags
    NN: "#FFA07A", // Noun - light salmon
    NNS: "#FFA07A", // Plural Noun - light salmon
    NNP: "#FFA07A", // Proper Noun - light salmon
    VB: "#90EE90", // Verb - light green
    VBP: "#90EE90", // Present tense verb - light green
    VBG: "#90EE90", // Gerund verb - light green
    VBZ: "#90EE90", // 3rd person singular present verb - light green
    VBD: "#90EE90", // Past tense verb - light green
    VBN: "#90EE90", // Past participle verb - light green
    JJ: "#F0E68C", // Adjective - khaki
    RB: "#DDA0DD", // Adverb - plum
    IN: "#87CEFA", // Preposition - light sky blue
    DT: "#D3D3D3", // Determiner - light gray
    PRP: "#D8BFD8", // Personal pronoun - thistle
    CC: "#A9A9A9", // Coordinating conjunction - dark gray

    // Structural nodes
    ROOT: "#000000", // Root - black
    LEAF: "#666666", // Leaf nodes - dark gray
  }),
);

/**
 * Get the display colour for a dependency label or tag.
 *
 * @param type - Label to look up (case-sensitive).
 * @returns The hex colour registered for `type`, or `"#666666"` when the
 *   label has no entry.
 */
export function deprelColors(type: string): string {
  return DEPREL_COLORS.get(type) ?? DEFAULT_DEPREL_COLOR;
}
/**
 * Full names keyed by abbreviated part-of-speech label.
 *
 * Stored as a `Map` built from the literal's own entries so that lookups
 * never resolve inherited `Object.prototype` members such as `constructor`
 * or `toString`.
 */
const POS_NAMES: ReadonlyMap<string, string> = new Map(
  Object.entries({
    adj: "adjective",
    adv: "adverb",
    adv_phrase: "adverbial phrase",
    combining_form: "combining form",
    conj: "conjunction",
    det: "determiner",
    intj: "interjection",
    num: "number",
    prep: "preposition",
    prep_phrase: "prepositional phrase",
    pron: "pronoun",
    punct: "punctuation",
  }),
);

/**
 * Expand an abbreviated part-of-speech label into its full name.
 *
 * @param pos - Abbreviated label, e.g. `"adj"` (case-sensitive).
 * @returns The full name registered for `pos`; labels without an entry
 *   (for example ones that are already spelled out) are returned unchanged.
 */
export function unpackPos(pos: string): string {
  return POS_NAMES.get(pos) ?? pos;
}