From fd86dc15734f3b7126d88f0130897c597100e30a Mon Sep 17 00:00:00 2001 From: polwex Date: Thu, 15 May 2025 20:32:25 +0700 Subject: m --- src/picker/App.tsx | 396 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 396 insertions(+) create mode 100644 src/picker/App.tsx (limited to 'src/picker/App.tsx') diff --git a/src/picker/App.tsx b/src/picker/App.tsx new file mode 100644 index 0000000..a17a006 --- /dev/null +++ b/src/picker/App.tsx @@ -0,0 +1,396 @@ +// +"use client"; + +import React, { useState, useCallback, useMemo, useEffect } from "react"; +import { + TextSelect, + Combine, + WholeWord, + Highlighter, + Atom, + Mic2, + CheckCircle2, + ExternalLink, + Brain, + Zap, +} from "lucide-react"; +import { NLP } from "sortug-ai"; + +// --- Granularity Definition --- +/** Selectable granularity levels, each with an id, a display name and a lucide-react icon component. Declared as const so that GranularityId can be derived from the id literals. */ const GRANULARITY_LEVELS = [ + { id: "text", name: "Text", icon: TextSelect }, + { id: "paragraph", name: "Paragraph", icon: Combine }, + { id: "sentence", name: "Sentence", icon: Highlighter }, + { id: "clause", name: "Clause (Sentence Lvl)", icon: Highlighter }, + { id: "word", name: "Word/Token", icon: WholeWord }, + { id: "syllable", name: "Syllable (Word Lvl)", icon: Mic2 }, + { id: "phoneme", name: "Phoneme (Word Lvl)", icon: Atom }, +] as const; +/** Union of the id literals declared in GRANULARITY_LEVELS. */ type GranularityId = (typeof GRANULARITY_LEVELS)[number]["id"]; +/** Name of the analysis engine; the selection handler in this file branches on it when formatting details. */ type AnalysisEngine = "spacy" | "stanza"; + +// --- Sample Data (Simplified) --- + +/** One paragraph of the input: its text, its character range (start_char, end_char) as computed by segmentByParagraphs, and the sentences assigned to it. */ interface Paragraph { + id: string; + text: string; + start_char: number; + end_char: number; + sentences: NLP.Spacy.Sentence[]; +} + +/** Splits inputText on runs of two or more newlines and builds one Paragraph per piece. Sentences are consumed in order through a single shared index: a sentence is assigned to the current paragraph while its start is less than that paragraph's end offset. Presumably allSentences is sorted by start and start is a character offset into inputText - TODO confirm. @param inputText full input text. @param allSentences sentences for the whole input. @returns paragraphs in document order with ids of the form para-N. */ const segmentByParagraphs = ( + inputText: string, + allSentences: NLP.Spacy.Sentence[], +): Paragraph[] => { + const paragraphs: Paragraph[] = []; + const paraTexts = inputText.split(/\n\n+/); + let currentDocCharOffset = 0; + let sentenceIdx = 0; + + paraTexts.forEach((paraText, idx) => { + const paraStartChar = currentDocCharOffset; + const paraEndChar = paraStartChar + paraText.length; + const paraSentences: NLP.Spacy.Sentence[] = []; + + while 
(sentenceIdx < allSentences.length) { + const sent = allSentences[sentenceIdx]!; + if (sent.start < paraEndChar) { + paraSentences.push(sent); + sentenceIdx++; + } else { + break; + } + } + + paragraphs.push({ + id: `para-${idx}`, + text: paraText, + start_char: paraStartChar, + end_char: paraEndChar, + sentences: paraSentences, + }); + /* Move past this paragraph plus the newline run that directly follows it (0 when there is none). */ currentDocCharOffset = + paraEndChar + + (inputText.substring(paraEndChar).match(/^\n\n+/)?.[0].length || 0); + }); + return paragraphs; +}; + +// --- Granularity Menu --- +/** Props for GranularityMenu: the currently selected granularity and a callback that receives a granularity id. */ interface GranularityMenuProps { + selectedGranularity: GranularityId; + onSelectGranularity: (granularity: GranularityId) => void; +} +/** Granularity picker component. NOTE(review): the rendered markup and the React.FC type argument are not visible in this view (they look to have been lost in extraction) - confirm against the original file. */ const GranularityMenu: React.FC = ({ + selectedGranularity, + onSelectGranularity, +}) => ( + +); + +// --- Text Viewer --- +/** Props for TextViewer: the analysis result, the engine name, the active granularity, and a callback that receives the element type, the element data and a text string (callers in this file pass the text of the selected element). */ interface TextViewerProps { + nlpData: NLP.Spacy.SpacyRes; + engine: AnalysisEngine; + granularity: GranularityId; + onElementSelect: ( + elementType: GranularityId, + elementData: any, + fullText: string, + ) => void; +} + +/** Renders the input as clickable pieces at the chosen granularity: the whole text for "text", otherwise per paragraph, with sentences or words made interactive depending on granularity. Paragraphs are memoised on nlpData. NOTE(review): engine is destructured but not referenced in the code visible here. */ const TextViewer: React.FC = ({ + nlpData, + engine, + granularity, + onElementSelect, +}) => { + const paragraphs = useMemo( + () => segmentByParagraphs(nlpData.input, nlpData.segments), + [nlpData], + ); + + /** Resolves the display text of an element: its own text when truthy, else the fullInput slice given by start_char/end_char, else the slice given by start/end, else "N/A". */ const getElementText = (element: any, fullInput: string): string => { + if (element.text) return element.text; // Already has text + if ("start_char" in element && "end_char" in element) { + // Stanza word/token/sentence/entity + return fullInput.substring(element.start_char, element.end_char); + } + if ("start" in element && "end" in element) { + // spaCy token/sentence/entity + return fullInput.substring(element.start, element.end); + } + return "N/A"; + }; + + /** Builds one clickable element showing text; its click handler stops propagation and reports (type, data, resolved text) through onElementSelect. NOTE(review): the opening tag, including how key, baseClasses and hoverClasses are applied, is not visible in this view. */ const renderInteractiveSpan = ( + key: string | number, + text: string, + data: any, + type: GranularityId, + baseClasses: string = "", + hoverClasses: string = "hover:bg-yellow-200", + ) => ( + { + e.stopPropagation(); // Prevent clicks bubbling to parent elements + onElementSelect(type, data, getElementText(data, 
nlpData.input)); + }} + > + {text} + + ); + + return ( +
+ {granularity === "text" + ? renderInteractiveSpan( + "full-text", + nlpData.input, + nlpData, + "text", + "block", + "hover:bg-sky-100", + ) + : paragraphs.map((para) => ( +
{ + e.stopPropagation(); + onElementSelect("paragraph", para, para.text); + } + : undefined + } + style={granularity === "paragraph" ? { cursor: "pointer" } : {}} + > + {para.sentences.map((sent, sentIdx) => { + const sentenceText = getElementText(sent, nlpData.input); + const sentenceKey = `sent-${para.id}-${sentIdx}`; + + if (granularity === "sentence" || granularity === "clause") { + return renderInteractiveSpan( + sentenceKey, + sentenceText, + sent, + granularity, + "mr-1 inline-block bg-gray-100 shadow-xs", + "hover:bg-sky-200", + ); + } else if ( + granularity === "word" || + granularity === "syllable" || + granularity === "phoneme" + ) { + let currentWordRenderIndex = 0; // to add spaces correctly + return ( + + {" "} + {/* Sentence wrapper */} + {sent.words.map((word, idx) => { + const wordText = getElementText(word, nlpData.input); + /* NOTE(review): word is interpolated into the key as-is; if it is an object this stringifies to "[object Object]", leaving idx as the only distinguishing part - confirm the intended field. */ const wordKey = `${sentenceKey}-tok-${idx}-word-${word}`; + const space = currentWordRenderIndex > 0 ? " " : ""; + currentWordRenderIndex++; + return ( + + {space} + {renderInteractiveSpan( + wordKey, + wordText, + word, + granularity, + "bg-gray-50", + "hover:bg-yellow-300", + )} + + ); + })} + + ); + } + // Fallback for paragraph view if no other granularity matches (should not happen if logic is correct) + return ( + + {sentenceText} + + ); + })} +
+ ))} +
+ ); +}; + +// --- Main Application Component --- +/** Top-level screen: holds the selected granularity (initially "word"), the current engine (initially "stanza"), the info string for the last selected element, and the NLP data. */ export default function NlpTextAnalysisScreen() { + const [selectedGranularity, setSelectedGranularity] = + useState("word"); + const [currentEngine, setCurrentEngine] = useState("stanza"); + const [selectedElementInfo, setSelectedElementInfo] = useState( + null, + ); + const [activeNlpData, setData] = useState(); + /* NOTE(review): the effect body contains only commented-out code, so it does nothing as written; setData is not called anywhere in the code visible here. */ useEffect(() => { + // const nlpdata = sessionStorage.getItem( + // currentEngine === "spacy" ? "spacyres" : "stanzares", + // ); + // const activeNlpData = JSON.parse(nlpdata!); + }, []); + + /** Stores the new granularity and clears the selected-element info. */ const handleGranularityChange = useCallback((granularity: GranularityId) => { + setSelectedGranularity(granularity); + setSelectedElementInfo(null); + }, []); + + /** Builds a multi-line description of the clicked element (type, engine, text, type-specific details, first five keys of the raw data), stores it in state and logs the selection to the console. Recreated whenever currentEngine changes. */ const handleElementSelect = useCallback( + (elementType: GranularityId, elementData: any, elementText: string) => { + let info = `Selected: ${elementType.toUpperCase()} (${currentEngine})\n`; + info += `Text: "${elementText}"\n`; + + if (elementType === "syllable" || elementType === "phoneme") { + info += `(Granularity: ${elementType}, showing parent Word/Token details)\n`; + } + + // Add specific details based on element type and engine + /* NOTE(review): for syllable and phoneme the message above says parent Word/Token details are shown, but the details branch below only runs when elementType is "word" - confirm intent. */ if (elementType === "word") { + if (currentEngine === "stanza" && elementData.lemma) { + // StanzaWord + info += `Lemma: ${elementData.lemma}\nUPOS: ${elementData.upos}\nXPOS: ${elementData.xpos}\nDepRel: ${elementData.deprel} (Head ID: ${elementData.head})\n`; + if (elementData.parentToken?.ner) + info += `NER (Token): ${elementData.parentToken.ner}\n`; + } else if (currentEngine === "spacy" && elementData.lemma_) { + // SpacyToken + info += `Lemma: ${elementData.lemma_}\nPOS: ${elementData.pos_}\nTag: ${elementData.tag_}\nDep: ${elementData.dep_} (Head ID: ${elementData.head?.i})\n`; + if (elementData.ent_type_) + info += `Entity: ${elementData.ent_type_} (${elementData.ent_iob_})\n`; + } + } else if (elementType === "sentence") { + if ( + currentEngine === "stanza" && + (elementData as NLP.Stanza.Sentence).sentiment 
+ ) { + info += `Sentiment: ${(elementData as NLP.Stanza.Sentence).sentiment}\n`; + } + if ( + (elementData as NLP.Stanza.Sentence | NLP.Spacy.Sentence).entities + ?.length + ) { + info += `Entities in sentence: ${(elementData.entities as any[]).map((e) => `${e.text} (${e.type || e.label_})`).join(", ")}\n`; + } + } else if (elementType === "paragraph") { + info += `Char range: ${elementData.start_char}-${elementData.end_char}\n`; + info += `Sentence count: ${elementData.sentences.length}\n`; + } + + info += `Raw Data Keys: ${Object.keys(elementData).slice(0, 5).join(", ")}...`; // Show some keys + setSelectedElementInfo(info); + console.log( + "Selected Element:", + elementType, + elementData, + "Text:", + elementText, + ); + }, + [currentEngine], + ); + + /** Switches between "spacy" and "stanza" and clears the selected-element info. */ const toggleEngine = () => { + setCurrentEngine((prev) => (prev === "spacy" ? "stanza" : "spacy")); + setSelectedElementInfo(null); + }; + + return ( +
+
+

+ NLP Text Analyzer +

+ +

+ Currently viewing with:{" "} + {currentEngine.toUpperCase()} +

+
+ +
+ + +
+ {" "} + {/* min-w-0 for flex child to prevent overflow */} + +
+
+ + +
+ ); +} -- cgit v1.2.3