diff options
Diffstat (limited to 'src/picker/TextViewer.tsx')
-rw-r--r-- | src/picker/TextViewer.tsx | 194 |
1 files changed, 194 insertions, 0 deletions
"use client";

import React, { useMemo } from "react";
import { GranularityId } from "./LevelPicker";
import { NLP } from "sortug-ai";

type AnalysisEngine = "spacy" | "stanza";

/**
 * One paragraph of the input text together with the sentences that start
 * inside it. `start_char` / `end_char` are offsets into the original input
 * string as computed by `segmentByParagraphs`.
 */
interface Paragraph {
  id: string;
  text: string;
  start_char: number;
  end_char: number;
  sentences: NLP.Spacy.Sentence[];
}

// Sticky so a match can be anchored at an arbitrary offset via `lastIndex`
// without slicing the input first.
const PARAGRAPH_BREAK_AT = /\n\n+/y;

/** Length of the blank-line separator that begins exactly at `index`, or 0. */
const separatorLengthAt = (text: string, index: number): number => {
  PARAGRAPH_BREAK_AT.lastIndex = index;
  return PARAGRAPH_BREAK_AT.exec(text)?.[0].length ?? 0;
};

/**
 * Splits `inputText` on runs of two or more newlines and distributes
 * `allSentences` over the resulting paragraphs.
 *
 * A sentence is assigned to the first paragraph whose end offset is greater
 * than the sentence's `start`. Sentences are consumed strictly in array
 * order, so this presumably expects `allSentences` to be sorted by `start`
 * and `start` to be a character offset into `inputText` (TODO confirm
 * against the NLP response format).
 *
 * @param inputText    The raw text that was analysed.
 * @param allSentences Every sentence of the document, in document order.
 * @returns One `Paragraph` per split segment, with ids `para-0`, `para-1`, ...
 */
const segmentByParagraphs = (
  inputText: string,
  allSentences: NLP.Spacy.Sentence[],
): Paragraph[] => {
  const paragraphs: Paragraph[] = [];
  let paraStartChar = 0;
  let sentenceIdx = 0;

  inputText.split(/\n\n+/).forEach((paraText, idx) => {
    const paraEndChar = paraStartChar + paraText.length;

    // Take every not-yet-assigned sentence that starts before this paragraph ends.
    const firstSentenceIdx = sentenceIdx;
    while (
      sentenceIdx < allSentences.length &&
      allSentences[sentenceIdx]!.start < paraEndChar
    ) {
      sentenceIdx++;
    }

    paragraphs.push({
      id: `para-${idx}`,
      text: paraText,
      start_char: paraStartChar,
      end_char: paraEndChar,
      sentences: allSentences.slice(firstSentenceIdx, sentenceIdx),
    });

    // Skip over the separator that `split` removed so offsets stay aligned
    // with the original string.
    paraStartChar = paraEndChar + separatorLengthAt(inputText, paraEndChar);
  });

  return paragraphs;
};

/**
 * Resolves the display text of an NLP element.
 *
 * Uses the element's own truthy `text` when present; otherwise slices
 * `fullInput` by `start_char`/`end_char` or by `start`/`end`. Per the
 * original author's notes the first offset pair is the Stanza naming and the
 * second the spaCy naming. Returns `"N/A"` when none of these are available.
 */
const getElementText = (element: any, fullInput: string): string => {
  if (element.text) return element.text; // Already has text
  if ("start_char" in element && "end_char" in element) {
    return fullInput.substring(element.start_char, element.end_char);
  }
  if ("start" in element && "end" in element) {
    return fullInput.substring(element.start, element.end);
  }
  return "N/A";
};

// --- Text Viewer ---
interface TextViewerProps {
  nlpData: NLP.Spacy.SpacyRes;
  engine: AnalysisEngine;
  granularity: GranularityId;
  onElementSelect: (
    elementType: GranularityId,
    elementData: any,
    fullText: string,
  ) => void;
}

/**
 * Renders the analysed text with clickable regions whose size depends on
 * `granularity`:
 *
 * - `"text"`: the whole input is a single clickable block.
 * - `"paragraph"`: each paragraph container is clickable; sentences inside
 *   are plain text.
 * - `"sentence"` / `"clause"`: each sentence is a clickable span.
 * - `"word"` / `"syllable"` / `"phoneme"`: each word is a clickable span.
 *
 * Every click calls `onElementSelect(type, data, text)` and stops
 * propagation so enclosing clickable regions do not also fire.
 *
 * The `engine` prop is accepted as part of the props contract but is not
 * read by this component.
 */
const TextViewer: React.FC<TextViewerProps> = ({
  nlpData,
  engine,
  granularity,
  onElementSelect,
}) => {
  const paragraphs = useMemo(
    () => segmentByParagraphs(nlpData.input, nlpData.segments),
    [nlpData],
  );

  const renderInteractiveSpan = (
    key: string | number,
    text: string,
    data: any,
    type: GranularityId,
    baseClasses: string = "",
    hoverClasses: string = "hover:bg-yellow-200",
  ) => (
    <span
      key={key}
      className={`cursor-pointer transition-colors duration-150 ${baseClasses} ${hoverClasses} p-0.5 rounded`}
      onClick={(e) => {
        e.stopPropagation(); // Prevent clicks bubbling to parent elements
        // Report exactly the string that is rendered. For sentences and words
        // this is the value getElementText produced; for the whole-text span
        // it is the input itself rather than a lookup on the response object,
        // which would fall back to "N/A" if that object has no text/offset
        // fields (its shape is not visible from this file).
        onElementSelect(type, data, text);
      }}
    >
      {text}
    </span>
  );

  return (
    <div className="text-lg text-gray-800 leading-relaxed bg-white p-4 sm:p-6 rounded-xl shadow-inner">
      {granularity === "text"
        ? renderInteractiveSpan(
            "full-text",
            nlpData.input,
            nlpData,
            "text",
            "block",
            "hover:bg-sky-100",
          )
        : paragraphs.map((para) => (
            <div
              key={para.id}
              className={`mb-4 ${granularity === "paragraph" ? "p-2 rounded-md shadow-sm bg-gray-50" : ""}`}
              onClick={
                granularity === "paragraph"
                  ? (e) => {
                      e.stopPropagation();
                      onElementSelect("paragraph", para, para.text);
                    }
                  : undefined
              }
              style={granularity === "paragraph" ? { cursor: "pointer" } : {}}
            >
              {para.sentences.map((sent, sentIdx) => {
                const sentenceText = getElementText(sent, nlpData.input);
                const sentenceKey = `sent-${para.id}-${sentIdx}`;

                if (granularity === "sentence" || granularity === "clause") {
                  return renderInteractiveSpan(
                    sentenceKey,
                    sentenceText,
                    sent,
                    granularity,
                    "mr-1 inline-block bg-gray-100 shadow-xs",
                    "hover:bg-sky-200",
                  );
                } else if (
                  granularity === "word" ||
                  granularity === "syllable" ||
                  granularity === "phoneme"
                ) {
                  return (
                    <span key={sentenceKey} className="mr-1">
                      {" "}
                      {/* Sentence wrapper */}
                      {sent.words.map((word, idx) => {
                        const wordText = getElementText(word, nlpData.input);
                        // The index alone makes the key unique within the
                        // sentence; interpolating the word object would only
                        // append "[object Object]".
                        const wordKey = `${sentenceKey}-tok-${idx}`;
                        return (
                          <React.Fragment key={wordKey}>
                            {/* Single space between words, none before the first */}
                            {idx > 0 ? " " : ""}
                            {renderInteractiveSpan(
                              wordKey,
                              wordText,
                              word,
                              granularity,
                              "bg-gray-50",
                              "hover:bg-yellow-300",
                            )}
                          </React.Fragment>
                        );
                      })}
                    </span>
                  );
                }
                // Paragraph granularity (and any unhandled value): sentences
                // are plain text; the enclosing paragraph handles the click.
                return (
                  <span key={sentenceKey} className="mr-1">
                    {sentenceText}
                  </span>
                );
              })}
            </div>
          ))}
    </div>
  );
};

export default TextViewer;