diff options
Diffstat (limited to 'bs5/server/rsc/Markdown.ml')
-rw-r--r-- | bs5/server/rsc/Markdown.ml | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/bs5/server/rsc/Markdown.ml b/bs5/server/rsc/Markdown.ml new file mode 100644 index 0000000..e8c8942 --- /dev/null +++ b/bs5/server/rsc/Markdown.ml @@ -0,0 +1,202 @@ +module List = struct + include List + let rec take lst n = + match (lst, n) with + | ([], _) -> [] + | (_, 0) -> [] + | (x :: xs, n) -> x :: take xs (n - 1) +end + +let convert_headings text = + text + |> Str.global_replace (Str.regexp "^#### \\(.*\\)$") "<h4>\\1</h4>" + |> Str.global_replace (Str.regexp "^### \\(.*\\)$") "<h3>\\1</h3>" + |> Str.global_replace (Str.regexp "^## \\(.*\\)$") "<h2>\\1</h2>" + |> Str.global_replace (Str.regexp "^# \\(.*\\)$") "<h1>\\1</h1>" + +let convert_emphasis text = + text + |> Str.global_replace + (Str.regexp "\\*\\*\\([^*]*\\)\\*\\*") + "<strong>\\1</strong>" + |> Str.global_replace + (Str.regexp "__\\([^_]*\\)__") + "<strong>\\1</strong>" + |> Str.global_replace (Str.regexp "\\*\\([^*]*\\)\\*") "<em>\\1</em>" + |> Str.global_replace (Str.regexp "_\\([^_]*\\)_") "<em>\\1</em>" + +let convert_code text = + text + |> Str.global_replace + (Str.regexp "```\\([^`]*\\)```") + "<pre><code>\\1</code></pre>" + |> Str.global_replace (Str.regexp "`\\([^`]*\\)`") "<code>\\1</code>" + +let convert_links text = + text + |> Str.global_replace + (Str.regexp "\\[\\([^]]*\\)\\](\\([^)]*\\))") + "<a href=\"\\2\">\\1</a>" + +let convert_lists text = + let lines = String.split_on_char '\n' text in + + let process_line line = + match line with + | line when Str.string_match (Str.regexp "^-\\s*\\(.*\\)$") line 0 -> + "<li>" ^ Str.matched_group 1 line ^ "</li>" + | line when Str.string_match (Str.regexp "^\\+\\s*\\(.*\\)$") line 0 -> + "<li>" ^ Str.matched_group 1 line ^ "</li>" + | line when Str.string_match (Str.regexp "^\\*\\s*\\(.*\\)$") line 0 -> + "<li>" ^ Str.matched_group 1 line ^ "</li>" + | line + when Str.string_match (Str.regexp "^\\d+\\.\\s*\\(.*\\)$") line 0 -> + "<li>" ^ Str.matched_group 1 line ^ "</li>" + | _ -> line + in + + let wrap_consecutive_items lines = + let rec aux acc current_list lines = + match (current_list, lines) with + | ([], []) -> List.rev acc + | (hd :: tl, []) -> + List.rev [ + "<ul>" ^ String.concat "\n" (List.rev (hd :: tl)) ^ "</ul>"; + ] @ acc + | ([], line :: rest) -> + if Str.string_match (Str.regexp "^<li>") line 0 then + aux acc [line] rest + else + aux (line :: acc) [] rest + | (items, line :: rest) -> + if Str.string_match (Str.regexp "^<li>") line 0 then + aux acc (line :: current_list) rest + else + aux + (line :: ("<ul>" ^ String.concat "\n" (List.rev items) ^ "</ul>") :: acc) + [] + rest + in + aux [] [] lines + in + + lines + |> List.map process_line + |> wrap_consecutive_items + |> String.concat "\n" + +let wrap_lists text = + text + |> Str.global_replace + (Str.regexp "<li>.*</li>\\(\n<li>.*</li>\\)*") + "<ul>\\0</ul>" + +let convert_blockquotes text = + let lines = String.split_on_char '\n' text in + + let rec process_lines acc in_quote lines = + match lines with + | [] when in_quote -> List.rev ("</blockquote>" :: acc) + | [] -> List.rev acc + | line :: rest -> + let trimmed = String.trim line in + if Str.string_match (Str.regexp "^>\\s*\\(.*\\)$") trimmed 0 then + let content = Str.matched_group 1 trimmed in + if in_quote then + process_lines (content :: acc) true rest + else + process_lines (content :: "<blockquote>" :: acc) true rest + else if trimmed = "" then + if in_quote then + process_lines ("</blockquote>" :: acc) false rest + else + process_lines (line :: acc) false rest + else if in_quote then + process_lines (line :: acc) true rest + else + process_lines (line :: acc) false rest + in + + lines |> process_lines [] false |> String.concat "\n" + +let convert_paragraphs text = + let lines = String.split_on_char '\n' text in + + let is_block_element line = + Str.string_match + (Str.regexp "^<\\(h[1-6]\\|ul\\|ol\\|blockquote\\|pre\\)>") + line + 0 + in + + let wrap_paragraphs lines = + let rec aux acc current_p lines = + match lines with + | [] when current_p <> "" -> + List.rev (("<p>" ^ current_p ^ "</p>") :: acc) + | [] -> List.rev acc + | line :: rest when is_block_element line -> + if current_p <> "" then + aux (line :: ("<p>" ^ current_p ^ "</p>") :: acc) "" rest + else + aux (line :: acc) "" rest + | line :: rest when String.trim line = "" -> + if current_p <> "" then + aux (("<p>" ^ current_p ^ "</p>") :: acc) "" rest + else + aux acc "" rest + | line :: rest -> + let sep = + if current_p = "" then + "" + else + " " + in + aux acc (current_p ^ sep ^ String.trim line) rest + in + aux [] "" lines + in + + lines |> wrap_paragraphs |> String.concat "\n" + +let to_html markdown = + markdown + |> convert_headings + |> convert_emphasis + |> convert_code + |> convert_links + |> convert_lists + |> wrap_lists + |> convert_blockquotes + |> convert_paragraphs + |> String.trim + +let extract_text markdown = + markdown + |> Str.global_replace (Str.regexp "\\[([^]]*)\\]\\([^)]*\\)") "\\1" + |> Str.global_replace (Str.regexp "\\*\\*\\([^*]*\\)\\*\\*") "\\1" + |> Str.global_replace (Str.regexp "\\*\\([^*]*\\)\\*") "\\1" + |> Str.global_replace (Str.regexp "__\\([^_]*\\)__") "\\1" + |> Str.global_replace (Str.regexp "_\\([^_]*\\)_") "\\1" + |> Str.global_replace (Str.regexp "~~\\([^~]*\\)~~") "\\1" + |> Str.global_replace (Str.regexp "`\\([^`]*\\)`") "\\1" + |> Str.global_replace (Str.regexp "```[^`]*```") "" + |> Str.global_replace (Str.regexp "^#+ .*$") "\n" + |> Str.global_replace (Str.regexp "^#* .*$") "\n" + |> Str.global_replace (Str.regexp "> \\|>") "" + |> Str.global_replace (Str.regexp "\\[\\|\\]\\|\\(\\|\\)") "" + |> Str.global_replace (Str.regexp "-\\|\\+\\|\\*\\s+") "" + |> Str.global_replace (Str.regexp "^\\d+\\.\\s+") "" + |> Str.global_replace (Str.regexp "\\\\") "" + |> String.trim + +let summarize text ~words:n = + let words = Str.split (Str.regexp "[ \n\r\t]+") text in + let truncated = List.take words n in + let dots = + if List.length words > n then + "..." + else + "" + in + String.concat " " truncated ^ dots
\ No newline at end of file |