summaryrefslogtreecommitdiff
path: root/bs5/server/rsc/Markdown.ml
diff options
context:
space:
mode:
Diffstat (limited to 'bs5/server/rsc/Markdown.ml')
-rw-r--r--bs5/server/rsc/Markdown.ml202
1 files changed, 202 insertions, 0 deletions
diff --git a/bs5/server/rsc/Markdown.ml b/bs5/server/rsc/Markdown.ml
new file mode 100644
index 0000000..e8c8942
--- /dev/null
+++ b/bs5/server/rsc/Markdown.ml
@@ -0,0 +1,202 @@
+module List = struct
+ include List
+ let rec take lst n =
+ match (lst, n) with
+ | ([], _) -> []
+ | (_, 0) -> []
+ | (x :: xs, n) -> x :: take xs (n - 1)
+end
+
+let convert_headings text =
+ text
+ |> Str.global_replace (Str.regexp "^#### \\(.*\\)$") "<h4>\\1</h4>"
+ |> Str.global_replace (Str.regexp "^### \\(.*\\)$") "<h3>\\1</h3>"
+ |> Str.global_replace (Str.regexp "^## \\(.*\\)$") "<h2>\\1</h2>"
+ |> Str.global_replace (Str.regexp "^# \\(.*\\)$") "<h1>\\1</h1>"
+
+let convert_emphasis text =
+ text
+ |> Str.global_replace
+ (Str.regexp "\\*\\*\\([^*]*\\)\\*\\*")
+ "<strong>\\1</strong>"
+ |> Str.global_replace
+ (Str.regexp "__\\([^_]*\\)__")
+ "<strong>\\1</strong>"
+ |> Str.global_replace (Str.regexp "\\*\\([^*]*\\)\\*") "<em>\\1</em>"
+ |> Str.global_replace (Str.regexp "_\\([^_]*\\)_") "<em>\\1</em>"
+
+let convert_code text =
+ text
+ |> Str.global_replace
+ (Str.regexp "```\\([^`]*\\)```")
+ "<pre><code>\\1</code></pre>"
+ |> Str.global_replace (Str.regexp "`\\([^`]*\\)`") "<code>\\1</code>"
+
+let convert_links text =
+ text
+ |> Str.global_replace
+ (Str.regexp "\\[\\([^]]*\\)\\](\\([^)]*\\))")
+ "<a href=\"\\2\">\\1</a>"
+
+let convert_lists text =
+ let lines = String.split_on_char '\n' text in
+
+ let process_line line =
+ match line with
+ | line when Str.string_match (Str.regexp "^-\\s*\\(.*\\)$") line 0 ->
+ "<li>" ^ Str.matched_group 1 line ^ "</li>"
+ | line when Str.string_match (Str.regexp "^\\+\\s*\\(.*\\)$") line 0 ->
+ "<li>" ^ Str.matched_group 1 line ^ "</li>"
+ | line when Str.string_match (Str.regexp "^\\*\\s*\\(.*\\)$") line 0 ->
+ "<li>" ^ Str.matched_group 1 line ^ "</li>"
+ | line
+ when Str.string_match (Str.regexp "^\\d+\\.\\s*\\(.*\\)$") line 0 ->
+ "<li>" ^ Str.matched_group 1 line ^ "</li>"
+ | _ -> line
+ in
+
+ let wrap_consecutive_items lines =
+ let rec aux acc current_list lines =
+ match (current_list, lines) with
+ | ([], []) -> List.rev acc
+ | (hd :: tl, []) ->
+ List.rev [
+ "<ul>" ^ String.concat "\n" (List.rev (hd :: tl)) ^ "</ul>";
+ ] @ acc
+ | ([], line :: rest) ->
+ if Str.string_match (Str.regexp "^<li>") line 0 then
+ aux acc [line] rest
+ else
+ aux (line :: acc) [] rest
+ | (items, line :: rest) ->
+ if Str.string_match (Str.regexp "^<li>") line 0 then
+ aux acc (line :: current_list) rest
+ else
+ aux
+ (line :: ("<ul>" ^ String.concat "\n" (List.rev items) ^ "</ul>") :: acc)
+ []
+ rest
+ in
+ aux [] [] lines
+ in
+
+ lines
+ |> List.map process_line
+ |> wrap_consecutive_items
+ |> String.concat "\n"
+
+let wrap_lists text =
+ text
+ |> Str.global_replace
+ (Str.regexp "<li>.*</li>\\(\n<li>.*</li>\\)*")
+ "<ul>\\0</ul>"
+
+let convert_blockquotes text =
+ let lines = String.split_on_char '\n' text in
+
+ let rec process_lines acc in_quote lines =
+ match lines with
+ | [] when in_quote -> List.rev ("</blockquote>" :: acc)
+ | [] -> List.rev acc
+ | line :: rest ->
+ let trimmed = String.trim line in
+ if Str.string_match (Str.regexp "^>\\s*\\(.*\\)$") trimmed 0 then
+ let content = Str.matched_group 1 trimmed in
+ if in_quote then
+ process_lines (content :: acc) true rest
+ else
+ process_lines (content :: "<blockquote>" :: acc) true rest
+ else if trimmed = "" then
+ if in_quote then
+ process_lines ("</blockquote>" :: acc) false rest
+ else
+ process_lines (line :: acc) false rest
+ else if in_quote then
+ process_lines (line :: acc) true rest
+ else
+ process_lines (line :: acc) false rest
+ in
+
+ lines |> process_lines [] false |> String.concat "\n"
+
+let convert_paragraphs text =
+ let lines = String.split_on_char '\n' text in
+
+ let is_block_element line =
+ Str.string_match
+ (Str.regexp "^<\\(h[1-6]\\|ul\\|ol\\|blockquote\\|pre\\)>")
+ line
+ 0
+ in
+
+ let wrap_paragraphs lines =
+ let rec aux acc current_p lines =
+ match lines with
+ | [] when current_p <> "" ->
+ List.rev (("<p>" ^ current_p ^ "</p>") :: acc)
+ | [] -> List.rev acc
+ | line :: rest when is_block_element line ->
+ if current_p <> "" then
+ aux (line :: ("<p>" ^ current_p ^ "</p>") :: acc) "" rest
+ else
+ aux (line :: acc) "" rest
+ | line :: rest when String.trim line = "" ->
+ if current_p <> "" then
+ aux (("<p>" ^ current_p ^ "</p>") :: acc) "" rest
+ else
+ aux acc "" rest
+ | line :: rest ->
+ let sep =
+ if current_p = "" then
+ ""
+ else
+ " "
+ in
+ aux acc (current_p ^ sep ^ String.trim line) rest
+ in
+ aux [] "" lines
+ in
+
+ lines |> wrap_paragraphs |> String.concat "\n"
+
+let to_html markdown =
+ markdown
+ |> convert_headings
+ |> convert_emphasis
+ |> convert_code
+ |> convert_links
+ |> convert_lists
+ |> wrap_lists
+ |> convert_blockquotes
+ |> convert_paragraphs
+ |> String.trim
+
+let extract_text markdown =
+ markdown
+ |> Str.global_replace (Str.regexp "\\[([^]]*)\\]\\([^)]*\\)") "\\1"
+ |> Str.global_replace (Str.regexp "\\*\\*\\([^*]*\\)\\*\\*") "\\1"
+ |> Str.global_replace (Str.regexp "\\*\\([^*]*\\)\\*") "\\1"
+ |> Str.global_replace (Str.regexp "__\\([^_]*\\)__") "\\1"
+ |> Str.global_replace (Str.regexp "_\\([^_]*\\)_") "\\1"
+ |> Str.global_replace (Str.regexp "~~\\([^~]*\\)~~") "\\1"
+ |> Str.global_replace (Str.regexp "`\\([^`]*\\)`") "\\1"
+ |> Str.global_replace (Str.regexp "```[^`]*```") ""
+ |> Str.global_replace (Str.regexp "^#+ .*$") "\n"
+ |> Str.global_replace (Str.regexp "^#* .*$") "\n"
+ |> Str.global_replace (Str.regexp "> \\|>") ""
+ |> Str.global_replace (Str.regexp "\\[\\|\\]\\|\\(\\|\\)") ""
+ |> Str.global_replace (Str.regexp "-\\|\\+\\|\\*\\s+") ""
+ |> Str.global_replace (Str.regexp "^\\d+\\.\\s+") ""
+ |> Str.global_replace (Str.regexp "\\\\") ""
+ |> String.trim
+
+let summarize text ~words:n =
+ let words = Str.split (Str.regexp "[ \n\r\t]+") text in
+ let truncated = List.take words n in
+ let dots =
+ if List.length words > n then
+ "..."
+ else
+ ""
+ in
+ String.concat " " truncated ^ dots \ No newline at end of file