readLine($line); } $builder->emit(); }
// NOTE(review): the statements above are the tail of a definition that starts
// before this section of the file; they are reproduced untouched.

/**
 * Line-oriented Markdown-to-HTML converter.
 *
 * Feed the document one line at a time through readLine(), then call emit()
 * to close whatever is still open and echo the accumulated HTML. Open
 * elements are tracked on a stack of MarkdownEntity nodes: pushing a node
 * appends its opening markup to the output buffer, popping appends its
 * closing markup.
 *
 * Relies on helpers defined elsewhere in the project: remove_prefix(),
 * log_warning(), log_value() and LoggingVerbosity. remove_prefix() is
 * presumably "strip $prefix from the by-reference string if present and
 * return whether it was stripped" -- confirm against its definition.
 */
class MarkdownDocumentParser
{
    /** Deepest heading level recognised at the start of a line. */
    private const MAX_HEADING_LEVEL = 6;

    /**
     * Inline spans that simply wrap their contents in one element.
     * Each row is [opening delimiter, anchored pattern, element type].
     * "**" must be tried before "*" so strong wins over em.
     */
    private const INLINE_SPANS = [
        ['**', '/^\*\*(.*?[^\\\\])\*\*/', 'strong'],
        ['*', '/^\*(.*?[^\\\\])\*/', 'em'],
        ['`', '/^\`(.*?[^\\\\])\`/', 'code'],
    ];

    /**
     * Currently open entities, outermost first.
     *
     * @var MarkdownEntity[] $stack
     */
    private array $stack;

    /** HTML produced so far. */
    private string $document;

    public function __construct()
    {
        $this->document = "";
        $this->stack = [];
    }

    /**
     * Closes every entity that is still open and echoes the document.
     *
     * A trailing open paragraph is closed silently; anything else left on
     * the stack is reported through log_warning() before being closed.
     */
    public function emit()
    {
        if ($this->topTypeIs('p')) {
            $this->pop();
        }

        $stack_size = count($this->stack);
        if ($stack_size > 0) {
            log_warning("Stack has $stack_size markdown entities");
        }

        while (count($this->stack) > 0) {
            $this->pop();
        }

        echo $this->document;
    }

    /**
     * Processes one line of Markdown: block-level prefix first, then the
     * inline content, then closes headings (which never span lines).
     *
     * @param string $line A single input line.
     */
    public function readLine(string $line)
    {
        $this->readStartOfLineEntity($line);
        $this->readEntitiesFrom($line);
        $this->popOneLiners();
    }

    /**
     * Consumes inline entities from $line until it is empty.
     *
     * @param string $line Remaining text; consumed in place.
     *
     * @throws LogicException When more than 5000 entities are read from one
     *                        line (guards against a non-consuming loop).
     */
    private function readEntitiesFrom(string &$line): void
    {
        $count = 0;
        while (strlen($line) > 0) {
            $this->readEntityFrom($line);
            $count += 1;
            if ($count > 5000) {
                throw new LogicException("Too many entities on line");
            }
        }
    }

    /** Returns the innermost open entity, or null when nothing is open. */
    private function peek(): ?MarkdownEntity
    {
        return $this->stack[count($this->stack) - 1] ?? null;
    }

    /**
     * Opens a new entity: appends its opening markup and pushes it.
     *
     * @param string               $type  Element type, e.g. 'p', 'li', 'a'.
     * @param array<string, mixed> $props Properties stored on the node.
     */
    private function push(string $type, array $props = []): void
    {
        $node = new MarkdownEntity($type);
        foreach ($props as $key => $val) {
            // setProp() takes a string; cast explicitly because callers pass
            // integers (e.g. 'docpos') and implicit coercion is fragile.
            $node->setProp($key, (string) $val);
        }
        $this->document .= $node->renderStart();
        $this->stack[] = $node;
    }

    /**
     * Closes the innermost entity and appends its closing markup.
     *
     * @throws LogicException When no entity is open.
     */
    private function pop(): void
    {
        $node = array_pop($this->stack);
        if ($node === null) {
            throw new LogicException("Cannot pop from an empty markdown entity stack");
        }
        $this->document .= $node->renderRest();
    }

    /** Returns the type of the innermost open entity, or null if none. */
    private function topType(): ?string
    {
        $top = $this->peek();
        return $top === null ? null : $top->type;
    }

    /** Tells whether the innermost open entity has one of the given types. */
    private function topTypeIs(string ...$matches): bool
    {
        return in_array($this->topType(), $matches, true);
    }

    /** Counts how many open entities have the given type. */
    private function countOf(string $type): int
    {
        $count = 0;
        foreach ($this->stack as $node) {
            if ($node->type === $type) {
                $count += 1;
            }
        }
        return $count;
    }

    /**
     * Handles the block-level construct at the start of a line and strips
     * its marker from $line.
     *
     * Checked in order: blank line (ends an open paragraph), heading,
     * continuation of an open list, start of a new list, start of a
     * paragraph.
     *
     * @param string $line The line; its block prefix is consumed in place.
     */
    public function readStartOfLineEntity(string &$line)
    {
        if (trim($line) === '') {
            if ($this->topTypeIs('p')) {
                $this->pop();
            }
            return;
        }

        if ($this->readHeading($line)) {
            return;
        }

        if ($this->topTypeIs('li', 'ul', 'p') && $this->countOf('li') > 0) {
            // Inside a list: unwind until the line's indentation matches an
            // open nesting level (two spaces per open <li>).
            do {
                if ($this->topTypeIs('p')) {
                    // Close the paragraph BEFORE trying to match a marker.
                    // Matching first would consume "- " from a nested item
                    // and then drop it, turning the item into loose text.
                    $this->pop();
                    continue;
                }

                $indent = str_repeat(" ", 2 * $this->countOf("li"));

                if (remove_prefix($line, "{$indent}- ")) {
                    if ($this->topTypeIs('li')) {
                        // One level deeper than the open item: nested list.
                        $this->push('ul');
                    }
                    $this->push('li');
                    break;
                }

                if (remove_prefix($line, $indent)) {
                    // Indented text that is not a list item: a paragraph.
                    if ($this->topTypeIs('ul')) {
                        $this->pop();
                    }
                    $this->push('p');
                    break;
                }

                $this->pop();
            } while ($this->topTypeIs('li', 'ul', 'p'));
            return;
        }

        if (remove_prefix($line, "- ")) {
            $this->push('ul');
            $this->push('li');
            return;
        }

        if (!$this->topTypeIs('p')) {
            $this->push('p');
        }
    }

    /**
     * Opens a heading if $line starts with one to six '#' and a space.
     *
     * The current document length is stored as 'docpos' so popOneLiners()
     * can find the opening tag again and add an id attribute to it.
     *
     * @param string $line The line; the heading marker is consumed in place.
     *
     * @return bool True when a heading was opened.
     */
    private function readHeading(string &$line): bool
    {
        for ($level = self::MAX_HEADING_LEVEL; $level >= 1; $level--) {
            if (remove_prefix($line, str_repeat('#', $level) . ' ')) {
                $this->push("h$level", ['docpos' => strlen($this->document)]);
                return true;
            }
        }
        return false;
    }

    /**
     * Closes any heading on top of the stack, first giving it an id
     * attribute derived from its text so it can be linked to as an anchor.
     */
    public function popOneLiners()
    {
        while ($this->topTypeIs('h1', 'h2', 'h3', 'h4', 'h5', 'h6')) {
            // Need to retroactively add heading id for anchors
            $start = (int) $this->peek()->getProp("docpos");
            $taglen = strlen($this->peek()->renderStart());

            // Everything after the opening tag is the heading's content.
            $contents = strip_tags(substr($this->document, $start + $taglen));
            $slug = trim((string) preg_replace("/[^a-z]/", '-', trim(strtolower($contents))), '-');

            // Insert just before the '>' that ends the opening tag.
            $this->document = substr_replace($this->document, " id=\"$slug\"", $start + $taglen - 1, 0);
            $this->pop();
        }
    }

    /**
     * Consumes exactly one inline entity from the start of $line.
     *
     * Tried in order: [text](href) link, <href> autolink, **strong**, *em*,
     * `code`, and finally a plain word with its surrounding whitespace.
     *
     * @param string $line Remaining text; the entity is consumed in place.
     *
     * @throws LogicException When nothing at the start of $line can be parsed.
     */
    public function readEntityFrom(string &$line)
    {
        if (str_starts_with($line, "[")) {
            $matches = [];
            if (preg_match('/^\[(.*?[^\\\\])\]\((.+?)\)/', $line, $matches)) {
                $this->push('a', ['href' => $matches[2]]);
                $contents = str_replace("\\]", "]", $matches[1]);
                $this->readEntitiesFrom($contents);
                $this->pop();
                remove_prefix($line, $matches[0]);
                return;
            }
        }

        if (str_starts_with($line, "<")) {
            $matches = [];
            if (preg_match('/^\<(.*?[^\\\\])\>/', $line, $matches)) {
                $contents = str_replace("\\>", ">", $matches[1]);
                // The target doubles as the link text.
                $this->push('a', ['href' => $contents]);
                $this->readEntitiesFrom($contents);
                $this->pop();
                remove_prefix($line, $matches[0]);
                return;
            }
        }

        foreach (self::INLINE_SPANS as [$delimiter, $pattern, $type]) {
            if (str_starts_with($line, $delimiter)
                && $this->readDelimitedSpan($line, $pattern, $delimiter, $type)) {
                return;
            }
        }

        if (trim($line) === '') {
            // Only whitespace is left (e.g. "#   " or "[ ](url)"); there is
            // nothing to render, so consume it instead of failing below.
            $line = '';
            return;
        }

        // consume word
        $matches = [];
        if (preg_match('/^\s*\S+\s*/', $line, $matches)) {
            $needsSeparator = !str_ends_with($this->document, " ")
                && !str_ends_with($this->document, ">")
                && !str_starts_with($matches[0], " ");
            if ($needsSeparator) {
                // Keeps words from consecutive lines from running together.
                $this->document .= " ";
            }
            $this->document .= $matches[0];
            remove_prefix($line, $matches[0]);
            return;
        }

        log_value($this, LoggingVerbosity::Warning);
        log_value($line, LoggingVerbosity::Warning);
        throw new LogicException("Could not parse any entity");
    }

    /**
     * Reads one delimiter-wrapped span (strong, em, code) from the start of
     * $line and renders its contents recursively inside a $type element.
     *
     * @param string $line      Remaining text; consumed in place on success.
     * @param string $pattern   Pattern anchored with '^' so the match is
     *                          always a prefix of $line and can be removed.
     * @param string $delimiter Delimiter; a backslash-escaped occurrence in
     *                          the contents is unescaped.
     * @param string $type      Element type to open.
     *
     * @return bool True when a span was consumed.
     */
    private function readDelimitedSpan(string &$line, string $pattern, string $delimiter, string $type): bool
    {
        $matches = [];
        if (!preg_match($pattern, $line, $matches)) {
            return false;
        }

        $this->push($type);
        $contents = str_replace("\\" . $delimiter, $delimiter, $matches[1]);
        $this->readEntitiesFrom($contents);
        $this->pop();
        remove_prefix($line, $matches[0]);
        return true;
    }
}

/**
 * One open element on the parser's stack: an element type plus string
 * properties, able to render its own opening and closing markup.
 */
class MarkdownEntity
{
    /** Element type, used verbatim as the tag name (e.g. 'p', 'h2', 'a'). */
    public string $type;

    /** @var array<string, string> */
    private array $props;

    public function __construct(string $type)
    {
        $this->type = $type;
        $this->props = [];
    }

    /** Sets property $prop, replacing any previous value. */
    public function setProp(string $prop, string $value)
    {
        $this->props[$prop] = $value;
    }

    /** Appends $value to property $prop, creating it when missing. */
    public function appendProp(string $prop, string $value)
    {
        $this->props[$prop] ??= '';
        $this->props[$prop] .= $value;
    }

    /**
     * Returns property $prop.
     *
     * @throws LogicException When the property was never set.
     */
    public function getProp(string $prop): string
    {
        return $this->props[$prop]
            ?? throw new LogicException("Markdown entity '{$this->type}' has no property '$prop'");
    }

    /**
     * Renders the opening markup, preceded by a newline.
     *
     * For links, an href that is neither a fragment ("#...") nor an absolute
     * URL (contains "://") is resolved against $REPOSITORY_CANONICAL_URL_FILES;
     * a leading $SITE_CANONICAL_URL is then stripped, and an href left empty
     * becomes "/".
     */
    public function renderStart(): string
    {
        global $REPOSITORY_CANONICAL_URL_FILES, $SITE_CANONICAL_URL;

        $type = $this->type;
        if ($type !== 'a') {
            return "\n<$type>";
        }

        $href = $this->getProp('href');
        if (!str_starts_with($href, "#") && !str_contains($href, "://")) {
            $href = "$REPOSITORY_CANONICAL_URL_FILES/$href";
        }
        remove_prefix($href, $SITE_CANONICAL_URL);
        if (empty($href)) {
            $href = "/";
        }

        // The href comes from document text, so escape it for the attribute
        // context; a stray quote must not be able to end the attribute.
        $escaped = htmlspecialchars($href, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        return "\n<a href=\"$escaped\">";
    }

    /** Renders the closing tag that matches renderStart(). */
    public function renderRest(): string
    {
        return "</{$this->type}>";
    }
}