1 files changed, 363 insertions, 0 deletions
diff --git a/scripts/php/functions.php b/scripts/php/functions.php
new file mode 100644
index 0000000..3e8e7a4
--- /dev/null
+++ b/scripts/php/functions.php
@@ -0,0 +1,363 @@
+<?php declare(strict_types=1);
+
+const HEADER = '/^(#+)\s+(.*)/'; // heading line: a run of '#' (captured), whitespace, then the heading text
+const LINK = '/^=&gt;\s+(\S+)\s*(.*)/'; // link line: URL plus optional label; '=>' is matched as '=&gt;' because gemtext2hmtl() HTML-escapes the text before matching
+const LIST_ITEM = '/^\*\s+(.*)/'; // list item: '*', whitespace, then the item text
+const PRE = '/^```(.*)?/'; // preformatted toggle: '```' followed by optional alt text
+const QUOTE = '/^&gt;\s+(.*)/'; // quote line: '>' in its escaped form '&gt;', whitespace, then the quoted text
+
+/**
+ * Collects a getPageMetaData() record for every regular file found under $src, recursively.
+ *
+ * @throws \InvalidArgumentException When $src cannot be resolved to a real path.
+ */
+function getPages(string $src, string $output): array
+{
+    $root = realpath($src);
+    if ($root === false) {
+        throw new \InvalidArgumentException("Source directory $src does not exist.");
+    }
+    $pages = [];
+    $files = new \RecursiveIteratorIterator(new \RecursiveDirectoryIterator($root, \FilesystemIterator::SKIP_DOTS));
+    foreach ($files as $fileInfo) {
+        if ($fileInfo->isFile()) { // also false for dangling symlinks, which have no real path
+            $pages[] = getPageMetaData($fileInfo, $root, $output);
+        }
+    }
+    return $pages;
+}
+
+/** Builds the page record for one source file: paths, site URL, path/content metadata and rendered HTML. */
+function getPageMetaData(\SplFileInfo $fileInfo, string $src, string $output): array
+{
+    $input = $fileInfo->getRealPath();
+    $gemtext = file_get_contents($input); // read once; the metadata and the HTML both come from it
+    // Strip $src only as a leading prefix and ".gmi" only as the extension; other occurrences stay.
+    $relative = strncmp($input, $src, strlen($src)) === 0 ? substr($input, strlen($src)) : $input;
+    $url = preg_replace('/\.gmi$/', '.html', $relative);
+    $pathData = parsePath($input);
+    $contentData = parseContent($gemtext);
+    return [
+        'input'  => $input,
+        'url'    => $url,
+        'date'   => $pathData['date'],
+        'tag'    => $pathData['tag'],
+        'isPost' => $pathData['isPost'],
+        'isTag'  => $pathData['isTag'],
+        'title'  => $contentData['title'],
+        'author' => $contentData['author'],
+        'html'   => gemtext2hmtl($gemtext),
+        'output' => "$output$url",
+    ];
+}
+
+/**
+ * Derives post/tag metadata from a source file path.
+ *
+ * A path containing "/posts/<tag>/<YYYY-MM-DD>/" is a post carrying that tag and date.
+ * A path containing "posts/<tag>/index" is a tag page whose tag is the segment right
+ * after "posts/"; this check runs last, so it decides the tag if both patterns match.
+ *
+ * @param string $path Source file path; only "/" is recognised as a separator.
+ * @return array{date: ?string, tag: ?string, isPost: bool, isTag: bool}
+ */
+function parsePath(string $path): array
+{
+    $meta = ['date' => null, 'tag' => null, 'isPost' => false, 'isTag' => false];
+
+    // Assume that only posts have both a date and a tag.
+    if (preg_match('/\/posts\/(.+)\/(\d\d\d\d-\d\d-\d\d)\//', $path, $post) === 1) {
+        // $post is [whole match, tag, date].
+        $meta['tag'] = $post[1];
+        $meta['date'] = $post[2];
+        $meta['isPost'] = true;
+    }
+
+    // Assume tags have an index file that contains related posts.
+    if (preg_match('/posts\/(?:[^\/]|\/\/)+\/index/', $path, $tagIndex) === 1) {
+        [, $segment] = explode('/', $tagIndex[0]);
+        $meta['tag'] = $segment;
+        $meta['isTag'] = true;
+    }
+
+    return $meta;
+}
+
+/**
+ * Extracts the title and author from raw gemtext.
+ *
+ * The title is the text of the first "# " heading line. The author is the text after
+ * " By " on the first line of the form "> <anything> By <author>". Either is null when absent.
+ *
+ * @return array{title: ?string, author: ?string}
+ */
+function parseContent(string $content): array
+{
+    // In multiline mode "$" only stops before "\n", so on CRLF input the capture ends in
+    // "\r"; strip it, in line with gemtext2hmtl(), which splits lines on "\r?\n".
+    $capture = static fn (string $pattern): ?string => preg_match($pattern, $content, $m) === 1 ? rtrim($m[1], "\r") : null;
+    return [
+        'title'  => $capture('/^# (.+)$/m'),
+        'author' => $capture('/^> .+ By (.+)$/m'),
+    ];
+}
+
+/**
+ * Writes every page as an HTML file, then generates the Atom feeds and the sitemap.
+ *
+ * Prints a message and exits with status 1 when a destination directory cannot be created.
+ */
+function buildWWWSite(array $pages, string $hostname, string $htmlTemplateDiretory, string $output): void
+{
+    // The template is the same for every page, so read it once rather than per page.
+    $template = file_get_contents($htmlTemplateDiretory.DIRECTORY_SEPARATOR.'default.html');
+
+    foreach ($pages as $page) {
+        $destDirectory = dirname($page['output']);
+        if (!file_exists($destDirectory) && !mkdir($destDirectory, 0777, true)) {
+            echo "Unable to create WWW site directory $destDirectory.";
+            exit(1);
+        }
+        // parseContent() yields a null title for pages without a "# " heading, while
+        // buildHtmlFile() only accepts strings.
+        file_put_contents($page['output'], buildHtmlFile($page['title'] ?? '', $page['html'], $template));
+    }
+
+    generateAtomFeeds($pages, $hostname);
+    generateSiteMap($pages, $hostname, $output);
+}
+
+/**
+ * Fills the page template: every "{{ $title }}" placeholder becomes $title and every
+ * "{{ $contents }}" placeholder becomes $contents, both inserted verbatim.
+ *
+ * @return string The template with both placeholders substituted.
+ */
+function buildHtmlFile(string $title, string $contents, string $template): string
+{
+    $values = [
+        '{{ $title }}'    => $title,
+        '{{ $contents }}' => $contents,
+    ];
+    return str_replace(array_keys($values), array_values($values), $template);
+}
+
+/**
+ * Converts gemtext into an HTML fragment.
+ *
+ * The whole input is HTML-escaped first, then translated line by line using the
+ * HEADER, LINK, LIST_ITEM, PRE and QUOTE patterns (written against the escaped text):
+ *  - heading lines become <hN>, N being the number of leading "#" characters;
+ *  - link lines become <a>, labelled with the URL itself when no label is given;
+ *  - a run of consecutive list items becomes a single <ul>;
+ *  - lines between two "```" toggles become <pre>, wrapped in <code class="alt">
+ *    when the opening toggle carries alt text; they are copied as-is (still
+ *    escaped), even if they look like headings, links or list items;
+ *  - quote lines become <blockquote>;
+ *  - any other non-empty line becomes <p>; empty lines produce nothing.
+ *
+ * @param string $gemtext Raw gemtext with LF or CRLF line endings.
+ * @return string The generated elements, concatenated without separators.
+ */
+function gemtext2hmtl(string $gemtext): string
+{
+    // Escape once up front so captured text can be embedded without further escaping;
+    // this is also why LINK and QUOTE match "&gt;" rather than ">".
+    $lines = preg_split('/\r?\n/', htmlspecialchars($gemtext));
+    $numLines = count($lines);
+    $html = [];
+
+    for ($index = 0; $index < $numLines; $index++) {
+        $current = $lines[$index];
+
+        if (preg_match(HEADER, $current, $matches) === 1) {
+            [, $levels, $content] = $matches;
+
+            // The heading level is the number of leading "#" characters.
+            $level = strlen($levels);
+            $html[] = "<h$level>$content</h$level>";
+        } elseif (preg_match(LINK, $current, $matches) === 1) {
+            [, $href, $content] = $matches;
+
+            // Compare against '' instead of testing truthiness, so a label of "0" is kept.
+            $label = $content !== '' ? $content : $href;
+            $html[] = "<a href=\"$href\">$label</a>";
+        } elseif (preg_match(LIST_ITEM, $current, $matches) === 1) {
+            $items = [];
+
+            // Consume the whole run of list items, then step back one line because the
+            // for loop is about to advance past the line that ended the run.
+            while ($index < $numLines && preg_match(LIST_ITEM, $lines[$index], $matches) === 1) {
+                $items[] = "<li>$matches[1]</li>";
+                $index++;
+            }
+            $index--;
+
+            $html[] = sprintf('<ul>%s</ul>', implode('', $items));
+        } elseif (preg_match(PRE, $current, $matches) === 1) {
+            [, $alt] = $matches;
+            $block = [];
+
+            // Collect everything up to the closing toggle, or up to the end of the input
+            // when the block is never closed. The closing toggle itself is skipped by
+            // the for loop's own increment.
+            for ($index++; $index < $numLines && preg_match(PRE, $lines[$index]) !== 1; $index++) {
+                $block[] = $lines[$index];
+            }
+            $body = implode("\n", $block);
+
+            // Same '' comparison as for link labels: alt text "0" still yields a class.
+            $html[] = $alt !== '' ? "<pre><code class=\"$alt\">$body</code></pre>" : "<pre>$body</pre>";
+        } elseif (preg_match(QUOTE, $current, $matches) === 1) {
+            $html[] = "<blockquote>$matches[1]</blockquote>";
+        } elseif ($current !== '') {
+            // Plain text line; empty lines fall through and are dropped.
+            $html[] = "<p>$current</p>";
+        }
+    }
+
+    return implode('', $html);
+}
+
+/**
+ * Writes one Atom feed per tag page, plus a feed of all posts for the "posts/index" page.
+ *
+ * Posts are ordered newest first. A tag page without posts gets an empty post list, and
+ * the all-posts feed is skipped when no page URL contains "posts/index".
+ *
+ * @param array[] $pages    Page records as produced by getPageMetaData().
+ * @param string  $hostname Host name passed on to tagToAtomFeed().
+ */
+function generateAtomFeeds(array $pages, string $hostname): void
+{
+    $posts = array_filter($pages, fn ($page) => $page['isPost']);
+
+    // Sort by latest to previous date.
+    usort($posts, fn ($a, $b) => $b['date'] <=> $a['date']);
+
+    // Group posts by tag; each group keeps the newest-first order.
+    $postsByTag = [];
+    foreach ($posts as $post) {
+        $postsByTag[$post['tag']][] = $post;
+    }
+
+    foreach ($pages as $page) {
+        if (!$page['isTag']) {
+            continue;
+        }
+        // A tag index can exist before any post uses the tag; hand over an empty list
+        // then, instead of reading a key that was never set.
+        tagToAtomFeed(array_merge($page, ['posts' => $postsByTag[$page['tag']] ?? []]), $hostname);
+    }
+
+    // The first page whose URL contains "posts/index" lists all posts and gets the
+    // site-wide feed; without such a page there is nowhere to put that feed.
+    foreach ($pages as $page) {
+        if (preg_match('/posts\/index/', $page['url']) === 1) {
+            tagToAtomFeed(array_merge($page, ['posts' => $posts]), $hostname);
+            break;
+        }
+    }
+}
+
+/** Writes the Atom feed of a tag page and its 'posts' to the page's output path, "index.html" replaced by "atom.xml". */
+function tagToAtomFeed(array $tag, string $hostname): void
+{
+    // Titles are raw gemtext and URLs come from file names, so escape both for XML; a missing title becomes ''.
+    $xml = fn (?string $text): string => htmlspecialchars($text ?? '', ENT_XML1 | ENT_QUOTES, 'UTF-8');
+    $feedUrl = str_replace('index.html', 'atom.xml', $tag['url']);
+    // Without posts there is no post date to report, so fall back to today's date.
+    $updated = ($tag['posts'][0]['date'] ?? gmdate('Y-m-d')).'T12:00:00Z';
+    $entries = implode('', array_map(fn ($post) => postToAtomEntry($post, $hostname), $tag['posts']));
+    $feed = buildAtomFeed($xml($tag['title']), $xml("https://$hostname$feedUrl"), $xml("https://$hostname{$tag['url']}"), $updated, $entries);
+
+    file_put_contents(str_replace('index.html', 'atom.xml', $tag['output']), $feed);
+}
+
+/** Renders a post as an Atom <entry>, XML-escaping the title, URL and author before templating. */
+function postToAtomEntry(array $post, string $hostname): string
+{
+    // parseContent() returns null for a missing title/author; buildAtomEntry() needs strings.
+    $xml = fn (?string $text): string => htmlspecialchars($text ?? '', ENT_XML1 | ENT_QUOTES, 'UTF-8');
+    return buildAtomEntry(
+        $xml($post['title']), $xml("https://$hostname{$post['url']}"), $xml($post['author']),
+        "{$post['date']}T12:00:00Z", htmlspecialchars($post['html'])
+    );
+}
+/** Renders a complete Atom feed document; every argument is inserted as given, so it must already be XML-safe. */
+function buildAtomFeed(string $title, string $href, string $altHref, string $date, string $entries): string
+{
+    return sprintf(<<<'ATOM_FEED'
+<?xml version="1.0" encoding="utf-8"?>
+<feed xmlns="http://www.w3.org/2005/Atom">
+    <title type="text">%1$s</title>
+    <id>%2$s</id>
+    <link rel="alternate" type="text/html" href="%3$s"/>
+    <link rel="self" type="application/atom+xml" href="%2$s"/>
+    <updated>%4$s</updated>
+    %5$s
+</feed>
+ATOM_FEED, $title, $href, $altHref, $date, $entries);
+}
+/** Renders one Atom <entry>; $href serves as both id and alternate link, $date as both published and updated. */
+function buildAtomEntry(string $title, string $href, string $author, string $date, string $content): string
+{
+    return sprintf(<<<'ATOM_ENTRY'
+<entry>
+    <title type="text">%1$s</title>
+    <id>%2$s</id>
+    <link rel="alternate" type="text/html" href="%2$s"/>
+    <author><name>%3$s</name></author>
+    <published>%4$s</published>
+    <updated>%4$s</updated>
+    <content type="html">%5$s</content>
+</entry>
+ATOM_ENTRY, $title, $href, $author, $date, $content);
+}
+
+/**
+ * Writes sitemap.xml into $output, listing every post from newest to oldest.
+ *
+ * Only pages flagged 'isPost' are listed.
+ */
+function generateSiteMap(array $pages, string $hostname, $output): void
+{
+    $entries = array_filter($pages, fn ($page) => $page['isPost']);
+    usort($entries, fn ($left, $right) => $right['date'] <=> $left['date']);
+
+    $urls = '';
+    foreach ($entries as $entry) {
+        $urls .= postToSiteMapUrl($entry, $hostname);
+    }
+
+    file_put_contents($output.DIRECTORY_SEPARATOR.'sitemap.xml', buildSiteMap($urls));
+}
+
+/** Renders the sitemap <url> element for a post, entity-escaping its URL as the sitemap protocol requires. */
+function postToSiteMapUrl(array $post, string $hostname): string
+{
+    $loc = htmlspecialchars("https://$hostname{$post['url']}", ENT_XML1 | ENT_QUOTES, 'UTF-8');
+
+    return buildSiteMapUrl($loc, "{$post['date']}T12:00:00Z");
+}
+/** Wraps pre-rendered <url> elements in the sitemap <urlset> document; $urls is inserted as given. */
+function buildSiteMap(string $urls): string
+{
+    return sprintf(<<<'SITEMAP'
+<?xml version="1.0" encoding="utf-8"?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
+    %s
+</urlset>
+SITEMAP, $urls);
+}
+/** Renders one sitemap <url> element for $loc, last modified at $lastmod, with a fixed change frequency of "never". */
+function buildSiteMapUrl(string $loc, string $lastmod): string
+{
+    return sprintf(<<<'SITEMAP_URL'
+<url>
+    <loc>%s</loc>
+    <lastmod>%s</lastmod>
+    <changefreq>never</changefreq>
+</url>
+SITEMAP_URL, $loc, $lastmod);
+}