summary | refs | log | tree | commit | diff
path: root/scripts/php/functions.php
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/php/functions.php')
-rw-r--r-- scripts/php/functions.php 363
1 files changed, 363 insertions, 0 deletions
diff --git a/scripts/php/functions.php b/scripts/php/functions.php
new file mode 100644
index 0000000..3e8e7a4
--- /dev/null
+++ b/scripts/php/functions.php
@@ -0,0 +1,363 @@
+<?php declare(strict_types=1);
+
+const HEADER = '/^(#+)\s+(.*)/'; // Heading line: a run of "#" (its length gives the level), whitespace, then the heading text.
+const LINK = '/^=&gt;\s+(\S+)\s*(.*)/'; // Link line: "=>" in HTML-escaped form (lines are escaped before matching), the target, then an optional label.
+const LIST_ITEM = '/^\*\s+(.*)/'; // List item: "*" followed by whitespace and the item text.
+const PRE = '/^```(.*)?/'; // Preformatted fence: "```" optionally followed by alt text; also used to find the closing fence.
+const QUOTE = '/^&gt;\s+(.*)/'; // Quote line: ">" in HTML-escaped form, whitespace, then the quoted text.
+
+/**
+ * Collect metadata for every regular file found below a source directory.
+ *
+ * @param string $src    Directory that is scanned recursively.
+ * @param string $output Output directory; forwarded unchanged to getPageMetaData().
+ *
+ * @return array List of page metadata arrays, one per file, as built by getPageMetaData().
+ *
+ * @throws \InvalidArgumentException When $src cannot be resolved to an existing path.
+ */
+function getPages(string $src, string $output): array
+{
+    $root = realpath($src);
+
+    /**
+     * realpath() yields false for a missing path. Fail with a clear message
+     * instead of letting the iterator raise an unrelated TypeError.
+     */
+    if ($root === false) {
+        throw new \InvalidArgumentException("Source directory $src does not exist.");
+    }
+
+    $iter = new \RecursiveIteratorIterator(
+        new \RecursiveDirectoryIterator($root, \FilesystemIterator::SKIP_DOTS),
+        \RecursiveIteratorIterator::SELF_FIRST
+    );
+
+    $pages = [];
+
+    foreach ($iter as $fileInfo) {
+        /**
+         * Directories are visited too (SELF_FIRST), so keep regular files only.
+         * isFile() also copes with broken symlinks, for which getRealPath()
+         * returns false.
+         */
+        if (!$fileInfo->isFile()) {
+            continue;
+        }
+
+        $pages[] = getPageMetaData($fileInfo, $root, $output);
+    }
+
+    return $pages;
+}
+
+/**
+ * Build the metadata record for a single source file.
+ *
+ * @param \SplFileInfo $fileInfo File to describe.
+ * @param string       $src      Source root; stripped from the front of the file path to form the URL.
+ * @param string       $output   Output root; prefixed to the URL to form the destination path.
+ *
+ * @return array Keys: input, url, date, tag, isPost, isTag, title, author, html, output.
+ *
+ * @throws \RuntimeException When the file path cannot be resolved or the file cannot be read.
+ */
+function getPageMetaData(\SplFileInfo $fileInfo, string $src, string $output): array
+{
+    $input = $fileInfo->getRealPath();
+
+    if ($input === false) {
+        throw new \RuntimeException("Unable to resolve {$fileInfo->getPathname()}.");
+    }
+
+    /**
+     * Read the file once; both the metadata parser and the HTML converter
+     * work from the same text.
+     */
+    $gemtext = file_get_contents($input);
+
+    if ($gemtext === false) {
+        throw new \RuntimeException("Unable to read $input.");
+    }
+
+    $pathData = parsePath($input);
+
+    $contentData = parseContent($gemtext);
+
+    /**
+     * Remove the source root only where it prefixes the path and swap only a
+     * trailing ".gmi". A blanket str_replace would also rewrite directory
+     * names that happen to contain either string.
+     */
+    $prefixLength = strlen($src);
+
+    $relative = strncmp($input, $src, $prefixLength) === 0
+        ? substr($input, $prefixLength)
+        : $input;
+
+    $url = preg_replace('/\.gmi$/', '.html', $relative);
+
+    return [
+        'input' => $input,
+        'url' => $url,
+        'date' => $pathData['date'],
+        'tag' => $pathData['tag'],
+        'isPost' => $pathData['isPost'],
+        'isTag' => $pathData['isTag'],
+        'title' => $contentData['title'],
+        'author' => $contentData['author'],
+        'html' => gemtext2hmtl($gemtext),
+        'output' => "$output$url",
+    ];
+}
+
+/**
+ * Derive post and tag information from a file path.
+ *
+ * A path of the form ".../posts/<tag>/<YYYY-MM-DD>/..." is treated as a post;
+ * a path containing "posts/<tag>/index" is treated as a tag index page.
+ *
+ * @param string $path File path to inspect.
+ *
+ * @return array Keys: date (string|null), tag (string|null), isPost (bool), isTag (bool).
+ */
+function parsePath(string $path): array
+{
+    $meta = [
+        'date' => null,
+        'tag' => null,
+        'isPost' => false,
+        'isTag' => false,
+    ];
+
+    /**
+     * Only posts are expected to carry both a tag and a date directory.
+     */
+    if (preg_match('/\/posts\/(.+)\/(\d\d\d\d-\d\d-\d\d)\//', $path, $postParts) === 1) {
+        $meta['tag'] = $postParts[1];
+        $meta['date'] = $postParts[2];
+        $meta['isPost'] = true;
+    }
+
+    /**
+     * Tags are expected to have an index file listing their posts; the tag
+     * name is the path segment right after "posts".
+     */
+    if (preg_match('/posts\/(?:[^\/]|\/\/)+\/index/', $path, $tagParts) === 1) {
+        $segments = explode('/', $tagParts[0]);
+
+        $meta['tag'] = $segments[1];
+        $meta['isTag'] = true;
+    }
+
+    return $meta;
+}
+
+/**
+ * Extract the title and author from raw gemtext.
+ *
+ * The title is the text of the first line starting with "# "; the author is
+ * whatever follows " By " on the first matching "> ... By ..." line.
+ *
+ * @param string $content Raw (unescaped) gemtext.
+ *
+ * @return array Keys: title (string|null), author (string|null).
+ */
+function parseContent(string $content): array
+{
+    $title = null;
+    $author = null;
+
+    /**
+     * In multiline mode "$" matches before "\n" only, so with CRLF input the
+     * capture would end in "\r". Strip it so both line-ending styles give the
+     * same result.
+     */
+    if (preg_match('/^# (.+)$/m', $content, $matches) === 1) {
+        $title = rtrim($matches[1], "\r");
+    }
+
+    if (preg_match('/^> .+ By (.+)$/m', $content, $matches) === 1) {
+        $author = rtrim($matches[1], "\r");
+    }
+
+    return [
+        'title' => $title,
+        'author' => $author,
+    ];
+}
+
+/**
+ * Write every page as an HTML file, then generate the Atom feeds and sitemap.
+ *
+ * Prints a message and exits with status 1 when the template cannot be read
+ * or a destination directory cannot be created.
+ *
+ * @param array  $pages                Page metadata arrays; the keys output, title and html are read here.
+ * @param string $hostname             Host name forwarded to the feed and sitemap generators.
+ * @param string $htmlTemplateDiretory Directory containing "default.html".
+ * @param string $output               Output directory forwarded to generateSiteMap().
+ */
+function buildWWWSite(array $pages, string $hostname, string $htmlTemplateDiretory, string $output): void
+{
+    $templatePath = $htmlTemplateDiretory.DIRECTORY_SEPARATOR.'default.html';
+
+    /**
+     * The template is identical for every page, so read it once up front.
+     */
+    $template = file_get_contents($templatePath);
+
+    if ($template === false) {
+        echo "Unable to read HTML template $templatePath.";
+        exit(1);
+    }
+
+    foreach ($pages as $page) {
+        $destDirectory = dirname($page['output']);
+
+        if (!file_exists($destDirectory) && !mkdir($destDirectory, 0777, true)) {
+            echo "Unable to create WWW site directory $destDirectory.";
+            exit(1);
+        }
+
+        /**
+         * The title is null for pages without a "# " heading; fall back to an
+         * empty string so the string-typed builder does not throw.
+         */
+        file_put_contents(
+            $page['output'],
+            buildHtmlFile($page['title'] ?? '', $page['html'], $template)
+        );
+    }
+
+    generateAtomFeeds($pages, $hostname);
+
+    generateSiteMap($pages, $hostname, $output);
+}
+
+/**
+ * Fill the "{{ $title }}" and "{{ $contents }}" placeholders of a template.
+ *
+ * Both values are inserted verbatim; nothing is escaped here.
+ *
+ * @param string $title    Replacement for "{{ $title }}".
+ * @param string $contents Replacement for "{{ $contents }}".
+ * @param string $template Template text containing the placeholders.
+ *
+ * @return string The template with both placeholders substituted.
+ */
+function buildHtmlFile(string $title, string $contents, string $template): string
+{
+    /**
+     * strtr() substitutes in a single pass, so placeholder-like text inside
+     * $title or $contents is left alone. A sequential str_replace would
+     * expand it a second time.
+     */
+    return strtr($template, [
+        '{{ $title }}' => $title,
+        '{{ $contents }}' => $contents,
+    ]);
+}
+
+/**
+ * Convert gemtext to an HTML fragment.
+ *
+ * The input is HTML-escaped first and then processed line by line: headings
+ * become <hN>, link lines <a>, runs of list items a single <ul>, fenced
+ * blocks <pre>, quote lines <blockquote>, and any other non-empty line <p>.
+ *
+ * @param string $gemtext Raw gemtext.
+ *
+ * @return string The generated elements concatenated without separators.
+ */
+function gemtext2hmtl(string $gemtext): string
+{
+    $html = [];
+
+    /**
+     * Escape once up front so every captured fragment can be embedded in
+     * markup as is. The LINK and QUOTE patterns match the escaped ">".
+     */
+    $lines = preg_split('/\r?\n/', htmlspecialchars($gemtext));
+
+    $numLines = count($lines);
+
+    for ($index = 0; $index < $numLines; $index++) {
+        $current = $lines[$index];
+
+        if (preg_match(HEADER, $current, $matches) === 1) {
+            [, $levels, $content] = $matches;
+
+            $level = strlen($levels);
+
+            $html[] = "<h$level>$content</h$level>";
+        } elseif (preg_match(LINK, $current, $matches) === 1) {
+            [, $href, $content] = $matches;
+
+            /**
+             * Fall back to the target only when no label was given. Comparing
+             * with '' keeps a label such as "0", which is falsy.
+             */
+            if ($content === '') {
+                $content = $href;
+            }
+
+            $html[] = "<a href=\"$href\">$content</a>";
+        } elseif (preg_match(LIST_ITEM, $current, $matches) === 1) {
+            $items = [];
+
+            /**
+             * Gather the run of consecutive list lines into one list.
+             */
+            while ($index < $numLines && preg_match(LIST_ITEM, $lines[$index], $matches) === 1) {
+                $items[] = "<li>$matches[1]</li>";
+
+                $index++;
+            }
+
+            /**
+             * Step back onto the last list line; the loop increment then
+             * moves to the first line after the list.
+             */
+            $index--;
+
+            $html[] = sprintf("<ul>%s</ul>", implode('', $items));
+        } elseif (preg_match(PRE, $current, $matches) === 1) {
+            $alt = $matches[1];
+
+            $items = [];
+
+            /**
+             * Copy lines verbatim until the closing fence or the end of the
+             * input. The fence itself is skipped by the outer increment.
+             */
+            for ($index++; $index < $numLines; $index++) {
+                if (preg_match(PRE, $lines[$index]) === 1) {
+                    break;
+                }
+
+                $items[] = $lines[$index];
+            }
+
+            $items = implode("\n", $items);
+
+            /**
+             * Explicit comparison so an alt text of "0" still yields a class.
+             */
+            $html[] = $alt !== ''
+                ? "<pre><code class=\"$alt\">$items</code></pre>"
+                : "<pre>$items</pre>";
+        } elseif (preg_match(QUOTE, $current, $matches) === 1) {
+            $html[] = "<blockquote>$matches[1]</blockquote>";
+        } elseif ($current !== '') {
+            $html[] = "<p>$current</p>";
+        }
+    }
+
+    return implode('', $html);
+}
+
+/**
+ * Generate one Atom feed per tag index page plus one feed for all posts.
+ *
+ * Tag pages without posts are skipped, and the all-posts feed is skipped when
+ * there are no posts or no page whose URL contains "posts/index".
+ *
+ * @param array  $pages    Page metadata arrays; the keys isPost, isTag, tag, date and url are read here.
+ * @param string $hostname Host name forwarded to tagToAtomFeed().
+ */
+function generateAtomFeeds(array $pages, string $hostname): void
+{
+    $posts = array_filter($pages, fn ($page) => $page['isPost']);
+
+    $tags = array_filter($pages, fn ($page) => $page['isTag']);
+
+    /**
+     * Sort by latest to previous date.
+     */
+    usort($posts, fn ($a, $b) => $b['date'] <=> $a['date']);
+
+    /**
+     * Group posts by tag; each group keeps the newest-first order.
+     */
+    $groupedPosts = array_reduce($posts, function ($carry, $post) {
+        $carry[$post['tag']][] = $post;
+
+        return $carry;
+    }, []);
+
+    foreach ($tags as $tag) {
+        $tagPosts = $groupedPosts[$tag['tag']] ?? [];
+
+        /**
+         * A tag index without posts has no entries and no date for the feed,
+         * so no feed is written for it.
+         */
+        if ($tagPosts === []) {
+            continue;
+        }
+
+        tagToAtomFeed(array_merge($tag, ['posts' => $tagPosts]), $hostname);
+    }
+
+    if ($posts === []) {
+        return;
+    }
+
+    /**
+     * Get the page that lists all posts.
+     */
+    $allPostsIndex = null;
+
+    foreach ($pages as $page) {
+        if (preg_match('/posts\/index/', $page['url']) === 1) {
+            $allPostsIndex = $page;
+
+            break;
+        }
+    }
+
+    if ($allPostsIndex === null) {
+        return;
+    }
+
+    $allPostsIndex['posts'] = $posts;
+
+    tagToAtomFeed($allPostsIndex, $hostname);
+}
+
+/**
+ * Write the Atom feed that belongs to an index page.
+ *
+ * The feed is stored next to the page, with "index.html" replaced by
+ * "atom.xml" in both the output path and the URL. Nothing is written when the
+ * page has no posts.
+ *
+ * @param array  $tag      Page metadata with an added "posts" list; the first post supplies the feed date.
+ * @param string $hostname Host name used to build absolute https URLs.
+ */
+function tagToAtomFeed(array $tag, string $hostname): void
+{
+    $posts = $tag['posts'] ?? [];
+
+    /**
+     * Without posts there is neither an entry nor a date for <updated>.
+     */
+    if ($posts === []) {
+        return;
+    }
+
+    /**
+     * The title comes from raw gemtext and URLs from file paths, so escape
+     * them before they are placed into XML text and attributes.
+     */
+    $escape = static fn (string $value): string => htmlspecialchars($value, ENT_XML1 | ENT_QUOTES, 'UTF-8');
+
+    $feedUrl = "https://$hostname".str_replace('index.html', 'atom.xml', $tag['url']);
+
+    $pageUrl = "https://$hostname".$tag['url'];
+
+    file_put_contents(
+        str_replace('index.html', 'atom.xml', $tag['output']),
+        buildAtomFeed(
+            $escape($tag['title'] ?? ''),
+            $escape($feedUrl),
+            $escape($pageUrl),
+            $posts[0]['date'].'T12:00:00Z',
+            implode('', array_map(fn ($post) => postToAtomEntry($post, $hostname), $posts))
+        )
+    );
+}
+
+/**
+ * Render one post as an Atom <entry> element.
+ *
+ * @param array  $post     Page metadata; the keys title, url, author, date and html are read here.
+ * @param string $hostname Host name used to build the absolute https URL.
+ *
+ * @return string The entry markup produced by buildAtomEntry().
+ */
+function postToAtomEntry(array $post, string $hostname): string
+{
+    /**
+     * Title and author come from raw gemtext and the URL from a file path, so
+     * escape them for XML just like the HTML content below.
+     */
+    $escape = static fn (string $value): string => htmlspecialchars($value, ENT_XML1 | ENT_QUOTES, 'UTF-8');
+
+    /**
+     * Title and author are null when the page does not declare them; use an
+     * empty string so the string-typed builder does not throw.
+     */
+    return buildAtomEntry(
+        $escape($post['title'] ?? ''),
+        $escape("https://$hostname{$post['url']}"),
+        $escape($post['author'] ?? ''),
+        "{$post['date']}T12:00:00Z",
+        htmlspecialchars($post['html'])
+    );
+}
+
+/**
+ * Assemble an Atom feed document.
+ *
+ * All arguments are inserted verbatim.
+ *
+ * @param string $title   Feed title.
+ * @param string $href    Feed URL; used as the feed id and as the "self" link.
+ * @param string $altHref URL of the HTML page the feed belongs to.
+ * @param string $date    Value for the <updated> element.
+ * @param string $entries Already rendered <entry> elements.
+ *
+ * @return string The feed document without a trailing newline.
+ */
+function buildAtomFeed(string $title, string $href, string $altHref, string $date, string $entries): string
+{
+    $feedLines = [
+        '<?xml version="1.0" encoding="utf-8"?>',
+        '<feed xmlns="http://www.w3.org/2005/Atom">',
+        ' <title type="text">'.$title.'</title>',
+        ' <id>'.$href.'</id>',
+        ' <link rel="alternate" type="text/html" href="'.$altHref.'"/>',
+        ' <link rel="self" type="application/atom+xml" href="'.$href.'"/>',
+        ' <updated>'.$date.'</updated>',
+        ' '.$entries,
+        '</feed>',
+    ];
+
+    return implode("\n", $feedLines);
+}
+
+/**
+ * Assemble a single Atom <entry> element.
+ *
+ * All arguments are inserted verbatim.
+ *
+ * @param string $title   Entry title.
+ * @param string $href    Entry URL; used as the id and as the alternate link.
+ * @param string $author  Author name.
+ * @param string $date    Value used for both <published> and <updated>.
+ * @param string $content Entry body placed inside <content type="html">.
+ *
+ * @return string The entry markup without a trailing newline.
+ */
+function buildAtomEntry(string $title, string $href, string $author, string $date, string $content): string
+{
+    $entryLines = [
+        '<entry>',
+        ' <title type="text">'.$title.'</title>',
+        ' <id>'.$href.'</id>',
+        ' <link rel="alternate" type="text/html" href="'.$href.'"/>',
+        ' <author><name>'.$author.'</name></author>',
+        ' <published>'.$date.'</published>',
+        ' <updated>'.$date.'</updated>',
+        ' <content type="html">'.$content.'</content>',
+        '</entry>',
+    ];
+
+    return implode("\n", $entryLines);
+}
+
+/**
+ * Write "sitemap.xml" listing every post, newest first.
+ *
+ * @param array  $pages    Page metadata arrays; only those with a truthy isPost are listed.
+ * @param string $hostname Host name forwarded to postToSiteMapUrl().
+ * @param mixed  $output   Directory the sitemap is written to.
+ */
+function generateSiteMap(array $pages, string $hostname, $output): void
+{
+    $posts = [];
+
+    foreach ($pages as $page) {
+        if ($page['isPost']) {
+            $posts[] = $page;
+        }
+    }
+
+    /**
+     * Newest date first.
+     */
+    usort($posts, fn ($left, $right) => $right['date'] <=> $left['date']);
+
+    $urls = '';
+
+    foreach ($posts as $post) {
+        $urls .= postToSiteMapUrl($post, $hostname);
+    }
+
+    $target = $output.DIRECTORY_SEPARATOR.'sitemap.xml';
+
+    file_put_contents($target, buildSiteMap($urls));
+}
+
+/**
+ * Render one post as a sitemap <url> element.
+ *
+ * @param array  $post     Page metadata; the keys url and date are read here.
+ * @param string $hostname Host name used to build the absolute https URL.
+ *
+ * @return string The markup produced by buildSiteMapUrl().
+ */
+function postToSiteMapUrl(array $post, string $hostname): string
+{
+    /**
+     * The URL is derived from a file path, so entity-escape it before it is
+     * placed inside <loc>; a bare "&" would make the sitemap invalid XML.
+     */
+    $loc = htmlspecialchars("https://$hostname{$post['url']}", ENT_XML1 | ENT_QUOTES, 'UTF-8');
+
+    return buildSiteMapUrl($loc, "{$post['date']}T12:00:00Z");
+}
+
+/**
+ * Wrap rendered <url> elements in a sitemap <urlset> document.
+ *
+ * @param string $urls Already rendered <url> elements, inserted verbatim.
+ *
+ * @return string The sitemap document without a trailing newline.
+ */
+function buildSiteMap(string $urls): string
+{
+    $documentLines = [
+        '<?xml version="1.0" encoding="utf-8"?>',
+        '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">',
+        ' '.$urls,
+        '</urlset>',
+    ];
+
+    return implode("\n", $documentLines);
+}
+
+/**
+ * Assemble a single sitemap <url> element with a fixed change frequency of "never".
+ *
+ * @param string $loc     Page URL, inserted verbatim into <loc>.
+ * @param string $lastmod Value for the <lastmod> element.
+ *
+ * @return string The element markup without a trailing newline.
+ */
+function buildSiteMapUrl(string $loc, string $lastmod): string
+{
+    return sprintf(
+        "<url>\n <loc>%s</loc>\n <lastmod>%s</lastmod>\n <changefreq>never</changefreq>\n</url>",
+        $loc,
+        $lastmod
+    );
+}