diff options
Diffstat (limited to 'scripts/php/functions.php')
| -rw-r--r-- | scripts/php/functions.php | 363 |
1 files changed, 363 insertions, 0 deletions
<?php declare(strict_types=1);

/**
 * Patterns for the gemtext line types recognised by gemtext2hmtl().
 * Each pattern is matched against a single raw (unescaped) line.
 */
const HEADER = '/^(#+)\s+(.*)/';
const LINK = '/^=>\s+(\S+)\s*(.*)/';
const LIST_ITEM = '/^\*\s+(.*)/';
const PRE = '/^```(.*)?/';
const QUOTE = '/^>\s+(.*)/';

/**
 * Collect a metadata record for every regular file below the source directory.
 *
 * @param string $src    Directory that holds the gemtext sources.
 * @param string $output Directory the generated site is written to.
 *
 * @return array[] One getPageMetaData() record per file.
 *
 * @throws \InvalidArgumentException When $src cannot be resolved to a real path.
 */
function getPages(string $src, string $output): array
{
    $root = realpath($src);

    if ($root === false) {
        throw new \InvalidArgumentException("Source directory $src does not exist.");
    }

    $iter = new \RecursiveIteratorIterator(
        new \RecursiveDirectoryIterator($root, \FilesystemIterator::SKIP_DOTS),
        \RecursiveIteratorIterator::SELF_FIRST
    );

    $pages = [];

    foreach ($iter as $fileInfo) {
        /**
         * Directories are visited too (SELF_FIRST); isFile() also rejects
         * broken symlinks, for which getRealPath() would return false.
         */
        if (!$fileInfo->isFile()) {
            continue;
        }

        $pages[] = getPageMetaData($fileInfo, $root, $output);
    }

    return $pages;
}

/**
 * Build the metadata record describing a single source file.
 *
 * @param \SplFileInfo $fileInfo File to describe.
 * @param string       $src      Resolved source directory; stripped from the front of the path to form the URL.
 * @param string       $output   Output directory; prefixed to the URL to form the destination path.
 *
 * @return array Keys: input, url, date, tag, isPost, isTag, title, author, html, output.
 *
 * @throws \RuntimeException When the file cannot be resolved or read.
 */
function getPageMetaData(\SplFileInfo $fileInfo, string $src, string $output): array
{
    $input = $fileInfo->getRealPath();

    if ($input === false) {
        throw new \RuntimeException("Unable to resolve {$fileInfo->getPathname()}.");
    }

    $gemtext = file_get_contents($input);

    if ($gemtext === false) {
        throw new \RuntimeException("Unable to read $input.");
    }

    $pathData = parsePath($input);

    /**
     * Only strip the source directory from the start of the path and only
     * rewrite a trailing .gmi extension, so that the same text occurring
     * elsewhere in the path is left alone.
     */
    $url = $input;

    if (strncmp($url, $src, strlen($src)) === 0) {
        $url = substr($url, strlen($src));
    }

    if (substr($url, -4) === '.gmi') {
        $url = substr($url, 0, -4).'.html';
    }

    $contentData = parseContent($gemtext);

    return [
        'input' => $input,
        'url' => $url,
        'date' => $pathData['date'],
        'tag' => $pathData['tag'],
        'isPost' => $pathData['isPost'],
        'isTag' => $pathData['isTag'],
        'title' => $contentData['title'],
        'author' => $contentData['author'],
        'html' => gemtext2hmtl($gemtext),
        'output' => "$output$url",
    ];
}

/**
 * Derive post / tag information from a file path.
 *
 * @param string $path Path of a source file.
 *
 * @return array Keys: date (string|null), tag (string|null), isPost (bool), isTag (bool).
 */
function parsePath(string $path): array
{
    $date = null;
    $tag = null;
    $isPost = false;
    $isTag = false;

    /**
     * Assume that only posts have both a date and a tag.
     */
    if (preg_match('/\/posts\/(.+)\/(\d\d\d\d-\d\d-\d\d)\//', $path, $matches) === 1) {
        [, $tag, $date] = $matches;

        $isPost = true;
    }

    /**
     * Assume tags have an index file that contains related posts.
     */
    if (preg_match('/posts\/(?:[^\/]|\/\/)+\/index/', $path, $matches) === 1) {
        $tag = explode('/', $matches[0])[1];
        $isTag = true;
    }

    return [
        'date' => $date,
        'tag' => $tag,
        'isPost' => $isPost,
        'isTag' => $isTag,
    ];
}

/**
 * Extract the title and author from raw gemtext.
 *
 * The title is the text of the first "# " line; the author is whatever
 * follows " By " on the first "> " line that contains it.
 *
 * @param string $content Raw gemtext.
 *
 * @return array Keys: title (string|null), author (string|null).
 */
function parseContent(string $content): array
{
    $title = null;
    $author = null;

    if (preg_match('/^# (.+)$/m', $content, $matches) === 1) {
        $title = $matches[1];
    }

    if (preg_match('/^> .+ By (.+)$/m', $content, $matches) === 1) {
        $author = $matches[1];
    }

    return [
        'title' => $title,
        'author' => $author,
    ];
}

/**
 * Write every page as an HTML file, then the Atom feeds and the site map.
 *
 * Prints a message and exits with status 1 when the template cannot be read
 * or a destination directory cannot be created.
 *
 * @param array[] $pages                Records produced by getPages().
 * @param string  $hostname             Host name used to build absolute URLs.
 * @param string  $htmlTemplateDiretory Directory containing default.html.
 * @param string  $output               Directory the site map is written to.
 */
function buildWWWSite(array $pages, string $hostname, string $htmlTemplateDiretory, string $output): void
{
    /**
     * The template is the same for every page, so read it once.
     */
    $templatePath = $htmlTemplateDiretory.DIRECTORY_SEPARATOR.'default.html';

    $template = file_get_contents($templatePath);

    if ($template === false) {
        echo "Unable to read HTML template $templatePath.";
        exit(1);
    }

    foreach ($pages as $page) {
        $destDirectory = dirname($page['output']);

        if (!is_dir($destDirectory) && !mkdir($destDirectory, 0777, true)) {
            echo "Unable to create WWW site directory $destDirectory.";
            exit(1);
        }

        file_put_contents(
            $page['output'],
            buildHtmlFile(
                /**
                 * The title is raw text taken from the source file and is
                 * null for pages without a "# " heading.
                 */
                htmlspecialchars($page['title'] ?? '', ENT_QUOTES | ENT_SUBSTITUTE),
                $page['html'],
                $template
            )
        );
    }

    generateAtomFeeds($pages, $hostname);

    generateSiteMap($pages, $hostname, $output);
}

/**
 * Fill the title and contents placeholders of an HTML template.
 *
 * Both values are inserted verbatim, so they must already be safe for HTML.
 *
 * @param string $title    Replaces "{{ $title }}".
 * @param string $contents Replaces "{{ $contents }}".
 * @param string $template Template text.
 *
 * @return string The template with both placeholders replaced.
 */
function buildHtmlFile(string $title, string $contents, string $template): string
{
    return str_replace(
        [
            '{{ $title }}',
            '{{ $contents }}'
        ],
        [
            $title,
            $contents,
        ],
        $template
    );
}

/**
 * Convert gemtext to an HTML fragment.
 *
 * Lines are classified on their raw text and the captured pieces are escaped
 * individually. Escaping the whole document first would turn ">" into "&gt;",
 * after which link ("=> ...") and quote ("> ...") lines could never match.
 *
 * @param string $gemtext Raw gemtext.
 *
 * @return string HTML for the document body; blank lines produce no output.
 */
function gemtext2hmtl(string $gemtext): string
{
    $escape = static fn (string $text): string => htmlspecialchars($text, ENT_QUOTES | ENT_SUBSTITUTE);

    $html = [];

    $lines = preg_split('/\r?\n/', $gemtext);

    $numLines = count($lines);

    $index = 0;

    while ($index < $numLines) {
        $current = $lines[$index];

        if (preg_match(HEADER, $current, $matches) === 1) {
            /**
             * HTML has no heading element beyond h6.
             */
            $level = min(strlen($matches[1]), 6);

            $html[] = "<h$level>".$escape($matches[2])."</h$level>";
        } elseif (preg_match(LINK, $current, $matches) === 1) {
            [, $href, $label] = $matches;

            /**
             * A link without a label shows its target.
             */
            if ($label === '') {
                $label = $href;
            }

            $html[] = '<a href="'.$escape($href).'">'.$escape($label).'</a>';
        } elseif (preg_match(LIST_ITEM, $current, $matches) === 1) {
            $items = [];

            /**
             * Consume the whole run of consecutive list items.
             */
            while ($index < $numLines && preg_match(LIST_ITEM, $lines[$index], $matches) === 1) {
                $items[] = '<li>'.$escape($matches[1]).'</li>';

                $index++;
            }

            /**
             * Step back so the increment at the end of the loop lands on the
             * first line after the list.
             */
            $index--;

            $html[] = sprintf("<ul>%s</ul>", implode('', $items));
        } elseif (preg_match(PRE, $current, $matches) === 1) {
            $alt = $matches[1] ?? '';

            $items = [];

            $index++;

            /**
             * Collect lines verbatim up to the closing fence (or the end of
             * the document when the fence is never closed).
             */
            while ($index < $numLines && preg_match(PRE, $lines[$index]) !== 1) {
                $items[] = $escape($lines[$index]);

                $index++;
            }

            $body = implode("\n", $items);

            $html[] = $alt !== ''
                ? '<pre><code class="'.$escape($alt).'">'.$body.'</code></pre>'
                : "<pre>$body</pre>";
        } elseif (preg_match(QUOTE, $current, $matches) === 1) {
            $html[] = '<blockquote>'.$escape($matches[1]).'</blockquote>';
        } elseif ($current !== '') {
            $html[] = '<p>'.$escape($current).'</p>';
        }

        $index++;
    }

    return implode('', $html);
}

/**
 * Write one Atom feed per tag page plus one feed for the page listing all posts.
 *
 * @param array[] $pages    Records produced by getPages().
 * @param string  $hostname Host name used to build absolute URLs.
 */
function generateAtomFeeds(array $pages, string $hostname): void
{
    $posts = array_filter($pages, fn ($page) => $page['isPost']);

    $tags = array_filter($pages, fn ($page) => $page['isTag']);

    /**
     * Sort by latest to previous date.
     */
    usort($posts, fn ($a, $b) => $b['date'] <=> $a['date']);

    /**
     * Group posts by tag.
     */
    $groupedPosts = [];

    foreach ($posts as $post) {
        $groupedPosts[$post['tag']][] = $post;
    }

    /**
     * Put posts with their tag page; a tag may not have any posts yet.
     */
    foreach ($tags as $tag) {
        $tag['posts'] = $groupedPosts[$tag['tag']] ?? [];

        tagToAtomFeed($tag, $hostname);
    }

    /**
     * Get the page that lists all posts.
     */
    $indexes = array_values(
        array_filter($pages, fn ($page) => preg_match('/posts\/index/', $page['url']) === 1)
    );

    if ($indexes === []) {
        return;
    }

    $allPostsIndex = $indexes[0];

    $allPostsIndex['posts'] = $posts;

    tagToAtomFeed($allPostsIndex, $hostname);
}

/**
 * Write the Atom feed for an index page next to it, as atom.xml.
 *
 * @param array  $tag      Page record with an extra "posts" list, newest first.
 * @param string $hostname Host name used to build absolute URLs.
 */
function tagToAtomFeed(array $tag, string $hostname): void
{
    $posts = $tag['posts'] ?? [];

    /**
     * The feed date is that of the newest post; a feed without posts falls
     * back to today so the required <updated> element is still valid.
     */
    $updated = $posts[0]['date'] ?? gmdate('Y-m-d');

    file_put_contents(
        str_replace('index.html', 'atom.xml', $tag['output']),
        buildAtomFeed(
            htmlspecialchars($tag['title'] ?? '', ENT_QUOTES | ENT_XML1),
            "https://$hostname".str_replace('index.html', 'atom.xml', $tag['url']),
            "https://$hostname".$tag['url'],
            $updated.'T12:00:00Z',
            implode('', array_map(fn ($post) => postToAtomEntry($post, $hostname), $posts))
        )
    );
}

/**
 * Render a post record as an Atom entry.
 *
 * Title and author are raw text from the source file (and may be null), so
 * they are XML-escaped here; the HTML body is escaped for type="html" content.
 *
 * @param array  $post     Page record of a post.
 * @param string $hostname Host name used to build the absolute URL.
 *
 * @return string The <entry> element.
 */
function postToAtomEntry(array $post, string $hostname): string
{
    return buildAtomEntry(
        htmlspecialchars($post['title'] ?? '', ENT_QUOTES | ENT_XML1),
        "https://$hostname{$post['url']}",
        htmlspecialchars($post['author'] ?? '', ENT_QUOTES | ENT_XML1),
        "{$post['date']}T12:00:00Z",
        htmlspecialchars($post['html'])
    );
}

/**
 * Assemble an Atom feed document. All values are inserted verbatim.
 *
 * @param string $title   Feed title, already XML-escaped.
 * @param string $href    URL of the feed itself; also used as its id.
 * @param string $altHref URL of the HTML page the feed belongs to.
 * @param string $date    Value of the <updated> element.
 * @param string $entries Concatenated <entry> elements.
 *
 * @return string The feed document.
 */
function buildAtomFeed(string $title, string $href, string $altHref, string $date, string $entries): string
{
    return <<<EOF_STR
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
    <title type="text">$title</title>
    <id>$href</id>
    <link rel="alternate" type="text/html" href="$altHref"/>
    <link rel="self" type="application/atom+xml" href="$href"/>
    <updated>$date</updated>
    $entries
</feed>
EOF_STR;
}

/**
 * Assemble an Atom entry. All values are inserted verbatim.
 *
 * @param string $title   Entry title, already XML-escaped.
 * @param string $href    URL of the post; also used as its id.
 * @param string $author  Author name, already XML-escaped.
 * @param string $date    Value of both <published> and <updated>.
 * @param string $content Escaped HTML body.
 *
 * @return string The <entry> element.
 */
function buildAtomEntry(string $title, string $href, string $author, string $date, string $content): string
{
    return <<<EOF_STR
<entry>
    <title type="text">$title</title>
    <id>$href</id>
    <link rel="alternate" type="text/html" href="$href"/>
    <author><name>$author</name></author>
    <published>$date</published>
    <updated>$date</updated>
    <content type="html">$content</content>
</entry>
EOF_STR;
}

/**
 * Write sitemap.xml, listing every post, into the output directory.
 *
 * @param array[] $pages    Records produced by getPages().
 * @param string  $hostname Host name used to build absolute URLs.
 * @param string  $output   Directory that receives sitemap.xml.
 */
function generateSiteMap(array $pages, string $hostname, $output): void
{
    $posts = array_filter($pages, fn ($page) => $page['isPost']);

    /**
     * Sort by latest to previous date.
     */
    usort($posts, fn ($a, $b) => $b['date'] <=> $a['date']);

    file_put_contents(
        $output.DIRECTORY_SEPARATOR.'sitemap.xml',
        buildSiteMap(
            implode('', array_map(fn ($post) => postToSiteMapUrl($post, $hostname), $posts))
        )
    );
}

/**
 * Render a post record as a site map <url> element.
 *
 * @param array  $post     Page record of a post.
 * @param string $hostname Host name used to build the absolute URL.
 *
 * @return string The <url> element.
 */
function postToSiteMapUrl(array $post, string $hostname): string
{
    return buildSiteMapUrl(
        "https://$hostname{$post['url']}",
        "{$post['date']}T12:00:00Z"
    );
}

/**
 * Wrap <url> elements in a site map document.
 *
 * @param string $urls Concatenated <url> elements.
 *
 * @return string The site map document.
 */
function buildSiteMap(string $urls): string
{
    return <<<EOF_STR
<?xml version="1.0" encoding="utf-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
    $urls
</urlset>
EOF_STR;
}

/**
 * Assemble a site map <url> element. Both values are inserted verbatim.
 *
 * @param string $loc     Absolute URL of the page.
 * @param string $lastmod Value of the <lastmod> element.
 *
 * @return string The <url> element.
 */
function buildSiteMapUrl(string $loc, string $lastmod): string
{
    return <<<EOF_STR
<url>
    <loc>$loc</loc>
    <lastmod>$lastmod</lastmod>
    <changefreq>never</changefreq>
</url>
EOF_STR;
}
