/**
 * Helper functions for building the WWW (HTML) version of a Gemtext site.
 *
 * Provenance, as recorded in the patch this text was extracted from:
 *   commit   a616477a70fd0837a2bedee5e863c1ae3db7d2f4
 *   author   David T. Sadler
 *   date     Thu, 27 May 2021 20:55:30 +0100
 *   subject  Tidy up scripts
 *   files    scripts/php/build_www_site.php (46 lines, new file)
 *            scripts/php/functions.php (363 lines, new file)
 *   rendered by cgit v1.2.3-13-gbd6f
 *
 * NOTE(review): the extraction lost everything between the start of
 * build_www_site.php and the middle of the first function of functions.php.
 * The surviving tail of that function is kept here verbatim and has NOT been
 * reconstructed:
 *
 *     ...getRealPath())) { continue; }
 *     $pages[] = getPageMetaData($fileInfo, $src, $output);
 *     } return $pages; }
 *
 * NOTE(review): the HEADER, LINK, LIST_ITEM, PRE and QUOTE pattern constants
 * used by gemtext2hmtl() are not defined in the surviving text; they must be
 * defined elsewhere before gemtext2hmtl() is called.
 *
 * NOTE(review): every HTML/XML tag inside string literals and heredocs was
 * stripped by the extraction. The markup below was rebuilt from the variables
 * that survived and from the Atom and sitemap formats; confirm it against the
 * repository before relying on the exact tags or whitespace.
 */

/**
 * Collect everything the generators need to know about one Gemtext source file.
 *
 * @param \SplFileInfo $fileInfo The source file.
 * @param string       $src      Source root; removed from the file path to form the URL.
 * @param string       $output   Output root; prefixed to the URL to form the destination path.
 *
 * @return array Keys: input, url, date, tag, isPost, isTag, title, author, html, output.
 *
 * @throws \RuntimeException When the source file cannot be read.
 */
function getPageMetaData(\SplFileInfo $fileInfo, string $src, string $output): array
{
    $input = $fileInfo->getRealPath();

    // Read the file once and reuse the text for both metadata and HTML.
    $gemtext = file_get_contents($input);

    if ($gemtext === false) {
        throw new \RuntimeException("Unable to read source file $input.");
    }

    $pathData = parsePath($input);
    $contentData = parseContent($gemtext);

    $url = str_replace([$src, '.gmi'], ['', '.html'], $input);

    return [
        'input' => $input,
        'url' => $url,
        'date' => $pathData['date'],
        'tag' => $pathData['tag'],
        'isPost' => $pathData['isPost'],
        'isTag' => $pathData['isTag'],
        'title' => $contentData['title'],
        'author' => $contentData['author'],
        'html' => gemtext2hmtl($gemtext),
        'output' => "$output$url",
    ];
}

/**
 * Derive date, tag and page kind from a source file path.
 *
 * @param string $path Path of the source file.
 *
 * @return array Keys: date (string|null), tag (string|null), isPost (bool), isTag (bool).
 */
function parsePath(string $path): array
{
    $date = null;
    $tag = null;
    $isPost = false;
    $isTag = false;

    /**
     * Assume that only posts have both a date and a tag.
     */
    if (preg_match('/\/posts\/(.+)\/(\d\d\d\d-\d\d-\d\d)\//', $path, $matches) === 1) {
        [, $tag, $date] = $matches;

        $isPost = true;
    }

    /**
     * Assume tags have an index file that contains related posts.
     */
    if (preg_match('/posts\/(?:[^\/]|\/\/)+\/index/', $path, $matches) === 1) {
        // The match looks like "posts/<tag>/index"; the tag is the middle segment.
        $tag = explode('/', $matches[0])[1];
        $isTag = true;
    }

    return [
        'date' => $date,
        'tag' => $tag,
        'isPost' => $isPost,
        'isTag' => $isTag,
    ];
}

/**
 * Extract the title (first "# " heading) and the author (a "> ... By <name>"
 * quote line) from raw Gemtext.
 *
 * @param string $content Raw Gemtext.
 *
 * @return array Keys: title (string|null), author (string|null).
 */
function parseContent(string $content): array
{
    $title = null;
    $author = null;

    // With the /m flag "$" matches before "\n" only, so on CRLF files the
    // capture ends in "\r"; strip it so both line-ending styles give the same value.
    if (preg_match('/^# (.+)$/m', $content, $matches) === 1) {
        $title = rtrim($matches[1], "\r");
    }

    if (preg_match('/^> .+ By (.+)$/m', $content, $matches) === 1) {
        $author = rtrim($matches[1], "\r");
    }

    return [
        'title' => $title,
        'author' => $author,
    ];
}

/**
 * Write every page as an HTML file, then generate the Atom feeds and the site map.
 *
 * Prints a message and exits with status 1 when a destination directory cannot
 * be created or the template cannot be read.
 *
 * @param array  $pages                Page records as produced by getPageMetaData().
 * @param string $hostname             Host name used to build absolute URLs.
 * @param string $htmlTemplateDiretory Directory containing default.html (spelling kept for callers).
 * @param string $output               Output root directory.
 */
function buildWWWSite(array $pages, string $hostname, string $htmlTemplateDiretory, string $output): void
{
    // The template is the same for every page, so read it once.
    $templatePath = $htmlTemplateDiretory.DIRECTORY_SEPARATOR.'default.html';
    $template = file_get_contents($templatePath);

    if ($template === false) {
        echo "Unable to read HTML template $templatePath.";
        exit(1);
    }

    foreach ($pages as $page) {
        $destDirectory = dirname($page['output']);

        // Re-check is_dir() after a failed mkdir() in case the directory appeared meanwhile.
        if (!is_dir($destDirectory) && !mkdir($destDirectory, 0777, true) && !is_dir($destDirectory)) {
            echo "Unable to create WWW site directory $destDirectory.";
            exit(1);
        }

        file_put_contents(
            $page['output'],
            buildHtmlFile(
                // The title is raw text and may be null when the page has no heading.
                htmlspecialchars($page['title'] ?? ''),
                $page['html'],
                $template
            )
        );
    }

    generateAtomFeeds($pages, $hostname);

    generateSiteMap($pages, $hostname, $output);
}

/**
 * Fill the "{{ $title }}" and "{{ $contents }}" placeholders of a template.
 *
 * @param string $title    Text for the title placeholder, inserted as given.
 * @param string $contents Markup for the contents placeholder, inserted as given.
 * @param string $template Template text.
 *
 * @return string The template with both placeholders replaced.
 */
function buildHtmlFile(string $title, string $contents, string $template): string
{
    // strtr() substitutes in a single pass, so placeholder-looking text inside
    // $title or $contents is never substituted a second time.
    return strtr($template, [
        '{{ $title }}' => $title,
        '{{ $contents }}' => $contents,
    ]);
}

/**
 * Convert Gemtext to HTML.
 *
 * The whole input is passed through htmlspecialchars() before it is split into
 * lines, so the pattern constants are matched against escaped text. The
 * misspelled function name is kept because callers use it.
 *
 * @param string $gemtext Raw Gemtext.
 *
 * @return string HTML fragments joined without separators.
 */
function gemtext2hmtl(string $gemtext): string
{
    $lines = preg_split('/\r?\n/', htmlspecialchars($gemtext));
    $numLines = count($lines);
    $html = [];

    // NOTE(review): all tags in this function are reconstructed (see file header).
    for ($index = 0; $index < $numLines; $index++) {
        $current = $lines[$index];

        if (preg_match(HEADER, $current, $matches) === 1) {
            [, $levels, $content] = $matches;

            $level = strlen($levels);

            $html[] = "<h$level>$content</h$level>";
        } elseif (preg_match(LINK, $current, $matches) === 1) {
            $href = $matches[1];
            $label = $matches[2] ?? '';

            // A link without a label shows its target instead.
            $content = $label !== '' ? $label : $href;

            $html[] = "<p><a href=\"$href\">$content</a></p>";
        } elseif (preg_match(LIST_ITEM, $current, $matches) === 1) {
            $items = [];

            // Consume the whole run of consecutive list items.
            while ($index < $numLines && preg_match(LIST_ITEM, $lines[$index], $matches) === 1) {
                $items[] = "<li>$matches[1]</li>";

                $index++;
            }

            // Step back so the loop increment lands on the first non-item line.
            $index--;

            $html[] = sprintf('<ul>%s</ul>', implode('', $items));
        } elseif (preg_match(PRE, $current, $matches) === 1) {
            $alt = $matches[1] ?? '';

            $items = [];

            $index++;

            // Collect lines verbatim up to the closing fence (or the end of input).
            while ($index < $numLines && preg_match(PRE, $lines[$index]) !== 1) {
                $items[] = $lines[$index];

                $index++;
            }

            $body = implode("\n", $items);

            // NOTE(review): the attribute that carried the alt text was lost; "title" is a guess.
            $html[] = $alt !== '' ? "<pre title=\"$alt\">$body</pre>" : "<pre>$body</pre>";
        } elseif (preg_match(QUOTE, $current, $matches) === 1) {
            $html[] = "<blockquote>$matches[1]</blockquote>";
        } elseif ($current !== '') {
            $html[] = "<p>$current</p>";
        }
    }

    return implode('', $html);
}

/**
 * Escape plain text for use as XML element content; null becomes an empty string.
 *
 * @param string|null $text Raw text.
 *
 * @return string Escaped text.
 */
function escapeXmlText(?string $text): string
{
    return htmlspecialchars($text ?? '', ENT_XML1 | ENT_COMPAT, 'UTF-8');
}

/**
 * Write one Atom feed per tag page plus one feed for the page that lists all posts.
 *
 * @param array  $pages    Page records as produced by getPageMetaData().
 * @param string $hostname Host name used to build absolute URLs.
 */
function generateAtomFeeds(array $pages, string $hostname): void
{
    $posts = array_filter($pages, static fn (array $page): bool => $page['isPost']);

    $tags = array_filter($pages, static fn (array $page): bool => $page['isTag']);

    /**
     * Sort by latest to previous date.
     */
    usort($posts, static fn (array $a, array $b): int => $b['date'] <=> $a['date']);

    /**
     * Group posts by tag.
     */
    $groupedPosts = [];

    foreach ($posts as $post) {
        $groupedPosts[$post['tag']][] = $post;
    }

    /**
     * Put posts with their tag page. A tag page without posts gets an empty list.
     */
    foreach ($tags as $tag) {
        $tag['posts'] = $groupedPosts[$tag['tag']] ?? [];

        tagToAtomFeed($tag, $hostname);
    }

    /**
     * Get the page that lists all posts; nothing is written when there is none.
     */
    foreach ($pages as $page) {
        if (preg_match('/posts\/index/', $page['url']) === 1) {
            $page['posts'] = $posts;

            tagToAtomFeed($page, $hostname);

            break;
        }
    }
}

/**
 * Write the Atom feed for one index page next to that page, as atom.xml.
 *
 * The feed's date is taken from the first post, so the posts are expected to be
 * ordered newest first. No file is written when the page has no posts, because
 * there is then no date to give the feed.
 *
 * @param array  $tag      Page record with an added "posts" list.
 * @param string $hostname Host name used to build absolute URLs.
 */
function tagToAtomFeed(array $tag, string $hostname): void
{
    $posts = array_values($tag['posts'] ?? []);

    if ($posts === []) {
        return;
    }

    $entries = array_map(
        static fn (array $post): string => postToAtomEntry($post, $hostname),
        $posts
    );

    file_put_contents(
        str_replace('index.html', 'atom.xml', $tag['output']),
        buildAtomFeed(
            escapeXmlText($tag['title'] ?? null),
            "https://$hostname".str_replace('index.html', 'atom.xml', $tag['url']),
            "https://$hostname".$tag['url'],
            $posts[0]['date'].'T12:00:00Z',
            implode('', $entries)
        )
    );
}

/**
 * Render one post as an Atom entry.
 *
 * @param array  $post     Page record of a post.
 * @param string $hostname Host name used to build absolute URLs.
 *
 * @return string Entry XML.
 */
function postToAtomEntry(array $post, string $hostname): string
{
    return buildAtomEntry(
        // Title and author are raw text and either may be null.
        escapeXmlText($post['title'] ?? null),
        "https://$hostname{$post['url']}",
        escapeXmlText($post['author'] ?? null),
        "{$post['date']}T12:00:00Z",
        htmlspecialchars($post['html'])
    );
}

/**
 * Build an Atom feed document. All arguments are inserted as given.
 *
 * @param string $title   Feed title.
 * @param string $href    URL of the feed itself; also used as the feed id.
 * @param string $altHref URL of the page the feed belongs to.
 * @param string $date    Timestamp of the latest update.
 * @param string $entries Concatenated entry XML.
 *
 * @return string Feed XML.
 */
function buildAtomFeed(string $title, string $href, string $altHref, string $date, string $entries): string
{
    // Built from pieces so this file never contains a literal open or close tag sequence.
    $xmlDeclaration = '<'.'?xml version="1.0" encoding="utf-8"?'.'>';

    // NOTE(review): element names reconstructed from the Atom format (see file header).
    return <<<EOF_STR
$xmlDeclaration
<feed xmlns="http://www.w3.org/2005/Atom">
    <title>$title</title>
    <id>$href</id>
    <link rel="self" href="$href"/>
    <link rel="alternate" href="$altHref"/>
    <updated>$date</updated>
    $entries
</feed>
EOF_STR;
}

/**
 * Build one Atom entry. All arguments are inserted as given.
 *
 * @param string $title   Entry title.
 * @param string $href    URL of the post; also used as the entry id.
 * @param string $author  Author name.
 * @param string $date    Timestamp used for both published and updated.
 * @param string $content Entry content, already escaped by the caller.
 *
 * @return string Entry XML.
 */
function buildAtomEntry(string $title, string $href, string $author, string $date, string $content): string
{
    // NOTE(review): element names reconstructed from the Atom format (see file header).
    return <<<EOF_STR
<entry>
    <title>$title</title>
    <id>$href</id>
    <link rel="alternate" href="$href"/>
    <author><name>$author</name></author>
    <published>$date</published>
    <updated>$date</updated>
    <content type="html">$content</content>
</entry>
EOF_STR;
}

/**
 * Write sitemap.xml, listing every post from newest to oldest, into the output directory.
 *
 * @param array  $pages    Page records as produced by getPageMetaData().
 * @param string $hostname Host name used to build absolute URLs.
 * @param mixed  $output   Output root directory (left untyped, as in the original signature).
 */
function generateSiteMap(array $pages, string $hostname, $output): void
{
    $posts = array_filter($pages, static fn (array $page): bool => $page['isPost']);

    /**
     * Sort by latest to previous date.
     */
    usort($posts, static fn (array $a, array $b): int => $b['date'] <=> $a['date']);

    $urls = array_map(
        static fn (array $post): string => postToSiteMapUrl($post, $hostname),
        $posts
    );

    file_put_contents(
        $output.DIRECTORY_SEPARATOR.'sitemap.xml',
        buildSiteMap(implode('', $urls))
    );
}

/**
 * Render one post as a site map URL element.
 *
 * @param array  $post     Page record of a post.
 * @param string $hostname Host name used to build absolute URLs.
 *
 * @return string URL element XML.
 */
function postToSiteMapUrl(array $post, string $hostname): string
{
    return buildSiteMapUrl(
        "https://$hostname{$post['url']}",
        "{$post['date']}T12:00:00Z"
    );
}

/**
 * Wrap URL elements in a site map document.
 *
 * @param string $urls Concatenated URL elements.
 *
 * @return string Site map XML.
 */
function buildSiteMap(string $urls): string
{
    // Built from pieces so this file never contains a literal open or close tag sequence.
    $xmlDeclaration = '<'.'?xml version="1.0" encoding="UTF-8"?'.'>';

    // NOTE(review): element names reconstructed from the sitemap format (see file header).
    return <<<EOF_STR
$xmlDeclaration
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
    $urls
</urlset>
EOF_STR;
}

/**
 * Build one site map URL element; the change frequency is always "never".
 *
 * @param string $loc     Absolute URL of the page.
 * @param string $lastmod Last modification timestamp.
 *
 * @return string URL element XML.
 */
function buildSiteMapUrl(string $loc, string $lastmod): string
{
    // NOTE(review): element names reconstructed from the sitemap format (see file header).
    return <<<EOF_STR
<url>
    <loc>$loc</loc>
    <lastmod>$lastmod</lastmod>
    <changefreq>never</changefreq>
</url>
EOF_STR;
}