# Source: nixhacks.net/rst_blg.py
# Snapshot: 2022-08-07 13:40:39 +03:00 (237 lines, 8.3 KiB, Python)

__version__ = '0.1.0'
import os
import sys
import shutil
import datetime
import logging
import toml
import jinja2
from typing import List
from collections import namedtuple
from docutils.core import publish_parts
from docutils.core import publish_doctree
from docutils.writers import html5_polyglot
from docutils import nodes
from docutils.parsers.rst import directives
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter
# ------------------------------------------------------------- #
# Read TOML configuration file. #
# ------------------------------------------------------------- #
# TOML documents are UTF-8 by specification, so decode explicitly instead of
# relying on the platform's default encoding (the other file reads and writes
# in this module pass encoding='utf-8' as well).
with open('settings.toml', 'r', encoding='utf-8') as file:
    settings = toml.loads(file.read())
# ------------------------------------------------------------- #
# Setup logger. #
# ------------------------------------------------------------- #
# Build-progress logger: INFO and above, written to stdout with a timestamp
# and the level name in front of every message.
log = logging.getLogger('blog')
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(
    logging.Formatter(fmt='[%(asctime)s: %(levelname)s] %(message)s')
)
log.addHandler(handler)
log.setLevel(logging.INFO)
# ------------------------------------------------------------- #
# Parse docinfo from rST files. #
# ------------------------------------------------------------- #
# Code below is copy-pasted from https://github.com/zeddee/parsedocinfo
# and modified a little bit. Zeddee, thank you!
# Original license SPDX identifier: Apache-2.0
# -- parsedocinfo BEGIN --
DocInfo = namedtuple("DocInfo", 'name body')


def _node_text(node) -> str:
    """Return all text found beneath a DOM node, in document order.

    Text (and CDATA) nodes contribute their value; any other node
    contributes the concatenated text of its children. Looking only at a
    node's direct value would lose, or fail on, text wrapped in nested
    elements such as inline markup.
    """
    if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
        return node.nodeValue
    return "".join(_node_text(child) for child in node.childNodes)


def _traverse_fields(field) -> DocInfo:
    """Convert one generic ``field`` DOM element into a ``DocInfo``.

    ``field`` must contain a ``field_name`` and a ``field_body`` element
    (IndexError is raised otherwise). The name is the text of
    ``field_name``; the body is the text of each ``field_body`` child,
    joined with single spaces.
    """
    field_name = field.getElementsByTagName("field_name")[0]
    field_body = field.getElementsByTagName("field_body")[0]
    return DocInfo(
        _node_text(field_name),
        " ".join(_node_text(val) for val in field_body.childNodes),
    )


def _traverse_docinfo(docinfo_list: List) -> List[DocInfo]:
    """Flatten ``docinfo`` DOM elements into a list of ``DocInfo`` pairs.

    ``docinfo_list`` is an iterable of DOM elements. Every child element
    yields one entry: generic ``field`` elements are handled by
    ``_traverse_fields``; any other element is named after its tag, with
    the text of its children joined by single spaces. Children that are
    not elements are skipped because they carry no tag name.
    """
    out = []
    for docinfo in docinfo_list:
        for node in docinfo.childNodes:
            if node.nodeType != node.ELEMENT_NODE:
                continue
            if node.tagName == "field":
                out.append(_traverse_fields(node))
            else:
                out.append(DocInfo(
                    node.tagName,
                    " ".join(_node_text(val) for val in node.childNodes),
                ))
    return out
def parsedocinfo(data: str) -> dict:
    """Return the docinfo entries of an rST document as a dict.

    ``data`` is parsed into a doctree, converted to a DOM, and every
    ``docinfo`` element in it is flattened into name/body pairs. When a
    name occurs more than once, the last occurrence wins.
    """
    dom = publish_doctree(data).asdom()
    docinfo_nodes = dom.getElementsByTagName("docinfo")
    return {name: body for name, body in _traverse_docinfo(docinfo_nodes)}
# -- parsedocinfo END --
# ------------------------------------------------------------- #
# Extra reStructuredText directives and roles #
# ------------------------------------------------------------- #
# Pygments reST `code-block` directive.
# Source: https://docutils.sourceforge.io/sandbox/code-block-directive/
# `code-block` BEGIN
# A single formatter instance is shared by every `code-block` in a build.
pygments_formatter = HtmlFormatter()


def pygments_directive(name, arguments, options, content, lineno,
                       content_offset, block_text, state, state_machine):
    """Render a `code-block` directive as Pygments-highlighted raw HTML.

    The first directive argument names the lexer. When Pygments does not
    know that name, the plain ``text`` lexer is used so that the block is
    still rendered. Returns a one-element list holding a raw HTML node.
    """
    language = arguments[0]
    try:
        lexer = get_lexer_by_name(language)
    except ValueError:
        # Unknown language: degrade to plain text instead of failing.
        lexer = get_lexer_by_name('text')
    code = '\n'.join(content)
    markup = highlight(code, lexer, pygments_formatter)
    return [nodes.raw('', markup, format='html')]


# docutils function-directive attributes: (required arguments, optional
# arguments, final argument may contain whitespace) and "accepts content".
pygments_directive.arguments = (1, 0, 1)
pygments_directive.content = 1
directives.register_directive('code-block', pygments_directive)
# `code-block` END
# ------------------------------------------------------------- #
# Jinja2 specific functions. #
# ------------------------------------------------------------- #
def render_template(template: str, templates_dir = '.', **kwargs) -> str:
    """Render a Jinja2 template file and return the resulting text.

    ``template`` is looked up by name inside ``templates_dir``; all extra
    keyword arguments are handed to the template as its context. Usage::

        render_template('index.j2',
                        templates_dir = './templates',
                        title = 'My title')
    """
    loader = jinja2.FileSystemLoader(templates_dir)
    environment = jinja2.Environment(loader=loader)
    page = environment.get_template(template)
    return page.render(**kwargs)
# ------------------------------------------------------------- #
# Render HTML from reStructuredText. #
# ------------------------------------------------------------- #
def render_html_body(text: str) -> str:
    """Convert reStructuredText to HTML and return only the ``body`` part.

    The HTML5 writer is used, and the ``[docutils]`` table of the settings
    file is passed to docutils as settings overrides.

    See:

    * help(docutils.core.publish_parts)
    * https://docutils.sourceforge.io/docs/user/config.html
    """
    parts = publish_parts(
        source=text,
        writer=html5_polyglot.Writer(),
        settings_overrides=settings['docutils'],
    )
    return parts['body']
# ------------------------------------------------------------- #
# File operations. #
# ------------------------------------------------------------- #
def find_rst_files(directory: str) -> list:
    """Return the sorted paths of all ``.rst`` files under ``directory``.

    Subdirectories are scanned too. Only names whose extension is exactly
    ``.rst`` match. The result is sorted because ``os.walk`` yields entries
    in an arbitrary, filesystem-dependent order, which would otherwise make
    the build output differ from one machine to another. A missing
    directory yields an empty list.
    """
    file_list = []
    for root, _dirs, files in os.walk(directory):
        file_list.extend(
            os.path.join(root, name)
            for name in files
            if os.path.splitext(name)[1] == '.rst'
        )
    return sorted(file_list)
def write_to_file(path: str, data: str):
    """Write ``data`` to ``path`` as UTF-8 text, replacing existing content."""
    with open(path, mode='w', encoding='utf-8') as out:
        out.write(data)
def copy_files(source_dir: str, destination_dir: str):
    """Copy the ``source_dir`` tree into ``destination_dir``.

    Entries matching ``*.rst`` are left out. Directories that already exist
    at the destination are reused rather than treated as an error.
    """
    skip_rst = shutil.ignore_patterns('*.rst')
    shutil.copytree(source_dir, destination_dir,
                    ignore=skip_rst, dirs_exist_ok=True)
# ------------------------------------------------------------- #
# Build site! #
# ------------------------------------------------------------- #
def validate_date_fmt(date: str):
    """Abort the build when ``date`` does not match the configured format.

    The expected format is ``settings['site']['datetime_format']``. On a
    mismatch the parsing error is logged and the process exits with
    status 1; otherwise nothing is returned.
    """
    expected_format = settings['site']['datetime_format']
    try:
        datetime.datetime.strptime(date, expected_format)
    except ValueError as err:
        log.error('Error: Wrong date format: %s', err)
        sys.exit(1)
def build_site():
    """Build the whole site as described by ``settings``.

    Steps:

    1. Find every ``.rst`` file under the content directory.
    2. For each file: read its docinfo, check the required fields, render
       the page through ``post.j2`` and write it to the build directory
       (flat, named after the source file with an ``.html`` extension).
    3. Copy the assets directory and all non-rST content files into the
       build directory.
    4. Render ``index.j2`` with the posts sorted newest first.

    Files whose docinfo contains ``not_a_post`` are rendered but not listed
    on the index page. The process exits with status 1, after logging the
    reason, when a file lacks ``date`` or ``title`` or when its date does
    not match ``settings['site']['datetime_format']``.
    """
    build_dir = settings['build']['build_dir']
    content_dir = settings['build']['content_dir']
    templates_dir = settings['build']['templates_dir']
    assets_dir = settings['build']['assets_dir']

    os.makedirs(build_dir, exist_ok=True)

    posts_list = []
    for rst_file in find_rst_files(content_dir):
        with open(rst_file, 'r', encoding='utf-8') as rst:
            source = rst.read()

        log.info('parse docinfo: %s', rst_file)
        meta = parsedocinfo(source)

        # Both fields are used below; report a missing one clearly instead
        # of failing later with a bare KeyError traceback.
        for required in ('date', 'title'):
            if required not in meta:
                log.error('Error: Missing :%s: field in %s',
                          required, rst_file)
                sys.exit(1)
        validate_date_fmt(meta['date'])

        # Swap only the extension; str.replace() would also rewrite a
        # '.rst' occurring in the middle of the file name.
        html_file = os.path.splitext(os.path.basename(rst_file))[0] + '.html'
        meta['path'] = html_file

        # Don't append single pages into posts list
        if 'not_a_post' not in meta:
            posts_list.append(meta)

        # Render HTML files
        log.info('render html: %s', html_file)
        html_body = render_html_body(source)
        html_page = render_template(
            'post.j2',
            templates_dir=templates_dir,
            pygments_theme=settings['pygments']['theme'],
            site_title=settings['site']['title'],
            page_title=meta['title'],
            post=html_body)
        write_to_file(os.path.join(build_dir, html_file), html_page)

    # Copy additional files to build_dir
    log.info('copy assets and files from {}, {} to {}'.format(
        assets_dir, content_dir, build_dir))
    assets_dest_dir = os.path.join(build_dir, os.path.basename(assets_dir))
    copy_files(assets_dir, assets_dest_dir)
    copy_files(content_dir, build_dir)

    # Sort posts by date (newest first). The dates were validated against
    # the configured format, so the same format must be used to parse them
    # here; a hard-coded format breaks as soon as the setting differs.
    if posts_list:
        date_format = settings['site']['datetime_format']
        posts_list.sort(
            key=lambda post: datetime.datetime.strptime(post['date'],
                                                        date_format),
            reverse=True)

    # Render index page
    log.info('generate index.html')
    html_page = render_template(
        'index.j2',
        templates_dir=templates_dir,
        pygments_theme=settings['pygments']['theme'],
        site_title=settings['site']['title'],
        page_title=settings['site']['index_page_title'],
        posts=posts_list)
    write_to_file(os.path.join(build_dir, 'index.html'), html_page)
    log.info('success')
# Build the site only when this file is run as a script, not when imported.
if __name__ == '__main__':
    build_site()