__version__ = '0.1.0'

import os
import sys
import shutil
import datetime
import logging
from typing import List
from collections import namedtuple

import toml
import jinja2
from docutils.core import publish_parts
from docutils.core import publish_doctree
from docutils.writers import html5_polyglot
from docutils import nodes
from docutils.parsers.rst import directives
from pygments import highlight
from pygments.lexers import get_lexer_by_name
from pygments.formatters import HtmlFormatter


# ------------------------------------------------------------- #
# Read TOML configuration file.                                 #
# ------------------------------------------------------------- #

# The configuration is read once, at import time, from the current
# working directory.  TOML documents are UTF-8 by specification, so the
# encoding is given explicitly instead of relying on the platform's
# locale default.
with open('settings.toml', 'r', encoding='utf-8') as file:
    settings = toml.loads(file.read())


# ------------------------------------------------------------- #
# Setup logger.                                                 #
# ------------------------------------------------------------- #

# Build progress and errors are written to stdout through the 'blog'
# logger; messages below INFO are dropped.
log = logging.getLogger('blog')
log.setLevel(logging.INFO)
handler = logging.StreamHandler(stream=sys.stdout)
handler.setFormatter(
    logging.Formatter(fmt='[%(asctime)s: %(levelname)s] %(message)s'))
log.addHandler(handler)


# ------------------------------------------------------------- #
# Parse docinfo from rST files.                                 #
# ------------------------------------------------------------- #

# Code below is copy-pasted from https://github.com/zeddee/parsedocinfo
# and modified a little bit. Zeddee, thank you!
# Original license SPDX identifier: Apache-2.0

# -- parsedocinfo BEGIN --

# One docinfo entry: the field name and its text value.  Being a 2-tuple,
# a list of these can be passed straight to ``dict()``.
DocInfo = namedtuple("DocInfo", 'name body')


def _node_text(node) -> str:
    """Return the text of a DOM *node* including all of its descendants.

    Text nodes contribute their data; element nodes contribute the
    concatenated text of their children (an element without children
    yields an empty string).
    """
    if node.nodeType in (node.TEXT_NODE, node.CDATA_SECTION_NODE):
        return node.data
    return "".join(_node_text(child) for child in node.childNodes)


def _traverse_fields(field) -> DocInfo:
    """Convert a generic ``<field>`` DOM element into a ``DocInfo``.

    The name is the text of the first ``<field_name>`` child; the body is
    the text of every child of the first ``<field_body>``, joined with
    single spaces.  An empty field body yields an empty string.
    """
    field_name = field.getElementsByTagName("field_name")[0]
    field_body = field.getElementsByTagName("field_body")[0]

    # Collect text recursively: a body paragraph may contain inline markup
    # (emphasis, links, ...), in which case its children are elements whose
    # ``nodeValue`` is None and cannot be joined directly.
    return DocInfo(
        _node_text(field_name),
        " ".join(_node_text(val) for val in field_body.childNodes))


def _traverse_docinfo(docinfo_list) -> List[DocInfo]:
    """Return a ``DocInfo`` for every child of every ``<docinfo>`` element.

    Generic ``<field>`` children are handled by ``_traverse_fields``; any
    other child is stored under its own tag name with the text of its
    children joined by single spaces.
    """
    out = []
    for docinfo in docinfo_list:
        for node in docinfo.childNodes:
            if node.tagName == "field":
                out.append(_traverse_fields(node))
            else:
                # Children may themselves be elements (nested entries), so
                # gather their text recursively instead of reading
                # ``nodeValue``, which is None for elements.
                out.append(DocInfo(
                    node.tagName,
                    " ".join(_node_text(val) for val in node.childNodes)))
    return out


def parsedocinfo(data: str) -> dict:
    """Parse reStructuredText *data* and return its docinfo as a dict.

    Keys are field/tag names, values are the field text.  A document
    without a docinfo block yields an empty dict; when a name occurs more
    than once the last occurrence wins.
    """
    docinfo = publish_doctree(data).asdom().getElementsByTagName("docinfo")
    return dict(_traverse_docinfo(docinfo))

# -- parsedocinfo END --


# ------------------------------------------------------------- #
# Extra reStructuredText directives and roles                   #
# ------------------------------------------------------------- #

# Pygments reST `code-block` directive.
# Source: https://docutils.sourceforge.io/sandbox/code-block-directive/

# `code-block` BEGIN

pygments_formatter = HtmlFormatter()


def pygments_directive(name, arguments, options, content, lineno,
                       content_offset, block_text, state, state_machine):
    """Render the content of a ``code-block`` directive with Pygments.

    ``arguments[0]`` names the Pygments lexer and ``content`` holds the
    lines of the block.  Returns a list with a single raw HTML node
    containing the highlighted markup.  The remaining parameters belong to
    the function-style directive signature and are not used here.
    """
    try:
        lexer = get_lexer_by_name(arguments[0])
    except ValueError:
        # no lexer found - use the text one instead of an exception
        lexer = get_lexer_by_name('text')
    parsed = highlight('\n'.join(content), lexer, pygments_formatter)
    return [nodes.raw('', parsed, format='html')]


# Function-style directive metadata.
# NOTE(review): per the docutils convention the tuple reads (required
# arguments, optional arguments, final-argument-whitespace flag) and
# ``content = 1`` allows a directive body - confirm against the docutils
# version in use.
pygments_directive.arguments = (1, 0, 1)
pygments_directive.content = 1
directives.register_directive('code-block', pygments_directive)

# `code-block` END


# ------------------------------------------------------------- #
# Jinja2 specific functions.                                    #
# ------------------------------------------------------------- #

def render_template(template: str, templates_dir = '.', **kwargs) -> str:
    """Render Jinja2 template from file.

    *template* is looked up inside *templates_dir*; every extra keyword
    argument becomes a template variable.  A fresh environment is built
    on each call with no options other than the loader.

    Usage::

        render_template('index.j2',
            templates_dir = './templates',
            title = 'My title')
    """
    env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_dir))
    return env.get_template(template).render(**kwargs)


# ------------------------------------------------------------- #
# Render HTML from reStructuredText.                            #
# ------------------------------------------------------------- #

def render_html_body(text: str) -> str:
    """Return HTML body converted from reStructuredText.

    Docutils settings are taken from the ``docutils`` table of the
    module-level ``settings``.

    See:

    * help(docutils.core.publish_parts)
    * https://docutils.sourceforge.io/docs/user/config.html
    """
    html = publish_parts(
        source=text,
        writer=html5_polyglot.Writer(),
        settings_overrides=settings['docutils'],
    )
    return html['body']


# ------------------------------------------------------------- #
# File operations.                                              #
# ------------------------------------------------------------- #

def find_rst_files(directory: str) -> list:
    """Return the sorted list of rST files from directory.

    Subdirectories are scanned too.  Only names whose extension is exactly
    ``.rst`` match.  Each entry is the walked directory path joined with
    the file name.
    """
    file_list = []
    for root, _dirs, files in os.walk(directory):
        file_list.extend(
            os.path.join(root, name)
            for name in files
            if os.path.splitext(name)[1] == '.rst')
    # os.walk yields entries in arbitrary (filesystem) order; sort so that
    # builds are reproducible regardless of the machine they run on.
    return sorted(file_list)


def write_to_file(path: str, data: str):
    """Write *data* to *path* as UTF-8, replacing any existing file."""
    with open(path, 'w', encoding='utf-8') as file:
        file.write(data)


def copy_files(source_dir: str, destination_dir: str):
    """Recursively copy *source_dir* into *destination_dir*, skipping rST.

    Entries matching ``*.rst`` are not copied.  The destination may
    already exist.
    """
    shutil.copytree(
        source_dir,
        destination_dir,
        ignore=shutil.ignore_patterns('*.rst'),
        dirs_exist_ok=True,
    )


# ------------------------------------------------------------- #
# Build site!                                                   #
# ------------------------------------------------------------- #

def validate_date_fmt(date: str):
    """Abort the build unless *date* matches the configured date format.

    The format is read from ``settings['site']['datetime_format']``.  On a
    mismatch the error is logged and the process exits with status 1.

    Returns the parsed ``datetime.datetime`` on success.
    """
    try:
        return datetime.datetime.strptime(
            date, settings['site']['datetime_format'])
    except ValueError as err:
        log.error('Error: Wrong date format: %s', err)
        sys.exit(1)


def _require_docinfo_field(meta: dict, field: str, rst_file: str) -> str:
    """Return ``meta[field]``; log an error and exit(1) when it is absent."""
    try:
        return meta[field]
    except KeyError:
        log.error('Error: Missing :%s: field in %s', field, rst_file)
        sys.exit(1)


def build_site():
    """Build the whole site into the configured build directory.

    Steps:

    1. Render every rST file found under the content directory with the
       ``post.j2`` template.
    2. Copy the assets directory and the non-rST content files.
    3. Render ``index.html`` from ``index.j2`` with the posts sorted
       newest first.

    Every source file must define ``:date:`` (in the configured format)
    and ``:title:``; otherwise an error is logged and the process exits
    with status 1.  Files that define ``:not_a_post:`` are rendered but
    left out of the index.

    NOTE: every page is written directly into the build directory under
    its base name, so sources in different subdirectories that share a
    file name overwrite each other.
    """
    build_dir = settings['build']['build_dir']
    content_dir = settings['build']['content_dir']
    templates_dir = settings['build']['templates_dir']
    assets_dir = settings['build']['assets_dir']
    date_format = settings['site']['datetime_format']

    os.makedirs(build_dir, exist_ok=True)

    posts_list = []
    for rst_file in find_rst_files(content_dir):
        with open(rst_file, 'r', encoding='utf-8') as rst:
            source = rst.read()

        log.info('parse docinfo: %s', rst_file)
        meta = parsedocinfo(source)

        # Fail with a readable message instead of a bare KeyError when a
        # mandatory docinfo field is missing.
        validate_date_fmt(_require_docinfo_field(meta, 'date', rst_file))
        page_title = _require_docinfo_field(meta, 'title', rst_file)

        # Swap only the extension; a plain str.replace would also rewrite
        # '.rst' occurring elsewhere in the file name.
        html_file = os.path.splitext(os.path.basename(rst_file))[0] + '.html'
        meta['path'] = html_file

        # Don't append single pages into posts list
        if 'not_a_post' not in meta:
            posts_list.append(meta)

        # Render HTML files
        log.info('render html: %s', html_file)
        html_body = render_html_body(source)
        html_page = render_template(
            'post.j2',
            templates_dir=templates_dir,
            pygments_theme=settings['pygments']['theme'],
            site_title=settings['site']['title'],
            page_title=page_title,
            post=html_body)
        write_to_file(os.path.join(build_dir, html_file), html_page)

    # Copy additional files to build_dir
    log.info('copy assets and files from %s, %s to %s',
             assets_dir, content_dir, build_dir)
    assets_dest_dir = os.path.join(build_dir, os.path.basename(assets_dir))
    copy_files(assets_dir, assets_dest_dir)
    copy_files(content_dir, build_dir)

    # Sort posts by date (newest on top).  Parse with the same configured
    # format the dates were validated against, so that any valid
    # datetime_format setting works.
    posts_list.sort(
        key=lambda post: datetime.datetime.strptime(post['date'], date_format),
        reverse=True)

    # Render index page
    log.info('generate index.html')
    html_page = render_template(
        'index.j2',
        templates_dir=templates_dir,
        pygments_theme=settings['pygments']['theme'],
        site_title=settings['site']['title'],
        page_title=settings['site']['index_page_title'],
        posts=posts_list)
    write_to_file(os.path.join(build_dir, 'index.html'), html_page)

    log.info('success')


if __name__ == '__main__':
    build_site()