# $Id: __init__.py 9272 2022-11-24 20:28:40Z milde $
# :Author: Günter Milde <milde@users.sf.net>
# Based on the html4css1 writer by David Goodger.
# :Maintainer: docutils-develop@lists.sourceforge.net
# :Copyright: © 2005, 2009, 2015 Günter Milde,
# portions from html4css1 © David Goodger.
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved.
# This file is offered as-is, without any warranty.
#
# .. _2-Clause BSD license: https://opensource.org/licenses/BSD-2-Clause
# Use "best practice" as recommended by the W3C:
# http://www.w3.org/2009/cheatsheet/
"""
Plain HyperText Markup Language document tree Writer.
The output conforms to the `HTML 5` specification.
The cascading style sheet "minimal.css" is required for proper viewing,
the style sheet "plain.css" improves reading experience.
"""
__docformat__ = 'reStructuredText'
import mimetypes
from pathlib import Path
from docutils import frontend, nodes
from docutils.writers import _html_base
class Writer(_html_base.Writer):
supported = ('html5', 'xhtml', 'html')
"""Formats this writer supports."""
default_stylesheets = ['minimal.css', 'plain.css']
default_stylesheet_dirs = ['.', str(Path(__file__).parent)]
default_template = Path(__file__).parent / 'template.txt'
# use a copy of the parent spec with some modifications
settings_spec = frontend.filter_settings_spec(
_html_base.Writer.settings_spec,
template=(
f'Template file. (UTF-8 encoded, default: "{default_template}")',
['--template'],
{'default': default_template, 'metavar': '<file>'}),
stylesheet_path=(
'Comma separated list of stylesheet paths. '
'Relative paths are expanded if a matching file is found in '
'the --stylesheet-dirs. With --link-stylesheet, '
'the path is rewritten relative to the output HTML file. '
'(default: "%s")' % ','.join(default_stylesheets),
['--stylesheet-path'],
{'metavar': '<file[,file,...]>', 'overrides': 'stylesheet',
'validator': frontend.validate_comma_separated_list,
'default': default_stylesheets}),
stylesheet_dirs=(
'Comma-separated list of directories where stylesheets are found. '
'Used by --stylesheet-path when expanding relative path '
'arguments. (default: "%s")' % ','.join(default_stylesheet_dirs),
['--stylesheet-dirs'],
{'metavar': '<dir[,dir,...]>',
'validator': frontend.validate_comma_separated_list,
'default': default_stylesheet_dirs}),
initial_header_level=(
'Specify the initial header level. Does not affect document '
'title & subtitle (see --no-doc-title). (default: 2 for "<h2>")',
['--initial-header-level'],
{'choices': '1 2 3 4 5 6'.split(), 'default': '2',
'metavar': '<level>'}),
no_xml_declaration=(
'Omit the XML declaration.',
['--no-xml-declaration'],
{'dest': 'xml_declaration', 'action': 'store_false'}),
)
settings_spec = settings_spec + (
'HTML5 Writer Options',
'',
((frontend.SUPPRESS_HELP, # Obsoleted by "--image-loading"
['--embed-images'],
{'action': 'store_true',
'validator': frontend.validate_boolean}),
(frontend.SUPPRESS_HELP, # Obsoleted by "--image-loading"
['--link-images'],
{'dest': 'embed_images', 'action': 'store_false'}),
('Suggest at which point images should be loaded: '
'"embed", "link" (default), or "lazy".',
['--image-loading'],
{'choices': ('embed', 'link', 'lazy'),
# 'default': 'link' # default set in _html_base.py
}),
('Append a self-link to section headings.',
['--section-self-link'],
{'default': 0, 'action': 'store_true'}),
('Do not append a self-link to section headings. (default)',
['--no-section-self-link'],
{'dest': 'section_self_link', 'action': 'store_false'}),
)
)
config_section = 'html5 writer'
def __init__(self):
self.parts = {}
self.translator_class = HTMLTranslator
class HTMLTranslator(_html_base.HTMLTranslator):
"""
This writer generates `polyglot markup`: HTML5 that is also valid XML.
Safe subclassing: when overriding, treat ``visit_*`` and ``depart_*``
methods as a unit to prevent breaks due to internal changes. See the
docstring of docutils.writers._html_base.HTMLTranslator for details
and examples.
"""
# self.starttag() arguments for the main document
documenttag_args = {'tagname': 'main'}
# add meta tag to fix rendering in mobile browsers
def __init__(self, document):
super().__init__(document)
self.meta.append('<meta name="viewport" '
'content="width=device-width, initial-scale=1" />\n')
# <acronym> tag obsolete in HTML5. Use the <abbr> tag instead.
def visit_acronym(self, node):
# @@@ implementation incomplete ("title" attribute)
self.body.append(self.starttag(node, 'abbr', ''))
def depart_acronym(self, node):
self.body.append('</abbr>')
# no standard meta tag name in HTML5, use separate "author" meta tags
# https://www.w3.org/TR/html5/document-metadata.html#standard-metadata-names
def visit_authors(self, node):
self.visit_docinfo_item(node, 'authors', meta=False)
for subnode in node:
self.meta.append('<meta name="author" content='
f'"{self.attval(subnode.astext())}" />\n')
def depart_authors(self, node):
self.depart_docinfo_item()
# use the <figcaption> semantic tag.
def visit_caption(self, node):
if isinstance(node.parent, nodes.figure):
self.body.append('<figcaption>\n')
self.body.append(self.starttag(node, 'p', ''))
def depart_caption(self, node):
self.body.append('</p>\n')
# <figcaption> is closed in depart_figure(), as legend may follow.
# use HTML block-level tags if matching class value found
supported_block_tags = {'ins', 'del'}
def visit_container(self, node):
# If there is exactly one of the "supported block tags" in
# the list of class values, use it as tag name:
classes = node['classes']
tags = [cls for cls in classes
if cls in self.supported_block_tags]
if len(tags) == 1:
node.html5tagname = tags[0]
classes.remove(tags[0])
else:
node.html5tagname = 'div'
self.body.append(self.starttag(node, node.html5tagname,
CLASS='docutils container'))
def depart_container(self, node):
self.body.append(f'</{node.html5tagname}>\n')
# no standard meta tag name in HTML5, use dcterms.rights
# see https://wiki.whatwg.org/wiki/MetaExtensions
def visit_copyright(self, node):
self.visit_docinfo_item(node, 'copyright', meta=False)
self.meta.append('<meta name="dcterms.rights" '
f'content="{self.attval(node.astext())}" />\n')
def depart_copyright(self, node):
self.depart_docinfo_item()
# no standard meta tag name in HTML5, use dcterms.date
def visit_date(self, node):
self.visit_docinfo_item(node, 'date', meta=False)
self.meta.append('<meta name="dcterms.date" '
f'content="{self.attval(node.astext())}" />\n')
def depart_date(self, node):
self.depart_docinfo_item()
# use new HTML5 <figure> and <figcaption> elements
def visit_figure(self, node):
atts = {}
if node.get('width'):
atts['style'] = f"width: {node['width']}"
if node.get('align'):
atts['class'] = f"align-{node['align']}"
self.body.append(self.starttag(node, 'figure', **atts))
def depart_figure(self, node):
if len(node) > 1:
self.body.append('</figcaption>\n')
self.body.append('</figure>\n')
# use HTML5 <footer> element
def visit_footer(self, node):
self.context.append(len(self.body))
def depart_footer(self, node):
start = self.context.pop()
footer = [self.starttag(node, 'footer')]
footer.extend(self.body[start:])
footer.append('</footer>\n')
self.footer.extend(footer)
self.body_suffix[:0] = footer
del self.body[start:]
# use HTML5 <header> element
def visit_header(self, node):
self.context.append(len(self.body))
def depart_header(self, node):
start = self.context.pop()
header = [self.starttag(node, 'header')]
header.extend(self.body[start:])
header.append('</header>\n')
self.body_prefix.extend(header)
self.header.extend(header)
del self.body[start:]
# MIME types supported by the HTML5 <video> element
videotypes = ('video/mp4', 'video/webm', 'video/ogg')
def visit_image(self, node):
atts = {}
uri = node['uri']
mimetype = mimetypes.guess_type(uri)[0]
if mimetype not in self.videotypes:
return super().visit_image(node)
# image size
if 'width' in node:
atts['width'] = node['width'].replace('px', '')
if 'height' in node:
atts['height'] = node['height'].replace('px', '')
if 'align' in node:
atts['class'] = f"align-{node['align']}"
if 'controls' in node['classes']:
atts['controls'] = 'controls'
node['classes'].remove('controls')
atts['title'] = node.get('alt', uri)
if getattr(self.settings, 'image_loading', None) == 'lazy':
atts['loading'] = 'lazy'
# No newline in inline context or if surrounded by <a>...</a>.
if (isinstance(node.parent, nodes.TextElement)
or (isinstance(node.parent, nodes.reference)
and not isinstance(node.parent.parent, nodes.TextElement))):
suffix = ''
else:
suffix = '\n'
fallback = node.get('alt', uri)
self.body.append(
self.starttag(node, "video", suffix, src=uri, **atts)
+ f'<a href="{uri}">{fallback}</a>{suffix}</video>{suffix}')
def depart_image(self, node):
pass
# use HTML text-level tags if matching class value found
supported_inline_tags = {'code', 'kbd', 'dfn', 'samp', 'var',
'bdi', 'del', 'ins', 'mark', 'small',
'b', 'i', 'q', 's', 'u'}
# Use `supported_inline_tags` if found in class values
def visit_inline(self, node):
classes = node['classes']
tags = [cls for cls in self.supported_inline_tags
if cls in classes]
if len(tags):
node.html5tagname = tags[0]
classes.remove(tags[0])
elif (classes == ['ln']
and isinstance(node.parent, nodes.literal_block)
and 'code' in node.parent.get('classes')):
if self.body[-1] == '<code>':
del self.body[-1]
else:
self.body.append('</code>')
node.html5tagname = 'small'
else:
node.html5tagname = 'span'
self.body.append(self.starttag(node, node.html5tagname, ''))
def depart_inline(self, node):
self.body.append(f'</{node.html5tagname}>')
if (node.html5tagname == 'small' and node.get('classes') == ['ln']
and isinstance(node.parent, nodes.literal_block)):
self.body.append(f'<code data-lineno="{node.astext()}">')
del node.html5tagname
# place inside HTML5 <figcaption> element (together with caption)
def visit_legend(self, node):
if not isinstance(node.parent[1], nodes.caption):
self.body.append('<figcaption>\n')
self.body.append(self.starttag(node, 'div', CLASS='legend'))
def depart_legend(self, node):
self.body.append('</div>\n')
# <figcaption> closed in visit_figure()
# use HTML text-level tags if matching class value found
def visit_literal(self, node):
classes = node['classes']
tags = [cls for cls in self.supported_inline_tags
if cls in classes]
if len(tags):
tagname = tags[0]
classes.remove(tags[0])
else:
tagname = 'span'
if tagname == 'code':
self.body.append(self.starttag(node, 'code', ''))
return
self.body.append(
self.starttag(node, tagname, '', CLASS='docutils literal'))
text = node.astext()
# remove hard line breaks (except if in a parsed-literal block)
if not isinstance(node.parent, nodes.literal_block):
text = text.replace('\n', ' ')
# Protect text like ``--an-option`` and the regular expression
# ``[+]?(\d+(\.\d*)?|\.\d+)`` from bad line wrapping
for token in self.words_and_spaces.findall(text):
if token.strip() and self.in_word_wrap_point.search(token):
self.body.append(
f'<span class="pre">{self.encode(token)}</span>')
else:
self.body.append(self.encode(token))
self.body.append(f'</{tagname}>')
# Content already processed:
raise nodes.SkipNode
def depart_literal(self, node):
# skipped unless literal element is from "code" role:
self.body.append('</code>')
# Meta tags: 'lang' attribute replaced by 'xml:lang' in XHTML 1.1
# HTML5/polyglot recommends using both
def visit_meta(self, node):
if node.hasattr('lang'):
node['xml:lang'] = node['lang']
self.meta.append(self.emptytag(node, 'meta',
**node.non_default_attributes()))
def depart_meta(self, node):
pass
# no standard meta tag name in HTML5
def visit_organization(self, node):
self.visit_docinfo_item(node, 'organization', meta=False)
def depart_organization(self, node):
self.depart_docinfo_item()
# use the new HTML5 element <section>
def visit_section(self, node):
self.section_level += 1
self.body.append(
self.starttag(node, 'section'))
def depart_section(self, node):
self.section_level -= 1
self.body.append('</section>\n')
# use the new HTML5 element <aside>
def visit_sidebar(self, node):
self.body.append(
self.starttag(node, 'aside', CLASS='sidebar'))
self.in_sidebar = True
def depart_sidebar(self, node):
self.body.append('</aside>\n')
self.in_sidebar = False
# Use new HTML5 element <aside> or <nav>
# Add class value to <body>, if there is a ToC in the document
# (see responsive.css how this is used for a navigation sidebar).
def visit_topic(self, node):
atts = {'classes': ['topic']}
if 'contents' in node['classes']:
node.html_tagname = 'nav'
del atts['classes']
if isinstance(node.parent, nodes.document):
atts['role'] = 'doc-toc'
self.body_prefix[0] = '</head>\n<body class="with-toc">\n'
elif 'abstract' in node['classes']:
node.html_tagname = 'div'
atts['role'] = 'doc-abstract'
elif 'dedication' in node['classes']:
node.html_tagname = 'div'
atts['role'] = 'doc-dedication'
else:
node.html_tagname = 'aside'
self.body.append(self.starttag(node, node.html_tagname, **atts))
def depart_topic(self, node):
self.body.append(f'</{node.html_tagname}>\n')
del node.html_tagname
# append self-link
def section_title_tags(self, node):
start_tag, close_tag = super().section_title_tags(node)
ids = node.parent['ids']
if (ids and getattr(self.settings, 'section_self_link', None)
and not isinstance(node.parent, nodes.document)):
self_link = ('<a class="self-link" title="link to this section"'
f' href="#{ids[0]}"></a>')
close_tag = close_tag.replace('</h', self_link + '</h')
return start_tag, close_tag