python · nmassman · Dec 30, 2019 · Dec 30, 2019 · Dec 31, 2019
@@ -14,7 +14,7 @@
 # ---------------------
 
 extensions = ['sphinx.ext.coverage', 'sphinx.ext.doctest',
-              'pyspecific', 'c_annotations', 'escape4chm']
+              'pyspecific', 'c_annotations', 'escape4chm', 'unidata_version']
 
 
 doctest_global_setup = '''
@@ -226,3 +226,7 @@
 
 # Relative filename of the reference count data file.
 refcount_file = 'data/refcounts.dat'
+
+# Unicode data version for unidata_version extension.
+# Managed by Tools/unicode/makeunicodedata.py
+UNIDATA_VERSION = "12.1.0"
diff --git a/Doc/library/stdtypes.rst b/Doc/library/stdtypes.rst
@@ -350,7 +350,7 @@ Notes:
    The numeric literals accepted include the digits ``0`` to ``9`` or any
    Unicode equivalent (code points with the ``Nd`` property).
 
-   See http://www.unicode.org/Public/12.1.0/ucd/extracted/DerivedNumericType.txt
+   See http://www.unicode.org/Public/|UNIDATA_VERSION|/ucd/extracted/DerivedNumericType.txt
    for a complete list of code points with the ``Nd`` property.
 
 

diff --git a/Doc/library/unicodedata.rst b/Doc/library/unicodedata.rst
@@ -17,8 +17,8 @@
 
 This module provides access to the Unicode Character Database (UCD) which
 defines character properties for all Unicode characters. The data contained in
-this database is compiled from the `UCD version 12.1.0
-<http://www.unicode.org/Public/12.1.0/ucd>`_.
+this database is compiled from the `UCD version |UNIDATA_VERSION|
+<http://www.unicode.org/Public/|UNIDATA_VERSION|/ucd>`_.
 
 The module uses the same names and symbols as defined by Unicode
 Standard Annex #44, `"Unicode Character Database"
@@ -175,6 +175,6 @@ Examples:
 
 .. rubric:: Footnotes
 
-.. [#] http://www.unicode.org/Public/12.1.0/ucd/NameAliases.txt
+.. [#] http://www.unicode.org/Public/|UNIDATA_VERSION|/ucd/NameAliases.txt
 
-.. [#] http://www.unicode.org/Public/12.1.0/ucd/NamedSequences.txt
+.. [#] http://www.unicode.org/Public/|UNIDATA_VERSION|/ucd/NamedSequences.txt
@@ -316,7 +316,7 @@ The Unicode category codes mentioned above stand for:
 * *Nd* - decimal numbers
 * *Pc* - connector punctuations
 * *Other_ID_Start* - explicit list of characters in `PropList.txt
-  <http://www.unicode.org/Public/12.1.0/ucd/PropList.txt>`_ to support backwards
+  <http://www.unicode.org/Public/|UNIDATA_VERSION|/ucd/PropList.txt>`_ to support backwards
   compatibility
 * *Other_ID_Continue* - likewise
 
@@ -929,4 +929,4 @@ occurrence outside string literals and comments is an unconditional error:
 
 .. rubric:: Footnotes
 
-.. [#] http://www.unicode.org/Public/11.0.0/ucd/NameAliases.txt
+.. [#] http://www.unicode.org/Public/|UNIDATA_VERSION|/ucd/NameAliases.txt
@@ -0,0 +1,18 @@
+"""
+    unidata_version.py
+    ~~~~~~~~~~~~~~~~~~
+
+    Sphinx extension to handle updating the Unicode data version in docs.
+
+    https://bugs.python.org/issue22593
+"""
+import re
+
+def parse_UNIDATA_VERSION(app, docname, source):
+    """Replace each ``|UNIDATA_VERSION|`` placeholder in a document's source.
+
+    Handler for the ``source-read`` event.  The text held in ``source[0]``
+    is rewritten in place with every placeholder replaced by
+    ``app.config.UNIDATA_VERSION``; nothing is returned.  *docname* is
+    accepted only because the event passes it and is not used.
+
+    The placeholder is a fixed string, so a plain ``str.replace`` is used:
+    the configured value is inserted literally and any backslash in it is
+    never interpreted as a regular-expression replacement escape.
+    """
+    source[0] = source[0].replace('|UNIDATA_VERSION|',
+                                  app.config.UNIDATA_VERSION)
+
+def setup(app):
+    """Register this extension's config value and event handler on *app*.
+
+    Adds the ``UNIDATA_VERSION`` config value (default ``'0.0.0'``) and
+    connects ``parse_UNIDATA_VERSION`` to the ``source-read`` event, then
+    returns the extension metadata dictionary.
+    """
+    # Register the config value first, then the handler that reads it.
+    app.add_config_value('UNIDATA_VERSION', '0.0.0', 'env')
+    app.connect('source-read', parse_UNIDATA_VERSION)
+
+    metadata = {'version': '1.0', 'parallel_read_safe': True}
+    return metadata
diff --git a/Misc/NEWS.d/next/Documentation/2019-12-29-21-52-00.bpo-22593.97jKwb.rst b/Misc/NEWS.d/next/Documentation/2019-12-29-21-52-00.bpo-22593.97jKwb.rst
@@ -0,0 +1,3 @@
+Automate updating of the Unicode data version in the documentation. Any reference
+to ``|UNIDATA_VERSION|`` in the docs is replaced with the current Unicode data version.
+Contributed by Noah Massman-Hall.
diff --git a/Tools/unicode/makeunicodedata.py b/Tools/unicode/makeunicodedata.py
@@ -22,6 +22,7 @@
 # 2008-06-11 gb   add PRINTABLE_MASK for Atsuo Ishimoto's ascii() patch
 # 2011-10-21 ezio add support for name aliases and named sequences
 # 2012-01    benjamin add full case mappings
+# 2019-12-29 nmassman address bpo-22593 to automate doc updates
 #
 # written by Fredrik Lundh (fredrik@pythonware.com)
 #
@@ -30,6 +31,7 @@
 import os
 import sys
 import zipfile
+import re
 
 from functools import partial
 from textwrap import dedent
@@ -40,7 +42,9 @@
 
 # The Unicode Database
 # --------------------
-# When changing UCD version please update
+# References in documentation to '|UNIDATA_VERSION|' will be set
+#   to the version below.
+# Current references:
 #   * Doc/library/stdtypes.rst, and
 #   * Doc/library/unicodedata.rst
 #   * Doc/reference/lexical_analysis.rst (two occurrences)
@@ -128,6 +132,16 @@ def maketables(trace=0):
     makeunicodedata(unicode, trace)
     makeunicodetype(unicode, trace)
 
+    # Write the unicode version for inclusion in documentation.
+    print("--- Writing UNIDATA_VERSION", UNIDATA_VERSION, "to Doc/conf.py ...")
+    with open('Doc/conf.py', 'r') as fin:
+        newconf = re.sub(r'(?<=UNIDATA_VERSION = )".*?"',
+                         f'"{UNIDATA_VERSION}"',
+                         fin.read())
+    with open('Doc/conf.py', 'w') as fout:
+        fout.write(newconf)
+
+
 
 # --------------------------------------------------------------------
 # unicode character properties