Browse Source

[2853] Add the doxygen2pydoc.py tool

JINMEI Tatuya 12 years ago
parent
commit
faffa474e2

+ 2 - 0
configure.ac

@@ -1408,6 +1408,7 @@ AC_OUTPUT([doc/version.ent
            src/lib/log/tests/logger_lock_test.sh
            src/lib/log/tests/severity_test.sh
            src/lib/log/tests/tempdir.h
+           src/lib/util/python/doxygen2pydoc.py
            src/lib/util/python/mkpywrapper.py
            src/lib/util/python/gen_wiredata.py
            src/lib/server_common/tests/data_path.h
@@ -1439,6 +1440,7 @@ AC_OUTPUT([doc/version.ent
            chmod +x src/lib/log/tests/local_file_test.sh
            chmod +x src/lib/log/tests/logger_lock_test.sh
            chmod +x src/lib/log/tests/severity_test.sh
+           chmod +x src/lib/util/python/doxygen2pydoc.py
            chmod +x src/lib/util/python/mkpywrapper.py
            chmod +x src/lib/util/python/gen_wiredata.py
            chmod +x src/lib/python/isc/log/tests/log_console.py

+ 1 - 0
src/lib/util/python/.gitignore

@@ -1,2 +1,3 @@
+/doxygen2pydoc.py
 /gen_wiredata.py
 /mkpywrapper.py

+ 1 - 1
src/lib/util/python/Makefile.am

@@ -1,3 +1,3 @@
-noinst_SCRIPTS = gen_wiredata.py mkpywrapper.py const2hdr.py \
+noinst_SCRIPTS = doxygen2pydoc.py gen_wiredata.py mkpywrapper.py const2hdr.py \
 	pythonize_constants.py
 EXTRA_DIST = const2hdr.py pythonize_constants.py

+ 554 - 0
src/lib/util/python/doxygen2pydoc.py.in

@@ -0,0 +1,554 @@
+#!@PYTHON@
+
+# Copyright (C) 2011  Internet Systems Consortium.
+#
+# Permission to use, copy, modify, and distribute this software for any
+# purpose with or without fee is hereby granted, provided that the above
+# copyright notice and this permission notice appear in all copies.
+#
+# THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SYSTEMS CONSORTIUM
+# DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
+# INTERNET SYSTEMS CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
+# INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
+# FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
+# NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
+# WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+r'''
+How to use it
+
+Use the "xmlonly" doxygen command to generate a special XML tag in the
+XML output.  The block enclosed by \xmlonly and \endxmlonly should contain
+a verbatim XML tag named "pythonlisting", in which the python code should
+be placed.
+/// \code
+///    Name name("example.com");
+///    std::cout << name.toText() << std::endl;
+/// \endcode
+///
+/// \xmlonly <pythonlisting>
+///    name = Name("example.com")
+///    print(name.to_text())
+/// </pythonlisting> \endxmlonly
+
+Note that there must be a blank line between \endcode and \xmlonly.
+doxygen2pydoc assume the pythonlisting tag is in a separate <para> node.
+This blank ensures doxygen will produce the XML file that meets the
+assumption.
+
+generate XML output by doxygen.  Use bind10/doc/Doxyfile-xml:
+% cd bind10/doc
+% doxygen Doxyfile-xml
+(XML files will be generated under bind10/doc/html/xml)
+
+This simplified utility assumes the following structure:
+...
+  <compounddef ...>
+    <compoundname>isc::dns::TSIGError</compoundname>
+    <sectiondef kind="user-defined">
+      constructor, destructor
+    </sectiondef>
+    <sectiondef kind="public-type">
+      ..
+    </sectiondef>
+    <sectiondef kind="public-func">
+      <memberdef kind="function"...>
+        <type>return type (if any)</type>
+        <argsstring>(...) [const]</argsstring>
+        <name>method name</name>
+        <briefdescription>method's brief description</briefdescription>
+        <detaileddescription>
+          <para>...</para>...
+          <para>
+            <parameterlist kind="exception">
+              <parameteritem>
+                <parameternamelist>
+                  <parametername>Exception name</parametername>
+                </parameternamelist>
+                <parameterdescription>
+                  <para>exception desc</para>
+                </parameterdescription>
+              </parameteritem>
+            </parameterlist>
+            <parameterlist kind="param">
+              <parameteritem>
+                <parameternamelist>
+                  <parametername>param name</parametername>
+                </parameternamelist>
+                <parameterdescription>
+                  <para>param desc</para>
+                </parameterdescription>
+              </parameteritem>
+              ...
+            </parameterlist>
+            <simplesect kind="return">Return value</simplesect>
+          </para>
+        </detaileddescription>
+      </memberdef>
+    </sectiondef>
+    <sectiondef kind="public-static-attrib|user-defined">
+      <memberdef kind="variable"...>
+        <name>class-specific-constant</name>
+        <initializer>value</initializer>
+        <brief|detaileddescription>paragraph(s)</brief|detaileddescription>
+    </sectiondef>
+    <briefdescription>
+      class's brief description
+    </briefdescription>
+    <detaileddescription>
+      class's detailed description
+    </detaileddescription>
+  </compounddef>
+'''
+
+import re, string, sys, textwrap
+from xml.dom.minidom import parse
+from textwrap import fill, dedent, TextWrapper
+
+camel_replacements = {}
+member_functions = []
+constructors = []
+class_variables = []
+
+RE_CAMELTERM = re.compile('([\s\.]|^)[a-z]+[A-Z]\S*')
+RE_SIMPLECAMEL = re.compile("([a-z])([A-Z])")
+RE_CAMELAFTERUPPER = re.compile("([A-Z])([A-Z])([a-z])")
+
+class Paragraph:
+    TEXT = 0
+    ITEMIZEDLIST = 1
+    CPPLISTING = 2
+    PYLISTING = 3
+    VERBATIM = 4
+
+    def __init__(self, xml_node):
+        if len(xml_node.getElementsByTagName("pythonlisting")) > 0:
+            self.type = self.PYLISTING
+            self.text = re.sub("///", "", get_text(xml_node))
+        elif len(xml_node.getElementsByTagName("verbatim")) > 0:
+            self.type = self.VERBATIM
+            self.text = get_text(xml_node)
+        elif len(xml_node.getElementsByTagName("programlisting")) > 0:
+            # We ignore node containing a "programlisting" tag.
+            # They are C++ example code, and we are not interested in them
+            # in pydoc.
+            self.type = self.CPPLISTING
+        elif len(xml_node.getElementsByTagName("itemizedlist")) > 0:
+            self.type = self.ITEMIZEDLIST
+            self.items = []
+            for item in xml_node.getElementsByTagName("listitem"):
+                self.items.append(get_text(item))
+        else:
+            self.type = self.TEXT
+
+            # A single textual paragraph could have multiple simple sections
+            # if it contains notes.
+
+            self.texts = []
+            subnodes = []
+            for child in xml_node.childNodes:
+                if child.nodeType == child.ELEMENT_NODE and \
+                        child.nodeName == 'simplesect' and \
+                        child.getAttribute('kind') == 'note':
+                    if len(subnodes) > 0:
+                        self.texts.append(get_text_fromnodelist(subnodes))
+                        subnodes = []
+                    subtext = 'Note: '
+                    for t in child.childNodes:
+                        subtext += get_text(t)
+                    self.texts.append(subtext)
+                else:
+                    subnodes.append(child)
+            if len(subnodes) > 0:
+                self.texts.append(get_text_fromnodelist(subnodes))
+
+    def dump(self, f, wrapper):
+        if self.type == self.CPPLISTING:
+            return
+        elif self.type == self.ITEMIZEDLIST:
+            for item in self.items:
+                item_wrapper = TextWrapper(\
+                    initial_indent=wrapper.initial_indent + "- ",
+                    subsequent_indent=wrapper.subsequent_indent + "  ")
+                dump_filled_text(f, item_wrapper, item)
+                f.write("\\n\\\n")
+        elif self.type == self.TEXT:
+            for text in self.texts:
+                if text != self.texts[0]:
+                    f.write("\\n\\\n")
+                dump_filled_text(f, wrapper, text)
+                f.write("\\n\\\n")
+        else:
+            dump_filled_text(f, None, self.text)
+            f.write("\\n\\\n")
+        f.write("\\n\\\n")
+
+class NamedItem:
+    def __init__(self, name, desc):
+        self.name = name
+        self.desc = desc
+
+    def dump(self, f, wrapper):
+        # we use deeper indent inside the item list.
+        new_initial_indent = wrapper.initial_indent + " " * 2
+        new_subsequent_indent = wrapper.subsequent_indent + " " * (2 + 11)
+        local_wrapper = TextWrapper(initial_indent=new_initial_indent,
+                                    subsequent_indent=new_subsequent_indent)
+
+        # concatenate name and description with a fixed width (up to 10 chars)
+        # for the name, and wrap the entire text, then dump it to file.
+        dump_filled_text(f, local_wrapper, "%-10s %s" % (self.name, self.desc))
+        f.write("\\n\\\n")
+
+class FunctionDefinition:
+    # function types
+    CONSTRUCTOR = 0
+    COPY_CONSTRUCTOR = 1
+    DESTRUCTOR = 2
+    ASSIGNMENT_OP = 3
+    OTHER = 4
+
+    def __init__(self):
+        self.type = self.OTHER
+        self.name = None
+        self.pyname = None
+        self.args = ""
+        self.ret_type = None
+        self.brief_desc = None
+        self.detailed_desc = []
+        self.exceptions = []
+        self.parameters = []
+        self.returns = None
+        self.have_param = False
+
+    def dump_doc(self, f, wrapper=TextWrapper()):
+        f.write(self.pyname + "(" + self.args + ")")
+        if self.ret_type is not None:
+            f.write(" -> " + self.ret_type)
+        f.write("\\n\\\n\\n\\\n")
+
+        if self.brief_desc is not None:
+            dump_filled_text(f, wrapper, self.brief_desc)
+            f.write("\\n\\\n\\n\\\n")
+
+        for para in self.detailed_desc:
+            para.dump(f, wrapper)
+
+        if len(self.exceptions) > 0:
+            f.write(wrapper.fill("Exceptions:") + "\\n\\\n")
+            for ex_desc in self.exceptions:
+                ex_desc.dump(f, wrapper)
+            f.write("\\n\\\n")
+        if len(self.parameters) > 0:
+            f.write(wrapper.fill("Parameters:") + "\\n\\\n")
+            for param_desc in self.parameters:
+                param_desc.dump(f, wrapper)
+            f.write("\\n\\\n")
+        if self.returns is not None:
+            dump_filled_text(f, wrapper, "Return Value(s): " + self.returns)
+            f.write("\\n\\\n")
+
+    def dump_pymethod_def(self, f, class_name):
+        f.write('    { "' + self.pyname + '", reinterpret_cast<PyCFunction>(')
+        f.write(class_name + '_' + self.name + '), ')
+        if len(self.parameters) == 0:
+            f.write('METH_NOARGS,\n')
+        else:
+            f.write('METH_VARARGS,\n')
+        f.write('      ' + class_name + '_' + self.name + '_doc },\n')
+
+class VariableDefinition:
+    def __init__(self, nodelist):
+        self.value = None
+        self.brief_desc = None
+        self.detailed_desc = []
+
+        for node in nodelist:
+            if node.nodeName == "name":
+                self.name = get_text(node)
+            elif node.nodeName == "initializer":
+                self.value = get_text(node)
+            elif node.nodeName == "briefdescription":
+                self.brief_desc = get_text(node)
+            elif node.nodeName == "detaileddescription":
+                for para in node.childNodes:
+                    if para.nodeName != "para":
+                        # ignore surrounding empty nodes
+                        continue
+                    self.detailed_desc.append(Paragraph(para))
+
+    def dump_doc(self, f, wrapper=TextWrapper()):
+        name_value = self.name
+        if self.value is not None:
+            name_value += ' = ' + self.value
+        dump_filled_text(f, wrapper, name_value)
+        f.write('\\n\\\n')
+
+        desc_initial_indent = wrapper.initial_indent + "  "
+        desc_subsequent_indent = wrapper.subsequent_indent + "  "
+        desc_wrapper = TextWrapper(initial_indent=desc_initial_indent,
+                                   subsequent_indent=desc_subsequent_indent)
+        if self.brief_desc is not None:
+            dump_filled_text(f, desc_wrapper, self.brief_desc)
+            f.write("\\n\\\n\\n\\\n")
+
+        for para in self.detailed_desc:
+            para.dump(f, desc_wrapper)
+
+def dump_filled_text(f, wrapper, text):
+    """Fill given text using wrapper, and dump it to the given file
+    appending an escaped CR at each end of line.
+    """
+    filled_text = wrapper.fill(text) if wrapper is not None else text
+    f.write("".join(re.sub("\n", r"\\n\\\n", filled_text)))
+
+def camel_to_lowerscores(matchobj):
+    oldtext = matchobj.group(0)
+    newtext = re.sub(RE_SIMPLECAMEL, r"\1_\2", oldtext)
+    newtext = re.sub(RE_CAMELAFTERUPPER, r"\1_\2\3", newtext)
+    newtext = newtext.lower()
+    camel_replacements[oldtext] = newtext
+    return newtext.lower()
+
+def cpp_to_python(text):
+    text = text.replace("::", ".")
+    text = text.replace('"', '\\"')
+
+    # convert camelCase to "_"-concatenated format
+    # (e.g. getLength -> get_length)
+    return re.sub(RE_CAMELTERM, camel_to_lowerscores, text)
+
+def convert_type_name(type_name):
+    """Convert C++ type name to python type name using common conventions"""
+    # strip off leading 'const' and trailing '&/*'
+    type_name = re.sub("^const\S*", "", type_name)
+    type_name = re.sub("\S*[&\*]$", "", type_name)
+
+    # We often typedef smart pointers as [Const]TypePtr.  Convert them to
+    # just "Type"
+    type_name = re.sub("^Const", "", type_name)
+    type_name = re.sub("Ptr$", "", type_name)
+
+    if type_name == "std::string":
+        return "string"
+    if re.search(r"(int\d+_t|size_t)", type_name):
+        return "integer"
+    return type_name
+
+def get_text(root, do_convert=True):
+    """Recursively extract bare text inside the specified node (root),
+    concatenate all extracted text and return the result.
+    """
+    nodelist = root.childNodes
+    rc = []
+    for node in nodelist:
+        if node.nodeType == node.TEXT_NODE:
+            if do_convert:
+                rc.append(cpp_to_python(node.data))
+            else:
+                rc.append(node.data)
+        elif node.nodeType == node.ELEMENT_NODE:
+            rc.append(get_text(node))
+    # return the result, removing any leading newlines (that often happens for
+    # brief descriptions, which will cause lines not well aligned)
+    return re.sub("^(\n*)", "", ''.join(rc))
+
+def get_text_fromnodelist(nodelist, do_convert=True):
+    """Recursively extract bare text inside the specified node (root),
+    concatenate all extracted text and return the result.
+    """
+    rc = []
+    for node in nodelist:
+        if node.nodeType == node.TEXT_NODE:
+            if do_convert:
+                rc.append(cpp_to_python(node.data))
+            else:
+                rc.append(node.data)
+        elif node.nodeType == node.ELEMENT_NODE:
+            rc.append(get_text(node))
+    # return the result, removing any leading newlines (that often happens for
+    # brief descriptions, which will cause lines not well aligned)
+    return re.sub("^(\n*)", "", ''.join(rc))
+
+def parse_parameters(nodelist):
+    rc = []
+    for node in nodelist:
+        if node.nodeName != "parameteritem":
+            continue
+        # for simplicity, we assume one parametername and one
+        # parameterdescription for each parameter.
+        name = get_text(node.getElementsByTagName("parametername")[0])
+        desc = get_text(node.getElementsByTagName("parameterdescription")[0])
+        rc.append(NamedItem(name, desc))
+    return rc
+
+def parse_function_description(func_def, nodelist):
+    for node in nodelist:
+        # nodelist contains beginning and ending empty text nodes.
+        # ignore them (otherwise they cause disruption below).
+        if node.nodeName != "para":
+            continue
+
+        if node.getElementsByTagName("parameterlist"):
+            # within this node there may be exception list, parameter list,
+            # and description for return value.  parse and store them
+            # seprately.
+            for paramlist in node.getElementsByTagName("parameterlist"):
+                if paramlist.getAttribute("kind") == "exception":
+                    func_def.exceptions = \
+                        parse_parameters(paramlist.childNodes)
+                elif paramlist.getAttribute("kind") == "param":
+                    func_def.parameters = \
+                        parse_parameters(paramlist.childNodes)
+            if node.getElementsByTagName("simplesect"):
+                simplesect = node.getElementsByTagName("simplesect")[0]
+                if simplesect.getAttribute("kind") == "return":
+                    func_def.returns = get_text(simplesect)
+        else:
+            # for normal text, python listing and itemized list, append them
+            # to the list of paragraphs
+            func_def.detailed_desc.append(Paragraph(node))
+
+def parse_function(func_def, class_name, nodelist):
+    for node in nodelist:
+        if node.nodeName == "name":
+            func_def.name = get_text(node, False)
+            func_def.pyname = cpp_to_python(func_def.name)
+        elif node.nodeName == "argsstring":
+            # extract parameter names only, assuming they immediately follow
+            # their type name + space, and are immeidatelly followed by
+            # either "," or ")".  If it's a pointer or reference, */& is
+            # prepended to the parameter name without a space:
+            # e.g. (int var1, char *var2, Foo &var3)
+            args = get_text(node, False)
+            # extract parameter names, possibly with */&
+            func_def.args = ', '.join(re.findall(r"\s(\S+)[,)]", args))
+            # then remove any */& symbols
+            func_def.args = re.sub("[\*&]", "", func_def.args)
+        elif node.nodeName == "type" and node.hasChildNodes():
+            func_def.ret_type = convert_type_name(get_text(node, False))
+        elif node.nodeName == "param":
+            func_def.have_param = True
+        elif node.nodeName == "briefdescription":
+            func_def.brief_desc = get_text(node)
+        elif node.nodeName == "detaileddescription":
+            parse_function_description(func_def, node.childNodes)
+    # identify the type of function using the name and arg
+    if func_def.name == class_name and \
+            re.search("^\(const " + class_name + " &[^,]*$", args):
+        # This function is ClassName(const ClassName& param), which is
+        # the copy constructor.
+        func_def.type = func_def.COPY_CONSTRUCTOR
+    elif func_def.name == class_name:
+        # if it's not the copy ctor but the function name == class name,
+        # it's a constructor.
+        func_def.type = func_def.CONSTRUCTOR
+    elif func_def.name == "~" + class_name:
+        func_def.type = func_def.DESTRUCTOR
+    elif func_def.name == "operator=":
+        func_def.type = func_def.ASSIGNMENT_OP
+
+    # register the definition to the approriate list
+    if func_def.type == func_def.CONSTRUCTOR:
+        constructors.append(func_def)
+    elif func_def.type == func_def.OTHER:
+        member_functions.append(func_def)
+
+def parse_functions(class_name, nodelist):
+    for node in nodelist:
+        if node.nodeName == "memberdef" and \
+                node.getAttribute("kind") == "function":
+            func_def = FunctionDefinition()
+            parse_function(func_def, class_name, node.childNodes)
+
+def parse_class_variables(class_name, nodelist):
+    for node in nodelist:
+        if node.nodeName == "memberdef" and \
+                node.getAttribute("kind") == "variable":
+            class_variables.append(VariableDefinition(node.childNodes))
+
+def dump(f, class_name, class_brief_doc, class_detailed_doc):
+    f.write("namespace {\n")
+
+    f.write('#ifdef COPY_THIS_TO_MAIN_CC\n')
+    for func in member_functions:
+        func.dump_pymethod_def(f, class_name)
+    f.write('#endif // COPY_THIS_TO_MAIN_CC\n\n')
+
+    f.write("const char* const " + class_name + '_doc = "\\\n')
+    if class_brief_doc is not None:
+        f.write("".join(re.sub("\n", r"\\n\\\n", fill(class_brief_doc))))
+        f.write("\\n\\\n")
+        f.write("\\n\\\n")
+    if len(class_detailed_doc) > 0:
+        for para in class_detailed_doc:
+            para.dump(f, wrapper=TextWrapper())
+
+    # dump constructors
+    for func in constructors:
+        indent = " " * 4
+        func.dump_doc(f, wrapper=TextWrapper(initial_indent=indent,
+                                             subsequent_indent=indent))
+
+    # dump class variables
+    if len(class_variables) > 0:
+        f.write("Class constant data:\\n\\\n")
+        for var in class_variables:
+            var.dump_doc(f)
+
+    f.write("\";\n")
+
+    for func in member_functions:
+        f.write("\n")
+        f.write("const char* const " + class_name + "_" + func.name + \
+                    "_doc = \"\\\n");
+        func.dump_doc(f)
+        f.write("\";\n")
+
+    f.write("} // unnamed namespace") # close namespace
+
+if __name__ == '__main__':
+    dom = parse(sys.argv[1])
+    class_elements = dom.getElementsByTagName("compounddef")[0].childNodes
+    class_brief_doc = None
+    class_detailed_doc = []
+    for node in class_elements:
+        if node.nodeName == "compoundname":
+            # class name is the last portion of the period-separated fully
+            # qualified class name. (this should exist)
+            class_name = re.split("\.", get_text(node))[-1]
+        if node.nodeName == "briefdescription":
+            # we assume a brief description consists at most one para
+            class_brief_doc = get_text(node)
+        elif node.nodeName == "detaileddescription":
+            # a detaild description consists of one or more paragraphs
+            for para in node.childNodes:
+                if para.nodeName != "para": # ignore surrounding empty nodes
+                    continue
+                class_detailed_doc.append(Paragraph(para))
+        elif node.nodeName == "sectiondef" and \
+                node.getAttribute("kind") == "public-func":
+            parse_functions(class_name, node.childNodes)
+        elif node.nodeName == "sectiondef" and \
+                node.getAttribute("kind") == "public-static-attrib":
+            parse_class_variables(class_name, node.childNodes)
+        elif node.nodeName == "sectiondef" and \
+                node.getAttribute("kind") == "user-defined":
+            # there are two possiblities: functions and variables
+            for child in node.childNodes:
+                if child.nodeName != "memberdef":
+                    continue
+                if child.getAttribute("kind") == "function":
+                    parse_function(FunctionDefinition(), class_name,
+                                   child.childNodes)
+                elif child.getAttribute("kind") == "variable":
+                    class_variables.append(VariableDefinition(child.childNodes))
+
+    dump(sys.stdout, class_name, class_brief_doc, class_detailed_doc)
+
+    if len(camel_replacements) > 0:
+        sys.stderr.write("Replaced camelCased terms:\n")
+        for oldterm in camel_replacements.keys():
+            sys.stderr.write("%s => %s\n" % (oldterm,
+                                             camel_replacements[oldterm]))