# Source code of ensae_teaching_cs.homeblog.latex_file

"""
This file contains some functions to extract pieces of code from a latex file


:githublink:`%|py|5`
"""

import bisect
import os
import re

from pyquickhelper.loghelper import fLOG
from .program_helper import guess_language_code


class LatexCode:
    """
    Describes one piece of code found in a latex file
    (many latex files contain examples of code).
    """

    # Finds a content-type marker such as ``(--py--)`` in a comment:
    # the first group is the whole marker, the second one the type name.
    comment_analysis = re.compile("([(][-]{2}([a-z]+)[-]{2}[)])")

    def __init__(self, parent, line, content, comment=None, content_type=None):
        """
        constructor

        :param      parent:         (LatexFile) object
        :param      line:           (tuple of int) stack of line numbers, the last item
                                    is the line in the file containing the code, 0 is the first one
        :param      content:        code content
        :param      comment:        comment for the piece of code
        :param      content_type:   type of the content (ie: py), if None, it is deduced
                                    from the comment or guessed from the content
        :raises TypeError:          if *line* is not a tuple

        if comment contains ``(--<something>--)``, it indicates the content type
        of the zone (ie: py) and the marker is removed from the comment
        """
        if not isinstance(line, tuple):
            raise TypeError("we expect tuple for the line number")
        self.parent = parent
        self.line = line
        self.content = content
        self.comment = comment

        if content_type is not None:
            self.content_type = content_type
        elif self.comment is None:
            self.content_type = ""
        else:
            found = LatexCode.comment_analysis.search(self.comment)
            if found:
                marker, self.content_type = found.groups()
                # the marker is removed from the comment (a string),
                # the object itself has no method replace
                self.comment = self.comment.replace(marker, "")
            else:
                # no marker: the guess is kept only if the second item of the
                # answer of guess_language_code is above 0.66
                guess = guess_language_code(self.content)
                if guess is not None and guess[1] > 0.66:
                    self.content_type = guess[0]
                else:
                    self.content_type = ""

    def __str__(self):
        """
        usual, the string mimics a line of a python traceback
        (file name, line number starting at 1) followed by the comment
        and the content type if there is a comment
        """
        if self.comment is not None:
            comment = ", comment: %s (-t:%s)" % (self.comment, self.content_type)
        else:
            comment = ""
        return "  File \"%s\", line %d%s" % (self.parent.file, self.line[-1] + 1, comment)


class LatexIncludedFile:
    """
    Describes a file included in a latex file.

    .. list-table::
        :widths: auto
        :header-rows: 1

        * - attribute
          - meaning
        * - parent
          - (LatexFile)
        * - line
          - (tuple of int) stack of line numbers, forwarded as is to the created object
        * - file
          - (str) file name
        * - comment
          - (str) comment
        * - obj
          - (LatexFile|LatexCode) object
    """

    # extensions (lower case) read as a piece of code
    _code_extensions = (".py", ".cpp", ".h", ".hpp", ".c", ".hhp", ".vba", ".sql",
                        ".r", ".hhk", ".iss", ".txt", ".xml", ".html", ".js")

    # extension without the dot -> content type given to LatexCode,
    # a missing extension means the type is left to LatexCode (None)
    _content_types = {"html": "xml", "hpp": "cpp", "h": "cpp", "vba": "vb",
                      "py": "py", "xml": "xml", "cpp": "cpp", "js": "js", "c": "cpp",
                      "sql": "sql"}

    def __init__(self, parent, line, file, comment):
        """
        :param      parent:  (LatexFile) which contains this file
        :param      line:    line number where it was found in the latex file it belongs to,
                             it is given to :class:`LatexCode` which expects a tuple
        :param      file:    file name
        :param      comment: comment
        """
        self.parent = parent
        self.line = line
        self.file = file
        self.comment = comment
        self.init()

    @staticmethod
    def _read_text(filename):
        """
        Reads a text file encoded in utf8 and falls back to latin-1
        if the content is not valid utf8.

        :param      filename:    file name
        :return:                 (str) file content
        """
        try:
            with open(filename, "r", encoding="utf8") as f:
                return f.read()
        except UnicodeDecodeError:
            # latin-1 maps every byte to a character, this second attempt
            # cannot fail with UnicodeDecodeError, no other fallback is needed
            with open(filename, "r", encoding="latin-1") as f:
                return f.read()

    def init(self):
        """
        Completes the constructor: creates member ``obj``,
        a :class:`LatexFile` for a ``.tex`` file (not read here),
        a :class:`LatexCode` holding the file content for a known code extension.

        :raises ValueError: if the extension is neither ``.tex`` nor a known code extension
        """
        ext = os.path.splitext(self.file)[-1].lower()
        if ext == ".tex":
            self.obj = LatexFile(self.file, self.parent.root, line=self.line)
        elif ext in LatexIncludedFile._code_extensions:
            content = LatexIncludedFile._read_text(self.file)
            typ = LatexIncludedFile._content_types.get(ext.strip(". "), None)
            self.obj = LatexCode(self.parent, self.line,
                                 content, self.comment, content_type=typ)
        else:
            raise ValueError(
                "unable to read file %s, not python, not latex" % self.file)

    def enumerate_code(self, skip_missing=False):
        """
        Enumerates all pieces of code (in ``verbatim``,
        ``verbatimx`` or ``\\inputcode`` sections).

        :param      skip_missing:    given to :meth:`LatexFile.enumerate_code`
                                     when the included file is a latex file
        :return:                     LatexCode
        :raises TypeError:           if ``obj`` is neither a LatexFile nor a LatexCode
        """
        if isinstance(self.obj, LatexFile):
            yield from self.obj.enumerate_code(skip_missing=skip_missing)
        elif isinstance(self.obj, LatexCode):
            yield self.obj
        else:
            raise TypeError("unexpected class for self.obj: %s" %
                            str(type(self.obj)))


class LatexFile:
    """
    Description of a latex file.

    .. list-table::
        :widths: auto
        :header-rows: 1

        * - attribute
          - meaning
        * - file
          - file name for the latex file
        * - root
          - every file referenced in the latex will use ``root`` as a root for the relative paths
        * - filelines
          - for each line, we store every included file here,
            it is a dictionary { line number : object file }
        * - line
          - keeps line number in a stack (if this file is included by another one)
    """

    # One alternative per kind of section, the indices below are the positions
    # in ``match.groups()``:
    #   0  verbatim section (3: comment between %%%, 4: code)
    #   7  \inputcodes{file}{...}{comment} (8: file, 10: comment)
    #   11 \input{file} (12: file)
    #   13 \inputcode{file}{comment} (14: file, 15: comment)
    _code_pattern = re.compile(
        "(\\\\begin[{]verbatim(x|no|nocut)?[}]( *[%]{3}(.*?)[%]{3})?((.|\\n)*?)\\\\end[{]verbatim(x|no|nocut)??[}])|" +
        "(\\\\inputcodes[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}][{](.*?)[}])|" +
        "(\\\\input[{]([./a-zA-Z0-9_]+?)[}])|" +
        "(\\\\inputcode[{]([./a-zA-Z0-9_]+?)[}][{](.*?)[}])")

    # a comment ``%%% ... %%%``, looked for on the line before a verbatim section
    _comment_pattern = re.compile("([%]{3}(.*?)[%]{3})")

    def __init__(self, file, root=None, line=tuple()):
        """
        constructor

        :param      file:        file name
        :param      root:        for included files, the root determines
                                 the folder relative paths refer to,
                                 if None, the file folder will be used as a root
        :param      line:        if this file is included by another one,
                                 it keeps the line number in a stack (tuple)
        """
        self.file = file
        self.root = root
        self.filelines = {}
        self.line = line

        if self.root is None:
            self.root = os.path.abspath(os.path.split(file)[0])

    def __str__(self):
        """
        usual
        """
        return "file: %s" % self.file

    def read(self):
        """
        Reads the latex file and stores its content into ``self.content``,
        if the method is called a second time,
        the function returns member ``content``.
        The file is decoded with utf8, then latin-1 if utf8 fails.

        :return:         string (file content)
        """
        cached = getattr(self, "content", None)
        if cached is not None:
            return cached

        try:
            with open(self.file, "r", encoding="utf8") as f:
                content = f.read()
        except UnicodeDecodeError:
            # latin-1 decodes any sequence of bytes, no further fallback is needed
            with open(self.file, "r", encoding="latin-1") as f:
                content = f.read()
        self.content = content
        return content

    @staticmethod
    def dichotomy_find(array, value):
        """
        Finds the greatest position which contains a value below or equal to ``value``.

        :param      array:       sorted array of integers
        :param      value:       value
        :return:                 position p such as array[p] <= value < array[p+1],
                                 the last position if *value* is not below the last element,
                                 0 if *value* is below every element or if the array is empty
        """
        # bisect_right returns the number of elements <= value
        return max(bisect.bisect_right(array, value) - 1, 0)

    def _enumerate_included(self, li, relpath, comment, skip_missing):
        """
        Enumerates the pieces of code of a file included at line *li*,
        the included file is created once and stored in ``self.filelines``.

        :param      li:              line number of the inclusion (key in ``self.filelines``)
        :param      relpath:         path of the included file, relative to ``self.root``
        :param      comment:         comment associated to the inclusion (or None)
        :param      skip_missing:    if True, a missing file is logged and skipped
        :return:                     LatexCode
        """
        if li not in self.filelines:
            fil = os.path.join(self.root, relpath)
            try:
                self.filelines[li] = LatexIncludedFile(
                    self, self.line + (li,), fil, comment)
            except FileNotFoundError:
                # a file of code is read as soon as the object is created
                if not skip_missing:
                    raise
                fLOG("w,unable to find file", fil)
                return

        yield from self.filelines[li].enumerate_code(skip_missing=skip_missing)

    def enumerate_code(self, skip_missing=False):
        """
        Enumerates all pieces of code (in ``verbatim``, ``verbatimx``
        or ``\\inputcode`` sections), files included with ``\\input``,
        ``\\inputcode`` or ``\\inputcodes`` are explored as well.

        :param      skip_missing:        if True, avoids stopping whenever a file is not found
        :return:                         LatexCode
        """
        try:
            content = self.read()
        except FileNotFoundError:
            if not skip_missing:
                raise
            fLOG("w,unable to find file", self.file)
            content = " "
        lines = content.split("\n")

        # position of the first character of every line,
        # used to convert the position of a match into a line number
        linebeginning = []
        offset = 0
        for line in lines:
            linebeginning.append(offset)
            offset += len(line) + 1

        for m in LatexFile._code_pattern.finditer(content):
            li = LatexFile.dichotomy_find(linebeginning, m.start())
            gs = m.groups()

            if gs[0] is not None:
                # verbatim section
                comment = gs[3].strip() if gs[3] is not None else None
                if not comment and li > 0:
                    # no comment next to \begin{verbatim}, we check the previous
                    # line (the first line of the file included)
                    com = LatexFile._comment_pattern.search(lines[li - 1])
                    if com:
                        comment = com.groups()[1]
                yield LatexCode(self, self.line + (li,), gs[4], comment)

            elif gs[7] is not None:
                # \inputcodes{file}{...}{comment}
                yield from self._enumerate_included(li, gs[8], gs[10], skip_missing)

            elif gs[11] is not None:
                # \input{file}
                yield from self._enumerate_included(li, gs[12], None, skip_missing)

            elif gs[13] is not None:
                # \inputcode{file}{comment}
                yield from self._enumerate_included(li, gs[14], gs[15], skip_missing)

    def code_in_html(self, header=None, footer=None, classpre="prettyprint", classpre_type="brush: {0}",
                     classcom="codeintro", skip_missing=False, remove_unnecessary_indentation=True):
        """
        Produces html format containing all the code examples.

        :param      header:          if not None, it should end by ``<body>``, it is added first
        :param      footer:          if not None, it should start by ``</body>``, it is added last
        :param      classpre:        if None, use ``<pre>`` otherwise ``<pre class="classpre">``
        :param      classpre_type:   if the type of the code is known, then this template
                                     will be used instead of the first one
        :param      classcom:        if not None, the comment is output in ``<p class="classcom">``,
                                     otherwise in ``<p>``
        :param      skip_missing:    if True, avoids stopping whenever a file is not found
        :param      remove_unnecessary_indentation:  remove the indentation shared by all non empty lines
        :return:                     string

        Characters ``&``, ``<``, ``>`` are escaped in the code,
        comments are inserted without any change.
        """
        res = []
        if header is not None:
            res.append(header)

        for code in self.enumerate_code(skip_missing=skip_missing):
            if code.comment is not None:
                if classcom is not None:
                    com = "<p class=\"%s\">%s</p>" % (classcom, code.comment)
                else:
                    com = "<p>%s</p>" % code.comment
            elif classcom is not None:
                com = "<p class=\"%s\">File: %s, line %d</p>" % (
                    classcom, os.path.split(code.parent.file)[-1], code.line[-1])
            else:
                # code.line is a tuple, only its last item is displayed:
                # formatting the whole tuple with one %s fails as soon as
                # the code comes from an included file (two items or more)
                com = "<p>line %s</p>" % code.line[-1]
            res.append(com)
            res.append("<!-- File \"%s\", lines %s -->" %
                       (code.parent.file, str(code.line)))

            if classpre_type and code.content_type:
                pre = "<pre class=\"%s\">" % classpre_type.format(
                    code.content_type)
            elif classpre is not None:
                pre = "<pre class=\"%s\">" % classpre
            else:
                pre = "<pre>"
            res.append(pre)

            # & must be escaped first, otherwise the entities added
            # for < and > would be escaped a second time
            memocode = code.content.replace("&", "&amp;") \
                                   .replace("<", "&lt;").replace(">", "&gt;")
            if remove_unnecessary_indentation:
                lines = memocode.split("\n")
                # indentation of every line which is not empty or blank
                indents = [len(li) - len(li.lstrip())
                           for li in lines if li.strip()]
                df = min(indents) if indents else 0
                if df > 0:
                    # lines shorter than the indentation are left unchanged
                    lines = [li[df:] if len(li) >= df else li for li in lines]
                    memocode = "\n".join(lines)

            res.append(memocode)
            res.append("</pre>")

        if footer is not None:
            res.append(footer)
        return "\n".join(res)
# Links
#
# Contents
#
# Information
#
# Source code of ensae_teaching_cs.homeblog.latex_file