Source code for pyensae.languages.antlr_grammar_build

"""
Helpers to build grammars
This module requires `antlr4 <http://www.antlr.org/>`_
and `antlr4-python3-runtime <https://pypi.python.org/pypi/antlr4-python3-runtime/>`_.


:githublink:`%|py|7`
"""
import os
import sys
from pyquickhelper.loghelper import noLOG


def _is_syntax_is_missing(language):
    """
    Locates the grammar file of a language next to this module.

    Nothing is downloaded: when the file is missing, the raised exception
    tells where the grammar can be obtained from.

    :param language: language: python, sqlite, ...
    :return: grammar file (path of ``<language>.g4`` in this module's folder)
    :raises FileNotFoundError: the grammar file is missing but the language
        is one of the known ones, the message contains the location
        the grammar can be retrieved from
    :raises KeyError: the grammar file is missing and the language is unknown


    :githublink:`%|py|19`
    """
    # Known places the grammars can be fetched from manually.
    locations = {
        "R": "https://github.com/antlr/grammars-v4/tree/master/r/R.g4",
        "SQLite": "https://github.com/antlr/grammars-v4/blob/master/sqlite/SQLite.g4",
        "Pig": "http://wiki.apache.org/pig/PigLexer",
        "CSharp": "https://github.com/antlr/grammars-v4/tree/master/csharp",
    }

    folder = os.path.dirname(__file__)
    filename = os.path.join(folder, language + ".g4")

    # The local file wins, even for a language which is not listed above.
    if os.path.exists(filename):
        return filename

    if language in locations:
        raise FileNotFoundError(
            "The grammar {0} is not available, you should get it from {1}".format(
                language, locations[language]))

    raise KeyError(
        "unexpected language: {0}, not in {1}".format(
            language, ",".join(locations)))
def build_grammar(g4, version="4.8", fLOG=noLOG):
    """
    Compiles the grammar for a specific file.

    :param g4: grammar format antlr4, a value which does not end with
        ``.g4`` is taken as the name of a grammar stored in the folder
        of this module
    :param version: version of *antlr4* to use, 4.8
    :param fLOG: logging function
    :return: standard output and standard error of the compilation
        merged into a single string, separated by ``---ERR---``
    :raises FileNotFoundError: the *antlr4* jar is not in the folder of
        this module and could not be downloaded
    :raises Exception: the compilation failed

    The compilation must be done with `antlr4 <http://www.antlr.org/>`_.
    It generates a lexer and a parser which can be imported in Python.
    The options for the command line are described at:
    `antlr4 options <https://theantlrguy.atlassian.net/wiki/display/ANTLR4/Options>`_.

    The function modifies environment variable ``CLASSPATH``
    (and ``PATH`` when the Windows fallback for *java* is used).

    .. exref::
        :title: Builds a Antlr4 grammar

        See `grammars-v4 <https://github.com/antlr/grammars-v4>`_

        ::

            build_grammar("R.g4")


    :githublink:`%|py|65`
    """
    if not g4.endswith(".g4"):
        fold = os.path.abspath(os.path.dirname(__file__))
        g4 = os.path.join(fold, g4 + ".g4")

    url = "http://www.antlr.org/download/antlr-{0}-complete.jar".format(
        version)
    domain, name = url.rsplit("/", 1)
    domain += "/"
    folder = os.path.abspath(os.path.dirname(__file__))
    final = os.path.join(folder, name)

    if not os.path.exists(final):
        from ..datasource.http_retrieve import download_data
        name = download_data(name, website=domain, whereTo=folder)
        if not os.path.exists(name):
            raise FileNotFoundError("unable to download: " + url)

    # The classpath is assembled with the platform separator (';' on Windows,
    # ':' elsewhere) so that java finds the jar on every system.
    # The previous value is only kept when it already mentions the jar.
    path = os.environ.get("CLASSPATH", "")
    entries = [".", final]
    if name in path:
        entries.append(path)
    os.environ["CLASSPATH"] = os.pathsep.join(entries)
    fLOG("CLASSPATH", os.environ["CLASSPATH"])

    # A lexer grammar is compiled for the default target,
    # any other grammar is compiled for Python 3.
    cmd = "org.antlr.v4.Tool "
    if "Lexer" not in g4:
        cmd += "-Dlanguage=Python3 "
    cmd += g4

    from pyquickhelper.loghelper import run_cmd
    out, err = run_cmd("java " + cmd, wait=True, fLOG=fLOG)

    def compiled():
        # NOTE(review): the check looks for <grammar>.tokens or <grammar>.py,
        # confirm this matches the files antlr4 produces for the grammars
        # in use (a combined grammar may only produce <grammar>Parser.py).
        if "Lexer" in g4:
            lexer = g4.replace(".g4", ".tokens")
        else:
            lexer = g4.replace(".g4", ".py")
        return os.path.exists(lexer)

    def failure(hint=""):
        # Builds the exception describing a failed compilation,
        # *err* and the environment are read when the function is called.
        return Exception(
            "unable to compile: " + final +
            "\nCLASSPATH:\n" + os.environ["CLASSPATH"] +
            "\nERR:\n" + err +
            "\nCMD:\njava " + cmd + hint)

    if not compiled() or "error" in err:
        javapath = r'C:\Program Files\Java\jre7\bin\java.exe'
        if sys.platform.startswith("win") and os.path.exists(javapath):
            # Second attempt with an explicit java executable. PATH is only
            # touched here, it receives the directory (not the executable).
            java_dir = os.path.dirname(javapath)
            current = os.environ.get("PATH", "")
            if java_dir not in current:
                os.environ["PATH"] = (
                    current + os.pathsep + java_dir if current else java_dir)
            out, err = run_cmd(
                '"' + javapath + '" ' + cmd, wait=True, fLOG=fLOG)
            if not compiled() or "error" in err:
                raise failure("\nYou should do it manually.")
        elif err:
            # Warnings are not a failure: only the other lines
            # of the standard error are considered.
            remaining = [line for line in err.splitlines()
                         if not line.startswith("warning(")]
            if "\n".join(remaining).strip("\n "):
                raise failure()

    # The generated files mention the absolute location of the grammar,
    # it is removed when it contains the user name.
    user = os.environ.get("USERNAME", os.environ.get("USER", ""))
    dest = os.path.dirname(g4)
    if dest and user in g4:
        markers = ("Parser", "Lexer", "Token", "Listener")
        for generated in os.listdir(dest):
            if not any(marker in generated for marker in markers):
                continue
            full = os.path.join(dest, generated)
            if not os.path.isfile(full):
                continue
            with open(full, "r", encoding="utf-8") as f:
                content = f.read()
            content1 = content.replace(dest, "")
            if content1 != content:
                fLOG("[build_grammar] modified", generated)
                with open(full, "w", encoding="utf-8") as f:
                    f.write(content1)

    return out + "\n---ERR---\n" + err