"""
Convert R into Python
:githublink:`%|py|5`
"""
import sys
from antlr4 import ParseTreeListener, ParseTreeWalker
from pyquickhelper.pycode import remove_extra_spaces_and_pep8
from .RParser import RParser
from .RLexer import RLexer
from .antlr_grammar_use import parse_code
class R2PyConversionError(Exception):
    """
    Raised when conversion cannot be done.

    The message embeds the offending node, its parent context, the Python
    code produced so far and the pending stack.
    :githublink:`%|py|16`
    """

    def __init__(self, node, message, sofar, sostack):
        """
        :param node: node being converted (a parse-tree node or a string)
        :param message: extra information appended at the end of the message
        :param sofar: Python code generated before the failure
        :param sostack: textual dump of the pending stack
        :githublink:`%|py|20`
        """
        try:
            text = node.symbol.text
        except AttributeError:
            # Strings and any object without a token end up here.
            text = "ERROR"
        if isinstance(node, str):
            parent, parent_type = "", str
        else:
            # getattr keeps the constructor from failing while an error is
            # being reported for a node which has no parent context.
            parent = getattr(node, "parentCtx", None)
            parent_type = type(parent)
        mes = "Unable to convert\n'{0}'\n{1}\n{2}\nPARENT\n'{3}'\n{4}\n---SOFAR---\n{5}\n---SOSTACK---\n{6}\n---END---".format(
            text, type(node), node, parent, parent_type, sofar, sostack)
        mes += "\n------------\nMESSAGE=\n" + str(message)
        Exception.__init__(self, mes)
[docs]class TreeStringListener(ParseTreeListener):
"""
Parse-tree listener which walks the tree of an R script and builds
the equivalent Python code as a string.
Terminal nodes are pushed on a stack by *add_code*, the stack is turned
into text by *empty_stack* and the result is returned by *get_python*.
:githublink:`%|py|37`
"""
def __init__(self, parser, fLOG=None):
    """
    constructor

    :param parser: parser used to parse the code
    :param fLOG: logging function
    :githublink:`%|py|45`
    """
    ParseTreeListener.__init__(self)
    # Debug trace of the walk (one line per visited node or rule).
    self.buffer = []
    # Current rule depth, used to indent the lines of the debug trace.
    self.level = 0
    self.parser = parser
    # Pending couples (name, node or string) for the current instruction.
    self.stack = []
    # Pieces of generated Python code, joined by get_python.
    self.elements = []
    # Current indentation level of the generated code.
    self.indent = 0
    # One boolean per opened block, switched to True when '{' is met.
    self.block = []
    # Extra function names (such as 'range') to insert in loops.
    self.memo = []
    # Import lines or module names required by the generated code.
    self.imports = set()
    # True while the tokens of a formula are being read.
    self.in_formula = False
    self._fLOG = fLOG
    # Maps id(context of an 'if') to the indentation it was met at.
    self.indent_level = {}
    # True when the arguments of the current call must be wrapped in a lambda.
    self.add_lambda = False
    # Tokens collected for an inline function; defined here so that
    # add_code never reads the attribute before it exists.
    self.inlinefunction = []
def fLOG(self, *args, **kwargs):
    """
    logging, forwards every argument to the logging function
    given to the constructor, does nothing if there is none
    :githublink:`%|py|64`
    """
    if self._fLOG:
        self._fLOG(*args, **kwargs)
def get_python(self):
    """
    Get the Python code for the R code.

    Every entry of *self.imports* starting with ``from`` is kept as is,
    the others are prefixed with ``import``. The import lines are sorted
    and followed by an empty line when there is at least one.

    :return: Python code
    :githublink:`%|py|73`
    """
    lines = [imp if imp.startswith("from") else "import {0}".format(imp)
             for imp in sorted(self.imports)]
    header = "\n".join(lines)
    if header:
        header += "\n\n"
    return header + "".join(self.elements)
[docs] def add_code(self, node):
"""
Converts one node into :epkg:`python`.
The method dispatches on the rule name of the node (see
*terminal_node_name*) and on the text of its token. Most branches push
a couple *(name, node or replacement string)* on *self.stack*, a few of
them write comments or end of lines straight into *self.elements* or
flush the stack with *empty_stack*.
:param node: terminal node, it must expose *symbol.text* and *parentCtx*
:return: result of *add_code_final*
:raises R2PyConversionError: when the block or indentation state is
inconsistent; the final statement also raises it, apparently for any
token no branch handled
:githublink:`%|py|87`
"""
# NOTE(review): the indentation of this method was lost in this copy of
# the file, the nesting has to be restored from the upstream source.
name = self.terminal_node_name(node)
text = node.symbol.text
# A comma or an end of line closes the string opened for a formula.
if self.in_formula:
if text in (",", "\n"):
self.stack.append(("Formop", '"'))
self.in_formula = False
self.fLOG("[TreeStringListener]", len(self.block),
"~" if self.in_formula else " ", name, text)
else:
self.fLOG("[TreeStringListener]", len(self.block), " ", name, text)
# Tokens attached to the root rule: comments, ends of instruction, EOF.
if name == "Parse":
if text.startswith("#"):
# Comment
self.elements.append(text.strip("\n"))
self.elements.append("\n")
return self.add_code_final()
elif text in (";", "\n"):
# End of an instruction
self.empty_stack()
if len(self.elements) > 0 and self.elements[-1] != '\n':
self.elements.append("\n")
return self.add_code_final()
elif text == "<EOF>":
return self.add_code_final()
elif len(text.strip()) == 0:
return self.add_code_final()
# Identifiers: opens a formula string, marks function calls and replaces
# 'list' and 'is' by helpers imported from python2r_helper.
elif name == "Identifier":
name_parent = self.terminal_node_name(node.parentCtx)
if name_parent == "Formula_simple_A":
if self.in_formula:
raise R2PyConversionError(node, name, "".join(
self.elements), "\n".join(str(_) for _ in self.stack))
self.stack.append(("Formop", '"'))
self.in_formula = True
elif name_parent == "Functioncall":
# Only a call to system.time switches add_lambda on.
if text == "system.time":
self.add_lambda = True
else:
self.add_lambda = False
self.stack.append(("Functioncall", ""))
if text == "list":
self.imports.add("from python2r_helper import list_or_dict")
self.stack.append((name, "list_or_dict"))
elif text == "is":
self.imports.add("from python2r_helper import is_")
self.stack.append((name, "is_"))
elif text == "(" and self.add_lambda:
self.stack.append((name, "lambda: "))
self.stack.append((name, node))
else:
self.stack.append((name, node))
return self.add_code_final()
elif name in ("Affectop", "Comparison"):
self.stack.append((name, node))
return self.add_code_final()
elif name == "Constant":
self.stack.append((name, node))
return self.add_code_final()
elif name == "Boolean":
self.stack.append((name, node))
return self.add_code_final()
# Function definition: '{' flags the innermost block as delimited by
# braces, '}' removes the block and decreases the indentation.
elif name in ("Functiondef", "Functiondefargs", "Functiondefbody"):
if text == "{":
self.empty_stack()
if len(self.block) == 0:
raise R2PyConversionError(node, name, "".join(
self.elements), "\n".join(str(_) for _ in self.stack))
self.block[-1] = True
return self.add_code_final()
elif text == "}":
self.empty_stack()
if len(self.block) == 0:
raise R2PyConversionError(node, name, "".join(
self.elements), "\n".join(str(_) for _ in self.stack))
self.block.pop()
self.indent -= 1
self.fLOG(
"[TreeStringListener.add_code*] indent -= 1", "--", self.indent)
return self.add_code_final()
elif text == '\n':
return self.add_code_final()
elif text in ('(', ')', 'function'):
self.stack.append((name, node))
return self.add_code_final()
# Arguments of a lambda: '(' is dropped, ')' becomes ':'.
elif name == "Functiondefargslambda":
if text == "(":
return self.add_code_final()
elif text == ")":
self.stack.append((name, ":"))
return self.add_code_final()
elif name == "Expr":
if text.startswith("#"):
# Comment
self.elements.append(" " * self.indent)
self.elements.append(text.strip("\n"))
self.elements.append("\n")
return self.add_code_final()
if text in ('(', ')', '[', ']', "+", "-", "|", "&", "||", "&&", "[[", "]]"):
self.stack.append((None, node))
return self.add_code_final()
if text == "!":
self.stack.append(("Not!", node))
return self.add_code_final()
# Matrix product: '@' for Python 3.6 and later, '*' otherwise.
if text == "%*%":
if sys.version_info[:2] >= (3, 6):
self.stack.append((name, "@"))
else:
self.stack.append((name, "*"))
return self.add_code_final()
# Other %op% operators are pushed without the percent signs.
if text in ("%>%", "%<%", "%+%", "%-%", "%/%"):
self.stack.append((name, text.replace("%", "")))
return self.add_code_final()
if text == "\n":
self.empty_stack()
if len(self.elements) > 0 and self.elements[-1] != '\n':
self.elements.append("\n")
return self.add_code_final()
if text == "break":
self.stack.append((name, node))
return self.add_code_final()
# Braces become parentheses when no block is opened, otherwise they
# delimit the innermost block.
if text == "{":
self.empty_stack()
if len(self.block) == 0:
# We are in an expression.
self.stack.append((name, "("))
return self.add_code_final()
else:
self.block[-1] = True
return self.add_code_final()
if text == "}":
# We are in an expression.
self.empty_stack()
if len(self.block) == 0:
self.stack.append((name, ")"))
return self.add_code_final()
else:
self.block.pop()
self.indent -= 1
self.fLOG(
"[TreeStringListener.add_code] indent -= 1", "--", self.indent)
return self.add_code_final()
# '...' becomes *args.
elif name == "Form":
if text == "...":
self.stack.append((None, "*args"))
return self.add_code_final()
else:
self.stack.append((None, node))
return self.add_code_final()
elif name == "Argumentname":
self.stack.append((name, node))
return self.add_code_final()
elif name == "Formlist":
self.stack.append((None, node))
return self.add_code_final()
# Parentheses of a call, 'lambda: ' follows '(' when add_lambda is set.
elif name == "Functioncall":
if text == ')':
self.stack.append((name, node))
return self.add_code_final()
if text == '(':
self.stack.append((name, node))
if self.add_lambda:
self.stack.append((name, "lambda: "))
return self.add_code_final()
if text == '\n':
return self.add_code_final()
elif name == "Affectation":
if text == '\n':
return self.add_code_final()
elif name in ("Sub", "Subnobracket"):
if text == '=':
# Named parameter.
self.stack.append((name, node))
return self.add_code_final()
if text == ':':
self.stack.append((name, node))
return self.add_code_final()
if text == '...':
self.stack.append((name, "*args"))
return self.add_code_final()
elif name == "Sublist":
if text == ",":
self.stack.append((name, node))
return self.add_code_final()
if text == '\n':
return self.add_code_final()
elif name in ("Exprlist", "Rightexpr"):
if text.startswith("#"):
# Comment
self.empty_stack()
self.elements.append(" ")
self.elements.append(text.strip("\n"))
self.elements.append("\n")
return self.add_code_final()
# Inside an inline function the separator is kept on the stack,
# otherwise it ends the current line.
if text in (";", "\n"):
if self.search_parents(node, "Inlinefunction"):
self.empty_stack()
self.stack.append(("Inlinefunction", text))
return self.add_code_final()
else:
self.empty_stack()
if len(self.elements) > 0 and self.elements[-1] != '\n':
self.elements.append("\n")
return self.add_code_final()
# 'else if' becomes 'elif', it requires that an 'if' was registered in
# indent_level for the grandparent context at the same indentation.
elif name == "Elseif":
if text == "if":
if id(node.parentCtx.parentCtx) not in self.indent_level:
raise R2PyConversionError(node, name, "".join(
self.elements), "\n".join(str(_) for _ in self.stack))
else:
if self.indent_level[id(node.parentCtx.parentCtx)] != self.indent:
raise R2PyConversionError(node, name, "".join(
self.elements), "\n".join(str(_) for _ in self.stack))
self.stack.append((name, "elif"))
return self.add_code_final()
elif text == "else":
# We do nothing. If follows.
return self.add_code_final()
else:
# There should be nothing else.
pass
# 'if' found by search_parents under a Sublist or close to an Affectation
# is kept on the stack as an expression, otherwise it is a statement
# which registers its indentation and opens a block with ':EOL'.
elif name == "Ifelseexpr" or name == "Ifexpr":
if self.search_parents(node, "Sublist") or self.search_parents(node, "Affectation", 2):
if text == "if":
self.stack.append((name, node))
return self.add_code_final()
elif text == "else":
self.stack.append((name, node))
return self.add_code_final()
elif text in ("(", ")"):
self.stack.append((name, node))
return self.add_code_final()
elif text == "\n":
raise R2PyConversionError(node, name, "".join(
self.elements), "\n".join(str(_) for _ in self.stack))
else:
if text == "if":
self.indent_level[id(node.parentCtx)] = self.indent
self.stack.append((name, node))
return self.add_code_final()
elif text == "else":
if id(node.parentCtx) not in self.indent_level:
raise R2PyConversionError(node, name, "".join(
self.elements), "\n".join(str(_) for _ in self.stack))
else:
if self.indent_level[id(node.parentCtx)] != self.indent:
raise R2PyConversionError(node, name, "".join(
self.elements), "\n".join(str(_) for _ in self.stack))
if len(self.stack):
# It does follow }
self.empty_stack()
if len(self.elements) > 0 and self.elements[-1] != '\n':
self.elements.append("\n")
self.stack.append((name, node))
self.stack.append((":EOL", None))
return self.add_code_final()
elif text == ")":
self.stack.append((":EOL", None))
return self.add_code_final()
elif text in ('(', "\n"):
return self.add_code_final()
# Loops: the parentheses around the condition are dropped.
elif name == "Forexpr":
if text == "for":
self.stack.append((name, node))
return self.add_code_final()
elif text in ('(', ')'):
return self.add_code_final()
elif text == "in":
self.stack.append((name, node))
return self.add_code_final()
elif name == "Whileexpr":
if text == "while":
self.stack.append((name, node))
return self.add_code_final()
elif text in ('(', ')'):
return self.add_code_final()
# ':' stores 'range' in memo (read by empty_stack), ':::' is pushed
# with the name 'Dotop_static'.
elif name == "Rangeop":
if text == ":":
self.memo.append("range")
self.stack.append((name, node))
return self.add_code_final()
elif text == ":::":
self.stack.append(("Dotop_static", node))
return self.add_code_final()
elif name == "Returnexpr":
if text == "return":
self.stack.append((name, node))
return self.add_code_final()
if text in ('(', ')'):
return self.add_code_final()
# A formula requires module patsy.
elif name == "Formop":
if text == "~":
self.imports.add("patsy")
self.stack.append((name, node))
return self.add_code_final()
elif name == "Sublistadd":
if text == "+":
self.stack.append((name, node))
return self.add_code_final()
elif text == "\n":
return self.add_code_final()
elif name == "Dotop":
if text in ("$", "@"):
self.stack.append((name, node))
return self.add_code_final()
elif name == "Formula_simple_A":
if text == ".":
self.stack.append((None, "."))
return self.add_code_final()
elif name == "Formula_simple_C":
if text in ("(", ")", "~"):
self.stack.append((name, node))
return self.add_code_final()
# 'within': separators are surrounded with double quotes pushed as
# 'Formop' items, braces are dropped, ';' becomes ','.
elif name == "Formula_simple_B":
if text == "within":
self.imports.add("from python2r_helper import within")
self.stack.append((name, node))
return self.add_code_final()
elif text in ("(",):
self.stack.append((name, node))
return self.add_code_final()
elif text == "{":
return self.add_code_final()
elif text == "}":
return self.add_code_final()
elif text == ",":
self.stack.append((name, node))
self.stack.append(("Formop", '"'))
return self.add_code_final()
elif text == ";":
self.stack.append(("Formop", '"'))
self.stack.append((name, ","))
self.stack.append(("Formop", '"'))
return self.add_code_final()
elif text == ")":
self.stack.append(("Formop", '"'))
self.stack.append((name, node))
return self.add_code_final()
# '%%' becomes '%', '^' becomes '**'.
elif name == "Operator":
if text == "%%":
self.stack.append((name, "%"))
return self.add_code_final()
elif text == "^":
self.stack.append((name, "**"))
return self.add_code_final()
else:
self.stack.append((name, node))
return self.add_code_final()
# ':' becomes ',' (enterRanges and exitRanges add 'range(' and ')').
elif name in ("Range_simple", "Range_complexe"):
if text == ":":
self.stack.append((name, ","))
return self.add_code_final()
else:
self.stack.append((name, node))
return self.add_code_final()
# Both branches push the node, '%in%' is translated by to_python.
elif name in ("Intersection_simple", "Intersection_complexe"):
if text == "%in%":
self.stack.append((name, node))
return self.add_code_final()
else:
self.stack.append((name, node))
return self.add_code_final()
elif name == "Implicit_column_name":
if text == ".":
self.imports.add("from python2r_helper import ImplicitColumn")
self.stack.append(("Identifier", "ImplicitColumn"))
self.stack.append((name, node))
return self.add_code_final()
elif text in ("(", ")"):
return self.add_code_final()
# Inline function: the body is wrapped into a call to
# compile_inline_function with a triple quoted string.
elif name == "Inlinefunction":
if text == "{":
self.inlinefunction = []
self.stack.append((name, "compile_inline_function('''\n"))
return self.add_code_final()
elif text in ("\n", ";"):
if hasattr(self, "inlinefunction") and len(self.inlinefunction) > 0:
self.stack.append((name, text))
return self.add_code_final()
elif text == "}":
self.stack.append((name, "''')"))
self.inlinefunction = []
return self.add_code_final()
else:
self.inlinefunction.append((name, node))
return self.add_code_final()
self.stack.append((name, node))
return self.add_code_final()
# Fallback for tokens no branch returned for: comments are emitted,
# anything else is reported as an error.
if text.startswith("#"):
# Comment
self.elements.append(" " * self.indent)
self.elements.append(text.strip("\n"))
self.elements.append("\n")
return self.add_code_final()
raise R2PyConversionError(node, name, "".join(
self.elements), "\n".join(str(_) for _ in self.stack))
def add_code_final(self):
    """
    Adds extra characters if needed.

    Called by *add_code* every time a token was handled, it does nothing
    in this class and returns None.
    :githublink:`%|py|494`
    """
[docs] def empty_stack(self):
"""
Empties the stack.
The couples *(name, node)* stored in *self.stack* are converted with
*to_python* and appended to *self.elements*, then *self.stack* and
*self.memo* are cleared. The method also handles the header of function
definitions and loops, and updates *self.indent* and *self.block*.
:raises R2PyConversionError: if a function definition does not end
with ``)``
:githublink:`%|py|500`
"""
# NOTE(review): the indentation of this method was lost in this copy of
# the file, the nesting has to be restored from the upstream source.
# The innermost block never met '{' (flag still False): it is removed
# and the indentation decreased before the new instruction is written.
if len(self.stack) > 0 and len(self.block) > 0 and not self.block[-1]:
self.indent -= 1
self.fLOG("[TreeStringListener.empty_stack] indent -= 1",
"--", self.indent)
self.block.pop()
# Looks for the kind of instruction stored in the stack: function
# definition (not a lambda), loop (for and while both set is_for),
# or a call to asNamespace.
is_function_def = False
is_for = False
as_namespace = False
for ipos, (name, node) in enumerate(self.stack):
if name == "Functiondef" and not self.search_parents(node, "Functiondefargslambda", 3):
is_function_def = True
break
elif name == "Forexpr":
is_for = True
break
elif name == "Whileexpr":
is_for = True
break
elif name == "Identifier":
text = node if isinstance(node, str) else node.symbol.text
if text == "asNamespace":
as_namespace = True
break
if self.indent > 0:
self.elements.append(" " * self.indent)
# Function definition: writes 'def <name>', the name is the first item
# of the stack without double quotes and with '.' and '-' replaced by '_'.
if is_function_def:
self.elements.append("\n")
function_name = self.stack[0][1] if isinstance(
self.stack[0][1], str) else self.stack[0][1].symbol.text
self.fLOG(
"[TreeStringListener.empty_stack] add function '{0}'".format(function_name))
self.elements.append(" " * self.indent)
self.elements.append("def")
self.elements.append(" ")
function_name = function_name.strip(
'"').replace('.', "_").replace("-", "_")
self.elements.append(function_name)
# Skips the first three items, presumably the name, the assignment
# operator and keyword 'function' -- TODO confirm.
self.stack = self.stack[3:]
last = self.stack[-1][1].symbol.text
if last != ")":
raise R2PyConversionError(self.stack[-1][1], last, "".join(
self.elements), "\n".join(str(_) for _ in self.stack))
# We store some end character we need to add.
# closure maps id(context) to (context, closing text), the text is
# written once a visited node does not have this context as a parent.
closure = {}
for ipos, (name, node) in enumerate(self.stack):
if name == "Functioncall" and node == "":
# Silent addition.
continue
self.fLOG(
" cl={0} n={1} - {2}".format(len(closure), name, node))
if len(closure) > 0 and not isinstance(node, str):
rem = []
for k, (leave_node, symbol) in closure.items():
b = self.has_parent(node, leave_node)
if not b:
rem.append(k)
args = [symbol, self.terminal_node_name(leave_node),
self.terminal_node_name(node), node.symbol.text]
self.fLOG(
" closure '{0}' - L-{1} C-{2} ({3})".format(*args))
self.elements.append(symbol)
for r in rem:
del closure[r]
converted = self.to_python(name, node)
if name == "Identifier":
converted = converted.replace(".", "_")
elif name in ("Ifexpr", "Ifelseexpr"):
# Inline if (same test as in add_code): '(' is kept, ')' becomes
# ',' and the closing bracket is postponed.
if node.symbol.text in ("(", ")") and (self.search_parents(node, "Sublist") or
self.search_parents(node, "Affectation", 2)):
self.fLOG(
" inlineif {0} - '{1}'".format(name, node.symbol.text))
if node.symbol.text == "(":
converted = "("
else:
# We need to add ) when leaving this node.
converted = ","
closure[id(node.parentCtx)] = (node.parentCtx, ")")
elif name == "Affectop" and self.stack[0][0] in ("Functioncall", "Inlinefunction") and \
self.stack[0][1] == "":
# How to deal with syntax names(df) = something.
# We add set add a bracket at the end.
converted = ".set("
closure[id(node.parentCtx.parentCtx)] = (
node.parentCtx.parentCtx, ")")
elif name == "Formula_simple_C":
if converted == "(":
self.elements.append('"')
closure[id(node.parentCtx)] = (node.parentCtx, '"')
if as_namespace and converted == "=":
converted = "."
if self.indent > 0 and converted and self.elements[-1] == "\n":
self.elements.append(" " * self.indent)
self.elements.append(converted)
self.elements.append(" ")
# In a loop, the last name stored in memo (add_code stores 'range')
# and an opening bracket follow keyword 'in'.
if is_for and len(self.memo) > 0 and converted == "in":
self.elements.append(self.memo[-1])
self.elements.append('(')
# ':EOL' ends the header of a block: ':' and a new line are written,
# the indentation increases and a block not yet delimited by braces
# is registered.
if name == ":EOL":
self.elements.append(":")
self.elements.append("\n")
self.indent += 1
self.fLOG(
"[TreeStringListener.empty_stack-1] indent += 1", "--", self.indent)
self.block.append(False)
# Writes the closing texts which are still pending.
if len(closure) > 0:
for k, (leave_node, symbol) in closure.items():
self.elements.append(symbol)
# closure = {}
# Header of a function or a loop: closes the bracket opened after 'in'
# if any, then opens a new block.
if is_function_def or is_for:
if is_for and len(self.memo) > 0:
self.elements.append(")")
self.elements.append(":")
self.elements.append("\n")
self.indent += 1
self.fLOG(
"[TreeStringListener.empty_stack-2] indent += 1", "--", self.indent)
self.block.append(False)
self.stack.clear()
self.memo.clear()
def search_parents(self, node, substring, max_depth=None):
    """
    Searches for a substring in parents' node name.

    The walk starts at ``node.parentCtx`` and every name is computed with
    *terminal_node_name*, which looks at the parent of the node it
    receives: the first name which is compared is the one of the
    grandparent of *node*.

    :param node: current node, a string always returns False
    :param substring: substring to search
    :param max_depth: number of parents to look at,
        None (or 0 for no parent at all) means no limit (or no search)
    :return: boolean
    :githublink:`%|py|645`
    """
    if isinstance(node, str):
        return False
    remaining = max_depth if max_depth else 0
    current = node.parentCtx
    while current is not None and (max_depth is None or remaining > 0):
        if substring in self.terminal_node_name(current):
            return True
        current = current.parentCtx
        remaining -= 1
    return False
def has_parent(self, current, parent, depth=None):
    """
    Tells if *parent* is one of the parents of *current*.
    *current* itself is part of the search.

    :param current: current node
    :param parent: parent to look for
    :param depth: maximum number of nodes to look at (*current* included),
        None for no limit
    :return: boolean
    :raises NotImplementedError: if *current* is a string
    :githublink:`%|py|665`
    """
    if isinstance(current, str):
        raise NotImplementedError()
    remaining = 0 if depth is None else depth
    candidate = current
    while candidate is not None and (depth is None or remaining > 0):
        # Identity, not equality: the same context object is expected.
        if candidate is parent:
            return True
        candidate = candidate.parentCtx
        remaining -= 1
    return False
def to_python(self, name, node):
    """
    Converts a couple *(name, node)* into :epkg:`Python`.

    :param name: rule name stored in the stack with the node (can be None)
    :param node: terminal node, a string already converted, or None
    :return: string

    The method adds the imports required by the helpers it returns
    to *self.imports*. The order of the tests matters: the branches placed
    before the test on strings expect a node exposing *symbol.text*.
    :githublink:`%|py|683`
    """
    if name == "Affectop":
        return "="
    if name == "Not":
        return " not "
    if name == "Not!":
        return " ~ "
    if name == "Boolean":
        text = node.symbol.text
        # Only the first character keeps its case (TRUE becomes True).
        return text[0] + text[1:].lower()
    if name == "Dotop":
        return "."
    if name == "Dotop_static":
        return ".static."
    if name == "Constant":
        text = node.symbol.text
        if text.startswith("`") and text.endswith("`") and len(text) > 1:
            return 'RCOL("{0}")'.format(text[1:-1])
        if text == "NULL":
            return "None"
        if text.endswith("L"):
            # Integer
            return text[:-1]
        is_formula = self.search_parents(node, "Formula")
        # startswith / endswith do not fail on an empty text.
        if is_formula and text.startswith('"') and text.endswith('"'):
            # A formula is already written inside double quotes.
            return '\\"{0}\\"'.format(text[1:-1])
        return text
    if name == "Rangeop":
        text = node.symbol.text
        return "," if text == ":" else text
    if name in ("Ifexpr", "Ifelseexpr"):
        text = node if isinstance(node, str) else node.symbol.text
        inline = text in ("if", "else") and (
            self.search_parents(node, "Sublist") or
            self.search_parents(node, "Affectation", 2))
        if not inline:
            return text
        if text == "if":
            self.imports.add("from python2r_helper import inlineif")
            return "inlineif"
        return ","
    if name in ("Intersection_simple", "Intersection_complexe"):
        text = node.symbol.text
        return ") & set(" if text == "%in%" else text
    if isinstance(node, str):
        return node
    if node is None:
        return ""
    if name == "Functiondef":
        if self.search_parents(node, "Functiondeflambda", 2):
            return "lambda"
        return node.symbol.text
    if name == "Argumentname":
        text = node.symbol.text
        if text == "lambda":
            return "lambda_"
        return text.replace(".", "_")
    if name == "Identifier":
        text = node.symbol.text
        if text == "c":
            # This is a tuple.
            self.imports.add("from python2r_helper import make_tuple")
            return "make_tuple"
        if text == "class":
            parent_name = self.terminal_node_name(node.parentCtx)
            if parent_name in ("Functioncall", "Subnobracket"):
                self.imports.add("from python2r_helper import make_class")
                return "make_class"
            return text.replace(".", "_")
        if text == "finally":
            self.imports.add("from python2r_helper import finallyR")
            return "finallyR"
        if text == "lambda":
            return "lambda_"
        if text in ("bquote", "ImplicitColumn"):
            self.imports.add(
                "from python2r_helper import {0}".format(text))
            return text
        if text == "try":
            parent_name = self.terminal_node_name(node.parentCtx)
            if parent_name == "Functioncall":
                self.imports.add("from python2r_helper import dotry")
                return "dotry"
            return text.replace(".", "_")
        return text.replace(".", "_")
    # Any other rule.
    text = node.symbol.text
    if text == "c":
        # This is a tuple.
        self.imports.add("from python2r_helper import make_tuple")
        return "make_tuple"
    if text == "&&":
        return "and"
    if text == "||":
        return "or"
    return text
def terminal_node_name(self, node):
    """
    Converts a terminal node into a rule name.

    The name is built from the class name of ``node.parentCtx``
    without ``Context``.

    :param node: node exposing *parentCtx*
    :return: string
    :githublink:`%|py|796`
    """
    # str(type(...)) looks like "<class 'pkg.module.XContext'>".
    class_repr = str(type(node.parentCtx))
    last = class_repr.split('.')[-1]
    return last.strip("]['><").replace("Context", "")
def visitTerminal(self, node):
    """
    event, logs the terminal node into *self.buffer*
    and converts it with *add_code*

    :param node: terminal node
    :githublink:`%|py|802`
    """
    # node: ['symbol', 'parentCtx']
    # node.symbol: ['source', 'type', 'channel', 'start', 'stop', 'tokenIndex', 'line', 'column', '_text']
    # help(node.parentCtx)
    # End of lines are replaced to keep one line per node in the buffer.
    full_text = node.parentCtx.getText().replace("\n", " EOL ")
    token_text = node.symbol.text.replace("\n", " EOL ")
    stype = self.terminal_node_name(node)
    line = "{0} [{1} - {2}] {3}".format(
        " " * self.level, token_text, stype, full_text)
    self.buffer.append(line)
    self.add_code(node)
def visitErrorNode(self, node):
    """
    event, logs the error node into *self.buffer*

    :param node: error node
    :githublink:`%|py|817`
    """
    prefix = " " * self.level
    self.buffer.append(prefix + "error: " + str(node))
def enterEveryRule(self, ctx):
    """
    event, logs the rule name into *self.buffer*
    and increases the depth *self.level*

    :param ctx: context of the rule
    :githublink:`%|py|824`
    """
    # Class name of the context without 'Context'.
    class_repr = str(type(ctx))
    kind = class_repr.split(".")[-1].strip("'<>").replace("Context", "")
    self.buffer.append((" " * self.level) + "+ {0}".format(kind))
    self.level += 1
def exitEveryRule(self, ctx):
    """
    event, decreases the depth *self.level*
    and logs the end of the rule into *self.buffer*

    :param ctx: context of the rule (unused)
    :githublink:`%|py|834`
    """
    self.level -= 1
    self.buffer.append((" " * self.level) + "- ")
def __str__(self):
    """
    usual, returns the Python code followed by a separator
    and the content of *self.buffer* (one item per line)
    :githublink:`%|py|842`
    """
    trace = "\n".join(self.buffer)
    return self.get_python() + "\n----\n" + trace
def enterRanges(self, ctx: "RParser.RangesContext"):
    """
    event, pushes ``range(`` on the stack

    :param ctx: context of the rule (unused)
    :githublink:`%|py|848`
    """
    self.fLOG(" add 'range('")
    self.stack.append(("Ranges", "range("))
def exitRanges(self, ctx: "RParser.RangesContext"):
    """
    event, pushes the closing bracket of ``range(`` on the stack

    :param ctx: context of the rule (unused)
    :githublink:`%|py|855`
    """
    self.fLOG(" add ') # range'")
    self.stack.append(("Ranges", ")"))
def enterIntersections(self, ctx: "RParser.IntersectionsContext"):
    """
    event, pushes ``set(`` on the stack

    :param ctx: context of the rule (unused)
    :githublink:`%|py|862`
    """
    # NOTE(review): the annotation used to be RParser.RangesContext;
    # IntersectionsContext follows the naming convention of the generated
    # parser -- confirm against RParser.
    self.fLOG(" add 'set('")
    self.stack.append(("Intersections", "set("))
def exitIntersections(self, ctx: "RParser.IntersectionsContext"):
    """
    event, pushes the closing bracket of ``set(`` on the stack

    :param ctx: context of the rule (unused)
    :githublink:`%|py|869`
    """
    # NOTE(review): the annotation used to be RParser.RangesContext;
    # IntersectionsContext follows the naming convention of the generated
    # parser -- confirm against RParser.
    self.fLOG(" add ') # set'")
    self.stack.append(("Intersections", ")"))