Coverage for src/botadi/mokadi/mokadi_parser.py: 82%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

176 statements  

1# -*- coding: utf-8 -*- 

2""" 

3@file 

4@brief Parse with Mokadi's grammar. 

5""" 

6import io 

7import sys 

8from antlr4 import CommonTokenStream, InputStream, ParseTreeWalker 

9from .mokadi_exceptions import MokadiException 

10 

11 

def print_level_order(node, indent=0):
    """
    Renders a parsed tree as text, one line per node, children below
    their parent with one more level of indentation.

    @param      node        node to display, it must implement ``getText()``;
                            children are only explored if it also implements
                            ``getChildren()``
    @param      indent      depth of *node*, used to build the line prefix
    @return                 string, lines are joined with ``\\n``

    Every line follows the pattern ``<prefix><text> - <ClassName>`` where
    the class name is the type name of the node without its ``Context``
    suffix. Illustrative output:

    ::

        MOKADIbruit - <Parse>
         MOKADI - <Mokadi>
          MOKADI - <TerminalNodeImpl>
         bruit - <Expression_stmt>
    """
    # The class name is extracted from the representation of the type,
    # e.g. "<class 'package.module.FooContext'>" gives "FooContext".
    kind = str(type(node)).rsplit(".", 1)[-1].strip("'><")
    if kind.endswith("Context"):
        kind = kind[:-len("Context")]

    # NOTE(review): the indentation unit is copied from the listing in which
    # whitespace may have been collapsed - confirm against the real file.
    lines = ['{0}{1} - <{2}>'.format(' ' * indent, node.getText(), kind)]
    if hasattr(node, "getChildren"):
        lines.extend(print_level_order(sub, indent + 1)
                     for sub in node.getChildren())
    return "\n".join(lines)

44 

45 

def run_parse(parser):
    """
    Parses the script and intercepts standard output and error.

    @param      parser      object exposing ``parse()``, e.g. the output of
                            @see fn parse_mokadi
    @return                 stdout, stderr, tree
    @raises     SyntaxError if anything was written on the standard error
                            while parsing, the message contains the captured
                            error and the tree built by @see fn print_level_order

    *sys.stdout* and *sys.stderr* are replaced for the whole process while
    ``parser.parse()`` runs, the function is therefore not thread safe.
    Both streams are restored whatever happens, including when
    ``parser.parse()`` raises an exception.
    """
    captured_out = io.StringIO()
    captured_err = io.StringIO()
    saved_out, saved_err = sys.stdout, sys.stderr
    sys.stdout, sys.stderr = captured_out, captured_err
    try:
        tree = parser.parse()
    finally:
        # Without this, a failing parser would leave the process with
        # hijacked standard streams.
        sys.stdout, sys.stderr = saved_out, saved_err

    out = captured_out.getvalue()
    err = captured_err.getvalue()
    if err:
        # Any output on stderr is considered as a parsing failure.
        mes = print_level_order(tree)
        raise SyntaxError("Mokadi parsing error:\n" + err + "\nTREE\n" + mes)
    return out, err, tree

71 

72 

def parse_mokadi(content, MokadiGrammarParser, MokadiGrammarLexer):
    """
    Builds a parser for a sentence written in the mokadi language.

    @param      content                 string, or an object the lexer
                                        accepts directly
    @param      MokadiGrammarParser     parser class for a specific language
    @param      MokadiGrammarLexer      lexer class for a specific language
    @return                             instance of *MokadiGrammarParser*
                                        built on the token stream of the lexer
    """
    # A plain string is wrapped into an InputStream, anything else is
    # handed over to the lexer unchanged.
    source = InputStream(content) if isinstance(content, str) else content
    tokens = CommonTokenStream(MokadiGrammarLexer(source))
    return MokadiGrammarParser(tokens)

89 

90 

def get_tree_string(MokadiGrammarListener, tree, parser, script=None):
    """
    Walks the parsed tree and returns an annotated description of it.

    @param      MokadiGrammarListener   listener class the internal listener
                                        derives from
    @param      tree                    tree to walk
    @param      parser                  the parser used to build the tree, its
                                        ``*Context`` classes are used to
                                        annotate the nodes
    @param      script                  unused, kept for backward compatibility
    @return                             tuple *(solution, text_type)*,
                                        *solution* is a list which receives a
                                        copy of the stack every time the
                                        outermost rule is left, *text_type* is
                                        the list of ``(text, type)`` for every
                                        terminal node
    @raises     MokadiException         if an error node is visited or if the
                                        type of a node cannot be determined

    The list built for a rule starts with
    ``("enter", text, type, line, column)``, then contains one nested list
    per sub-rule and one ``(text, type)`` per terminal node, and ends with
    ``("leave", text, type, line, column)``.
    """

    # Attribute of the parser (a context class) -> annotation.
    # The pairs are tested in this order, the first match wins.
    context_tags = (
        ("ParseContext", ":P:"),
        ("MokadiContext", ":MOKADI:"),
        ("Slides_stmtContext", ":slide_exp:"),
        ("SlidesContext", ":slide:"),
        ("Word_nameContext", ":word:"),
        ("Word_name_extContext", ":word:"),
        ("OperatorContext", ":op:"),
        ("Verb_voirContext", ":verb_voir:"),
        ("Mail_stmtContext", ":mails:"),
        ("MailsContext", ":mails:"),
        ("QuestionContext", ":question:"),
        ("NewsContext", ":news:"),
        ("News_queryContext", ":query:"),
        ("AproposContext", ":apropos:"),
        ("News_stmtContext", ":news:"),
        ("Time_indicationContext", ":time_indication:"),
        ("Integer_numberContext", ":int:"),
        ("Integer_number_stringContext", ":int:"),
        ("Number_nameContext", ":int:"),
        ("Questions_markContext", ":question_mark:"),
        ("Stop_wordsContext", ":stopword:"),
        ("PresentationContext", ":presentation:"),
        ("Emotion_stmtContext", ":emotion:"),
        ("HumeurContext", ":emotion:"),
        ("Possessif_meContext", ":a_moi:"),
        ("Anything_stmtContext", ":anything:"),
        ("With_bodyContext", ":entier:"),
        ("EntierContext", ":entier:"),
        ("NumeroContext", ":numero:"),
        ("DefinitionContext", ":definition:"),
        ("SynonymeContext", ":synonym:"),
        ("Expression_stmtContext", ":expression:"),
        ("ExpressionContext", ":expression:"),
    )

    class TreeStringListener(MokadiGrammarListener):

        """
        Listener which records the rules and the terminal nodes it meets
        while the tree is walked. It is an attempt to run through the tree
        but it is not complete.
        """

        def __init__(self, parser):
            """
            constructor

            @param      parser      parser used to parse the code
            """
            MokadiGrammarListener.__init__(self)
            # stack of lists, one per rule currently open
            self.buffer = None
            # index of the innermost open rule in the stack, -1 if none
            self.level = -1
            self.parser = parser
            # snapshots of the stack taken when the outermost rule is left
            self.solution = []
            # (text, type) of every terminal node, in visiting order
            self.text_type = []

        @property
        def Result(self):
            """
            Returns the snapshots collected while walking the tree.
            """
            return self.solution

        def visitTerminal(self, node):
            """
            Event: records the text of a terminal node with the type
            of its parent rule (or of a relative of it).
            """
            t = self.get_type(node.parentCtx, True)
            item = (node.getText(), t)
            self.buffer[self.level].append(item)
            self.text_type.append(item)

        def visitErrorNode(self, node):
            """
            Event: gathers everything known about the error node, its symbol
            and its parent context, then raises an exception.

            @raises     MokadiException     always
            """
            details = [(" " * self.level) + "error: " + str(node)]
            for prefix, obj in ((" ***", node),
                                (" ###", node.symbol),
                                (" ---", node.parentCtx)):
                details.append(prefix + str(obj))
                details.append(prefix + str(type(obj)))
                details.append(prefix + str(obj.__dict__))
            raise MokadiException("\n".join(details))

        def istypeof(self, ch):
            """
            Annotates a specific node of the grammar.

            @param      ch      context, node
            @return             string or None if *ch* is not an instance of
                                any known context class of the parser
            """
            for attribute, tag in context_tags:
                if isinstance(ch, getattr(self.parser, attribute)):
                    return tag
            return None

        def get_type(self, ctx, children=False, exc=True):
            """
            Extracts the type of a context.

            @param      ctx         ctx
            @param      children    if the context has no type, look into
                                    its children, then into the children of
                                    its parent and so on up to the root
            @param      exc         raise an exception if not found
            @return                 type as a string, None if not found
                                    and *exc* is False
            @raises     MokadiException     if not found and *exc* is True
            """
            t = self.istypeof(ctx)
            if t is not None:
                return t

            if children:
                ctxi = ctx
                while ctxi is not None:
                    for ch in ctxi.getChildren():
                        # the original code called a method which does not
                        # exist (istype), the lookup could never succeed
                        t = self.istypeof(ch)
                        if t is not None:
                            return t
                    ctxi = ctxi.parentCtx

            if not exc:
                return None

            # error: lists the type of every node which was (or could have
            # been) explored to help debugging the grammar
            keep = [ctx]
            ctxi = ctx
            while ctxi is not None:
                keep.extend(ctxi.getChildren())
                ctxi = ctxi.parentCtx
            mes = "\n".join(str(type(_)) for _ in keep)
            raise MokadiException("Unable to get type of \n" + mes)

        def enterEveryRule(self, ctx):
            """
            Event: opens a new list for the rule, attaches it to the list of
            the enclosing rule if any and pushes it on the stack.
            """
            ty = self.get_type(ctx)
            text = ctx.getText()
            li = []
            if self.level == -1:
                # outermost rule, the stack is reset
                self.buffer = []
            else:
                self.buffer[self.level].append(li)
            self.buffer.append(li)
            li.append(("enter", text, ty, ctx.start.line, ctx.start.column))
            self.level += 1

        def exitEveryRule(self, ctx):
            """
            Event: closes the list of the rule and removes it from the stack,
            a copy of the stack is stored when the outermost rule is left.
            """
            ty = self.get_type(ctx)
            text = ctx.getText()
            self.buffer[self.level].append(
                ("leave", text, ty, ctx.start.line, ctx.start.column))
            self.level -= 1
            if self.level == -1:
                # shallow copy taken before the last list is popped
                self.solution.append(self.buffer.copy())
            self.buffer.pop()

    walker = ParseTreeWalker()
    listen = TreeStringListener(parser)
    walker.walk(listen, tree)
    return listen.Result, listen.text_type