Coverage for C:\Repos\leo-editor\leo\plugins\importers\javascript.py: 79%

265 statements  

« prev     ^ index     » next       coverage.py v6.4, created at 2022-05-24 10:21 -0500

1#@+leo-ver=5-thin 

2#@+node:ekr.20140723122936.18144: * @file ../plugins/importers/javascript.py 

3"""The @auto importer for JavaScript.""" 

4import re 

5import textwrap 

6import unittest 

7from typing import List 

8from leo.core import leoGlobals as g 

9from leo.plugins.importers import linescanner 

10Importer = linescanner.Importer 

11Target = linescanner.Target 

12#@+others 

13#@+node:ekr.20140723122936.18049: ** class JS_Importer 

class JS_Importer(Importer):
    """The importer for @auto JavaScript files."""

    def __init__(self, importCommands, force_at_others=False, **kwargs):
        """The ctor for the JS_ImportController class."""
        # Init the base class.
        super().__init__(
            importCommands,
            gen_refs=False,  # Fix #639.
            language='javascript',
            state_class=JS_ScanState,
        )

    #@+others
    #@+node:ekr.20180123051226.1: *3* js_i.post_pass & helpers
    def post_pass(self, parent):
        """
        Optional Stage 2 of the javascript pipeline.

        All substages **must** use the API for setting body text. Changing
        p.b directly will cause asserts to fail later in i.finish().
        """
        self.clean_all_headlines(parent)
        self.remove_singleton_at_others(parent)
        self.clean_all_nodes(parent)
        self.move_trailing_comments(parent)
    #@+node:ekr.20180123051401.1: *4* js_i.remove_singleton_at_others
    # Matches a line consisting of optional leading whitespace plus @others.
    at_others = re.compile(r'^\s*@others\b')

    def remove_singleton_at_others(self, parent):
        """
        Replace @others by the body of a singleton child node.
        Return True if any replacement was made.
        """
        found = False
        for p in parent.subtree():
            if p.numberOfChildren() == 1:
                child = p.firstChild()
                lines = self.get_lines(p)
                matches = [i for i, s in enumerate(lines) if self.at_others.match(s)]
                # Only act when exactly one @others line exists.
                if len(matches) == 1:
                    found = True
                    i = matches[0]
                    child_lines = self.get_lines(child)
                    lines = lines[:i] + child_lines + lines[i + 1 :]
                    self.set_lines(p, lines)
                    # Delete child later. Is this enough???
                    self.set_lines(child, [])
        return found
    #@+node:ekr.20180123060307.1: *4* js_i.remove_organizer_nodes
    def remove_organizer_nodes(self, parent):
        """Removed all organizer nodes created by i.delete_all_empty_nodes."""
        # Careful: Restart this loop whenever we find an organizer,
        # because promote/doDelete invalidate the subtree iteration.
        found = True
        while found:
            found = False
            for p in parent.subtree():
                lines = self.get_lines(p)
                if p.h.lower() == 'organizer' and not lines:
                    p.promote()
                    p.doDelete()
                    found = True  # Restart the loop.
    #@+node:ekr.20200202071105.1: *4* js_i.clean_all_nodes
    def clean_all_nodes(self, parent):
        """Remove common leading whitespace from all nodes."""
        for p in parent.subtree():
            lines = self.get_lines(p)
            s = textwrap.dedent(''.join(lines))
            self.set_lines(p, g.splitLines(s))
    #@+node:ekr.20200202091613.1: *4* js_i.move_trailing_comments & helper (new)
    def move_trailing_comments(self, parent):
        """Move all trailing comments to the start of the next node."""
        for p in parent.subtree():
            # Renamed local from 'next' to avoid shadowing the builtin.
            next_p = p.next()
            if next_p:
                lines = self.get_lines(p)
                head_lines, tail_lines = self.get_trailing_comments(lines)
                if tail_lines:
                    self.set_lines(p, head_lines)
                    next_lines = self.get_lines(next_p)
                    self.set_lines(next_p, tail_lines + next_lines)
    #@+node:ekr.20200202092332.1: *5* js_i.get_trailing_comments
    def get_trailing_comments(self, lines):
        """
        Return the trailing comments of p.
        Return (head_lines, tail_lines).
        """
        s = ''.join(lines)
        head: List[str] = []
        tail: List[str] = []
        # Fix: the original wrote `if not s.strip:` — the bound method is
        # always truthy, so the early return for blank bodies never fired.
        if not s.strip():
            return head, tail
        in_block_comment = False
        head = lines
        for i, line in enumerate(lines):
            s = line.strip()
            if in_block_comment:
                tail.append(line)
                if s.startswith('*/'):
                    in_block_comment = False
            elif s.startswith('/*'):
                in_block_comment = True
                head = lines[:i]
                tail = [line]
            elif s.startswith('//'):
                head = lines[:i]
                tail = [line]
            elif s:  # Clear any previous comments.
                head = lines
                tail = []
        return head, tail
    #@+node:ekr.20161105140842.5: *3* js_i.scan_line (rewritten)
    def scan_line(self, s, prev_state):
        """
        Update the scan state at the *end* of the line.
        Return JS_ScanState({'context':context, 'curlies':curlies, 'parens':parens})

        This code uses JsLex to scan the tokens, which scans strings and regexs properly.

        This code also handles *partial* tokens: tokens continued from the
        previous line or continued to the next line.
        """
        context = prev_state.context
        curlies, parens = prev_state.curlies, prev_state.parens
        # Scan tokens, updating context and counts.
        prev_val = None
        for kind, val in JsLexer().lex(s):
            if context:
                # Inside a string or block comment: look only for the closer.
                if context in ('"', "'") and kind in ('other', 'punct') and val == context:
                    context = ''
                elif (
                    context == '/*'
                    and kind in ('other', 'punct')
                    and prev_val == '*'
                    and val == '/'
                ):
                    context = ''
            elif kind in ('other', 'punct') and val in ('"', "'"):
                context = val
            elif kind in ('other', 'punct') and val == '*' and prev_val == '/':
                context = '/*'
            elif kind in ('other', 'punct'):
                # Note: a duplicated "'*' after '/'" check that lived here was
                # unreachable (the elif above always caught it) and was removed.
                if val == '{':
                    curlies += 1
                elif val == '}':
                    curlies -= 1
                elif val == '(':
                    parens += 1
                elif val == ')':
                    parens -= 1
            prev_val = val
        d = {'context': context, 'curlies': curlies, 'parens': parens}
        state = JS_ScanState(d)
        return state
    #@+node:ekr.20171224145755.1: *3* js_i.starts_block
    # Patterns that mark the start of a javascript block.
    func_patterns = [
        re.compile(r'.*?\)\s*=>\s*\{'),
        re.compile(r'\s*class\b'),
        re.compile(r'\s*function\b'),
        re.compile(r'.*?[(=,]\s*function\b'),
    ]

    def starts_block(self, i, lines, new_state, prev_state):
        """True if the new state starts a block."""
        if new_state.level() <= prev_state.level():
            return False
        # Remove strings and regexs from the line before applying the patterns.
        cleaned_line = []
        for kind, val in JsLexer().lex(lines[i]):
            if kind not in ('string', 'regex'):
                cleaned_line.append(val)
        # Search for any of the patterns.
        line = ''.join(cleaned_line)
        for pattern in self.func_patterns:
            if pattern.match(line) is not None:
                return True
        return False
    #@+node:ekr.20200131193217.1: *3* js_i.ends_block
    def ends_block(self, line, new_state, prev_state, stack):
        """True if line ends the block."""
        # Comparing new_state against prev_state does not work for python.
        top = stack[-1]
        return new_state.level() < top.state.level()
    #@+node:ekr.20161101183354.1: *3* js_i.clean_headline
    clean_regex_list1 = [
        # (function name (
        re.compile(r'\s*\(?(function\b\s*[\w]*)\s*\('),
        # name: (function (
        re.compile(r'\s*(\w+\s*\:\s*\(*\s*function\s*\()'),
        # const|let|var name = .* =>
        re.compile(r'\s*(?:const|let|var)\s*(\w+\s*(?:=\s*.*)=>)'),
    ]
    clean_regex_list2 = [
        re.compile(r'(.*\=)(\s*function)'),  # .* = function
    ]
    clean_regex_list3 = [
        re.compile(r'(.*\=\s*new\s*\w+)\s*\(.*(=>)'),  # .* = new name .* =>
        re.compile(r'(.*)\=\s*\(.*(=>)'),  # .* = ( .* =>
        re.compile(r'(.*)\((\s*function)'),  # .* ( function
        re.compile(r'(.*)\(.*(=>)'),  # .* ( .* =>
        re.compile(r'(.*)(\(.*\,\s*function)'),  # .* \( .*, function
    ]
    clean_regex_list4 = [
        re.compile(r'(.*)\(\s*(=>)'),  # .* ( =>
    ]

    def clean_headline(self, s, p=None, trace=False):
        """Return a cleaned up headline s."""
        # pylint: disable=arguments-differ
        s = s.strip()
        # Don't clean a headline twice.
        if s.endswith('>>') and s.startswith('<<'):
            return s
        for ch in '{(=':
            if s.endswith(ch):
                s = s[:-1].strip()
        # First regex cleanup. Use \1.
        for pattern in self.clean_regex_list1:
            m = pattern.match(s)
            if m:
                s = m.group(1)
                break
        # Second regex cleanup. Use \1 + \2
        for pattern in self.clean_regex_list2:
            m = pattern.match(s)
            if m:
                s = m.group(1) + m.group(2)
                break
        # Third regex cleanup. Use \1 + ' ' + \2
        for pattern in self.clean_regex_list3:
            m = pattern.match(s)
            if m:
                s = m.group(1) + ' ' + m.group(2)
                break
        # Fourth cleanup. Use \1 + ' ' + \2 again
        for pattern in self.clean_regex_list4:
            m = pattern.match(s)
            if m:
                s = m.group(1) + ' ' + m.group(2)
                break
        # Final whitespace cleanups.
        # NOTE(review): the source read replace(' ', ' '), a no-op; a
        # double-space collapse is the evident intent — confirm upstream.
        s = s.replace('  ', ' ')
        s = s.replace(' (', '(')
        return g.truncate(s, 100)
    #@-others

258#@+node:ekr.20161105092745.1: ** class JS_ScanState 

class JS_ScanState:
    """
    The state of the javascript line-oriented scan: the string/comment
    context plus the open-curly and open-paren counts at end of line.
    """

    def __init__(self, d=None):
        """JS_ScanState ctor: seed the state, optionally from a dict."""
        if not d:
            self.context = ''
            self.curlies = self.parens = 0
        else:
            # d is *different* from the dict created by i.scan_line.
            self.context = d.get('context')
            self.curlies = d.get('curlies')
            self.parens = d.get('parens')

    def __repr__(self):
        """JS_ScanState.__repr__"""
        return (
            f"JS_ScanState context: {self.context!r} "
            f"curlies: {self.curlies} parens: {self.parens}"
        )

    __str__ = __repr__

    #@+others
    #@+node:ekr.20161119115505.1: *3* js_state.level
    def level(self):
        """JS_ScanState.level: the nesting level as a (curlies, parens) tuple."""
        return (self.curlies, self.parens)
    #@+node:ekr.20161119051049.1: *3* js_state.update
    def update(self, data):
        """
        Update the state using the 6-tuple returned by i.scan_line.
        Return i = data[1]
        """
        context, i, delta_c, delta_p, delta_s, bs_nl = data
        self.context = context
        self.curlies += delta_c
        self.parens += delta_p
        # delta_s (squares) and bs_nl are deliberately ignored here.
        return i

    #@-others

300 

301#@+node:ekr.20200131110322.2: ** JsLexer... 

302# JsLex: a lexer for Javascript 

303# Written by Ned Batchelder. Used by permission. 

304# 

305# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 

306# For details: https://bitbucket.org/ned/jslex/src/default/NOTICE.txt 

307#@+node:ekr.20200131110322.4: *3* class Tok 

class Tok:
    """A specification for a token class."""

    # Class-level counter: every Tok instance receives a unique, increasing id.
    num = 0

    def __init__(self, name, regex, next=None):
        """Record the token's name, pattern, and optional next lexer state."""
        self.name = name
        self.regex = regex
        self.next = next
        # Claim the next id; Lexer uses it to build a named regex group.
        self.id = Tok.num
        Tok.num += 1

319#@+node:ekr.20200131110322.7: *3* class Lexer 

class Lexer:
    """A generic multi-state regex-based lexer."""

    #@+others
    #@+node:ekr.20200131110322.8: *4* Lexer.__init__
    def __init__(self, states, first):
        """
        Compile one combined regex per lexer state.

        states: dict mapping each state name to a list of Tok rules.
        first: the name of the initial state.
        """
        self.regexes = {}
        self.toks = {}
        for state, rules in states.items():
            parts = []
            for tok in rules:
                # Each rule becomes a named group, so the winning alternative
                # can be recovered from match.lastgroup in lex().
                groupid = "t%d" % tok.id
                self.toks[groupid] = tok
                parts.append("(?P<%s>%s)" % (groupid, tok.regex))
            # Alternation order follows rule order, so earlier rules win ties.
            self.regexes[state] = re.compile("|".join(parts), re.MULTILINE | re.VERBOSE)  # |re.UNICODE)
        self.state = first

    #@+node:ekr.20200131110322.9: *4* Lexer.lex
    def lex(self, text):
        """Lexically analyze `text`.

        Yields pairs (`name`, `tokentext`).
        """
        end = len(text)
        state = self.state
        regexes = self.regexes
        toks = self.toks
        start = 0
        while start < end:
            for match in regexes[state].finditer(text, start):
                name = match.lastgroup
                tok = toks[name]
                toktext = match.group(name)
                start += len(toktext)
                yield(tok.name, toktext)
                if tok.next:
                    # State change: break out so the outer while rescans with
                    # the new state's regex, resuming at the updated start.
                    state = tok.next
                    break
        # Remember the final state for the next call (partial-token support).
        self.state = state
    #@-others

362#@+node:ekr.20200131110322.6: *3* function: literals 

def literals(choices, prefix="", suffix=""):
    """
    Create a regex from a space-separated list of literal `choices`.

    If provided, `prefix` and `suffix` will be attached to each choice
    individually.
    """
    words = choices.split()
    alternatives = [prefix + re.escape(word) + suffix for word in words]
    return "|".join(alternatives)

372 

373#@+node:ekr.20200131110322.10: *3* class JsLexer(Lexer) 

class JsLexer(Lexer):
    """A Javascript lexer

    >>> lexer = JsLexer()
    >>> list(lexer.lex("a = 1"))
    [('id', 'a'), ('ws', ' '), ('punct', '='), ('ws', ' '), ('dnum', '1')]

    This doesn't properly handle non-Ascii characters in the Javascript source.

    """

    #@+<< constants >>
    #@+node:ekr.20200131190707.1: *4* << constants >> (JsLexer)

    # Because these tokens are matched as alternatives in a regex, longer possibilities
    # must appear in the list before shorter ones, for example, '>>' before '>'.
    #
    # Note that we don't have to detect malformed Javascript, only properly lex
    # correct Javascript, so much of this is simplified.

    # Details of Javascript lexical structure are taken from
    # http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-262.pdf

    # A useful explanation of automatic semicolon insertion is at
    # http://inimino.org/~inimino/blog/javascript_semicolons

    # See https://stackoverflow.com/questions/6314614/match-any-unicode-letter

    # Rules common to both states, tried before the state-specific rules.
    both_before = [
        Tok("comment", r"/\*(.|\n)*?\*/"),
        Tok("linecomment", r"//.*?$"),
        Tok("ws", r"\s+"),
        Tok("keyword", literals("""
            async await
            break case catch class const continue debugger
            default delete do else enum export extends
            finally for function if import in instanceof new
            return super switch this throw try typeof var
            void while with
            """, suffix=r"\b"), next='reg'),
        Tok("reserved", literals("null true false", suffix=r"\b"), next='div'),
        #
        # EKR: This would work if patterns were compiled with the re.UNICODE flag.
        # However, \w is not the same as valid JS characters.
        # In any case, the JS importer doesn't need to handle id's carefully.
        #
        # Tok("id", r"""([\w$])([\w\d]*)""", next='div'),
        #
        # Fix: the first-char class contained a stray space (whitespace is
        # significant *inside* a character class, even with re.VERBOSE), and
        # the first \u escape used [0-9a-fA-Z] instead of hex [0-9a-fA-F].
        Tok("id", r"""
                  ([a-zA-Z_$]|\\u[0-9a-fA-F]{4})        # first char
                  ([a-zA-Z_$0-9]|\\u[0-9a-fA-F]{4})*    # rest chars
                  """, next='div'),
        Tok("hnum", r"0[xX][0-9a-fA-F]+", next='div'),
        Tok("onum", r"0[0-7]+"),
        Tok("dnum", r"""
                    (   (0|[1-9][0-9]*)     # DecimalIntegerLiteral
                        \.                  # dot
                        [0-9]*              # DecimalDigits-opt
                        ([eE][-+]?[0-9]+)?  # ExponentPart-opt
                    |
                        \.                  # dot
                        [0-9]+              # DecimalDigits
                        ([eE][-+]?[0-9]+)?  # ExponentPart-opt
                    |
                        (0|[1-9][0-9]*)     # DecimalIntegerLiteral
                        ([eE][-+]?[0-9]+)?  # ExponentPart-opt
                    )
                    """, next='div'),
        Tok("punct", literals("""
            >>>= === !== >>> <<= >>= <= >= == != << >> &&
            || += -= *= %= &= |= ^=
            """), next="reg"),
        Tok("punct", literals("++ -- ) ]"), next='div'),
        Tok("punct", literals("{ } ( [ . ; , < > + - * % & | ^ ! ~ ? : ="), next='reg'),
        Tok("string", r'"([^"\\]|(\\(.|\n)))*?"', next='div'),
        Tok("string", r"'([^'\\]|(\\(.|\n)))*?'", next='div'),
    ]

    # Catch-all, tried last in both states.
    both_after = [
        Tok("other", r"."),
    ]

    # The two lexer states, distinguished by the meaning of a slash.
    states = {
        'div':  # slash will mean division
            both_before + [
                Tok("punct", literals("/= /"), next='reg'),
            ] + both_after,

        'reg':  # slash will mean regex
            both_before + [
                Tok("regex",
                    r"""
                    /                       # opening slash
                    # First character is..
                    (   [^*\\/[]            # anything but * \ / or [
                    |   \\.                 # or an escape sequence
                    |   \[                  # or a class, which has
                        (   [^\]\\]         # anything but \ or ]
                        |   \\.             # or an escape sequence
                        )*                  # many times
                        \]
                    )
                    # Following characters are same, except for excluding a star
                    (   [^\\/[]             # anything but \ / or [
                    |   \\.                 # or an escape sequence
                    |   \[                  # or a class, which has
                        (   [^\]\\]         # anything but \ or ]
                        |   \\.             # or an escape sequence
                        )*                  # many times
                        \]
                    )*                      # many times
                    /                       # closing slash
                    [a-zA-Z0-9]*            # trailing flags
                    """, next='div'),
            ] + both_after,
    }
    #@-<< constants >>

    #@+others
    #@+node:ekr.20200131110322.11: *4* JsLexer.__init__
    def __init__(self):
        """Create the lexer, starting in the 'reg' state (a slash means regex)."""
        super().__init__(self.states, 'reg')
    #@-others

497#@+node:ekr.20200131070055.1: ** class TestJSImporter (importers/javascript.py) 

class TestJSImporter(unittest.TestCase):
    """Stand-alone unit tests for the JavaScript importer."""
    #@+others
    #@+node:ekr.20200202093420.1: *3* test_get_trailing_comments
    def test_get_trailing_comments(self):

        # Each entry is (source text, expected number of trailing comment lines).
        table = (
            # Test 1
            ("""\
head
// tail""", 1),

            # Test 2
            ("""\
head
/* comment 1
* comment 2
*/""", 3),

            # Test 3
            ("""\
head
/* comment 1
* comment 2
*/
tail""", 0),  # no tail

            # Test 4
            ("""\
head
// comment
tail""", 0),  # no tail

        )  # End table.
        for s, expected_length in table:
            x = JS_Importer(None)
            s = textwrap.dedent(s)
            lines = g.splitLines(s)
            head, tail = x.get_trailing_comments(lines)
            # The tail must be exactly the last expected_length lines.
            expected_lines = lines[-expected_length :] if expected_length else []
            assert tail == expected_lines, (repr(tail), repr(expected_lines))
    #@+node:ekr.20200202104932.1: *3* test_JsLex
    def test_JsLex(self):

        # Each entry is (expected token kind, inputs that must lex to that kind).
        table = (
            ('id', ('f_', '$', 'A1', 'abc')),
            ('other', ('ÁÁ',)),  # Unicode strings are not handled by JsLex.
            ('keyword', ('async', 'await', 'if')),
            ('punct', ('(', ')', '{', '}', ',', ':', ';')),
            # ('num', ('9', '2')),  # This test doesn't matter at present.
        )
        for kind, data in table:
            for contents in data:
                for name, tok in JsLexer().lex(contents):
                    assert name == kind, f"expected {kind!s} got {name!s} {tok!r} {contents}"

    #@+node:ekr.20200203051839.1: *3* test_starts_block
    def test_starts_block(self):

        # Each entry is (expected truthy result, line to test).
        table = (
            (1, 'xx) => {}'),
            (1, 'class c1'),
            (1, 'function f1'),
            (1, 'xx(function f2'),
            (1, 'xx = function f3'),
            (1, 'xx, function f4'),
            (0, 'a = "function"'),
            (0, 'a = /function/'),
        )
        for expected, line in table:
            x = JS_Importer(None)
            lines = [line]
            new_state = JS_ScanState()
            # starts_block requires an increase over prev_state's level.
            new_state.curlies += 1
            prev_state = JS_ScanState()
            results = x.starts_block(0, lines, new_state, prev_state)
            assert expected == results, f"expected: {expected} got: {int(results)} {line!r}\n"
    #@+node:ekr.20200203060718.1: *3* test_scan_line
    def test_scan_line(self):

        # Each entry is ((curlies, parens, context), prev_context, line).
        table = (
            # result       prev_context  s
            ((0, 0, '"'), "", r'"string'),
            ((0, 0, '/*'), "", r'/* line 1'),
            ((0, 0, '/*'), "/*", r'line 2'),  # New.
            ((0, 0, ''), "/*", r'line 3 */'),  # New.
            ((0, 0, ''), "", r'a + b // /*'),
            ((0, 1, ''), "", r'(function'),
            ((1, 1, ''), "", r'(function(a) {'),
            ((0, 0, ''), "", r'var x = /abc/'),
            ((0, 0, ''), "", r'var x = /a"c/'),
            ((0, 0, ''), "", r'var x = /a\//'),
            ((0, 0, ''), "", r'var x = /a\//'),
            ((0, 1, ''), "", r'var x = (0,'),
        )
        for result, prev_context, s in table:
            importer = JS_Importer(None)
            prev_state = JS_ScanState()
            prev_state.context = prev_context
            new_state = importer.scan_line(s, prev_state)
            curlies, parens, context = result
            ok = (
                new_state.curlies == curlies and
                new_state.parens == parens and
                new_state.context == context)
            assert ok, (
                f"\n"
                f" expected: curlies: {curlies}, parens: {parens}, context: {context!r}\n"
                f"new_state: {new_state}\n"
                f"        s: {s!r}")
    #@-others

610#@-others 

# Registration data consumed by Leo's importer machinery.
importer_dict = {
    # NOTE(review): presumably do_import() returns the import callback
    # — confirm against linescanner.Importer.do_import.
    'func': JS_Importer.do_import(),
    'extensions': ['.js',],  # File extensions handled by this importer.
}
# Allow running the embedded TestJSImporter suite directly.
if __name__ == '__main__':
    unittest.main()
#@@language python
#@@tabwidth -4
#@-leo