Coverage for C:\leo.repo\leo-editor\leo\plugins\importers\javascript.py: 79%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

265 statements  

1#@+leo-ver=5-thin 

2#@+node:ekr.20140723122936.18144: * @file ../plugins/importers/javascript.py 

3"""The @auto importer for JavaScript.""" 

4import re 

5import textwrap 

6import unittest 

7from typing import List 

8from leo.core import leoGlobals as g 

9from leo.plugins.importers import linescanner 

10Importer = linescanner.Importer 

11Target = linescanner.Target 

12#@+others 

13#@+node:ekr.20140723122936.18049: ** class JS_Importer 

class JS_Importer(Importer):
    """The @auto importer for JavaScript."""

    def __init__(self, importCommands, force_at_others=False, **kwargs):
        """
        The ctor for the JS_Importer class.

        force_at_others and **kwargs are accepted for signature
        compatibility with other importers; they are not used here.
        """
        # Init the base class.
        super().__init__(
            importCommands,
            gen_refs=False,  # Fix #639.
            language='javascript',
            state_class=JS_ScanState,
        )

    #@+others
    #@+node:ekr.20180123051226.1: *3* js_i.post_pass & helpers
    def post_pass(self, parent):
        """
        Optional Stage 2 of the javascript pipeline.

        All substages **must** use the API for setting body text. Changing
        p.b directly will cause asserts to fail later in i.finish().
        """
        self.clean_all_headlines(parent)
        self.remove_singleton_at_others(parent)
        self.clean_all_nodes(parent)
        self.move_trailing_comments(parent)
    #@+node:ekr.20180123051401.1: *4* js_i.remove_singleton_at_others
    # Matches a line consisting of an (optionally indented) @others directive.
    at_others = re.compile(r'^\s*@others\b')

    def remove_singleton_at_others(self, parent):
        """Replace @others by the body of a singleton child node."""
        found = False
        for p in parent.subtree():
            if p.numberOfChildren() == 1:
                child = p.firstChild()
                lines = self.get_lines(p)
                matches = [i for i, s in enumerate(lines) if self.at_others.match(s)]
                if len(matches) == 1:
                    found = True
                    i = matches[0]
                    child_lines = self.get_lines(child)
                    # Splice the child's lines in place of the @others line.
                    lines = lines[:i] + child_lines + lines[i + 1 :]
                    self.set_lines(p, lines)
                    # Delete child later. Is this enough???
                    self.set_lines(child, [])
        return found
    #@+node:ekr.20180123060307.1: *4* js_i.remove_organizer_nodes
    def remove_organizer_nodes(self, parent):
        """Remove all organizer nodes created by i.delete_all_empty_nodes."""
        # Careful: Restart this loop whenever we find an organizer.
        found = True
        while found:
            found = False
            for p in parent.subtree():
                lines = self.get_lines(p)
                if p.h.lower() == 'organizer' and not lines:
                    p.promote()
                    p.doDelete()
                    found = True  # Restart the loop.
    #@+node:ekr.20200202071105.1: *4* js_i.clean_all_nodes
    def clean_all_nodes(self, parent):
        """Remove common leading whitespace from all nodes."""
        for p in parent.subtree():
            lines = self.get_lines(p)
            s = textwrap.dedent(''.join(lines))
            self.set_lines(p, g.splitLines(s))
    #@+node:ekr.20200202091613.1: *4* js_i.move_trailing_comments & helper (new)
    def move_trailing_comments(self, parent):
        """Move all trailing comments to the start of the next node."""
        for p in parent.subtree():
            next = p.next()
            if next:
                lines = self.get_lines(p)
                head_lines, tail_lines = self.get_trailing_comments(lines)
                if tail_lines:
                    self.set_lines(p, head_lines)
                    next_lines = self.get_lines(next)
                    self.set_lines(next, tail_lines + next_lines)
    #@+node:ekr.20200202092332.1: *5* js_i.get_trailing_comments
    def get_trailing_comments(self, lines):
        """
        Return the trailing comments of p.
        Return (head_lines, tail_lines).
        """
        s = ''.join(lines)
        head: List[str] = []
        tail: List[str] = []
        # Bug fix: the original tested the *method* `s.strip` (always truthy),
        # so this early return could never fire.
        if not s.strip():
            return head, tail
        in_block_comment = False
        head = lines
        for i, line in enumerate(lines):
            s = line.strip()
            if in_block_comment:
                tail.append(line)
                if s.startswith('*/'):
                    in_block_comment = False
            elif s.startswith('/*'):
                in_block_comment = True
                head = lines[:i]
                tail = [line]
            elif s.startswith('//'):
                head = lines[:i]
                tail = [line]
            elif s:  # Clear any previous comments.
                head = lines
                tail = []
        return head, tail
    #@+node:ekr.20161105140842.5: *3* js_i.scan_line (rewritten)
    def scan_line(self, s, prev_state):
        """
        Update the scan state at the *end* of the line.
        Return JS_ScanState({'context':context, 'curlies':curlies, 'parens':parens})

        This code uses JsLex to scan the tokens, which scans strings and regexes properly.

        This code also handles *partial* tokens: tokens continued from the
        previous line or continued to the next line.
        """
        context = prev_state.context
        curlies, parens = prev_state.curlies, prev_state.parens
        # Scan tokens, updating context and counts.
        prev_val = None
        for kind, val in JsLexer().lex(s):
            # g.trace(f"context: {context:2} kind: {kind:10} val: {val!r}")
            if context:
                # Inside a string or block comment: only the closing token matters.
                if context in ('"', "'") and kind in ('other', 'punct') and val == context:
                    context = ''
                elif (
                    context == '/*'
                    and kind in ('other', 'punct')
                    and prev_val == '*'
                    and val == '/'
                ):
                    context = ''
            elif kind in ('other', 'punct') and val in ('"', "'"):
                context = val
            elif kind in ('other', 'punct') and val == '*' and prev_val == '/':
                context = '/*'
            elif kind in ('other', 'punct'):
                # Note: an unreachable duplicate of the '/*' test was removed
                # here; the preceding elif already handles that case.
                if val == '{':
                    curlies += 1
                elif val == '}':
                    curlies -= 1
                elif val == '(':
                    parens += 1
                elif val == ')':
                    parens -= 1
            prev_val = val
        d = {'context': context, 'curlies': curlies, 'parens': parens}
        state = JS_ScanState(d)
        return state
    #@+node:ekr.20171224145755.1: *3* js_i.starts_block
    # Patterns matching lines that open a JS function or class.
    func_patterns = [
        re.compile(r'.*?\)\s*=>\s*\{'),
        re.compile(r'\s*class\b'),
        re.compile(r'\s*function\b'),
        re.compile(r'.*?[(=,]\s*function\b'),
    ]

    def starts_block(self, i, lines, new_state, prev_state):
        """True if the new state starts a block."""
        if new_state.level() <= prev_state.level():
            return False
        # Remove strings and regexes from the line before applying the patterns.
        cleaned_line = []
        for kind, val in JsLexer().lex(lines[i]):
            if kind not in ('string', 'regex'):
                cleaned_line.append(val)
        # Search for any of the patterns.
        line = ''.join(cleaned_line)
        for pattern in self.func_patterns:
            if pattern.match(line) is not None:
                return True
        return False
    #@+node:ekr.20200131193217.1: *3* js_i.ends_block
    def ends_block(self, line, new_state, prev_state, stack):
        """True if line ends the block."""
        # Comparing new_state against prev_state does not work for python.
        top = stack[-1]
        return new_state.level() < top.state.level()
    #@+node:ekr.20161101183354.1: *3* js_i.clean_headline
    clean_regex_list1 = [
        re.compile(r'\s*\(?(function\b\s*[\w]*)\s*\('),
            # (function name (
        re.compile(r'\s*(\w+\s*\:\s*\(*\s*function\s*\()'),
            # name: (function (
        re.compile(r'\s*(?:const|let|var)\s*(\w+\s*(?:=\s*.*)=>)'),
            # const|let|var name = .* =>
    ]
    clean_regex_list2 = [
        re.compile(r'(.*\=)(\s*function)'),
            # .* = function
    ]
    clean_regex_list3 = [
        re.compile(r'(.*\=\s*new\s*\w+)\s*\(.*(=>)'),
            # .* = new name .* =>
        re.compile(r'(.*)\=\s*\(.*(=>)'),
            # .* = ( .* =>
        re.compile(r'(.*)\((\s*function)'),
            # .* ( function
        re.compile(r'(.*)\(.*(=>)'),
            # .* ( .* =>
        re.compile(r'(.*)(\(.*\,\s*function)'),
            # .* \( .*, function
    ]
    clean_regex_list4 = [
        re.compile(r'(.*)\(\s*(=>)'),
            # .* ( =>
    ]

    def clean_headline(self, s, p=None, trace=False):
        """Return a cleaned up headline s."""
        # pylint: disable=arguments-differ
        s = s.strip()
        # Don't clean a headline twice.
        if s.endswith('>>') and s.startswith('<<'):
            return s
        for ch in '{(=':
            if s.endswith(ch):
                s = s[:-1].strip()
        # First regex cleanup. Use \1.
        for pattern in self.clean_regex_list1:
            m = pattern.match(s)
            if m:
                s = m.group(1)
                break
        # Second regex cleanup. Use \1 + \2
        for pattern in self.clean_regex_list2:
            m = pattern.match(s)
            if m:
                s = m.group(1) + m.group(2)
                break
        # Third regex cleanup. Use \1 + ' ' + \2
        for pattern in self.clean_regex_list3:
            m = pattern.match(s)
            if m:
                s = m.group(1) + ' ' + m.group(2)
                break
        # Fourth cleanup. Use \1 + ' ' + \2 again
        for pattern in self.clean_regex_list4:
            m = pattern.match(s)
            if m:
                s = m.group(1) + ' ' + m.group(2)
                break
        # Final whitespace cleanups.
        # Bug fix: was s.replace(' ', ' '), a no-op; the intent is to
        # collapse double spaces to single spaces.
        s = s.replace('  ', ' ')
        s = s.replace(' (', '(')
        return g.truncate(s, 100)
    #@-others

265#@+node:ekr.20161105092745.1: ** class JS_ScanState 

class JS_ScanState:
    """A class representing the state of the javascript line-oriented scan."""

    def __init__(self, d=None):
        """Init from d, an optional dict with 'context', 'curlies' and 'parens' keys."""
        if not d:
            self.context = ''
            self.curlies = self.parens = 0
        else:
            # Note: d is *different* from the dict created by i.scan_line.
            self.context = d.get('context')
            self.curlies = d.get('curlies')
            self.parens = d.get('parens')

    def __repr__(self):
        """Return a readable summary of the context and bracket counts."""
        return (
            f"JS_ScanState context: {self.context!r} "
            f"curlies: {self.curlies} parens: {self.parens}")

    __str__ = __repr__

    #@+others
    #@+node:ekr.20161119115505.1: *3* js_state.level
    def level(self):
        """Return the nesting level as a (curlies, parens) tuple."""
        return self.curlies, self.parens
    #@+node:ekr.20161119051049.1: *3* js_state.update
    def update(self, data):
        """
        Update the state using the 6-tuple returned by i.scan_line.
        Return i = data[1]
        """
        # The squares delta and backslash-newline flag are intentionally unused.
        context, i, delta_c, delta_p, _delta_s, _bs_nl = data
        self.context = context
        self.curlies += delta_c
        self.parens += delta_p
        return i

    #@-others

307 

308#@+node:ekr.20200131110322.2: ** JsLexer... 

309# JsLex: a lexer for Javascript 

310# Written by Ned Batchelder. Used by permission. 

311# 

312# Licensed under the Apache License: http://www.apache.org/licenses/LICENSE-2.0 

313# For details: https://bitbucket.org/ned/jslex/src/default/NOTICE.txt 

314#@+node:ekr.20200131110322.4: *3* class Tok 

class Tok:
    """A specification for a token class."""

    # Class-level counter: the next unique id to hand out.
    num = 0

    def __init__(self, name, regex, next=None):
        """Record the token's name, its regex, and an optional successor state."""
        # Take the current counter value as this Tok's id, then advance it.
        self.id, Tok.num = Tok.num, Tok.num + 1
        self.name, self.regex, self.next = name, regex, next

326#@+node:ekr.20200131110322.7: *3* class Lexer 

class Lexer:
    """A generic multi-state regex-based lexer."""

    #@+others
    #@+node:ekr.20200131110322.8: *4* Lexer.__init__
    def __init__(self, states, first):
        """
        Build one alternation regex per state.

        states: a dict mapping each state name to a list of Tok specifications.
        first: the name of the initial state.
        """
        self.regexes = {}
        self.toks = {}
        flags = re.MULTILINE | re.VERBOSE  # |re.UNICODE)
        for state_name, rules in states.items():
            # Each Tok becomes a named group so the winner can be identified.
            alternatives = []
            for tok in rules:
                groupid = f"t{tok.id}"
                self.toks[groupid] = tok
                alternatives.append(f"(?P<{groupid}>{tok.regex})")
            self.regexes[state_name] = re.compile("|".join(alternatives), flags)
        self.state = first

    #@+node:ekr.20200131110322.9: *4* Lexer.lex
    def lex(self, text):
        """Lexically analyze `text`.

        Yields pairs (`name`, `tokentext`).
        """
        pos, limit = 0, len(text)
        state = self.state
        while pos < limit:
            # Consume only the *first* match at or after pos, then rescan,
            # because the matched token may have switched the state.
            for m in self.regexes[state].finditer(text, pos):
                group_name = m.lastgroup
                tok = self.toks[group_name]
                tok_text = m.group(group_name)
                pos += len(tok_text)
                yield tok.name, tok_text
                if tok.next:
                    state = tok.next
                break
        self.state = state
    #@-others

369#@+node:ekr.20200131110322.6: *3* function: literals 

def literals(choices, prefix="", suffix=""):
    """
    Create a regex from a space-separated list of literal `choices`.

    If provided, `prefix` and `suffix` will be attached to each choice
    individually.

    """
    decorated = (prefix + re.escape(choice) + suffix for choice in choices.split())
    return "|".join(decorated)

379 

380#@+node:ekr.20200131110322.10: *3* class JsLexer(Lexer) 

class JsLexer(Lexer):
    """A Javascript lexer

    >>> lexer = JsLexer()
    >>> list(lexer.lex("a = 1"))
    [('id', 'a'), ('ws', ' '), ('punct', '='), ('ws', ' '), ('dnum', '1')]

    This doesn't properly handle non-Ascii characters in the Javascript source.

    """

    #@+<< constants >>
    #@+node:ekr.20200131190707.1: *4* << constants >> (JsLexer)

    # Because these tokens are matched as alternatives in a regex, longer possibilities
    # must appear in the list before shorter ones, for example, '>>' before '>'.
    #
    # Note that we don't have to detect malformed Javascript, only properly lex
    # correct Javascript, so much of this is simplified.

    # Details of Javascript lexical structure are taken from
    # http://www.ecma-international.org/publications/files/ECMA-ST/ECMA-262.pdf

    # A useful explanation of automatic semicolon insertion is at
    # http://inimino.org/~inimino/blog/javascript_semicolons

    # See https://stackoverflow.com/questions/6314614/match-any-unicode-letter

    # Tokens valid in both the 'div' and 'reg' states, tried *before*
    # the state-specific tokens.
    both_before = [
        Tok("comment", r"/\*(.|\n)*?\*/"),
        Tok("linecomment", r"//.*?$"),
        Tok("ws", r"\s+"),
        Tok("keyword", literals("""
            async await
            break case catch class const continue debugger
            default delete do else enum export extends
            finally for function if import in instanceof new
            return super switch this throw try typeof var
            void while with
            """, suffix=r"\b"), next='reg'),
        Tok("reserved", literals("null true false", suffix=r"\b"), next='div'),
        #
        # EKR: This would work if patterns were compiled with the re.UNICODE flag.
        # However, \w is not the same as valid JS characters.
        # In any case, the JS importer doesn't need to handle id's carefully.
        #
        # Tok("id", r"""([\w$])([\w\d]*)""", next='div'),
        #
        Tok("id", r"""
            ([a-zA-Z_$ ]|\\u[0-9a-fA-Z]{4})     # first char
            ([a-zA-Z_$0-9]|\\u[0-9a-fA-F]{4})*  # rest chars
            """, next='div'),
        Tok("hnum", r"0[xX][0-9a-fA-F]+", next='div'),
        Tok("onum", r"0[0-7]+"),
        Tok("dnum", r"""
            (   (0|[1-9][0-9]*)       # DecimalIntegerLiteral
                \.                    # dot
                [0-9]*                # DecimalDigits-opt
                ([eE][-+]?[0-9]+)?    # ExponentPart-opt
            |
                \.                    # dot
                [0-9]+                # DecimalDigits
                ([eE][-+]?[0-9]+)?    # ExponentPart-opt
            |
                (0|[1-9][0-9]*)       # DecimalIntegerLiteral
                ([eE][-+]?[0-9]+)?    # ExponentPart-opt
            )
            """, next='div'),
        Tok("punct", literals("""
            >>>= === !== >>> <<= >>= <= >= == != << >> &&
            || += -= *= %= &= |= ^=
            """), next="reg"),
        Tok("punct", literals("++ -- ) ]"), next='div'),
        Tok("punct", literals("{ } ( [ . ; , < > + - * % & | ^ ! ~ ? : ="), next='reg'),
        Tok("string", r'"([^"\\]|(\\(.|\n)))*?"', next='div'),
        Tok("string", r"'([^'\\]|(\\(.|\n)))*?'", next='div'),
    ]

    # The catch-all token, tried *after* any state-specific tokens.
    both_after = [
        Tok("other", r"."),
    ]

    states = {
        'div':  # slash will mean division
            both_before + [
                Tok("punct", literals("/= /"), next='reg'),
            ] + both_after,

        'reg':  # slash will mean regex
            both_before + [
                Tok("regex",
                    r"""
                    /                       # opening slash
                    # First character is..
                    (   [^*\\/[]            # anything but * \ / or [
                    |   \\.                 # or an escape sequence
                    |   \[                  # or a class, which has
                            (   [^\]\\]     # anything but \ or ]
                            |   \\.         # or an escape sequence
                            )*              # many times
                        \]
                    )
                    # Following characters are same, except for excluding a star
                    (   [^\\/[]             # anything but \ / or [
                    |   \\.                 # or an escape sequence
                    |   \[                  # or a class, which has
                            (   [^\]\\]     # anything but \ or ]
                            |   \\.         # or an escape sequence
                            )*              # many times
                        \]
                    )*                      # many times
                    /                       # closing slash
                    [a-zA-Z0-9]*            # trailing flags
                    """, next='div'),
            ] + both_after,
    }
    #@-<< constants >>

    #@+others
    #@+node:ekr.20200131110322.11: *4* JsLexer.__init__
    def __init__(self):
        # Start in the 'reg' state: at the start of input a slash
        # introduces a regex literal, not division.
        super().__init__(self.states, 'reg')
    #@-others

504#@+node:ekr.20200131070055.1: ** class TestJSImporter (importers/javascript.py) 

class TestJSImporter(unittest.TestCase):
    """Unit tests for the JavaScript importer and the JsLex lexer."""
    #@+others
    #@+node:ekr.20200202093420.1: *3* test_get_trailing_comments
    def test_get_trailing_comments(self):
        # Each entry is (source, expected number of trailing comment lines).
        table = (
            # Test 1
            ("""\
                head
                // tail""", 1),

            # Test 2
            ("""\
                head
                /* comment 1
                 * comment 2
                 */""", 3),

            # Test 3
            ("""\
                head
                /* comment 1
                 * comment 2
                 */
                tail""", 0),  # no tail

            # Test 4
            ("""\
                head
                // comment
                tail""", 0),  # no tail

        )  # End table.
        for s, expected_length in table:
            x = JS_Importer(None)
            s = textwrap.dedent(s)
            lines = g.splitLines(s)
            head, tail = x.get_trailing_comments(lines)
            expected_lines = lines[-expected_length :] if expected_length else []
            assert tail == expected_lines, (repr(tail), repr(expected_lines))
    #@+node:ekr.20200202104932.1: *3* test_JsLex
    def test_JsLex(self):
        # Each entry is (expected token kind, inputs lexing entirely to that kind).
        table = (
            ('id', ('f_', '$', 'A1', 'abc')),
            ('other', ('ÁÁ',)),  # Unicode strings are not handled by JsLex.
            ('keyword', ('async', 'await', 'if')),
            ('punct', ('(', ')', '{', '}', ',', ':', ';')),
            # ('num', ('9', '2')),  # This test doesn't matter at present.
        )
        for kind, data in table:
            for contents in data:
                for name, tok in JsLexer().lex(contents):
                    assert name == kind, f"expected {kind!s} got {name!s} {tok!r} {contents}"
                    # print(f"{kind!s:10} {tok!r:10}")

    #@+node:ekr.20200203051839.1: *3* test_starts_block
    def test_starts_block(self):
        # Each entry is (expected result as an int, source line).
        table = (
            (1, 'xx) => {}'),
            (1, 'class c1'),
            (1, 'function f1'),
            (1, 'xx(function f2'),
            (1, 'xx = function f3'),
            (1, 'xx, function f4'),
            (0, 'a = "function"'),
            (0, 'a = /function/'),
        )
        for expected, line in table:
            x = JS_Importer(None)
            lines = [line]
            new_state = JS_ScanState()
            # starts_block requires new_state.level() > prev_state.level().
            new_state.curlies += 1
            prev_state = JS_ScanState()
            results = x.starts_block(0, lines, new_state, prev_state)
            # if expected != results: x.scan_line(line, prev_state
            assert expected == results, f"expected: {expected} got: {int(results)} {line!r}\n"
    #@+node:ekr.20200203060718.1: *3* test_scan_line
    def test_scan_line(self):
        # Each entry is ((curlies, parens, context), prev_context, source line).
        table = (
            # result        prev_context    s
            ((0, 0, '"'),   "",             r'"string'),
            ((0, 0, '/*'),  "",             r'/* line 1'),
            ((0, 0, '/*'),  "/*",           r'line 2'),  # New.
            ((0, 0, ''),    "/*",           r'line 3 */'),  # New.
            ((0, 0, ''),    "",             r'a + b // /*'),
            ((0, 1, ''),    "",             r'(function'),
            ((1, 1, ''),    "",             r'(function(a) {'),
            ((0, 0, ''),    "",             r'var x = /abc/'),
            ((0, 0, ''),    "",             r'var x = /a"c/'),
            ((0, 0, ''),    "",             r'var x = /a\//'),
            ((0, 0, ''),    "",             r'var x = /a\//'),
            ((0, 1, ''),    "",             r'var x = (0,'),
        )
        for result, prev_context, s in table:
            importer = JS_Importer(None)
            prev_state = JS_ScanState()
            prev_state.context = prev_context
            new_state = importer.scan_line(s, prev_state)
            curlies, parens, context = result
            ok = (
                new_state.curlies == curlies and
                new_state.parens == parens and
                new_state.context == context)
            assert ok, (
                f"\n"
                f" expected: curlies: {curlies}, parens: {parens}, context: {context!r}\n"
                f"new_state: {new_state}\n"
                f"        s: {s!r}")
    #@-others

617#@-others 

# The entry Leo's plugin machinery uses to dispatch .js files to this importer.
importer_dict = {
    'func': JS_Importer.do_import(),
    'extensions': ['.js',],
}
# Allow the unit tests above to be run directly.
if __name__ == '__main__':
    unittest.main()

624#@@language python 

625#@@tabwidth -4 

626#@-leo