Coverage for C:\Repos\leo-editor\leo\plugins\importers\python.py: 98%

152 statements  

« prev     ^ index     » next       coverage.py v6.4, created at 2022-05-24 10:21 -0500

1#@+leo-ver=5-thin 

2#@+node:ekr.20211209153303.1: * @file ../plugins/importers/python.py 

3"""The new, tokenize based, @auto importer for Python.""" 

4import sys 

5import tokenize 

6import token 

7from collections import defaultdict 

8import leo.core.leoGlobals as g 

9#@+others 

10#@+node:ekr.20211209052710.1: ** do_import 

def do_import(c, s, parent):
    """
    Import the Python source text s into the outline rooted at parent.

    The text is split into nodes by split_root, then the @language and
    @tabwidth directives are prepended to parent's body. When the
    'put-class-in-imported-headlines' setting is true, the headline of
    every node in parent's subtree whose body contains a line starting
    with 'class ' gets a 'class ' prefix.

    Returns False (after printing a message) when running on a Python
    older than 3.7, otherwise True.
    """
    if sys.version_info < (3, 7, 0):
        g.es_print('The python importer requires python 3.7 or above')
        return False

    split_root(parent, s.splitlines(True))
    parent.b = f'@language python\n@tabwidth -4\n{parent.b}'

    if not c.config.getBool('put-class-in-imported-headlines'):
        return True

    # Only the nodes yielded by parent.subtree() are renamed; parent.h is
    # deliberately left alone.
    for p in parent.subtree():
        text = p.b
        if text.startswith('class ') or '\nclass ' in text:
            p.h = f'class {p.h}'
    return True

23#@+node:vitalije.20211201230203.1: ** split_root 

# A node that spans fewer lines than this is never split into child nodes.
SPLIT_THRESHOLD = 10
def split_root(root, lines):
    '''
    Parse the given lines and put every top level function definition
    and class definition in a separate node, each a direct child of the
    root. All longer class nodes are further divided, each method in a
    separate node.

    Comments and decorators directly above a definition are put in the
    same node as the definition.

    root:  the node to fill: its body is set, its existing children are
           deleted and new children are inserted as needed.
    lines: the source as a list of lines, each with its line ending
           (do_import passes s.splitlines(True)).

    Returns the list of definition tuples found in lines. See the
    comment above the computation of `definitions` for the layout.
    '''
    #@+others
    #@+node:vitalije.20211208183603.1: *3* is_intro_line
    def is_intro_line(n, col):
        """
        Return True if line n may belong to the `intro` of a definition
        that starts at column col.

        An intro line is a blank line, a decorator line, or a comment line
        that starts at the same column as the def/class line.
        """
        # Filter the tokens of line n: white space tokens are of no interest,
        # only the tokens containing some text.
        xs = [x for x in lntokens[n] if x[0] not in (token.DEDENT, token.INDENT, token.NL)]

        if not xs:
            # All tokens in this line are white space, so this is a blank
            # line. Blank lines are allowed inside the block of comments.
            return True

        t = xs[0]  # The first non blank token in line n.
        if t[2][1] != col:
            # Not at the same column as the definition: not an intro line.
            return False
        if t[0] == token.OP and t[1] == '@':
            # The line starts with `@`: it is a decorator.
            return True
        # A comment at the same column as the definition is an intro line;
        # anything else is not.
        return t[0] == token.COMMENT
    #@+node:vitalije.20211208084231.1: *3* get_intro
    def get_intro(row, col):
        """
        Return the number of preceding lines that can be considered as an
        `intro` to the function/class/method definition at line row, column col.
        """
        last = row
        for i in range(row - 1, 0, -1):
            if is_intro_line(i, col):
                last = i
            else:
                break
        # The intro should not start with a bunch of blank lines:
        # they are better added to the end of the preceding node.
        for i in range(last, row):
            if lines[i - 1].isspace():
                last = i + 1
        return row - last
    #@+node:vitalije.20211206182505.1: *3* mkreadline
    def mkreadline(lines):
        """Return a readline-style callable over lines, as tokenize requires."""
        itlines = iter(lines)
        def nextline():
            # An empty string signals the end of the input.
            return next(itlines, '')
        return nextline
    #@+node:vitalije.20211208092828.1: *3* itoks
    def itoks(i):
        """Yield (index, token) pairs of rawtokens, starting at index i."""
        yield from enumerate(rawtokens[i:], start=i)
    #@+node:vitalije.20211208092833.1: *3* search
    def search(i, k):
        """Yield (index, token) pairs for the tokens of type k at or after index i."""
        for j, t in itoks(i):
            if t[0] == k:
                yield j, t
    #@+node:vitalije.20211208092910.1: *3* getdefn
    def getdefn(start):
        """
        Return the definition tuple for the definition that starts at
        rawtokens[start], or None if no definition starts there.

        The tuple is (col, h1, h2, start_b, kind, name, c_ind, end_b).
        """
        tok = rawtokens[start]
        if tok[0] != token.NAME or tok[1] not in ('async', 'def', 'class'):
            return None

        # The following few values are easy to get.
        if tok[1] == 'async':
            kind = rawtokens[start + 1][1]
            name = rawtokens[start + 2][1]
        else:
            kind = tok[1]
            name = rawtokens[start + 1][1]
        if kind not in ('def', 'class'):
            # `async for` and `async with` are statements, not definitions.
            return None
        if kind == 'def' and rawtokens[start - 1][1] == 'async':
            # Already handled when the preceding `async` token was visited.
            return None
        a, col = tok[2]

        # Search for the end of the definition line. This one logical line
        # may be divided in several physical lines. At the end of this
        # logical line there will be a NEWLINE token.
        header_end = next(search(start + 1, token.NEWLINE), None)
        if header_end is None:
            # It isn't possible to calculate the remaining values.
            return None
        i, t = header_end
        # The last of the `header lines`.
        # These lines should not be indented in the node body.
        # The body lines *will* be indented.
        end_h = t[2][0]
        # In case we have a oneliner, define end_b here.
        end_b = end_h
        # The indented body starts on the next line.
        start_b = end_h + 1

        # Look ahead to check if we have a oneline definition or not.
        # That is, see whether INDENT or NEWLINE comes first.
        first_indent = next(search(i + 1, token.INDENT), None)
        first_newline = next(search(i + 1, token.NEWLINE), None)
        oneliner = True
        if first_indent is not None and first_newline is not None:
            # INDENT after the NEWLINE means the definition is in a single line.
            oneliner = first_indent[0] > first_newline[0]

        # Find the end of this definition.
        if oneliner:
            # The following lines will not be indented
            # because the definition was in the same line.
            c_ind = col
            # The end of the body is the same as the start of the body.
            end_b = start_b
        else:
            # We have some body lines.
            # c_ind is the length of the INDENT token's string plus col.
            c_ind = len(first_indent[1][1]) + col
            # Search for the end of this function/method/class body,
            # starting two tokens after the header's NEWLINE token.
            for _, t in itoks(i + 2):
                col2 = t[2][1]
                if col2 > col:
                    continue
                if t[0] in (token.DEDENT, token.COMMENT):
                    end_b = t[2][0]
                    break

        # Increase end_b to include all following blank lines.
        for j in range(end_b, len(lines) + 1):
            if lines[j - 1].isspace():
                end_b = j + 1
            else:
                break

        # Compute the number of `intro` lines.
        intro = get_intro(a, col)
        return col, a - intro, end_h, start_b, kind, name, c_ind, end_b
    #@+node:vitalije.20211208101750.1: *3* body
    def bodyLine(x, ind):
        """
        Return line x with ind leading white space characters removed.

        A line with less leading white space than ind is returned with its
        white space stripped and a prefix that records the missing amount.
        """
        if ind == 0 or x[:ind].isspace():
            return x[ind:] or '\n'
        n = len(x) - len(x.lstrip())
        return f'\\\\-{ind-n}.{x[n:]}'

    def body(a, b, ind):
        """Return the text of lines a..b-1 (1-based), each unindented by ind."""
        xlines = (bodyLine(x, ind) for x in lines[a - 1 : b and (b - 1)])
        return ''.join(xlines)
    #@+node:vitalije.20211208110301.1: *3* indent
    def indent(x, n):
        """Return x with n spaces prepended."""
        return x.rjust(len(x) + n)
    #@+node:vitalije.20211208104408.1: *3* mknode
    def mknode(p, start, start_b, end, l_ind, col, xdefs):
        """
        Set the body of node p and recursively add its children.

        start   - first line of this node
        start_b - first line of this node's function/class body (not used here)
        end     - first line after this node
        l_ind   - amount of white space to strip from left
        col     - column start of child nodes
        xdefs   - all definitions inside this node
        """
        # First find all defs that start at the same column
        # as our indented function/method/class body.
        tdefs = [x for x in xdefs if x[0] == col]

        if not tdefs or end - start < SPLIT_THRESHOLD:
            # There are no inner definitions or the total number of lines
            # is less than the threshold: all lines go into this node and
            # no further splitting is necessary.
            p.b = body(start, end, l_ind)
            return

        # Check the first inner definition.
        first_h1 = tdefs[0][1]
        if first_h1 > start:
            # The first inner definition starts later,
            # so we have some content before at-others.
            b1 = body(start, first_h1, l_ind)
        else:
            # Inner definitions start at the beginning of our body,
            # so at-others will be the first line in our body.
            b1 = ''
        o = indent('@others\n', col - l_ind)

        # For the part after at-others check the last of the inner definitions.
        last_end = tdefs[-1][-1]
        if last_end < end:
            # There are some lines after at-others.
            b2 = body(last_end, end, l_ind)
        else:
            # There are no lines after at-others.
            b2 = ''
        # Finally we can set our body.
        p.b = f'{b1}{o}{b2}'

        # Now add children for each of the inner definitions.
        # `last` keeps track of the last used line.
        last = first_h1
        for _col, h1, _h2, def_start_b, _kind, name, c_ind, end_b in tdefs:
            if h1 > last:
                # There are some declaration lines between two inner definitions.
                new_body = body(last, h1, col)  # #2500.
                p1 = p.insertAsLastChild()
                p1.h = declaration_headline(new_body)  # #2500
                p1.b = new_body
            p1 = p.insertAsLastChild()
            p1.h = name

            # Find all next level inner definitions: the definitions whose
            # starting and end line are between the start and the end of
            # this definition.
            subdefs = [x for x in xdefs if x[1] > h1 and x[-1] <= end_b]
            if subdefs:
                # There are some next level inner definitions, so split this node.
                mknode(p=p1
                      , start=h1
                      , start_b=def_start_b
                      , end=end_b
                      , l_ind=l_ind + col  # increase indentation for at-others
                      , col=c_ind
                      , xdefs=subdefs
                      )
            else:
                # There are no next level inner definitions, so just set
                # the body and continue to the next definition.
                p1.b = body(h1, end_b, col)

            last = end_b
    #@+node:ekr.20220320055103.1: *3* declaration_headline
    def declaration_headline(body_string):  # #2500
        """
        Return an informative headline for body_string, a group of declarations.
        """
        for s1 in g.splitLines(body_string):
            s = s1.strip()
            if s.startswith('#') and len(s.replace('#', '').strip()) > 1:
                # A non-trivial comment: Return the comment w/o the leading '#'.
                return s[1:].strip()
            if s and not s.startswith('#'):
                # A non-trivial non-comment.
                return s
        return "...some declarations"  # Return legacy headline.
    #@-others
    # rawtokens is a list of all tokens found in the input lines.
    rawtokens = list(tokenize.generate_tokens(mkreadline(lines)))

    # lntokens - line tokens are the tokens grouped by the line number
    #            on which they start.
    lntokens = defaultdict(list)
    for t in rawtokens:
        row = t[2][0]
        lntokens[row].append(t)

    # Create the list of all definitions in the token list,
    # both `def` and `class` definitions.
    # Each definition is a tuple with the following values:
    #
    # 0: col     - column where the definition starts
    # 1: h1      - line number of the first line of this node
    #              this line may be above the starting line
    #              (comment lines and decorators are in these lines)
    # 2: h2      - line number of the last line of the declaration
    #              it is the line number of the NEWLINE token that ends
    #              the logical definition line.
    # 3: start_b - line number of the first indented line of the
    #              function/class body.
    # 4: kind    - can be 'def' or 'class'
    # 5: name    - name of the function, class or method
    # 6: c_ind   - column used for the definitions nested in the body
    # 7: end_b   - line number of the first line after the definition
    #
    # getdefn returns None if the token at this index isn't the start
    # of a definition, or if it isn't possible to calculate all the values
    # mentioned earlier. Therefore, we filter the list.
    definitions = list(filter(None, map(getdefn, range(len(rawtokens) - 1))))

    # A preparation step.
    root.deleteAllChildren()

    # mknode sets the body and adds children recursively using the
    # precalculated definitions list.
    # Parameters are:
    #    p       - current node
    #    start   - line number of the first line of this node
    #    start_b - line number of the first line of this node's body
    #    end     - line number of the first line after this node
    #    l_ind   - this is the accumulated indentation through at-others
    #              it is the number of spaces that should be stripped from
    #              the beginning of each line in this node
    #    col     - the column at which start all of the inner definitions
    #              like methods or inner functions and classes
    #    xdefs   - list of the definitions covering this node
    mknode(p=root
          , start=1
          , start_b=1
          , end=len(lines) + 1
          , l_ind=0
          , col=0
          , xdefs=definitions
          )
    return definitions

345#@-others 

# Registration data for this importer: the import function and the
# file extensions it handles.
importer_dict = dict(
    func=do_import,
    extensions=['.py', '.pyw', '.pyi'],  # mypy uses .pyi extension.
)

350#@@language python 

351#@@tabwidth -4 

352#@-leo