Commit 3e583307 authored by Terry Jan Reedy

Issue #21686: idlelib/HyperParser.py - Update docstrings and comments and

replace \ line continuation. Tested against nearly done test_hyperparser.py.
parent e58e0c7f
""" """Provide advanced parsing abilities for the ParenMatch and other extensions.
HyperParser
=========== HyperParser uses PyParser. PyParser mostly gives information on the
This module defines the HyperParser class, which provides advanced parsing proper indentation of code. HyperParser gives additional information on
abilities for the ParenMatch and other extensions. the structure of code.
The HyperParser uses PyParser. PyParser is intended mostly to give information
on the proper indentation of code. HyperParser gives some information on the
structure of code, used by extensions to help the user.
""" """
import string import string
...@@ -15,9 +12,7 @@ from idlelib import PyParse ...@@ -15,9 +12,7 @@ from idlelib import PyParse
class HyperParser: class HyperParser:
def __init__(self, editwin, index): def __init__(self, editwin, index):
"""Initialize the HyperParser to analyze the surroundings of the given "To initialize, analyze the surroundings of the given index."
index.
"""
self.editwin = editwin self.editwin = editwin
self.text = text = editwin.text self.text = text = editwin.text
...@@ -33,9 +28,10 @@ class HyperParser: ...@@ -33,9 +28,10 @@ class HyperParser:
startat = max(lno - context, 1) startat = max(lno - context, 1)
startatindex = repr(startat) + ".0" startatindex = repr(startat) + ".0"
stopatindex = "%d.end" % lno stopatindex = "%d.end" % lno
# We add the newline because PyParse requires a newline at end. # We add the newline because PyParse requires a newline
# We add a space so that index won't be at end of line, so that # at end. We add a space so that index won't be at end
# its status will be the same as the char before it, if should. # of line, so that its status will be the same as the
# char before it, if should.
parser.set_str(text.get(startatindex, stopatindex)+' \n') parser.set_str(text.get(startatindex, stopatindex)+' \n')
bod = parser.find_good_parse_start( bod = parser.find_good_parse_start(
editwin._build_char_in_string_func(startatindex)) editwin._build_char_in_string_func(startatindex))
...@@ -49,122 +45,130 @@ class HyperParser: ...@@ -49,122 +45,130 @@ class HyperParser:
else: else:
startatindex = "1.0" startatindex = "1.0"
stopatindex = "%d.end" % lno stopatindex = "%d.end" % lno
# We add the newline because PyParse requires a newline at end. # We add the newline because PyParse requires it. We add a
# We add a space so that index won't be at end of line, so that # space so that index won't be at end of line, so that its
# its status will be the same as the char before it, if should. # status will be the same as the char before it, if should.
parser.set_str(text.get(startatindex, stopatindex)+' \n') parser.set_str(text.get(startatindex, stopatindex)+' \n')
parser.set_lo(0) parser.set_lo(0)
# We want what the parser has, except for the last newline and space. # We want what the parser has, minus the last newline and space.
self.rawtext = parser.str[:-2] self.rawtext = parser.str[:-2]
# As far as I can see, parser.str preserves the statement we are in, # Parser.str apparently preserves the statement we are in, so
# so that stopatindex can be used to synchronize the string with the # that stopatindex can be used to synchronize the string with
# text box indices. # the text box indices.
self.stopatindex = stopatindex self.stopatindex = stopatindex
self.bracketing = parser.get_last_stmt_bracketing() self.bracketing = parser.get_last_stmt_bracketing()
# find which pairs of bracketing are openers. These always correspond # find which pairs of bracketing are openers. These always
# to a character of rawtext. # correspond to a character of rawtext.
self.isopener = [i>0 and self.bracketing[i][1] > self.bracketing[i-1][1] self.isopener = [i>0 and self.bracketing[i][1] >
self.bracketing[i-1][1]
for i in range(len(self.bracketing))] for i in range(len(self.bracketing))]
self.set_index(index) self.set_index(index)
def set_index(self, index): def set_index(self, index):
"""Set the index to which the functions relate. Note that it must be """Set the index to which the functions relate.
in the same statement.
The index must be in the same statement.
""" """
indexinrawtext = \ indexinrawtext = (len(self.rawtext) -
len(self.rawtext) - len(self.text.get(index, self.stopatindex)) len(self.text.get(index, self.stopatindex)))
if indexinrawtext < 0: if indexinrawtext < 0:
raise ValueError("The index given is before the analyzed statement") raise ValueError("Index %s precedes the analyzed statement"
% index)
self.indexinrawtext = indexinrawtext self.indexinrawtext = indexinrawtext
# find the rightmost bracket to which index belongs # find the rightmost bracket to which index belongs
self.indexbracket = 0 self.indexbracket = 0
while self.indexbracket < len(self.bracketing)-1 and \ while (self.indexbracket < len(self.bracketing)-1 and
self.bracketing[self.indexbracket+1][0] < self.indexinrawtext: self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
self.indexbracket += 1 self.indexbracket += 1
if self.indexbracket < len(self.bracketing)-1 and \ if (self.indexbracket < len(self.bracketing)-1 and
self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and \ self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
not self.isopener[self.indexbracket+1]: not self.isopener[self.indexbracket+1]):
self.indexbracket += 1 self.indexbracket += 1
def is_in_string(self): def is_in_string(self):
"""Is the index given to the HyperParser is in a string?""" """Is the index given to the HyperParser is in a string?"""
# The bracket to which we belong should be an opener. # The bracket to which we belong should be an opener.
# If it's an opener, it has to have a character. # If it's an opener, it has to have a character.
return self.isopener[self.indexbracket] and \ return (self.isopener[self.indexbracket] and
self.rawtext[self.bracketing[self.indexbracket][0]] in ('"', "'") self.rawtext[self.bracketing[self.indexbracket][0]]
in ('"', "'"))
def is_in_code(self): def is_in_code(self):
"""Is the index given to the HyperParser is in a normal code?""" """Is the index given to the HyperParser is in a normal code?"""
return not self.isopener[self.indexbracket] or \ return (not self.isopener[self.indexbracket] or
self.rawtext[self.bracketing[self.indexbracket][0]] not in \ self.rawtext[self.bracketing[self.indexbracket][0]]
('#', '"', "'") not in ('#', '"', "'"))
def get_surrounding_brackets(self, openers='([{', mustclose=False): def get_surrounding_brackets(self, openers='([{', mustclose=False):
"""If the index given to the HyperParser is surrounded by a bracket """Return bracket indexes or None.
defined in openers (or at least has one before it), return the
indices of the opening bracket and the closing bracket (or the If the index given to the HyperParser is surrounded by a
end of line, whichever comes first). bracket defined in openers (or at least has one before it),
If it is not surrounded by brackets, or the end of line comes before return the indices of the opening bracket and the closing
the closing bracket and mustclose is True, returns None. bracket (or the end of line, whichever comes first).
If it is not surrounded by brackets, or the end of line comes
before the closing bracket and mustclose is True, returns None.
""" """
bracketinglevel = self.bracketing[self.indexbracket][1] bracketinglevel = self.bracketing[self.indexbracket][1]
before = self.indexbracket before = self.indexbracket
while not self.isopener[before] or \ while (not self.isopener[before] or
self.rawtext[self.bracketing[before][0]] not in openers or \ self.rawtext[self.bracketing[before][0]] not in openers or
self.bracketing[before][1] > bracketinglevel: self.bracketing[before][1] > bracketinglevel):
before -= 1 before -= 1
if before < 0: if before < 0:
return None return None
bracketinglevel = min(bracketinglevel, self.bracketing[before][1]) bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
after = self.indexbracket + 1 after = self.indexbracket + 1
while after < len(self.bracketing) and \ while (after < len(self.bracketing) and
self.bracketing[after][1] >= bracketinglevel: self.bracketing[after][1] >= bracketinglevel):
after += 1 after += 1
beforeindex = self.text.index("%s-%dc" % beforeindex = self.text.index("%s-%dc" %
(self.stopatindex, len(self.rawtext)-self.bracketing[before][0])) (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
if after >= len(self.bracketing) or \ if (after >= len(self.bracketing) or
self.bracketing[after][0] > len(self.rawtext): self.bracketing[after][0] > len(self.rawtext)):
if mustclose: if mustclose:
return None return None
afterindex = self.stopatindex afterindex = self.stopatindex
else: else:
# We are after a real char, so it is a ')' and we give the index # We are after a real char, so it is a ')' and we give the
# before it. # index before it.
afterindex = self.text.index("%s-%dc" % afterindex = self.text.index(
(self.stopatindex, "%s-%dc" % (self.stopatindex,
len(self.rawtext)-(self.bracketing[after][0]-1))) len(self.rawtext)-(self.bracketing[after][0]-1)))
return beforeindex, afterindex return beforeindex, afterindex
# This string includes all chars that may be in a white space # Ascii chars that may be in a white space
_whitespace_chars = " \t\n\\" _whitespace_chars = " \t\n\\"
# This string includes all chars that may be in an identifier # Ascii chars that may be in an identifier
_id_chars = string.ascii_letters + string.digits + "_" _id_chars = string.ascii_letters + string.digits + "_"
# This string includes all chars that may be the first char of an identifier # Ascii chars that may be the first char of an identifier
_id_first_chars = string.ascii_letters + "_" _id_first_chars = string.ascii_letters + "_"
# Given a string and pos, return the number of chars in the identifier # Given a string and pos, return the number of chars in the
# which ends at pos, or 0 if there is no such one. Saved words are not # identifier which ends at pos, or 0 if there is no such one. Saved
# identifiers. # words are not identifiers.
def _eat_identifier(self, str, limit, pos): def _eat_identifier(self, str, limit, pos):
i = pos i = pos
while i > limit and str[i-1] in self._id_chars: while i > limit and str[i-1] in self._id_chars:
i -= 1 i -= 1
if i < pos and (str[i] not in self._id_first_chars or \ if (i < pos and (str[i] not in self._id_first_chars or
keyword.iskeyword(str[i:pos])): keyword.iskeyword(str[i:pos]))):
i = pos i = pos
return pos - i return pos - i
def get_expression(self): def get_expression(self):
"""Return a string with the Python expression which ends at the given """Return a string with the Python expression which ends at the
index, which is empty if there is no real one. given index, which is empty if there is no real one.
""" """
if not self.is_in_code(): if not self.is_in_code():
raise ValueError("get_expression should only be called if index "\ raise ValueError("get_expression should only be called"
"is inside a code.") "if index is inside a code.")
rawtext = self.rawtext rawtext = self.rawtext
bracketing = self.bracketing bracketing = self.bracketing
...@@ -177,20 +181,20 @@ class HyperParser: ...@@ -177,20 +181,20 @@ class HyperParser:
postdot_phase = True postdot_phase = True
while 1: while 1:
# Eat whitespaces, comments, and if postdot_phase is False - one dot # Eat whitespaces, comments, and if postdot_phase is False - a dot
while 1: while 1:
if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars: if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
# Eat a whitespace # Eat a whitespace
pos -= 1 pos -= 1
elif not postdot_phase and \ elif (not postdot_phase and
pos > brck_limit and rawtext[pos-1] == '.': pos > brck_limit and rawtext[pos-1] == '.'):
# Eat a dot # Eat a dot
pos -= 1 pos -= 1
postdot_phase = True postdot_phase = True
# The next line will fail if we are *inside* a comment, but we # The next line will fail if we are *inside* a comment,
# shouldn't be. # but we shouldn't be.
elif pos == brck_limit and brck_index > 0 and \ elif (pos == brck_limit and brck_index > 0 and
rawtext[bracketing[brck_index-1][0]] == '#': rawtext[bracketing[brck_index-1][0]] == '#'):
# Eat a comment # Eat a comment
brck_index -= 2 brck_index -= 2
brck_limit = bracketing[brck_index][0] brck_limit = bracketing[brck_index][0]
...@@ -200,8 +204,8 @@ class HyperParser: ...@@ -200,8 +204,8 @@ class HyperParser:
break break
if not postdot_phase: if not postdot_phase:
# We didn't find a dot, so the expression end at the last # We didn't find a dot, so the expression end at the
# identifier pos. # last identifier pos.
break break
ret = self._eat_identifier(rawtext, brck_limit, pos) ret = self._eat_identifier(rawtext, brck_limit, pos)
...@@ -209,13 +213,13 @@ class HyperParser: ...@@ -209,13 +213,13 @@ class HyperParser:
# There is an identifier to eat # There is an identifier to eat
pos = pos - ret pos = pos - ret
last_identifier_pos = pos last_identifier_pos = pos
# Now, in order to continue the search, we must find a dot. # Now, to continue the search, we must find a dot.
postdot_phase = False postdot_phase = False
# (the loop continues now) # (the loop continues now)
elif pos == brck_limit: elif pos == brck_limit:
# We are at a bracketing limit. If it is a closing bracket, # We are at a bracketing limit. If it is a closing
# eat the bracket, otherwise, stop the search. # bracket, eat the bracket, otherwise, stop the search.
level = bracketing[brck_index][1] level = bracketing[brck_index][1]
while brck_index > 0 and bracketing[brck_index-1][1] > level: while brck_index > 0 and bracketing[brck_index-1][1] > level:
brck_index -= 1 brck_index -= 1
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment