Skip to content

Lexer

lexer

Position = int module-attribute

A position in a file. The position corresponds to the character index in the file.

TokenKindT = TypeVar('TokenKindT', bound=Enum) module-attribute

Location

Bases: NamedTuple

Structure defining a location in a file.

Source code in xdsl/utils/lexer.py
18
19
20
21
22
23
24
25
26
27
28
class Location(NamedTuple):
    """A location in a file, identified by file name, line and column."""

    file: str
    line: int
    "1-index of line in file"
    col: int
    "1-index of column in file"

    def __str__(self) -> str:
        # Rendered in the `file:line:col` form.
        return "{}:{}:{}".format(self.file, self.line, self.col)

file: str instance-attribute

line: int instance-attribute

1-index of line in file

col: int instance-attribute

1-index of column in file

__str__() -> str

Source code in xdsl/utils/lexer.py
27
28
def __str__(self) -> str:
    """Render the location in the `file:line:col` form."""
    return "{}:{}:{}".format(self.file, self.line, self.col)

Input dataclass

Used to keep track of the input when parsing.

Source code in xdsl/utils/lexer.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
@dataclass(frozen=True)
class Input:
    """
    Used to keep track of the input when parsing.

    Stores the text being parsed together with a name, and offers line-boundary
    lookups and bounds-checked accessors that return `None` instead of raising.
    """

    content: str = field(repr=False)
    name: str
    len: int = field(init=False, repr=False)

    def __post_init__(self):
        # The dataclass is frozen, so the cached length has to be stored through
        # `object.__setattr__` rather than a plain attribute assignment.
        object.__setattr__(self, "len", len(self.content))

    def __len__(self):
        return self.len

    def get_start_of_line(self, pos: Position) -> Position:
        """
        Returns the position at which the line containing `pos` starts, i.e. the
        position right after the last newline before `pos`, or 0 if there are no
        previous newlines.
        """
        # `rfind` returns -1 if not found, meaning `pos` is on the first line
        pos = self.content.rfind("\n", 0, pos)
        # Adding 1 turns the -1 into 0, and otherwise steps past the newline found
        return pos + 1

    def get_end_of_line(self, pos: Position) -> Position:
        """
        Returns the position of the first newline at or after `pos`, or the length
        of the file if there are no more newlines.
        """
        pos = self.content.find("\n", pos)
        # If there is no further newline, return the length for correct slice
        # indexing
        return self.len if pos == -1 else pos

    def at(self, i: Position) -> str | None:
        """
        Returns the character at position `i`, or `None` if `i` is out of bounds.
        """
        # Negative positions are rejected explicitly: plain indexing would
        # otherwise count from the end of the content, or raise `IndexError`.
        if not 0 <= i < self.len:
            return None
        return self.content[i]

    def slice(self, start: Position, end: Position) -> str | None:
        """
        Returns the text between `start` (inclusive) and `end` (exclusive), or
        `None` if `start` is negative or `end` is past the end of the input.
        """
        if end > self.len or start < 0:
            return None
        return self.content[start:end]

content: str = field(repr=False) class-attribute instance-attribute

name: str instance-attribute

len: int = field(init=False, repr=False) class-attribute instance-attribute

__init__(content: str, name: str) -> None

__post_init__()

Source code in xdsl/utils/lexer.py
41
42
def __post_init__(self):
    """Cache the length of `content` in the `len` attribute."""
    content_length = len(self.content)
    # The enclosing dataclass is frozen, so the value is stored through
    # `object.__setattr__` rather than a plain attribute assignment.
    object.__setattr__(self, "len", content_length)

__len__()

Source code in xdsl/utils/lexer.py
44
45
def __len__(self) -> int:
    """Returns the length of the input, as cached in the `len` attribute."""
    return self.len

get_start_of_line(pos: Position) -> Position

Returns the position at which the line containing pos starts, i.e. the position right after the last newline before pos, or 0 if there are no previous newlines.

Source code in xdsl/utils/lexer.py
47
48
49
50
51
52
53
54
55
def get_start_of_line(self, pos: Position) -> Position:
    """
    Returns the position at which the line containing `pos` starts, i.e. the
    position right after the last newline before `pos`, or 0 if no newline
    precedes `pos`.
    """
    # `rfind` yields -1 when no newline is found, so adding 1 gives 0 for the
    # first line and otherwise steps past the newline that was found.
    return self.content.rfind("\n", 0, pos) + 1

get_end_of_line(pos: Position) -> Position

Returns the position of the first newline at or after pos, or the length of the file if there are no more newlines.

Source code in xdsl/utils/lexer.py
57
58
59
60
61
62
63
64
65
def get_end_of_line(self, pos: Position) -> Position:
    """
    Returns the position of the first newline at or after `pos`, or the length
    of the input when no newline follows.
    """
    newline_index = self.content.find("\n", pos)
    if newline_index == -1:
        # No newline left: the input length is the correct exclusive bound
        # when slicing out the final line.
        return self.len
    return newline_index

at(i: Position) -> str | None

Source code in xdsl/utils/lexer.py
67
68
69
70
def at(self, i: Position) -> str | None:
    """
    Returns the character at position `i`, or `None` if `i` is out of bounds.
    """
    # Negative positions are rejected explicitly: plain indexing would otherwise
    # count from the end of the content, or raise `IndexError`.
    if not 0 <= i < self.len:
        return None
    return self.content[i]

slice(start: Position, end: Position) -> str | None

Source code in xdsl/utils/lexer.py
72
73
74
75
def slice(self, start: Position, end: Position) -> str | None:
    """
    Returns the text between `start` (inclusive) and `end` (exclusive), or `None`
    when `start` is negative or `end` lies past the end of the input.
    """
    if start >= 0 and end <= self.len:
        return self.content[start:end]
    return None

Span dataclass

Parts of the input are always passed around as spans, so we know where they originated.

Source code in xdsl/utils/lexer.py
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
@dataclass(frozen=True)
class Span:
    """
    Parts of the input are always passed around as spans, so we know where they
    originated.
    """

    start: Position
    """
    Start of the span in the input, as a global character offset.
    """
    end: Position
    """
    End of the span in the input (exclusive), as a global character offset.
    """
    input: Input
    """
    The input being operated on
    """

    line_offset: int = 0
    """
    A line offset that is added to the line number computed from the input when
    the location is reported.
    """

    def __len__(self):
        return self.len

    @property
    def len(self):
        """The number of characters covered by the span."""
        return self.end - self.start

    @property
    def text(self):
        """The part of the input content covered by the span."""
        return self.input.content[self.start : self.end]

    def get_location(self) -> Location:
        """
        Returns the location of the start of the span, with 1-based line and
        column indices. `line_offset` is added to the line index.
        """
        line_start = self.input.get_start_of_line(self.start)
        # Every line before this one contributes exactly one newline
        line_index_in_source = self.input.content.count("\n", 0, line_start) + 1
        line_index = line_index_in_source + self.line_offset
        column_index = self.start - line_start + 1
        return Location(self.input.name, line_index, column_index)

    def print_with_context(self, msg: str | None = None) -> str:
        """
        Returns a string containing the lines relevant to the span, preceded by
        the span's location. The span's contents are highlighted by up-carets
        beneath them (`^`). The message `msg`, if given, is printed once, below
        the carets of the first line.
        """
        loc = self.get_location()
        # Offset relative to the first line:
        offset = loc.col - 1
        lines_start = self.start - offset
        lines_end = self.input.get_end_of_line(self.end)
        lines = self.input.content[lines_start:lines_end].splitlines()
        # An empty span still gets a single caret
        remaining_len = max(self.len, 1)
        capture = StringIO()
        print(loc, file=capture)
        for line in lines:
            print(line, file=capture)
            # Once the whole span has been underlined, the remaining lines are
            # printed as plain context, without an (empty) caret line below them.
            if remaining_len <= 0:
                continue
            # At least one caret per line, so that empty lines are marked too
            caret_count = min(remaining_len, max(len(line) - offset, 1))
            print(" " * offset + "^" * caret_count, file=capture)
            if msg is not None:
                print(" " * offset + msg, file=capture)
                msg = None
            remaining_len -= caret_count
            # Only the first line starts part-way through; later ones start at 0
            offset = 0
        # No line was printed at all, so the message has not been emitted yet
        if msg is not None:
            print(msg, file=capture)
        return capture.getvalue()

    def __repr__(self):
        return f"{self.__class__.__name__}[{self.start}:{self.end}](text='{self.text}')"

start: Position instance-attribute

Start of the token's location in the source file, as a global character offset in the file

end: Position instance-attribute

End of the token's location in the source file, as a global character offset in the file

input: Input instance-attribute

The input being operated on

line_offset: int = 0 class-attribute instance-attribute

A line offset that is added to the line number computed from the input when the location is printed.

len property

text property

__init__(start: Position, end: Position, input: Input, line_offset: int = 0) -> None

__len__()

Source code in xdsl/utils/lexer.py
103
104
def __len__(self) -> int:
    """Returns the length of the span, as given by its `len` property."""
    return self.len

get_location() -> Location

Source code in xdsl/utils/lexer.py
114
115
116
117
118
119
def get_location(self) -> Location:
    """
    Returns the location of the start of the span, using 1-based line and column
    indices. `line_offset` is added to the line index.
    """
    source = self.input
    start_of_line = source.get_start_of_line(self.start)
    # Every line before this one contributes exactly one newline; the extra 1
    # makes the line index 1-based.
    newlines_before = source.content.count("\n", 0, start_of_line)
    line = newlines_before + 1 + self.line_offset
    column = self.start - start_of_line + 1
    return Location(source.name, line, column)

print_with_context(msg: str | None = None) -> str

Returns a string containing the lines relevant to the span. The span's contents are highlighted by up-carets beneath them (^). The message msg is printed alongside these.

Source code in xdsl/utils/lexer.py
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
def print_with_context(self, msg: str | None = None) -> str:
    """
    Returns a string containing the lines relevant to the span, preceded by the
    span's location. The span's contents are highlighted by up-carets beneath
    them (`^`). The message `msg`, if given, is printed once, below the carets
    of the first line.
    """
    loc = self.get_location()
    # Offset relative to the first line:
    offset = loc.col - 1
    lines_start = self.start - offset
    lines_end = self.input.get_end_of_line(self.end)
    lines = self.input.content[lines_start:lines_end].splitlines()
    # An empty span still gets a single caret
    remaining_len = max(self.len, 1)
    capture = StringIO()
    print(loc, file=capture)
    for line in lines:
        print(line, file=capture)
        # Once the whole span has been underlined, the remaining lines are
        # printed as plain context, without an (empty) caret line below them.
        if remaining_len <= 0:
            continue
        # At least one caret per line, so that empty lines are marked too
        caret_count = min(remaining_len, max(len(line) - offset, 1))
        print(" " * offset + "^" * caret_count, file=capture)
        if msg is not None:
            print(" " * offset + msg, file=capture)
            msg = None
        remaining_len -= caret_count
        # Only the first line starts part-way through; later ones start at 0
        offset = 0
    # No line was printed at all, so the message has not been emitted yet
    if msg is not None:
        print(msg, file=capture)
    return capture.getvalue()

__repr__()

Source code in xdsl/utils/lexer.py
151
152
def __repr__(self):
    """Render the span as `ClassName[start:end](text='...')`."""
    class_name = self.__class__.__name__
    return "{}[{}:{}](text='{}')".format(class_name, self.start, self.end, self.text)

Token dataclass

Bases: Generic[TokenKindT]

Source code in xdsl/utils/lexer.py
158
159
160
161
162
163
164
165
166
167
@dataclass
class Token(Generic[TokenKindT]):
    """A token kind paired with the span of the input that the token covers."""

    # The kind of the token, a value of the `TokenKindT` type parameter.
    kind: TokenKindT

    # The span of the input covered by the token.
    span: Span

    @property
    def text(self):
        """The text composing the token."""
        return self.span.text

kind: TokenKindT instance-attribute

span: Span instance-attribute

text property

The text composing the token.

__init__(kind: TokenKindT, span: Span) -> None

Lexer dataclass

Bases: ABC, Generic[TokenKindT]

Source code in xdsl/utils/lexer.py
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
@dataclass
class Lexer(ABC, Generic[TokenKindT]):
    """
    Abstract base class for lexers producing tokens of kind `TokenKindT` from an
    `Input`.
    """

    input: Input
    """Input that is currently being lexed."""

    pos: Position = field(default=0, init=False)
    """
    Current position in the input.
    The position can be out of bounds, in which case the lexer is in EOF state.
    """

    def _form_token(self, kind: TokenKindT, start_pos: Position) -> Token[TokenKindT]:
        """
        Builds a token of the given kind whose span runs from `start_pos` up to
        the current position.
        """
        covered = Span(start=start_pos, end=self.pos, input=self.input)
        return Token(kind=kind, span=covered)

    @abstractmethod
    def lex(self) -> Token[TokenKindT]:
        """
        Lexes a token from the input and returns it.
        """
        raise NotImplementedError()

input: Input instance-attribute

Input that is currently being lexed.

pos: Position = field(init=False, default=0) class-attribute instance-attribute

Current position in the input. The position can be out of bounds, in which case the lexer is in EOF state.

__init__(input: Input) -> None

lex() -> Token[TokenKindT] abstractmethod

Lexes a token from the input and returns it.

Source code in xdsl/utils/lexer.py
187
188
189
190
191
192
@abstractmethod
def lex(self) -> Token[TokenKindT]:
    """
    Lexes a token from the input and returns it.

    This is an abstract method: the body here only raises `NotImplementedError`.
    """
    raise NotImplementedError()