51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
class ForRofOperation(RegisterAllocatableOperation, IRDLOperation, ABC):
    """
    Abstract base for loop operations over general-purpose registers.

    The operation takes a lower bound, an upper bound and a step, plus a
    variadic list of loop-carried values (`iter_args`). It has one result per
    loop-carried value, and a single-block body whose first block argument is
    the induction variable, followed by one argument per loop-carried value.
    The body is implicitly terminated by a `YieldOp`.
    """

    # Loop bounds and step, all held in general-purpose registers.
    lb = operand_def(GeneralRegisterType)
    ub = operand_def(GeneralRegisterType)
    step = operand_def(GeneralRegisterType)

    # Initial values of the loop-carried variables.
    iter_args = var_operand_def(X86RegisterType)

    # One result per loop-carried variable.
    res = var_result_def(X86RegisterType)

    body = region_def("single_block")

    traits = traits_def(SingleBlockImplicitTerminator(YieldOp))

    def __init__(
        self,
        lb: SSAValue | Operation,
        ub: SSAValue | Operation,
        step: SSAValue | Operation,
        iter_args: Sequence[SSAValue | Operation],
        body: Region | Sequence[Operation] | Sequence[Block] | Block,
    ):
        """
        Build the loop operation.

        Args:
            lb: Lower bound of the iteration space.
            ub: Upper bound of the iteration space.
            step: Step of the induction variable.
            iter_args: Initial values of the loop-carried variables; the result
                types of the operation are taken from these values' types.
            body: The loop body. A lone `Block` is wrapped in a list before
                being handed to the base constructor.
        """
        if isinstance(body, Block):
            body = [body]

        super().__init__(
            operands=[lb, ub, step, iter_args],
            result_types=[[SSAValue.get(a).type for a in iter_args]],
            regions=[body],
        )

    def verify_(self):
        """
        Check that the body's block arguments and terminator agree with
        `iter_args`.

        Raises:
            VerifyException: If the number of block arguments is not
                `len(iter_args) + 1`, if the first block argument is not of
                `GeneralRegisterType`, if a subsequent block argument type
                differs from the matching `iter_args` type, or if the trailing
                `YieldOp` yields a wrong number of values or wrongly typed
                values.
        """
        # One block argument for the induction variable, plus one per
        # loop-carried variable.
        if (len(self.iter_args) + 1) != len(self.body.block.args):
            raise VerifyException(
                f"Wrong number of block arguments, expected {len(self.iter_args) + 1}, got "
                f"{len(self.body.block.args)}. The body must have the induction "
                f"variable and loop-carried variables as arguments."
            )

        if self.body.block.args and (iter_var := self.body.block.args[0]):
            if not isinstance(iter_var.type, GeneralRegisterType):
                raise VerifyException(
                    f"The first block argument of the body is of type {iter_var.type}"
                    " instead of GeneralRegisterType"
                )

        # Remaining block arguments must mirror the carried operands' types.
        for idx, (arg, block_arg) in enumerate(
            zip(self.iter_args, self.body.block.args[1:])
        ):
            if block_arg.type != arg.type:
                raise VerifyException(
                    f"Block argument {idx + 1} has wrong type, expected {arg.type}, "
                    f"got {block_arg.type}. Arguments after the "
                    f"induction variable must match the carried variables."
                )

        # Only check the terminator when the body is non-empty and actually
        # ends with a YieldOp; other cases are not diagnosed here.
        if len(self.body.ops) > 0 and isinstance(
            yieldop := self.body.block.last_op, YieldOp
        ):
            if len(yieldop.arguments) != len(self.iter_args):
                raise VerifyException(
                    f"Expected {len(self.iter_args)} args, got {len(yieldop.arguments)}. "
                    f"The loop must yield its carried variables."
                )
            for iter_arg, yield_arg in zip(self.iter_args, yieldop.arguments):
                if iter_arg.type != yield_arg.type:
                    raise VerifyException(
                        f"Expected {iter_arg.type}, got {yield_arg.type}. The "
                        f"loop's yield must match carried "
                        f"variables types."
                    )

    def iter_used_registers(self) -> Generator[RegisterType, None, None]:
        """Yield nothing: this operation reports no registers of its own."""
        # We know that all the registers for the inputs and outputs are the same, and
        # that these registers will have been iterated earlier in the IR.
        yield from ()

    def allocate_registers(self, allocator: BlockAllocator) -> None:
        """
        Allocate registers for the loop's operands, results and body.

        Args:
            allocator: The block allocator used to assign registers; it is
                also used to allocate the body block of this loop.
        """
        # Allocate values used inside the body but defined outside.
        # Their scope lasts for the whole body execution scope
        live_ins = allocator.live_ins_per_block[self.body.block]
        for live_in in live_ins:
            allocator.allocate_value(live_in)

        yield_op = self.body.block.last_op
        assert yield_op is not None, (
            "last op of the loop body is guaranteed to be a YieldOp"
        )
        block_args = self.body.block.args

        # The loop-carried variables are trickier
        # The for op operand, block arg, and yield operand must have the same type
        for block_arg, operand, yield_operand, op_result in zip(
            block_args[1:], self.iter_args, yield_op.operands, self.results
        ):
            allocator.allocate_values_same_reg(
                (block_arg, operand, yield_operand, op_result)
            )

        # Induction variable
        allocator.allocate_value(block_args[0])

        # Step and ub are used throughout loop
        allocator.allocate_value(self.ub)
        allocator.allocate_value(self.step)

        # Reserve the loop carried variables for allocation within the body
        regs = self.iter_args.types
        assert all(isinstance(reg, X86RegisterType) for reg in regs)
        regs = cast(tuple[X86RegisterType, ...], regs)
        with allocator.available_registers.reserve_registers(regs):
            allocator.allocate_block(self.body.block)

        # lb is only used as an input to the loop, so free induction variable before
        # allocating lb to it in case it's not yet allocated
        allocator.free_value(self.body.block.args[0])
        allocator.allocate_value(self.lb)