gpu

GPU = Dialect('gpu', [AllocOp, AllReduceOp, BarrierOp, BlockDimOp, BlockIdOp, DeallocOp, FuncOp, GlobalIdOp, GridDimOp, HostRegisterOp, HostUnregisterOp, LaneIdOp, LaunchOp, LaunchFuncOp, MemcpyOp, ModuleOp, NumSubgroupsOp, ReturnOp, SetDefaultDeviceOp, SubgroupIdOp, SubgroupSizeOp, TerminatorOp, ThreadIdOp, WaitOp, YieldOp], [AsyncTokenType, AllReduceOpAttr, DimensionAttr, ProcessorAttr, LoopDimMapAttr]) module-attribute
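
To build or parse IR that uses these constructs, the dialect is first loaded into a context. A minimal sketch, assuming xDSL's Context.load_dialect API:

from xdsl.context import Context
from xdsl.dialects.gpu import GPU

ctx = Context()
# Makes every gpu.* op and attribute listed above known to the parser.
ctx.load_dialect(GPU)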

AsyncTokenType dataclass

Bases: ParametrizedAttribute, TypeAttribute

Source code in xdsl/dialects/gpu.py
@irdl_attr_definition
class AsyncTokenType(ParametrizedAttribute, TypeAttribute):
    name = "gpu.async.token"

name = 'gpu.async.token' class-attribute instance-attribute

AllReduceOpEnum

Bases: StrEnum

Source code in xdsl/dialects/gpu.py
class AllReduceOpEnum(StrEnum):
    Add = auto()
    And = auto()
    Max = auto()
    Min = auto()
    Mul = auto()
    Or = auto()
    Xor = auto()

Add = auto() class-attribute instance-attribute

And = auto() class-attribute instance-attribute

Max = auto() class-attribute instance-attribute

Min = auto() class-attribute instance-attribute

Mul = auto() class-attribute instance-attribute

Or = auto() class-attribute instance-attribute

Xor = auto() class-attribute instance-attribute

DimensionEnum

Bases: StrEnum

Source code in xdsl/dialects/gpu.py
class DimensionEnum(StrEnum):
    X = auto()
    Y = auto()
    Z = auto()

X = auto() class-attribute instance-attribute

Y = auto() class-attribute instance-attribute

Z = auto() class-attribute instance-attribute

ProcessorEnum

Bases: StrEnum

Source code in xdsl/dialects/gpu.py
class ProcessorEnum(StrEnum):
    Sequential = auto()
    Block_X = auto()
    Block_Y = auto()
    Block_Z = auto()
    Thread_X = auto()
    Thread_Y = auto()
    Thread_Z = auto()

Sequential = auto() class-attribute instance-attribute

Block_X = auto() class-attribute instance-attribute

Block_Y = auto() class-attribute instance-attribute

Block_Z = auto() class-attribute instance-attribute

Thread_X = auto() class-attribute instance-attribute

Thread_Y = auto() class-attribute instance-attribute

Thread_Z = auto() class-attribute instance-attribute

AllReduceOpAttr dataclass

Bases: EnumAttribute[AllReduceOpEnum], SpacedOpaqueSyntaxAttribute

Source code in xdsl/dialects/gpu.py
@irdl_attr_definition
class AllReduceOpAttr(EnumAttribute[AllReduceOpEnum], SpacedOpaqueSyntaxAttribute):
    name = "gpu.all_reduce_op"

name = 'gpu.all_reduce_op' class-attribute instance-attribute

DimensionAttr dataclass

Bases: EnumAttribute[DimensionEnum], SpacedOpaqueSyntaxAttribute

Source code in xdsl/dialects/gpu.py
@irdl_attr_definition
class DimensionAttr(EnumAttribute[DimensionEnum], SpacedOpaqueSyntaxAttribute):
    name = "gpu.dim"

name = 'gpu.dim' class-attribute instance-attribute
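
DimensionAttr simply wraps a DimensionEnum value; the same pattern applies to AllReduceOpAttr and ProcessorAttr. A minimal sketch (the rendered form in the comment is an assumption based on the spaced opaque syntax):

from xdsl.dialects.gpu import DimensionAttr, DimensionEnum

dim_x = DimensionAttr(DimensionEnum.X)  # prints roughly as #gpu<dim x>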

ProcessorAttr dataclass

Bases: EnumAttribute[ProcessorEnum], SpacedOpaqueSyntaxAttribute

Source code in xdsl/dialects/gpu.py
@irdl_attr_definition
class ProcessorAttr(EnumAttribute[ProcessorEnum], SpacedOpaqueSyntaxAttribute):
    name = "gpu.processor"

name = 'gpu.processor' class-attribute instance-attribute

LoopDimMapAttr dataclass

Bases: ParametrizedAttribute

Source code in xdsl/dialects/gpu.py
@irdl_attr_definition
class LoopDimMapAttr(ParametrizedAttribute):
    name = "gpu.loop_dim_map"

    processor: ProcessorAttr
    map: AffineMapAttr
    bound: AffineMapAttr

    def print_parameters(self, printer: Printer) -> None:
        with printer.in_angle_brackets():
            printer.print_string("processor = ")
            printer.print_string(self.processor.data)
            printer.print_string(", map = ")
            printer.print_string(str(self.map.data))
            printer.print_string(", bound = ")
            printer.print_string(str(self.bound.data))

    @classmethod
    def parse_parameters(cls, parser: AttrParser):
        with parser.in_angle_brackets():
            parser.parse_keyword("processor")
            parser.parse_punctuation("=")
            proc = ProcessorAttr.parse_parameter(parser)
            processor = ProcessorAttr(proc)
            parser.parse_punctuation(",")
            parser.parse_keyword("map")
            parser.parse_punctuation("=")
            map = AffineMapAttr(parser.parse_affine_map())
            parser.parse_punctuation(",")
            parser.parse_keyword("bound")
            parser.parse_punctuation("=")
            bound = AffineMapAttr(parser.parse_affine_map())
        return [processor, map, bound]

name = 'gpu.loop_dim_map' class-attribute instance-attribute

processor: ProcessorAttr instance-attribute

map: AffineMapAttr instance-attribute

bound: AffineMapAttr instance-attribute

print_parameters(printer: Printer) -> None

Source code in xdsl/dialects/gpu.py
def print_parameters(self, printer: Printer) -> None:
    with printer.in_angle_brackets():
        printer.print_string("processor = ")
        printer.print_string(self.processor.data)
        printer.print_string(", map = ")
        printer.print_string(str(self.map.data))
        printer.print_string(", bound = ")
        printer.print_string(str(self.bound.data))

parse_parameters(parser: AttrParser) classmethod

Source code in xdsl/dialects/gpu.py
@classmethod
def parse_parameters(cls, parser: AttrParser):
    with parser.in_angle_brackets():
        parser.parse_keyword("processor")
        parser.parse_punctuation("=")
        proc = ProcessorAttr.parse_parameter(parser)
        processor = ProcessorAttr(proc)
        parser.parse_punctuation(",")
        parser.parse_keyword("map")
        parser.parse_punctuation("=")
        map = AffineMapAttr(parser.parse_affine_map())
        parser.parse_punctuation(",")
        parser.parse_keyword("bound")
        parser.parse_punctuation("=")
        bound = AffineMapAttr(parser.parse_affine_map())
    return [processor, map, bound]
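
Putting the printer and parser together, a loop_dim_map attribute can be built from a processor and two affine maps. A sketch, assuming AffineMap.identity and positional parametrized-attribute construction are available:

from xdsl.dialects.builtin import AffineMapAttr
from xdsl.dialects.gpu import LoopDimMapAttr, ProcessorAttr, ProcessorEnum
from xdsl.ir.affine import AffineMap

identity = AffineMapAttr(AffineMap.identity(1))  # (d0) -> (d0)
attr = LoopDimMapAttr(ProcessorAttr(ProcessorEnum.Block_X), identity, identity)
# Expected textual form, per print_parameters above:
# #gpu.loop_dim_map<processor = block_x, map = (d0) -> (d0), bound = (d0) -> (d0)>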

AllocOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class AllocOp(IRDLOperation):
    name = "gpu.alloc"
    hostShared = opt_attr_def(UnitAttr)
    asyncDependencies = var_operand_def(AsyncTokenType)
    dynamicSizes = var_operand_def(IndexType)
    symbolOperands = var_operand_def(IndexType)

    irdl_options = (AttrSizedOperandSegments(as_property=True),)

    result = result_def(memref.MemRefType)
    asyncToken = opt_result_def(AsyncTokenType)

    def verify_(self) -> None:
        ndyn = len(self.dynamicSizes)
        assert isinstance(res_type := self.result.type, memref.MemRefType)
        ndyn_type = len([i for i in res_type.get_shape() if i == DYNAMIC_INDEX])
        if ndyn != ndyn_type:
            raise VerifyException(
                f"Expected {ndyn_type} dynamic sizes, got {ndyn}. All "
                "dynamic sizes need to be set in the alloc operation."
            )

    def __init__(
        self,
        return_type: memref.MemRefType,
        dynamic_sizes: Sequence[SSAValue | Operation] | None = None,
        host_shared: bool = False,
        async_dependencies: Sequence[SSAValue | Operation] | None = None,
        is_async: bool = False,
    ):
        token_return = [AsyncTokenType()] if is_async else []
        dynamic_sizes_vals: list[SSAValue] = (
            [SSAValue.get(e) for e in dynamic_sizes] if dynamic_sizes else []
        )
        async_dependencies_vals: list[SSAValue] = (
            [SSAValue.get(e) for e in async_dependencies] if async_dependencies else []
        )
        attributes: dict[str, Attribute] = (
            {"hostShared": UnitAttr()} if host_shared else {}
        )
        super().__init__(
            operands=[async_dependencies_vals, dynamic_sizes_vals, []],
            result_types=[return_type, token_return],
            attributes=attributes,
        )

name = 'gpu.alloc' class-attribute instance-attribute

hostShared = opt_attr_def(UnitAttr) class-attribute instance-attribute

asyncDependencies = var_operand_def(AsyncTokenType) class-attribute instance-attribute

dynamicSizes = var_operand_def(IndexType) class-attribute instance-attribute

symbolOperands = var_operand_def(IndexType) class-attribute instance-attribute

irdl_options = (AttrSizedOperandSegments(as_property=True),) class-attribute instance-attribute

result = result_def(memref.MemRefType) class-attribute instance-attribute

asyncToken = opt_result_def(AsyncTokenType) class-attribute instance-attribute

verify_() -> None

Source code in xdsl/dialects/gpu.py
def verify_(self) -> None:
    ndyn = len(self.dynamicSizes)
    assert isinstance(res_type := self.result.type, memref.MemRefType)
    ndyn_type = len([i for i in res_type.get_shape() if i == DYNAMIC_INDEX])
    if ndyn != ndyn_type:
        raise VerifyException(
            f"Expected {ndyn_type} dynamic sizes, got {ndyn}. All "
            "dynamic sizes need to be set in the alloc operation."
        )

__init__(return_type: memref.MemRefType, dynamic_sizes: Sequence[SSAValue | Operation] | None = None, host_shared: bool = False, async_dependencies: Sequence[SSAValue | Operation] | None = None, is_async: bool = False)

Source code in xdsl/dialects/gpu.py
def __init__(
    self,
    return_type: memref.MemRefType,
    dynamic_sizes: Sequence[SSAValue | Operation] | None = None,
    host_shared: bool = False,
    async_dependencies: Sequence[SSAValue | Operation] | None = None,
    is_async: bool = False,
):
    token_return = [AsyncTokenType()] if is_async else []
    dynamic_sizes_vals: list[SSAValue] = (
        [SSAValue.get(e) for e in dynamic_sizes] if dynamic_sizes else []
    )
    async_dependencies_vals: list[SSAValue] = (
        [SSAValue.get(e) for e in async_dependencies] if async_dependencies else []
    )
    attributes: dict[str, Attribute] = (
        {"hostShared": UnitAttr()} if host_shared else {}
    )
    super().__init__(
        operands=[async_dependencies_vals, dynamic_sizes_vals, []],
        result_types=[return_type, token_return],
        attributes=attributes,
    )
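
A sketch of an asynchronous allocation with one dynamic dimension, assuming the arith constant helper shown; the verifier above requires one dynamic size operand per dynamic dimension in the result type:

from xdsl.dialects import arith, gpu
from xdsl.dialects.builtin import IndexType, f32
from xdsl.dialects.memref import MemRefType

n = arith.ConstantOp.from_int_and_width(128, IndexType())
alloc = gpu.AllocOp(
    MemRefType(f32, [-1]),  # memref<?xf32>: one dynamic size to supply
    dynamic_sizes=[n],
    is_async=True,  # also produces a !gpu.async.token result
)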

AllReduceOp dataclass

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class AllReduceOp(IRDLOperation):
    name = "gpu.all_reduce"
    op = opt_prop_def(AllReduceOpAttr)
    uniform = opt_prop_def(UnitAttr)
    operand = operand_def(Attribute)
    result = result_def(Attribute)
    body = region_def()

    traits = traits_def(IsolatedFromAbove())

    @staticmethod
    def from_op(
        op: AllReduceOpAttr,
        operand: SSAValue | Operation,
        uniform: UnitAttr | None = None,
    ):
        return AllReduceOp.build(
            operands=[operand],
            result_types=[SSAValue.get(operand).type],
            properties={
                "op": op,
                "uniform": uniform,
            },
            regions=[Region()],
        )

    @staticmethod
    def from_body(
        body: Region, operand: SSAValue | Operation, uniform: UnitAttr | None = None
    ):
        return AllReduceOp.build(
            operands=[operand],
            result_types=[SSAValue.get(operand).type],
            properties={"uniform": uniform} if uniform is not None else {},
            regions=[body],
        )

    def verify_(self) -> None:
        if self.result.type != self.operand.type:
            raise VerifyException(
                f"Type mismatch: result type is {self.result.type}, operand type is "
                f"{self.operand.type}. They must be the same type for gpu.all_reduce"
            )

        non_empty_body = bool(self.body.blocks)
        op_attr = self.op is not None
        if non_empty_body == op_attr:
            if op_attr:
                raise VerifyException(
                    "gpu.all_reduce can't have both a non-empty region and an op "
                    "attribute."
                )
            else:
                raise VerifyException(
                    "gpu.all_reduce need either a non empty body or an op attribute."
                )
        if non_empty_body:
            args_types = self.body.blocks[0].arg_types
            if args_types != (self.result.type, self.operand.type):
                raise VerifyException(
                    f"Expected {[str(t) for t in [self.result.type, self.operand.type]]}, "
                    f"got {[str(t) for t in args_types]}. A gpu.all_reduce's body must "
                    "have two arguments matching the result type."
                )

name = 'gpu.all_reduce' class-attribute instance-attribute

op = opt_prop_def(AllReduceOpAttr) class-attribute instance-attribute

uniform = opt_prop_def(UnitAttr) class-attribute instance-attribute

operand = operand_def(Attribute) class-attribute instance-attribute

result = result_def(Attribute) class-attribute instance-attribute

body = region_def() class-attribute instance-attribute

traits = traits_def(IsolatedFromAbove()) class-attribute instance-attribute

from_op(op: AllReduceOpAttr, operand: SSAValue | Operation, uniform: UnitAttr | None = None) staticmethod

Source code in xdsl/dialects/gpu.py
@staticmethod
def from_op(
    op: AllReduceOpAttr,
    operand: SSAValue | Operation,
    uniform: UnitAttr | None = None,
):
    return AllReduceOp.build(
        operands=[operand],
        result_types=[SSAValue.get(operand).type],
        properties={
            "op": op,
            "uniform": uniform,
        },
        regions=[Region()],
    )

from_body(body: Region, operand: SSAValue | Operation, uniform: UnitAttr | None = None) staticmethod

Source code in xdsl/dialects/gpu.py
@staticmethod
def from_body(
    body: Region, operand: SSAValue | Operation, uniform: UnitAttr | None = None
):
    return AllReduceOp.build(
        operands=[operand],
        result_types=[SSAValue.get(operand).type],
        properties={"uniform": uniform} if uniform is not None else {},
        regions=[body],
    )

verify_() -> None

Source code in xdsl/dialects/gpu.py
def verify_(self) -> None:
    if self.result.type != self.operand.type:
        raise VerifyException(
            f"Type mismatch: result type is {self.result.type}, operand type is "
            f"{self.operand.type}. They must be the same type for gpu.all_reduce"
        )

    non_empty_body = bool(self.body.blocks)
    op_attr = self.op is not None
    if non_empty_body == op_attr:
        if op_attr:
            raise VerifyException(
                "gpu.all_reduce can't have both a non-empty region and an op "
                "attribute."
            )
        else:
            raise VerifyException(
                "gpu.all_reduce need either a non empty body or an op attribute."
            )
    if non_empty_body:
        args_types = self.body.blocks[0].arg_types
        if args_types != (self.result.type, self.operand.type):
            raise VerifyException(
                f"Expected {[str(t) for t in [self.result.type, self.operand.type]]}, "
                f"got {[str(t) for t in args_types]}. A gpu.all_reduce's body must "
                "have two arguments matching the result type."
            )
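
A sketch of the attribute-based form, reducing an i32 value with addition; per the verifier above, the region stays empty when the op attribute is set (the arith constant is only there to provide an operand):

from xdsl.dialects import arith, gpu

val = arith.ConstantOp.from_int_and_width(1, 32)
red = gpu.AllReduceOp.from_op(
    gpu.AllReduceOpAttr(gpu.AllReduceOpEnum.Add),
    val,
)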

BarrierOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class BarrierOp(IRDLOperation):
    name = "gpu.barrier"

    def __init__(self):
        super().__init__()

name = 'gpu.barrier' class-attribute instance-attribute

__init__()

Source code in xdsl/dialects/gpu.py
def __init__(self):
    super().__init__()

BlockDimOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class BlockDimOp(IRDLOperation):
    name = "gpu.block_dim"
    dimension = prop_def(DimensionAttr)
    result = result_def(IndexType)

    def __init__(self, dim: DimensionAttr):
        super().__init__(result_types=[IndexType()], properties={"dimension": dim})

name = 'gpu.block_dim' class-attribute instance-attribute

dimension = prop_def(DimensionAttr) class-attribute instance-attribute

result = result_def(IndexType) class-attribute instance-attribute

__init__(dim: DimensionAttr)

Source code in xdsl/dialects/gpu.py
def __init__(self, dim: DimensionAttr):
    super().__init__(result_types=[IndexType()], properties={"dimension": dim})
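
BlockDimOp, BlockIdOp, GlobalIdOp, GridDimOp, and ThreadIdOp all follow the same pattern: pass a DimensionAttr, get an index-typed result. A minimal sketch:

from xdsl.dialects import gpu

dim_x = gpu.DimensionAttr(gpu.DimensionEnum.X)
bdim = gpu.BlockDimOp(dim_x)   # threads per block along x
bid = gpu.BlockIdOp(dim_x)     # block index along x
tid = gpu.ThreadIdOp(dim_x)    # thread index along x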

BlockIdOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class BlockIdOp(IRDLOperation):
    name = "gpu.block_id"
    dimension = prop_def(DimensionAttr)
    result = result_def(IndexType)

    def __init__(self, dim: DimensionAttr):
        super().__init__(result_types=[IndexType()], properties={"dimension": dim})

name = 'gpu.block_id' class-attribute instance-attribute

dimension = prop_def(DimensionAttr) class-attribute instance-attribute

result = result_def(IndexType) class-attribute instance-attribute

__init__(dim: DimensionAttr)

Source code in xdsl/dialects/gpu.py
def __init__(self, dim: DimensionAttr):
    super().__init__(result_types=[IndexType()], properties={"dimension": dim})

DeallocOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class DeallocOp(IRDLOperation):
    name = "gpu.dealloc"

    asyncDependencies = var_operand_def(AsyncTokenType)
    buffer = operand_def(memref.MemRefType)

    irdl_options = (AttrSizedOperandSegments(),)

    asyncToken = opt_result_def(AsyncTokenType)

    def __init__(
        self,
        buffer: SSAValue | Operation,
        async_dependencies: Sequence[SSAValue | Operation] | None = None,
        is_async: bool = False,
    ):
        super().__init__(
            operands=[async_dependencies, buffer],
            result_types=[[AsyncTokenType()] if is_async else []],
        )

name = 'gpu.dealloc' class-attribute instance-attribute

asyncDependencies = var_operand_def(AsyncTokenType) class-attribute instance-attribute

buffer = operand_def(memref.MemRefType) class-attribute instance-attribute

irdl_options = (AttrSizedOperandSegments(),) class-attribute instance-attribute

asyncToken = opt_result_def(AsyncTokenType) class-attribute instance-attribute

__init__(buffer: SSAValue | Operation, async_dependencies: Sequence[SSAValue | Operation] | None = None, is_async: bool = False)

Source code in xdsl/dialects/gpu.py
def __init__(
    self,
    buffer: SSAValue | Operation,
    async_dependencies: Sequence[SSAValue | Operation] | None = None,
    is_async: bool = False,
):
    super().__init__(
        operands=[async_dependencies, buffer],
        result_types=[[AsyncTokenType()] if is_async else []],
    )
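
A sketch pairing an async alloc with an async dealloc by threading the token through:

from xdsl.dialects import gpu
from xdsl.dialects.builtin import f32
from xdsl.dialects.memref import MemRefType

buf = gpu.AllocOp(MemRefType(f32, [16]), is_async=True)
free = gpu.DeallocOp(
    buf.result,  # the buffer to free
    async_dependencies=[buf.asyncToken],  # run after the alloc completes
    is_async=True,
)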

MemcpyOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class MemcpyOp(IRDLOperation):
    name = "gpu.memcpy"

    asyncDependencies = var_operand_def(AsyncTokenType)
    dst = operand_def(memref.MemRefType)
    src = operand_def(memref.MemRefType)

    irdl_options = (AttrSizedOperandSegments(),)

    asyncToken = opt_result_def(AsyncTokenType)

    def __init__(
        self,
        source: SSAValue | Operation,
        destination: SSAValue | Operation,
        async_dependencies: Sequence[SSAValue | Operation] | None = None,
        is_async: bool = False,
    ):
        super().__init__(
            operands=[async_dependencies, destination, source],
            result_types=[[AsyncTokenType()] if is_async else []],
        )

    def verify_(self) -> None:
        if self.src.type != self.dst.type:
            raise VerifyException(
                f"Expected {self.dst.type}, got {self.src.type}. gpu.memcpy source and "
                "destination types must match."
            )

name = 'gpu.memcpy' class-attribute instance-attribute

asyncDependencies = var_operand_def(AsyncTokenType) class-attribute instance-attribute

dst = operand_def(memref.MemRefType) class-attribute instance-attribute

src = operand_def(memref.MemRefType) class-attribute instance-attribute

irdl_options = (AttrSizedOperandSegments(),) class-attribute instance-attribute

asyncToken = opt_result_def(AsyncTokenType) class-attribute instance-attribute

__init__(source: SSAValue | Operation, destination: SSAValue | Operation, async_dependencies: Sequence[SSAValue | Operation] | None = None, is_async: bool = False)

Source code in xdsl/dialects/gpu.py
def __init__(
    self,
    source: SSAValue | Operation,
    destination: SSAValue | Operation,
    async_dependencies: Sequence[SSAValue | Operation] | None = None,
    is_async: bool = False,
):
    super().__init__(
        operands=[async_dependencies, destination, source],
        result_types=[[AsyncTokenType()] if is_async else []],
    )

verify_() -> None

Source code in xdsl/dialects/gpu.py
def verify_(self) -> None:
    if self.src.type != self.dst.type:
        raise VerifyException(
            f"Expected {self.dst.type}, got {self.src.type}. gpu.memcpy source and "
            "destination types must match."
        )
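
A sketch of a copy between two identically typed buffers; note that the initializer takes the source first, while the stored operand order is destination, then source:

from xdsl.dialects import gpu
from xdsl.dialects.builtin import f32
from xdsl.dialects.memref import MemRefType

src = gpu.AllocOp(MemRefType(f32, [16]))
dst = gpu.AllocOp(MemRefType(f32, [16]))
copy = gpu.MemcpyOp(src.result, dst.result)  # (source, destination)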

ModuleOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class ModuleOp(IRDLOperation):
    name = "gpu.module"

    body = region_def("single_block")
    sym_name = prop_def(SymbolNameConstraint())

    traits = traits_def(
        IsolatedFromAbove(),
        NoTerminator(),
        SymbolOpInterface(),
        SymbolTable(),
    )

    def __init__(self, name: SymbolRefAttr, body: Region):
        super().__init__(properties={"sym_name": name}, regions=[body])

name = 'gpu.module' class-attribute instance-attribute

body = region_def('single_block') class-attribute instance-attribute

sym_name = prop_def(SymbolNameConstraint()) class-attribute instance-attribute

traits = traits_def(IsolatedFromAbove(), NoTerminator(), SymbolOpInterface(), SymbolTable()) class-attribute instance-attribute

__init__(name: SymbolRefAttr, body: Region)

Source code in xdsl/dialects/gpu.py
def __init__(self, name: SymbolRefAttr, body: Region):
    super().__init__(properties={"sym_name": name}, regions=[body])

FuncOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class FuncOp(IRDLOperation):
    name = "gpu.func"

    body = region_def()
    sym_name = attr_def(SymbolNameConstraint())
    function_type = prop_def(FunctionType)
    kernel = opt_prop_def(UnitAttr)
    known_block_size = opt_attr_def(
        DenseArrayBase.constr(i32), attr_name="gpu.known_block_size"
    )
    known_grid_size = opt_attr_def(
        DenseArrayBase.constr(i32), attr_name="gpu.known_grid_size"
    )

    traits = traits_def(IsolatedFromAbove(), HasParent(ModuleOp), SymbolOpInterface())

    def __init__(
        self,
        name: str,
        function_type: FunctionType | tuple[Sequence[Attribute], Sequence[Attribute]],
        region: Region | type[Region.DEFAULT] = Region.DEFAULT,
        kernel: bool | None = None,
        known_block_size: Sequence[int] | None = None,
        known_grid_size: Sequence[int] | None = None,
    ):
        if isinstance(function_type, tuple):
            inputs, outputs = function_type
            function_type = FunctionType.from_lists(inputs, outputs)
        if not isinstance(region, Region):
            region = Region(Block(arg_types=function_type.inputs))
        attributes: dict[str, Attribute | None] = {"sym_name": StringAttr(name)}
        properties: dict[str, Attribute | None] = {
            "function_type": function_type,
        }
        if known_block_size is not None:
            attributes["gpu.known_block_size"] = DenseArrayBase.from_list(
                i32, known_block_size
            )
        if known_grid_size is not None:
            attributes["gpu.known_grid_size"] = DenseArrayBase.from_list(
                i32, known_grid_size
            )
        if kernel:
            properties["kernel"] = UnitAttr()
        super().__init__(properties=properties, attributes=attributes, regions=[region])

    def verify_(self):
        entry_block: Block = self.body.blocks[0]
        function_inputs = self.function_type.inputs.data
        block_arg_types = entry_block.arg_types
        if function_inputs != block_arg_types:
            raise VerifyException(
                "Expected first entry block arguments to have the same types as the "
                "function input types"
            )
        if (self.kernel is not None) and (len(self.function_type.outputs) != 0):
            raise VerifyException("Expected void return type for kernel function")

name = 'gpu.func' class-attribute instance-attribute

body = region_def() class-attribute instance-attribute

sym_name = attr_def(SymbolNameConstraint()) class-attribute instance-attribute

function_type = prop_def(FunctionType) class-attribute instance-attribute

kernel = opt_prop_def(UnitAttr) class-attribute instance-attribute

known_block_size = opt_attr_def(DenseArrayBase.constr(i32), attr_name='gpu.known_block_size') class-attribute instance-attribute

known_grid_size = opt_attr_def(DenseArrayBase.constr(i32), attr_name='gpu.known_grid_size') class-attribute instance-attribute

traits = traits_def(IsolatedFromAbove(), HasParent(ModuleOp), SymbolOpInterface()) class-attribute instance-attribute

__init__(name: str, function_type: FunctionType | tuple[Sequence[Attribute], Sequence[Attribute]], region: Region | type[Region.DEFAULT] = Region.DEFAULT, kernel: bool | None = None, known_block_size: Sequence[int] | None = None, known_grid_size: Sequence[int] | None = None)

Source code in xdsl/dialects/gpu.py
def __init__(
    self,
    name: str,
    function_type: FunctionType | tuple[Sequence[Attribute], Sequence[Attribute]],
    region: Region | type[Region.DEFAULT] = Region.DEFAULT,
    kernel: bool | None = None,
    known_block_size: Sequence[int] | None = None,
    known_grid_size: Sequence[int] | None = None,
):
    if isinstance(function_type, tuple):
        inputs, outputs = function_type
        function_type = FunctionType.from_lists(inputs, outputs)
    if not isinstance(region, Region):
        region = Region(Block(arg_types=function_type.inputs))
    attributes: dict[str, Attribute | None] = {"sym_name": StringAttr(name)}
    properties: dict[str, Attribute | None] = {
        "function_type": function_type,
    }
    if known_block_size is not None:
        attributes["gpu.known_block_size"] = DenseArrayBase.from_list(
            i32, known_block_size
        )
    if known_grid_size is not None:
        attributes["gpu.known_grid_size"] = DenseArrayBase.from_list(
            i32, known_grid_size
        )
    if kernel:
        properties["kernel"] = UnitAttr()
    super().__init__(properties=properties, attributes=attributes, regions=[region])

verify_()

Source code in xdsl/dialects/gpu.py
def verify_(self):
    entry_block: Block = self.body.blocks[0]
    function_inputs = self.function_type.inputs.data
    block_arg_types = entry_block.arg_types
    if function_inputs != block_arg_types:
        raise VerifyException(
            "Expected first entry block arguments to have the same types as the "
            "function input types"
        )
    if (self.kernel is not None) and (len(self.function_type.outputs) != 0):
        raise VerifyException("Expected void return type for kernel function")

GlobalIdOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class GlobalIdOp(IRDLOperation):
    name = "gpu.global_id"
    dimension = prop_def(DimensionAttr)
    result = result_def(IndexType)

    def __init__(self, dim: DimensionAttr):
        super().__init__(result_types=[IndexType()], properties={"dimension": dim})

name = 'gpu.global_id' class-attribute instance-attribute

dimension = prop_def(DimensionAttr) class-attribute instance-attribute

result = result_def(IndexType) class-attribute instance-attribute

__init__(dim: DimensionAttr)

Source code in xdsl/dialects/gpu.py
def __init__(self, dim: DimensionAttr):
    super().__init__(result_types=[IndexType()], properties={"dimension": dim})

GridDimOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class GridDimOp(IRDLOperation):
    name = "gpu.grid_dim"
    dimension = prop_def(DimensionAttr)
    result = result_def(IndexType)

    def __init__(self, dim: DimensionAttr):
        super().__init__(result_types=[IndexType()], properties={"dimension": dim})

name = 'gpu.grid_dim' class-attribute instance-attribute

dimension = prop_def(DimensionAttr) class-attribute instance-attribute

result = result_def(IndexType) class-attribute instance-attribute

__init__(dim: DimensionAttr)

Source code in xdsl/dialects/gpu.py
def __init__(self, dim: DimensionAttr):
    super().__init__(result_types=[IndexType()], properties={"dimension": dim})

HostRegisterOp

Bases: IRDLOperation

This op maps the provided host buffer into the device address space.

This operation may not be supported in every environment; there is not yet a way to check at runtime whether this feature is supported. Writes from the host are guaranteed to be visible to device kernels that are launched afterwards. Writes from the device are guaranteed to be visible on the host after synchronizing with the device kernel completion.

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class HostRegisterOp(IRDLOperation):
    """
    This op maps the provided host buffer into the device address space.

    This operation may not be supported in every environment; there is not yet a way to
    check at runtime whether this feature is supported.
    Writes from the host are guaranteed to be visible to device kernels that are launched
    afterwards. Writes from the device are guaranteed to be visible on the host after
    synchronizing with the device kernel completion.
    """

    name = "gpu.host_register"

    value = operand_def(memref.UnrankedMemRefType)

    def __init__(self, memref: SSAValue | Operation):
        super().__init__(operands=[SSAValue.get(memref)])

name = 'gpu.host_register' class-attribute instance-attribute

value = operand_def(memref.UnrankedMemRefType) class-attribute instance-attribute

__init__(memref: SSAValue | Operation)

Source code in xdsl/dialects/gpu.py
def __init__(self, memref: SSAValue | Operation):
    super().__init__(operands=[SSAValue.get(memref)])

HostUnregisterOp

Bases: IRDLOperation

Unregisters a memref for access from device.

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class HostUnregisterOp(IRDLOperation):
    """
    Unregisters a memref for access from device.
    """

    name = "gpu.host_unregister"

    value = operand_def(memref.UnrankedMemRefType)

    def __init__(self, memref: SSAValue | Operation):
        super().__init__(operands=[SSAValue.get(memref)])

name = 'gpu.host_unregister' class-attribute instance-attribute

value = operand_def(memref.UnrankedMemRefType) class-attribute instance-attribute

__init__(memref: SSAValue | Operation)

Source code in xdsl/dialects/gpu.py
def __init__(self, memref: SSAValue | Operation):
    super().__init__(operands=[SSAValue.get(memref)])

LaneIdOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class LaneIdOp(IRDLOperation):
    name = "gpu.lane_id"
    result = result_def(IndexType)

    def __init__(self):
        super().__init__(result_types=[IndexType()])

name = 'gpu.lane_id' class-attribute instance-attribute

result = result_def(IndexType) class-attribute instance-attribute

__init__()

Source code in xdsl/dialects/gpu.py
def __init__(self):
    super().__init__(result_types=[IndexType()])

LaunchOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class LaunchOp(IRDLOperation):
    name = "gpu.launch"
    asyncDependencies = var_operand_def(AsyncTokenType)
    gridSizeX = operand_def(IndexType)
    gridSizeY = operand_def(IndexType)
    gridSizeZ = operand_def(IndexType)
    blockSizeX = operand_def(IndexType)
    blockSizeY = operand_def(IndexType)
    blockSizeZ = operand_def(IndexType)
    clusterSizeX = opt_operand_def(IndexType)
    clusterSizeY = opt_operand_def(IndexType)
    clusterSizeZ = opt_operand_def(IndexType)
    dynamicSharedMemorySize = opt_operand_def(i32)
    asyncToken = opt_result_def(AsyncTokenType)
    body = region_def()
    irdl_options = (AttrSizedOperandSegments(as_property=True),)

    def __init__(
        self,
        body: Region,
        gridSize: Sequence[SSAValue | Operation],
        blockSize: Sequence[SSAValue | Operation],
        clusterSize: Sequence[SSAValue | Operation] = [],
        async_launch: bool = False,
        asyncDependencies: Sequence[SSAValue | Operation] | None = None,
        dynamicSharedMemorySize: SSAValue | Operation | None = None,
    ):
        if len(gridSize) != 3:
            raise ValueError(f"LaunchOp must have 3 gridSizes, got {len(gridSize)}")
        if len(blockSize) != 3:
            raise ValueError(f"LaunchOp must have 3 blockSizes, got {len(blockSize)}")
        if len(clusterSize) != 3 and len(clusterSize) != 0:
            raise ValueError(
                f"LaunchOp must have 0 or 3 clusterSizes, got {len(clusterSize)}"
            )
        operands = [
            (
                []
                if asyncDependencies is None
                else [SSAValue.get(a) for a in asyncDependencies]
            )
        ]

        operands += [SSAValue.get(gs) for gs in gridSize]
        operands += [SSAValue.get(bs) for bs in blockSize]
        if clusterSize:
            operands += [(SSAValue.get(cs),) for cs in clusterSize]
        else:
            operands += [(), (), ()]
        operands += [
            (
                []
                if dynamicSharedMemorySize is None
                else [SSAValue.get(dynamicSharedMemorySize)]
            )
        ]
        super().__init__(
            operands=operands,
            result_types=[[AsyncTokenType()] if async_launch else []],
            regions=[body],
        )

    def verify_(self) -> None:
        if not any(b.ops for b in self.body.blocks):
            raise VerifyException("gpu.launch requires a non-empty body.")
        args_type = self.body.blocks[0].arg_types
        if args_type != (IndexType(),) * 12:
            raise VerifyException(
                f"Expected [12 x {str(IndexType())}], got {[str(t) for t in args_type]}. "
                "gpu.launch's body arguments are 12 index arguments, with 3 block "
                "indices, 3 block sizes, 3 thread indices, and 3 thread counts"
            )

name = 'gpu.launch' class-attribute instance-attribute

asyncDependencies = var_operand_def(AsyncTokenType) class-attribute instance-attribute

gridSizeX = operand_def(IndexType) class-attribute instance-attribute

gridSizeY = operand_def(IndexType) class-attribute instance-attribute

gridSizeZ = operand_def(IndexType) class-attribute instance-attribute

blockSizeX = operand_def(IndexType) class-attribute instance-attribute

blockSizeY = operand_def(IndexType) class-attribute instance-attribute

blockSizeZ = operand_def(IndexType) class-attribute instance-attribute

clusterSizeX = opt_operand_def(IndexType) class-attribute instance-attribute

clusterSizeY = opt_operand_def(IndexType) class-attribute instance-attribute

clusterSizeZ = opt_operand_def(IndexType) class-attribute instance-attribute

dynamicSharedMemorySize = opt_operand_def(i32) class-attribute instance-attribute

asyncToken = opt_result_def(AsyncTokenType) class-attribute instance-attribute

body = region_def() class-attribute instance-attribute

irdl_options = (AttrSizedOperandSegments(as_property=True),) class-attribute instance-attribute

__init__(body: Region, gridSize: Sequence[SSAValue | Operation], blockSize: Sequence[SSAValue | Operation], clusterSize: Sequence[SSAValue | Operation] = [], async_launch: bool = False, asyncDependencies: Sequence[SSAValue | Operation] | None = None, dynamicSharedMemorySize: SSAValue | Operation | None = None)

Source code in xdsl/dialects/gpu.py
def __init__(
    self,
    body: Region,
    gridSize: Sequence[SSAValue | Operation],
    blockSize: Sequence[SSAValue | Operation],
    clusterSize: Sequence[SSAValue | Operation] = [],
    async_launch: bool = False,
    asyncDependencies: Sequence[SSAValue | Operation] | None = None,
    dynamicSharedMemorySize: SSAValue | Operation | None = None,
):
    if len(gridSize) != 3:
        raise ValueError(f"LaunchOp must have 3 gridSizes, got {len(gridSize)}")
    if len(blockSize) != 3:
        raise ValueError(f"LaunchOp must have 3 blockSizes, got {len(blockSize)}")
    if len(clusterSize) != 3 and len(clusterSize) != 0:
        raise ValueError(
            f"LaunchOp must have 0 or 3 clusterSizes, got {len(clusterSize)}"
        )
    operands = [
        (
            []
            if asyncDependencies is None
            else [SSAValue.get(a) for a in asyncDependencies]
        )
    ]

    operands += [SSAValue.get(gs) for gs in gridSize]
    operands += [SSAValue.get(bs) for bs in blockSize]
    if clusterSize:
        operands += [(SSAValue.get(cs),) for cs in clusterSize]
    else:
        operands += [(), (), ()]
    operands += [
        (
            []
            if dynamicSharedMemorySize is None
            else [SSAValue.get(dynamicSharedMemorySize)]
        )
    ]
    super().__init__(
        operands=operands,
        result_types=[[AsyncTokenType()] if async_launch else []],
        regions=[body],
    )

verify_() -> None

Source code in xdsl/dialects/gpu.py
def verify_(self) -> None:
    if not any(b.ops for b in self.body.blocks):
        raise VerifyException("gpu.launch requires a non-empty body.")
    args_type = self.body.blocks[0].arg_types
    if args_type != (IndexType(),) * 12:
        raise VerifyException(
            f"Expected [12 x {str(IndexType())}], got {[str(t) for t in args_type]}. "
            "gpu.launch's body arguments are 12 index arguments, with 3 block "
            "indices, 3 block sizes, 3 thread indices, and 3 thread counts"
        )
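
A sketch of a 1x1x1 launch; the body block must declare the 12 index arguments the verifier checks for, and gpu.terminator closes the region:

from xdsl.dialects import arith, gpu
from xdsl.dialects.builtin import IndexType
from xdsl.ir import Block, Region

one = arith.ConstantOp.from_int_and_width(1, IndexType())
# 3 block indices, 3 block sizes, 3 thread indices, 3 thread counts.
body = Block(arg_types=[IndexType()] * 12)
body.add_op(gpu.TerminatorOp())
launch = gpu.LaunchOp(Region(body), [one, one, one], [one, one, one])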

LaunchFuncOp

Bases: IRDLOperation

Launch a kernel function on the specified grid of thread blocks. gpu.launch operations are lowered to gpu.launch_func operations by outlining the kernel body into a function in a dedicated module, which reflects the separate compilation process. The kernel function is required to have the gpu.kernel attribute. The module containing the kernel function is required to be a gpu.module. And finally, the module containing the kernel module (which thus cannot be the top-level module) is required to have the gpu.container_module attribute. The gpu.launch_func operation has a symbol attribute named kernel to identify the fully specified kernel function to launch (both the gpu.module and func).

The gpu.launch_func supports async dependencies: the kernel does not start executing until the ops producing those async dependencies have completed.

By default, the host implicitly blocks until kernel execution has completed. If the async keyword is present, the host does not block but instead a !gpu.async.token is returned. Other async GPU ops can take this token as dependency.

The operation requires at least the grid and block sizes along the x,y,z dimensions as arguments. When a lower-dimensional kernel is required, unused sizes must be explicitly set to 1.

The remaining operands are optional. The first optional operand corresponds to the amount of dynamic shared memory a kernel's workgroup should be allocated; when this operand is not present, a zero size is assumed.

The remaining operands if present are passed as arguments to the kernel function.

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class LaunchFuncOp(IRDLOperation):
    """
    Launch a kernel function on the specified grid of thread blocks. gpu.launch
    operations are lowered to gpu.launch_func operations by outlining the kernel body
    into a function in a dedicated module, which reflects the separate compilation
    process. The kernel function is required to have the gpu.kernel attribute. The
    module containing the kernel function is required to be a gpu.module. And finally,
    the module containing the kernel module (which thus cannot be the top-level module)
    is required to have the gpu.container_module attribute. The gpu.launch_func operation
    has a symbol attribute named kernel to identify the fully specified kernel function
    to launch (both the gpu.module and func).

    The gpu.launch_func supports async dependencies: the kernel does not start executing
    until the ops producing those async dependencies have completed.

    By default, the host implicitly blocks until kernel execution has completed. If
    the async keyword is present, the host does not block but instead a !gpu.async.token
    is returned. Other async GPU ops can take this token as dependency.

    The operation requires at least the grid and block sizes along the x,y,z dimensions
    as arguments. When a lower-dimensional kernel is required, unused sizes must be
    explicitly set to 1.

    The remaining operands are optional. The first optional operand corresponds to the
    amount of dynamic shared memory a kernel's workgroup should be allocated; when this
    operand is not present, a zero size is assumed.

    The remaining operands if present are passed as arguments to the kernel function.
    """

    name = "gpu.launch_func"
    asyncDependencies = var_operand_def(AsyncTokenType)
    gridSizeX = operand_def(AnyOf((IndexType, i32, i64)))
    gridSizeY = operand_def(AnyOf((IndexType, i32, i64)))
    gridSizeZ = operand_def(AnyOf((IndexType, i32, i64)))
    blockSizeX = operand_def(AnyOf((IndexType, i32, i64)))
    blockSizeY = operand_def(AnyOf((IndexType, i32, i64)))
    blockSizeZ = operand_def(AnyOf((IndexType, i32, i64)))
    clusterSizeX = opt_operand_def(AnyOf((IndexType, i32, i64)))
    clusterSizeY = opt_operand_def(AnyOf((IndexType, i32, i64)))
    clusterSizeZ = opt_operand_def(AnyOf((IndexType, i32, i64)))
    dynamicSharedMemorySize = opt_operand_def(i32)
    kernelOperands = var_operand_def()
    asyncObject = opt_operand_def()

    asyncToken = opt_result_def(AsyncTokenType)

    kernel = prop_def(SymbolRefAttr)

    irdl_options = (AttrSizedOperandSegments(as_property=True),)

    def __init__(
        self,
        func: SymbolRefAttr,
        gridSize: Sequence[SSAValue | Operation],
        blockSize: Sequence[SSAValue | Operation],
        clusterSize: Sequence[SSAValue | Operation] | None = None,
        kernelOperands: Sequence[SSAValue | Operation] | None = None,
        async_launch: bool = False,
        asyncDependencies: Sequence[SSAValue | Operation] | None = None,
        dynamicSharedMemorySize: SSAValue | Operation | None = None,
    ):
        if len(gridSize) != 3:
            raise ValueError(f"LaunchOp must have 3 gridSizes, got {len(gridSize)}")
        if len(blockSize) != 3:
            raise ValueError(f"LaunchOp must have 3 blockSizes, got {len(blockSize)}")
        clusterSizeOperands: Sequence[
            SSAValue | Operation | Sequence[SSAValue | Operation]
        ]
        if clusterSize is None:
            clusterSizeOperands = [[], [], []]
        else:
            clusterSizeOperands = clusterSize
        if len(clusterSizeOperands) != 3:
            raise ValueError(
                f"LaunchFuncOp must have 3 cluterSizes if any, got {len(clusterSizeOperands)}"
            )

        super().__init__(
            operands=[
                asyncDependencies,
                *gridSize,
                *blockSize,
                *clusterSizeOperands,
                dynamicSharedMemorySize,
                kernelOperands,
                [],
            ],
            result_types=[[AsyncTokenType()] if async_launch else []],
            properties={"kernel": func},
        )

name = 'gpu.launch_func' class-attribute instance-attribute

asyncDependencies = var_operand_def(AsyncTokenType) class-attribute instance-attribute

gridSizeX = operand_def(AnyOf((IndexType, i32, i64))) class-attribute instance-attribute

gridSizeY = operand_def(AnyOf((IndexType, i32, i64))) class-attribute instance-attribute

gridSizeZ = operand_def(AnyOf((IndexType, i32, i64))) class-attribute instance-attribute

blockSizeX = operand_def(AnyOf((IndexType, i32, i64))) class-attribute instance-attribute

blockSizeY = operand_def(AnyOf((IndexType, i32, i64))) class-attribute instance-attribute

blockSizeZ = operand_def(AnyOf((IndexType, i32, i64))) class-attribute instance-attribute

clusterSizeX = opt_operand_def(AnyOf((IndexType, i32, i64))) class-attribute instance-attribute

clusterSizeY = opt_operand_def(AnyOf((IndexType, i32, i64))) class-attribute instance-attribute

clusterSizeZ = opt_operand_def(AnyOf((IndexType, i32, i64))) class-attribute instance-attribute

dynamicSharedMemorySize = opt_operand_def(i32) class-attribute instance-attribute

kernelOperands = var_operand_def() class-attribute instance-attribute

asyncObject = opt_operand_def() class-attribute instance-attribute

asyncToken = opt_result_def(AsyncTokenType) class-attribute instance-attribute

kernel = prop_def(SymbolRefAttr) class-attribute instance-attribute

irdl_options = (AttrSizedOperandSegments(as_property=True),) class-attribute instance-attribute

__init__(func: SymbolRefAttr, gridSize: Sequence[SSAValue | Operation], blockSize: Sequence[SSAValue | Operation], clusterSize: Sequence[SSAValue | Operation] | None = None, kernelOperands: Sequence[SSAValue | Operation] | None = None, async_launch: bool = False, asyncDependencies: Sequence[SSAValue | Operation] | None = None, dynamicSharedMemorySize: SSAValue | Operation | None = None)

Source code in xdsl/dialects/gpu.py
def __init__(
    self,
    func: SymbolRefAttr,
    gridSize: Sequence[SSAValue | Operation],
    blockSize: Sequence[SSAValue | Operation],
    clusterSize: Sequence[SSAValue | Operation] | None = None,
    kernelOperands: Sequence[SSAValue | Operation] | None = None,
    async_launch: bool = False,
    asyncDependencies: Sequence[SSAValue | Operation] | None = None,
    dynamicSharedMemorySize: SSAValue | Operation | None = None,
):
    if len(gridSize) != 3:
        raise ValueError(f"LaunchOp must have 3 gridSizes, got {len(gridSize)}")
    if len(blockSize) != 3:
        raise ValueError(f"LaunchOp must have 3 blockSizes, got {len(blockSize)}")
    clusterSizeOperands: Sequence[
        SSAValue | Operation | Sequence[SSAValue | Operation]
    ]
    if clusterSize is None:
        clusterSizeOperands = [[], [], []]
    else:
        clusterSizeOperands = clusterSize
    if len(clusterSizeOperands) != 3:
        raise ValueError(
            f"LaunchFuncOp must have 3 cluterSizes if any, got {len(clusterSizeOperands)}"
        )

    super().__init__(
        operands=[
            asyncDependencies,
            *gridSize,
            *blockSize,
            *clusterSizeOperands,
            dynamicSharedMemorySize,
            kernelOperands,
            [],
        ],
        result_types=[[AsyncTokenType()] if async_launch else []],
        properties={"kernel": func},
    )
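
A sketch of launching a kernel by symbol with unit grid and block sizes; the module and function names are illustrative, and the nested-symbol constructor is an assumption:

from xdsl.dialects import arith, gpu
from xdsl.dialects.builtin import IndexType, SymbolRefAttr

one = arith.ConstantOp.from_int_and_width(1, IndexType())
launch = gpu.LaunchFuncOp(
    SymbolRefAttr("kernels", ["my_kernel"]),  # @kernels::@my_kernel
    [one, one, one],  # grid sizes x, y, z
    [one, one, one],  # block sizes x, y, z
)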

NumSubgroupsOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class NumSubgroupsOp(IRDLOperation):
    name = "gpu.num_subgroups"
    result = result_def(IndexType)

    def __init__(self):
        super().__init__(result_types=[IndexType()])

name = 'gpu.num_subgroups' class-attribute instance-attribute

result = result_def(IndexType) class-attribute instance-attribute

__init__()

Source code in xdsl/dialects/gpu.py
def __init__(self):
    super().__init__(result_types=[IndexType()])

ReturnOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class ReturnOp(IRDLOperation):
    name = "gpu.return"

    args = var_operand_def()

    traits = traits_def(IsTerminator(), HasParent(FuncOp))

    def __init__(self, operands: Sequence[SSAValue | Operation]):
        super().__init__(operands=[operands])

name = 'gpu.return' class-attribute instance-attribute

args = var_operand_def() class-attribute instance-attribute

traits = traits_def(IsTerminator(), HasParent(FuncOp)) class-attribute instance-attribute

__init__(operands: Sequence[SSAValue | Operation])

Source code in xdsl/dialects/gpu.py
def __init__(self, operands: Sequence[SSAValue | Operation]):
    super().__init__(operands=[operands])

SetDefaultDeviceOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class SetDefaultDeviceOp(IRDLOperation):
    name = "gpu.set_default_device"
    devIndex = operand_def(i32)

    def __init__(self, devIndex: SSAValue | Operation):
        super().__init__(operands=[SSAValue.get(devIndex)])

name = 'gpu.set_default_device' class-attribute instance-attribute

devIndex = operand_def(i32) class-attribute instance-attribute

__init__(devIndex: SSAValue | Operation)

Source code in xdsl/dialects/gpu.py
def __init__(self, devIndex: SSAValue | Operation):
    super().__init__(operands=[SSAValue.get(devIndex)])

SubgroupIdOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class SubgroupIdOp(IRDLOperation):
    name = "gpu.subgroup_id"
    result = result_def(IndexType)

    def __init__(self):
        super().__init__(result_types=[IndexType()])

name = 'gpu.subgroup_id' class-attribute instance-attribute

result = result_def(IndexType) class-attribute instance-attribute

__init__()

Source code in xdsl/dialects/gpu.py
def __init__(self):
    super().__init__(result_types=[IndexType()])

SubgroupSizeOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class SubgroupSizeOp(IRDLOperation):
    name = "gpu.subgroup_size"
    result = result_def(IndexType)

    def __init__(self):
        super().__init__(result_types=[IndexType()])

name = 'gpu.subgroup_size' class-attribute instance-attribute

result = result_def(IndexType) class-attribute instance-attribute

__init__()

Source code in xdsl/dialects/gpu.py
def __init__(self):
    super().__init__(result_types=[IndexType()])

TerminatorOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class TerminatorOp(IRDLOperation):
    name = "gpu.terminator"

    traits = traits_def(HasParent(LaunchOp), IsTerminator())

    def __init__(self):
        super().__init__()

name = 'gpu.terminator' class-attribute instance-attribute

traits = traits_def(HasParent(LaunchOp), IsTerminator()) class-attribute instance-attribute

__init__()

Source code in xdsl/dialects/gpu.py
def __init__(self):
    super().__init__()

ThreadIdOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class ThreadIdOp(IRDLOperation):
    name = "gpu.thread_id"
    dimension = prop_def(DimensionAttr)
    result = result_def(IndexType)

    def __init__(self, dim: DimensionAttr):
        super().__init__(result_types=[IndexType()], properties={"dimension": dim})

name = 'gpu.thread_id' class-attribute instance-attribute

dimension = prop_def(DimensionAttr) class-attribute instance-attribute

result = result_def(IndexType) class-attribute instance-attribute

__init__(dim: DimensionAttr)

Source code in xdsl/dialects/gpu.py
def __init__(self, dim: DimensionAttr):
    super().__init__(result_types=[IndexType()], properties={"dimension": dim})

WaitOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class WaitOp(IRDLOperation):
    name = "gpu.wait"
    asyncDependencies = var_operand_def(AsyncTokenType)
    asyncToken = opt_result_def(AsyncTokenType)

    def __init__(
        self,
        async_dependencies: Sequence[SSAValue | Operation] | None = None,
    ):
        super().__init__(
            operands=[async_dependencies],
            result_types=[[AsyncTokenType()]],
        )

name = 'gpu.wait' class-attribute instance-attribute

asyncDependencies = var_operand_def(AsyncTokenType) class-attribute instance-attribute

asyncToken = opt_result_def(AsyncTokenType) class-attribute instance-attribute

__init__(async_dependencies: Sequence[SSAValue | Operation] | None = None)

Source code in xdsl/dialects/gpu.py
def __init__(
    self,
    async_dependencies: Sequence[SSAValue | Operation] | None = None,
):
    super().__init__(
        operands=[async_dependencies],
        result_types=[[AsyncTokenType()]],
    )
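
Note that the initializer always creates a token result, so gpu.wait doubles as a token source. A minimal sketch:

from xdsl.dialects import gpu

start = gpu.WaitOp()  # no dependencies; yields a fresh !gpu.async.token
joined = gpu.WaitOp([start.asyncToken])  # completes after the first token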

YieldOp

Bases: IRDLOperation

Source code in xdsl/dialects/gpu.py
@irdl_op_definition
class YieldOp(IRDLOperation):
    name = "gpu.yield"
    values = var_operand_def(Attribute)

    def __init__(self, operands: Sequence[SSAValue | Operation]):
        super().__init__(operands=[operands])

    traits = traits_def(IsTerminator())

    def verify_(self) -> None:
        op = self.parent_op()
        if op is not None:
            yield_type = self.values.types
            result_type = op.result_types
            if yield_type != result_type:
                raise VerifyException(
                    f"Expected {[str(t) for t in result_type]}, got {[str(t) for t in yield_type]}. The gpu.yield values "
                    "types must match its enclosing operation result types."
                )

name = 'gpu.yield' class-attribute instance-attribute

values = var_operand_def(Attribute) class-attribute instance-attribute

traits = traits_def(IsTerminator()) class-attribute instance-attribute

__init__(operands: Sequence[SSAValue | Operation])

Source code in xdsl/dialects/gpu.py
def __init__(self, operands: Sequence[SSAValue | Operation]):
    super().__init__(operands=[operands])

verify_() -> None

Source code in xdsl/dialects/gpu.py
def verify_(self) -> None:
    op = self.parent_op()
    if op is not None:
        yield_type = self.values.types
        result_type = op.result_types
        if yield_type != result_type:
            raise VerifyException(
                f"Expected {[str(t) for t in result_type]}, got {[str(t) for t in yield_type]}. The gpu.yield values "
                "types must match its enclosing operation result types."
            )