Skip to content

Gpu map parallel loops

gpu_map_parallel_loops

MappingAttrName = 'mapping' module-attribute

MapGrid = 0 module-attribute

MapBlock = 1 module-attribute

Sequential = 2 module-attribute

kNumHardwareIds = 3 module-attribute

GpuMapParallelLoopsPattern

Bases: RewritePattern

Source code in xdsl/transforms/gpu_map_parallel_loops.py
95
96
97
98
class GpuMapParallelLoopsPattern(RewritePattern):
    # Rewrite pattern that attaches GPU mapping information to each
    # `scf.parallel` loop it visits, by delegating to `mapParallelOp`.
    @op_type_rewrite_pattern
    def match_and_rewrite(self, op: ParallelOp, rewriter: PatternRewriter, /):
        """Attach a GPU mapping attribute to ``op`` (no-op if already mapped)."""
        mapParallelOp(op)

match_and_rewrite(op: ParallelOp, rewriter: PatternRewriter)

Source code in xdsl/transforms/gpu_map_parallel_loops.py
96
97
98
@op_type_rewrite_pattern
def match_and_rewrite(self, op: ParallelOp, rewriter: PatternRewriter, /):
    """Attach a GPU mapping attribute to ``op`` (no-op if already mapped)."""
    mapParallelOp(op)

GpuMapParallelLoopsPass dataclass

Bases: ModulePass

Source code in xdsl/transforms/gpu_map_parallel_loops.py
101
102
103
104
105
106
107
108
class GpuMapParallelLoopsPass(ModulePass):
    """Module pass that greedily maps `scf.parallel` loops to GPU hardware ids."""

    name = "gpu-map-parallel-loops"

    def apply(self, ctx: Context, op: ModuleOp) -> None:
        """Walk the whole module, applying the mapping pattern greedily."""
        applier = GreedyRewritePatternApplier([GpuMapParallelLoopsPattern()])
        PatternRewriteWalker(applier).rewrite_module(op)

name = 'gpu-map-parallel-loops' class-attribute instance-attribute

apply(ctx: Context, op: ModuleOp) -> None

Source code in xdsl/transforms/gpu_map_parallel_loops.py
104
105
106
107
108
def apply(self, ctx: Context, op: ModuleOp) -> None:
    """Walk the whole module, applying the mapping pattern greedily."""
    applier = GreedyRewritePatternApplier([GpuMapParallelLoopsPattern()])
    PatternRewriteWalker(applier).rewrite_module(op)

getHardwareIdForMapping(level: int, dimension: int) -> ProcessorEnum

Computes the hardware id to use for a given mapping level. Will assign x, y and z hardware ids for the first 3 dimensions and use sequential after.

Source code in xdsl/transforms/gpu_map_parallel_loops.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
def getHardwareIdForMapping(level: int, dimension: int) -> ProcessorEnum:
    """
    Computes the hardware id to use for a given mapping level. Assigns
    x, y and z hardware ids to the first 3 dimensions of the first two
    levels, and sequential everywhere else.
    """
    # Dimensions beyond the available hardware ids, and the explicitly
    # sequential level, are always mapped sequentially.
    if dimension >= kNumHardwareIds or level == Sequential:
        return ProcessorEnum.Sequential

    # x/y/z hardware ids per mapping level (grid level 0, block level 1).
    grid_ids = (ProcessorEnum.Block_X, ProcessorEnum.Block_Y, ProcessorEnum.Block_Z)
    thread_ids = (ProcessorEnum.Thread_X, ProcessorEnum.Thread_Y, ProcessorEnum.Thread_Z)

    # Negative dimensions and unknown levels fall through to sequential,
    # mirroring the wildcard arms of the original match statements.
    if 0 <= dimension < len(grid_ids):
        if level == 0:
            return grid_ids[dimension]
        if level == 1:
            return thread_ids[dimension]
    return ProcessorEnum.Sequential

mapParallelOp(parallelOp: ParallelOp, mappingLevel: int = MapGrid)

Add mapping information to the given parallel loop. Do not add mapping information if the loop already has it. Also, don't start a mapping at a nested loop.

Source code in xdsl/transforms/gpu_map_parallel_loops.py
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def mapParallelOp(parallelOp: ParallelOp, mappingLevel: int = MapGrid):
    """
    Add mapping information to the given parallel loop. Do not add
    mapping information if the loop already has it. Also, don't
    start a mapping at a nested loop.
    """
    # Already-mapped loops are left untouched.
    if MappingAttrName in parallelOp.attributes:
        return

    # A grid-level mapping must only start at an outermost parallel loop:
    # bail out if any ancestor is itself a ParallelOp.
    if mappingLevel == MapGrid:
        ancestor: Operation | None = parallelOp.parent_op()
        while ancestor is not None:
            if isinstance(ancestor, ParallelOp):
                return
            ancestor = ancestor.parent_op()

    # One LoopDimMapAttr per loop dimension, each carrying the hardware id
    # for (mappingLevel, dim) plus identity bound maps. The hardware ids
    # are assigned in reverse dimension order.
    identity_map = AffineMapAttr(AffineMap.identity(1))
    num_dims = len(parallelOp.lowerBound)
    dim_attrs = [
        LoopDimMapAttr(
            ProcessorAttr(getHardwareIdForMapping(mappingLevel, dim)),
            identity_map,
            identity_map,
        )
        for dim in reversed(range(num_dims))
    ]
    parallelOp.attributes[MappingAttrName] = ArrayAttr(dim_attrs)

    # Recurse into directly nested parallel loops at the next level.
    for inner in parallelOp.body.ops:
        if isinstance(inner, ParallelOp):
            mapParallelOp(inner, mappingLevel + 1)