Skip to content

Convert scf to openmp

convert_scf_to_openmp

ConvertParallel dataclass

Bases: RewritePattern

Source code in xdsl/transforms/convert_scf_to_openmp.py
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
@dataclass
class ConvertParallel(RewritePattern):
    """
    Rewrite pattern turning an `scf.ParallelOp` into an `omp.ParallelOp` that
    wraps an `omp.WsLoopOp` and an `omp.LoopNestOp`.

    Loops carrying initial values (i.e. reductions) are left untouched.
    """

    # Number of leading loop dimensions moved into the `omp.LoopNestOp`. `None`, or
    # a value larger than the loop's dimensionality, selects every dimension.
    collapse: int | None
    # When False, a loop that already sits inside an `omp.WsLoopOp` is not converted.
    nested: bool
    # Schedule kind to set on the `omp.WsLoopOp`. `None` leaves it unset, unless a
    # chunk size is given, in which case "static" is used.
    schedule: Literal["static", "dynamic", "auto"] | None
    # Chunk size passed to the `omp.WsLoopOp` as an index constant; `None` for none.
    chunk: int | None

    @op_type_rewrite_pattern
    def match_and_rewrite(self, loop: scf.ParallelOp, rewriter: PatternRewriter, /):
        """
        Replace `loop` with an `omp.ParallelOp` containing the converted loop.

        The first `collapse` dimensions become an `omp.LoopNestOp`; any remaining
        dimensions are kept as a new, smaller `scf.ParallelOp` nested inside it.
        The original body ends up inside a `memref.AllocaScopeOp`.

        The pattern does nothing when the loop has initial values, or when
        `nested` is False and the loop is already enclosed in an `omp.WsLoopOp`.
        """
        if len(loop.initVals) > 0:
            # TODO Implement reduction, see https://github.com/xdslproject/xdsl/issues/1776
            return

        # Clamp the requested collapse depth to the loop's dimensionality.
        collapse = self.collapse
        if collapse is None or collapse > len(loop.lowerBound):
            collapse = len(loop.lowerBound)

        if not self.nested:
            # Walk up the ancestors; bail out if any of them is a worksharing loop.
            parent = loop
            while (parent := parent.parent_op()) is not None:
                if isinstance(parent, omp.WsLoopOp):
                    return

        # A chunk size needs a schedule. Fall back to "static" only when the user
        # did not pick one, and keep the decision local so that the pattern's own
        # configuration is never modified by a rewrite.
        schedule = self.schedule
        if self.chunk is not None and schedule is None:
            schedule = "static"

        parallel = omp.ParallelOp(
            regions=[Region(Block())],
            operands=[[], [], [], [], [], []],
        )
        rewriter.insertion_point = InsertPoint.at_end(parallel.region.block)
        with ImplicitBuilder(rewriter):
            if self.chunk is None:
                chunk_op = []
            else:
                chunk_op = [
                    arith.ConstantOp.from_int_and_width(self.chunk, IndexType())
                ]
            wsloop = omp.WsLoopOp(
                operands=[
                    [],
                    [],
                    [],
                    [],
                    [],
                    [],
                    chunk_op,
                ],
                regions=[Region(Block())],
            )
            if schedule is not None:
                wsloop.schedule_kind = omp.ScheduleKindAttr(
                    omp.ScheduleKind(schedule)
                )
            omp.TerminatorOp()

        rewriter.insertion_point = InsertPoint.at_end(wsloop.body.block)
        with ImplicitBuilder(rewriter):
            # The loop nest takes over the bounds and steps of the collapsed dimensions.
            loop_nest = omp.LoopNestOp(
                operands=[
                    loop.lowerBound[:collapse],
                    loop.upperBound[:collapse],
                    loop.step[:collapse],
                ],
                regions=[Region(Block(arg_types=[IndexType()] * collapse))],
            )

        rewriter.insertion_point = InsertPoint.at_end(loop_nest.body.block)
        with ImplicitBuilder(rewriter):
            scope = memref.AllocaScopeOp(result_types=[[]], regions=[Region(Block())])
            omp.YieldOp()

        rewriter.insertion_point = InsertPoint.at_end(scope.scope.block)
        with ImplicitBuilder(rewriter):
            scope_terminator = memref.AllocaScopeReturnOp(operands=[[]])

        # Redirect uses of the collapsed induction variables to the loop nest's
        # block arguments, then drop the now unused original arguments.
        for newarg, oldarg in zip(
            loop_nest.body.block.args, loop.body.block.args[:collapse]
        ):
            oldarg.replace_all_uses_with(newarg)

        for _ in range(collapse):
            loop.body.block.erase_arg(loop.body.block.args[0])
        if collapse < len(loop.lowerBound):
            # Some dimensions remain: keep them as an inner scf.parallel that
            # takes over the original body region.
            new_loop = scf.ParallelOp(
                lower_bounds=loop.lowerBound[collapse:],
                upper_bounds=loop.upperBound[collapse:],
                steps=loop.step[collapse:],
                body=loop.detach_region(loop.body),
            )
            new_ops = [new_loop]
        else:
            # Every dimension was collapsed: move the body's operations over,
            # except for the last one, which is erased.
            new_ops = [loop.body.block.detach_op(o) for o in loop.body.block.ops]
            last_op = new_ops.pop()
            rewriter.erase_op(last_op)
        rewriter.insert_op(new_ops, InsertPoint.before(scope_terminator))

        rewriter.replace_op(loop, parallel)

collapse: int | None instance-attribute

nested: bool instance-attribute

schedule: Literal['static', 'dynamic', 'auto'] | None instance-attribute

chunk: int | None instance-attribute

__init__(collapse: int | None, nested: bool, schedule: Literal['static', 'dynamic', 'auto'] | None, chunk: int | None) -> None

match_and_rewrite(loop: scf.ParallelOp, rewriter: PatternRewriter)

Source code in xdsl/transforms/convert_scf_to_openmp.py
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
@op_type_rewrite_pattern
def match_and_rewrite(self, loop: scf.ParallelOp, rewriter: PatternRewriter, /):
    """
    Replace `loop` with an `omp.ParallelOp` containing the converted loop.

    The first `collapse` dimensions become an `omp.LoopNestOp` inside an
    `omp.WsLoopOp`; any remaining dimensions are kept as a new, smaller
    `scf.ParallelOp` nested inside it. The original body ends up inside a
    `memref.AllocaScopeOp`.

    The pattern does nothing when the loop has initial values, or when
    `self.nested` is False and the loop is already enclosed in an
    `omp.WsLoopOp`.
    """
    if len(loop.initVals) > 0:
        # TODO Implement reduction, see https://github.com/xdslproject/xdsl/issues/1776
        return

    # Clamp the requested collapse depth to the loop's dimensionality.
    collapse = self.collapse
    if collapse is None or collapse > len(loop.lowerBound):
        collapse = len(loop.lowerBound)

    if not self.nested:
        # Walk up the ancestors; bail out if any of them is a worksharing loop.
        parent = loop
        while (parent := parent.parent_op()) is not None:
            if isinstance(parent, omp.WsLoopOp):
                return

    # A chunk size needs a schedule. Fall back to "static" only when the user did
    # not pick one, and keep the decision local so that the pattern's own
    # configuration is never modified by a rewrite.
    schedule = self.schedule
    if self.chunk is not None and schedule is None:
        schedule = "static"

    parallel = omp.ParallelOp(
        regions=[Region(Block())],
        operands=[[], [], [], [], [], []],
    )
    rewriter.insertion_point = InsertPoint.at_end(parallel.region.block)
    with ImplicitBuilder(rewriter):
        if self.chunk is None:
            chunk_op = []
        else:
            chunk_op = [
                arith.ConstantOp.from_int_and_width(self.chunk, IndexType())
            ]
        wsloop = omp.WsLoopOp(
            operands=[
                [],
                [],
                [],
                [],
                [],
                [],
                chunk_op,
            ],
            regions=[Region(Block())],
        )
        if schedule is not None:
            wsloop.schedule_kind = omp.ScheduleKindAttr(
                omp.ScheduleKind(schedule)
            )
        omp.TerminatorOp()

    rewriter.insertion_point = InsertPoint.at_end(wsloop.body.block)
    with ImplicitBuilder(rewriter):
        # The loop nest takes over the bounds and steps of the collapsed dimensions.
        loop_nest = omp.LoopNestOp(
            operands=[
                loop.lowerBound[:collapse],
                loop.upperBound[:collapse],
                loop.step[:collapse],
            ],
            regions=[Region(Block(arg_types=[IndexType()] * collapse))],
        )

    rewriter.insertion_point = InsertPoint.at_end(loop_nest.body.block)
    with ImplicitBuilder(rewriter):
        scope = memref.AllocaScopeOp(result_types=[[]], regions=[Region(Block())])
        omp.YieldOp()

    rewriter.insertion_point = InsertPoint.at_end(scope.scope.block)
    with ImplicitBuilder(rewriter):
        scope_terminator = memref.AllocaScopeReturnOp(operands=[[]])

    # Redirect uses of the collapsed induction variables to the loop nest's block
    # arguments, then drop the now unused original arguments.
    for newarg, oldarg in zip(
        loop_nest.body.block.args, loop.body.block.args[:collapse]
    ):
        oldarg.replace_all_uses_with(newarg)

    for _ in range(collapse):
        loop.body.block.erase_arg(loop.body.block.args[0])
    if collapse < len(loop.lowerBound):
        # Some dimensions remain: keep them as an inner scf.parallel that takes
        # over the original body region.
        new_loop = scf.ParallelOp(
            lower_bounds=loop.lowerBound[collapse:],
            upper_bounds=loop.upperBound[collapse:],
            steps=loop.step[collapse:],
            body=loop.detach_region(loop.body),
        )
        new_ops = [new_loop]
    else:
        # Every dimension was collapsed: move the body's operations over, except
        # for the last one, which is erased.
        new_ops = [loop.body.block.detach_op(o) for o in loop.body.block.ops]
        last_op = new_ops.pop()
        rewriter.erase_op(last_op)
    rewriter.insert_op(new_ops, InsertPoint.before(scope_terminator))

    rewriter.replace_op(loop, parallel)

ConvertScfToOpenMPPass dataclass

Bases: ModulePass

Convert scf.parallel loops to omp.wsloop constructs for parallel execution. It currently does not support reduction.

Arguments (all optional):

  • collapse: int: Specify a positive number of loops to collapse. By default, the full dimensionality of converted parallel loops is collapsed. This argument makes it possible, for example, to take a 2D loop and only OMPize the first dimension.

  • nested: bool: Set this to true to convert nested parallel loops. This is rarely a good idea, and is disabled by default. Note that setting it to true mimics MLIR's convert-scf-to-openmp.

  • schedule: {"static", "dynamic", "auto"}: Set the schedule used by the OMP loop. By default, none is set, leaving the decision to MLIR's omp lowering. At the time of writing, this means static.

  • chunk: int: Set the chunk size used by the OMP loop. By default, none is set. Note that the OMP dialect does not support setting a chunk size without a schedule; thus, selecting a chunk size without a schedule will use the static schedule.

Source code in xdsl/transforms/convert_scf_to_openmp.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
@dataclass(frozen=True)
class ConvertScfToOpenMPPass(ModulePass):
    """
    Lower `scf.parallel` loops to `omp.wsloop` constructs so that they run in
    parallel. Reductions are not supported yet.

    Every argument is optional:

    - collapse: int: positive number of loop dimensions to collapse. When left
    unset, all dimensions of each converted parallel loop are collapsed. Use it,
    for instance, to OMPize only the first dimension of a 2D loop.

    - nested: bool: when true, nested parallel loops are converted as well. This
    is rarely a good idea, hence it is off by default. Enabling it mimics MLIR's
    convert-scf-to-openmp.

    - schedule: {"static", "dynamic", "auto"}: schedule of the OMP loop. When left
    unset, no schedule is emitted and MLIR's omp lowering decides; at the time of
    writing, that means static.

    - chunk: int: chunk size of the OMP loop; none is set by default. The OMP
    dialect cannot express a chunk size without a schedule, so giving a chunk
    size without a schedule selects the static schedule.
    """

    name = "convert-scf-to-openmp"

    collapse: int | None = None
    nested: bool = False
    schedule: Literal["static", "dynamic", "auto"] | None = None
    chunk: int | None = None

    def apply(self, ctx: Context, op: ModuleOp) -> None:
        """Run a single, non-recursive `ConvertParallel` walk over `op`."""
        # Forward this pass's options unchanged to the rewrite pattern.
        pattern = ConvertParallel(
            collapse=self.collapse,
            nested=self.nested,
            schedule=self.schedule,
            chunk=self.chunk,
        )
        walker = PatternRewriteWalker(
            GreedyRewritePatternApplier([pattern]),
            apply_recursively=False,
        )
        walker.rewrite_module(op)

name = 'convert-scf-to-openmp' class-attribute instance-attribute

collapse: int | None = None class-attribute instance-attribute

nested: bool = False class-attribute instance-attribute

schedule: Literal['static', 'dynamic', 'auto'] | None = None class-attribute instance-attribute

chunk: int | None = None class-attribute instance-attribute

__init__(collapse: int | None = None, nested: bool = False, schedule: Literal['static', 'dynamic', 'auto'] | None = None, chunk: int | None = None) -> None

apply(ctx: Context, op: ModuleOp) -> None

Source code in xdsl/transforms/convert_scf_to_openmp.py
150
151
152
153
154
155
156
157
158
159
160
def apply(self, ctx: Context, op: ModuleOp) -> None:
    """Run a single, non-recursive `ConvertParallel` walk over `op`."""
    # Forward this pass's options unchanged to the rewrite pattern.
    pattern = ConvertParallel(
        collapse=self.collapse,
        nested=self.nested,
        schedule=self.schedule,
        chunk=self.chunk,
    )
    walker = PatternRewriteWalker(
        GreedyRewritePatternApplier([pattern]),
        apply_recursively=False,
    )
    walker.rewrite_module(op)