Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
375066c
add compute in its current form
a-alveyblanc Nov 20, 2025
745f841
align compyte with inducer/main
a-alveyblanc Nov 20, 2025
8896644
clean up comments and typos
a-alveyblanc Nov 20, 2025
6230e11
switch to alpha namedisl usage
a-alveyblanc Dec 10, 2025
80839ba
start using namedisl in places other than compute
a-alveyblanc Dec 10, 2025
c1ba35b
add namedisl objects to a type signature
a-alveyblanc Dec 10, 2025
7265536
compute transform up to and including instruction creation + insertio…
a-alveyblanc Mar 16, 2026
56af4fe
invocation replacement; dependencies still need handling
a-alveyblanc Mar 17, 2026
9d12183
rough sketch of compute transform; inames not schedulable because of …
a-alveyblanc Mar 18, 2026
8667a01
compute working for tiled matmul; write race condition warning
a-alveyblanc Mar 18, 2026
959e68b
add compute matmul example
a-alveyblanc Mar 18, 2026
a566b6e
clean up compute matmul example
a-alveyblanc Mar 18, 2026
cf920b5
improve matmul example with more parameters, better post-compute tran…
a-alveyblanc Mar 19, 2026
4c7d1ac
add 2.5D FD example base; minor stylistic changes
a-alveyblanc Mar 20, 2026
781df54
improvements to 2.5D example; bug fixes in compute transform
a-alveyblanc Mar 23, 2026
cccc6a1
Feedback from meeting
inducer Mar 23, 2026
6a7f719
update footprint finding to minimize projections, find bounds
a-alveyblanc Mar 24, 2026
aa8b612
add/remove FIXMEs
a-alveyblanc Mar 24, 2026
907873c
add tiled matmul example as test; islpy -> namedisl
a-alveyblanc Apr 6, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
166 changes: 166 additions & 0 deletions examples/python/compute-examples/compute-tiled-matmul.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
import namedisl as nisl

import loopy as lp
from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2
from loopy.transform.compute import compute

import numpy as np
import numpy.linalg as la
import pyopencl as cl


def main(
M: int = 128,
N: int = 128,
K: int = 128,
bm: int = 32,
bn: int = 32,
bk: int = 16,
run_sequentially: bool = False,
use_precompute: bool = False,
use_compute: bool = False,
run_kernel: bool = False,
print_kernel: bool = False,
print_device_code: bool = False
) -> None:

knl = lp.make_kernel(
"{ [i, j, k] : 0 <= i < M and 0 <= j < N and 0 <= k < K }",
"""
a_(is, ks) := a[is, ks]
b_(ks, js) := b[ks, js]
c[i, j] = sum([k], a_(i, k) * b_(k, j))
""",
[
lp.GlobalArg("a", shape=(M, K), dtype=np.float64),
lp.GlobalArg("b", shape=(K, N), dtype=np.float64),
lp.GlobalArg("c", shape=(M, N), dtype=np.float64,
is_output=True)
]
)

# FIXME: without this, there are complaints about in-bounds access
# guarantees for the instruction that stores into c
knl = lp.fix_parameters(knl, M=M, N=N, K=K)

# shared memory tile-level splitting
knl = lp.split_iname(knl, "i", bm, inner_iname="ii", outer_iname="io")
knl = lp.split_iname(knl, "j", bn, inner_iname="ji", outer_iname="jo")
knl = lp.split_iname(knl, "k", bk, inner_iname="ki", outer_iname="ko")

compute_map_a = nisl.make_map(f"""{{
[is, ks] -> [ii_s, io, ki_s, ko] :
is = io * {bm} + ii_s and
ks = ko * {bk} + ki_s
}}""")

compute_map_b = nisl.make_map(f"""{{
[ks, js] -> [ki_s, ko, ji_s, jo] :
js = jo * {bn} + ji_s and
ks = ko * {bk} + ki_s
}}""")

if use_compute:
knl = compute(
knl,
"a_",
compute_map=compute_map_a,
storage_indices=["ii_s", "ki_s"],
temporal_inames=["io", "ko", "jo"],
temporary_address_space=lp.AddressSpace.LOCAL,
temporary_dtype=np.float64
)

knl = compute(
knl,
"b_",
compute_map=compute_map_b,
storage_indices=["ki_s", "ji_s"],
temporal_inames=["io", "ko", "jo"],
temporary_address_space=lp.AddressSpace.LOCAL,
temporary_dtype=np.float64
)

if use_precompute:
knl = lp.precompute(
knl,
"a_",
sweep_inames=["ii", "ki"],
)

if not run_sequentially:
knl = lp.tag_inames(
knl, {
"io" : "g.0", # outer block loop over block rows
"jo" : "g.1", # outer block loop over block cols

"ii" : "l.0", # inner block loop over rows
"ji" : "l.1", # inner block loop over cols

"ii_s" : "l.0", # inner storage loop over a rows
"ji_s" : "l.0", # inner storage loop over b cols
"ki_s" : "l.1" # inner storage loop over a cols / b rows
}
)

knl = lp.add_inames_for_unused_hw_axes(knl)

if run_kernel:
a = np.random.randn(M, K)
b = np.random.randn(K, N)

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

ex = knl.executor(ctx)
_, out = ex(queue, a=a, b=b)

print(20*"=", "Tiled matmul report", 20*"=")
print(f"Problem size: M = {M:-4}, N = {N:-4}, K = {K:-4}")
print(f"Tile size : BM = {bm:-4}, BN = {bn:-4}, BK = {bk:-4}")
print(f"Relative error = {la.norm((a @ b) - out) / la.norm(a @ b)}")
print((40 + len(" Tiled matmul report "))*"=")

if print_device_code:
print(lp.generate_code_v2(knl).device_code())

if print_kernel:
print(knl)


if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser()

_ = parser.add_argument("--precompute", action="store_true")
_ = parser.add_argument("--compute", action="store_true")
_ = parser.add_argument("--run-kernel", action="store_true")
_ = parser.add_argument("--print-kernel", action="store_true")
_ = parser.add_argument("--print-device-code", action="store_true")
_ = parser.add_argument("--run-sequentially", action="store_true")

_ = parser.add_argument("--m", action="store", type=int, default=128)
_ = parser.add_argument("--n", action="store", type=int, default=128)
_ = parser.add_argument("--k", action="store", type=int, default=128)

_ = parser.add_argument("--bm", action="store", type=int, default=32)
_ = parser.add_argument("--bn", action="store", type=int, default=32)
_ = parser.add_argument("--bk", action="store", type=int, default=16)

args = parser.parse_args()

main(
M=args.m,
N=args.n,
K=args.k,
bm=args.bm,
bn=args.bn,
bk=args.bk,
use_precompute=args.precompute,
use_compute=args.compute,
run_kernel=args.run_kernel,
print_kernel=args.print_kernel,
print_device_code=args.print_device_code,
run_sequentially=args.run_sequentially
)
131 changes: 131 additions & 0 deletions examples/python/compute-examples/finite-difference-2-5D.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
import loopy as lp
from loopy.version import LOOPY_USE_LANGUAGE_VERSION_2018_2
from loopy.transform.compute import compute

import namedisl as nisl

import numpy as np
import numpy.linalg as la

import pyopencl as cl


# FIXME: more complicated function, or better yet define a set of functions
# with sympy and get the exact laplacian symbolically
def f(x, y, z):
return x**2 + y**2 + z**2


def laplacian_f(x, y, z):
return 6 * np.ones_like(x)


def main(
use_compute: bool = False,
print_device_code: bool = False,
print_kernel: bool = False
) -> None:
npts = 64
pts = np.linspace(-1, 1, num=npts, endpoint=True)
h = pts[1] - pts[0]

x, y, z = np.meshgrid(*(pts,)*3)

dtype = np.float32
x = x.reshape(*(npts,)*3).astype(np.float32)
y = y.reshape(*(npts,)*3).astype(np.float32)
z = z.reshape(*(npts,)*3).astype(np.float32)

f_ = f(x, y, z)
lap_fd = np.zeros_like(f_)
c = (np.array([-1/12, 4/3, -5/2, 4/3, -1/12]) / h**2).astype(dtype)

m = 5
r = m // 2

bm = bn = m

# FIXME: the usage on the k dimension is incorrect since we are only testing
# tiling (i, j) planes
knl = lp.make_kernel(
"{ [i, j, k, l] : r <= i, j, k < npts - r and -r <= l < r + 1 }",
"""
u_(is, js, ks) := u[is, js, ks]

lap_u[i,j,k] = sum(
[l],
c[l+r] * (u_(i-l,j,k) + u_(i,j-l,k) + u[i,j,k-l])
)
""",
[
lp.GlobalArg("u", dtype=dtype, shape=(npts,npts,npts)),
lp.GlobalArg("lap_u", dtype=dtype, shape=(npts,npts,npts),
is_output=True),
lp.GlobalArg("c", dtype=dtype, shape=(m))
]
)

knl = lp.fix_parameters(knl, npts=npts, r=r)

knl = lp.split_iname(knl, "i", bm, inner_iname="ii", outer_iname="io")
knl = lp.split_iname(knl, "j", bn, inner_iname="ji", outer_iname="jo")

# FIXME: need to split k dimension

if use_compute:
compute_map = nisl.make_map(
f"""
{{
[is, js, ks] -> [io, ii_s, jo, ji_s, k] :
is = io * {bm} + ii_s - {r} and
js = jo * {bn} + ji_s - {r} and
ks = k
}}
"""
)

knl = compute(
knl,
"u_",
compute_map=compute_map,
storage_indices=["ii_s", "ji_s"],
temporal_inames=["io", "jo", "k"],
temporary_name="u_compute",
temporary_address_space=lp.AddressSpace.LOCAL,
temporary_dtype=np.float32
)

if print_device_code:
print(lp.generate_code_v2(knl).device_code())

if print_kernel:
print(knl)

ctx = cl.create_some_context()
queue = cl.CommandQueue(ctx)

ex = knl.executor(queue)
_, lap_fd = ex(queue, u=f(x, y, z), c=c)

lap_true = laplacian_f(x, y, z)
sl = (slice(r, npts - r),)*3

print(la.norm(lap_true[sl] - lap_fd[0][sl]) / la.norm(lap_true[sl]))


if __name__ == "__main__":
import argparse

parser = argparse.ArgumentParser()

_ = parser.add_argument("--compute", action="store_true")
_ = parser.add_argument("--print-device-code", action="store_true")
_ = parser.add_argument("--print-kernel", action="store_true")

args = parser.parse_args()

main(
use_compute=args.compute,
print_device_code=args.print_device_code,
print_kernel=args.print_kernel
)
40 changes: 28 additions & 12 deletions loopy/symbolic.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@
from constantdict import constantdict
from typing_extensions import Self, override

import namedisl as nisl
import islpy as isl

Check failure on line 52 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Import "namedisl" could not be resolved (reportMissingImports)
import pymbolic.primitives as p
import pytools.lex
from pymbolic.mapper import (
Expand Down Expand Up @@ -2044,45 +2045,60 @@

# {{{ (pw)aff to expr conversion

def aff_to_expr(aff: isl.Aff) -> ArithmeticExpression:
def aff_to_expr(aff: isl.Aff | nisl.Aff) -> ArithmeticExpression:
from pymbolic import var

# FIXME: remove this once namedisl is the standard in loopy
denom = aff.get_denominator_val().to_python()

result = (aff.get_constant_val()*denom).to_python()
for dt in [isl.dim_type.in_, isl.dim_type.param]:
for i in range(aff.dim(dt)):
coeff = (aff.get_coefficient_val(dt, i)*denom).to_python()
if isinstance(aff, isl.Aff):
for dt in [isl.dim_type.in_, isl.dim_type.param]:
for i in range(aff.dim(dt)):
coeff = (aff.get_coefficient_val(dt, i)*denom).to_python()
if coeff:
dim_name = not_none(aff.get_dim_name(dt, i))
result += coeff*var(dim_name)

for i in range(aff.dim(isl.dim_type.div)):
coeff = (aff.get_coefficient_val(isl.dim_type.div, i)*denom).to_python()
if coeff:
result += coeff*aff_to_expr(aff.get_div(i))

else:
in_names = set(aff.dim_type_names(isl.dim_type.in_))
param_names = set(aff.dim_type_names(isl.dim_type.param))

for name in in_names | param_names:
coeff = (aff.get_coefficient_val(name) * denom).to_python()
if coeff:
dim_name = not_none(aff.get_dim_name(dt, i))
result += coeff*var(dim_name)
result = coeff * var(name)

for i in range(aff.dim(isl.dim_type.div)):
coeff = (aff.get_coefficient_val(isl.dim_type.div, i)*denom).to_python()
if coeff:
result += coeff*aff_to_expr(aff.get_div(i))
for name in aff.dim_type_names(isl.dim_type.div):
coeff = (aff.get_coefficient_val(name) * denom).to_python()
if coeff:
result += coeff * aff_to_expr(aff.get_div(name))

assert not isinstance(result, complex)

Check warning on line 2081 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "Aff" is unknown (reportUnknownMemberType)

Check warning on line 2081 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of parameter "aff" is partially unknown   Parameter type is "Aff | Unknown" (reportUnknownParameterType)
return flatten(result // denom)


def pw_aff_to_expr(

Check warning on line 2085 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "to_python" is partially unknown   Type of "to_python" is "(() -> int) | Unknown" (reportUnknownMemberType)

Check warning on line 2085 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "get_denominator_val" is partially unknown   Type of "get_denominator_val" is "(() -> Val) | Unknown" (reportUnknownMemberType)
pw_aff: int | isl.PwAff | isl.Aff,
pw_aff: int | isl.PwAff | isl.Aff | nisl.PwAff | nisl.Aff,

Check warning on line 2086 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "get_constant_val" is partially unknown   Type of "get_constant_val" is "(() -> Val) | Unknown" (reportUnknownMemberType)

Check warning on line 2086 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "to_python" is partially unknown   Type of "to_python" is "(() -> int) | Unknown" (reportUnknownMemberType)
int_ok: bool = False
) -> ArithmeticExpression:
if isinstance(pw_aff, int):
if not int_ok:

Check warning on line 2090 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "to_python" is partially unknown   Type of "to_python" is "(() -> int) | Unknown" (reportUnknownMemberType)
warn(f"expected PwAff, got int: {pw_aff}", stacklevel=2)

return pw_aff

pieces = pw_aff.get_pieces()
last_expr = aff_to_expr(pieces[-1][1])

Check warning on line 2096 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "to_python" is partially unknown   Type of "to_python" is "(() -> int) | Unknown" (reportUnknownMemberType)

pairs = [(set_to_cond_expr(constr_set), aff_to_expr(aff))
for constr_set, aff in pieces[:-1]]

from pymbolic.primitives import If

Check warning on line 2101 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Argument type is unknown   Argument corresponds to parameter "iterable" in function "__init__" (reportUnknownArgumentType)

Check warning on line 2101 in loopy/symbolic.py

View workflow job for this annotation

GitHub Actions / basedpyright

Type of "dim_type_names" is unknown (reportUnknownMemberType)

expr = last_expr
for condition, then_expr in reversed(pairs):
Expand Down
Loading
Loading