From 746f251f0b30c98615858d54782a12b2f21651e6 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Thu, 12 Mar 2026 10:22:23 +0000
Subject: [PATCH 01/25] Add memory64 and table64 support to the Canonical ABI

Parameterize the Canonical ABI to handle 32-bit or 64-bit memory
addresses and table indices. This is done by adding two new fields to
`LiftOptions`, `addr_type` and `tbl_idx_type`, which record whether the
core module's memory and table use the `memory64`/`table64` features.
---
 design/mvp/CanonicalABI.md              | 382 +++++++++++++-----------
 design/mvp/Concurrency.md               |   8 +-
 design/mvp/Explainer.md                 | 135 +++++----
 design/mvp/canonical-abi/definitions.py | 322 ++++++++++----------
 design/mvp/canonical-abi/run_tests.py   | 287 +++++++++++-------
 test/wasm-tools/memory64.wast           |  19 +-
 6 files changed, 645 insertions(+), 508 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 32fb81fd..0bf9eae9 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -246,14 +246,44 @@ when lifting individual parameters and results:
 class LiftOptions:
   string_encoding: str = 'utf8'
   memory: Optional[bytearray] = None
+  addr_type: str = 'i32'
+  tbl_idx_type: str = 'i32'
 
   def equal(lhs, rhs):
     return lhs.string_encoding == rhs.string_encoding and \
-           lhs.memory is rhs.memory
+           lhs.memory is rhs.memory and \
+           lhs.addr_type == rhs.addr_type and \
+           lhs.tbl_idx_type == rhs.tbl_idx_type
 ```
 The `equal` static method is used by `task.return` below to dynamically
 compare equality of just this subset of `canonopt`.
 
+The `addr_type` is `'i32'` when the `memory` canonopt refers to a memory32
+and `'i64'` when it refers to a memory64. The `tbl_idx_type` is `'i32'` when
+the `table` canonopt refers to a table32 and `'i64'` when it refers to a
+table64. These two dimensions are independent (e.g., a 64-bit memory with
+32-bit table indices is valid).
+
+The following helper functions return the byte size and core value type of
+memory pointers and table indices, based on the options:
+```python
+def ptr_size(opts):
+  match opts.addr_type:
+    case 'i32': return 4
+    case 'i64': return 8
+
+def ptr_type(opts):
+  return opts.addr_type
+
+def idx_size(opts):
+  match opts.tbl_idx_type:
+    case 'i32': return 4
+    case 'i64': return 8
+
+def idx_type(opts):
+  return opts.tbl_idx_type
+```
+
 The `LiftLowerOptions` class contains the subset of [`canonopt`] which are
 needed when lifting *or* lowering individual parameters and results:
 ```python
@@ -438,10 +468,11 @@ The other fields of `ComponentInstance` are described below as they are used.
 #### Table State
 
 The `Table` class encapsulates a mutable, growable array of opaque elements
-that are represented in Core WebAssembly as `i32` indices into the array.
-Currently, every component instance contains two tables: a `threads` table
-containing all the component's [threads](#thread-state) and a `handles`
-table containing everything else ([resource handles](#resource-state),
+that are represented in Core WebAssembly as `i32` or `i64` indices into the
+array (based on the `tbl_idx_type` option). Currently, every component
+instance contains two tables: a `threads` table containing all the
+component's [threads](#thread-state) and a `handles` table containing
+everything else ([resource handles](#resource-state),
 [waitables and waitable sets](#waitable-state) and
 [error contexts](#-canon-error-contextnew)).
 ```python
@@ -490,9 +521,9 @@ that are used in preference to growing the table. The free list is represented
 as a Python list here, but an optimizing implementation could instead store the
 free list in the free elements of `array`.
 
-The limit of `2**28` ensures that the high 2 bits of table indices are unset
-and available for other use in guest code (e.g., for tagging, packed words or
-sentinel values).
+The limit of `2**28` ensures that the high 4 bits of `i32` table indices are
+unset and available for other use in guest code (e.g., for tagging, packed
+words or sentinel values).
 
 
 #### Resource State
@@ -1355,8 +1386,8 @@ class BufferGuestImpl(Buffer):
   def __init__(self, t, cx, ptr, length):
     trap_if(length > Buffer.MAX_LENGTH)
     if t and length > 0:
-      trap_if(ptr != align_to(ptr, alignment(t)))
-      trap_if(ptr + length * elem_size(t) > len(cx.opts.memory))
+      trap_if(ptr != align_to(ptr, alignment(t, cx.opts)))
+      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory))
     self.cx = cx
     self.t = t
     self.ptr = ptr
@@ -1374,7 +1405,7 @@ class ReadableBufferGuestImpl(BufferGuestImpl):
     assert(n <= self.remain())
     if self.t:
       vs = load_list_from_valid_range(self.cx, self.ptr, n, self.t)
-      self.ptr += n * elem_size(self.t)
+      self.ptr += n * elem_size(self.t, self.cx.opts)
     else:
       vs = n * [()]
     self.progress += n
@@ -1385,7 +1416,7 @@ class WritableBufferGuestImpl(BufferGuestImpl, WritableBuffer):
     assert(len(vs) <= self.remain())
     if self.t:
       store_list_into_valid_range(self.cx, vs, self.ptr, self.t)
-      self.ptr += len(vs) * elem_size(self.t)
+      self.ptr += len(vs) * elem_size(self.t, self.cx.opts)
     else:
       assert(all(v == () for v in vs))
     self.progress += len(vs)
@@ -1860,7 +1891,7 @@ Each value type is assigned an [alignment] which is used by subsequent
 Canonical ABI definitions. Presenting the definition of `alignment` piecewise,
 we start with the top-level case analysis:
 ```python
-def alignment(t):
+def alignment(t, opts):
   match despecialize(t):
     case BoolType()                  : return 1
     case S8Type() | U8Type()         : return 1
@@ -1870,31 +1901,31 @@ def alignment(t):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return 4
-    case ErrorContextType()          : return 4
-    case ListType(t, l)              : return alignment_list(t, l)
-    case RecordType(fields)          : return alignment_record(fields)
-    case VariantType(cases)          : return alignment_variant(cases)
+    case StringType()                : return ptr_size(opts)
+    case ErrorContextType()          : return idx_size(opts)
+    case ListType(t, l)              : return alignment_list(t, l, opts)
+    case RecordType(fields)          : return alignment_record(fields, opts)
+    case VariantType(cases)          : return alignment_variant(cases, opts)
     case FlagsType(labels)           : return alignment_flags(labels)
-    case OwnType() | BorrowType()    : return 4
-    case StreamType() | FutureType() : return 4
+    case OwnType() | BorrowType()    : return idx_size(opts)
+    case StreamType() | FutureType() : return idx_size(opts)
 ```
 
 List alignment is the same as tuple alignment when the length is fixed and
 otherwise uses the alignment of pointers.
 ```python
-def alignment_list(elem_type, maybe_length):
+def alignment_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
-    return alignment(elem_type)
-  return 4
+    return alignment(elem_type, opts)
+  return ptr_size(opts)
 ```
 
 Record alignment is tuple alignment, with the definitions split for reuse below:
 ```python
-def alignment_record(fields):
+def alignment_record(fields, opts):
   a = 1
   for f in fields:
-    a = max(a, alignment(f.t))
+    a = max(a, alignment(f.t, opts))
   return a
 ```
 
@@ -1904,8 +1935,8 @@ covering the number of cases in the variant (with cases numbered in order from
 compact representations of variants in memory. This smallest integer type is
 selected by the following function, used above and below:
 ```python
-def alignment_variant(cases):
-  return max(alignment(discriminant_type(cases)), max_case_alignment(cases))
+def alignment_variant(cases, opts):
+  return max(alignment(discriminant_type(cases), opts), max_case_alignment(cases, opts))
 
 def discriminant_type(cases):
   n = len(cases)
@@ -1916,11 +1947,11 @@ def discriminant_type(cases):
     case 2: return U16Type()
     case 3: return U32Type()
 
-def max_case_alignment(cases):
+def max_case_alignment(cases, opts):
   a = 1
   for c in cases:
     if c.t is not None:
-      a = max(a, alignment(c.t))
+      a = max(a, alignment(c.t, opts))
   return a
 ```
 
@@ -1946,7 +1977,7 @@ maps well to languages which represent `list`s as random-access arrays. Empty
 types, such as records with no fields, are not permitted, to avoid
 complications in source languages.
 ```python
-def elem_size(t):
+def elem_size(t, opts):
   match despecialize(t):
     case BoolType()                  : return 1
     case S8Type() | U8Type()         : return 1
@@ -1956,40 +1987,40 @@ def elem_size(t):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return 8
-    case ErrorContextType()          : return 4
-    case ListType(t, l)              : return elem_size_list(t, l)
-    case RecordType(fields)          : return elem_size_record(fields)
-    case VariantType(cases)          : return elem_size_variant(cases)
+    case StringType()                : return 2 * ptr_size(opts)
+    case ErrorContextType()          : return idx_size(opts)
+    case ListType(t, l)              : return elem_size_list(t, l, opts)
+    case RecordType(fields)          : return elem_size_record(fields, opts)
+    case VariantType(cases)          : return elem_size_variant(cases, opts)
     case FlagsType(labels)           : return elem_size_flags(labels)
-    case OwnType() | BorrowType()    : return 4
-    case StreamType() | FutureType() : return 4
+    case OwnType() | BorrowType()    : return idx_size(opts)
+    case StreamType() | FutureType() : return idx_size(opts)
 
-def elem_size_list(elem_type, maybe_length):
+def elem_size_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
-    return maybe_length * elem_size(elem_type)
-  return 8
+    return maybe_length * elem_size(elem_type, opts)
+  return 2 * ptr_size(opts)
 
-def elem_size_record(fields):
+def elem_size_record(fields, opts):
   s = 0
   for f in fields:
-    s = align_to(s, alignment(f.t))
-    s += elem_size(f.t)
+    s = align_to(s, alignment(f.t, opts))
+    s += elem_size(f.t, opts)
   assert(s > 0)
-  return align_to(s, alignment_record(fields))
+  return align_to(s, alignment_record(fields, opts))
 
 def align_to(ptr, alignment):
   return math.ceil(ptr / alignment) * alignment
 
-def elem_size_variant(cases):
-  s = elem_size(discriminant_type(cases))
-  s = align_to(s, max_case_alignment(cases))
+def elem_size_variant(cases, opts):
+  s = elem_size(discriminant_type(cases), opts)
+  s = align_to(s, max_case_alignment(cases, opts))
   cs = 0
   for c in cases:
     if c.t is not None:
-      cs = max(cs, elem_size(c.t))
+      cs = max(cs, elem_size(c.t, opts))
   s += cs
-  return align_to(s, alignment_variant(cases))
+  return align_to(s, alignment_variant(cases, opts))
 
 def elem_size_flags(labels):
   n = len(labels)
@@ -2007,8 +2038,8 @@ as a Python value. Presenting the definition of `load` piecewise, we start with
 the top-level case analysis:
 ```python
 def load(cx, ptr, t):
-  assert(ptr == align_to(ptr, alignment(t)))
-  assert(ptr + elem_size(t) <= len(cx.opts.memory))
+  assert(ptr == align_to(ptr, alignment(t, cx.opts)))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : return convert_int_to_bool(load_int(cx, ptr, 1))
     case U8Type()           : return load_int(cx, ptr, 1)
@@ -2023,15 +2054,15 @@ def load(cx, ptr, t):
     case F64Type()          : return decode_i64_as_float(load_int(cx, ptr, 8))
     case CharType()         : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
     case StringType()       : return load_string(cx, ptr)
-    case ErrorContextType() : return lift_error_context(cx, load_int(cx, ptr, 4))
+    case ErrorContextType() : return lift_error_context(cx, load_int(cx, ptr, idx_size(cx.opts)))
     case ListType(t, l)     : return load_list(cx, ptr, t, l)
     case RecordType(fields) : return load_record(cx, ptr, fields)
     case VariantType(cases) : return load_variant(cx, ptr, cases)
     case FlagsType(labels)  : return load_flags(cx, ptr, labels)
-    case OwnType()          : return lift_own(cx, load_int(cx, ptr, 4), t)
-    case BorrowType()       : return lift_borrow(cx, load_int(cx, ptr, 4), t)
-    case StreamType(t)      : return lift_stream(cx, load_int(cx, ptr, 4), t)
-    case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, 4), t)
+    case OwnType()          : return lift_own(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
+    case BorrowType()       : return lift_borrow(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
+    case StreamType(t)      : return lift_stream(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
+    case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
 ```
 
 Integers are loaded directly from memory, with their high-order bit interpreted
@@ -2098,24 +2129,29 @@ def convert_i32_to_char(cx, i):
   return chr(i)
 ```
 
-Strings are loaded from two `i32` values: a pointer (offset in linear memory)
-and a number of [code units]. There are three supported string encodings in
-[`canonopt`]: [UTF-8], [UTF-16] and `latin1+utf16`. This last options allows a
-*dynamic* choice between [Latin-1] and UTF-16, indicated by the high bit of the
-second `i32`. String values include their original encoding and length in
-tagged code units as a "hint" that enables `store_string` (defined below) to
-make better up-front allocation size choices in many cases. Thus, the value
-produced by `load_string` isn't simply a Python `str`, but a *tuple* containing
-a `str`, the original encoding and the number of source code units.
+Strings are loaded from two pointer-sized values: a pointer (offset in linear
+memory) and a number of [code units]. There are three supported string
+encodings in [`canonopt`]: [UTF-8], [UTF-16] and `latin1+utf16`. This last
+option allows a *dynamic* choice between [Latin-1] and UTF-16, indicated by
+the high bit of the second pointer-sized value. String values include their
+original encoding and length in tagged code units as a "hint" that enables
+`store_string` (defined below) to make better up-front allocation size choices
+in many cases. Thus, the value produced by `load_string` isn't simply a Python
+`str`, but a *tuple* containing a `str`, the original encoding and the number
+of source code units.
 ```python
 String = tuple[str, str, int]
 
 def load_string(cx, ptr) -> String:
-  begin = load_int(cx, ptr, 4)
-  tagged_code_units = load_int(cx, ptr + 4, 4)
+  begin = load_int(cx, ptr, ptr_size(cx.opts))
+  tagged_code_units = load_int(cx, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
   return load_string_from_range(cx, begin, tagged_code_units)
 
-UTF16_TAG = 1 << 31
+def utf16_tag(opts):
+  return 1 << (ptr_size(opts) * 8 - 1)
+
+def max_string_byte_length(opts):
+  return (1 << (ptr_size(opts) * 8 - 1)) - 1
 
 def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   match cx.opts.string_encoding:
@@ -2129,8 +2165,8 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
       encoding = 'utf-16-le'
     case 'latin1+utf16':
       alignment = 2
-      if bool(tagged_code_units & UTF16_TAG):
-        byte_length = 2 * (tagged_code_units ^ UTF16_TAG)
+      if bool(tagged_code_units & utf16_tag(cx.opts)):
+        byte_length = 2 * (tagged_code_units ^ utf16_tag(cx.opts))
         encoding = 'utf-16-le'
       else:
         byte_length = tagged_code_units
@@ -2160,27 +2196,27 @@ Lists and records are loaded by recursively loading their elements/fields:
 def load_list(cx, ptr, elem_type, maybe_length):
   if maybe_length is not None:
     return load_list_from_valid_range(cx, ptr, maybe_length, elem_type)
-  begin = load_int(cx, ptr, 4)
-  length = load_int(cx, ptr + 4, 4)
+  begin = load_int(cx, ptr, ptr_size(cx.opts))
+  length = load_int(cx, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
   return load_list_from_range(cx, begin, length, elem_type)
 
 def load_list_from_range(cx, ptr, length, elem_type):
-  trap_if(ptr != align_to(ptr, alignment(elem_type)))
-  trap_if(ptr + length * elem_size(elem_type) > len(cx.opts.memory))
+  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
+  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory))
   return load_list_from_valid_range(cx, ptr, length, elem_type)
 
 def load_list_from_valid_range(cx, ptr, length, elem_type):
   a = []
   for i in range(length):
-    a.append(load(cx, ptr + i * elem_size(elem_type), elem_type))
+    a.append(load(cx, ptr + i * elem_size(elem_type, cx.opts), elem_type))
   return a
 
 def load_record(cx, ptr, fields):
   record = {}
   for field in fields:
-    ptr = align_to(ptr, alignment(field.t))
+    ptr = align_to(ptr, alignment(field.t, cx.opts))
     record[field.label] = load(cx, ptr, field.t)
-    ptr += elem_size(field.t)
+    ptr += elem_size(field.t, cx.opts)
   return record
 ```
 As a technical detail: the `align_to` in the loop in `load_record` is
@@ -2194,12 +2230,12 @@ implementation can build the appropriate index tables at compile-time so that
 variant-passing is always O(1) and not involving string operations.
 ```python
 def load_variant(cx, ptr, cases):
-  disc_size = elem_size(discriminant_type(cases))
+  disc_size = elem_size(discriminant_type(cases), cx.opts)
   case_index = load_int(cx, ptr, disc_size)
   ptr += disc_size
   trap_if(case_index >= len(cases))
   c = cases[case_index]
-  ptr = align_to(ptr, max_case_alignment(cases))
+  ptr = align_to(ptr, max_case_alignment(cases, cx.opts))
   if c.t is None:
     return { c.label: None }
   return { c.label: load(cx, ptr, c.t) }
@@ -2291,8 +2327,8 @@ The `store` function defines how to write a value `v` of a given value type
 `store` piecewise, we start with the top-level case analysis:
 ```python
 def store(cx, v, t, ptr):
-  assert(ptr == align_to(ptr, alignment(t)))
-  assert(ptr + elem_size(t) <= len(cx.opts.memory))
+  assert(ptr == align_to(ptr, alignment(t, cx.opts)))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : store_int(cx, int(bool(v)), ptr, 1)
     case U8Type()           : store_int(cx, v, ptr, 1)
@@ -2307,15 +2343,15 @@ def store(cx, v, t, ptr):
     case F64Type()          : store_int(cx, encode_float_as_i64(v), ptr, 8)
     case CharType()         : store_int(cx, char_to_i32(v), ptr, 4)
     case StringType()       : store_string(cx, v, ptr)
-    case ErrorContextType() : store_int(cx, lower_error_context(cx, v), ptr, 4)
+    case ErrorContextType() : store_int(cx, lower_error_context(cx, v), ptr, idx_size(cx.opts))
     case ListType(t, l)     : store_list(cx, v, ptr, t, l)
     case RecordType(fields) : store_record(cx, v, ptr, fields)
     case VariantType(cases) : store_variant(cx, v, ptr, cases)
     case FlagsType(labels)  : store_flags(cx, v, ptr, labels)
-    case OwnType()          : store_int(cx, lower_own(cx, v, t), ptr, 4)
-    case BorrowType()       : store_int(cx, lower_borrow(cx, v, t), ptr, 4)
-    case StreamType(t)      : store_int(cx, lower_stream(cx, v, t), ptr, 4)
-    case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, 4)
+    case OwnType()          : store_int(cx, lower_own(cx, v, t), ptr, idx_size(cx.opts))
+    case BorrowType()       : store_int(cx, lower_borrow(cx, v, t), ptr, idx_size(cx.opts))
+    case StreamType(t)      : store_int(cx, lower_stream(cx, v, t), ptr, idx_size(cx.opts))
+    case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, idx_size(cx.opts))
 ```
 
 Integers are stored directly into memory. Because the input domain is exactly
@@ -2405,20 +2441,20 @@ original encoding and number of source [code units]. From this hint data,
 
 We start with a case analysis to enumerate all the meaningful encoding
 combinations, subdividing the `latin1+utf16` encoding into either `latin1` or
-`utf16` based on the `UTF16_TAG` flag set by `load_string`:
+`utf16` based on the `utf16_tag` flag set by `load_string`:
 ```python
 def store_string(cx, v: String, ptr):
   begin, tagged_code_units = store_string_into_range(cx, v)
-  store_int(cx, begin, ptr, 4)
-  store_int(cx, tagged_code_units, ptr + 4, 4)
+  store_int(cx, begin, ptr, ptr_size(cx.opts))
+  store_int(cx, tagged_code_units, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
 
 def store_string_into_range(cx, v: String):
   src, src_encoding, src_tagged_code_units = v
 
   if src_encoding == 'latin1+utf16':
-    if bool(src_tagged_code_units & UTF16_TAG):
+    if bool(src_tagged_code_units & utf16_tag(cx.opts)):
       src_simple_encoding = 'utf16'
-      src_code_units = src_tagged_code_units ^ UTF16_TAG
+      src_code_units = src_tagged_code_units ^ utf16_tag(cx.opts)
     else:
       src_simple_encoding = 'latin1'
       src_code_units = src_tagged_code_units
@@ -2451,11 +2487,9 @@ The simplest 4 cases above can compute the exact destination size and then copy
 with a simply loop (that possibly inflates Latin-1 to UTF-16 by injecting a 0
 byte after every Latin-1 byte).
 ```python
-MAX_STRING_BYTE_LENGTH = (1 << 31) - 1
-
 def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment, dst_encoding):
   dst_byte_length = dst_code_unit_size * src_code_units
-  trap_if(dst_byte_length > MAX_STRING_BYTE_LENGTH)
+  trap_if(dst_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
   trap_if(ptr + dst_byte_length > len(cx.opts.memory))
@@ -2464,8 +2498,8 @@ def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment
   cx.opts.memory[ptr : ptr+len(encoded)] = encoded
   return (ptr, src_code_units)
 ```
-The choice of `MAX_STRING_BYTE_LENGTH` constant ensures that the high bit of a
-string's number of code units is never set, keeping it clear for `UTF16_TAG`.
+The bound returned by `max_string_byte_length` ensures that the high bit of a
+string's number of code units is never set, keeping it clear for `utf16_tag`.
 
 The 2 cases of transcoding into UTF-8 share an algorithm that starts by
 optimistically assuming that each code unit of the source string fits in a
@@ -2481,14 +2515,14 @@ def store_latin1_to_utf8(cx, src, src_code_units):
   return store_string_to_utf8(cx, src, src_code_units, worst_case_size)
 
 def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
-  assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
+  assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 1, src_code_units)
   trap_if(ptr + src_code_units > len(cx.opts.memory))
   for i,code_point in enumerate(src):
     if ord(code_point) < 2**7:
       cx.opts.memory[ptr + i] = ord(code_point)
     else:
-      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+      trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
       trap_if(ptr + worst_case_size > len(cx.opts.memory))
       encoded = src.encode('utf-8')
@@ -2507,7 +2541,7 @@ if multiple UTF-8 bytes were collapsed into a single 2-byte UTF-16 code unit:
 ```python
 def store_utf8_to_utf16(cx, src, src_code_units):
   worst_case_size = 2 * src_code_units
-  trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+  trap_if(worst_case_size > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + worst_case_size > len(cx.opts.memory))
@@ -2531,7 +2565,7 @@ after every Latin-1 byte (iterating in reverse to avoid clobbering later
 bytes):
 ```python
 def store_string_to_latin1_or_utf16(cx, src, src_code_units):
-  assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
+  assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_code_units)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + src_code_units > len(cx.opts.memory))
@@ -2542,7 +2576,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
-      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+      trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
       trap_if(ptr + worst_case_size > len(cx.opts.memory))
@@ -2555,7 +2589,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
         ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
         trap_if(ptr != align_to(ptr, 2))
         trap_if(ptr + len(encoded) > len(cx.opts.memory))
-      tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
+      tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
       return (ptr, tagged_code_units)
   if dst_byte_length < src_code_units:
     ptr = cx.opts.realloc(ptr, src_code_units, 2, dst_byte_length)
@@ -2577,14 +2611,14 @@ inexpensively fused with the UTF-16 validate+copy loop.)
 ```python
 def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   src_byte_length = 2 * src_code_units
-  trap_if(src_byte_length > MAX_STRING_BYTE_LENGTH)
+  trap_if(src_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + src_byte_length > len(cx.opts.memory))
   encoded = src.encode('utf-16-le')
   cx.opts.memory[ptr : ptr+len(encoded)] = encoded
   if any(ord(c) >= (1 << 8) for c in src):
-    tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
+    tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
     return (ptr, tagged_code_units)
   latin1_size = int(len(encoded) / 2)
   for i in range(latin1_size):
@@ -2612,27 +2646,27 @@ def store_list(cx, v, ptr, elem_type, maybe_length):
     store_list_into_valid_range(cx, v, ptr, elem_type)
     return
   begin, length = store_list_into_range(cx, v, elem_type)
-  store_int(cx, begin, ptr, 4)
-  store_int(cx, length, ptr + 4, 4)
+  store_int(cx, begin, ptr, ptr_size(cx.opts))
+  store_int(cx, length, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
 
 def store_list_into_range(cx, v, elem_type):
-  byte_length = len(v) * elem_size(elem_type)
-  trap_if(byte_length >= (1 << 32))
-  ptr = cx.opts.realloc(0, 0, alignment(elem_type), byte_length)
-  trap_if(ptr != align_to(ptr, alignment(elem_type)))
+  byte_length = len(v) * elem_size(elem_type, cx.opts)
+  trap_if(byte_length >= (1 << (ptr_size(cx.opts) * 8)))
+  ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
+  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
   trap_if(ptr + byte_length > len(cx.opts.memory))
   store_list_into_valid_range(cx, v, ptr, elem_type)
   return (ptr, len(v))
 
 def store_list_into_valid_range(cx, v, ptr, elem_type):
   for i,e in enumerate(v):
-    store(cx, e, elem_type, ptr + i * elem_size(elem_type))
+    store(cx, e, elem_type, ptr + i * elem_size(elem_type, cx.opts))
 
 def store_record(cx, v, ptr, fields):
   for f in fields:
-    ptr = align_to(ptr, alignment(f.t))
+    ptr = align_to(ptr, alignment(f.t, cx.opts))
     store(cx, v[f.label], f.t, ptr)
-    ptr += elem_size(f.t)
+    ptr += elem_size(f.t, cx.opts)
 ```
 
 Variant values are represented as Python dictionaries containing exactly one
@@ -2645,10 +2679,10 @@ indices.
 ```python
 def store_variant(cx, v, ptr, cases):
   case_index, case_value = match_case(v, cases)
-  disc_size = elem_size(discriminant_type(cases))
+  disc_size = elem_size(discriminant_type(cases), cx.opts)
   store_int(cx, case_index, ptr, disc_size)
   ptr += disc_size
-  ptr = align_to(ptr, max_case_alignment(cases))
+  ptr = align_to(ptr, max_case_alignment(cases, cx.opts))
   c = cases[case_index]
   if c.t is not None:
     store(cx, case_value, c.t, ptr)
@@ -2752,38 +2786,38 @@ MAX_FLAT_ASYNC_PARAMS = 4
 MAX_FLAT_RESULTS = 1
 
 def flatten_functype(opts, ft, context):
-  flat_params = flatten_types(ft.param_types())
-  flat_results = flatten_types(ft.result_type())
+  flat_params = flatten_types(ft.param_types(), opts)
+  flat_results = flatten_types(ft.result_type(), opts)
   if not opts.async_:
     if len(flat_params) > MAX_FLAT_PARAMS:
-      flat_params = ['i32']
+      flat_params = [ptr_type(opts)]
     if len(flat_results) > MAX_FLAT_RESULTS:
       match context:
         case 'lift':
-          flat_results = ['i32']
+          flat_results = [ptr_type(opts)]
         case 'lower':
-          flat_params += ['i32']
+          flat_params += [ptr_type(opts)]
           flat_results = []
     return CoreFuncType(flat_params, flat_results)
   else:
     match context:
       case 'lift':
         if len(flat_params) > MAX_FLAT_PARAMS:
-          flat_params = ['i32']
+          flat_params = [ptr_type(opts)]
         if opts.callback:
           flat_results = ['i32']
         else:
           flat_results = []
       case 'lower':
         if len(flat_params) > MAX_FLAT_ASYNC_PARAMS:
-          flat_params = ['i32']
+          flat_params = [ptr_type(opts)]
         if len(flat_results) > 0:
-          flat_params += ['i32']
+          flat_params += [ptr_type(opts)]
         flat_results = ['i32']
     return CoreFuncType(flat_params, flat_results)
 
-def flatten_types(ts):
-  return [ft for t in ts for ft in flatten_type(t)]
+def flatten_types(ts, opts):
+  return [ft for t in ts for ft in flatten_type(t, opts)]
 ```
 As shown here, the core signatures `async` functions use a lower limit on the
 maximum number of parameters (1) and results (0) passed as scalars before
@@ -2792,7 +2826,7 @@ falling back to passing through memory.
 Presenting the definition of `flatten_type` piecewise, we start with the
 top-level case analysis:
 ```python
-def flatten_type(t):
+def flatten_type(t, opts):
   match despecialize(t):
     case BoolType()                       : return ['i32']
     case U8Type() | U16Type() | U32Type() : return ['i32']
@@ -2801,31 +2835,31 @@ def flatten_type(t):
     case F32Type()                        : return ['f32']
     case F64Type()                        : return ['f64']
     case CharType()                       : return ['i32']
-    case StringType()                     : return ['i32', 'i32']
-    case ErrorContextType()               : return ['i32']
-    case ListType(t, l)                   : return flatten_list(t, l)
-    case RecordType(fields)               : return flatten_record(fields)
-    case VariantType(cases)               : return flatten_variant(cases)
+    case StringType()                     : return [ptr_type(opts), ptr_type(opts)]
+    case ErrorContextType()               : return [idx_type(opts)]
+    case ListType(t, l)                   : return flatten_list(t, l, opts)
+    case RecordType(fields)               : return flatten_record(fields, opts)
+    case VariantType(cases)               : return flatten_variant(cases, opts)
     case FlagsType(labels)                : return ['i32']
-    case OwnType() | BorrowType()         : return ['i32']
-    case StreamType() | FutureType()      : return ['i32']
+    case OwnType() | BorrowType()         : return [idx_type(opts)]
+    case StreamType() | FutureType()      : return [idx_type(opts)]
 ```
 
 List flattening of a fixed-length list uses the same flattening as a tuple
 (via `flatten_record` below).
 ```python
-def flatten_list(elem_type, maybe_length):
+def flatten_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
-    return flatten_type(elem_type) * maybe_length
-  return ['i32', 'i32']
+    return flatten_type(elem_type, opts) * maybe_length
+  return [ptr_type(opts), ptr_type(opts)]
 ```
 
 Record flattening simply flattens each field in sequence.
 ```python
-def flatten_record(fields):
+def flatten_record(fields, opts):
   flat = []
   for f in fields:
-    flat += flatten_type(f.t)
+    flat += flatten_type(f.t, opts)
   return flat
 ```
 
@@ -2838,16 +2872,16 @@ case, all flattened variants are passed with the same static set of core types,
 which may involve, e.g., reinterpreting an `f32` as an `i32` or zero-extending
 an `i32` into an `i64`.
 ```python
-def flatten_variant(cases):
+def flatten_variant(cases, opts):
   flat = []
   for c in cases:
     if c.t is not None:
-      for i,ft in enumerate(flatten_type(c.t)):
+      for i,ft in enumerate(flatten_type(c.t, opts)):
         if i < len(flat):
           flat[i] = join(flat[i], ft)
         else:
           flat.append(ft)
-  return flatten_type(discriminant_type(cases)) + flat
+  return flatten_type(discriminant_type(cases), opts) + flat
 
 def join(a, b):
   if a == b: return a
@@ -2904,15 +2938,15 @@ def lift_flat(cx, vi, t):
     case F64Type()          : return canonicalize_nan64(vi.next('f64'))
     case CharType()         : return convert_i32_to_char(cx, vi.next('i32'))
     case StringType()       : return lift_flat_string(cx, vi)
-    case ErrorContextType() : return lift_error_context(cx, vi.next('i32'))
+    case ErrorContextType() : return lift_error_context(cx, vi.next(idx_type(cx.opts)))
     case ListType(t, l)     : return lift_flat_list(cx, vi, t, l)
     case RecordType(fields) : return lift_flat_record(cx, vi, fields)
     case VariantType(cases) : return lift_flat_variant(cx, vi, cases)
     case FlagsType(labels)  : return lift_flat_flags(vi, labels)
-    case OwnType()          : return lift_own(cx, vi.next('i32'), t)
-    case BorrowType()       : return lift_borrow(cx, vi.next('i32'), t)
-    case StreamType(t)      : return lift_stream(cx, vi.next('i32'), t)
-    case FutureType(t)      : return lift_future(cx, vi.next('i32'), t)
+    case OwnType()          : return lift_own(cx, vi.next(idx_type(cx.opts)), t)
+    case BorrowType()       : return lift_borrow(cx, vi.next(idx_type(cx.opts)), t)
+    case StreamType(t)      : return lift_stream(cx, vi.next(idx_type(cx.opts)), t)
+    case FutureType(t)      : return lift_future(cx, vi.next(idx_type(cx.opts)), t)
 ```
 
 Integers are lifted from core `i32` or `i64` values using the signedness of the
@@ -2943,8 +2977,8 @@ of from linear memory. Fixed-length lists are lifted the same way as a
 tuple (via `lift_flat_record` below).
 ```python
 def lift_flat_string(cx, vi):
-  ptr = vi.next('i32')
-  packed_length = vi.next('i32')
+  ptr = vi.next(ptr_type(cx.opts))
+  packed_length = vi.next(ptr_type(cx.opts))
   return load_string_from_range(cx, ptr, packed_length)
 
 def lift_flat_list(cx, vi, elem_type, maybe_length):
@@ -2953,8 +2987,8 @@ def lift_flat_list(cx, vi, elem_type, maybe_length):
     for i in range(maybe_length):
       a.append(lift_flat(cx, vi, elem_type))
     return a
-  ptr = vi.next('i32')
-  length = vi.next('i32')
+  ptr = vi.next(ptr_type(cx.opts))
+  length = vi.next(ptr_type(cx.opts))
   return load_list_from_range(cx, ptr, length, elem_type)
 ```
 
@@ -2975,7 +3009,7 @@ reinterprets between the different types appropriately and also traps if the
 high bits of an `i64` are set for a 32-bit type:
 ```python
 def lift_flat_variant(cx, vi, cases):
-  flat_types = flatten_variant(cases)
+  flat_types = flatten_variant(cases, cx.opts)
   assert(flat_types.pop(0) == 'i32')
   case_index = vi.next('i32')
   trap_if(case_index >= len(cases))
@@ -3092,14 +3126,14 @@ manually coercing the otherwise-incompatible type pairings allowed by `join`:
 ```python
 def lower_flat_variant(cx, v, cases):
   case_index, case_value = match_case(v, cases)
-  flat_types = flatten_variant(cases)
+  flat_types = flatten_variant(cases, cx.opts)
   assert(flat_types.pop(0) == 'i32')
   c = cases[case_index]
   if c.t is None:
     payload = []
   else:
     payload = lower_flat(cx, case_value, c.t)
-    for i,(fv,have) in enumerate(zip(payload, flatten_type(c.t))):
+    for i,(fv,have) in enumerate(zip(payload, flatten_type(c.t, cx.opts))):
       want = flat_types.pop(0)
       match (have, want):
         case ('f32', 'i32') : payload[i] = encode_float_as_i32(fv)
@@ -3126,12 +3160,12 @@ parameters or results (given by the `CoreValueIter` `vi`) into a tuple
 of component-level values with types `ts`.
 ```python
 def lift_flat_values(cx, max_flat, vi, ts):
-  flat_types = flatten_types(ts)
+  flat_types = flatten_types(ts, cx.opts)
   if len(flat_types) > max_flat:
-    ptr = vi.next('i32')
+    ptr = vi.next(ptr_type(cx.opts))
     tuple_type = TupleType(ts)
-    trap_if(ptr != align_to(ptr, alignment(tuple_type)))
-    trap_if(ptr + elem_size(tuple_type) > len(cx.opts.memory))
+    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
     return list(load(cx, ptr, tuple_type).values())
   else:
     return [ lift_flat(cx, vi, t) for t in ts ]
@@ -3146,18 +3180,18 @@ out-param:
 ```python
 def lower_flat_values(cx, max_flat, vs, ts, out_param = None):
   cx.inst.may_leave = False
-  flat_types = flatten_types(ts)
+  flat_types = flatten_types(ts, cx.opts)
   if len(flat_types) > max_flat:
     tuple_type = TupleType(ts)
     tuple_value = {str(i): v for i,v in enumerate(vs)}
     if out_param is None:
-      ptr = cx.opts.realloc(0, 0, alignment(tuple_type), elem_size(tuple_type))
+      ptr = cx.opts.realloc(0, 0, alignment(tuple_type, cx.opts), elem_size(tuple_type, cx.opts))
       flat_vals = [ptr]
     else:
-      ptr = out_param.next('i32')
+      ptr = out_param.next(ptr_type(cx.opts))
       flat_vals = []
-    trap_if(ptr != align_to(ptr, alignment(tuple_type)))
-    trap_if(ptr + elem_size(tuple_type) > len(cx.opts.memory))
+    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
     store(cx, tuple_value, tuple_type, ptr)
   else:
     flat_vals = []
@@ -3714,15 +3748,15 @@ For a canonical definition:
 (canon context.get $t $i (core func $f))
 ```
 validation specifies:
-* `$t` must be `i32` (for now; see [here][thread-local storage])
+* `$t` must be `i32` or `i64` (see [here][thread-local storage])
 * `$i` must be less than `Thread.CONTEXT_LENGTH` (`2`)
-* `$f` is given type `(func (result i32))`
+* `$f` is given type `(func (result $t))`
 
 Calling `$f` invokes the following function, which reads the [thread-local
 storage] of the [current thread]:
 ```python
 def canon_context_get(t, i, thread):
-  assert(t == 'i32')
+  assert(t == 'i32' or t == 'i64')
   assert(i < Thread.CONTEXT_LENGTH)
   return [thread.context[i]]
 ```
@@ -3735,15 +3769,15 @@ For a canonical definition:
 (canon context.set $t $i (core func $f))
 ```
 validation specifies:
-* `$t` must be `i32` (for now; see [here][thread-local storage])
+* `$t` must be `i32` or `i64` (see [here][thread-local storage])
 * `$i` must be less than `Thread.CONTEXT_LENGTH` (`2`)
-* `$f` is given type `(func (param $v i32))`
+* `$f` is given type `(func (param $v $t))`
 
 Calling `$f` invokes the following function, which writes to the [thread-local
 storage] of the [current thread]:
 ```python
 def canon_context_set(t, i, thread, v):
-  assert(t == 'i32')
+  assert(t == 'i32' or t == 'i64')
   assert(i < Thread.CONTEXT_LENGTH)
   thread.context[i] = v
   return []
@@ -3913,17 +3947,17 @@ Calling `$f` invokes the following function which waits for progress to be made
 on a `Waitable` in the given waitable set (indicated by index `$si`) and then
 returning its `EventCode` and writing the payload values into linear memory:
 ```python
-def canon_waitable_set_wait(cancellable, mem, thread, si, ptr):
+def canon_waitable_set_wait(cancellable, mem, opts, thread, si, ptr):
   trap_if(not thread.task.inst.may_leave)
   trap_if(not thread.task.may_block())
   wset = thread.task.inst.handles.get(si)
   trap_if(not isinstance(wset, WaitableSet))
   event = thread.task.wait_until(lambda: True, thread, wset, cancellable)
-  return unpack_event(mem, thread, ptr, event)
+  return unpack_event(mem, opts, thread, ptr, event)
 
-def unpack_event(mem, thread, ptr, e: EventTuple):
+def unpack_event(mem, opts, thread, ptr, e: EventTuple):
   event, p1, p2 = e
-  cx = LiftLowerContext(LiftLowerOptions(memory = mem), thread.task.inst)
+  cx = LiftLowerContext(LiftLowerOptions(memory = mem, addr_type = opts.addr_type, tbl_idx_type = opts.tbl_idx_type), thread.task.inst)
   store(cx, p1, U32Type(), ptr)
   store(cx, p2, U32Type(), ptr + 4)
   return [event]
@@ -3956,7 +3990,7 @@ Calling `$f` invokes the following function, which either returns an event that
 was pending on one of the waitables in the given waitable set (the same way as
 `waitable-set.wait`) or, if there is none, returns `0`.
 ```python
-def canon_waitable_set_poll(cancellable, mem, thread, si, ptr):
+def canon_waitable_set_poll(cancellable, mem, opts, thread, si, ptr):
   trap_if(not thread.task.inst.may_leave)
   wset = thread.task.inst.handles.get(si)
   trap_if(not isinstance(wset, WaitableSet))
@@ -3966,7 +4000,7 @@ def canon_waitable_set_poll(cancellable, mem, thread, si, ptr):
     event = (EventCode.NONE, 0, 0)
   else:
     event = wset.get_pending_event()
-  return unpack_event(mem, thread, ptr, event)
+  return unpack_event(mem, opts, thread, ptr, event)
 ```
 If `cancellable` is set, then `waitable-set.poll` will return whether the
 supertask has already or concurrently requested cancellation.
diff --git a/design/mvp/Concurrency.md b/design/mvp/Concurrency.md
index 6c9c5f6f..7dd5395e 100644
--- a/design/mvp/Concurrency.md
+++ b/design/mvp/Concurrency.md
@@ -413,10 +413,10 @@ Each thread contains a distinct mutable **thread-local storage** array. The
 current thread's thread-local storage can be read and written from core wasm
 code by calling the [`context.get`] and [`context.set`] built-ins.
 
-The thread-local storage array's length is currently fixed to contain exactly
-2 `i32`s with the goal of allowing this array to be stored inline in whatever
-existing runtime data structure is already efficiently reachable from ambient
-compiled wasm code. Because module instantiation is declarative in the
+The thread-local storage array's length is currently fixed to contain exactly 2
+`i32`s or `i64`s with the goal of allowing this array to be stored inline in
+whatever existing runtime data structure is already efficiently reachable from
+ambient compiled wasm code. Because module instantiation is declarative in the
 Component Model, the imported `context.{get,set}` built-ins can be inlined by
 the core wasm compiler as-if they were instructions, allowing the generated
 machine code to be a single load or store. This makes thread-local storage a
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index d8c0bdd1..e8cc1fad 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -690,11 +690,11 @@ only be used indirectly by untrusted user-mode processes via their integer
 index in the table.
 
 In the Component Model, handles are lifted-from and lowered-into `i32` values
-that index an encapsulated per-component-instance table that is maintained by
-the canonical function definitions described [below](#canonical-definitions).
-In the future, handles could be backwards-compatibly lifted and lowered from
-[reference types]  (via the addition of a new `canonopt`, as introduced
-[below](#canonical-abi)).
+(or `i64` values when the `table64` canonopt is present) that index an
+encapsulated per-component-instance table that is maintained by the canonical
+function definitions described [below](#canonical-definitions). In the future,
+handles could be backwards-compatibly lifted and lowered from [reference types]
+(via the addition of a new `canonopt`, as introduced [below](#canonical-abi)).
 
 The uniqueness and dropping conditions mentioned above are enforced at runtime
 by the Component Model through these canonical definitions. The `typeidx`
@@ -715,8 +715,9 @@ destination components. Thus, in the abstract, `stream` and `future` can be
 thought of as inter-component control-flow or synchronization mechanisms.
 
 Just like with handles, in the Component Model, async value types are
-lifted-from and lowered-into `i32` values that index an encapsulated
-per-component-instance table that is maintained by the canonical ABI built-ins
+lifted-from and lowered-into `i32` values (or `i64` values when the `table64`
+canonopt is present) that index an encapsulated per-component-instance table
+that is maintained by the canonical ABI built-ins
 [below](#canonical-definitions). The Component-Model-defined ABI for creating,
 writing-to and reading-from `stream` and `future` values is meant to be bound
 to analogous source-language features like promises, futures, streams,
@@ -858,7 +859,7 @@ which is a *subtype* of some other type. Currently, the only supported bound is
 "any resource type". Thus, only resource types can be imported/exported
 abstractly, not arbitrary value types. This allows type imports to always be
 compiled independently of their arguments using a "universal representation" for
-handle values (viz., `i32`, as defined by the [Canonical ABI](CanonicalABI.md)).
+handle values (viz., `i32` or `i64`, as defined by the [Canonical ABI](CanonicalABI.md)).
 In the future, `sub` may be extended to allow referencing other resource types,
 thereby allowing abstract resource subtyping.
 
@@ -1281,6 +1282,7 @@ canonopt ::= string-encoding=utf8
            | (memory <core:memidx>)
            | (realloc <core:funcidx>)
            | (post-return <core:funcidx>)
+           | table64
            | async 🔀
            | (callback <core:funcidx>) 🔀
 ```
@@ -1304,15 +1306,17 @@ validation requires this option to be present (there is no default).
 The `(realloc ...)` option specifies a core function that is validated to
 have the following core function type:
 ```wat
-(func (param $originalPtr i32)
-      (param $originalSize i32)
+(func (param $originalPtr $addr)
+      (param $originalSize $addr)
       (param $alignment i32)
-      (param $newSize i32)
-      (result i32))
+      (param $newSize $addr)
+      (result $addr))
 ```
-The Canonical ABI will use `realloc` both to allocate (passing `0` for the
-first two parameters) and reallocate. If the Canonical ABI needs `realloc`,
-validation requires this option to be present (there is no default).
+where `$addr` is `i32` when the `memory` canonopt refers to a 32-bit memory or
+`i64` when it refers to a 64-bit memory. The Canonical ABI will use `realloc`
+both to allocate (passing `0` for the first two parameters) and reallocate. If
+the Canonical ABI needs `realloc`, validation requires this option to be
+present (there is no default).
 
 The `(post-return ...)` option may only be present in `canon lift` when
 `async` is not present and specifies a core function to be called with the
@@ -1335,9 +1339,10 @@ validated to have the following core function type:
 ```wat
 (func (param $ctx i32)
       (param $event i32)
-      (param $payload i32)
+      (param $payload $addr)
       (result $done i32))
 ```
+where `$addr` is determined by the `memory` canonopt as described above.
 Again, see the [concurrency explainer] for more details.
 
 Based on this description of the AST, the [Canonical ABI explainer] gives a
@@ -1463,6 +1468,10 @@ canon ::= ...
         | (canon thread.available-parallelism (core func <id>?)) 🧵②
 ```
 
+In the Canonical ABI signatures below, `$addr` is `i32` when the `memory`
+canonopt refers to a 32-bit memory or `i64` for a 64-bit memory, and `$idx` is
+`i32` by default or `i64` when the `table64` canonopt is present.
+
 ##### Resource built-ins
 
 ###### `resource.new`
@@ -1470,7 +1479,7 @@ canon ::= ...
 | Synopsis                   |                            |
 | -------------------------- | -------------------------- |
 | Approximate WIT signature  | `func<T>(rep: T.rep) -> T` |
-| Canonical ABI signature    | `[rep:i32] -> [i32]`       |
+| Canonical ABI signature    | `[rep:i32] -> [$idx]`      |
 
 The `resource.new` built-in creates a new resource (of resource type `T`) with
 `rep` as its representation, and returns a new handle pointing to the new
@@ -1480,7 +1489,7 @@ component that defined `T`.
 In the Canonical ABI, `T.rep` is defined to be the `$rep` in the
 `(type $T (resource (rep $rep) ...))` type definition that defined `T`. While
 it's designed to allow different types in the future, it is currently
-hard-coded to always be `i32`.
+restricted to be either `i32` or `i64`.
 
 For details, see [`canon_resource_new`] in the Canonical ABI explainer.
 
@@ -1489,7 +1498,7 @@ For details, see [`canon_resource_new`] in the Canonical ABI explainer.
 | Synopsis                   |                                    |
 | -------------------------- | ---------------------------------- |
 | Approximate WIT signature  | `func<T>(t: T)`                    |
-| Canonical ABI signature    | `[t:i32] -> []`                    |
+| Canonical ABI signature    | `[t:$idx] -> []`                   |
 
 The `resource.drop` built-in drops a resource handle `t` (with resource type
 `T`). If the dropped handle owns the resource, the resource's `dtor` is called,
@@ -1503,7 +1512,7 @@ For details, see [`canon_resource_drop`] in the Canonical ABI explainer.
 | Synopsis                   |                          |
 | -------------------------- | ------------------------ |
 | Approximate WIT signature  | `func<T>(t: T) -> T.rep` |
-| Canonical ABI signature    | `[t:i32] -> [i32]`       |
+| Canonical ABI signature    | `[t:$idx] -> [i32]`      |
 
 The `resource.rep` built-in returns the representation of the resource (with
 resource type `T`) pointed to by the handle `t`. Validation only allows
@@ -1521,12 +1530,12 @@ allowing it to create and return new resources to its client:
   (import "Libc" (core module $Libc ...))
   (core instance $libc (instantiate $Libc))
   (type $R (resource (rep i32) (dtor (func $libc "free"))))
-  (core func $R_new (param i32) (result i32)
+  (core func $R_new (param i32) (result $idx)
     (canon resource.new $R)
   )
   (core module $Main
-    (import "canon" "R_new" (func $R_new (param i32) (result i32)))
-    (func (export "make_R") (param ...) (result i32)
+    (import "canon" "R_new" (func $R_new (param i32) (result $idx)))
+    (func (export "make_R") (param ...) (result $idx)
       (return (call $R_new ...))
     )
   )
@@ -1539,7 +1548,7 @@ allowing it to create and return new resources to its client:
   )
 )
 ```
-Here, the `i32` returned by `resource.new`, which is an index into the current
+Here, the `$idx` returned by `resource.new`, which is an index into the current
 component instance's table, is immediately returned by `make_R`, thereby
 transferring ownership of the newly-created resource to the export's caller.
 
@@ -1554,12 +1563,12 @@ See the [concurrency explainer] for background.
 | Synopsis                   |                    |
 | -------------------------- | ------------------ |
 | Approximate WIT signature  | `func<T,i>() -> T` |
-| Canonical ABI signature    | `[] -> [i32]`      |
+| Canonical ABI signature    | `[] -> [t]`        |
 
 The `context.get` built-in returns the `i`th element of the [current thread]'s
 [thread-local storage] array. Validation currently restricts `i` to be less
-than 2 and `t` to be `i32`, but these restrictions may be relaxed in the
-future.
+than 2 and `t` to be `i32` or `i64`, but these restrictions may be relaxed in
+the future.
 
 For details, see [Thread-Local Storage] in the concurrency explainer and
 [`canon_context_get`] in the Canonical ABI explainer.
@@ -1569,12 +1578,12 @@ For details, see [Thread-Local Storage] in the concurrency explainer and
 | Synopsis                   |                   |
 | -------------------------- | ----------------- |
 | Approximate WIT signature  | `func<T,i>(v: T)` |
-| Canonical ABI signature    | `[i32] -> []`     |
+| Canonical ABI signature    | `[t] -> []`       |
 
 The `context.set` built-in sets the `i`th element of the [current thread]'s
 [thread-local storage] array to the value `v`. Validation currently restricts
-`i` to be less than 2 and `t` to be `i32`, but these restrictions may be
-relaxed in the future.
+`i` to be less than 2 and `t` to be `i32` or `i64`, but these restrictions may
+be relaxed in the future.
 
 For details, see [Thread-Local Storage] in the concurrency explainer and
 [`canon_context_set`] in the Canonical ABI explainer.
@@ -1658,9 +1667,9 @@ For details, see [Cancellation] in the concurrency explainer and
 | Synopsis                   |                          |
 | -------------------------- | ------------------------ |
 | Approximate WIT signature  | `func() -> waitable-set` |
-| Canonical ABI signature    | `[] -> [i32]`            |
+| Canonical ABI signature    | `[] -> [$idx]`           |
 
-The `waitable-set.new` built-in returns the `i32` index of a new [waitable
+The `waitable-set.new` built-in returns the `$idx` index of a new [waitable
 set]. The `waitable-set` type is not a true WIT-level type but instead serves
 to document associated built-ins below. Waitable sets start out empty and are
 populated explicitly with [waitables] by `waitable.join`.
@@ -1673,7 +1682,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                                                |
 | -------------------------- | ---------------------------------------------- |
 | Approximate WIT signature  | `func<cancellable?>(s: waitable-set) -> event` |
-| Canonical ABI signature    | `[s:i32 payload-addr:i32] -> [event-code:i32]` |
+| Canonical ABI signature    | `[s:$idx payload-addr:$addr] -> [event-code:i32]` |
 
 where `event` is defined in WIT as:
 ```wit
@@ -1727,7 +1736,7 @@ part [`stream.read` and `stream.write`](#-streamread-and-streamwrite) and
 [`future.read` and `future.write`](#-futureread-and-futurewrite) below.
 
 In the Canonical ABI, the `event-code` return value provides the `event`
-discriminant and the case payloads are stored as two contiguous `i32`s at the
+discriminant and the case payloads are stored as two contiguous `$idx`s at the
 8-byte-aligned address `payload-addr`.
 
 For details, see [Waitables and Waitable Sets] in the concurrency explainer and
@@ -1738,7 +1747,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                                                |
 | -------------------------- | ---------------------------------------------- |
 | Approximate WIT signature  | `func<cancellable?>(s: waitable-set) -> event` |
-| Canonical ABI signature    | `[s:i32 payload-addr:i32] -> [event-code:i32]` |
+| Canonical ABI signature    | `[s:$idx payload-addr:$addr] -> [event-code:i32]` |
 
 where `event` is defined as in [`waitable-set.wait`](#-waitable-setwait).
 
@@ -1762,7 +1771,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                          |
 | -------------------------- | ------------------------ |
 | Approximate WIT signature  | `func(s: waitable-set)` |
-| Canonical ABI signature    | `[s:i32] -> []`    |
+| Canonical ABI signature    | `[s:$idx] -> []`   |
 
 The `waitable-set.drop` built-in removes the indicated [waitable set] from the
 current component instance's table, trapping if the waitable set is not empty
@@ -1776,7 +1785,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                                                      |
 | -------------------------- | ---------------------------------------------------- |
 | Approximate WIT signature  | `func(w: waitable, maybe_set: option<waitable-set>)` |
-| Canonical ABI signature    | `[w:i32, maybe_set:i32] -> []`                       |
+| Canonical ABI signature    | `[w:$idx, maybe_set:$idx] -> []`                     |
 
 The `waitable.join` built-in may be called given a [waitable] and an optional
 [waitable set]. `join` first removes `w` from any waitable set that it is a
@@ -1798,7 +1807,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                                                           |
 | -------------------------- | --------------------------------------------------------- |
 | Approximate WIT signature  | `func<async?>(subtask: subtask) -> option<subtask-state>` |
-| Canonical ABI signature    | `[subtask:i32] -> [i32]`                                  |
+| Canonical ABI signature    | `[subtask:$idx] -> [i32]`                                 |
 
 The `subtask.cancel` built-in requests [cancellation] of the indicated subtask.
 If the `async` is present, `none` is returned (reprented as `-1` in the
@@ -1819,7 +1828,7 @@ For details, see [Cancellation] in the concurrency explainer and
 | Synopsis                   |                          |
 | -------------------------- | ------------------------ |
 | Approximate WIT signature  | `func(subtask: subtask)` |
-| Canonical ABI signature    | `[subtask:i32] -> []`    |
+| Canonical ABI signature    | `[subtask:$idx] -> []`   |
 
 The `subtask.drop` built-in removes the indicated [subtask] from the current
 component instance's table, trapping if the subtask hasn't returned.
@@ -1836,9 +1845,9 @@ For details, see [`canon_subtask_drop`] in the Canonical ABI explainer.
 
 The `stream.new` and `future.new` built-ins return the [readable and writable
 ends] of a new `stream<T?>` or `future<T?>`. The readable and writable ends are
-added to the current component instance's table and then the two `i32` indices
+added to the current component instance's table and then the two `$idx` indices
 of the two ends are packed into a single `i64` return value (with the readable
-end in the low 32 bits).
+end in the low bits).
 
 The types `readable-stream-end<T?>` and `writable-stream-end<T?>` are not WIT
 types; they are the conceptual lower-level types that describe how the
@@ -1856,7 +1865,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | -------------------------------------------- | ----------------------------------------------------------------------------------------------- |
 | Approximate WIT signature for `stream.read`  | `func<stream<T?>>(e: readable-stream-end<T?>, b: writable-buffer<T>?) -> option<stream-result>` |
 | Approximate WIT signature for `stream.write` | `func<stream<T?>>(e: writable-stream-end<T?>, b: readable-buffer<T>?) -> option<stream-result>` |
-| Canonical ABI signature                      | `[stream-end:i32 ptr:i32 num:i32] -> [i32]`                                                     |
+| Canonical ABI signature                      | `[stream-end:$idx ptr:$addr num:i32] -> [i32]`                                                   |
 
 where `stream-result` is defined in WIT as:
 ```wit
@@ -1911,9 +1920,9 @@ If `stream.{read,write}` return `dropped` (synchronously or asynchronously),
 any subsequent operation on the stream other than `stream.drop-{readable,writable}`
 traps.
 
-In the Canonical ABI, the `{readable,writable}-stream-end` is passed as an
-`i32` index into the component instance's table followed by a pair of `i32`s
-describing the linear memory offset and size-in-elements of the
+In the Canonical ABI, the `{readable,writable}-stream-end` is passed as a
+`$idx` index into the component instance's table followed by the `$addr` linear
+memory offset and an `i32` size-in-elements of the
 `{readable,writable}-buffer<T>`. The `option<stream-result>` return value is
 bit-packed into a single `i32` where:
 * `0xffff_ffff` represents `none`.
@@ -1929,7 +1938,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | -------------------------------------------- | -------------------------------------------------------------------------------------------------------- |
 | Approximate WIT signature for `future.read`  | `func<future<T?>>(e: readable-future-end<T?>, b: writable-buffer<T; 1>?) -> option<future-read-result>`  |
 | Approximate WIT signature for `future.write` | `func<future<T?>>(e: writable-future-end<T?>, v: readable-buffer<T; 1>?) -> option<future-write-result>` |
-| Canonical ABI signature                      | `[readable-future-end:i32 ptr:i32] -> [i32]`                                                             |
+| Canonical ABI signature                      | `[readable-future-end:$idx ptr:$addr] -> [i32]`                                                          |
 
 where `future-{read,write}-result` are defined in WIT as:
 ```wit
@@ -1978,9 +1987,9 @@ A component *may* call `future.drop-readable` *before* successfully reading a
 value to indicate a loss of interest. `future.drop-writable` will trap if
 called before successfully writing a value.
 
-In the Canonical ABI, the `{readable,writable}-future-end` is passed as an
-`i32` index into the component instance's table followed by a single
-`i32` describing the linear memory offset of the
+In the Canonical ABI, the `{readable,writable}-future-end` is passed as a
+`$idx` index into the component instance's table followed by a single
+`$addr` describing the linear memory offset of the
 `{readable,writable}-buffer<T; 1>`. The `option<future-{read,write}-result>`
 return value is bit-packed into the single `i32` return value where
 `0xffff_ffff` represents `none`. And, `future-read-result.cancelled` is encoded
@@ -1998,7 +2007,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | Approximate WIT signature for `stream.cancel-write` | `func<stream<T?>>(e: writable-stream-end<T?>) -> option<stream-result>`       |
 | Approximate WIT signature for `future.cancel-read`  | `func<future<T?>>(e: readable-future-end<T?>) -> option<future-read-result>`  |
 | Approximate WIT signature for `future.cancel-write` | `func<future<T?>>(e: writable-future-end<T?>) -> option<future-write-result>` |
-| Canonical ABI signature                             | `[e: i32] -> [i32]`                                                           |
+| Canonical ABI signature                             | `[e: $idx] -> [i32]`                                                          |
 
 The `{stream,future}.cancel-{read,write}` built-ins take the matching [readable
 or writable end] of a stream or future that has a pending `async`
@@ -2026,7 +2035,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | Approximate WIT signature for `stream.drop-writable` | `func<stream<T?>>(e: writable-stream-end<T?>)` |
 | Approximate WIT signature for `future.drop-readable` | `func<future<T?>>(e: readable-future-end<T?>)` |
 | Approximate WIT signature for `future.drop-writable` | `func<future<T?>>(e: writable-future-end<T?>)` |
-| Canonical ABI signature                              | `[end:i32 err:i32] -> []`                      |
+| Canonical ABI signature                              | `[end:$idx err:$idx] -> []`                    |
 
 The `{stream,future}.drop-{readable,writable}` built-ins remove the indicated
 [stream or future] from the current component instance's table, trapping if the
@@ -2042,7 +2051,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | Synopsis                   |                 |
 | -------------------------- | --------------- |
 | Approximate WIT signature  | `func() -> u32` |
-| Canonical ABI signature    | `[] -> [i32]`   |
+| Canonical ABI signature    | `[] -> [$idx]`  |
 
 The `thread.index` built-in returns the index of the [current thread] in the
 component instance's table. While `thread.new-indirect` also returns the index
@@ -2057,7 +2066,7 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 | Synopsis                   |                                                               |
 | -------------------------- | ------------------------------------------------------------- |
 | Approximate WIT signature  | `func<FuncT,tableidx>(fi: u32, c: FuncT.params[0]) -> thread` |
-| Canonical ABI signature    | `[fi:i32 c:i32] -> [i32]`                                     |
+| Canonical ABI signature    | `[fi:i32 c:i32] -> [$idx]`                                    |
 
 The `thread.new-indirect` built-in adds a new thread to the current component
 instance's table, returning the index of the new thread. The function table
@@ -2068,7 +2077,7 @@ with `c` as its first and only parameter.
 
 Currently, `FuncT` must be `(func (param i32))` and thus `c` must always be an
 `i32`, but this restriction can be loosened in the future as the Canonical
-ABI is extended for [memory64] and [GC].
+ABI is extended for [GC].
 
 As explained in the [concurrency explainer], a thread created by
 `thread.new-indirect` is initially in a suspended state and must be resumed
@@ -2083,7 +2092,7 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 | Synopsis                   |                                                   |
 | -------------------------- | ------------------------------------------------- |
 | Approximate WIT signature  | `func<cancellable?>(t: thread) -> suspend-result` |
-| Canonical ABI signature    | `[t:i32] -> [i32]`                                |
+| Canonical ABI signature    | `[t:$idx] -> [i32]`                               |
 
 where `suspend-result` is defined in WIT as:
 ```wit
@@ -2137,7 +2146,7 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 | Synopsis                   |                   |
 | -------------------------- | ----------------- |
 | Approximate WIT signature  | `func(t: thread)` |
-| Canonical ABI signature    | `[t:i32] -> []`   |
+| Canonical ABI signature    | `[t:$idx] -> []`  |
 
 The `thread.resume-later` built-in changes the state of thread `t` from
 "suspended" to "ready" (trapping if `t` is not in a "suspended" state) so that
@@ -2151,7 +2160,7 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 | Synopsis                   |                                 |
 | -------------------------- | ------------------------------- |
 | Approximate WIT signature  | `func<cancellable?>(t: thread)` |
-| Canonical ABI signature    | `[t:i32] -> [suspend-result]`   |
+| Canonical ABI signature    | `[t:$idx] -> [suspend-result]`  |
 
 The `thread.yield-to` built-in immediately resumes execution of the thread `t`,
 (trapping if `t` is not in a "suspended" state) leaving the [current thread] in
@@ -2251,7 +2260,7 @@ explainer.
 | Synopsis                         |                                          |
 | -------------------------------- | ---------------------------------------- |
 | Approximate WIT signature        | `func(message: string) -> error-context` |
-| Canonical ABI signature          | `[ptr:i32 len:i32] -> [i32]`             |
+| Canonical ABI signature          | `[ptr:$addr len:$addr] -> [$idx]`        |
 
 The `error-context.new` built-in returns a new `error-context` value. The given
 string is non-deterministically transformed to produce the `error-context`'s
@@ -2267,14 +2276,14 @@ For details, see [`canon_error_context_new`] in the Canonical ABI explainer.
 | Synopsis                         |                                         |
 | -------------------------------- | --------------------------------------- |
 | Approximate WIT signature        | `func(errctx: error-context) -> string` |
-| Canonical ABI signature          | `[errctxi:i32 ptr:i32] -> []`           |
+| Canonical ABI signature          | `[errctxi:$idx ptr:$addr] -> []`        |
 
 The `error-context.debug-message` built-in returns the
 [debug message](#error-context-type) of the given `error-context`.
 
-In the Canonical ABI, it writes the debug message into `ptr` as an 8-byte
-(`ptr`, `length`) pair, according to the Canonical ABI for `string`, given the
-`<canonopt>*` immediates.
+In the Canonical ABI, it writes the debug message into `ptr` as a (`ptr`,
+`length`) pair (8 bytes for a 32-bit memory, 16 for a 64-bit memory), according
+to the Canonical ABI for `string`, given the `<canonopt>*` immediates.
 
 For details, see [`canon_error_context_debug_message`] in the Canonical ABI
 explainer.
@@ -2284,7 +2293,7 @@ explainer.
 | Synopsis                         |                               |
 | -------------------------------- | ----------------------------- |
 | Approximate WIT signature        | `func(errctx: error-context)` |
-| Canonical ABI signature          | `[errctxi:i32] -> []`         |
+| Canonical ABI signature          | `[errctxi:$idx] -> []`        |
 
 The `error-context.drop` built-in drops the given `error-context` value from
 the component instance.
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 32db6db0..abee7ef2 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -234,10 +234,30 @@ def __init__(self, opts, inst, borrow_scope = None):
 class LiftOptions:
   string_encoding: str = 'utf8'
   memory: Optional[bytearray] = None
+  addr_type: str = 'i32'
+  tbl_idx_type: str = 'i32'
 
   def equal(lhs, rhs):
     return lhs.string_encoding == rhs.string_encoding and \
-           lhs.memory is rhs.memory
+           lhs.memory is rhs.memory and \
+           lhs.addr_type == rhs.addr_type and \
+           lhs.tbl_idx_type == rhs.tbl_idx_type
+
+def ptr_size(opts):  # byte width of a linear-memory pointer, selected by opts.addr_type
+  match opts.addr_type:
+    case 'i32': return 4
+    case 'i64': return 8  # any other addr_type matches no case, so None is returned
+
+def ptr_type(opts):  # core value type used when a pointer or length is passed as a flat value
+  return opts.addr_type
+
+def idx_size(opts):  # byte width of a stored table index (own/borrow/stream/future/error-context)
+  match opts.tbl_idx_type:
+    case 'i32': return 4
+    case 'i64': return 8  # any other tbl_idx_type matches no case, so None is returned
+
+def idx_type(opts):  # core value type used when a table index is passed as a flat value
+  return opts.tbl_idx_type
 
 @dataclass
 class LiftLowerOptions(LiftOptions):
@@ -775,8 +795,8 @@ class BufferGuestImpl(Buffer):
   def __init__(self, t, cx, ptr, length):
     trap_if(length > Buffer.MAX_LENGTH)
     if t and length > 0:
-      trap_if(ptr != align_to(ptr, alignment(t)))
-      trap_if(ptr + length * elem_size(t) > len(cx.opts.memory))
+      trap_if(ptr != align_to(ptr, alignment(t, cx.opts)))
+      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory))
     self.cx = cx
     self.t = t
     self.ptr = ptr
@@ -794,7 +814,7 @@ def read(self, n):
     assert(n <= self.remain())
     if self.t:
       vs = load_list_from_valid_range(self.cx, self.ptr, n, self.t)
-      self.ptr += n * elem_size(self.t)
+      self.ptr += n * elem_size(self.t, self.cx.opts)
     else:
       vs = n * [()]
     self.progress += n
@@ -805,7 +825,7 @@ def write(self, vs):
     assert(len(vs) <= self.remain())
     if self.t:
       store_list_into_valid_range(self.cx, vs, self.ptr, self.t)
-      self.ptr += len(vs) * elem_size(self.t)
+      self.ptr += len(vs) * elem_size(self.t, self.cx.opts)
     else:
       assert(all(v == () for v in vs))
     self.progress += len(vs)
@@ -1062,7 +1082,7 @@ def contains(t, p):
 
 ### Alignment
 
-def alignment(t):
+def alignment(t, opts):
   match despecialize(t):
     case BoolType()                  : return 1
     case S8Type() | U8Type()         : return 1
@@ -1072,28 +1092,28 @@ def alignment(t):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return 4
-    case ErrorContextType()          : return 4
-    case ListType(t, l)              : return alignment_list(t, l)
-    case RecordType(fields)          : return alignment_record(fields)
-    case VariantType(cases)          : return alignment_variant(cases)
+    case StringType()                : return ptr_size(opts)
+    case ErrorContextType()          : return idx_size(opts)
+    case ListType(t, l)              : return alignment_list(t, l, opts)
+    case RecordType(fields)          : return alignment_record(fields, opts)
+    case VariantType(cases)          : return alignment_variant(cases, opts)
     case FlagsType(labels)           : return alignment_flags(labels)
-    case OwnType() | BorrowType()    : return 4
-    case StreamType() | FutureType() : return 4
+    case OwnType() | BorrowType()    : return idx_size(opts)
+    case StreamType() | FutureType() : return idx_size(opts)
 
-def alignment_list(elem_type, maybe_length):
+def alignment_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
-    return alignment(elem_type)
-  return 4
+    return alignment(elem_type, opts)
+  return ptr_size(opts)
 
-def alignment_record(fields):
+def alignment_record(fields, opts):
   a = 1
   for f in fields:
-    a = max(a, alignment(f.t))
+    a = max(a, alignment(f.t, opts))
   return a
 
-def alignment_variant(cases):
-  return max(alignment(discriminant_type(cases)), max_case_alignment(cases))
+def alignment_variant(cases, opts):
+  return max(alignment(discriminant_type(cases), opts), max_case_alignment(cases, opts))
 
 def discriminant_type(cases):
   n = len(cases)
@@ -1104,11 +1124,11 @@ def discriminant_type(cases):
     case 2: return U16Type()
     case 3: return U32Type()
 
-def max_case_alignment(cases):
+def max_case_alignment(cases, opts):
   a = 1
   for c in cases:
     if c.t is not None:
-      a = max(a, alignment(c.t))
+      a = max(a, alignment(c.t, opts))
   return a
 
 def alignment_flags(labels):
@@ -1120,7 +1140,7 @@ def alignment_flags(labels):
 
 ### Element Size
 
-def elem_size(t):
+def elem_size(t, opts):
   match despecialize(t):
     case BoolType()                  : return 1
     case S8Type() | U8Type()         : return 1
@@ -1130,40 +1150,40 @@ def elem_size(t):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return 8
-    case ErrorContextType()          : return 4
-    case ListType(t, l)              : return elem_size_list(t, l)
-    case RecordType(fields)          : return elem_size_record(fields)
-    case VariantType(cases)          : return elem_size_variant(cases)
+    case StringType()                : return 2 * ptr_size(opts)
+    case ErrorContextType()          : return idx_size(opts)
+    case ListType(t, l)              : return elem_size_list(t, l, opts)
+    case RecordType(fields)          : return elem_size_record(fields, opts)
+    case VariantType(cases)          : return elem_size_variant(cases, opts)
     case FlagsType(labels)           : return elem_size_flags(labels)
-    case OwnType() | BorrowType()    : return 4
-    case StreamType() | FutureType() : return 4
+    case OwnType() | BorrowType()    : return idx_size(opts)
+    case StreamType() | FutureType() : return idx_size(opts)
 
-def elem_size_list(elem_type, maybe_length):
+def elem_size_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
-    return maybe_length * elem_size(elem_type)
-  return 8
+    return maybe_length * elem_size(elem_type, opts)
+  return 2 * ptr_size(opts)
 
-def elem_size_record(fields):
+def elem_size_record(fields, opts):
   s = 0
   for f in fields:
-    s = align_to(s, alignment(f.t))
-    s += elem_size(f.t)
+    s = align_to(s, alignment(f.t, opts))
+    s += elem_size(f.t, opts)
   assert(s > 0)
-  return align_to(s, alignment_record(fields))
+  return align_to(s, alignment_record(fields, opts))
 
 def align_to(ptr, alignment):
   return math.ceil(ptr / alignment) * alignment
 
-def elem_size_variant(cases):
-  s = elem_size(discriminant_type(cases))
-  s = align_to(s, max_case_alignment(cases))
+def elem_size_variant(cases, opts):
+  s = elem_size(discriminant_type(cases), opts)
+  s = align_to(s, max_case_alignment(cases, opts))
   cs = 0
   for c in cases:
     if c.t is not None:
-      cs = max(cs, elem_size(c.t))
+      cs = max(cs, elem_size(c.t, opts))
   s += cs
-  return align_to(s, alignment_variant(cases))
+  return align_to(s, alignment_variant(cases, opts))
 
 def elem_size_flags(labels):
   n = len(labels)
@@ -1175,8 +1195,8 @@ def elem_size_flags(labels):
 ### Loading
 
 def load(cx, ptr, t):
-  assert(ptr == align_to(ptr, alignment(t)))
-  assert(ptr + elem_size(t) <= len(cx.opts.memory))
+  assert(ptr == align_to(ptr, alignment(t, cx.opts)))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : return convert_int_to_bool(load_int(cx, ptr, 1))
     case U8Type()           : return load_int(cx, ptr, 1)
@@ -1191,15 +1211,15 @@ def load(cx, ptr, t):
     case F64Type()          : return decode_i64_as_float(load_int(cx, ptr, 8))
     case CharType()         : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
     case StringType()       : return load_string(cx, ptr)
-    case ErrorContextType() : return lift_error_context(cx, load_int(cx, ptr, 4))
+    case ErrorContextType() : return lift_error_context(cx, load_int(cx, ptr, idx_size(cx.opts)))
     case ListType(t, l)     : return load_list(cx, ptr, t, l)
     case RecordType(fields) : return load_record(cx, ptr, fields)
     case VariantType(cases) : return load_variant(cx, ptr, cases)
     case FlagsType(labels)  : return load_flags(cx, ptr, labels)
-    case OwnType()          : return lift_own(cx, load_int(cx, ptr, 4), t)
-    case BorrowType()       : return lift_borrow(cx, load_int(cx, ptr, 4), t)
-    case StreamType(t)      : return lift_stream(cx, load_int(cx, ptr, 4), t)
-    case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, 4), t)
+    case OwnType()          : return lift_own(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
+    case BorrowType()       : return lift_borrow(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
+    case StreamType(t)      : return lift_stream(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
+    case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
 
 def load_int(cx, ptr, nbytes, signed = False):
   return int.from_bytes(cx.opts.memory[ptr : ptr+nbytes], 'little', signed = signed)
@@ -1245,11 +1265,12 @@ def convert_i32_to_char(cx, i):
 String = tuple[str, str, int]
 
 def load_string(cx, ptr) -> String:
-  begin = load_int(cx, ptr, 4)
-  tagged_code_units = load_int(cx, ptr + 4, 4)
+  begin = load_int(cx, ptr, ptr_size(cx.opts))
+  tagged_code_units = load_int(cx, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
   return load_string_from_range(cx, begin, tagged_code_units)
 
-UTF16_TAG = 1 << 31
+def utf16_tag(opts):  # top bit of a pointer-width length word; set means UTF-16 in 'latin1+utf16'
+  return 1 << (ptr_size(opts) * 8 - 1)  # NOTE(review): bit 31 vs bit 63 by addr_type; confirm a tagged length is never tested against a tag from different opts
 
 def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   match cx.opts.string_encoding:
@@ -1263,8 +1284,8 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
       encoding = 'utf-16-le'
     case 'latin1+utf16':
       alignment = 2
-      if bool(tagged_code_units & UTF16_TAG):
-        byte_length = 2 * (tagged_code_units ^ UTF16_TAG)
+      if bool(tagged_code_units & utf16_tag(cx.opts)):
+        byte_length = 2 * (tagged_code_units ^ utf16_tag(cx.opts))
         encoding = 'utf-16-le'
       else:
         byte_length = tagged_code_units
@@ -1287,36 +1308,36 @@ def lift_error_context(cx, i):
 def load_list(cx, ptr, elem_type, maybe_length):
   if maybe_length is not None:
     return load_list_from_valid_range(cx, ptr, maybe_length, elem_type)
-  begin = load_int(cx, ptr, 4)
-  length = load_int(cx, ptr + 4, 4)
+  begin = load_int(cx, ptr, ptr_size(cx.opts))
+  length = load_int(cx, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
   return load_list_from_range(cx, begin, length, elem_type)
 
 def load_list_from_range(cx, ptr, length, elem_type):
-  trap_if(ptr != align_to(ptr, alignment(elem_type)))
-  trap_if(ptr + length * elem_size(elem_type) > len(cx.opts.memory))
+  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
+  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory))
   return load_list_from_valid_range(cx, ptr, length, elem_type)
 
 def load_list_from_valid_range(cx, ptr, length, elem_type):
   a = []
   for i in range(length):
-    a.append(load(cx, ptr + i * elem_size(elem_type), elem_type))
+    a.append(load(cx, ptr + i * elem_size(elem_type, cx.opts), elem_type))
   return a
 
 def load_record(cx, ptr, fields):
   record = {}
   for field in fields:
-    ptr = align_to(ptr, alignment(field.t))
+    ptr = align_to(ptr, alignment(field.t, cx.opts))
     record[field.label] = load(cx, ptr, field.t)
-    ptr += elem_size(field.t)
+    ptr += elem_size(field.t, cx.opts)
   return record
 
 def load_variant(cx, ptr, cases):
-  disc_size = elem_size(discriminant_type(cases))
+  disc_size = elem_size(discriminant_type(cases), cx.opts)
   case_index = load_int(cx, ptr, disc_size)
   ptr += disc_size
   trap_if(case_index >= len(cases))
   c = cases[case_index]
-  ptr = align_to(ptr, max_case_alignment(cases))
+  ptr = align_to(ptr, max_case_alignment(cases, cx.opts))
   if c.t is None:
     return { c.label: None }
   return { c.label: load(cx, ptr, c.t) }
@@ -1365,8 +1386,8 @@ def lift_async_value(ReadableEndT, cx, i, t):
 ### Storing
 
 def store(cx, v, t, ptr):
-  assert(ptr == align_to(ptr, alignment(t)))
-  assert(ptr + elem_size(t) <= len(cx.opts.memory))
+  assert(ptr == align_to(ptr, alignment(t, cx.opts)))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : store_int(cx, int(bool(v)), ptr, 1)
     case U8Type()           : store_int(cx, v, ptr, 1)
@@ -1381,15 +1402,15 @@ def store(cx, v, t, ptr):
     case F64Type()          : store_int(cx, encode_float_as_i64(v), ptr, 8)
     case CharType()         : store_int(cx, char_to_i32(v), ptr, 4)
     case StringType()       : store_string(cx, v, ptr)
-    case ErrorContextType() : store_int(cx, lower_error_context(cx, v), ptr, 4)
+    case ErrorContextType() : store_int(cx, lower_error_context(cx, v), ptr, idx_size(cx.opts))
     case ListType(t, l)     : store_list(cx, v, ptr, t, l)
     case RecordType(fields) : store_record(cx, v, ptr, fields)
     case VariantType(cases) : store_variant(cx, v, ptr, cases)
     case FlagsType(labels)  : store_flags(cx, v, ptr, labels)
-    case OwnType()          : store_int(cx, lower_own(cx, v, t), ptr, 4)
-    case BorrowType()       : store_int(cx, lower_borrow(cx, v, t), ptr, 4)
-    case StreamType(t)      : store_int(cx, lower_stream(cx, v, t), ptr, 4)
-    case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, 4)
+    case OwnType()          : store_int(cx, lower_own(cx, v, t), ptr, idx_size(cx.opts))
+    case BorrowType()       : store_int(cx, lower_borrow(cx, v, t), ptr, idx_size(cx.opts))
+    case StreamType(t)      : store_int(cx, lower_stream(cx, v, t), ptr, idx_size(cx.opts))
+    case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, idx_size(cx.opts))
 
 def store_int(cx, v, ptr, nbytes, signed = False):
   cx.opts.memory[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
@@ -1438,16 +1459,16 @@ def char_to_i32(c):
 
 def store_string(cx, v: String, ptr):
   begin, tagged_code_units = store_string_into_range(cx, v)
-  store_int(cx, begin, ptr, 4)
-  store_int(cx, tagged_code_units, ptr + 4, 4)
+  store_int(cx, begin, ptr, ptr_size(cx.opts))
+  store_int(cx, tagged_code_units, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
 
 def store_string_into_range(cx, v: String):
   src, src_encoding, src_tagged_code_units = v
 
   if src_encoding == 'latin1+utf16':
-    if bool(src_tagged_code_units & UTF16_TAG):
+    if bool(src_tagged_code_units & utf16_tag(cx.opts)):
       src_simple_encoding = 'utf16'
-      src_code_units = src_tagged_code_units ^ UTF16_TAG
+      src_code_units = src_tagged_code_units ^ utf16_tag(cx.opts)
     else:
       src_simple_encoding = 'latin1'
       src_code_units = src_tagged_code_units
@@ -1475,11 +1496,12 @@ def store_string_into_range(cx, v: String):
             case 'latin1'   : return store_string_copy(cx, src, src_code_units, 1, 2, 'latin-1')
             case 'utf16'    : return store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units)
 
-MAX_STRING_BYTE_LENGTH = (1 << 31) - 1
+def max_string_byte_length(opts):  # largest byte length that leaves the utf16_tag(opts) bit clear
+  return (1 << (ptr_size(opts) * 8 - 1)) - 1
 
 def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment, dst_encoding):
   dst_byte_length = dst_code_unit_size * src_code_units
-  trap_if(dst_byte_length > MAX_STRING_BYTE_LENGTH)
+  trap_if(dst_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
   trap_if(ptr + dst_byte_length > len(cx.opts.memory))
@@ -1497,14 +1519,14 @@ def store_latin1_to_utf8(cx, src, src_code_units):
   return store_string_to_utf8(cx, src, src_code_units, worst_case_size)
 
 def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
-  assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
+  assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 1, src_code_units)
   trap_if(ptr + src_code_units > len(cx.opts.memory))
   for i,code_point in enumerate(src):
     if ord(code_point) < 2**7:
       cx.opts.memory[ptr + i] = ord(code_point)
     else:
-      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+      trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
       trap_if(ptr + worst_case_size > len(cx.opts.memory))
       encoded = src.encode('utf-8')
@@ -1517,7 +1539,7 @@ def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
 
 def store_utf8_to_utf16(cx, src, src_code_units):
   worst_case_size = 2 * src_code_units
-  trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+  trap_if(worst_case_size > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + worst_case_size > len(cx.opts.memory))
@@ -1531,7 +1553,7 @@ def store_utf8_to_utf16(cx, src, src_code_units):
   return (ptr, code_units)
 
 def store_string_to_latin1_or_utf16(cx, src, src_code_units):
-  assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
+  assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_code_units)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + src_code_units > len(cx.opts.memory))
@@ -1542,7 +1564,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
-      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+      trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
       trap_if(ptr + worst_case_size > len(cx.opts.memory))
@@ -1555,7 +1577,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
         ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
         trap_if(ptr != align_to(ptr, 2))
         trap_if(ptr + len(encoded) > len(cx.opts.memory))
-      tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
+      tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
       return (ptr, tagged_code_units)
   if dst_byte_length < src_code_units:
     ptr = cx.opts.realloc(ptr, src_code_units, 2, dst_byte_length)
@@ -1565,14 +1587,14 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
 
 def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   src_byte_length = 2 * src_code_units
-  trap_if(src_byte_length > MAX_STRING_BYTE_LENGTH)
+  trap_if(src_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + src_byte_length > len(cx.opts.memory))
   encoded = src.encode('utf-16-le')
   cx.opts.memory[ptr : ptr+len(encoded)] = encoded
   if any(ord(c) >= (1 << 8) for c in src):
-    tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
+    tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
     return (ptr, tagged_code_units)
   latin1_size = int(len(encoded) / 2)
   for i in range(latin1_size):
@@ -1590,34 +1612,34 @@ def store_list(cx, v, ptr, elem_type, maybe_length):
     store_list_into_valid_range(cx, v, ptr, elem_type)
     return
   begin, length = store_list_into_range(cx, v, elem_type)
-  store_int(cx, begin, ptr, 4)
-  store_int(cx, length, ptr + 4, 4)
+  store_int(cx, begin, ptr, ptr_size(cx.opts))
+  store_int(cx, length, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
 
 def store_list_into_range(cx, v, elem_type):
-  byte_length = len(v) * elem_size(elem_type)
-  trap_if(byte_length >= (1 << 32))
-  ptr = cx.opts.realloc(0, 0, alignment(elem_type), byte_length)
-  trap_if(ptr != align_to(ptr, alignment(elem_type)))
+  byte_length = len(v) * elem_size(elem_type, cx.opts)
+  trap_if(byte_length >= (1 << (ptr_size(cx.opts) * 8)))
+  ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
+  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
   trap_if(ptr + byte_length > len(cx.opts.memory))
   store_list_into_valid_range(cx, v, ptr, elem_type)
   return (ptr, len(v))
 
 def store_list_into_valid_range(cx, v, ptr, elem_type):
   for i,e in enumerate(v):
-    store(cx, e, elem_type, ptr + i * elem_size(elem_type))
+    store(cx, e, elem_type, ptr + i * elem_size(elem_type, cx.opts))
 
 def store_record(cx, v, ptr, fields):
   for f in fields:
-    ptr = align_to(ptr, alignment(f.t))
+    ptr = align_to(ptr, alignment(f.t, cx.opts))
     store(cx, v[f.label], f.t, ptr)
-    ptr += elem_size(f.t)
+    ptr += elem_size(f.t, cx.opts)
 
 def store_variant(cx, v, ptr, cases):
   case_index, case_value = match_case(v, cases)
-  disc_size = elem_size(discriminant_type(cases))
+  disc_size = elem_size(discriminant_type(cases), cx.opts)
   store_int(cx, case_index, ptr, disc_size)
   ptr += disc_size
-  ptr = align_to(ptr, max_case_alignment(cases))
+  ptr = align_to(ptr, max_case_alignment(cases, cx.opts))
   c = cases[case_index]
   if c.t is not None:
     store(cx, case_value, c.t, ptr)
@@ -1669,40 +1691,40 @@ def lower_future(cx, v, t):
 MAX_FLAT_RESULTS = 1
 
 def flatten_functype(opts, ft, context):
-  flat_params = flatten_types(ft.param_types())
-  flat_results = flatten_types(ft.result_type())
+  flat_params = flatten_types(ft.param_types(), opts)
+  flat_results = flatten_types(ft.result_type(), opts)
   if not opts.async_:
     if len(flat_params) > MAX_FLAT_PARAMS:
-      flat_params = ['i32']
+      flat_params = [ptr_type(opts)]
     if len(flat_results) > MAX_FLAT_RESULTS:
       match context:
         case 'lift':
-          flat_results = ['i32']
+          flat_results = [ptr_type(opts)]
         case 'lower':
-          flat_params += ['i32']
+          flat_params += [ptr_type(opts)]
           flat_results = []
     return CoreFuncType(flat_params, flat_results)
   else:
     match context:
       case 'lift':
         if len(flat_params) > MAX_FLAT_PARAMS:
-          flat_params = ['i32']
+          flat_params = [ptr_type(opts)]
         if opts.callback:
           flat_results = ['i32']
         else:
           flat_results = []
       case 'lower':
         if len(flat_params) > MAX_FLAT_ASYNC_PARAMS:
-          flat_params = ['i32']
+          flat_params = [ptr_type(opts)]
         if len(flat_results) > 0:
-          flat_params += ['i32']
+          flat_params += [ptr_type(opts)]
         flat_results = ['i32']
     return CoreFuncType(flat_params, flat_results)
 
-def flatten_types(ts):
-  return [ft for t in ts for ft in flatten_type(t)]
+def flatten_types(ts, opts):
+  return [ft for t in ts for ft in flatten_type(t, opts)]
 
-def flatten_type(t):
+def flatten_type(t, opts):
   match despecialize(t):
     case BoolType()                       : return ['i32']
     case U8Type() | U16Type() | U32Type() : return ['i32']
@@ -1711,36 +1733,36 @@ def flatten_type(t):
     case F32Type()                        : return ['f32']
     case F64Type()                        : return ['f64']
     case CharType()                       : return ['i32']
-    case StringType()                     : return ['i32', 'i32']
-    case ErrorContextType()               : return ['i32']
-    case ListType(t, l)                   : return flatten_list(t, l)
-    case RecordType(fields)               : return flatten_record(fields)
-    case VariantType(cases)               : return flatten_variant(cases)
+    case StringType()                     : return [ptr_type(opts), ptr_type(opts)]
+    case ErrorContextType()               : return [idx_type(opts)]
+    case ListType(t, l)                   : return flatten_list(t, l, opts)
+    case RecordType(fields)               : return flatten_record(fields, opts)
+    case VariantType(cases)               : return flatten_variant(cases, opts)
     case FlagsType(labels)                : return ['i32']
-    case OwnType() | BorrowType()         : return ['i32']
-    case StreamType() | FutureType()      : return ['i32']
+    case OwnType() | BorrowType()         : return [idx_type(opts)]
+    case StreamType() | FutureType()      : return [idx_type(opts)]
 
-def flatten_list(elem_type, maybe_length):
+def flatten_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
-    return flatten_type(elem_type) * maybe_length
-  return ['i32', 'i32']
+    return flatten_type(elem_type, opts) * maybe_length
+  return [ptr_type(opts), ptr_type(opts)]
 
-def flatten_record(fields):
+def flatten_record(fields, opts):
   flat = []
   for f in fields:
-    flat += flatten_type(f.t)
+    flat += flatten_type(f.t, opts)
   return flat
 
-def flatten_variant(cases):
+def flatten_variant(cases, opts):
   flat = []
   for c in cases:
     if c.t is not None:
-      for i,ft in enumerate(flatten_type(c.t)):
+      for i,ft in enumerate(flatten_type(c.t, opts)):
         if i < len(flat):
           flat[i] = join(flat[i], ft)
         else:
           flat.append(ft)
-  return flatten_type(discriminant_type(cases)) + flat
+  return flatten_type(discriminant_type(cases), opts) + flat
 
 def join(a, b):
   if a == b: return a
@@ -1786,15 +1808,15 @@ def lift_flat(cx, vi, t):
     case F64Type()          : return canonicalize_nan64(vi.next('f64'))
     case CharType()         : return convert_i32_to_char(cx, vi.next('i32'))
     case StringType()       : return lift_flat_string(cx, vi)
-    case ErrorContextType() : return lift_error_context(cx, vi.next('i32'))
+    case ErrorContextType() : return lift_error_context(cx, vi.next(idx_type(cx.opts)))
     case ListType(t, l)     : return lift_flat_list(cx, vi, t, l)
     case RecordType(fields) : return lift_flat_record(cx, vi, fields)
     case VariantType(cases) : return lift_flat_variant(cx, vi, cases)
     case FlagsType(labels)  : return lift_flat_flags(vi, labels)
-    case OwnType()          : return lift_own(cx, vi.next('i32'), t)
-    case BorrowType()       : return lift_borrow(cx, vi.next('i32'), t)
-    case StreamType(t)      : return lift_stream(cx, vi.next('i32'), t)
-    case FutureType(t)      : return lift_future(cx, vi.next('i32'), t)
+    case OwnType()          : return lift_own(cx, vi.next(idx_type(cx.opts)), t)
+    case BorrowType()       : return lift_borrow(cx, vi.next(idx_type(cx.opts)), t)
+    case StreamType(t)      : return lift_stream(cx, vi.next(idx_type(cx.opts)), t)
+    case FutureType(t)      : return lift_future(cx, vi.next(idx_type(cx.opts)), t)
 
 def lift_flat_unsigned(vi, core_width, t_width):
   i = vi.next('i' + str(core_width))
@@ -1810,8 +1832,8 @@ def lift_flat_signed(vi, core_width, t_width):
   return i
 
 def lift_flat_string(cx, vi):
-  ptr = vi.next('i32')
-  packed_length = vi.next('i32')
+  ptr = vi.next(ptr_type(cx.opts))
+  packed_length = vi.next(ptr_type(cx.opts))
   return load_string_from_range(cx, ptr, packed_length)
 
 def lift_flat_list(cx, vi, elem_type, maybe_length):
@@ -1820,8 +1842,8 @@ def lift_flat_list(cx, vi, elem_type, maybe_length):
     for i in range(maybe_length):
       a.append(lift_flat(cx, vi, elem_type))
     return a
-  ptr = vi.next('i32')
-  length = vi.next('i32')
+  ptr = vi.next(ptr_type(cx.opts))
+  length = vi.next(ptr_type(cx.opts))
   return load_list_from_range(cx, ptr, length, elem_type)
 
 def lift_flat_record(cx, vi, fields):
@@ -1831,7 +1853,7 @@ def lift_flat_record(cx, vi, fields):
   return record
 
 def lift_flat_variant(cx, vi, cases):
-  flat_types = flatten_variant(cases)
+  flat_types = flatten_variant(cases, cx.opts)
   assert(flat_types.pop(0) == 'i32')
   case_index = vi.next('i32')
   trap_if(case_index >= len(cases))
@@ -1917,14 +1939,14 @@ def lower_flat_record(cx, v, fields):
 
 def lower_flat_variant(cx, v, cases):
   case_index, case_value = match_case(v, cases)
-  flat_types = flatten_variant(cases)
+  flat_types = flatten_variant(cases, cx.opts)
   assert(flat_types.pop(0) == 'i32')
   c = cases[case_index]
   if c.t is None:
     payload = []
   else:
     payload = lower_flat(cx, case_value, c.t)
-    for i,(fv,have) in enumerate(zip(payload, flatten_type(c.t))):
+    for i,(fv,have) in enumerate(zip(payload, flatten_type(c.t, cx.opts))):
       want = flat_types.pop(0)
       match (have, want):
         case ('f32', 'i32') : payload[i] = encode_float_as_i32(fv)
@@ -1943,30 +1965,30 @@ def lower_flat_flags(v, labels):
 ### Lifting and Lowering Values
 
 def lift_flat_values(cx, max_flat, vi, ts):
-  flat_types = flatten_types(ts)
+  flat_types = flatten_types(ts, cx.opts)
   if len(flat_types) > max_flat:
-    ptr = vi.next('i32')
+    ptr = vi.next(ptr_type(cx.opts))
     tuple_type = TupleType(ts)
-    trap_if(ptr != align_to(ptr, alignment(tuple_type)))
-    trap_if(ptr + elem_size(tuple_type) > len(cx.opts.memory))
+    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
     return list(load(cx, ptr, tuple_type).values())
   else:
     return [ lift_flat(cx, vi, t) for t in ts ]
 
 def lower_flat_values(cx, max_flat, vs, ts, out_param = None):
   cx.inst.may_leave = False
-  flat_types = flatten_types(ts)
+  flat_types = flatten_types(ts, cx.opts)
   if len(flat_types) > max_flat:
     tuple_type = TupleType(ts)
     tuple_value = {str(i): v for i,v in enumerate(vs)}
     if out_param is None:
-      ptr = cx.opts.realloc(0, 0, alignment(tuple_type), elem_size(tuple_type))
+      ptr = cx.opts.realloc(0, 0, alignment(tuple_type, cx.opts), elem_size(tuple_type, cx.opts))
       flat_vals = [ptr]
     else:
-      ptr = out_param.next('i32')
+      ptr = out_param.next(ptr_type(cx.opts))
       flat_vals = []
-    trap_if(ptr != align_to(ptr, alignment(tuple_type)))
-    trap_if(ptr + elem_size(tuple_type) > len(cx.opts.memory))
+    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
     store(cx, tuple_value, tuple_type, ptr)
   else:
     flat_vals = []
@@ -2177,14 +2199,14 @@ def canon_resource_rep(rt, thread, i):
 ### 🔀 `canon context.get`
 
 def canon_context_get(t, i, thread):
-  assert(t == 'i32')
+  assert(t == 'i32' or t == 'i64')
   assert(i < Thread.CONTEXT_LENGTH)
   return [thread.context[i]]
 
 ### 🔀 `canon context.set`
 
 def canon_context_set(t, i, thread, v):
-  assert(t == 'i32')
+  assert(t == 'i32' or t == 'i64')
   assert(i < Thread.CONTEXT_LENGTH)
   thread.context[i] = v
   return []
@@ -2240,24 +2262,24 @@ def canon_waitable_set_new(thread):
 
 ### 🔀 `canon waitable-set.wait`
 
-def canon_waitable_set_wait(cancellable, mem, thread, si, ptr):
+def canon_waitable_set_wait(cancellable, mem, opts, thread, si, ptr):
   trap_if(not thread.task.inst.may_leave)
   trap_if(not thread.task.may_block())
   wset = thread.task.inst.handles.get(si)
   trap_if(not isinstance(wset, WaitableSet))
   event = thread.task.wait_until(lambda: True, thread, wset, cancellable)
-  return unpack_event(mem, thread, ptr, event)
+  return unpack_event(mem, opts, thread, ptr, event)
 
-def unpack_event(mem, thread, ptr, e: EventTuple):
+def unpack_event(mem, opts, thread, ptr, e: EventTuple):
   event, p1, p2 = e
-  cx = LiftLowerContext(LiftLowerOptions(memory = mem), thread.task.inst)
+  cx = LiftLowerContext(LiftLowerOptions(memory = mem, addr_type = opts.addr_type, tbl_idx_type = opts.tbl_idx_type), thread.task.inst)
   store(cx, p1, U32Type(), ptr)
   store(cx, p2, U32Type(), ptr + 4)
   return [event]
 
 ### 🔀 `canon waitable-set.poll`
 
-def canon_waitable_set_poll(cancellable, mem, thread, si, ptr):
+def canon_waitable_set_poll(cancellable, mem, opts, thread, si, ptr):
   trap_if(not thread.task.inst.may_leave)
   wset = thread.task.inst.handles.get(si)
   trap_if(not isinstance(wset, WaitableSet))
@@ -2267,7 +2289,7 @@ def canon_waitable_set_poll(cancellable, mem, thread, si, ptr):
     event = (EventCode.NONE, 0, 0)
   else:
     event = wset.get_pending_event()
-  return unpack_event(mem, thread, ptr, event)
+  return unpack_event(mem, opts, thread, ptr, event)
 
 ### 🔀 `canon waitable-set.drop`
 
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index cd7ee74a..07b0b5c4 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -35,7 +35,7 @@ def realloc(self, original_ptr, original_size, alignment, new_size):
     self.memory[ret : ret + original_size] = self.memory[original_ptr : original_ptr + original_size]
     return ret
 
-def mk_opts(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None, sync_task_return = False, async_ = False):
+def mk_opts(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None, sync_task_return = False, async_ = False, addr_type = 'i32', tbl_idx_type = 'i32'):
   opts = CanonicalOptions()
   opts.memory = memory
   opts.string_encoding = encoding
@@ -44,10 +44,12 @@ def mk_opts(memory = bytearray(), encoding = 'utf8', realloc = None, post_return
   opts.sync_task_return = sync_task_return
   opts.async_ = async_
   opts.callback = None
+  opts.addr_type = addr_type
+  opts.tbl_idx_type = tbl_idx_type
   return opts
 
-def mk_cx(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None):
-  opts = mk_opts(memory, encoding, realloc, post_return)
+def mk_cx(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None, addr_type = 'i32', tbl_idx_type = 'i32'):
+  opts = mk_opts(memory, encoding, realloc, post_return, addr_type=addr_type, tbl_idx_type=tbl_idx_type)
   inst = ComponentInstance(Store())
   return LiftLowerContext(opts, inst)
 
@@ -132,7 +134,7 @@ def test_name():
   heap = Heap(5*len(cx.opts.memory))
   if dst_encoding is None:
     dst_encoding = cx.opts.string_encoding
-  cx = mk_cx(heap.memory, dst_encoding, heap.realloc)
+  cx = mk_cx(heap.memory, dst_encoding, heap.realloc, addr_type=cx.opts.addr_type, tbl_idx_type=cx.opts.tbl_idx_type)
   lowered_vals = lower_flat(cx, v, lower_t)
 
   vi = CoreValueIter(lowered_vals)
@@ -243,32 +245,32 @@ def test_nan64(inbits, outbits):
 test_nan64(0x7ff0000000000000, 0x7ff0000000000000)
 test_nan64(0x3ff0000000000000, 0x3ff0000000000000)
 
-def test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units):
+def test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type='i32'):
   heap = Heap(len(encoded))
   heap.memory[:] = encoded[:]
-  cx = mk_cx(heap.memory, src_encoding)
+  cx = mk_cx(heap.memory, src_encoding, addr_type=addr_type)
   v = (s, src_encoding, tagged_code_units)
   test(StringType(), [0, tagged_code_units], v, cx, dst_encoding)
 
-def test_string(src_encoding, dst_encoding, s):
+def test_string(src_encoding, dst_encoding, s, addr_type='i32'):
   if src_encoding == 'utf8':
     encoded = s.encode('utf-8')
     tagged_code_units = len(encoded)
-    test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units)
+    test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type)
   elif src_encoding == 'utf16':
     encoded = s.encode('utf-16-le')
     tagged_code_units = int(len(encoded) / 2)
-    test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units)
+    test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type)
   elif src_encoding == 'latin1+utf16':
     try:
       encoded = s.encode('latin-1')
       tagged_code_units = len(encoded)
-      test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units)
+      test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type)
     except UnicodeEncodeError:
       pass
     encoded = s.encode('utf-16-le')
-    tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
-    test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units)
+    tagged_code_units = int(len(encoded) / 2) | utf16_tag(LiftLowerOptions(addr_type=addr_type))
+    test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type)
 
 encodings = ['utf8', 'utf16', 'latin1+utf16']
 
@@ -276,14 +278,15 @@ def test_string(src_encoding, dst_encoding, s):
                '\u01ffy', 'xy\u01ff', 'a\ud7ffb', 'a\u02ff\u03ff\u04ffbc',
                '\uf123', '\uf123\uf123abc', 'abcdef\uf123']
 
-for src_encoding in encodings:
-  for dst_encoding in encodings:
-    for s in fun_strings:
-      test_string(src_encoding, dst_encoding, s)
+for addr_type in ['i32', 'i64']:
+  for src_encoding in encodings:
+    for dst_encoding in encodings:
+      for s in fun_strings:
+        test_string(src_encoding, dst_encoding, s, addr_type)
 
-def test_heap(t, expect, args, byte_array):
+def test_heap(t, expect, args, byte_array, addr_type='i32', tbl_idx_type='i32'):
   heap = Heap(byte_array)
-  cx = mk_cx(heap.memory)
+  cx = mk_cx(heap.memory, addr_type=addr_type, tbl_idx_type=tbl_idx_type)
   test(t, args, expect, cx)
 
 # Empty record types are not permitted yet.
@@ -309,15 +312,34 @@ def test_heap(t, expect, args, byte_array):
 test_heap(ListType(StringType()), [mk_str("hi"),mk_str("wat")], [0,2],
           [16,0,0,0, 2,0,0,0, 21,0,0,0, 3,0,0,0,
            ord('h'), ord('i'),   0xf,0xf,0xf,   ord('w'), ord('a'), ord('t')])
+test_heap(ListType(StringType()), [mk_str("hi"),mk_str("wat")], [0,2],
+          [32,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0,
+           37,0,0,0,0,0,0,0, 3,0,0,0,0,0,0,0,
+           ord('h'), ord('i'),   0xf,0xf,0xf,   ord('w'), ord('a'), ord('t')],
+          addr_type='i64')
 test_heap(ListType(ListType(U8Type())), [[3,4,5],[],[6,7]], [0,3],
           [24,0,0,0, 3,0,0,0, 0,0,0,0, 0,0,0,0, 27,0,0,0, 2,0,0,0,
           3,4,5,  6,7])
+test_heap(ListType(ListType(U8Type())), [[3,4,5],[],[6,7]], [0,3],
+          [48,0,0,0,0,0,0,0, 3,0,0,0,0,0,0,0,
+           0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,
+           51,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0,
+           3,4,5, 6,7],
+          addr_type='i64')
 test_heap(ListType(ListType(U16Type())), [[5,6]], [0,1],
           [8,0,0,0, 2,0,0,0,
           5,0, 6,0])
+test_heap(ListType(ListType(U16Type())), [[5,6]], [0,1],
+          [16,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0,
+          5,0, 6,0],
+          addr_type='i64')
 test_heap(ListType(ListType(U16Type())), None, [0,1],
           [9,0,0,0, 2,0,0,0,
           0, 5,0, 6,0])
+test_heap(ListType(ListType(U16Type())), None, [0,1],
+          [17,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0,
+          0, 5,0, 6,0],
+          addr_type='i64')
 test_heap(ListType(ListType(U8Type(),2)), [[1,2],[3,4]], [0,2],
           [1,2, 3,4])
 test_heap(ListType(ListType(U32Type(),2)), [[1,2],[3,4]], [0,2],
@@ -369,21 +391,22 @@ def test_heap(t, expect, args, byte_array):
 test_heap(t, v, [0,2],
           [0xff,0xff,0xff,0xff, 0,0,0,0])
 
-def test_flatten(t, params, results):
+def test_flatten(t, params, results, addr_type='i32', tbl_idx_type='i32'):
+  opts = mk_opts(addr_type=addr_type, tbl_idx_type=tbl_idx_type)
   expect = CoreFuncType(params, results)
 
   if len(params) > definitions.MAX_FLAT_PARAMS:
-    expect.params = ['i32']
+    expect.params = [addr_type]
 
   if len(results) > definitions.MAX_FLAT_RESULTS:
-    expect.results = ['i32']
-  got = flatten_functype(CanonicalOptions(), t, 'lift')
+    expect.results = [addr_type]
+  got = flatten_functype(opts, t, 'lift')
   assert(got == expect)
 
   if len(results) > definitions.MAX_FLAT_RESULTS:
-    expect.params += ['i32']
+    expect.params += [addr_type]
     expect.results = []
-  got = flatten_functype(CanonicalOptions(), t, 'lower')
+  got = flatten_functype(opts, t, 'lower')
   assert(got == expect)
 
 test_flatten(FuncType([U8Type(),F32Type(),F64Type()],[]), ['i32','f32','f64'], [])
@@ -393,11 +416,13 @@ def test_flatten(t, params, results):
 test_flatten(FuncType([U8Type(),F32Type(),F64Type()],[TupleType([F32Type(),F32Type()])]), ['i32','f32','f64'], ['f32','f32'])
 test_flatten(FuncType([U8Type(),F32Type(),F64Type()],[F32Type(),F32Type()]), ['i32','f32','f64'], ['f32','f32'])
 test_flatten(FuncType([U8Type() for _ in range(17)],[]), ['i32' for _ in range(17)], [])
+test_flatten(FuncType([U8Type() for _ in range(17)],[]), ['i32' for _ in range(17)], [], addr_type='i64')
 test_flatten(FuncType([U8Type() for _ in range(17)],[TupleType([U8Type(),U8Type()])]), ['i32' for _ in range(17)], ['i32','i32'])
+test_flatten(FuncType([U8Type() for _ in range(17)],[TupleType([U8Type(),U8Type()])]), ['i32' for _ in range(17)], ['i32','i32'], addr_type='i64')
 
 
 def test_roundtrips():
-  def test_roundtrip(t, v):
+  def test_roundtrip(t, v, addr_type='i32', tbl_idx_type='i32'):
     before = definitions.MAX_FLAT_RESULTS
     definitions.MAX_FLAT_RESULTS = 16
 
@@ -408,9 +433,8 @@ def callee(thread, x):
       return x
 
     callee_heap = Heap(1000)
-    callee_opts = mk_opts(callee_heap.memory, 'utf8', callee_heap.realloc)
+    callee_opts = mk_opts(callee_heap.memory, 'utf8', callee_heap.realloc, addr_type=addr_type, tbl_idx_type=tbl_idx_type)
     callee_inst = ComponentInstance(store)
-    lifted_callee = partial(canon_lift, callee_opts, callee_inst, ft, callee)
 
     got = None
     def on_start():
@@ -425,17 +449,22 @@ def on_resolve(result):
 
     definitions.MAX_FLAT_RESULTS = before
 
-  test_roundtrip(S8Type(), -1)
-  test_roundtrip(TupleType([U16Type(),U16Type()]), mk_tup(3,4))
-  test_roundtrip(ListType(StringType()), [mk_str("hello there")])
-  test_roundtrip(ListType(ListType(StringType())), [[mk_str("one"),mk_str("two")],[mk_str("three")]])
-  test_roundtrip(ListType(OptionType(TupleType([StringType(),U16Type()]))), [{'some':mk_tup(mk_str("answer"),42)}])
-  test_roundtrip(VariantType([CaseType('x', TupleType([U32Type(),U32Type(),U32Type(),U32Type(),
-                                                       U32Type(),U32Type(),U32Type(),U32Type(),
-                                                       U32Type(),U32Type(),U32Type(),U32Type(),
-                                                       U32Type(),U32Type(),U32Type(),U32Type(),
-                                                       StringType()]))]),
-                       {'x': mk_tup(1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, mk_str("wat"))})
+  cases = [
+    (S8Type(), -1),
+    (TupleType([U16Type(),U16Type()]), mk_tup(3,4)),
+    (ListType(StringType()), [mk_str("hello there")]),
+    (ListType(ListType(StringType())), [[mk_str("one"),mk_str("two")],[mk_str("three")]]),
+    (ListType(OptionType(TupleType([StringType(),U16Type()]))), [{'some':mk_tup(mk_str("answer"),42)}]),
+    (VariantType([CaseType('x', TupleType([U32Type(),U32Type(),U32Type(),U32Type(),
+                                           U32Type(),U32Type(),U32Type(),U32Type(),
+                                           U32Type(),U32Type(),U32Type(),U32Type(),
+                                           U32Type(),U32Type(),U32Type(),U32Type(),
+                                           StringType()]))]),
+                  {'x': mk_tup(1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, mk_str("wat"))}),
+  ]
+  for addr_type in ['i32', 'i64']:
+    for t, v in cases:
+      test_roundtrip(t, v, addr_type=addr_type)
 
 
 def test_handles():
@@ -449,12 +478,6 @@ def dtor(thread, args):
     dtor_value = args[0]
     return []
 
-  store = Store()
-  rt = ResourceType(ComponentInstance(store), dtor) # usable in imports and exports
-  inst = ComponentInstance(store)
-  rt2 = ResourceType(inst, dtor) # only usable in exports
-  opts = mk_opts()
-
   def host_import(caller, on_start, on_resolve):
     args = on_start()
     assert(len(args) == 2)
@@ -518,34 +541,39 @@ def core_wasm(thread, args):
 
     return [h, h2, h4]
 
-  ft = FuncType([
-    OwnType(rt),
-    OwnType(rt),
-    BorrowType(rt),
-    BorrowType(rt2)
-  ],[
-    OwnType(rt),
-    OwnType(rt),
-    OwnType(rt)
-  ])
+  for tbl_idx_type in ['i32', 'i64']:
+    store = Store()
+    rt = ResourceType(ComponentInstance(store), dtor) # usable in imports and exports
+    inst = ComponentInstance(store)
+    rt2 = ResourceType(inst, dtor) # only usable in exports
+    opts = mk_opts(tbl_idx_type=tbl_idx_type)
+
+    ft = FuncType([
+      OwnType(rt),
+      OwnType(rt),
+      BorrowType(rt),
+      BorrowType(rt2)
+    ],[
+      OwnType(rt),
+      OwnType(rt),
+      OwnType(rt)
+    ])
 
-  def on_start():
-    return [ 42, 43, 44, 13 ]
+    got = None
+    def on_resolve(results):
+      nonlocal got
+      got = results
 
-  got = None
-  def on_resolve(results):
-    nonlocal got
-    got = results
+    run_lift(opts, inst, ft, core_wasm, lambda: [42, 43, 44, 13], on_resolve)
 
-  run_lift(opts, inst, ft, core_wasm, on_start, on_resolve)
+    assert(len(got) == 3)
+    assert(got[0] == 46)
+    assert(got[1] == 43)
+    assert(got[2] == 45)
+    assert(len(inst.handles.array) == 5)
+    assert(all(inst.handles.array[i] is None for i in range(4)))
+    assert(len(inst.handles.free) == 4)
 
-  assert(len(got) == 3)
-  assert(got[0] == 46)
-  assert(got[1] == 43)
-  assert(got[2] == 45)
-  assert(len(inst.handles.array) == 5)
-  assert(all(inst.handles.array[i] is None for i in range(4)))
-  assert(len(inst.handles.free) == 4)
   definitions.MAX_FLAT_RESULTS = before
 
 
@@ -617,21 +645,21 @@ def consumer(thread, args):
     fut1_1.set()
 
     waitretp = consumer_heap.realloc(0, 0, 8, 4)
-    [event] = canon_waitable_set_wait(True, consumer_heap.memory, thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, consumer_heap.memory, LiftLowerOptions(), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_heap.memory[waitretp] == subi1)
     assert(consumer_heap.memory[waitretp+4] == Subtask.State.RETURNED)
     [] = canon_subtask_drop(thread, subi1)
     fut1_2.set()
 
-    [event] = canon_waitable_set_wait(True, consumer_heap.memory, thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, consumer_heap.memory, LiftLowerOptions(), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_heap.memory[waitretp] == subi2)
     assert(consumer_heap.memory[waitretp+4] == Subtask.State.STARTED)
     assert(consumer_heap.memory[retp] == 13)
     fut2.set()
 
-    [event] = canon_waitable_set_wait(True, consumer_heap.memory, thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, consumer_heap.memory, LiftLowerOptions(), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_heap.memory[waitretp] == subi2)
     assert(consumer_heap.memory[waitretp+4] == Subtask.State.RETURNED)
@@ -852,7 +880,7 @@ def core_consumer(thread, args):
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi2)
     assert(consumer_mem[retp+4] == Subtask.State.STARTED)
@@ -864,14 +892,14 @@ def core_consumer(thread, args):
       [ret] = canon_thread_yield(True, thread)
       assert(ret == 0)
       retp = 0
-      [ret] = canon_waitable_set_poll(True, consumer_mem, thread, seti, retp)
+      [ret] = canon_waitable_set_poll(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
       assert(ret == EventCode.NONE)
 
     [ret] = canon_future_write(FutureType(None), consumer_opts, thread, wfut21, 0xdeadbeef)
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi1)
     assert(consumer_mem[retp+4] == Subtask.State.RETURNED)
@@ -885,14 +913,14 @@ def core_consumer(thread, args):
       [ret] = canon_thread_yield(True, thread)
       assert(ret == 0)
       retp = 0
-      [ret] = canon_waitable_set_poll(True, consumer_mem, thread, seti, retp)
+      [ret] = canon_waitable_set_poll(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
       assert(ret == EventCode.NONE)
 
     [ret] = canon_future_write(FutureType(None), consumer_opts, thread, wfut13, 0xdeadbeef)
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi2)
     assert(consumer_mem[retp+4] == Subtask.State.RETURNED)
@@ -909,7 +937,7 @@ def core_consumer(thread, args):
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi3)
     assert(consumer_mem[retp+4] == Subtask.State.RETURNED)
@@ -967,7 +995,7 @@ def core_caller(thread, args):
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, subi, seti)
     retp3 = 12
-    [event] = canon_waitable_set_wait(True, caller_mem, thread, seti, retp3)
+    [event] = canon_waitable_set_wait(True, caller_mem, LiftLowerOptions(), thread, seti, retp3)
     assert(event == EventCode.SUBTASK)
     assert(caller_mem[retp3+0] == subi)
     assert(caller_mem[retp3+4] == Subtask.State.RETURNED)
@@ -1034,7 +1062,7 @@ def consumer(thread, args):
       [ret] = canon_thread_yield(True, thread)
       assert(ret == 0)
       retp = 8
-      [event] = canon_waitable_set_poll(True, consumer_heap.memory, thread, seti, retp)
+      [event] = canon_waitable_set_poll(True, consumer_heap.memory, LiftLowerOptions(), thread, seti, retp)
       if event == EventCode.NONE:
         continue
       assert(event == EventCode.SUBTASK)
@@ -1120,7 +1148,7 @@ def consumer(thread, args):
     remain = [subi1, subi2]
     while remain:
       retp = 8
-      [event] = canon_waitable_set_wait(True, consumer_heap.memory, thread, seti, retp)
+      [event] = canon_waitable_set_wait(True, consumer_heap.memory, LiftLowerOptions(), thread, seti, retp)
       assert(event == EventCode.SUBTASK)
       assert(consumer_heap.memory[retp+4] == Subtask.State.RETURNED)
       subi = consumer_heap.memory[retp]
@@ -1186,14 +1214,14 @@ def core_func(thread, args):
     fut1.set()
 
     retp = lower_heap.realloc(0,0,8,4)
-    [event] = canon_waitable_set_wait(True, lower_heap.memory, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, lower_heap.memory, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(lower_heap.memory[retp] == subi1)
     assert(lower_heap.memory[retp+4] == Subtask.State.RETURNED)
 
     fut2.set()
 
-    [event] = canon_waitable_set_wait(True, lower_heap.memory, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, lower_heap.memory, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(lower_heap.memory[retp] == subi2)
     assert(lower_heap.memory[retp+4] == Subtask.State.RETURNED)
@@ -1510,7 +1538,7 @@ def core_func(thread, args):
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi1, seti)
     definitions.throw_it = True
-    [event] = canon_waitable_set_wait(True, mem, thread, seti, retp) ##
+    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp) ##
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi1)
     result,n = unpack_result(mem[retp+4])
@@ -1526,7 +1554,7 @@ def core_func(thread, args):
     assert(ret == definitions.BLOCKED)
     host_import_incoming.set_remain(100)
     [] = canon_waitable_join(thread, wsi3, seti)
-    [event] = canon_waitable_set_wait(True, mem, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[retp+0] == wsi3)
     result,n = unpack_result(mem[retp+4])
@@ -1538,7 +1566,7 @@ def core_func(thread, args):
     assert(ret == definitions.BLOCKED)
     dst_stream.set_remain(100)
     [] = canon_waitable_join(thread, wsi2, seti)
-    [event] = canon_waitable_set_wait(True, mem, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[retp+0] == wsi2)
     result,n = unpack_result(mem[retp+4])
@@ -1557,7 +1585,7 @@ def core_func(thread, args):
     [ret] = canon_stream_read(StreamType(U8Type()), opts, thread, rsi4, 0, 4)
     assert(ret == definitions.BLOCKED)
     [] = canon_waitable_join(thread, rsi4, seti)
-    [event] = canon_waitable_set_wait(True, mem, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi4)
     result,n = unpack_result(mem[retp+4])
@@ -1681,7 +1709,7 @@ def core_func(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, mem, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi)
     result,n = unpack_result(mem[retp+4])
@@ -1702,7 +1730,7 @@ def core_func(thread, args):
     assert(ret == definitions.BLOCKED)
     dst.set_remain(4)
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, mem, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[retp+0] == wsi)
     result,n = unpack_result(mem[retp+4])
@@ -1763,7 +1791,7 @@ def core_func1(thread, args):
     retp = 16
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, mem1, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem1, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem1[retp+0] == wsi)
     result,n = unpack_result(mem1[retp+4])
@@ -1774,7 +1802,7 @@ def core_func1(thread, args):
 
     fut4.set()
 
-    [event] = canon_waitable_set_wait(True, mem1, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem1, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem1[retp+0] == wsi)
     assert(mem1[retp+4] == 0)
@@ -1812,7 +1840,7 @@ def core_func2(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, mem2, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem2, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem2[retp+0] == rsi)
     result,n = unpack_result(mem2[retp+4])
@@ -1840,7 +1868,7 @@ def core_func2(thread, args):
     [ret] = canon_stream_read(StreamType(U8Type()), opts2, thread, rsi, 12345, 0)
     assert(ret == definitions.BLOCKED)
 
-    [event] = canon_waitable_set_wait(True, mem2, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem2, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem2[retp+0] == rsi)
     p2 = int.from_bytes(mem2[retp+4 : retp+8], 'little', signed=False)
@@ -1886,7 +1914,7 @@ def core_func1(thread, args):
     retp = 16
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, mem1, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem1, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem1[retp+0] == wsi)
     result,n = unpack_result(mem1[retp+4])
@@ -1923,7 +1951,7 @@ def core_func2(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, mem2, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem2, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem2[retp+0] == rsi)
     result,n = unpack_result(mem2[retp+4])
@@ -2040,7 +2068,7 @@ def core_func(thread, args):
     host_source.unblock_cancel()
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, mem, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi)
     result,n = unpack_result(mem[retp+4])
@@ -2145,7 +2173,7 @@ def core_func(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rfi, seti)
-    [event] = canon_waitable_set_wait(True, mem, thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
     assert(event == EventCode.FUTURE_READ)
     assert(mem[retp+0] == rfi)
     assert(mem[retp+4] == CopyResult.COMPLETED)
@@ -2211,7 +2239,7 @@ def core_callee1(thread, args):
   def core_callee2(thread, args):
     [x] = args
     [si] = canon_waitable_set_new(thread)
-    [ret] = canon_waitable_set_wait(True, callee_heap.memory, thread, si, 0)
+    [ret] = canon_waitable_set_wait(True, callee_heap.memory, LiftLowerOptions(), thread, si, 0)
     assert(ret == EventCode.TASK_CANCELLED)
     match x:
       case 1:
@@ -2258,9 +2286,9 @@ def core_callee4(thread, args):
     except Trap:
       pass
     [seti] = canon_waitable_set_new(thread)
-    [result] = canon_waitable_set_wait(True, callee_heap.memory, thread, seti, 0)
+    [result] = canon_waitable_set_wait(True, callee_heap.memory, LiftLowerOptions(), thread, seti, 0)
     assert(result == EventCode.TASK_CANCELLED)
-    [result] = canon_waitable_set_poll(True, callee_heap.memory, thread, seti, 0)
+    [result] = canon_waitable_set_poll(True, callee_heap.memory, LiftLowerOptions(), thread, seti, 0)
     assert(result == EventCode.NONE)
     [] = canon_task_cancel(thread)
     return []
@@ -2395,7 +2423,7 @@ def core_caller(thread, args):
     assert(caller_heap.memory[0] == 13)
     [] = canon_waitable_join(thread, subi3, seti)
     retp = 8
-    [ret] = canon_waitable_set_wait(True, caller_heap.memory, thread, seti, retp)
+    [ret] = canon_waitable_set_wait(True, caller_heap.memory, LiftLowerOptions(), thread, seti, retp)
     assert(ret == EventCode.SUBTASK)
     assert(caller_heap.memory[retp+0] == subi3)
     assert(caller_heap.memory[retp+4] == Subtask.State.RETURNED)
@@ -2414,7 +2442,7 @@ def core_caller(thread, args):
     assert(caller_heap.memory[0] == 13)
     [] = canon_waitable_join(thread, subi4, seti)
     retp = 8
-    [ret] = canon_waitable_set_wait(True, caller_heap.memory, thread, seti, retp)
+    [ret] = canon_waitable_set_wait(True, caller_heap.memory, LiftLowerOptions(), thread, seti, retp)
     assert(ret == EventCode.SUBTASK)
     assert(caller_heap.memory[retp+0] == subi4)
     assert(caller_heap.memory[retp+4] == Subtask.State.CANCELLED_BEFORE_RETURNED)
@@ -2456,7 +2484,7 @@ def core_caller(thread, args):
     host_fut4.set()
     [] = canon_waitable_join(thread, subi, seti)
     waitretp = 4
-    [event] = canon_waitable_set_wait(True, caller_heap.memory, thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, caller_heap.memory, LiftLowerOptions(), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(caller_heap.memory[waitretp] == subi)
     assert(caller_heap.memory[waitretp+4] == Subtask.State.CANCELLED_BEFORE_RETURNED)
@@ -2472,7 +2500,7 @@ def core_caller(thread, args):
     host_fut5.set()
     [] = canon_waitable_join(thread, subi, seti)
     waitretp = 4
-    [event] = canon_waitable_set_wait(True, caller_heap.memory, thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, caller_heap.memory, LiftLowerOptions(), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(caller_heap.memory[waitretp] == subi)
     assert(caller_heap.memory[waitretp+4] == Subtask.State.RETURNED)
@@ -2487,7 +2515,7 @@ def core_caller(thread, args):
     assert(ret == definitions.BLOCKED)
 
     [] = canon_waitable_join(thread, subi, seti)
-    [event] = canon_waitable_set_wait(True, caller_heap.memory, thread, seti, 4)
+    [event] = canon_waitable_set_wait(True, caller_heap.memory, LiftLowerOptions(), thread, seti, 4)
     assert(event == EventCode.SUBTASK)
     assert(caller_heap.memory[0] == 45)
     assert(caller_heap.memory[4] == subi)
@@ -2534,7 +2562,7 @@ def core_func(thread, args):
     [] = canon_future_drop_readable(FutureType(elemt), thread, rfi)
 
     [] = canon_waitable_join(thread, wfi, seti)
-    [event] = canon_waitable_set_wait(True, mem, thread, seti, 0)
+    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, 0)
     assert(event == EventCode.FUTURE_WRITE)
     assert(mem[0] == wfi)
     assert(mem[4] == CopyResult.COMPLETED)
@@ -2554,7 +2582,7 @@ def core_func(thread, args):
     [] = canon_stream_drop_readable(StreamType(elemt), thread, rsi)
 
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, mem, thread, seti, 0)
+    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, 0)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[0] == wsi)
     result,n = unpack_result(mem[4])
@@ -2568,6 +2596,7 @@ def core_func(thread, args):
   run_lift(sync_opts, inst, ft, core_func, lambda:[], lambda _:())
 
 
+
 def test_async_flat_params():
   store = Store()
   heap = Heap(1000)
@@ -2745,14 +2774,14 @@ def core_consumer(thread, args):
     retp3 = 16
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, subi1, seti)
-    [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp3)
+    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp3)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp3] == subi1)
     assert(consumer_mem[retp3+4] == Subtask.State.RETURNED)
     assert(consumer_mem[retp1] == 42)
 
     [] = canon_waitable_join(thread, subi2, seti)
-    [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp3)
+    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp3)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp3] == subi2)
     assert(consumer_mem[retp3+4] == Subtask.State.RETURNED)
@@ -2801,6 +2830,47 @@ def mk_task(supertask, inst):
   assert(call_might_be_recursive(p_task, c2))
 
 
+def test_mixed_table_memory_types():
+  store = Store()
+  rt = ResourceType(ComponentInstance(store), None)
+
+  # Mixed configs: string layout follows addr_type, own-handle layout follows tbl_idx_type
+  opts64_addr = LiftLowerOptions(addr_type='i64', tbl_idx_type='i32')
+  assert(alignment(StringType(), opts64_addr) == 8)
+  assert(elem_size(StringType(), opts64_addr) == 16)
+  assert(alignment(OwnType(rt), opts64_addr) == 4)
+  assert(elem_size(OwnType(rt), opts64_addr) == 4)
+
+  opts64_tbl = LiftLowerOptions(addr_type='i32', tbl_idx_type='i64')
+  assert(alignment(StringType(), opts64_tbl) == 4)
+  assert(elem_size(StringType(), opts64_tbl) == 8)
+  assert(alignment(OwnType(rt), opts64_tbl) == 8)
+  assert(elem_size(OwnType(rt), opts64_tbl) == 8)
+
+  # Round-trip a tuple holding memory pointers (list, string) and table indices (own handles)
+  before = definitions.MAX_FLAT_RESULTS
+  definitions.MAX_FLAT_RESULTS = 16
+  t = TupleType([ListType(OwnType(rt)), StringType()])
+
+  def core_wasm(thread, args):
+    return args
+
+  for addr_type, tbl_idx_type in [('i64','i32'), ('i32','i64')]:
+    heap = Heap(1000)
+    inst = ComponentInstance(store)
+    opts = mk_opts(heap.memory, 'utf8', heap.realloc, addr_type=addr_type, tbl_idx_type=tbl_idx_type)
+
+    ft = FuncType([t], [t])
+    v = {'0': [42, 43], '1': mk_str("hello")}
+    got = None
+    def on_resolve(results):
+      nonlocal got
+      got = results
+    run_lift(opts, inst, ft, core_wasm, lambda: [v], on_resolve)
+    assert(got[0] == v)
+
+  definitions.MAX_FLAT_RESULTS = before
+
 test_roundtrips()
 test_handles()
 test_async_to_async()
@@ -2827,5 +2897,6 @@ def mk_task(supertask, inst):
 test_threads()
 test_thread_cancel_callback()
 test_reentrance()
+test_mixed_table_memory_types()
 
 print("All tests passed")
diff --git a/test/wasm-tools/memory64.wast b/test/wasm-tools/memory64.wast
index 0ec55341..a72eb3b3 100644
--- a/test/wasm-tools/memory64.wast
+++ b/test/wasm-tools/memory64.wast
@@ -42,13 +42,14 @@
   (core instance (instantiate $B (with "" (instance (export "" (table $m))))))
 )
 
-(assert_invalid
-  (component
-    (import "x" (func $x (param "x" string)))
-    (core module $A
-      (memory (export "m") i64 1))
-    (core instance $A (instantiate $A))
-    (alias core export $A "m" (core memory $m))
-    (core func (canon lower (func $x) (memory $m)))
+(component
+  (import "x" (func $x (param "x" string)))
+  (core module $A
+    (memory (export "m") i64 1)
+    (func (export "realloc") (param i64 i64 i64 i64) (result i64) unreachable)
   )
-  "canonical ABI memory is not a 32-bit linear memory")
+  (core instance $A (instantiate $A))
+  (alias core export $A "m" (core memory $m))
+  (core func $realloc (alias core export $A "realloc"))
+  (core func (canon lower (func $x) (memory $m) (realloc (func $realloc))))
+)

From 2d35ea9a94fe91922189ab53b21263e0b734ef57 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Wed, 18 Mar 2026 12:46:02 +0000
Subject: [PATCH 02/25] Correct types that are in host tables.

---
 design/mvp/CanonicalABI.md              | 140 ++++++++++++-----------
 design/mvp/Concurrency.md               |  30 ++---
 design/mvp/Explainer.md                 | 143 ++++++++++++------------
 design/mvp/canonical-abi/definitions.py |  50 ++++-----
 design/mvp/canonical-abi/run_tests.py   |   4 +-
 5 files changed, 188 insertions(+), 179 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 0bf9eae9..0ae383da 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -259,10 +259,10 @@ The `equal` static method is used by `task.return` below to dynamically
 compare equality of just this subset of `canonopt`.
 
 The `addr_type` is `'i32'` when the `memory` canonopt refers to a memory32
-and `'i64'` when it refers to a memory64. The `tbl_idx_type` is `'i32'` when
-the `table` canonopt refers to a table32 and `'i64'` when it refers to a
-table64. These two dimensions are independent (e.g., a 64-bit memory with
-32-bit table indices is valid).
+and `'i64'` when it refers to a memory64. The `tbl_idx_type` is `'i32'` by
+default and `'i64'` when the `table64` canonopt is present. These two
+dimensions are independent (e.g., a 64-bit memory with 32-bit table indices
+is valid).
 
 The following helper functions return the byte size and core value type of
 memory pointers and table indices, based on the options:
@@ -468,11 +468,10 @@ The other fields of `ComponentInstance` are described below as they are used.
 #### Table State
 
 The `Table` class encapsulates a mutable, growable array of opaque elements
-that are represented in Core WebAssembly as `i32` or `i64` indices into the
-array (based on the `tbl_idx_type` canonopt). Currently, every component
-instance contains two tables: a `threads` table containing all the
-component's [threads](#thread-state) and a `handles` table containing
-everything else ([resource handles](#resource-state),
+that are represented in Core WebAssembly as `i32` indices into the array.
+Currently, every component instance contains two tables: a `threads` table
+containing all the component's [threads](#thread-state) and a `handles`
+table containing everything else ([resource handles](#resource-state),
 [waitables and waitable sets](#waitable-state) and
 [error contexts](#-canon-error-contextnew)).
 ```python
@@ -521,9 +520,9 @@ that are used in preference to growing the table. The free list is represented
 as a Python list here, but an optimizing implementation could instead store the
 free list in the free elements of `array`.
 
-The limit of `2**28` ensures that the high 4 bits of `i32` table indices are
-unset and available for other use in guest code (e.g., for tagging, packed
-words or sentinel values).
+The limit of `2**28` ensures that the high 4 bits of table indices are unset
+and available for other use in guest code (e.g., for tagging, packed words or
+sentinel values).
 
 
 #### Resource State
@@ -1902,13 +1901,13 @@ def alignment(t, opts):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return ptr_size(opts)
-    case ErrorContextType()          : return idx_size(opts)
+    case ErrorContextType()          : return 4
     case ListType(t, l)              : return alignment_list(t, l, opts)
     case RecordType(fields)          : return alignment_record(fields, opts)
     case VariantType(cases)          : return alignment_variant(cases, opts)
     case FlagsType(labels)           : return alignment_flags(labels)
-    case OwnType() | BorrowType()    : return idx_size(opts)
-    case StreamType() | FutureType() : return idx_size(opts)
+    case OwnType() | BorrowType()    : return 4
+    case StreamType() | FutureType() : return 4
 ```
 
 List alignment is the same as tuple alignment when the length is fixed and
@@ -1988,13 +1987,13 @@ def elem_size(t, opts):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return 2 * ptr_size(opts)
-    case ErrorContextType()          : return idx_size(opts)
+    case ErrorContextType()          : return 4
     case ListType(t, l)              : return elem_size_list(t, l, opts)
     case RecordType(fields)          : return elem_size_record(fields, opts)
     case VariantType(cases)          : return elem_size_variant(cases, opts)
     case FlagsType(labels)           : return elem_size_flags(labels)
-    case OwnType() | BorrowType()    : return idx_size(opts)
-    case StreamType() | FutureType() : return idx_size(opts)
+    case OwnType() | BorrowType()    : return 4
+    case StreamType() | FutureType() : return 4
 
 def elem_size_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
@@ -2054,15 +2053,15 @@ def load(cx, ptr, t):
     case F64Type()          : return decode_i64_as_float(load_int(cx, ptr, 8))
     case CharType()         : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
     case StringType()       : return load_string(cx, ptr)
-    case ErrorContextType() : return lift_error_context(cx, load_int(cx, ptr, idx_size(cx.opts)))
+    case ErrorContextType() : return lift_error_context(cx, load_int(cx, ptr, 4))
     case ListType(t, l)     : return load_list(cx, ptr, t, l)
     case RecordType(fields) : return load_record(cx, ptr, fields)
     case VariantType(cases) : return load_variant(cx, ptr, cases)
     case FlagsType(labels)  : return load_flags(cx, ptr, labels)
-    case OwnType()          : return lift_own(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
-    case BorrowType()       : return lift_borrow(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
-    case StreamType(t)      : return lift_stream(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
-    case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
+    case OwnType()          : return lift_own(cx, load_int(cx, ptr, 4), t)
+    case BorrowType()       : return lift_borrow(cx, load_int(cx, ptr, 4), t)
+    case StreamType(t)      : return lift_stream(cx, load_int(cx, ptr, 4), t)
+    case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, 4), t)
 ```
 
 Integers are loaded directly from memory, with their high-order bit interpreted
@@ -2150,9 +2149,6 @@ def load_string(cx, ptr) -> String:
 def utf16_tag(opts):
   return 1 << (ptr_size(opts) * 8 - 1)
 
-def max_string_byte_length(opts):
-  return (1 << (ptr_size(opts) * 8 - 1)) - 1
-
 def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   match cx.opts.string_encoding:
     case 'utf8':
@@ -2343,15 +2339,15 @@ def store(cx, v, t, ptr):
     case F64Type()          : store_int(cx, encode_float_as_i64(v), ptr, 8)
     case CharType()         : store_int(cx, char_to_i32(v), ptr, 4)
     case StringType()       : store_string(cx, v, ptr)
-    case ErrorContextType() : store_int(cx, lower_error_context(cx, v), ptr, idx_size(cx.opts))
+    case ErrorContextType() : store_int(cx, lower_error_context(cx, v), ptr, 4)
     case ListType(t, l)     : store_list(cx, v, ptr, t, l)
     case RecordType(fields) : store_record(cx, v, ptr, fields)
     case VariantType(cases) : store_variant(cx, v, ptr, cases)
     case FlagsType(labels)  : store_flags(cx, v, ptr, labels)
-    case OwnType()          : store_int(cx, lower_own(cx, v, t), ptr, idx_size(cx.opts))
-    case BorrowType()       : store_int(cx, lower_borrow(cx, v, t), ptr, idx_size(cx.opts))
-    case StreamType(t)      : store_int(cx, lower_stream(cx, v, t), ptr, idx_size(cx.opts))
-    case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, idx_size(cx.opts))
+    case OwnType()          : store_int(cx, lower_own(cx, v, t), ptr, 4)
+    case BorrowType()       : store_int(cx, lower_borrow(cx, v, t), ptr, 4)
+    case StreamType(t)      : store_int(cx, lower_stream(cx, v, t), ptr, 4)
+    case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, 4)
 ```
 
 Integers are stored directly into memory. Because the input domain is exactly
@@ -2487,6 +2483,9 @@ The simplest 4 cases above can compute the exact destination size and then copy
 with a simply loop (that possibly inflates Latin-1 to UTF-16 by injecting a 0
 byte after every Latin-1 byte).
 ```python
+def max_string_byte_length(opts):
+  return (1 << (ptr_size(opts) * 8 - 1)) - 1
+
 def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment, dst_encoding):
   dst_byte_length = dst_code_unit_size * src_code_units
   trap_if(dst_byte_length > max_string_byte_length(cx.opts))
@@ -2836,13 +2835,13 @@ def flatten_type(t, opts):
     case F64Type()                        : return ['f64']
     case CharType()                       : return ['i32']
     case StringType()                     : return [ptr_type(opts), ptr_type(opts)]
-    case ErrorContextType()               : return [idx_type(opts)]
+    case ErrorContextType()               : return ['i32']
     case ListType(t, l)                   : return flatten_list(t, l, opts)
     case RecordType(fields)               : return flatten_record(fields, opts)
     case VariantType(cases)               : return flatten_variant(cases, opts)
     case FlagsType(labels)                : return ['i32']
-    case OwnType() | BorrowType()         : return [idx_type(opts)]
-    case StreamType() | FutureType()      : return [idx_type(opts)]
+    case OwnType() | BorrowType()         : return ['i32']
+    case StreamType() | FutureType()      : return ['i32']
 ```
 
 List flattening of a fixed-length list uses the same flattening as a tuple
@@ -2938,15 +2937,15 @@ def lift_flat(cx, vi, t):
     case F64Type()          : return canonicalize_nan64(vi.next('f64'))
     case CharType()         : return convert_i32_to_char(cx, vi.next('i32'))
     case StringType()       : return lift_flat_string(cx, vi)
-    case ErrorContextType() : return lift_error_context(cx, vi.next(idx_type(cx.opts)))
+    case ErrorContextType() : return lift_error_context(cx, vi.next('i32'))
     case ListType(t, l)     : return lift_flat_list(cx, vi, t, l)
     case RecordType(fields) : return lift_flat_record(cx, vi, fields)
     case VariantType(cases) : return lift_flat_variant(cx, vi, cases)
     case FlagsType(labels)  : return lift_flat_flags(vi, labels)
-    case OwnType()          : return lift_own(cx, vi.next(idx_type(cx.opts)), t)
-    case BorrowType()       : return lift_borrow(cx, vi.next(idx_type(cx.opts)), t)
-    case StreamType(t)      : return lift_stream(cx, vi.next(idx_type(cx.opts)), t)
-    case FutureType(t)      : return lift_future(cx, vi.next(idx_type(cx.opts)), t)
+    case OwnType()          : return lift_own(cx, vi.next('i32'), t)
+    case BorrowType()       : return lift_borrow(cx, vi.next('i32'), t)
+    case StreamType(t)      : return lift_stream(cx, vi.next('i32'), t)
+    case FutureType(t)      : return lift_future(cx, vi.next('i32'), t)
 ```
 
 Integers are lifted from core `i32` or `i64` values using the signedness of the
@@ -2972,8 +2971,8 @@ def lift_flat_signed(vi, core_width, t_width):
 
 The contents of strings and variable-length lists are stored in memory so
 lifting these types is essentially the same as loading them from memory; the
-only difference is that the pointer and length come from `i32` values instead
-of from linear memory. Fixed-length lists are lifted the same way as a
+only difference is that the pointer and length come from ptr-sized values
+instead of from linear memory. Fixed-length lists are lifted the same way as a
 tuple (via `lift_flat_record` below).
 ```python
 def lift_flat_string(cx, vi):
@@ -3222,15 +3221,17 @@ present, is validated as such:
 
 * `string-encoding=N` - can be passed at most once, regardless of `N`.
 * `memory` - this is a subtype of `(memory 1)`
-* `realloc` - the function has type `(func (param i32 i32 i32 i32) (result i32))`
+* `realloc` - the function has type `(func (param T T i32 T) (result T))`
+  where `T` is `i32` or `i64` determined by `memory` as described [above](#canonopt)
 * if `realloc` is present, then `memory` must be present
 * `post-return` - only allowed on [`canon lift`](#canon-lift), which has rules
   for validation
 * 🔀 `async` - cannot be present with `post-return`
 * 🔀,not(🚟) `async` - `callback` must also be present. Note that with the 🚟
   feature (the "stackful" ABI), this restriction is lifted.
-* 🔀 `callback` - the function has type `(func (param i32 i32 i32) (result i32))`
-  and cannot be present without `async` and is only allowed with
+* 🔀 `callback` - the function has type `(func (param i32 i32 T) (result i32))`
+  where the `T` parameter is the payload address (`T` as in `realloc`). It
+  cannot be present without `async` and is only allowed with
   [`canon lift`](#canon-lift)
 
 Additionally some options are required depending on lift/lower operations
@@ -3941,7 +3942,8 @@ For a canonical definition:
 (canon waitable-set.wait $cancellable? (memory $mem) (core func $f))
 ```
 validation specifies:
-* `$f` is given type `(func (param $si) (param $ptr i32) (result i32))`
+* `$f` is given type `(func (param $si i32) (param $ptr T) (result i32))` where
+  `T` is the address type of `$mem`.
 
 Calling `$f` invokes the following function which waits for progress to be made
 on a `Waitable` in the given waitable set (indicated by index `$si`) and then
@@ -3984,7 +3986,8 @@ For a canonical definition:
 (canon waitable-set.poll $cancellable? (memory $mem) (core func $f))
 ```
 validation specifies:
-* `$f` is given type `(func (param $si i32) (param $ptr i32) (result i32))`
+* `$f` is given type `(func (param $si i32) (param $ptr T) (result i32))` where
+  `T` is the address type of `$mem`.
 
 Calling `$f` invokes the following function, which either returns an event that
 was pending on one of the waitables in the given waitable set (the same way as
@@ -4201,7 +4204,8 @@ For canonical definitions:
 ```
 In addition to [general validation of `$opts`](#canonopt-validation) validation
 specifies:
-* `$f` is given type `(func (param i32 i32 i32) (result i32))`
+* `$f` is given type `(func (param i32 T T) (result T))` where `T` is `i32` or
+  `i64` determined by the `memory` from `$opts`
 * `$stream_t` must be a type of the form `(stream $t?)`
 * If `$t` is present:
   * [`lower($t)` above](#canonopt-validation) defines required options for `stream.write`
@@ -4266,7 +4270,7 @@ context switches. Next, the stream's `state` is updated based on the result
 being delivered to core wasm so that, once a stream end has been notified that
 the other end dropped, calling anything other than `stream.drop-*` traps.
 Lastly, `stream_event` packs the `CopyResult` and number of elements copied up
-until this point into a single `i32` payload for core wasm.
+until this point into a single `T`-sized payload for core wasm.
 ```python
   def stream_event(result, reclaim_buffer):
     reclaim_buffer()
@@ -4316,7 +4320,8 @@ For canonical definitions:
 ```
 In addition to [general validation of `$opts`](#canonopt-validation) validation
 specifies:
-* `$f` is given type `(func (param i32 i32) (result i32))`
+* `$f` is given type `(func (param i32 T) (result i32))` where `T` is `i32` or
+  `i64` determined by the `memory` from `$opts`
 * `$future_t` must be a type of the form `(future $t?)`
 * If `$t` is present:
   * [`lift($t)` above](#canonopt-validation) defines required options for `future.read`
@@ -4363,7 +4368,7 @@ state (in which the only valid operation is to call `future.drop-*`) on
 read/written at most once and futures are only passed to other components in a
 state where they are ready to be read/written. Another important difference is
 that, since the buffer length is always implied by the `CopyResult`, the number
-of elements copied is not packed in the high 28 bits; they're always zero.
+of elements copied is not packed in the high bits; they're always zero.
 ```python
   def future_event(result):
     assert((buffer.remain() == 0) == (result == CopyResult.COMPLETED))
@@ -4539,9 +4544,11 @@ For a canonical definition:
 (canon thread.new-indirect $ft $ftbl (core func $new_indirect))
 ```
 validation specifies
-* `$ft` must refer to the type `(func (param $c i32))`
+* `$ft` must refer to the type `(func (param $c T))` where `T` is `i32` or
+  `i64` determined by the linear memory of the component instance
 * `$ftbl` must refer to a table whose element type matches `funcref`
-* `$new_indirect` is given type `(func (param $fi i32) (param $c i32) (result i32))`
+* `$new_indirect` is given type `(func (param $fi I) (param $c T) (result i32))`
+  where `I` is `i32` or `i64` determined by `$ftbl`'s table type
 
 Calling `$new_indirect` invokes the following function which reads a `funcref`
 from `$ftbl` (trapping if out-of-bounds, null or the wrong type), calls the
@@ -4556,7 +4563,7 @@ class CoreFuncRef:
 def canon_thread_new_indirect(ft, ftbl: Table[CoreFuncRef], thread, fi, c):
   trap_if(not thread.task.inst.may_leave)
   f = ftbl.get(fi)
-  assert(ft == CoreFuncType(['i32'], []))
+  assert(ft == CoreFuncType(['i32'], []) or ft == CoreFuncType(['i64'], []))
   trap_if(f.t != ft)
   def thread_func(thread):
     [] = call_and_trap_on_throw(f.callee, thread, [c])
@@ -4736,7 +4743,8 @@ For a canonical definition:
 (canon error-context.new $opts (core func $f))
 ```
 validation specifies:
-* `$f` is given type `(func (param i32 i32) (result i32))`
+* `$f` is given type `(func (param T T) (result i32))` where `T` is `i32` or
+  `i64` determined by the `memory` from `$opts`
 * `async` is not present
 * `memory` must be present
 
@@ -4777,7 +4785,8 @@ For a canonical definition:
 (canon error-context.debug-message $opts (core func $f))
 ```
 validation specifies:
-* `$f` is given type `(func (param i32 i32))`
+* `$f` is given type `(func (param i32 T))` where `T` is `i32` or `i64`
+  determined by the `memory` from `$opts`
 * `async` is not present
 * `memory` must be present
 * `realloc` must be present
@@ -4796,8 +4805,9 @@ def canon_error_context_debug_message(opts, thread, i, ptr):
   store_string(cx, errctx.debug_message, ptr)
   return []
 ```
-Note that `ptr` points to an 8-byte region of memory into which will be stored
-the pointer and length of the debug string (allocated via `opts.realloc`).
+Note that `ptr` points to a region of memory (8 bytes for memory32, 16 bytes
+for memory64) into which will be stored the pointer and length of the debug
+string (allocated via `opts.realloc`).
 
 
 ### 📝 `canon error-context.drop`
@@ -4827,9 +4837,11 @@ For a canonical definition:
 (canon thread.spawn-ref shared? $ft (core func $spawn_ref))
 ```
 validation specifies:
-* `$ft` must refer to the type `(shared? (func (param $c i32)))` (see explanation below)
+* `$ft` must refer to the type `(shared? (func (param $c T)))` where `T` is
+  `i32` or `i64` determined by the linear memory of the component instance
+  (see explanation below)
 * `$spawn_ref` is given type
-  `(shared? (func (param $f (ref null $ft)) (param $c i32) (result $e i32)))`
+  `(shared? (func (param $f (ref null $ft)) (param $c T) (result $e i32)))`
 
 When the `shared` immediate is not present, the spawned thread is
 *cooperative*, only switching at specific program points. When the `shared`
@@ -4838,7 +4850,7 @@ parallel with all other threads.
 
 > Note: ideally, a thread could be spawned with [arbitrary thread parameters].
 > Currently, that would require additional work in the toolchain to support so,
-> for simplicity, the current proposal simply fixes a single `i32` parameter
+> for simplicity, the current proposal simply fixes a single `T` parameter
 > type. However, `thread.spawn-ref` could be extended to allow arbitrary thread
 > parameters in the future, once it's concretely beneficial to the toolchain.
 > The inclusion of `$ft` ensures backwards compatibility for when arbitrary
@@ -4868,12 +4880,14 @@ For a canonical definition:
 (canon thread.spawn-indirect shared? $ft $tbl (core func $spawn_indirect))
 ```
 validation specifies:
-* `$ft` must refer to the type `(shared? (func (param $c i32)))` is allowed
-  (see explanation in `thread.spawn-ref` above)
+* `$ft` must refer to the type `(shared? (func (param $c T)))` where `T` is
+  `i32` or `i64` determined by the linear memory of the component instance
+  (see explanation in `thread.spawn-ref` above)
 * `$tbl` must refer to a shared table whose element type matches
   `(ref null (shared? func))`
 * `$spawn_indirect` is given type
-  `(shared? (func (param $i i32) (param $c i32) (result $e i32)))`
+  `(shared? (func (param $i I) (param $c T) (result $e i32)))` where `I` is
+  `i32` or `i64` determined by `$tbl`'s table type
 
 When the `shared` immediate is not present, the spawned thread is
 *cooperative*, only switching at specific program points. When the `shared`
diff --git a/design/mvp/Concurrency.md b/design/mvp/Concurrency.md
index 7dd5395e..b357d3f2 100644
--- a/design/mvp/Concurrency.md
+++ b/design/mvp/Concurrency.md
@@ -151,7 +151,7 @@ use cases mentioned in the [goals](#goals).
 
 Until the Core WebAssembly [shared-everything-threads] proposal allows Core
 WebAssembly function types to be annotated with `shared`, `thread.new-indirect`
-can only call non-`shared` functions (via `i32` `(table funcref)` index, just
+can only call non-`shared` functions (via `(table funcref)` index, just
 like `call_indirect`) and thus currently all threads must execute
 [cooperatively] in a sequentially-interleaved fashion, switching between
 threads only at explicit program points just like (and implementable via) a
@@ -232,7 +232,7 @@ unique ownership of the *readable end* of the future or stream. To get a
 end pair (via the [`{stream,future}.new`] built-ins) and then pass the readable
 end elsewhere (e.g., in the above WIT, as a parameter to an imported
 `pipe.write` or as a result of an exported `transform`). Given the readable or
-writable end of a future or stream (represented as an `i32` index into the
+writable end of a future or stream (represented as an index into the
 component instance's handle table), Core WebAssembly can then call a
 [`{stream,future}.{read,write}`] built-in to synchronously or asynchronously
 copy into or out of a caller-provided buffer of Core WebAssembly linear (or,
@@ -369,9 +369,9 @@ creating and running threads.
 New threads are created with the [`thread.new-indirect`] built-in. As mentioned
 [above](#threads-and-tasks), a spawned thread inherits the task of the spawning
 thread which is why threads and tasks are N:1. `thread.new-indirect` adds a new
-thread to the component instance's threads table and returns the `i32` index of
+thread to the component instance's threads table and returns the index of
 this table entry to the Core WebAssembly caller. Like [`pthread_create`],
-`thread.new-indirect` takes a Core WebAssembly function (via `i32` index into a
+`thread.new-indirect` takes a Core WebAssembly function (via index into a
 `funcref` table) and a "closure" parameter to pass to the function when called
 on the new thread. However, unlike `pthread_create`, the new thread is
 initially in a "suspended" state and must be explicitly "resumed" using one of
@@ -436,9 +436,8 @@ stackless async ABI is used, returning the "exit" code to the event loop. This
 non-reuse of thread-local storage between distinct export calls avoids what
 would otherwise be a likely source of TLS-related memory leaks.
 
-When [memory64] is integrated into the Component Model's Canonical ABI,
-`context.{get,set}` will be backwards-compatibly relaxed to allow `i64`
-pointers (overlaying the `i32` values like hardware 32/64-bit registers). When
+The type of `context.{get,set}` values (`i32` or `i64`) is determined by the
+`memory` canonopt (matching the pointer size of the linear memory). When
 [wasm-gc] is integrated, these integral context values can serve as indices
 into guest-managed tables of typed GC references.
 
@@ -945,8 +944,8 @@ Other example asynchronous lowered signatures:
 async func(s1: stream<future<string>>, s2: list<stream<string>>) -> result<stream<string>, stream<error>>
 ```
 In *both* the sync and async ABIs, a `future` or `stream` in the WIT-level type
-translates to a single `i32` in the ABI.  This `i32` is an index into the
-current component instance's handle table. For example, for the WIT function type:
+translates to a single `i32` index into the current component instance's handle
+table. For example, for the WIT function type:
 ```wit
 async func(f: future<string>) -> future<u32>
 ```
@@ -959,10 +958,10 @@ and the asynchronous ABI has the signature:
 (func (param $f i32) (param $out-ptr i32) (result i32))
 ```
 where `$f` is the index of a future (not a pointer to one) while while
-`$out-ptr` is a pointer to a linear memory location that will receive an `i32`
+`$out-ptr` is a pointer to a linear memory location that will receive a handle
 index.
 
-For the runtime semantics of this `i32` index, see `lift_stream`,
+For the runtime semantics of this handle index, see `lift_stream`,
 `lift_future`, `lower_stream` and `lower_future` in the [Canonical ABI
 Explainer]. For a complete description of how async imports work, see
 [`canon_lower`] in the Canonical ABI Explainer.
@@ -1032,18 +1031,19 @@ The `(result i32)` lets the core function return what it wants the runtime to do
 * If the low 4 bits are `1`, the callee wants to yield, allowing other code
   to run, but resuming thereafter without waiting on anything else.
 * If the low 4 bits are `2`, the callee wants to wait for an event to occur in
-  the waitable set whose index is stored in the high 28 bits.
+  the waitable set whose index is stored in the remaining high bits.
 
 When an async stackless function is exported, a companion "callback" function
 must also be exported with signature:
 ```wat
-(func (param i32 i32 i32) (result i32))
+(func (param i32 i32 $addr) (result i32))
 ```
+where `$addr` is `i32` or `i64` depending on the `memory` canonopt.
 
 The `(result i32)` has the same interpretation as the stackless export function
 and the runtime will repeatedly call the callback until a value of `0` is
-returned. The `i32` parameters describe what happened that caused the callback
-to be called again.
+returned. The first two `i32` parameters describe what happened that caused the
+callback to be called again and the `$addr` parameter is the payload address.
 
 For a complete description of how async exports work, see [`canon_lift`] in the
 Canonical ABI Explainer.
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index e8cc1fad..26661274 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -690,11 +690,11 @@ only be used indirectly by untrusted user-mode processes via their integer
 index in the table.
 
 In the Component Model, handles are lifted-from and lowered-into `i32` values
-(or `i64` values when the `table64` canonopt is present) that index an
-encapsulated per-component-instance table that is maintained by the canonical
-function definitions described [below](#canonical-definitions). In the future, handles
-could be backwards-compatibly lifted and lowered from [reference types]  (via
-the addition of a new `canonopt`, as introduced [below](#canonical-abi)).
+that index an encapsulated per-component-instance table that is maintained by
+the canonical function definitions described [below](#canonical-definitions).
+In the future, handles could be backwards-compatibly lifted and lowered from
+[reference types] (via the addition of a new `canonopt`, as introduced
+[below](#canonical-abi)).
 
 The uniqueness and dropping conditions mentioned above are enforced at runtime
 by the Component Model through these canonical definitions. The `typeidx`
@@ -715,15 +715,14 @@ destination components. Thus, in the abstract, `stream` and `future` can be
 thought of as inter-component control-flow or synchronization mechanisms.
 
 Just like with handles, in the Component Model, async value types are
-lifted-from and lowered-into `i32` values (or `i64` values when the `table64`
-canonopt is present) that index an encapsulated per-component-instance table
-that is maintained by the canonical ABI built-ins
+lifted-from and lowered-into `i32` values that index an encapsulated
+per-component-instance table that is maintained by the canonical ABI built-ins
 [below](#canonical-definitions). The Component-Model-defined ABI for creating,
-writing-to and reading-from `stream` and `future` values is meant to be bound
-to analogous source-language features like promises, futures, streams,
-iterators, generators and channels so that developers can use these familiar
-high-level concepts when working directly with component types, without the
-need to manually write low-level async glue code. For languages like C without
+writing-to and reading-from `stream` and `future` values is meant to be bound to
+analogous source-language features like promises, futures, streams, iterators,
+generators and channels so that developers can use these familiar high-level
+concepts when working directly with component types, without the need to
+manually write low-level async glue code. For languages like C without
 language-level concurrency support, these ABIs (described in detail in the
 [Canonical ABI explainer]) can be exposed directly as function imports and used
 like normal low-level Operation System I/O APIs.
@@ -859,7 +858,7 @@ which is a *subtype* of some other type. Currently, the only supported bound is
 "any resource type". Thus, only resource types can be imported/exported
 abstractly, not arbitrary value types. This allows type imports to always be
 compiled independently of their arguments using a "universal representation" for
-handle values (viz., `i32` or `i64`, as defined by the [Canonical ABI](CanonicalABI.md)).
+handle values (viz., `i32`, as defined by the [Canonical ABI](CanonicalABI.md)).
 In the future, `sub` may be extended to allow referencing other resource types,
 thereby allowing abstract resource subtyping.
 
@@ -1457,9 +1456,9 @@ canon ::= ...
         | (canon thread.new-indirect <typeidx> <core:tableidx> (core func <id>?)) 🧵
         | (canon thread.switch-to cancellable? (core func <id>?)) 🧵
         | (canon thread.suspend cancellable? (core func <id>?)) 🧵
-        | (canon thread.resume-later (core func <id>?) 🧵
-        | (canon thread.yield-to cancellable? (core func <id>?) 🧵
-        | (canon thread.yield cancellable? (core func <id>?) 🧵
+        | (canon thread.resume-later (core func <id>?)) 🧵
+        | (canon thread.yield-to cancellable? (core func <id>?)) 🧵
+        | (canon thread.yield cancellable? (core func <id>?)) 🧵
         | (canon error-context.new <canonopt>* (core func <id>?)) 📝
         | (canon error-context.debug-message <canonopt>* (core func <id>?)) 📝
         | (canon error-context.drop (core func <id>?)) 📝
@@ -1468,10 +1467,6 @@ canon ::= ...
         | (canon thread.available-parallelism (core func <id>?)) 🧵②
 ```
 
-In the Canonical ABI signatures below, `$addr` is `i32` when the `memory`
-canonopt refers to a 32-bit memory or `i64` for a 64-bit memory, and `$idx` is
-`i32` by default or `i64` when the `table64` canonopt is present.
-
 ##### Resource built-ins
 
 ###### `resource.new`
@@ -1479,7 +1474,7 @@ canonopt refers to a 32-bit memory or `i64` for a 64-bit memory, and `$idx` is
 | Synopsis                   |                            |
 | -------------------------- | -------------------------- |
 | Approximate WIT signature  | `func<T>(rep: T.rep) -> T` |
-| Canonical ABI signature    | `[rep:i32] -> [$idx]`      |
+| Canonical ABI signature    | `[rep:i32] -> [i32]`       |
 
 The `resource.new` built-in creates a new resource (of resource type `T`) with
 `rep` as its representation, and returns a new handle pointing to the new
@@ -1498,7 +1493,7 @@ For details, see [`canon_resource_new`] in the Canonical ABI explainer.
 | Synopsis                   |                                    |
 | -------------------------- | ---------------------------------- |
 | Approximate WIT signature  | `func<T>(t: T)`                    |
-| Canonical ABI signature    | `[t:$idx] -> []`                   |
+| Canonical ABI signature    | `[t:i32] -> []`                    |
 
 The `resource.drop` built-in drops a resource handle `t` (with resource type
 `T`). If the dropped handle owns the resource, the resource's `dtor` is called,
@@ -1512,7 +1507,7 @@ For details, see [`canon_resource_drop`] in the Canonical ABI explainer.
 | Synopsis                   |                          |
 | -------------------------- | ------------------------ |
 | Approximate WIT signature  | `func<T>(t: T) -> T.rep` |
-| Canonical ABI signature    | `[t:$idx] -> [i32]`      |
+| Canonical ABI signature    | `[t:i32] -> [i32]`       |
 
 The `resource.rep` built-in returns the representation of the resource (with
 resource type `T`) pointed to by the handle `t`. Validation only allows
@@ -1530,12 +1525,12 @@ allowing it to create and return new resources to its client:
   (import "Libc" (core module $Libc ...))
   (core instance $libc (instantiate $Libc))
   (type $R (resource (rep i32) (dtor (func $libc "free"))))
-  (core func $R_new (param i32) (result $idx)
+  (core func $R_new (param i32) (result i32)
     (canon resource.new $R)
   )
   (core module $Main
-    (import "canon" "R_new" (func $R_new (param i32) (result $idx)))
-    (func (export "make_R") (param ...) (result $idx)
+    (import "canon" "R_new" (func $R_new (param i32) (result i32)))
+    (func (export "make_R") (param ...) (result i32)
       (return (call $R_new ...))
     )
   )
@@ -1548,7 +1543,7 @@ allowing it to create and return new resources to its client:
   )
 )
 ```
-Here, the `$idx` returned by `resource.new`, which is an index into the current
+Here, the `i32` returned by `resource.new`, which is an index into the current
 component instance's table, is immediately returned by `make_R`, thereby
 transferring ownership of the newly-created resource to the export's caller.
 
@@ -1563,12 +1558,12 @@ See the [concurrency explainer] for background.
 | Synopsis                   |                    |
 | -------------------------- | ------------------ |
 | Approximate WIT signature  | `func<T,i>() -> T` |
-| Canonical ABI signature    | `[] -> [$addr]`       |
+| Canonical ABI signature    | `[] -> [$T]`          |
 
 The `context.get` built-in returns the `i`th element of the [current thread]'s
 [thread-local storage] array. Validation currently restricts `i` to be less
-than 2 and `t` to be `i32` or `i64`, but these restrictions may be relaxed in
-the future.
+than 2 and `T` to be `i32` or `i64` (determined by the `memory` canonopt), but
+these restrictions may be relaxed in the future.
 
 For details, see [Thread-Local Storage] in the concurrency explainer and
 [`canon_context_get`] in the Canonical ABI explainer.
@@ -1578,12 +1573,12 @@ For details, see [Thread-Local Storage] in the concurrency explainer and
 | Synopsis                   |                   |
 | -------------------------- | ----------------- |
 | Approximate WIT signature  | `func<T,i>(v: T)` |
-| Canonical ABI signature    | `[$addr] -> []`      |
+| Canonical ABI signature    | `[$T] -> []`         |
 
 The `context.set` built-in sets the `i`th element of the [current thread]'s
 [thread-local storage] array to the value `v`. Validation currently restricts
-`i` to be less than 2 and `t` to be `i32` or `i64`, but these restrictions may
-be relaxed in the future.
+`i` to be less than 2 and `T` to be `i32` or `i64` (determined by the `memory`
+canonopt), but these restrictions may be relaxed in the future.
 
 For details, see [Thread-Local Storage] in the concurrency explainer and
 [`canon_context_set`] in the Canonical ABI explainer.
@@ -1667,9 +1662,9 @@ For details, see [Cancellation] in the concurrency explainer and
 | Synopsis                   |                          |
 | -------------------------- | ------------------------ |
 | Approximate WIT signature  | `func() -> waitable-set` |
-| Canonical ABI signature    | `[] -> [$idx]`           |
+| Canonical ABI signature    | `[] -> [i32]`            |
 
-The `waitable-set.new` built-in returns the `$idx` index of a new [waitable
+The `waitable-set.new` built-in returns the `i32` index of a new [waitable
 set]. The `waitable-set` type is not a true WIT-level type but instead serves
 to document associated built-ins below. Waitable sets start out empty and are
 populated explicitly with [waitables] by `waitable.join`.
@@ -1682,7 +1677,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                                                |
 | -------------------------- | ---------------------------------------------- |
 | Approximate WIT signature  | `func<cancellable?>(s: waitable-set) -> event` |
-| Canonical ABI signature    | `[s:$idx payload-addr:$addr] -> [event-code:i32]` |
+| Canonical ABI signature    | `[s:i32 payload-addr:$addr] -> [event-code:i32]` |
 
 where `event` is defined in WIT as:
 ```wit
@@ -1736,7 +1731,7 @@ part [`stream.read` and `stream.write`](#-streamread-and-streamwrite) and
 [`future.read` and `future.write`](#-futureread-and-futurewrite) below.
 
 In the Canonical ABI, the `event-code` return value provides the `event`
-discriminant and the case payloads are stored as two contiguous `$idx`s at the
+discriminant and the case payloads are stored as two contiguous `i32`s at the
 8-byte-aligned address `payload-addr`.
 
 For details, see [Waitables and Waitable Sets] in the concurrency explainer and
@@ -1747,7 +1742,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                                                |
 | -------------------------- | ---------------------------------------------- |
 | Approximate WIT signature  | `func<cancellable?>(s: waitable-set) -> event` |
-| Canonical ABI signature    | `[s:$idx payload-addr:$addr] -> [event-code:i32]` |
+| Canonical ABI signature    | `[s:i32 payload-addr:$addr] -> [event-code:i32]` |
 
 where `event` is defined as in [`waitable-set.wait`](#-waitable-setwait).
 
@@ -1771,7 +1766,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                          |
 | -------------------------- | ------------------------ |
 | Approximate WIT signature  | `func(s: waitable-set)` |
-| Canonical ABI signature    | `[s:$idx] -> []`   |
+| Canonical ABI signature    | `[s:i32] -> []`    |
 
 The `waitable-set.drop` built-in removes the indicated [waitable set] from the
 current component instance's table, trapping if the waitable set is not empty
@@ -1785,7 +1780,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                                                      |
 | -------------------------- | ---------------------------------------------------- |
 | Approximate WIT signature  | `func(w: waitable, maybe_set: option<waitable-set>)` |
-| Canonical ABI signature    | `[w:$idx, maybe_set:$idx] -> []`                     |
+| Canonical ABI signature    | `[w:i32, maybe_set:i32] -> []`                       |
 
 The `waitable.join` built-in may be called given a [waitable] and an optional
 [waitable set]. `join` first removes `w` from any waitable set that it is a
@@ -1807,7 +1802,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                                                           |
 | -------------------------- | --------------------------------------------------------- |
 | Approximate WIT signature  | `func<async?>(subtask: subtask) -> option<subtask-state>` |
-| Canonical ABI signature    | `[subtask:$idx] -> [i32]`                                 |
+| Canonical ABI signature    | `[subtask:i32] -> [i32]`                                  |
 
 The `subtask.cancel` built-in requests [cancellation] of the indicated subtask.
 If the `async` is present, `none` is returned (reprented as `-1` in the
@@ -1828,7 +1823,7 @@ For details, see [Cancellation] in the concurrency explainer and
 | Synopsis                   |                          |
 | -------------------------- | ------------------------ |
 | Approximate WIT signature  | `func(subtask: subtask)` |
-| Canonical ABI signature    | `[subtask:$idx] -> []`   |
+| Canonical ABI signature    | `[subtask:i32] -> []`    |
 
 The `subtask.drop` built-in removes the indicated [subtask] from the current
 component instance's table, trapping if the subtask hasn't returned.
@@ -1845,9 +1840,9 @@ For details, see [`canon_subtask_drop`] in the Canonical ABI explainer.
 
 The `stream.new` and `future.new` built-ins return the [readable and writable
 ends] of a new `stream<T?>` or `future<T?>`. The readable and writable ends are
-added to the current component instance's table and then the two `$idx` indices
+added to the current component instance's table and then the two `i32` indices
 of the two ends are packed into a single `i64` return value (with the readable
-end in the low bits).
+end in the low 32 bits).
 
 The types `readable-stream-end<T?>` and `writable-stream-end<T?>` are not WIT
 types; they are the conceptual lower-level types that describe how the
@@ -1865,7 +1860,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | -------------------------------------------- | ----------------------------------------------------------------------------------------------- |
 | Approximate WIT signature for `stream.read`  | `func<stream<T?>>(e: readable-stream-end<T?>, b: writable-buffer<T>?) -> option<stream-result>` |
 | Approximate WIT signature for `stream.write` | `func<stream<T?>>(e: writable-stream-end<T?>, b: readable-buffer<T>?) -> option<stream-result>` |
-| Canonical ABI signature                      | `[stream-end:$idx ptr:$addr num:i32] -> [i32]`                                                   |
+| Canonical ABI signature                      | `[stream-end:i32 ptr:$addr num:$addr] -> [$addr]`                                               |
 
 where `stream-result` is defined in WIT as:
 ```wit
@@ -1920,14 +1915,14 @@ If `stream.{read,write}` return `dropped` (synchronously or asynchronously),
 any subsequent operation on the stream other than `stream.drop-{readable,writable}`
 traps.
 
-In the Canonical ABI, the `{readable,writable}-stream-end` is passed as a
-`$idx` index into the component instance's table followed by the `$addr` linear
-memory offset and an `i32` size-in-elements of the
+In the Canonical ABI, the `{readable,writable}-stream-end` is passed as an
+`i32` index into the component instance's table followed by a pair of `$addr`s
+describing the linear memory offset and size-in-elements of the
 `{readable,writable}-buffer<T>`. The `option<stream-result>` return value is
-bit-packed into a single `i32` where:
-* `0xffff_ffff` represents `none`.
+bit-packed into a single `$addr` where:
+* all-ones represents `none`.
 * Otherwise, the `result` is in the low 4 bits and the `progress` is in the
-  high 28 bits.
+  remaining high bits.
 
 For details, see [Streams and Futures] in the concurrency explainer and
 [`canon_stream_read`] in the Canonical ABI explainer.
@@ -1938,7 +1933,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | -------------------------------------------- | -------------------------------------------------------------------------------------------------------- |
 | Approximate WIT signature for `future.read`  | `func<future<T?>>(e: readable-future-end<T?>, b: writable-buffer<T; 1>?) -> option<future-read-result>`  |
 | Approximate WIT signature for `future.write` | `func<future<T?>>(e: writable-future-end<T?>, v: readable-buffer<T; 1>?) -> option<future-write-result>` |
-| Canonical ABI signature                      | `[readable-future-end:$idx ptr:$addr] -> [i32]`                                                          |
+| Canonical ABI signature                      | `[readable-future-end:i32 ptr:$addr] -> [i32]`                                                           |
 
 where `future-{read,write}-result` are defined in WIT as:
 ```wit
@@ -1987,12 +1982,12 @@ A component *may* call `future.drop-readable` *before* successfully reading a
 value to indicate a loss of interest. `future.drop-writable` will trap if
 called before successfully writing a value.
 
-In the Canonical ABI, the `{readable,writable}-future-end` is passed as a
-`$idx` index into the component instance's table followed by a single
+In the Canonical ABI, the `{readable,writable}-future-end` is passed as an
+`i32` index into the component instance's table followed by a single
 `$addr` describing the linear memory offset of the
 `{readable,writable}-buffer<T; 1>`. The `option<future-{read,write}-result>`
-return value is bit-packed into the single `i32` return value where
-`0xffff_ffff` represents `none`. And, `future-read-result.cancelled` is encoded
+return value is bit-packed into the single `i32` return value where all-ones
+represents `none`. And, `future-read-result.cancelled` is encoded
 as the value of `future-write-result.cancelled`, rather than the value implied
 by the `enum` definition above.
 
@@ -2007,7 +2002,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | Approximate WIT signature for `stream.cancel-write` | `func<stream<T?>>(e: writable-stream-end<T?>) -> option<stream-result>`       |
 | Approximate WIT signature for `future.cancel-read`  | `func<future<T?>>(e: readable-future-end<T?>) -> option<future-read-result>`  |
 | Approximate WIT signature for `future.cancel-write` | `func<future<T?>>(e: writable-future-end<T?>) -> option<future-write-result>` |
-| Canonical ABI signature                             | `[e: $idx] -> [i32]`                                                          |
+| Canonical ABI signature                             | `[e: i32] -> [i32]`                                                           |
 
 The `{stream,future}.cancel-{read,write}` built-ins take the matching [readable
 or writable end] of a stream or future that has a pending `async`
@@ -2035,7 +2030,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | Approximate WIT signature for `stream.drop-writable` | `func<stream<T?>>(e: writable-stream-end<T?>)` |
 | Approximate WIT signature for `future.drop-readable` | `func<future<T?>>(e: readable-future-end<T?>)` |
 | Approximate WIT signature for `future.drop-writable` | `func<future<T?>>(e: writable-future-end<T?>)` |
-| Canonical ABI signature                              | `[end:$idx err:$idx] -> []`                    |
+| Canonical ABI signature                              | `[end:i32 err:i32] -> []`                      |
 
 The `{stream,future}.drop-{readable,writable}` built-ins remove the indicated
 [stream or future] from the current component instance's table, trapping if the
@@ -2051,7 +2046,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | Synopsis                   |                 |
 | -------------------------- | --------------- |
 | Approximate WIT signature  | `func() -> u32` |
-| Canonical ABI signature    | `[] -> [$idx]`  |
+| Canonical ABI signature    | `[] -> [i32]`   |
 
 The `thread.index` built-in returns the index of the [current thread] in the
 component instance's table. While `thread.new-indirect` also returns the index
@@ -2066,7 +2061,7 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 | Synopsis                   |                                                               |
 | -------------------------- | ------------------------------------------------------------- |
 | Approximate WIT signature  | `func<FuncT,tableidx>(fi: u32, c: FuncT.params[0]) -> thread` |
-| Canonical ABI signature    | `[fi:i32 c:i32] -> [$idx]`                                    |
+| Canonical ABI signature    | `[fi:$idx c:$addr] -> [i32]`                                  |
 
 The `thread.new-indirect` built-in adds a new thread to the current component
 instance's table, returning the index of the new thread. The function table
@@ -2075,8 +2070,8 @@ dynamically checked to match the type `FuncT` (in the same manner as
 `call_indirect`). Lastly, the indexed function is called in the new thread
 with `c` as its first and only parameter.
 
-Currently, `FuncT` must be `(func (param i32))` and thus `c` must always be an
-`i32`, but this restriction can be loosened in the future as the Canonical
+Currently, `FuncT` must be `(func (param $addr))` and thus `c` must always be
+an `$addr`, but this restriction can be loosened in the future as the Canonical
 ABI is extended for [GC].
 
 As explained in the [concurrency explainer], a thread created by
@@ -2092,7 +2087,7 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 | Synopsis                   |                                                   |
 | -------------------------- | ------------------------------------------------- |
 | Approximate WIT signature  | `func<cancellable?>(t: thread) -> suspend-result` |
-| Canonical ABI signature    | `[t:$idx] -> [i32]`                               |
+| Canonical ABI signature    | `[t:i32] -> [i32]`                                |
 
 where `suspend-result` is defined in WIT as:
 ```wit
@@ -2146,7 +2141,7 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 | Synopsis                   |                   |
 | -------------------------- | ----------------- |
 | Approximate WIT signature  | `func(t: thread)` |
-| Canonical ABI signature    | `[t:$idx] -> []`  |
+| Canonical ABI signature    | `[t:i32] -> []`   |
 
 The `thread.resume-later` built-in changes the state of thread `t` from
 "suspended" to "ready" (trapping if `t` is not in a "suspended" state) so that
@@ -2160,7 +2155,7 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 | Synopsis                   |                                 |
 | -------------------------- | ------------------------------- |
 | Approximate WIT signature  | `func<cancellable?>(t: thread)` |
-| Canonical ABI signature    | `[t:$idx] -> [suspend-result]`  |
+| Canonical ABI signature    | `[t:i32] -> [i32]`              |
 
 The `thread.yield-to` built-in immediately resumes execution of the thread `t`,
 (trapping if `t` is not in a "suspended" state) leaving the [current thread] in
@@ -2210,7 +2205,7 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 | Synopsis                   |                                                                    |
 | -------------------------- | ------------------------------------------------------------------ |
 | Approximate WIT signature  | `func<shared?,FuncT>(f: FuncT, c: FuncT.params[0]) -> bool`        |
-| Canonical ABI signature    | `shared? [f:(ref null (shared (func (param i32))) c:i32] -> [i32]` |
+| Canonical ABI signature    | `shared? [f:(ref null (shared (func (param $addr)))) c:$addr] -> [i32]` |
 
 The `thread.spawn-ref` built-in is an optimization, fusing a call to
 `thread.new_ref` (assuming `thread.new_ref` was added as part of adding a
@@ -2225,7 +2220,7 @@ For details, see [`canon_thread_spawn_ref`] in the Canonical ABI explainer.
 | Synopsis                   |                                                                    |
 | -------------------------- | ------------------------------------------------------------------ |
 | Approximate WIT signature  | `func<shared?,FuncT,tableidx>(i: u32, c: FuncT.params[0]) -> bool` |
-| Canonical ABI signature    | `shared? [i:i32 c:i32] -> [i32]`                                   |
+| Canonical ABI signature    | `shared? [i:$idx c:$addr] -> [i32]`                                |
 
 The `thread.spawn-indirect` built-in is an optimization, fusing a call to
 [`thread.new-indirect`](#-threadnew-indirect) with a call to
@@ -2260,7 +2255,7 @@ explainer.
 | Synopsis                         |                                          |
 | -------------------------------- | ---------------------------------------- |
 | Approximate WIT signature        | `func(message: string) -> error-context` |
-| Canonical ABI signature          | `[ptr:$addr len:$addr] -> [$idx]`        |
+| Canonical ABI signature          | `[ptr:$addr len:$addr] -> [i32]`         |
 
 The `error-context.new` built-in returns a new `error-context` value. The given
 string is non-deterministically transformed to produce the `error-context`'s
@@ -2276,14 +2271,14 @@ For details, see [`canon_error_context_new`] in the Canonical ABI explainer.
 | Synopsis                         |                                         |
 | -------------------------------- | --------------------------------------- |
 | Approximate WIT signature        | `func(errctx: error-context) -> string` |
-| Canonical ABI signature          | `[errctxi:$idx ptr:$addr] -> []`        |
+| Canonical ABI signature          | `[errctxi:i32 ptr:$addr] -> []`         |
 
 The `error-context.debug-message` built-in returns the
 [debug message](#error-context-type) of the given `error-context`.
 
-In the Canonical ABI, it writes the debug message into `ptr` as a (`ptr`,
-`length`) pair (whose size depends on the `memory` canonopt), according to the
-Canonical ABI for `string`, given the `<canonopt>*` immediates.
+In the Canonical ABI, it writes the debug message into `ptr` as a (`ptr`,
+`length`) pair (8 bytes for a 32-bit `memory`, 16 bytes for a 64-bit one),
+according to the Canonical ABI for `string`, given the `<canonopt>*` immediates.
 
 For details, see [`canon_error_context_debug_message`] in the Canonical ABI
 explainer.
@@ -2293,7 +2288,7 @@ explainer.
 | Synopsis                         |                               |
 | -------------------------------- | ----------------------------- |
 | Approximate WIT signature        | `func(errctx: error-context)` |
-| Canonical ABI signature          | `[errctxi:$idx] -> []`        |
+| Canonical ABI signature          | `[errctxi:i32] -> []`         |
 
 The `error-context.drop` built-in drops the given `error-context` value from
 the component instance.
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index abee7ef2..0fea104d 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -1093,13 +1093,13 @@ def alignment(t, opts):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return ptr_size(opts)
-    case ErrorContextType()          : return idx_size(opts)
+    case ErrorContextType()          : return 4
     case ListType(t, l)              : return alignment_list(t, l, opts)
     case RecordType(fields)          : return alignment_record(fields, opts)
     case VariantType(cases)          : return alignment_variant(cases, opts)
     case FlagsType(labels)           : return alignment_flags(labels)
-    case OwnType() | BorrowType()    : return idx_size(opts)
-    case StreamType() | FutureType() : return idx_size(opts)
+    case OwnType() | BorrowType()    : return 4
+    case StreamType() | FutureType() : return 4
 
 def alignment_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
@@ -1151,13 +1151,13 @@ def elem_size(t, opts):
     case F64Type()                   : return 8
     case CharType()                  : return 4
     case StringType()                : return 2 * ptr_size(opts)
-    case ErrorContextType()          : return idx_size(opts)
+    case ErrorContextType()          : return 4
     case ListType(t, l)              : return elem_size_list(t, l, opts)
     case RecordType(fields)          : return elem_size_record(fields, opts)
     case VariantType(cases)          : return elem_size_variant(cases, opts)
     case FlagsType(labels)           : return elem_size_flags(labels)
-    case OwnType() | BorrowType()    : return idx_size(opts)
-    case StreamType() | FutureType() : return idx_size(opts)
+    case OwnType() | BorrowType()    : return 4
+    case StreamType() | FutureType() : return 4
 
 def elem_size_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
@@ -1211,15 +1211,15 @@ def load(cx, ptr, t):
     case F64Type()          : return decode_i64_as_float(load_int(cx, ptr, 8))
     case CharType()         : return convert_i32_to_char(cx, load_int(cx, ptr, 4))
     case StringType()       : return load_string(cx, ptr)
-    case ErrorContextType() : return lift_error_context(cx, load_int(cx, ptr, idx_size(cx.opts)))
+    case ErrorContextType() : return lift_error_context(cx, load_int(cx, ptr, 4))
     case ListType(t, l)     : return load_list(cx, ptr, t, l)
     case RecordType(fields) : return load_record(cx, ptr, fields)
     case VariantType(cases) : return load_variant(cx, ptr, cases)
     case FlagsType(labels)  : return load_flags(cx, ptr, labels)
-    case OwnType()          : return lift_own(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
-    case BorrowType()       : return lift_borrow(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
-    case StreamType(t)      : return lift_stream(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
-    case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, idx_size(cx.opts)), t)
+    case OwnType()          : return lift_own(cx, load_int(cx, ptr, 4), t)
+    case BorrowType()       : return lift_borrow(cx, load_int(cx, ptr, 4), t)
+    case StreamType(t)      : return lift_stream(cx, load_int(cx, ptr, 4), t)
+    case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, 4), t)
 
 def load_int(cx, ptr, nbytes, signed = False):
   return int.from_bytes(cx.opts.memory[ptr : ptr+nbytes], 'little', signed = signed)
@@ -1402,15 +1402,15 @@ def store(cx, v, t, ptr):
     case F64Type()          : store_int(cx, encode_float_as_i64(v), ptr, 8)
     case CharType()         : store_int(cx, char_to_i32(v), ptr, 4)
     case StringType()       : store_string(cx, v, ptr)
-    case ErrorContextType() : store_int(cx, lower_error_context(cx, v), ptr, idx_size(cx.opts))
+    case ErrorContextType() : store_int(cx, lower_error_context(cx, v), ptr, 4)
     case ListType(t, l)     : store_list(cx, v, ptr, t, l)
     case RecordType(fields) : store_record(cx, v, ptr, fields)
     case VariantType(cases) : store_variant(cx, v, ptr, cases)
     case FlagsType(labels)  : store_flags(cx, v, ptr, labels)
-    case OwnType()          : store_int(cx, lower_own(cx, v, t), ptr, idx_size(cx.opts))
-    case BorrowType()       : store_int(cx, lower_borrow(cx, v, t), ptr, idx_size(cx.opts))
-    case StreamType(t)      : store_int(cx, lower_stream(cx, v, t), ptr, idx_size(cx.opts))
-    case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, idx_size(cx.opts))
+    case OwnType()          : store_int(cx, lower_own(cx, v, t), ptr, 4)
+    case BorrowType()       : store_int(cx, lower_borrow(cx, v, t), ptr, 4)
+    case StreamType(t)      : store_int(cx, lower_stream(cx, v, t), ptr, 4)
+    case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, 4)
 
 def store_int(cx, v, ptr, nbytes, signed = False):
   cx.opts.memory[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
@@ -1734,13 +1734,13 @@ def flatten_type(t, opts):
     case F64Type()                        : return ['f64']
     case CharType()                       : return ['i32']
     case StringType()                     : return [ptr_type(opts), ptr_type(opts)]
-    case ErrorContextType()               : return [idx_type(opts)]
+    case ErrorContextType()               : return ['i32']
     case ListType(t, l)                   : return flatten_list(t, l, opts)
     case RecordType(fields)               : return flatten_record(fields, opts)
     case VariantType(cases)               : return flatten_variant(cases, opts)
     case FlagsType(labels)                : return ['i32']
-    case OwnType() | BorrowType()         : return [idx_type(opts)]
-    case StreamType() | FutureType()      : return [idx_type(opts)]
+    case OwnType() | BorrowType()         : return ['i32']
+    case StreamType() | FutureType()      : return ['i32']
 
 def flatten_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
@@ -1808,15 +1808,15 @@ def lift_flat(cx, vi, t):
     case F64Type()          : return canonicalize_nan64(vi.next('f64'))
     case CharType()         : return convert_i32_to_char(cx, vi.next('i32'))
     case StringType()       : return lift_flat_string(cx, vi)
-    case ErrorContextType() : return lift_error_context(cx, vi.next(idx_type(cx.opts)))
+    case ErrorContextType() : return lift_error_context(cx, vi.next('i32'))
     case ListType(t, l)     : return lift_flat_list(cx, vi, t, l)
     case RecordType(fields) : return lift_flat_record(cx, vi, fields)
     case VariantType(cases) : return lift_flat_variant(cx, vi, cases)
     case FlagsType(labels)  : return lift_flat_flags(vi, labels)
-    case OwnType()          : return lift_own(cx, vi.next(idx_type(cx.opts)), t)
-    case BorrowType()       : return lift_borrow(cx, vi.next(idx_type(cx.opts)), t)
-    case StreamType(t)      : return lift_stream(cx, vi.next(idx_type(cx.opts)), t)
-    case FutureType(t)      : return lift_future(cx, vi.next(idx_type(cx.opts)), t)
+    case OwnType()          : return lift_own(cx, vi.next('i32'), t)
+    case BorrowType()       : return lift_borrow(cx, vi.next('i32'), t)
+    case StreamType(t)      : return lift_stream(cx, vi.next('i32'), t)
+    case FutureType(t)      : return lift_future(cx, vi.next('i32'), t)
 
 def lift_flat_unsigned(vi, core_width, t_width):
   i = vi.next('i' + str(core_width))
@@ -2538,7 +2538,7 @@ class CoreFuncRef:
 def canon_thread_new_indirect(ft, ftbl: Table[CoreFuncRef], thread, fi, c):
   trap_if(not thread.task.inst.may_leave)
   f = ftbl.get(fi)
-  assert(ft == CoreFuncType(['i32'], []))
+  assert(ft == CoreFuncType(['i32'], []) or ft == CoreFuncType(['i64'], []))
   trap_if(f.t != ft)
   def thread_func(thread):
     [] = call_and_trap_on_throw(f.callee, thread, [c])
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 07b0b5c4..b71cc1be 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -2844,8 +2844,8 @@ def test_mixed_table_memory_types():
   opts64_tbl = LiftLowerOptions(addr_type='i32', tbl_idx_type='i64')
   assert(alignment(StringType(), opts64_tbl) == 4)
   assert(elem_size(StringType(), opts64_tbl) == 8)
-  assert(alignment(OwnType(rt), opts64_tbl) == 8)
-  assert(elem_size(OwnType(rt), opts64_tbl) == 8)
+  assert(alignment(OwnType(rt), opts64_tbl) == 4)
+  assert(elem_size(OwnType(rt), opts64_tbl) == 4)
 
   # Round-trip a type exercising both memory pointers and table pointers
   before = definitions.MAX_FLAT_RESULTS

From a18568e9ee10c51dfb906359afcd67412a16efaa Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Thu, 19 Mar 2026 10:18:15 +0000
Subject: [PATCH 03/25] Use existing canonical options and static parameters

---
 design/mvp/CanonicalABI.md              | 178 +++++++++++-------------
 design/mvp/Concurrency.md               |   9 +-
 design/mvp/Explainer.md                 | 107 +++++++-------
 design/mvp/canonical-abi/definitions.py | 104 +++++++-------
 design/mvp/canonical-abi/run_tests.py   | 133 +++++++++---------
 5 files changed, 255 insertions(+), 276 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 0ae383da..4e6c970e 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -245,43 +245,29 @@ when lifting individual parameters and results:
 @dataclass
 class LiftOptions:
   string_encoding: str = 'utf8'
-  memory: Optional[bytearray] = None
-  addr_type: str = 'i32'
-  tbl_idx_type: str = 'i32'
+  memory: Optional[tuple[bytearray, str]] = None
 
   def equal(lhs, rhs):
     return lhs.string_encoding == rhs.string_encoding and \
-           lhs.memory is rhs.memory and \
-           lhs.addr_type == rhs.addr_type and \
-           lhs.tbl_idx_type == rhs.tbl_idx_type
+           lhs.memory is rhs.memory
 ```
 The `equal` static method is used by `task.return` below to dynamically
 compare equality of just this subset of `canonopt`.
 
-The `addr_type` is `'i32'` when the `memory` canonopt refers to a memory32
-and `'i64'` when it refers to a memory64. The `tbl_idx_type` is `'i32'` by
-default and `'i64'` when the `table64` canonopt is present. These two
-dimensions are independent (e.g., a 64-bit memory with 32-bit table indices
-is valid).
+The `str` field in `memory` is `'i32'` or `'i64'`, indicating the address type of the core Wasm `memory`.
 
 The following helper functions return the byte size and core value type of
-memory pointers and table indices, based on the options:
+memory pointers:
 ```python
-def ptr_size(opts):
-  match opts.addr_type:
-    case 'i32': return 4
-    case 'i64': return 8
-
 def ptr_type(opts):
-  return opts.addr_type
+  if opts.memory is None:
+    return 'i32'
+  return opts.memory[1]
 
-def idx_size(opts):
-  match opts.tbl_idx_type:
+def ptr_size(opts):
+  match ptr_type(opts):
     case 'i32': return 4
     case 'i64': return 8
-
-def idx_type(opts):
-  return opts.tbl_idx_type
 ```
 
 The `LiftLowerOptions` class contains the subset of [`canonopt`] which are
@@ -1386,7 +1372,7 @@ class BufferGuestImpl(Buffer):
     trap_if(length > Buffer.MAX_LENGTH)
     if t and length > 0:
       trap_if(ptr != align_to(ptr, alignment(t, cx.opts)))
-      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory))
+      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory[0]))
     self.cx = cx
     self.t = t
     self.ptr = ptr
@@ -2038,7 +2024,7 @@ the top-level case analysis:
 ```python
 def load(cx, ptr, t):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory[0]))
   match despecialize(t):
     case BoolType()         : return convert_int_to_bool(load_int(cx, ptr, 1))
     case U8Type()           : return load_int(cx, ptr, 1)
@@ -2068,7 +2054,7 @@ Integers are loaded directly from memory, with their high-order bit interpreted
 according to the signedness of the type.
 ```python
 def load_int(cx, ptr, nbytes, signed = False):
-  return int.from_bytes(cx.opts.memory[ptr : ptr+nbytes], 'little', signed = signed)
+  return int.from_bytes(cx.opts.memory[0][ptr : ptr+nbytes], 'little', signed = signed)
 ```
 
 Integer-to-boolean conversions treats `0` as `false` and all other bit-patterns
@@ -2169,9 +2155,9 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
         encoding = 'latin-1'
 
   trap_if(ptr != align_to(ptr, alignment))
-  trap_if(ptr + byte_length > len(cx.opts.memory))
+  trap_if(ptr + byte_length > len(cx.opts.memory[0]))
   try:
-    s = cx.opts.memory[ptr : ptr+byte_length].decode(encoding)
+    s = cx.opts.memory[0][ptr : ptr+byte_length].decode(encoding)
   except UnicodeError:
     trap()
 
@@ -2198,7 +2184,7 @@ def load_list(cx, ptr, elem_type, maybe_length):
 
 def load_list_from_range(cx, ptr, length, elem_type):
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory))
+  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory[0]))
   return load_list_from_valid_range(cx, ptr, length, elem_type)
 
 def load_list_from_valid_range(cx, ptr, length, elem_type):
@@ -2324,7 +2310,7 @@ The `store` function defines how to write a value `v` of a given value type
 ```python
 def store(cx, v, t, ptr):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory[0]))
   match despecialize(t):
     case BoolType()         : store_int(cx, int(bool(v)), ptr, 1)
     case U8Type()           : store_int(cx, v, ptr, 1)
@@ -2356,7 +2342,7 @@ the `signed` parameter is only present to ensure that the internal range checks
 of `int.to_bytes` are satisfied.
 ```python
 def store_int(cx, v, ptr, nbytes, signed = False):
-  cx.opts.memory[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
+  cx.opts.memory[0][ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
 ```
 
 Floats are stored directly into memory, with the sign and payload bits of NaN
@@ -2491,10 +2477,10 @@ def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment
   trap_if(dst_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
-  trap_if(ptr + dst_byte_length > len(cx.opts.memory))
+  trap_if(ptr + dst_byte_length > len(cx.opts.memory[0]))
   encoded = src.encode(dst_encoding)
   assert(dst_byte_length == len(encoded))
-  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
   return (ptr, src_code_units)
 ```
 The `max_string_byte_length` function ensures that the high bit of a
@@ -2516,19 +2502,19 @@ def store_latin1_to_utf8(cx, src, src_code_units):
 def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
   assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 1, src_code_units)
-  trap_if(ptr + src_code_units > len(cx.opts.memory))
+  trap_if(ptr + src_code_units > len(cx.opts.memory[0]))
   for i,code_point in enumerate(src):
     if ord(code_point) < 2**7:
-      cx.opts.memory[ptr + i] = ord(code_point)
+      cx.opts.memory[0][ptr + i] = ord(code_point)
     else:
       trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
-      trap_if(ptr + worst_case_size > len(cx.opts.memory))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
       encoded = src.encode('utf-8')
-      cx.opts.memory[ptr+i : ptr+len(encoded)] = encoded[i : ]
+      cx.opts.memory[0][ptr+i : ptr+len(encoded)] = encoded[i : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 1, len(encoded))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
       return (ptr, len(encoded))
   return (ptr, src_code_units)
 ```
@@ -2543,13 +2529,13 @@ def store_utf8_to_utf16(cx, src, src_code_units):
   trap_if(worst_case_size > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + worst_case_size > len(cx.opts.memory))
+  trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
   if len(encoded) < worst_case_size:
     ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + len(encoded) > len(cx.opts.memory))
+    trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
   code_units = int(len(encoded) / 2)
   return (ptr, code_units)
 ```
@@ -2567,33 +2553,33 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
   assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_code_units)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_code_units > len(cx.opts.memory))
+  trap_if(ptr + src_code_units > len(cx.opts.memory[0]))
   dst_byte_length = 0
   for usv in src:
     if ord(usv) < (1 << 8):
-      cx.opts.memory[ptr + dst_byte_length] = ord(usv)
+      cx.opts.memory[0][ptr + dst_byte_length] = ord(usv)
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
       trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
-      trap_if(ptr + worst_case_size > len(cx.opts.memory))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
       for j in range(dst_byte_length-1, -1, -1):
-        cx.opts.memory[ptr + 2*j] = cx.opts.memory[ptr + j]
-        cx.opts.memory[ptr + 2*j + 1] = 0
+        cx.opts.memory[0][ptr + 2*j] = cx.opts.memory[0][ptr + j]
+        cx.opts.memory[0][ptr + 2*j + 1] = 0
       encoded = src.encode('utf-16-le')
-      cx.opts.memory[ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
+      cx.opts.memory[0][ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
         trap_if(ptr != align_to(ptr, 2))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
       tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
       return (ptr, tagged_code_units)
   if dst_byte_length < src_code_units:
     ptr = cx.opts.realloc(ptr, src_code_units, 2, dst_byte_length)
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + dst_byte_length > len(cx.opts.memory))
+    trap_if(ptr + dst_byte_length > len(cx.opts.memory[0]))
   return (ptr, dst_byte_length)
 ```
 
@@ -2613,17 +2599,17 @@ def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   trap_if(src_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_byte_length > len(cx.opts.memory))
+  trap_if(ptr + src_byte_length > len(cx.opts.memory[0]))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
   if any(ord(c) >= (1 << 8) for c in src):
     tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
     return (ptr, tagged_code_units)
   latin1_size = int(len(encoded) / 2)
   for i in range(latin1_size):
-    cx.opts.memory[ptr + i] = cx.opts.memory[ptr + 2*i]
+    cx.opts.memory[0][ptr + i] = cx.opts.memory[0][ptr + 2*i]
   ptr = cx.opts.realloc(ptr, src_byte_length, 1, latin1_size)
-  trap_if(ptr + latin1_size > len(cx.opts.memory))
+  trap_if(ptr + latin1_size > len(cx.opts.memory[0]))
   return (ptr, latin1_size)
 ```
 
@@ -2653,7 +2639,7 @@ def store_list_into_range(cx, v, elem_type):
   trap_if(byte_length >= (1 << (ptr_size(cx.opts) * 8)))
   ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + byte_length > len(cx.opts.memory))
+  trap_if(ptr + byte_length > len(cx.opts.memory[0]))
   store_list_into_valid_range(cx, v, ptr, elem_type)
   return (ptr, len(v))
 
@@ -3164,7 +3150,7 @@ def lift_flat_values(cx, max_flat, vi, ts):
     ptr = vi.next(ptr_type(cx.opts))
     tuple_type = TupleType(ts)
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory[0]))
     return list(load(cx, ptr, tuple_type).values())
   else:
     return [ lift_flat(cx, vi, t) for t in ts ]
@@ -3190,7 +3176,7 @@ def lower_flat_values(cx, max_flat, vs, ts, out_param = None):
       ptr = out_param.next(ptr_type(cx.opts))
       flat_vals = []
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory[0]))
     store(cx, tuple_value, tuple_type, ptr)
   else:
     flat_vals = []
@@ -3220,17 +3206,19 @@ specifying `string-encoding=utf8` twice is an error. Each individual option, if
 present, is validated as such:
 
 * `string-encoding=N` - can be passed at most once, regardless of `N`.
-* `memory` - this is a subtype of `(memory 1)`
-* `realloc` - the function has type `(func (param T T i32 T) (result T))`
-  where `T` is `i32` or `i64` determined by `memory` as described [above](#canonopt)
+* `memory` - this is a subtype of `(memory 1)`. In the rest of the explainer,
+  `PTR` will refer to either `i32` or `i64` core Wasm types as determined by the
+  type of this `memory`.
+* `realloc` - the function has type `(func (param PTR PTR PTR PTR) (result PTR))`
+  where `PTR` is `i32` or `i64` as described above.
 * if `realloc` is present, then `memory` must be present
 * `post-return` - only allowed on [`canon lift`](#canon-lift), which has rules
   for validation
 * 🔀 `async` - cannot be present with `post-return`
 * 🔀,not(🚟) `async` - `callback` must also be present. Note that with the 🚟
   feature (the "stackful" ABI), this restriction is lifted.
-* 🔀 `callback` - the function has type `(func (param i32 i32 T) (result i32))`
-  where the `T` parameter is the payload address and cannot be present
+* 🔀 `callback` - the function has type `(func (param i32 i32 PTR) (result i32))`
+  where the `PTR` parameter is the payload address and cannot be present
   without `async` and is only allowed with
   [`canon lift`](#canon-lift)
 
@@ -3647,7 +3635,7 @@ For a canonical definition:
 validation specifies:
 * `$rt` must refer to locally-defined (not imported) resource type
 * `$f` is given type `(func (param $rt.rep) (result i32))`, where `$rt.rep` is
-  currently fixed to be `i32`.
+  `i32` or `i64`.
 
 Calling `$f` invokes the following function, which adds an owning handle
 containing the given resource representation to the current component
@@ -3726,7 +3714,7 @@ For a canonical definition:
 validation specifies:
 * `$rt` must refer to a locally-defined (not imported) resource type
 * `$f` is given type `(func (param i32) (result $rt.rep))`, where `$rt.rep` is
-  currently fixed to be `i32`.
+  `i32` or `i64`.
 
 Calling `$f` invokes the following function, which extracts the resource
 representation from the handle in the current component instance's `handles`
@@ -3749,7 +3737,7 @@ For a canonical definition:
 (canon context.get $t $i (core func $f))
 ```
 validation specifies:
-* `$t` must be `i32` or `i64` (see [here][thread-local storage])
+* `$t` must be `i32` or `i64` (see [here][thread-local storage]).
 * `$i` must be less than `Thread.CONTEXT_LENGTH` (`2`)
 * `$f` is given type `(func (result $t))`
 
@@ -3942,24 +3930,24 @@ For a canonical definition:
 (canon waitable-set.wait $cancellable? (memory $mem) (core func $f))
 ```
 validation specifies:
-* `$f` is given type `(func (param $si i32) (param $ptr T) (result i32))` where
-  `T` is the address type of `$mem`.
+* `$f` is given type `(func (param $si i32) (param $ptr) (result i32))` where
+  `$ptr` is the address type of `$mem`.
 
 Calling `$f` invokes the following function which waits for progress to be made
 on a `Waitable` in the given waitable set (indicated by index `$si`) and then
 returning its `EventCode` and writing the payload values into linear memory:
 ```python
-def canon_waitable_set_wait(cancellable, mem, opts, thread, si, ptr):
+def canon_waitable_set_wait(cancellable, mem, thread, si, ptr):
   trap_if(not thread.task.inst.may_leave)
   trap_if(not thread.task.may_block())
   wset = thread.task.inst.handles.get(si)
   trap_if(not isinstance(wset, WaitableSet))
   event = thread.task.wait_until(lambda: True, thread, wset, cancellable)
-  return unpack_event(mem, opts, thread, ptr, event)
+  return unpack_event(mem, thread, ptr, event)
 
-def unpack_event(mem, opts, thread, ptr, e: EventTuple):
+def unpack_event(mem, thread, ptr, e: EventTuple):
   event, p1, p2 = e
-  cx = LiftLowerContext(LiftLowerOptions(memory = mem, addr_type = opts.addr_type, tbl_idx_type = opts.tbl_idx_type), thread.task.inst)
+  cx = LiftLowerContext(LiftLowerOptions(memory = mem), thread.task.inst)
   store(cx, p1, U32Type(), ptr)
   store(cx, p2, U32Type(), ptr + 4)
   return [event]
@@ -3986,14 +3974,14 @@ For a canonical definition:
 (canon waitable-set.poll $cancellable? (memory $mem) (core func $f))
 ```
 validation specifies:
-* `$f` is given type `(func (param $si i32) (param $ptr T) (result i32))` where
-  `T` is the address type of `$mem`.
+* `$f` is given type `(func (param $si i32) (param $ptr) (result i32))` where
+  `$ptr` is the address type of `$mem`.
 
 Calling `$f` invokes the following function, which either returns an event that
 was pending on one of the waitables in the given waitable set (the same way as
 `waitable-set.wait`) or, if there is none, returns `0`.
 ```python
-def canon_waitable_set_poll(cancellable, mem, opts, thread, si, ptr):
+def canon_waitable_set_poll(cancellable, mem, thread, si, ptr):
   trap_if(not thread.task.inst.may_leave)
   wset = thread.task.inst.handles.get(si)
   trap_if(not isinstance(wset, WaitableSet))
@@ -4003,7 +3991,7 @@ def canon_waitable_set_poll(cancellable, mem, opts, thread, si, ptr):
     event = (EventCode.NONE, 0, 0)
   else:
     event = wset.get_pending_event()
-  return unpack_event(mem, opts, thread, ptr, event)
+  return unpack_event(mem, thread, ptr, event)
 ```
 If `cancellable` is set, then `waitable-set.poll` will return whether the
 supertask has already or concurrently requested cancellation.
@@ -4205,7 +4193,8 @@ For canonical definitions:
 In addition to [general validation of `$opts`](#canonopt-validation) validation
 specifies:
 * `$f` is given type `(func (param i32 T T) (result T))` where `T` is `i32` or
-  `i64` determined by the `memory` from `$opts`
+  `i64` as determined by the `memory` from `$opts` (or `i32` by default if no
+  `memory` is present).
 * `$stream_t` must be a type of the form `(stream $t?)`
 * If `$t` is present:
   * [`lower($t)` above](#canonopt-validation) defines required options for `stream.write`
@@ -4270,7 +4259,7 @@ context switches. Next, the stream's `state` is updated based on the result
 being delivered to core wasm so that, once a stream end has been notified that
 the other end dropped, calling anything other than `stream.drop-*` traps.
 Lastly, `stream_event` packs the `CopyResult` and number of elements copied up
-until this point into a single `T`-sized payload for core wasm.
+until this point into a single `PTR`-sized payload for core wasm.
 ```python
   def stream_event(result, reclaim_buffer):
     reclaim_buffer()
@@ -4321,7 +4310,8 @@ For canonical definitions:
 In addition to [general validation of `$opts`](#canonopt-validation) validation
 specifies:
 * `$f` is given type `(func (param i32 T) (result i32))` where `T` is `i32` or
-  `i64` determined by the `memory` from `$opts`
+  `i64` as determined by the `memory` from `$opts` (or `i32` by default if no
+  `memory` is present).
 * `$future_t` must be a type of the form `(future $t?)`
 * If `$t` is present:
   * [`lift($t)` above](#canonopt-validation) defines required options for `future.read`
@@ -4544,11 +4534,12 @@ For a canonical definition:
 (canon thread.new-indirect $ft $ftbl (core func $new_indirect))
 ```
 validation specifies
-* `$ft` must refer to the type `(func (param $c T))` where `T` is `i32` or
-  `i64` determined by the linear memory of the component instance
+* `$ft` must refer to the type `(func (param $c))` where `$c` is `i32` or
+  `i64`.
 * `$ftbl` must refer to a table whose element type matches `funcref`
-* `$new_indirect` is given type `(func (param $fi I) (param $c T) (result i32))`
-  where `I` is `i32` or `i64` determined by `$ftbl`'s table type
+* `$new_indirect` is given type `(func (param $fi) (param $c) (result i32))`
+  where `$fi` is `i32` or `i64` as determined by `$ftbl`'s table type and
+  `$c` has the same type as the parameter in `$ft`.
 
 Calling `$new_indirect` invokes the following function which reads a `funcref`
 from `$ftbl` (trapping if out-of-bounds, null or the wrong type), calls the
@@ -4743,8 +4734,9 @@ For a canonical definition:
 (canon error-context.new $opts (core func $f))
 ```
 validation specifies:
-* `$f` is given type `(func (param T T) (result i32))` where `T` is `i32` or
-  `i64` determined by the `memory` from `$opts`
+* `$f` is given type `(func (param $ptr) (param $units) (result i32))`
+  where `$ptr` and `$units` are both `i32` or `i64` as determined by
+  the `memory` field in `$opts`.
 * `async` is not present
 * `memory` must be present
 
@@ -4785,8 +4777,8 @@ For a canonical definition:
 (canon error-context.debug-message $opts (core func $f))
 ```
 validation specifies:
-* `$f` is given type `(func (param i32 T))` where `T` is `i32` or `i64`
-  determined by the `memory` from `$opts`
+* `$f` is given type `(func (param i32) (param $ptr))` where `$ptr` is `i32` or `i64`
+  as determined by the `memory` from `$opts`
 * `async` is not present
 * `memory` must be present
 * `realloc` must be present
@@ -4837,11 +4829,10 @@ For a canonical definition:
 (canon thread.spawn-ref shared? $ft (core func $spawn_ref))
 ```
 validation specifies:
-* `$ft` must refer to the type `(shared? (func (param $c T)))` where `T` is
-  `i32` or `i64` determined by the linear memory of the component instance
-  (see explanation below)
+* `$ft` must refer to the type `(shared? (func (param $c)))` where `$c` has
+  type `i32` or `i64`.
 * `$spawn_ref` is given type
-  `(shared? (func (param $f (ref null $ft)) (param $c T) (result $e i32)))`
+  `(shared? (func (param $f (ref null $ft)) (param $c) (result $e i32)))`
 
 When the `shared` immediate is not present, the spawned thread is
 *cooperative*, only switching at specific program points. When the `shared`
@@ -4850,7 +4841,7 @@ parallel with all other threads.
 
 > Note: ideally, a thread could be spawned with [arbitrary thread parameters].
 > Currently, that would require additional work in the toolchain to support so,
-> for simplicity, the current proposal simply fixes a single `T` parameter
+> for simplicity, the current proposal simply fixes a single `i32` or `i64` parameter
 > type. However, `thread.spawn-ref` could be extended to allow arbitrary thread
 > parameters in the future, once it's concretely beneficial to the toolchain.
 > The inclusion of `$ft` ensures backwards compatibility for when arbitrary
@@ -4880,13 +4871,12 @@ For a canonical definition:
 (canon thread.spawn-indirect shared? $ft $tbl (core func $spawn_indirect))
 ```
 validation specifies:
-* `$ft` must refer to the type `(shared? (func (param $c T)))` is allowed
-  where `T` is `i32` or `i64` determined by the linear memory of the component
-  instance (see explanation in `thread.spawn-ref` above)
+* `$ft` must refer to the type `(shared? (func (param $c)))`
+  where `$c` is either `i32` or `i64`.
 * `$tbl` must refer to a shared table whose element type matches
   `(ref null (shared? func))`
 * `$spawn_indirect` is given type
-  `(shared? (func (param $i I) (param $c T) (result $e i32)))` where `I` is
+  `(shared? (func (param $i) (param $c) (result $e i32)))` where `$i` is
   `i32` or `i64` determined by `$tbl`'s table type
 
 When the `shared` immediate is not present, the spawned thread is
diff --git a/design/mvp/Concurrency.md b/design/mvp/Concurrency.md
index b357d3f2..2e756ef3 100644
--- a/design/mvp/Concurrency.md
+++ b/design/mvp/Concurrency.md
@@ -436,10 +436,9 @@ stackless async ABI is used, returning the "exit" code to the event loop. This
 non-reuse of thread-local storage between distinct export calls avoids what
 would otherwise be a likely source of TLS-related memory leaks.
 
-The type of `context.{get,set}` values (`i32` or `i64`) is determined by the
-`memory` canonopt (matching the pointer size of the linear memory). When
-[wasm-gc] is integrated, these integral context values can serve as indices
-into guest-managed tables of typed GC references.
+When [wasm-gc] is integrated into the Canonical ABI, `context.{get,set}` will be
+relaxed so that these integral context values can serve as indices into
+guest-managed tables of typed GC references.
 
 Since the same mutable thread-local storage cells are shared by all core wasm
 running under the same thread in the same component, the cells' contents must
@@ -885,7 +884,7 @@ world w {
   import quux: async func(t: list<u32; 17>) -> string;
 }
 ```
-the default/synchronous lowered import function signatures are:
+the default/synchronous lowered import function signatures (assuming 32-bit memories) are:
 ```wat
 ;; sync
 (func $foo (param $s-ptr i32) (param $s-len i32) (result i32))
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 26661274..e4513716 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -718,11 +718,11 @@ Just like with handles, in the Component Model, async value types are
 lifted-from and lowered-into `i32` values that index an encapsulated
 per-component-instance table that is maintained by the canonical ABI built-ins
 [below](#canonical-definitions). The Component-Model-defined ABI for creating,
-writing-to and reading-from `stream` and `future` values is meant to be bound to
-analogous source-language features like promises, futures, streams, iterators,
-generators and channels so that developers can use these familiar high-level
-concepts when working directly with component types, without the need to
-manually write low-level async glue code. For languages like C without
+writing-to and reading-from `stream` and `future` values is meant to be bound
+to analogous source-language features like promises, futures, streams,
+iterators, generators and channels so that developers can use these familiar
+high-level concepts when working directly with component types, without the
+need to manually write low-level async glue code. For languages like C without
 language-level concurrency support, these ABIs (described in detail in the
 [Canonical ABI explainer]) can be exposed directly as function imports and used
 like normal low-level Operation System I/O APIs.
@@ -1281,7 +1281,6 @@ canonopt ::= string-encoding=utf8
            | (memory <core:memidx>)
            | (realloc <core:funcidx>)
            | (post-return <core:funcidx>)
-           | table64
            | async 🔀
            | (callback <core:funcidx>) 🔀
 ```
@@ -1300,22 +1299,24 @@ default is `utf8`. It is a validation error to include more than one
 
 The `(memory ...)` option specifies the memory that the Canonical ABI will
 use to load and store values. If the Canonical ABI needs to load or store,
-validation requires this option to be present (there is no default).
+validation requires this option to be present (there is no default). The types
+of lowered functions may also depend on whether this memory is a 32-bit or
+64-bit memory if pointers are transitively contained in parameters or results.
+In what follows the notation `PTR` will refer to the core Wasm type `i32` or
+`i64` corresponding to the type of the `(memory ...)` option.
 
 The `(realloc ...)` option specifies a core function that is validated to
 have the following core function type:
 ```wat
-(func (param $originalPtr $addr)
-      (param $originalSize $addr)
-      (param $alignment i32)
-      (param $newSize $addr)
-      (result $addr))
-```
-where `$addr` is `i32` when the `memory` canonopt refers to a 32-bit memory or
-`i64` when it refers to a 64-bit memory. The Canonical ABI will use `realloc`
-both to allocate (passing `0` for the first two parameters) and reallocate. If
-the Canonical ABI needs `realloc`, validation requires this option to be
-present (there is no default).
+(func (param $originalPtr PTR)
+      (param $originalSize PTR)
+      (param $alignment PTR)
+      (param $newSize PTR)
+      (result PTR))
+```
+The Canonical ABI will use `realloc` both to allocate (passing `0` for the first
+two parameters) and reallocate. If the Canonical ABI needs `realloc`, validation
+requires this option to be present (there is no default).
 
 The `(post-return ...)` option may only be present in `canon lift` when
 `async` is not present and specifies a core function to be called with the
@@ -1338,10 +1339,10 @@ validated to have the following core function type:
 ```wat
 (func (param $ctx i32)
       (param $event i32)
-      (param $payload $addr)
+      (param $payload PTR)
       (result $done i32))
 ```
-where `$addr` is determined by the `memory` canonopt as described above.
+where `PTR` is determined by the `memory` canonopt as described above.
 Again, see the [concurrency explainer] for more details.
 
 Based on this description of the AST, the [Canonical ABI explainer] gives a
@@ -1474,17 +1475,17 @@ canon ::= ...
 | Synopsis                   |                            |
 | -------------------------- | -------------------------- |
 | Approximate WIT signature  | `func<T>(rep: T.rep) -> T` |
-| Canonical ABI signature    | `[rep:i32] -> [i32]`       |
+| Canonical ABI signature    | `[rep: T.rep] -> [i32]`     |
 
 The `resource.new` built-in creates a new resource (of resource type `T`) with
 `rep` as its representation, and returns a new handle pointing to the new
 resource. Validation only allows `resource.rep T` to be used within the
 component that defined `T`.
 
-In the Canonical ABI, `T.rep` is defined to be the `$rep` in the
-`(type $T (resource (rep $rep) ...))` type definition that defined `T`. While
-it's designed to allow different types in the future, it is currently
-hard-coded to always be `i32` or `i64`.
+In the Canonical ABI, `T.rep` is defined to be the `$rep` in the `(type $T
+(resource (rep $rep) ...))` type definition that defined `T`. While it's
+designed to allow different types in the future, it is currently fixed to be
+`i32` or `i64`.
 
 For details, see [`canon_resource_new`] in the Canonical ABI explainer.
 
@@ -1507,7 +1508,7 @@ For details, see [`canon_resource_drop`] in the Canonical ABI explainer.
 | Synopsis                   |                          |
 | -------------------------- | ------------------------ |
 | Approximate WIT signature  | `func<T>(t: T) -> T.rep` |
-| Canonical ABI signature    | `[t:i32] -> [i32]`       |
+| Canonical ABI signature    | `[t:i32] -> [T.rep]`     |
 
 The `resource.rep` built-in returns the representation of the resource (with
 resource type `T`) pointed to by the handle `t`. Validation only allows
@@ -1516,7 +1517,7 @@ resource type `T`) pointed to by the handle `t`. Validation only allows
 In the Canonical ABI, `T.rep` is defined to be the `$rep` in the
 `(type $T (resource (rep $rep) ...))` type definition that defined `T`. While
 it's designed to allow different types in the future, it is currently
-hard-coded to always be `i32`.
+fixed to be `i32` or `i64`.
 
 As an example, the following component imports the `resource.new` built-in,
 allowing it to create and return new resources to its client:
@@ -1558,12 +1559,12 @@ See the [concurrency explainer] for background.
 | Synopsis                   |                    |
 | -------------------------- | ------------------ |
 | Approximate WIT signature  | `func<T,i>() -> T` |
-| Canonical ABI signature    | `[] -> [$T]`          |
+| Canonical ABI signature    | `[] -> [T]`          |
 
 The `context.get` built-in returns the `i`th element of the [current thread]'s
 [thread-local storage] array. Validation currently restricts `i` to be less
-than 2 and `T` to be `i32` or `i64` (determined by the `memory` canonopt), but
-these restrictions may be relaxed in the future.
+than 2 and `T` to be `i32` or `i64`, but these restrictions may be relaxed in
+the future.
 
 For details, see [Thread-Local Storage] in the concurrency explainer and
 [`canon_context_get`] in the Canonical ABI explainer.
@@ -1573,12 +1574,12 @@ For details, see [Thread-Local Storage] in the concurrency explainer and
 | Synopsis                   |                   |
 | -------------------------- | ----------------- |
 | Approximate WIT signature  | `func<T,i>(v: T)` |
-| Canonical ABI signature    | `[$T] -> []`         |
+| Canonical ABI signature    | `[T] -> []`         |
 
 The `context.set` built-in sets the `i`th element of the [current thread]'s
 [thread-local storage] array to the value `v`. Validation currently restricts
-`i` to be less than 2 and `T` to be `i32` or `i64` (determined by the `memory`
-canonopt), but these restrictions may be relaxed in the future.
+`i` to be less than 2 and `T` to be `i32` or `i64`, but these restrictions may
+be relaxed in the future.
 
 For details, see [Thread-Local Storage] in the concurrency explainer and
 [`canon_context_set`] in the Canonical ABI explainer.
@@ -1677,7 +1678,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                                                |
 | -------------------------- | ---------------------------------------------- |
 | Approximate WIT signature  | `func<cancellable?>(s: waitable-set) -> event` |
-| Canonical ABI signature    | `[s:i32 payload-addr:$addr] -> [event-code:i32]` |
+| Canonical ABI signature    | `[s:i32 payload-addr:PTR] -> [event-code:i32]` |
 
 where `event` is defined in WIT as:
 ```wit
@@ -1742,7 +1743,7 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 | Synopsis                   |                                                |
 | -------------------------- | ---------------------------------------------- |
 | Approximate WIT signature  | `func<cancellable?>(s: waitable-set) -> event` |
-| Canonical ABI signature    | `[s:i32 payload-addr:$addr] -> [event-code:i32]` |
+| Canonical ABI signature    | `[s:i32 payload-addr:PTR] -> [event-code:i32]` |
 
 where `event` is defined as in [`waitable-set.wait`](#-waitable-setwait).
 
@@ -1860,7 +1861,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | -------------------------------------------- | ----------------------------------------------------------------------------------------------- |
 | Approximate WIT signature for `stream.read`  | `func<stream<T?>>(e: readable-stream-end<T?>, b: writable-buffer<T>?) -> option<stream-result>` |
 | Approximate WIT signature for `stream.write` | `func<stream<T?>>(e: writable-stream-end<T?>, b: readable-buffer<T>?) -> option<stream-result>` |
-| Canonical ABI signature                      | `[stream-end:i32 ptr:$addr num:$addr] -> [$addr]`                                               |
+| Canonical ABI signature                      | `[stream-end:i32 ptr:PTR num:PTR] -> [PTR]`                                               |
 
 where `stream-result` is defined in WIT as:
 ```wit
@@ -1916,10 +1917,10 @@ any subsequent operation on the stream other than `stream.drop-{readable,writabl
 traps.
 
 In the Canonical ABI, the `{readable,writable}-stream-end` is passed as an
-`i32` index into the component instance's table followed by a pair of `$addr`s
+`i32` index into the component instance's table followed by a pair of `PTR`s
 describing the linear memory offset and size-in-elements of the
 `{readable,writable}-buffer<T>`. The `option<stream-result>` return value is
-bit-packed into a single `$addr` where:
+bit-packed into a single `PTR` where:
 * all-ones represents `none`.
 * Otherwise, the `result` is in the low 4 bits and the `progress` is in the
   remaining high bits.
@@ -1933,7 +1934,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 | -------------------------------------------- | -------------------------------------------------------------------------------------------------------- |
 | Approximate WIT signature for `future.read`  | `func<future<T?>>(e: readable-future-end<T?>, b: writable-buffer<T; 1>?) -> option<future-read-result>`  |
 | Approximate WIT signature for `future.write` | `func<future<T?>>(e: writable-future-end<T?>, v: readable-buffer<T; 1>?) -> option<future-write-result>` |
-| Canonical ABI signature                      | `[readable-future-end:i32 ptr:$addr] -> [i32]`                                                           |
+| Canonical ABI signature                      | `[readable-future-end:i32 ptr:PTR] -> [i32]`                                                           |
 
 where `future-{read,write}-result` are defined in WIT as:
 ```wit
@@ -1984,7 +1985,7 @@ called before successfully writing a value.
 
 In the Canonical ABI, the `{readable,writable}-future-end` is passed as an
 `i32` index into the component instance's table followed by a single
-`$addr` describing the linear memory offset of the
+`PTR` describing the linear memory offset of the
 `{readable,writable}-buffer<T; 1>`. The `option<future-{read,write}-result>`
 return value is bit-packed into the single `i32` return value where all-ones
 represents `none`. And, `future-read-result.cancelled` is encoded
@@ -2060,19 +2061,21 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 
 | Synopsis                   |                                                               |
 | -------------------------- | ------------------------------------------------------------- |
-| Approximate WIT signature  | `func<FuncT,tableidx>(fi: u32, c: FuncT.params[0]) -> thread` |
-| Canonical ABI signature    | `[fi:$idx c:$addr] -> [i32]`                                  |
+| Approximate WIT signature  | `func<FuncT,tableidx>(fi: uIDX, c: FuncT.params[0]) -> thread` |
+| Canonical ABI signature    | `[fi:iIDX c: FuncT.params[0]] -> [i32]`                                  |
 
 The `thread.new-indirect` built-in adds a new thread to the current component
 instance's table, returning the index of the new thread. The function table
 supplied via [`core:tableidx`] is indexed by the `fi` operand and then
 dynamically checked to match the type `FuncT` (in the same manner as
-`call_indirect`). Lastly, the indexed function is called in the new thread
-with `c` as its first and only parameter.
+`call_indirect`). The types `uIDX` and `iIDX` of `fi` are `u32`/`i32` or
+`u64`/`i64` as determined by the table supplied by [`core:tableidx`]. Lastly,
+the indexed function is called in the new thread with `c` as its first and only
+parameter.
 
-Currently, `FuncT` must be `(func (param $addr))` and thus `c` must always be
-an `$addr`, but this restriction can be loosened in the future as the Canonical
-ABI is extended for [GC].
+Currently, `FuncT` must be `(func (param i32))` or `(func (param i64))` and thus
+`c` must always be an `i32` or `i64`, but this restriction can be loosened in
+the future as the Canonical ABI is extended for [GC].
 
 As explained in the [concurrency explainer], a thread created by
 `thread.new-indirect` is initially in a suspended state and must be resumed
@@ -2205,7 +2208,7 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 | Synopsis                   |                                                                    |
 | -------------------------- | ------------------------------------------------------------------ |
 | Approximate WIT signature  | `func<shared?,FuncT>(f: FuncT, c: FuncT.params[0]) -> bool`        |
-| Canonical ABI signature    | `shared? [f:(ref null (shared (func (param $addr))) c:$addr] -> [i32]` |
+| Canonical ABI signature    | `shared? [f:(ref null (shared (func (param FuncT.params[0])))) c:FuncT.params[0]] -> [i32]` |
 
 The `thread.spawn-ref` built-in is an optimization, fusing a call to
 `thread.new_ref` (assuming `thread.new_ref` was added as part of adding a
@@ -2219,8 +2222,8 @@ For details, see [`canon_thread_spawn_ref`] in the Canonical ABI explainer.
 
 | Synopsis                   |                                                                    |
 | -------------------------- | ------------------------------------------------------------------ |
-| Approximate WIT signature  | `func<shared?,FuncT,tableidx>(i: u32, c: FuncT.params[0]) -> bool` |
-| Canonical ABI signature    | `shared? [i:$idx c:$addr] -> [i32]`                                |
+| Approximate WIT signature  | `func<shared?,FuncT,tableidx>(i: uIDX, c: FuncT.params[0]) -> bool` |
+| Canonical ABI signature    | `shared? [i:iIDX c:FuncT.params[0]] -> [i32]`                                |
 
 The `thread.spawn-indirect` built-in is an optimization, fusing a call to
 [`thread.new-indirect`](#-threadnew-indirect) with a call to
@@ -2255,7 +2258,7 @@ explainer.
 | Synopsis                         |                                          |
 | -------------------------------- | ---------------------------------------- |
 | Approximate WIT signature        | `func(message: string) -> error-context` |
-| Canonical ABI signature          | `[ptr:$addr len:$addr] -> [i32]`         |
+| Canonical ABI signature          | `[ptr:PTR len:PTR] -> [i32]`         |
 
 The `error-context.new` built-in returns a new `error-context` value. The given
 string is non-deterministically transformed to produce the `error-context`'s
@@ -2271,7 +2274,7 @@ For details, see [`canon_error_context_new`] in the Canonical ABI explainer.
 | Synopsis                         |                                         |
 | -------------------------------- | --------------------------------------- |
 | Approximate WIT signature        | `func(errctx: error-context) -> string` |
-| Canonical ABI signature          | `[errctxi:i32 ptr:$addr] -> []`         |
+| Canonical ABI signature          | `[errctxi:i32 ptr:PTR] -> []`         |
 
 The `error-context.debug-message` built-in returns the
 [debug message](#error-context-type) of the given `error-context`.
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 0fea104d..a9d9e3da 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -1,4 +1,4 @@
-# After the Boilerplate section, this file is ordered to line up with the code
+# After the Boilerplate section, this file is ordered to line up with the code
 # blocks in ../CanonicalABI.md (split by # comment lines). If you update this
 # file, don't forget to update ../CanonicalABI.md.
 
@@ -233,32 +233,22 @@ def __init__(self, opts, inst, borrow_scope = None):
 @dataclass
 class LiftOptions:
   string_encoding: str = 'utf8'
-  memory: Optional[bytearray] = None
-  addr_type: str = 'i32'
-  tbl_idx_type: str = 'i32'
+  memory: Optional[tuple[bytearray, str]] = None
 
   def equal(lhs, rhs):
     return lhs.string_encoding == rhs.string_encoding and \
-           lhs.memory is rhs.memory and \
-           lhs.addr_type == rhs.addr_type and \
-           lhs.tbl_idx_type == rhs.tbl_idx_type
-
-def ptr_size(opts):
-  match opts.addr_type:
-    case 'i32': return 4
-    case 'i64': return 8
+           lhs.memory is rhs.memory
 
 def ptr_type(opts):
-  return opts.addr_type
+  if opts.memory is None:
+    return 'i32'
+  return opts.memory[1]
 
-def idx_size(opts):
-  match opts.tbl_idx_type:
+def ptr_size(opts):
+  match ptr_type(opts):
     case 'i32': return 4
     case 'i64': return 8
 
-def idx_type(opts):
-  return opts.tbl_idx_type
-
 @dataclass
 class LiftLowerOptions(LiftOptions):
   realloc: Optional[Callable] = None
@@ -796,7 +786,7 @@ def __init__(self, t, cx, ptr, length):
     trap_if(length > Buffer.MAX_LENGTH)
     if t and length > 0:
       trap_if(ptr != align_to(ptr, alignment(t, cx.opts)))
-      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory))
+      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory[0]))
     self.cx = cx
     self.t = t
     self.ptr = ptr
@@ -1196,7 +1186,7 @@ def elem_size_flags(labels):
 
 def load(cx, ptr, t):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory[0]))
   match despecialize(t):
     case BoolType()         : return convert_int_to_bool(load_int(cx, ptr, 1))
     case U8Type()           : return load_int(cx, ptr, 1)
@@ -1222,7 +1212,7 @@ def load(cx, ptr, t):
     case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, 4), t)
 
 def load_int(cx, ptr, nbytes, signed = False):
-  return int.from_bytes(cx.opts.memory[ptr : ptr+nbytes], 'little', signed = signed)
+  return int.from_bytes(cx.opts.memory[0][ptr : ptr+nbytes], 'little', signed = signed)
 
 def convert_int_to_bool(i):
   assert(i >= 0)
@@ -1292,9 +1282,9 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
         encoding = 'latin-1'
 
   trap_if(ptr != align_to(ptr, alignment))
-  trap_if(ptr + byte_length > len(cx.opts.memory))
+  trap_if(ptr + byte_length > len(cx.opts.memory[0]))
   try:
-    s = cx.opts.memory[ptr : ptr+byte_length].decode(encoding)
+    s = cx.opts.memory[0][ptr : ptr+byte_length].decode(encoding)
   except UnicodeError:
     trap()
 
@@ -1314,7 +1304,7 @@ def load_list(cx, ptr, elem_type, maybe_length):
 
 def load_list_from_range(cx, ptr, length, elem_type):
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory))
+  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory[0]))
   return load_list_from_valid_range(cx, ptr, length, elem_type)
 
 def load_list_from_valid_range(cx, ptr, length, elem_type):
@@ -1387,7 +1377,7 @@ def lift_async_value(ReadableEndT, cx, i, t):
 
 def store(cx, v, t, ptr):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory[0]))
   match despecialize(t):
     case BoolType()         : store_int(cx, int(bool(v)), ptr, 1)
     case U8Type()           : store_int(cx, v, ptr, 1)
@@ -1413,7 +1403,7 @@ def store(cx, v, t, ptr):
     case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, 4)
 
 def store_int(cx, v, ptr, nbytes, signed = False):
-  cx.opts.memory[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
+  cx.opts.memory[0][ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
 
 def maybe_scramble_nan32(f):
   if math.isnan(f):
@@ -1504,10 +1494,10 @@ def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment
   trap_if(dst_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
-  trap_if(ptr + dst_byte_length > len(cx.opts.memory))
+  trap_if(ptr + dst_byte_length > len(cx.opts.memory[0]))
   encoded = src.encode(dst_encoding)
   assert(dst_byte_length == len(encoded))
-  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
   return (ptr, src_code_units)
 
 def store_utf16_to_utf8(cx, src, src_code_units):
@@ -1521,19 +1511,19 @@ def store_latin1_to_utf8(cx, src, src_code_units):
 def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
   assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 1, src_code_units)
-  trap_if(ptr + src_code_units > len(cx.opts.memory))
+  trap_if(ptr + src_code_units > len(cx.opts.memory[0]))
   for i,code_point in enumerate(src):
     if ord(code_point) < 2**7:
-      cx.opts.memory[ptr + i] = ord(code_point)
+      cx.opts.memory[0][ptr + i] = ord(code_point)
     else:
       trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
-      trap_if(ptr + worst_case_size > len(cx.opts.memory))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
       encoded = src.encode('utf-8')
-      cx.opts.memory[ptr+i : ptr+len(encoded)] = encoded[i : ]
+      cx.opts.memory[0][ptr+i : ptr+len(encoded)] = encoded[i : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 1, len(encoded))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
       return (ptr, len(encoded))
   return (ptr, src_code_units)
 
@@ -1542,13 +1532,13 @@ def store_utf8_to_utf16(cx, src, src_code_units):
   trap_if(worst_case_size > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + worst_case_size > len(cx.opts.memory))
+  trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
   if len(encoded) < worst_case_size:
     ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + len(encoded) > len(cx.opts.memory))
+    trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
   code_units = int(len(encoded) / 2)
   return (ptr, code_units)
 
@@ -1556,33 +1546,33 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
   assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_code_units)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_code_units > len(cx.opts.memory))
+  trap_if(ptr + src_code_units > len(cx.opts.memory[0]))
   dst_byte_length = 0
   for usv in src:
     if ord(usv) < (1 << 8):
-      cx.opts.memory[ptr + dst_byte_length] = ord(usv)
+      cx.opts.memory[0][ptr + dst_byte_length] = ord(usv)
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
       trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
-      trap_if(ptr + worst_case_size > len(cx.opts.memory))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
       for j in range(dst_byte_length-1, -1, -1):
-        cx.opts.memory[ptr + 2*j] = cx.opts.memory[ptr + j]
-        cx.opts.memory[ptr + 2*j + 1] = 0
+        cx.opts.memory[0][ptr + 2*j] = cx.opts.memory[0][ptr + j]
+        cx.opts.memory[0][ptr + 2*j + 1] = 0
       encoded = src.encode('utf-16-le')
-      cx.opts.memory[ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
+      cx.opts.memory[0][ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
         trap_if(ptr != align_to(ptr, 2))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
       tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
       return (ptr, tagged_code_units)
   if dst_byte_length < src_code_units:
     ptr = cx.opts.realloc(ptr, src_code_units, 2, dst_byte_length)
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + dst_byte_length > len(cx.opts.memory))
+    trap_if(ptr + dst_byte_length > len(cx.opts.memory[0]))
   return (ptr, dst_byte_length)
 
 def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
@@ -1590,17 +1580,17 @@ def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   trap_if(src_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_byte_length > len(cx.opts.memory))
+  trap_if(ptr + src_byte_length > len(cx.opts.memory[0]))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
   if any(ord(c) >= (1 << 8) for c in src):
     tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
     return (ptr, tagged_code_units)
   latin1_size = int(len(encoded) / 2)
   for i in range(latin1_size):
-    cx.opts.memory[ptr + i] = cx.opts.memory[ptr + 2*i]
+    cx.opts.memory[0][ptr + i] = cx.opts.memory[0][ptr + 2*i]
   ptr = cx.opts.realloc(ptr, src_byte_length, 1, latin1_size)
-  trap_if(ptr + latin1_size > len(cx.opts.memory))
+  trap_if(ptr + latin1_size > len(cx.opts.memory[0]))
   return (ptr, latin1_size)
 
 def lower_error_context(cx, v):
@@ -1620,7 +1610,7 @@ def store_list_into_range(cx, v, elem_type):
   trap_if(byte_length >= (1 << (ptr_size(cx.opts) * 8)))
   ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + byte_length > len(cx.opts.memory))
+  trap_if(ptr + byte_length > len(cx.opts.memory[0]))
   store_list_into_valid_range(cx, v, ptr, elem_type)
   return (ptr, len(v))
 
@@ -1970,7 +1960,7 @@ def lift_flat_values(cx, max_flat, vi, ts):
     ptr = vi.next(ptr_type(cx.opts))
     tuple_type = TupleType(ts)
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory[0]))
     return list(load(cx, ptr, tuple_type).values())
   else:
     return [ lift_flat(cx, vi, t) for t in ts ]
@@ -1988,7 +1978,7 @@ def lower_flat_values(cx, max_flat, vs, ts, out_param = None):
       ptr = out_param.next(ptr_type(cx.opts))
       flat_vals = []
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory[0]))
     store(cx, tuple_value, tuple_type, ptr)
   else:
     flat_vals = []
@@ -2262,24 +2252,24 @@ def canon_waitable_set_new(thread):
 
 ### 🔀 `canon waitable-set.wait`
 
-def canon_waitable_set_wait(cancellable, mem, opts, thread, si, ptr):
+def canon_waitable_set_wait(cancellable, mem, thread, si, ptr):
   trap_if(not thread.task.inst.may_leave)
   trap_if(not thread.task.may_block())
   wset = thread.task.inst.handles.get(si)
   trap_if(not isinstance(wset, WaitableSet))
   event = thread.task.wait_until(lambda: True, thread, wset, cancellable)
-  return unpack_event(mem, opts, thread, ptr, event)
+  return unpack_event(mem, thread, ptr, event)
 
-def unpack_event(mem, opts, thread, ptr, e: EventTuple):
+def unpack_event(mem, thread, ptr, e: EventTuple):
   event, p1, p2 = e
-  cx = LiftLowerContext(LiftLowerOptions(memory = mem, addr_type = opts.addr_type, tbl_idx_type = opts.tbl_idx_type), thread.task.inst)
+  cx = LiftLowerContext(LiftLowerOptions(memory = mem), thread.task.inst)
   store(cx, p1, U32Type(), ptr)
   store(cx, p2, U32Type(), ptr + 4)
   return [event]
 
 ### 🔀 `canon waitable-set.poll`
 
-def canon_waitable_set_poll(cancellable, mem, opts, thread, si, ptr):
+def canon_waitable_set_poll(cancellable, mem, thread, si, ptr):
   trap_if(not thread.task.inst.may_leave)
   wset = thread.task.inst.handles.get(si)
   trap_if(not isinstance(wset, WaitableSet))
@@ -2289,7 +2279,7 @@ def canon_waitable_set_poll(cancellable, mem, opts, thread, si, ptr):
     event = (EventCode.NONE, 0, 0)
   else:
     event = wset.get_pending_event()
-  return unpack_event(mem, opts, thread, ptr, event)
+  return unpack_event(mem, thread, ptr, event)
 
 ### 🔀 `canon waitable-set.drop`
 
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index b71cc1be..ed6e60d6 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -35,21 +35,24 @@ def realloc(self, original_ptr, original_size, alignment, new_size):
     self.memory[ret : ret + original_size] = self.memory[original_ptr : original_ptr + original_size]
     return ret
 
-def mk_opts(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None, sync_task_return = False, async_ = False, addr_type = 'i32', tbl_idx_type = 'i32'):
+_DEFAULT_MEMORY = (bytearray(), 'i32')
+
+def mk_opts(memory = None, encoding = 'utf8', realloc = None, post_return = None, sync_task_return = False, async_ = False, addr_type = 'i32'):
   opts = CanonicalOptions()
-  opts.memory = memory
+  if memory is None and addr_type == 'i32':
+    opts.memory = _DEFAULT_MEMORY
+  else:
+    opts.memory = (memory if memory is not None else bytearray(), addr_type)
   opts.string_encoding = encoding
   opts.realloc = realloc
   opts.post_return = post_return
   opts.sync_task_return = sync_task_return
   opts.async_ = async_
   opts.callback = None
-  opts.addr_type = addr_type
-  opts.tbl_idx_type = tbl_idx_type
   return opts
 
-def mk_cx(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None, addr_type = 'i32', tbl_idx_type = 'i32'):
-  opts = mk_opts(memory, encoding, realloc, post_return, addr_type=addr_type, tbl_idx_type=tbl_idx_type)
+def mk_cx(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None, addr_type = 'i32'):
+  opts = mk_opts(memory, encoding, realloc, post_return, addr_type=addr_type)
   inst = ComponentInstance(Store())
   return LiftLowerContext(opts, inst)
 
@@ -131,10 +134,10 @@ def test_name():
   if lower_v is None:
     lower_v = v
 
-  heap = Heap(5*len(cx.opts.memory))
+  heap = Heap(5*len(cx.opts.memory[0]))
   if dst_encoding is None:
     dst_encoding = cx.opts.string_encoding
-  cx = mk_cx(heap.memory, dst_encoding, heap.realloc, addr_type=cx.opts.addr_type, tbl_idx_type=cx.opts.tbl_idx_type)
+  cx = mk_cx(heap.memory, dst_encoding, heap.realloc, addr_type=ptr_type(cx.opts))
   lowered_vals = lower_flat(cx, v, lower_t)
 
   vi = CoreValueIter(lowered_vals)
@@ -269,7 +272,7 @@ def test_string(src_encoding, dst_encoding, s, addr_type='i32'):
     except UnicodeEncodeError:
       pass
     encoded = s.encode('utf-16-le')
-    tagged_code_units = int(len(encoded) / 2) | utf16_tag(LiftLowerOptions(addr_type=addr_type))
+    tagged_code_units = int(len(encoded) / 2) | utf16_tag(LiftLowerOptions(memory=(bytearray(), addr_type)))
     test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type)
 
 encodings = ['utf8', 'utf16', 'latin1+utf16']
@@ -284,9 +287,9 @@ def test_string(src_encoding, dst_encoding, s, addr_type='i32'):
       for s in fun_strings:
         test_string(src_encoding, dst_encoding, s, addr_type)
 
-def test_heap(t, expect, args, byte_array, addr_type='i32', tbl_idx_type='i32'):
+def test_heap(t, expect, args, byte_array, addr_type='i32'):
   heap = Heap(byte_array)
-  cx = mk_cx(heap.memory, addr_type=addr_type, tbl_idx_type=tbl_idx_type)
+  cx = mk_cx(heap.memory, addr_type=addr_type)
   test(t, args, expect, cx)
 
 # Empty record types are not permitted yet.
@@ -391,8 +394,8 @@ def test_heap(t, expect, args, byte_array, addr_type='i32', tbl_idx_type='i32'):
 test_heap(t, v, [0,2],
           [0xff,0xff,0xff,0xff, 0,0,0,0])
 
-def test_flatten(t, params, results, addr_type='i32', tbl_idx_type='i32'):
-  opts = mk_opts(addr_type=addr_type, tbl_idx_type=tbl_idx_type)
+def test_flatten(t, params, results, addr_type='i32'):
+  opts = mk_opts(addr_type=addr_type)
   expect = CoreFuncType(params, results)
 
   if len(params) > definitions.MAX_FLAT_PARAMS:
@@ -422,7 +425,7 @@ def test_flatten(t, params, results, addr_type='i32', tbl_idx_type='i32'):
 
 
 def test_roundtrips():
-  def test_roundtrip(t, v, addr_type='i32', tbl_idx_type='i32'):
+  def test_roundtrip(t, v, addr_type='i32'):
     before = definitions.MAX_FLAT_RESULTS
     definitions.MAX_FLAT_RESULTS = 16
 
@@ -433,7 +436,7 @@ def callee(thread, x):
       return x
 
     callee_heap = Heap(1000)
-    callee_opts = mk_opts(callee_heap.memory, 'utf8', callee_heap.realloc, addr_type=addr_type, tbl_idx_type=tbl_idx_type)
+    callee_opts = mk_opts(callee_heap.memory, 'utf8', callee_heap.realloc, addr_type=addr_type)
     callee_inst = ComponentInstance(store)
 
     got = None
@@ -541,12 +544,12 @@ def core_wasm(thread, args):
 
     return [h, h2, h4]
 
-  for tbl_idx_type in ['i32', 'i64']:
+  for _ in [0]:
     store = Store()
     rt = ResourceType(ComponentInstance(store), dtor) # usable in imports and exports
     inst = ComponentInstance(store)
     rt2 = ResourceType(inst, dtor) # only usable in exports
-    opts = mk_opts(tbl_idx_type=tbl_idx_type)
+    opts = mk_opts()
 
     ft = FuncType([
       OwnType(rt),
@@ -645,21 +648,21 @@ def consumer(thread, args):
     fut1_1.set()
 
     waitretp = consumer_heap.realloc(0, 0, 8, 4)
-    [event] = canon_waitable_set_wait(True, consumer_heap.memory, LiftLowerOptions(), thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, (consumer_heap.memory, 'i32'), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_heap.memory[waitretp] == subi1)
     assert(consumer_heap.memory[waitretp+4] == Subtask.State.RETURNED)
     [] = canon_subtask_drop(thread, subi1)
     fut1_2.set()
 
-    [event] = canon_waitable_set_wait(True, consumer_heap.memory, LiftLowerOptions(), thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, (consumer_heap.memory, 'i32'), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_heap.memory[waitretp] == subi2)
     assert(consumer_heap.memory[waitretp+4] == Subtask.State.STARTED)
     assert(consumer_heap.memory[retp] == 13)
     fut2.set()
 
-    [event] = canon_waitable_set_wait(True, consumer_heap.memory, LiftLowerOptions(), thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, (consumer_heap.memory, 'i32'), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_heap.memory[waitretp] == subi2)
     assert(consumer_heap.memory[waitretp+4] == Subtask.State.RETURNED)
@@ -880,7 +883,7 @@ def core_consumer(thread, args):
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi2)
     assert(consumer_mem[retp+4] == Subtask.State.STARTED)
@@ -892,14 +895,14 @@ def core_consumer(thread, args):
       [ret] = canon_thread_yield(True, thread)
       assert(ret == 0)
       retp = 0
-      [ret] = canon_waitable_set_poll(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
+      [ret] = canon_waitable_set_poll(True, (consumer_mem, 'i32'), thread, seti, retp)
       assert(ret == EventCode.NONE)
 
     [ret] = canon_future_write(FutureType(None), consumer_opts, thread, wfut21, 0xdeadbeef)
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi1)
     assert(consumer_mem[retp+4] == Subtask.State.RETURNED)
@@ -913,14 +916,14 @@ def core_consumer(thread, args):
       [ret] = canon_thread_yield(True, thread)
       assert(ret == 0)
       retp = 0
-      [ret] = canon_waitable_set_poll(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
+      [ret] = canon_waitable_set_poll(True, (consumer_mem, 'i32'), thread, seti, retp)
       assert(ret == EventCode.NONE)
 
     [ret] = canon_future_write(FutureType(None), consumer_opts, thread, wfut13, 0xdeadbeef)
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi2)
     assert(consumer_mem[retp+4] == Subtask.State.RETURNED)
@@ -937,7 +940,7 @@ def core_consumer(thread, args):
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi3)
     assert(consumer_mem[retp+4] == Subtask.State.RETURNED)
@@ -995,7 +998,7 @@ def core_caller(thread, args):
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, subi, seti)
     retp3 = 12
-    [event] = canon_waitable_set_wait(True, caller_mem, LiftLowerOptions(), thread, seti, retp3)
+    [event] = canon_waitable_set_wait(True, (caller_mem, 'i32'), thread, seti, retp3)
     assert(event == EventCode.SUBTASK)
     assert(caller_mem[retp3+0] == subi)
     assert(caller_mem[retp3+4] == Subtask.State.RETURNED)
@@ -1062,7 +1065,7 @@ def consumer(thread, args):
       [ret] = canon_thread_yield(True, thread)
       assert(ret == 0)
       retp = 8
-      [event] = canon_waitable_set_poll(True, consumer_heap.memory, LiftLowerOptions(), thread, seti, retp)
+      [event] = canon_waitable_set_poll(True, (consumer_heap.memory, 'i32'), thread, seti, retp)
       if event == EventCode.NONE:
         continue
       assert(event == EventCode.SUBTASK)
@@ -1148,7 +1151,7 @@ def consumer(thread, args):
     remain = [subi1, subi2]
     while remain:
       retp = 8
-      [event] = canon_waitable_set_wait(True, consumer_heap.memory, LiftLowerOptions(), thread, seti, retp)
+      [event] = canon_waitable_set_wait(True, (consumer_heap.memory, 'i32'), thread, seti, retp)
       assert(event == EventCode.SUBTASK)
       assert(consumer_heap.memory[retp+4] == Subtask.State.RETURNED)
       subi = consumer_heap.memory[retp]
@@ -1214,14 +1217,14 @@ def core_func(thread, args):
     fut1.set()
 
     retp = lower_heap.realloc(0,0,8,4)
-    [event] = canon_waitable_set_wait(True, lower_heap.memory, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (lower_heap.memory, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(lower_heap.memory[retp] == subi1)
     assert(lower_heap.memory[retp+4] == Subtask.State.RETURNED)
 
     fut2.set()
 
-    [event] = canon_waitable_set_wait(True, lower_heap.memory, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (lower_heap.memory, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(lower_heap.memory[retp] == subi2)
     assert(lower_heap.memory[retp+4] == Subtask.State.RETURNED)
@@ -1538,7 +1541,7 @@ def core_func(thread, args):
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi1, seti)
     definitions.throw_it = True
-    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp) ##
+    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp) ##
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi1)
     result,n = unpack_result(mem[retp+4])
@@ -1554,7 +1557,7 @@ def core_func(thread, args):
     assert(ret == definitions.BLOCKED)
     host_import_incoming.set_remain(100)
     [] = canon_waitable_join(thread, wsi3, seti)
-    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[retp+0] == wsi3)
     result,n = unpack_result(mem[retp+4])
@@ -1566,7 +1569,7 @@ def core_func(thread, args):
     assert(ret == definitions.BLOCKED)
     dst_stream.set_remain(100)
     [] = canon_waitable_join(thread, wsi2, seti)
-    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[retp+0] == wsi2)
     result,n = unpack_result(mem[retp+4])
@@ -1585,7 +1588,7 @@ def core_func(thread, args):
     [ret] = canon_stream_read(StreamType(U8Type()), opts, thread, rsi4, 0, 4)
     assert(ret == definitions.BLOCKED)
     [] = canon_waitable_join(thread, rsi4, seti)
-    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi4)
     result,n = unpack_result(mem[retp+4])
@@ -1709,7 +1712,7 @@ def core_func(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi)
     result,n = unpack_result(mem[retp+4])
@@ -1730,7 +1733,7 @@ def core_func(thread, args):
     assert(ret == definitions.BLOCKED)
     dst.set_remain(4)
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[retp+0] == wsi)
     result,n = unpack_result(mem[retp+4])
@@ -1791,7 +1794,7 @@ def core_func1(thread, args):
     retp = 16
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, mem1, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem1, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem1[retp+0] == wsi)
     result,n = unpack_result(mem1[retp+4])
@@ -1802,7 +1805,7 @@ def core_func1(thread, args):
 
     fut4.set()
 
-    [event] = canon_waitable_set_wait(True, mem1, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem1, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem1[retp+0] == wsi)
     assert(mem1[retp+4] == 0)
@@ -1840,7 +1843,7 @@ def core_func2(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, mem2, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem2, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem2[retp+0] == rsi)
     result,n = unpack_result(mem2[retp+4])
@@ -1868,7 +1871,7 @@ def core_func2(thread, args):
     [ret] = canon_stream_read(StreamType(U8Type()), opts2, thread, rsi, 12345, 0)
     assert(ret == definitions.BLOCKED)
 
-    [event] = canon_waitable_set_wait(True, mem2, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem2, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem2[retp+0] == rsi)
     p2 = int.from_bytes(mem2[retp+4 : retp+8], 'little', signed=False)
@@ -1914,7 +1917,7 @@ def core_func1(thread, args):
     retp = 16
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, mem1, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem1, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem1[retp+0] == wsi)
     result,n = unpack_result(mem1[retp+4])
@@ -1951,7 +1954,7 @@ def core_func2(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, mem2, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem2, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem2[retp+0] == rsi)
     result,n = unpack_result(mem2[retp+4])
@@ -2068,7 +2071,7 @@ def core_func(thread, args):
     host_source.unblock_cancel()
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi)
     result,n = unpack_result(mem[retp+4])
@@ -2173,7 +2176,7 @@ def core_func(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rfi, seti)
-    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.FUTURE_READ)
     assert(mem[retp+0] == rfi)
     assert(mem[retp+4] == CopyResult.COMPLETED)
@@ -2239,7 +2242,7 @@ def core_callee1(thread, args):
   def core_callee2(thread, args):
     [x] = args
     [si] = canon_waitable_set_new(thread)
-    [ret] = canon_waitable_set_wait(True, callee_heap.memory, LiftLowerOptions(), thread, si, 0)
+    [ret] = canon_waitable_set_wait(True, (callee_heap.memory, 'i32'), thread, si, 0)
     assert(ret == EventCode.TASK_CANCELLED)
     match x:
       case 1:
@@ -2286,9 +2289,9 @@ def core_callee4(thread, args):
     except Trap:
       pass
     [seti] = canon_waitable_set_new(thread)
-    [result] = canon_waitable_set_wait(True, callee_heap.memory, LiftLowerOptions(), thread, seti, 0)
+    [result] = canon_waitable_set_wait(True, (callee_heap.memory, 'i32'), thread, seti, 0)
     assert(result == EventCode.TASK_CANCELLED)
-    [result] = canon_waitable_set_poll(True, callee_heap.memory, LiftLowerOptions(), thread, seti, 0)
+    [result] = canon_waitable_set_poll(True, (callee_heap.memory, 'i32'), thread, seti, 0)
     assert(result == EventCode.NONE)
     [] = canon_task_cancel(thread)
     return []
@@ -2423,7 +2426,7 @@ def core_caller(thread, args):
     assert(caller_heap.memory[0] == 13)
     [] = canon_waitable_join(thread, subi3, seti)
     retp = 8
-    [ret] = canon_waitable_set_wait(True, caller_heap.memory, LiftLowerOptions(), thread, seti, retp)
+    [ret] = canon_waitable_set_wait(True, (caller_heap.memory, 'i32'), thread, seti, retp)
     assert(ret == EventCode.SUBTASK)
     assert(caller_heap.memory[retp+0] == subi3)
     assert(caller_heap.memory[retp+4] == Subtask.State.RETURNED)
@@ -2442,7 +2445,7 @@ def core_caller(thread, args):
     assert(caller_heap.memory[0] == 13)
     [] = canon_waitable_join(thread, subi4, seti)
     retp = 8
-    [ret] = canon_waitable_set_wait(True, caller_heap.memory, LiftLowerOptions(), thread, seti, retp)
+    [ret] = canon_waitable_set_wait(True, (caller_heap.memory, 'i32'), thread, seti, retp)
     assert(ret == EventCode.SUBTASK)
     assert(caller_heap.memory[retp+0] == subi4)
     assert(caller_heap.memory[retp+4] == Subtask.State.CANCELLED_BEFORE_RETURNED)
@@ -2484,7 +2487,7 @@ def core_caller(thread, args):
     host_fut4.set()
     [] = canon_waitable_join(thread, subi, seti)
     waitretp = 4
-    [event] = canon_waitable_set_wait(True, caller_heap.memory, LiftLowerOptions(), thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, (caller_heap.memory, 'i32'), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(caller_heap.memory[waitretp] == subi)
     assert(caller_heap.memory[waitretp+4] == Subtask.State.CANCELLED_BEFORE_RETURNED)
@@ -2500,7 +2503,7 @@ def core_caller(thread, args):
     host_fut5.set()
     [] = canon_waitable_join(thread, subi, seti)
     waitretp = 4
-    [event] = canon_waitable_set_wait(True, caller_heap.memory, LiftLowerOptions(), thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, (caller_heap.memory, 'i32'), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(caller_heap.memory[waitretp] == subi)
     assert(caller_heap.memory[waitretp+4] == Subtask.State.RETURNED)
@@ -2515,7 +2518,7 @@ def core_caller(thread, args):
     assert(ret == definitions.BLOCKED)
 
     [] = canon_waitable_join(thread, subi, seti)
-    [event] = canon_waitable_set_wait(True, caller_heap.memory, LiftLowerOptions(), thread, seti, 4)
+    [event] = canon_waitable_set_wait(True, (caller_heap.memory, 'i32'), thread, seti, 4)
     assert(event == EventCode.SUBTASK)
     assert(caller_heap.memory[0] == 45)
     assert(caller_heap.memory[4] == subi)
@@ -2562,7 +2565,7 @@ def core_func(thread, args):
     [] = canon_future_drop_readable(FutureType(elemt), thread, rfi)
 
     [] = canon_waitable_join(thread, wfi, seti)
-    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, 0)
+    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, 0)
     assert(event == EventCode.FUTURE_WRITE)
     assert(mem[0] == wfi)
     assert(mem[4] == CopyResult.COMPLETED)
@@ -2582,7 +2585,7 @@ def core_func(thread, args):
     [] = canon_stream_drop_readable(StreamType(elemt), thread, rsi)
 
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, mem, LiftLowerOptions(), thread, seti, 0)
+    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, 0)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[0] == wsi)
     result,n = unpack_result(mem[4])
@@ -2774,14 +2777,14 @@ def core_consumer(thread, args):
     retp3 = 16
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, subi1, seti)
-    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp3)
+    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp3)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp3] == subi1)
     assert(consumer_mem[retp3+4] == Subtask.State.RETURNED)
     assert(consumer_mem[retp1] == 42)
 
     [] = canon_waitable_join(thread, subi2, seti)
-    [event] = canon_waitable_set_wait(True, consumer_mem, LiftLowerOptions(), thread, seti, retp3)
+    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp3)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp3] == subi2)
     assert(consumer_mem[retp3+4] == Subtask.State.RETURNED)
@@ -2834,20 +2837,14 @@ def test_mixed_table_memory_types():
   store = Store()
   rt = ResourceType(ComponentInstance(store), None)
 
-  # Verify alignment and elem_size for mixed configurations
-  opts64_addr = LiftLowerOptions(addr_type='i64', tbl_idx_type='i32')
+  # Verify alignment and elem_size for memory64
+  opts64_addr = LiftLowerOptions(memory=(bytearray(), 'i64'))
   assert(alignment(StringType(), opts64_addr) == 8)
   assert(elem_size(StringType(), opts64_addr) == 16)
   assert(alignment(OwnType(rt), opts64_addr) == 4)
   assert(elem_size(OwnType(rt), opts64_addr) == 4)
 
-  opts64_tbl = LiftLowerOptions(addr_type='i32', tbl_idx_type='i64')
-  assert(alignment(StringType(), opts64_tbl) == 4)
-  assert(elem_size(StringType(), opts64_tbl) == 8)
-  assert(alignment(OwnType(rt), opts64_tbl) == 4)
-  assert(elem_size(OwnType(rt), opts64_tbl) == 4)
-
-  # Round-trip a type exercising both memory pointers and table pointers
+  # Round-trip a type exercising memory pointers
   before = definitions.MAX_FLAT_RESULTS
   definitions.MAX_FLAT_RESULTS = 16
   t = TupleType([ListType(OwnType(rt)), StringType()])
@@ -2855,10 +2852,10 @@ def test_mixed_table_memory_types():
   def core_wasm(thread, args):
     return args
 
-  for addr_type, tbl_idx_type in [('i64','i32'), ('i32','i64')]:
+  for addr_type in ['i32', 'i64']:
     heap = Heap(1000)
     inst = ComponentInstance(store)
-    opts = mk_opts(heap.memory, 'utf8', heap.realloc, addr_type=addr_type, tbl_idx_type=tbl_idx_type)
+    opts = mk_opts(heap.memory, 'utf8', heap.realloc, addr_type=addr_type)
 
     ft = FuncType([t], [t])
     v = {'0': [42, 43], '1': mk_str("hello")}

From 99274e8d3133403bcaa8e30b4e128ceea7886531 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Thu, 19 Mar 2026 15:03:11 +0000
Subject: [PATCH 04/25] typo

---
 design/mvp/canonical-abi/definitions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index a9d9e3da..4b42a983 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -1,4 +1,4 @@
- After the Boilerplate section, this file is ordered to line up with the code
+# After the Boilerplate section, this file is ordered to line up with the code
 # blocks in ../CanonicalABI.md (split by # comment lines). If you update this
 # file, don't forget to update ../CanonicalABI.md.
 

From 2955e037ee3ef37f592fd8d215396672d43245b5 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Thu, 19 Mar 2026 15:16:48 +0000
Subject: [PATCH 05/25] small fixes

---
 design/mvp/Concurrency.md               | 21 ++++++++++-----------
 design/mvp/canonical-abi/definitions.py |  1 +
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/design/mvp/Concurrency.md b/design/mvp/Concurrency.md
index 2e756ef3..a0202cf5 100644
--- a/design/mvp/Concurrency.md
+++ b/design/mvp/Concurrency.md
@@ -232,7 +232,7 @@ unique ownership of the *readable end* of the future or stream. To get a
 end pair (via the [`{stream,future}.new`] built-ins) and then pass the readable
 end elsewhere (e.g., in the above WIT, as a parameter to an imported
 `pipe.write` or as a result of an exported `transform`). Given the readable or
-writable end of a future or stream (represented as an index into the
+writable end of a future or stream (represented as an `i32` index into the
 component instance's handle table), Core WebAssembly can then call a
 [`{stream,future}.{read,write}`] built-in to synchronously or asynchronously
 copy into or out of a caller-provided buffer of Core WebAssembly linear (or,
@@ -369,7 +369,7 @@ creating and running threads.
 New threads are created with the [`thread.new-indirect`] built-in. As mentioned
 [above](#threads-and-tasks), a spawned thread inherits the task of the spawning
 thread which is why threads and tasks are N:1. `thread.new-indirect` adds a new
-thread to the component instance's threads table and returns the index of
+thread to the component instance's threads table and returns the `i32` index of
 this table entry to the Core WebAssembly caller. Like [`pthread_create`],
 `thread.new-indirect` takes a Core WebAssembly function (via index into a
 `funcref` table) and a "closure" parameter to pass to the function when called
@@ -943,8 +943,8 @@ Other example asynchronous lowered signatures:
 async func(s1: stream<future<string>>, s2: list<stream<string>>) -> result<stream<string>, stream<error>>
 ```
 In *both* the sync and async ABIs, a `future` or `stream` in the WIT-level type
-translates to a single `i32` index into the current component instance's handle
-table. For example, for the WIT function type:
+translates to a single `i32` in the ABI.  This `i32` is an index into the
+current component instance's handle table. For example, for the WIT function type:
 ```wit
 async func(f: future<string>) -> future<u32>
 ```
@@ -957,10 +957,10 @@ and the asynchronous ABI has the signature:
 (func (param $f i32) (param $out-ptr i32) (result i32))
 ```
 where `$f` is the index of a future (not a pointer to one) while while
-`$out-ptr` is a pointer to a linear memory location that will receive a handle
+`$out-ptr` is a pointer to a linear memory location that will receive an `i32`
 index.
 
-For the runtime semantics of this handle index, see `lift_stream`,
+For the runtime semantics of this `i32` index, see `lift_stream`,
 `lift_future`, `lower_stream` and `lower_future` in the [Canonical ABI
 Explainer]. For a complete description of how async imports work, see
 [`canon_lower`] in the Canonical ABI Explainer.
@@ -1030,19 +1030,18 @@ The `(result i32)` lets the core function return what it wants the runtime to do
 * If the low 4 bits are `1`, the callee wants to yield, allowing other code
   to run, but resuming thereafter without waiting on anything else.
 * If the low 4 bits are `2`, the callee wants to wait for an event to occur in
-  the waitable set whose index is stored in the remaining high bits.
+  the waitable set whose index is stored in the high 28 bits.
 
 When an async stackless function is exported, a companion "callback" function
 must also be exported with signature:
 ```wat
-(func (param i32 i32 $addr) (result i32))
+(func (param i32 i32 i32) (result i32))
 ```
-where `$addr` is `i32` or `i64` depending on the `memory` canonopt.
 
 The `(result i32)` has the same interpretation as the stackless export function
 and the runtime will repeatedly call the callback until a value of `0` is
-returned. The first two `i32` parameters describe what happened that caused the
-callback to be called again and the `$addr` parameter is the payload address.
+returned. The `i32` parameters describe what happened that caused the
+callback to be called again.
 
 For a complete description of how async exports work, see [`canon_lift`] in the
 Canonical ABI Explainer.
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 4b42a983..cb92ca65 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -233,6 +233,7 @@ def __init__(self, opts, inst, borrow_scope = None):
 @dataclass
 class LiftOptions:
   string_encoding: str = 'utf8'
+  # A tuple consisting of the memory contents and the pointer type ('i32' or 'i64')
   memory: Optional[tuple[bytearray, str]] = None
 
   def equal(lhs, rhs):

From 0fd86a2565fab4e177762aff5de6ea2342d049f3 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Mon, 23 Mar 2026 13:10:50 +0000
Subject: [PATCH 06/25] review comments

---
 design/mvp/CanonicalABI.md              | 15 +++++++--------
 design/mvp/canonical-abi/definitions.py |  3 +--
 2 files changed, 8 insertions(+), 10 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 4e6c970e..cad9caf6 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -245,6 +245,7 @@ when lifting individual parameters and results:
 @dataclass
 class LiftOptions:
   string_encoding: str = 'utf8'
+  # A tuple consisting of the memory contents and the pointer type ('i32' or 'i64')
   memory: Optional[tuple[bytearray, str]] = None
 
   def equal(lhs, rhs):
@@ -260,8 +261,7 @@ The following helper functions return the byte size and core value type of
 memory pointers: 
 ```python
 def ptr_type(opts):
-  if opts.memory is None:
-    return 'i32'
+  assert(opts.memory is not None)
   return opts.memory[1]
 
 def ptr_size(opts):
@@ -3206,9 +3206,9 @@ specifying `string-encoding=utf8` twice is an error. Each individual option, if
 present, is validated as such:
 
 * `string-encoding=N` - can be passed at most once, regardless of `N`.
-* `memory` - this is a subtype of `(memory 1)`. In the rest of the explainer,
-  `PTR` will refer to either `i32` or `i64` core Wasm types as determined by the
-  type of this `memory`.
+* `memory` - this is a subtype of `(memory 1)` or `(memory i64 1)`. In the rest
+  of the explainer, `PTR` will refer to either `i32` or `i64` core Wasm types
+  as determined by the type of this `memory`.
 * `realloc` - the function has type `(func (param PTR PTR PTR PTR) (result PTR))`
   where `PTR` is `i32` or `i64` as described above.
 * if `realloc` is present, then `memory` must be present
@@ -3217,9 +3217,8 @@ present, is validated as such:
 * 🔀 `async` - cannot be present with `post-return`
 * 🔀,not(🚟) `async` - `callback` must also be present. Note that with the 🚟
   feature (the "stackful" ABI), this restriction is lifted.
-* 🔀 `callback` - the function has type `(func (param i32 i32 PTR) (result i32))`
-  where the `PTR` parameter is the payload address and cannot be present
-  without `async` and is only allowed with
+* 🔀 `callback` - the function has type `(func (param i32 i32 i32) (result i32))`
+  and cannot be present without `async` and is only allowed with
   [`canon lift`](#canon-lift)
 
 Additionally some options are required depending on lift/lower operations
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index cb92ca65..469a7140 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -241,8 +241,7 @@ def equal(lhs, rhs):
            lhs.memory is rhs.memory
 
 def ptr_type(opts):
-  if opts.memory is None:
-    return 'i32'
+  assert(opts.memory is not None)
   return opts.memory[1]
 
 def ptr_size(opts):

From 4eec09de7938ac9995769feabc4db720515517de Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Tue, 24 Mar 2026 10:47:17 +0000
Subject: [PATCH 07/25] some test cleanup

---
 design/mvp/canonical-abi/run_tests.py | 108 ++++++++++----------------
 1 file changed, 39 insertions(+), 69 deletions(-)

diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index ed6e60d6..48f5276f 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -36,13 +36,19 @@ def realloc(self, original_ptr, original_size, alignment, new_size):
     return ret
 
 _DEFAULT_MEMORY = (bytearray(), 'i32')
+_DEFAULT_MEMORY_64 = (bytearray(), 'i64')
 
 def mk_opts(memory = None, encoding = 'utf8', realloc = None, post_return = None, sync_task_return = False, async_ = False, addr_type = 'i32'):
   opts = CanonicalOptions()
-  if memory is None and addr_type == 'i32':
-    opts.memory = _DEFAULT_MEMORY
+  if memory is None:
+    if addr_type == 'i32':
+      opts.memory = _DEFAULT_MEMORY
+    elif addr_type == 'i64':
+      opts.memory = _DEFAULT_MEMORY_64
+    else:
+      assert(False, "Invalid address type: {}".format(addr_type))
   else:
-    opts.memory = (memory if memory is not None else bytearray(), addr_type)
+    opts.memory = (memory, addr_type)
   opts.string_encoding = encoding
   opts.realloc = realloc
   opts.post_return = post_return
@@ -481,6 +487,12 @@ def dtor(thread, args):
     dtor_value = args[0]
     return []
 
+  store = Store()
+  rt = ResourceType(ComponentInstance(store), dtor) # usable in imports and exports
+  inst = ComponentInstance(store)
+  rt2 = ResourceType(inst, dtor) # only usable in exports
+  opts = mk_opts()
+
   def host_import(caller, on_start, on_resolve):
     args = on_start()
     assert(len(args) == 2)
@@ -544,39 +556,34 @@ def core_wasm(thread, args):
 
     return [h, h2, h4]
 
-  for _ in [0]:
-    store = Store()
-    rt = ResourceType(ComponentInstance(store), dtor) # usable in imports and exports
-    inst = ComponentInstance(store)
-    rt2 = ResourceType(inst, dtor) # only usable in exports
-    opts = mk_opts()
-
-    ft = FuncType([
-      OwnType(rt),
-      OwnType(rt),
-      BorrowType(rt),
-      BorrowType(rt2)
-    ],[
-      OwnType(rt),
-      OwnType(rt),
-      OwnType(rt)
-    ])
+  ft = FuncType([
+    OwnType(rt),
+    OwnType(rt),
+    BorrowType(rt),
+    BorrowType(rt2)
+  ],[
+    OwnType(rt),
+    OwnType(rt),
+    OwnType(rt)
+  ])
 
-    got = None
-    def on_resolve(results):
-      nonlocal got
-      got = results
+  def on_start():
+    return [ 42, 43, 44, 13 ]
 
-    run_lift(opts, inst, ft, core_wasm, lambda: [42, 43, 44, 13], on_resolve)
+  got = None
+  def on_resolve(results):
+    nonlocal got
+    got = results
 
-    assert(len(got) == 3)
-    assert(got[0] == 46)
-    assert(got[1] == 43)
-    assert(got[2] == 45)
-    assert(len(inst.handles.array) == 5)
-    assert(all(inst.handles.array[i] is None for i in range(4)))
-    assert(len(inst.handles.free) == 4)
+  run_lift(opts, inst, ft, core_wasm, on_start, on_resolve)
 
+  assert(len(got) == 3)
+  assert(got[0] == 46)
+  assert(got[1] == 43)
+  assert(got[2] == 45)
+  assert(len(inst.handles.array) == 5)
+  assert(all(inst.handles.array[i] is None for i in range(4)))
+  assert(len(inst.handles.free) == 4)
   definitions.MAX_FLAT_RESULTS = before
 
 
@@ -2599,7 +2606,6 @@ def core_func(thread, args):
   run_lift(sync_opts, inst, ft, core_func, lambda:[], lambda _:())
 
 
-
 def test_async_flat_params():
   store = Store()
   heap = Heap(1000)
@@ -2833,41 +2839,6 @@ def mk_task(supertask, inst):
   assert(call_might_be_recursive(p_task, c2))
 
 
-def test_mixed_table_memory_types():
-  store = Store()
-  rt = ResourceType(ComponentInstance(store), None)
-
-  # Verify alignment and elem_size for memory64
-  opts64_addr = LiftLowerOptions(memory=(bytearray(), 'i64'))
-  assert(alignment(StringType(), opts64_addr) == 8)
-  assert(elem_size(StringType(), opts64_addr) == 16)
-  assert(alignment(OwnType(rt), opts64_addr) == 4)
-  assert(elem_size(OwnType(rt), opts64_addr) == 4)
-
-  # Round-trip a type exercising memory pointers
-  before = definitions.MAX_FLAT_RESULTS
-  definitions.MAX_FLAT_RESULTS = 16
-  t = TupleType([ListType(OwnType(rt)), StringType()])
-
-  def core_wasm(thread, args):
-    return args
-
-  for addr_type in ['i32', 'i64']:
-    heap = Heap(1000)
-    inst = ComponentInstance(store)
-    opts = mk_opts(heap.memory, 'utf8', heap.realloc, addr_type=addr_type)
-
-    ft = FuncType([t], [t])
-    v = {'0': [42, 43], '1': mk_str("hello")}
-    got = None
-    def on_resolve(results):
-      nonlocal got
-      got = results
-    run_lift(opts, inst, ft, core_wasm, lambda: [v], on_resolve)
-    assert(got[0] == v)
-
-  definitions.MAX_FLAT_RESULTS = before
-
 test_roundtrips()
 test_handles()
 test_async_to_async()
@@ -2894,6 +2865,5 @@ def on_resolve(results):
 test_threads()
 test_thread_cancel_callback()
 test_reentrance()
-test_mixed_table_memory_types()
 
 print("All tests passed")

From 1a24955031a04aeb5f717c93cb657d3a4351a6ee Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Tue, 24 Mar 2026 11:49:31 +0100
Subject: [PATCH 08/25] fix spacing

Co-authored-by: Luke Wagner <mail@lukewagner.name>
---
 design/mvp/Explainer.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index e4513716..64be4280 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1475,7 +1475,7 @@ canon ::= ...
 | Synopsis                   |                            |
 | -------------------------- | -------------------------- |
 | Approximate WIT signature  | `func<T>(rep: T.rep) -> T` |
-| Canonical ABI signature    | `[rep: T.rep] -> [i32]`     |
+| Canonical ABI signature    | `[rep: T.rep] -> [i32]`    |
 
 The `resource.new` built-in creates a new resource (of resource type `T`) with
 `rep` as its representation, and returns a new handle pointing to the new

From dfa163fcb71d3af2eb28ae160211632dfb9f6656 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Tue, 24 Mar 2026 11:49:55 +0100
Subject: [PATCH 09/25] fix spacing

Co-authored-by: Luke Wagner <mail@lukewagner.name>
---
 design/mvp/Explainer.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 64be4280..5d560bf1 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1574,7 +1574,7 @@ For details, see [Thread-Local Storage] in the concurrency explainer and
 | Synopsis                   |                   |
 | -------------------------- | ----------------- |
 | Approximate WIT signature  | `func<T,i>(v: T)` |
-| Canonical ABI signature    | `[T] -> []`         |
+| Canonical ABI signature    | `[T] -> []`       |
 
 The `context.set` built-in sets the `i`th element of the [current thread]'s
 [thread-local storage] array to the value `v`. Validation currently restricts

From 8036409f55135c2f7e3ab0e6439d317eb3a28dd1 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Tue, 24 Mar 2026 11:50:22 +0100
Subject: [PATCH 10/25] fix spacing

Co-authored-by: Luke Wagner <mail@lukewagner.name>
---
 design/mvp/Explainer.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 5d560bf1..e063f0a2 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1559,7 +1559,7 @@ See the [concurrency explainer] for background.
 | Synopsis                   |                    |
 | -------------------------- | ------------------ |
 | Approximate WIT signature  | `func<T,i>() -> T` |
-| Canonical ABI signature    | `[] -> [T]`          |
+| Canonical ABI signature    | `[] -> [T]`        |
 
 The `context.get` built-in returns the `i`th element of the [current thread]'s
 [thread-local storage] array. Validation currently restricts `i` to be less

From 6824cc0d6edbec27f33bbf25a978a2481723c005 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Tue, 24 Mar 2026 11:51:15 +0100
Subject: [PATCH 11/25] inline table.addrtype

Co-authored-by: Luke Wagner <mail@lukewagner.name>
---
 design/mvp/Explainer.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index e063f0a2..e25e59d0 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -2061,8 +2061,8 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 
 | Synopsis                   |                                                               |
 | -------------------------- | ------------------------------------------------------------- |
-| Approximate WIT signature  | `func<FuncT,tableidx>(fi: uIDX, c: FuncT.params[0]) -> thread` |
-| Canonical ABI signature    | `[fi:iIDX c: FuncT.params[0]] -> [i32]`                                  |
+| Approximate WIT signature  | `func<FuncT,table>(fi: table.addrtype, c: FuncT.params[0]) -> thread` |
+| Canonical ABI signature    | `[fi:table.addrtype c: FuncT.params[0]] -> [i32]`                                  |
 
 The `thread.new-indirect` built-in adds a new thread to the current component
 instance's table, returning the index of the new thread. The function table

From b3d480456989d40f88600dc4b1d279d6a38f7808 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Tue, 24 Mar 2026 11:51:45 +0100
Subject: [PATCH 12/25] inline table.addrtype

Co-authored-by: Luke Wagner <mail@lukewagner.name>
---
 design/mvp/Explainer.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index e25e59d0..eebac667 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -2222,8 +2222,8 @@ For details, see [`canon_thread_spawn_ref`] in the Canonical ABI explainer.
 
 | Synopsis                   |                                                                    |
 | -------------------------- | ------------------------------------------------------------------ |
-| Approximate WIT signature  | `func<shared?,FuncT,tableidx>(i: uIDX, c: FuncT.params[0]) -> bool` |
-| Canonical ABI signature    | `shared? [i:iIDX c:FuncT.params[0]] -> [i32]`                                |
+| Approximate WIT signature  | `func<shared?,FuncT,table>(i: table.addrtype, c: FuncT.params[0]) -> bool` |
+| Canonical ABI signature    | `shared? [i:table.addrtype c:FuncT.params[0]] -> [i32]`                                |
 
 The `thread.spawn-indirect` built-in is an optimization, fusing a call to
 [`thread.new-indirect`](#-threadnew-indirect) with a call to

From 24a917c3e0e08ea5a2773418f49bc43cb92e161e Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Tue, 24 Mar 2026 11:07:28 +0000
Subject: [PATCH 13/25] fixup inline table.addrtype

---
 design/mvp/Explainer.md | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index eebac667..d7e2a076 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -2059,19 +2059,18 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 
 ###### 🧵 `thread.new-indirect`
 
-| Synopsis                   |                                                               |
-| -------------------------- | ------------------------------------------------------------- |
+| Synopsis                   |                                                                       |
+| -------------------------- | --------------------------------------------------------------------- |
 | Approximate WIT signature  | `func<FuncT,table>(fi: table.addrtype, c: FuncT.params[0]) -> thread` |
-| Canonical ABI signature    | `[fi:table.addrtype c: FuncT.params[0]] -> [i32]`                                  |
+| Canonical ABI signature    | `[fi:table.addrtype c: FuncT.params[0]] -> [i32]`                     |
 
 The `thread.new-indirect` built-in adds a new thread to the current component
 instance's table, returning the index of the new thread. The function table
 supplied via [`core:tableidx`] is indexed by the `fi` operand and then
 dynamically checked to match the type `FuncT` (in the same manner as
-`call_indirect`). The types `uIDX` and `iIDX` of `fi` are `u32`/`i32` or
-`u64`/`i64` as determined by the table supplied by [`core:tableidx`]. Lastly,
-the indexed function is called in the new thread with `c` as its first and only
-parameter.
+`call_indirect`). Here the `table.addrtype` is either `i32` or `i64` as
+determined by the [`core:table-type`] of the table. Lastly, the indexed function
+is called in the new thread with `c` as its first and only parameter.
 
 Currently, `FuncT` must be `(func (param i32))` or `(func (param i64))` and thus
 `c` must always be an `i32` or `i64`, but this restriction can be loosened in
@@ -2220,10 +2219,10 @@ For details, see [`canon_thread_spawn_ref`] in the Canonical ABI explainer.
 
 ###### 🧵② `thread.spawn-indirect`
 
-| Synopsis                   |                                                                    |
-| -------------------------- | ------------------------------------------------------------------ |
+| Synopsis                   |                                                                            |
+| -------------------------- | -------------------------------------------------------------------------- |
 | Approximate WIT signature  | `func<shared?,FuncT,table>(i: table.addrtype, c: FuncT.params[0]) -> bool` |
-| Canonical ABI signature    | `shared? [i:table.addrtype c:FuncT.params[0]] -> [i32]`                                |
+| Canonical ABI signature    | `shared? [i:table.addrtype c:FuncT.params[0]] -> [i32]`                    |
 
 The `thread.spawn-indirect` built-in is an optimization, fusing a call to
 [`thread.new-indirect`](#-threadnew-indirect) with a call to
@@ -3177,6 +3176,7 @@ For some use-case-focused, worked examples, see:
 [func-import-abbrev]: https://webassembly.github.io/spec/core/text/modules.html#text-func-abbrev
 [`core:version`]: https://webassembly.github.io/spec/core/binary/modules.html#binary-version
 [`core:tableidx`]: https://webassembly.github.io/spec/core/syntax/modules.html#syntax-tableidx
+[`core:table-type`]: https://webassembly.github.io/spec/core/syntax/types.html#table-types
 
 [Embedder]: https://webassembly.github.io/spec/core/appendix/embedding.html
 [`module_instantiate`]: https://webassembly.github.io/spec/core/appendix/embedding.html#mathrm-module-instantiate-xref-exec-runtime-syntax-store-mathit-store-xref-syntax-modules-syntax-module-mathit-module-xref-exec-runtime-syntax-externval-mathit-externval-ast-xref-exec-runtime-syntax-store-mathit-store-xref-exec-runtime-syntax-moduleinst-mathit-moduleinst-xref-appendix-embedding-embed-error-mathit-error

From 940e989d0173943c70c02a20bc82d11ebe7e8413 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Tue, 24 Mar 2026 11:36:20 +0000
Subject: [PATCH 14/25] add MemInst class

---
 design/mvp/CanonicalABI.md              | 168 +++++++++++++-----------
 design/mvp/canonical-abi/definitions.py | 152 +++++++++++----------
 design/mvp/canonical-abi/run_tests.py   |  94 ++++++-------
 3 files changed, 216 insertions(+), 198 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index cad9caf6..4a6eed58 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -239,14 +239,37 @@ that can be set on various Canonical ABI definitions. The default values of
 the Python fields are the default values when the associated `canonopt` is
 not present in the binary or text format definition.
 
+The `MemInst` class represents a core WebAssembly [`memory` instance], with
+`bytes` corresponding to the memory's bytes and `addrtype` coming from the
+[`memory type`].
+```python
+@dataclass
+class MemInst:
+  bytes: bytearray
+  addrtype: Literal['i32', 'i64']
+
+  def ptr_type(self):
+    return self.addrtype
+
+  def ptr_size(self):
+    match self.ptr_type():
+      case 'i32': return 4
+      case 'i64': return 8
+
+  def equal(lhs, rhs):
+    return lhs.bytes == rhs.bytes and \
+           lhs.addrtype == rhs.addrtype
+```
+The `ptr_type` and `ptr_size` methods return the core value type and byte
+size of memory pointers.
+
 The `LiftOptions` class contains the subset of [`canonopt`] which are needed
 when lifting individual parameters and results:
 ```python
 @dataclass
 class LiftOptions:
   string_encoding: str = 'utf8'
-  # A tuple consisting of the memory contents and the pointer type ('i32' or 'i64')'
-  memory: Optional[tuple[bytearray, str]] = None
+  memory: Optional[MemInst] = None
 
   def equal(lhs, rhs):
     return lhs.string_encoding == rhs.string_encoding and \
@@ -255,21 +278,6 @@ class LiftOptions:
 The `equal` static method is used by `task.return` below to dynamically
 compare equality of just this subset of `canonopt`.
 
-The `str` field in `memory` is `'i32'` or `'i64'` to indicate whether type of the core Wasm `memory`.
-
-The following helper functions return the byte size and core value type of
-memory pointers: 
-```python
-def ptr_type(opts):
-  assert(opts.memory is not None)
-  return opts.memory[1]
-
-def ptr_size(opts):
-  match ptr_type(opts):
-    case 'i32': return 4
-    case 'i64': return 8
-```
-
 The `LiftLowerOptions` class contains the subset of [`canonopt`] which are
 needed when lifting *or* lowering individual parameters and results:
 ```python
@@ -1372,7 +1380,7 @@ class BufferGuestImpl(Buffer):
     trap_if(length > Buffer.MAX_LENGTH)
     if t and length > 0:
       trap_if(ptr != align_to(ptr, alignment(t, cx.opts)))
-      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory[0]))
+      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory.bytes))
     self.cx = cx
     self.t = t
     self.ptr = ptr
@@ -1886,7 +1894,7 @@ def alignment(t, opts):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return ptr_size(opts)
+    case StringType()                : return opts.memory.ptr_size()
     case ErrorContextType()          : return 4
     case ListType(t, l)              : return alignment_list(t, l, opts)
     case RecordType(fields)          : return alignment_record(fields, opts)
@@ -1902,7 +1910,7 @@ otherwise uses the alignment of pointers.
 def alignment_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
     return alignment(elem_type, opts)
-  return ptr_size(opts)
+  return opts.memory.ptr_size()
 ```
 
 Record alignment is tuple alignment, with the definitions split for reuse below:
@@ -1972,7 +1980,7 @@ def elem_size(t, opts):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return 2 * ptr_size(opts)
+    case StringType()                : return 2 * opts.memory.ptr_size()
     case ErrorContextType()          : return 4
     case ListType(t, l)              : return elem_size_list(t, l, opts)
     case RecordType(fields)          : return elem_size_record(fields, opts)
@@ -1984,7 +1992,7 @@ def elem_size(t, opts):
 def elem_size_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
     return maybe_length * elem_size(elem_type, opts)
-  return 2 * ptr_size(opts)
+  return 2 * opts.memory.ptr_size()
 
 def elem_size_record(fields, opts):
   s = 0
@@ -2024,7 +2032,7 @@ the top-level case analysis:
 ```python
 def load(cx, ptr, t):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory[0]))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory.bytes))
   match despecialize(t):
     case BoolType()         : return convert_int_to_bool(load_int(cx, ptr, 1))
     case U8Type()           : return load_int(cx, ptr, 1)
@@ -2054,7 +2062,7 @@ Integers are loaded directly from memory, with their high-order bit interpreted
 according to the signedness of the type.
 ```python
 def load_int(cx, ptr, nbytes, signed = False):
-  return int.from_bytes(cx.opts.memory[0][ptr : ptr+nbytes], 'little', signed = signed)
+  return int.from_bytes(cx.opts.memory.bytes[ptr : ptr+nbytes], 'little', signed = signed)
 ```
 
 Integer-to-boolean conversions treats `0` as `false` and all other bit-patterns
@@ -2128,12 +2136,12 @@ of source code units.
 String = tuple[str, str, int]
 
 def load_string(cx, ptr) -> String:
-  begin = load_int(cx, ptr, ptr_size(cx.opts))
-  tagged_code_units = load_int(cx, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
+  begin = load_int(cx, ptr, cx.opts.memory.ptr_size())
+  tagged_code_units = load_int(cx, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
   return load_string_from_range(cx, begin, tagged_code_units)
 
 def utf16_tag(opts):
-  return 1 << (ptr_size(opts) * 8 - 1)
+  return 1 << (opts.memory.ptr_size() * 8 - 1)
 
 def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   match cx.opts.string_encoding:
@@ -2155,9 +2163,9 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
         encoding = 'latin-1'
 
   trap_if(ptr != align_to(ptr, alignment))
-  trap_if(ptr + byte_length > len(cx.opts.memory[0]))
+  trap_if(ptr + byte_length > len(cx.opts.memory.bytes))
   try:
-    s = cx.opts.memory[0][ptr : ptr+byte_length].decode(encoding)
+    s = cx.opts.memory.bytes[ptr : ptr+byte_length].decode(encoding)
   except UnicodeError:
     trap()
 
@@ -2178,13 +2186,13 @@ Lists and records are loaded by recursively loading their elements/fields:
 def load_list(cx, ptr, elem_type, maybe_length):
   if maybe_length is not None:
     return load_list_from_valid_range(cx, ptr, maybe_length, elem_type)
-  begin = load_int(cx, ptr, ptr_size(cx.opts))
-  length = load_int(cx, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
+  begin = load_int(cx, ptr, cx.opts.memory.ptr_size())
+  length = load_int(cx, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
   return load_list_from_range(cx, begin, length, elem_type)
 
 def load_list_from_range(cx, ptr, length, elem_type):
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory[0]))
+  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory.bytes))
   return load_list_from_valid_range(cx, ptr, length, elem_type)
 
 def load_list_from_valid_range(cx, ptr, length, elem_type):
@@ -2310,7 +2318,7 @@ The `store` function defines how to write a value `v` of a given value type
 ```python
 def store(cx, v, t, ptr):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory[0]))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory.bytes))
   match despecialize(t):
     case BoolType()         : store_int(cx, int(bool(v)), ptr, 1)
     case U8Type()           : store_int(cx, v, ptr, 1)
@@ -2342,7 +2350,7 @@ the `signed` parameter is only present to ensure that the internal range checks
 of `int.to_bytes` are satisfied.
 ```python
 def store_int(cx, v, ptr, nbytes, signed = False):
-  cx.opts.memory[0][ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
+  cx.opts.memory.bytes[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
 ```
 
 Floats are stored directly into memory, with the sign and payload bits of NaN
@@ -2427,8 +2435,8 @@ combinations, subdividing the `latin1+utf16` encoding into either `latin1` or
 ```python
 def store_string(cx, v: String, ptr):
   begin, tagged_code_units = store_string_into_range(cx, v)
-  store_int(cx, begin, ptr, ptr_size(cx.opts))
-  store_int(cx, tagged_code_units, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
+  store_int(cx, begin, ptr, cx.opts.memory.ptr_size())
+  store_int(cx, tagged_code_units, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
 
 def store_string_into_range(cx, v: String):
   src, src_encoding, src_tagged_code_units = v
@@ -2470,17 +2478,17 @@ with a simply loop (that possibly inflates Latin-1 to UTF-16 by injecting a 0
 byte after every Latin-1 byte).
 ```python
 def max_string_byte_length(opts):
-  return (1 << (ptr_size(opts) * 8 - 1)) - 1
+  return (1 << (opts.memory.ptr_size() * 8 - 1)) - 1
 
 def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment, dst_encoding):
   dst_byte_length = dst_code_unit_size * src_code_units
   trap_if(dst_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
-  trap_if(ptr + dst_byte_length > len(cx.opts.memory[0]))
+  trap_if(ptr + dst_byte_length > len(cx.opts.memory.bytes))
   encoded = src.encode(dst_encoding)
   assert(dst_byte_length == len(encoded))
-  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
   return (ptr, src_code_units)
 ```
 The `max_string_byte_length` function ensures that the high bit of a
@@ -2502,19 +2510,19 @@ def store_latin1_to_utf8(cx, src, src_code_units):
 def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
   assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 1, src_code_units)
-  trap_if(ptr + src_code_units > len(cx.opts.memory[0]))
+  trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
   for i,code_point in enumerate(src):
     if ord(code_point) < 2**7:
-      cx.opts.memory[0][ptr + i] = ord(code_point)
+      cx.opts.memory.bytes[ptr + i] = ord(code_point)
     else:
       trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
-      trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
       encoded = src.encode('utf-8')
-      cx.opts.memory[0][ptr+i : ptr+len(encoded)] = encoded[i : ]
+      cx.opts.memory.bytes[ptr+i : ptr+len(encoded)] = encoded[i : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 1, len(encoded))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
       return (ptr, len(encoded))
   return (ptr, src_code_units)
 ```
@@ -2529,13 +2537,13 @@ def store_utf8_to_utf16(cx, src, src_code_units):
   trap_if(worst_case_size > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
+  trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
   if len(encoded) < worst_case_size:
     ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
+    trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
   code_units = int(len(encoded) / 2)
   return (ptr, code_units)
 ```
@@ -2553,33 +2561,33 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
   assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_code_units)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_code_units > len(cx.opts.memory[0]))
+  trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
   dst_byte_length = 0
   for usv in src:
     if ord(usv) < (1 << 8):
-      cx.opts.memory[0][ptr + dst_byte_length] = ord(usv)
+      cx.opts.memory.bytes[ptr + dst_byte_length] = ord(usv)
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
       trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
-      trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
       for j in range(dst_byte_length-1, -1, -1):
-        cx.opts.memory[0][ptr + 2*j] = cx.opts.memory[0][ptr + j]
-        cx.opts.memory[0][ptr + 2*j + 1] = 0
+        cx.opts.memory.bytes[ptr + 2*j] = cx.opts.memory.bytes[ptr + j]
+        cx.opts.memory.bytes[ptr + 2*j + 1] = 0
       encoded = src.encode('utf-16-le')
-      cx.opts.memory[0][ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
+      cx.opts.memory.bytes[ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
         trap_if(ptr != align_to(ptr, 2))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
       tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
       return (ptr, tagged_code_units)
   if dst_byte_length < src_code_units:
     ptr = cx.opts.realloc(ptr, src_code_units, 2, dst_byte_length)
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + dst_byte_length > len(cx.opts.memory[0]))
+    trap_if(ptr + dst_byte_length > len(cx.opts.memory.bytes))
   return (ptr, dst_byte_length)
 ```
 
@@ -2599,17 +2607,17 @@ def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   trap_if(src_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_byte_length > len(cx.opts.memory[0]))
+  trap_if(ptr + src_byte_length > len(cx.opts.memory.bytes))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
   if any(ord(c) >= (1 << 8) for c in src):
     tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
     return (ptr, tagged_code_units)
   latin1_size = int(len(encoded) / 2)
   for i in range(latin1_size):
-    cx.opts.memory[0][ptr + i] = cx.opts.memory[0][ptr + 2*i]
+    cx.opts.memory.bytes[ptr + i] = cx.opts.memory.bytes[ptr + 2*i]
   ptr = cx.opts.realloc(ptr, src_byte_length, 1, latin1_size)
-  trap_if(ptr + latin1_size > len(cx.opts.memory[0]))
+  trap_if(ptr + latin1_size > len(cx.opts.memory.bytes))
   return (ptr, latin1_size)
 ```
 
@@ -2631,15 +2639,15 @@ def store_list(cx, v, ptr, elem_type, maybe_length):
     store_list_into_valid_range(cx, v, ptr, elem_type)
     return
   begin, length = store_list_into_range(cx, v, elem_type)
-  store_int(cx, begin, ptr, ptr_size(cx.opts))
-  store_int(cx, length, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
+  store_int(cx, begin, ptr, cx.opts.memory.ptr_size())
+  store_int(cx, length, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
 
 def store_list_into_range(cx, v, elem_type):
   byte_length = len(v) * elem_size(elem_type, cx.opts)
-  trap_if(byte_length >= (1 << (ptr_size(cx.opts) * 8)))
+  trap_if(byte_length >= (1 << (cx.opts.memory.ptr_size() * 8)))
   ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + byte_length > len(cx.opts.memory[0]))
+  trap_if(ptr + byte_length > len(cx.opts.memory.bytes))
   store_list_into_valid_range(cx, v, ptr, elem_type)
   return (ptr, len(v))
 
@@ -2775,29 +2783,29 @@ def flatten_functype(opts, ft, context):
   flat_results = flatten_types(ft.result_type(), opts)
   if not opts.async_:
     if len(flat_params) > MAX_FLAT_PARAMS:
-      flat_params = [ptr_type(opts)]
+      flat_params = [opts.memory.ptr_type()]
     if len(flat_results) > MAX_FLAT_RESULTS:
       match context:
         case 'lift':
-          flat_results = [ptr_type(opts)]
+          flat_results = [opts.memory.ptr_type()]
         case 'lower':
-          flat_params += [ptr_type(opts)]
+          flat_params += [opts.memory.ptr_type()]
           flat_results = []
     return CoreFuncType(flat_params, flat_results)
   else:
     match context:
       case 'lift':
         if len(flat_params) > MAX_FLAT_PARAMS:
-          flat_params = [ptr_type(opts)]
+          flat_params = [opts.memory.ptr_type()]
         if opts.callback:
           flat_results = ['i32']
         else:
           flat_results = []
       case 'lower':
         if len(flat_params) > MAX_FLAT_ASYNC_PARAMS:
-          flat_params = [ptr_type(opts)]
+          flat_params = [opts.memory.ptr_type()]
         if len(flat_results) > 0:
-          flat_params += [ptr_type(opts)]
+          flat_params += [opts.memory.ptr_type()]
         flat_results = ['i32']
     return CoreFuncType(flat_params, flat_results)
 
@@ -2820,7 +2828,7 @@ def flatten_type(t, opts):
     case F32Type()                        : return ['f32']
     case F64Type()                        : return ['f64']
     case CharType()                       : return ['i32']
-    case StringType()                     : return [ptr_type(opts), ptr_type(opts)]
+    case StringType()                     : return [opts.memory.ptr_type(), opts.memory.ptr_type()]
     case ErrorContextType()               : return ['i32']
     case ListType(t, l)                   : return flatten_list(t, l, opts)
     case RecordType(fields)               : return flatten_record(fields, opts)
@@ -2836,7 +2844,7 @@ List flattening of a fixed-length list uses the same flattening as a tuple
 def flatten_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
     return flatten_type(elem_type, opts) * maybe_length
-  return [ptr_type(opts), ptr_type(opts)]
+  return [opts.memory.ptr_type(), opts.memory.ptr_type()]
 ```
 
 Record flattening simply flattens each field in sequence.
@@ -2962,8 +2970,8 @@ instead of from linear memory. Fixed-length lists are lifted the same way as a
 tuple (via `lift_flat_record` below).
 ```python
 def lift_flat_string(cx, vi):
-  ptr = vi.next(ptr_type(cx.opts))
-  packed_length = vi.next(ptr_type(cx.opts))
+  ptr = vi.next(cx.opts.memory.ptr_type())
+  packed_length = vi.next(cx.opts.memory.ptr_type())
   return load_string_from_range(cx, ptr, packed_length)
 
 def lift_flat_list(cx, vi, elem_type, maybe_length):
@@ -2972,8 +2980,8 @@ def lift_flat_list(cx, vi, elem_type, maybe_length):
     for i in range(maybe_length):
       a.append(lift_flat(cx, vi, elem_type))
     return a
-  ptr = vi.next(ptr_type(cx.opts))
-  length = vi.next(ptr_type(cx.opts))
+  ptr = vi.next(cx.opts.memory.ptr_type())
+  length = vi.next(cx.opts.memory.ptr_type())
   return load_list_from_range(cx, ptr, length, elem_type)
 ```
 
@@ -3147,10 +3155,10 @@ of component-level values with types `ts`.
 def lift_flat_values(cx, max_flat, vi, ts):
   flat_types = flatten_types(ts, cx.opts)
   if len(flat_types) > max_flat:
-    ptr = vi.next(ptr_type(cx.opts))
+    ptr = vi.next(cx.opts.memory.ptr_type())
     tuple_type = TupleType(ts)
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory[0]))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory.bytes))
     return list(load(cx, ptr, tuple_type).values())
   else:
     return [ lift_flat(cx, vi, t) for t in ts ]
@@ -3173,10 +3181,10 @@ def lower_flat_values(cx, max_flat, vs, ts, out_param = None):
       ptr = cx.opts.realloc(0, 0, alignment(tuple_type, cx.opts), elem_size(tuple_type, cx.opts))
       flat_vals = [ptr]
     else:
-      ptr = out_param.next(ptr_type(cx.opts))
+      ptr = out_param.next(cx.opts.memory.ptr_type())
       flat_vals = []
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory[0]))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory.bytes))
     store(cx, tuple_value, tuple_type, ptr)
   else:
     flat_vals = []
@@ -4969,6 +4977,8 @@ def canon_thread_available_parallelism():
 [`memaddr`]: https://webassembly.github.io/spec/core/exec/runtime.html#syntax-memaddr
 [`memaddrs` table]: https://webassembly.github.io/spec/core/exec/runtime.html#syntax-moduleinst
 [`memidx`]: https://webassembly.github.io/spec/core/syntax/modules.html#syntax-memidx
+[`memory` instance]: https://webassembly.github.io/spec/core/exec/runtime.html#memory-instances
+[`memory type`]: https://webassembly.github.io/spec/core/syntax/types.html#memory-types
 
 [Alignment]: https://en.wikipedia.org/wiki/Data_structure_alignment
 [UTF-8]: https://en.wikipedia.org/wiki/UTF-8
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 469a7140..178b0fc3 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -229,25 +229,33 @@ def __init__(self, opts, inst, borrow_scope = None):
 
 
 ### Canonical ABI Options
+@dataclass
+class MemInst:  # a linear memory paired with its address type
+  bytes: bytearray  # mutable backing store that loads/stores index into
+  addrtype: Literal['i32', 'i64']  # core type of addresses into `bytes`
+
+  def ptr_type(self):  # core value type used to pass pointers/lengths
+    return self.addrtype
+
+  def ptr_size(self):  # byte width of a pointer in memory: 4 or 8
+    match self.ptr_type():
+      case 'i32': return 4
+      case 'i64': return 8
+
+  def equal(lhs, rhs):  # same memory (identity), not merely equal contents
+    return lhs.bytes is rhs.bytes and \
+           lhs.addrtype == rhs.addrtype
+
 
 @dataclass
 class LiftOptions:
   string_encoding: str = 'utf8'
-  # A tuple consisting of the memory contents and the pointer type ('i32' or 'i64')
-  memory: Optional[tuple[bytearray, str]] = None
+  memory: Optional[MemInst] = None
 
   def equal(lhs, rhs):
     return lhs.string_encoding == rhs.string_encoding and \
            lhs.memory is rhs.memory
 
-def ptr_type(opts):
-  assert(opts.memory is not None)
-  return opts.memory[1]
-
-def ptr_size(opts):
-  match ptr_type(opts):
-    case 'i32': return 4
-    case 'i64': return 8
 
 @dataclass
 class LiftLowerOptions(LiftOptions):
@@ -786,7 +794,7 @@ def __init__(self, t, cx, ptr, length):
     trap_if(length > Buffer.MAX_LENGTH)
     if t and length > 0:
       trap_if(ptr != align_to(ptr, alignment(t, cx.opts)))
-      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory[0]))
+      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory.bytes))
     self.cx = cx
     self.t = t
     self.ptr = ptr
@@ -1082,7 +1090,7 @@ def alignment(t, opts):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return ptr_size(opts)
+    case StringType()                : return opts.memory.ptr_size()
     case ErrorContextType()          : return 4
     case ListType(t, l)              : return alignment_list(t, l, opts)
     case RecordType(fields)          : return alignment_record(fields, opts)
@@ -1094,7 +1102,7 @@ def alignment(t, opts):
 def alignment_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
     return alignment(elem_type, opts)
-  return ptr_size(opts)
+  return opts.memory.ptr_size()
 
 def alignment_record(fields, opts):
   a = 1
@@ -1140,7 +1148,7 @@ def elem_size(t, opts):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return 2 * ptr_size(opts)
+    case StringType()                : return 2 * opts.memory.ptr_size()
     case ErrorContextType()          : return 4
     case ListType(t, l)              : return elem_size_list(t, l, opts)
     case RecordType(fields)          : return elem_size_record(fields, opts)
@@ -1152,7 +1160,7 @@ def elem_size(t, opts):
 def elem_size_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
     return maybe_length * elem_size(elem_type, opts)
-  return 2 * ptr_size(opts)
+  return 2 * opts.memory.ptr_size()
 
 def elem_size_record(fields, opts):
   s = 0
@@ -1186,7 +1194,7 @@ def elem_size_flags(labels):
 
 def load(cx, ptr, t):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory[0]))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory.bytes))
   match despecialize(t):
     case BoolType()         : return convert_int_to_bool(load_int(cx, ptr, 1))
     case U8Type()           : return load_int(cx, ptr, 1)
@@ -1212,7 +1220,7 @@ def load(cx, ptr, t):
     case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, 4), t)
 
 def load_int(cx, ptr, nbytes, signed = False):
-  return int.from_bytes(cx.opts.memory[0][ptr : ptr+nbytes], 'little', signed = signed)
+  return int.from_bytes(cx.opts.memory.bytes[ptr : ptr+nbytes], 'little', signed = signed)
 
 def convert_int_to_bool(i):
   assert(i >= 0)
@@ -1255,12 +1263,12 @@ def convert_i32_to_char(cx, i):
 String = tuple[str, str, int]
 
 def load_string(cx, ptr) -> String:
-  begin = load_int(cx, ptr, ptr_size(cx.opts))
-  tagged_code_units = load_int(cx, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
+  begin = load_int(cx, ptr, cx.opts.memory.ptr_size())
+  tagged_code_units = load_int(cx, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
   return load_string_from_range(cx, begin, tagged_code_units)
 
 def utf16_tag(opts):
-  return 1 << (ptr_size(opts) * 8 - 1)
+  return 1 << (opts.memory.ptr_size() * 8 - 1)
 
 def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   match cx.opts.string_encoding:
@@ -1282,9 +1290,9 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
         encoding = 'latin-1'
 
   trap_if(ptr != align_to(ptr, alignment))
-  trap_if(ptr + byte_length > len(cx.opts.memory[0]))
+  trap_if(ptr + byte_length > len(cx.opts.memory.bytes))
   try:
-    s = cx.opts.memory[0][ptr : ptr+byte_length].decode(encoding)
+    s = cx.opts.memory.bytes[ptr : ptr+byte_length].decode(encoding)
   except UnicodeError:
     trap()
 
@@ -1298,13 +1306,13 @@ def lift_error_context(cx, i):
 def load_list(cx, ptr, elem_type, maybe_length):
   if maybe_length is not None:
     return load_list_from_valid_range(cx, ptr, maybe_length, elem_type)
-  begin = load_int(cx, ptr, ptr_size(cx.opts))
-  length = load_int(cx, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
+  begin = load_int(cx, ptr, cx.opts.memory.ptr_size())
+  length = load_int(cx, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
   return load_list_from_range(cx, begin, length, elem_type)
 
 def load_list_from_range(cx, ptr, length, elem_type):
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory[0]))
+  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory.bytes))
   return load_list_from_valid_range(cx, ptr, length, elem_type)
 
 def load_list_from_valid_range(cx, ptr, length, elem_type):
@@ -1377,7 +1385,7 @@ def lift_async_value(ReadableEndT, cx, i, t):
 
 def store(cx, v, t, ptr):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory[0]))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory.bytes))
   match despecialize(t):
     case BoolType()         : store_int(cx, int(bool(v)), ptr, 1)
     case U8Type()           : store_int(cx, v, ptr, 1)
@@ -1403,7 +1411,7 @@ def store(cx, v, t, ptr):
     case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, 4)
 
 def store_int(cx, v, ptr, nbytes, signed = False):
-  cx.opts.memory[0][ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
+  cx.opts.memory.bytes[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
 
 def maybe_scramble_nan32(f):
   if math.isnan(f):
@@ -1449,8 +1457,8 @@ def char_to_i32(c):
 
 def store_string(cx, v: String, ptr):
   begin, tagged_code_units = store_string_into_range(cx, v)
-  store_int(cx, begin, ptr, ptr_size(cx.opts))
-  store_int(cx, tagged_code_units, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
+  store_int(cx, begin, ptr, cx.opts.memory.ptr_size())
+  store_int(cx, tagged_code_units, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
 
 def store_string_into_range(cx, v: String):
   src, src_encoding, src_tagged_code_units = v
@@ -1487,17 +1495,17 @@ def store_string_into_range(cx, v: String):
             case 'utf16'    : return store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units)
 
 def max_string_byte_length(opts):
-  return (1 << (ptr_size(opts) * 8 - 1)) - 1
+  return (1 << (opts.memory.ptr_size() * 8 - 1)) - 1
 
 def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment, dst_encoding):
   dst_byte_length = dst_code_unit_size * src_code_units
   trap_if(dst_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
-  trap_if(ptr + dst_byte_length > len(cx.opts.memory[0]))
+  trap_if(ptr + dst_byte_length > len(cx.opts.memory.bytes))
   encoded = src.encode(dst_encoding)
   assert(dst_byte_length == len(encoded))
-  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
   return (ptr, src_code_units)
 
 def store_utf16_to_utf8(cx, src, src_code_units):
@@ -1511,19 +1519,19 @@ def store_latin1_to_utf8(cx, src, src_code_units):
 def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
   assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 1, src_code_units)
-  trap_if(ptr + src_code_units > len(cx.opts.memory[0]))
+  trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
   for i,code_point in enumerate(src):
     if ord(code_point) < 2**7:
-      cx.opts.memory[0][ptr + i] = ord(code_point)
+      cx.opts.memory.bytes[ptr + i] = ord(code_point)
     else:
       trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
-      trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
       encoded = src.encode('utf-8')
-      cx.opts.memory[0][ptr+i : ptr+len(encoded)] = encoded[i : ]
+      cx.opts.memory.bytes[ptr+i : ptr+len(encoded)] = encoded[i : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 1, len(encoded))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
       return (ptr, len(encoded))
   return (ptr, src_code_units)
 
@@ -1532,13 +1540,13 @@ def store_utf8_to_utf16(cx, src, src_code_units):
   trap_if(worst_case_size > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
+  trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
   if len(encoded) < worst_case_size:
     ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
+    trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
   code_units = int(len(encoded) / 2)
   return (ptr, code_units)
 
@@ -1546,33 +1554,33 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
   assert(src_code_units <= max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_code_units)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_code_units > len(cx.opts.memory[0]))
+  trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
   dst_byte_length = 0
   for usv in src:
     if ord(usv) < (1 << 8):
-      cx.opts.memory[0][ptr + dst_byte_length] = ord(usv)
+      cx.opts.memory.bytes[ptr + dst_byte_length] = ord(usv)
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
       trap_if(worst_case_size > max_string_byte_length(cx.opts))
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
-      trap_if(ptr + worst_case_size > len(cx.opts.memory[0]))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
       for j in range(dst_byte_length-1, -1, -1):
-        cx.opts.memory[0][ptr + 2*j] = cx.opts.memory[0][ptr + j]
-        cx.opts.memory[0][ptr + 2*j + 1] = 0
+        cx.opts.memory.bytes[ptr + 2*j] = cx.opts.memory.bytes[ptr + j]
+        cx.opts.memory.bytes[ptr + 2*j + 1] = 0
       encoded = src.encode('utf-16-le')
-      cx.opts.memory[0][ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
+      cx.opts.memory.bytes[ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
         trap_if(ptr != align_to(ptr, 2))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory[0]))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
       tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
       return (ptr, tagged_code_units)
   if dst_byte_length < src_code_units:
     ptr = cx.opts.realloc(ptr, src_code_units, 2, dst_byte_length)
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + dst_byte_length > len(cx.opts.memory[0]))
+    trap_if(ptr + dst_byte_length > len(cx.opts.memory.bytes))
   return (ptr, dst_byte_length)
 
 def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
@@ -1580,17 +1588,17 @@ def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   trap_if(src_byte_length > max_string_byte_length(cx.opts))
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_byte_length > len(cx.opts.memory[0]))
+  trap_if(ptr + src_byte_length > len(cx.opts.memory.bytes))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory[0][ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
   if any(ord(c) >= (1 << 8) for c in src):
     tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
     return (ptr, tagged_code_units)
   latin1_size = int(len(encoded) / 2)
   for i in range(latin1_size):
-    cx.opts.memory[0][ptr + i] = cx.opts.memory[0][ptr + 2*i]
+    cx.opts.memory.bytes[ptr + i] = cx.opts.memory.bytes[ptr + 2*i]
   ptr = cx.opts.realloc(ptr, src_byte_length, 1, latin1_size)
-  trap_if(ptr + latin1_size > len(cx.opts.memory[0]))
+  trap_if(ptr + latin1_size > len(cx.opts.memory.bytes))
   return (ptr, latin1_size)
 
 def lower_error_context(cx, v):
@@ -1602,15 +1610,15 @@ def store_list(cx, v, ptr, elem_type, maybe_length):
     store_list_into_valid_range(cx, v, ptr, elem_type)
     return
   begin, length = store_list_into_range(cx, v, elem_type)
-  store_int(cx, begin, ptr, ptr_size(cx.opts))
-  store_int(cx, length, ptr + ptr_size(cx.opts), ptr_size(cx.opts))
+  store_int(cx, begin, ptr, cx.opts.memory.ptr_size())
+  store_int(cx, length, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
 
 def store_list_into_range(cx, v, elem_type):
   byte_length = len(v) * elem_size(elem_type, cx.opts)
-  trap_if(byte_length >= (1 << (ptr_size(cx.opts) * 8)))
+  trap_if(byte_length >= (1 << (cx.opts.memory.ptr_size() * 8)))
   ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + byte_length > len(cx.opts.memory[0]))
+  trap_if(ptr + byte_length > len(cx.opts.memory.bytes))
   store_list_into_valid_range(cx, v, ptr, elem_type)
   return (ptr, len(v))
 
@@ -1685,29 +1693,29 @@ def flatten_functype(opts, ft, context):
   flat_results = flatten_types(ft.result_type(), opts)
   if not opts.async_:
     if len(flat_params) > MAX_FLAT_PARAMS:
-      flat_params = [ptr_type(opts)]
+      flat_params = [opts.memory.ptr_type()]
     if len(flat_results) > MAX_FLAT_RESULTS:
       match context:
         case 'lift':
-          flat_results = [ptr_type(opts)]
+          flat_results = [opts.memory.ptr_type()]
         case 'lower':
-          flat_params += [ptr_type(opts)]
+          flat_params += [opts.memory.ptr_type()]
           flat_results = []
     return CoreFuncType(flat_params, flat_results)
   else:
     match context:
       case 'lift':
         if len(flat_params) > MAX_FLAT_PARAMS:
-          flat_params = [ptr_type(opts)]
+          flat_params = [opts.memory.ptr_type()]
         if opts.callback:
           flat_results = ['i32']
         else:
           flat_results = []
       case 'lower':
         if len(flat_params) > MAX_FLAT_ASYNC_PARAMS:
-          flat_params = [ptr_type(opts)]
+          flat_params = [opts.memory.ptr_type()]
         if len(flat_results) > 0:
-          flat_params += [ptr_type(opts)]
+          flat_params += [opts.memory.ptr_type()]
         flat_results = ['i32']
     return CoreFuncType(flat_params, flat_results)
 
@@ -1723,7 +1731,7 @@ def flatten_type(t, opts):
     case F32Type()                        : return ['f32']
     case F64Type()                        : return ['f64']
     case CharType()                       : return ['i32']
-    case StringType()                     : return [ptr_type(opts), ptr_type(opts)]
+    case StringType()                     : return [opts.memory.ptr_type(), opts.memory.ptr_type()]
     case ErrorContextType()               : return ['i32']
     case ListType(t, l)                   : return flatten_list(t, l, opts)
     case RecordType(fields)               : return flatten_record(fields, opts)
@@ -1735,7 +1743,7 @@ def flatten_type(t, opts):
 def flatten_list(elem_type, maybe_length, opts):
   if maybe_length is not None:
     return flatten_type(elem_type, opts) * maybe_length
-  return [ptr_type(opts), ptr_type(opts)]
+  return [opts.memory.ptr_type(), opts.memory.ptr_type()]
 
 def flatten_record(fields, opts):
   flat = []
@@ -1822,8 +1830,8 @@ def lift_flat_signed(vi, core_width, t_width):
   return i
 
 def lift_flat_string(cx, vi):
-  ptr = vi.next(ptr_type(cx.opts))
-  packed_length = vi.next(ptr_type(cx.opts))
+  ptr = vi.next(cx.opts.memory.ptr_type())
+  packed_length = vi.next(cx.opts.memory.ptr_type())
   return load_string_from_range(cx, ptr, packed_length)
 
 def lift_flat_list(cx, vi, elem_type, maybe_length):
@@ -1832,8 +1840,8 @@ def lift_flat_list(cx, vi, elem_type, maybe_length):
     for i in range(maybe_length):
       a.append(lift_flat(cx, vi, elem_type))
     return a
-  ptr = vi.next(ptr_type(cx.opts))
-  length = vi.next(ptr_type(cx.opts))
+  ptr = vi.next(cx.opts.memory.ptr_type())
+  length = vi.next(cx.opts.memory.ptr_type())
   return load_list_from_range(cx, ptr, length, elem_type)
 
 def lift_flat_record(cx, vi, fields):
@@ -1957,10 +1965,10 @@ def lower_flat_flags(v, labels):
 def lift_flat_values(cx, max_flat, vi, ts):
   flat_types = flatten_types(ts, cx.opts)
   if len(flat_types) > max_flat:
-    ptr = vi.next(ptr_type(cx.opts))
+    ptr = vi.next(cx.opts.memory.ptr_type())
     tuple_type = TupleType(ts)
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory[0]))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory.bytes))
     return list(load(cx, ptr, tuple_type).values())
   else:
     return [ lift_flat(cx, vi, t) for t in ts ]
@@ -1975,10 +1983,10 @@ def lower_flat_values(cx, max_flat, vs, ts, out_param = None):
       ptr = cx.opts.realloc(0, 0, alignment(tuple_type, cx.opts), elem_size(tuple_type, cx.opts))
       flat_vals = [ptr]
     else:
-      ptr = out_param.next(ptr_type(cx.opts))
+      ptr = out_param.next(cx.opts.memory.ptr_type())
       flat_vals = []
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory[0]))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory.bytes))
     store(cx, tuple_value, tuple_type, ptr)
   else:
     flat_vals = []
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 48f5276f..cc32e0ea 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -35,8 +35,8 @@ def realloc(self, original_ptr, original_size, alignment, new_size):
     self.memory[ret : ret + original_size] = self.memory[original_ptr : original_ptr + original_size]
     return ret
 
-_DEFAULT_MEMORY = (bytearray(), 'i32')
-_DEFAULT_MEMORY_64 = (bytearray(), 'i64')
+_DEFAULT_MEMORY = MemInst(bytearray(), 'i32')
+_DEFAULT_MEMORY_64 = MemInst(bytearray(), 'i64')
 
 def mk_opts(memory = None, encoding = 'utf8', realloc = None, post_return = None, sync_task_return = False, async_ = False, addr_type = 'i32'):
   opts = CanonicalOptions()
@@ -46,9 +46,9 @@ def mk_opts(memory = None, encoding = 'utf8', realloc = None, post_return = None
     elif addr_type == 'i64':
       opts.memory = _DEFAULT_MEMORY_64
     else:
-      assert(False, "Invalid address type: {}".format(addr_type))
+      assert False, "Invalid address type: {}".format(addr_type)
   else:
-    opts.memory = (memory, addr_type)
+    opts.memory = MemInst(memory, addr_type)
   opts.string_encoding = encoding
   opts.realloc = realloc
   opts.post_return = post_return
@@ -140,10 +140,10 @@ def test_name():
   if lower_v is None:
     lower_v = v
 
-  heap = Heap(5*len(cx.opts.memory[0]))
+  heap = Heap(5*len(cx.opts.memory.bytes))
   if dst_encoding is None:
     dst_encoding = cx.opts.string_encoding
-  cx = mk_cx(heap.memory, dst_encoding, heap.realloc, addr_type=ptr_type(cx.opts))
+  cx = mk_cx(heap.memory, dst_encoding, heap.realloc, addr_type=cx.opts.memory.ptr_type())
   lowered_vals = lower_flat(cx, v, lower_t)
 
   vi = CoreValueIter(lowered_vals)
@@ -278,7 +278,7 @@ def test_string(src_encoding, dst_encoding, s, addr_type='i32'):
     except UnicodeEncodeError:
       pass
     encoded = s.encode('utf-16-le')
-    tagged_code_units = int(len(encoded) / 2) | utf16_tag(LiftLowerOptions(memory=(bytearray(), addr_type)))
+    tagged_code_units = int(len(encoded) / 2) | utf16_tag(LiftLowerOptions(memory=MemInst(bytearray(), addr_type)))
     test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type)
 
 encodings = ['utf8', 'utf16', 'latin1+utf16']
@@ -655,21 +655,21 @@ def consumer(thread, args):
     fut1_1.set()
 
     waitretp = consumer_heap.realloc(0, 0, 8, 4)
-    [event] = canon_waitable_set_wait(True, (consumer_heap.memory, 'i32'), thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, MemInst(consumer_heap.memory, 'i32'), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_heap.memory[waitretp] == subi1)
     assert(consumer_heap.memory[waitretp+4] == Subtask.State.RETURNED)
     [] = canon_subtask_drop(thread, subi1)
     fut1_2.set()
 
-    [event] = canon_waitable_set_wait(True, (consumer_heap.memory, 'i32'), thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, MemInst(consumer_heap.memory, 'i32'), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_heap.memory[waitretp] == subi2)
     assert(consumer_heap.memory[waitretp+4] == Subtask.State.STARTED)
     assert(consumer_heap.memory[retp] == 13)
     fut2.set()
 
-    [event] = canon_waitable_set_wait(True, (consumer_heap.memory, 'i32'), thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, MemInst(consumer_heap.memory, 'i32'), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_heap.memory[waitretp] == subi2)
     assert(consumer_heap.memory[waitretp+4] == Subtask.State.RETURNED)
@@ -890,7 +890,7 @@ def core_consumer(thread, args):
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi2)
     assert(consumer_mem[retp+4] == Subtask.State.STARTED)
@@ -902,14 +902,14 @@ def core_consumer(thread, args):
       [ret] = canon_thread_yield(True, thread)
       assert(ret == 0)
       retp = 0
-      [ret] = canon_waitable_set_poll(True, (consumer_mem, 'i32'), thread, seti, retp)
+      [ret] = canon_waitable_set_poll(True, MemInst(consumer_mem, 'i32'), thread, seti, retp)
       assert(ret == EventCode.NONE)
 
     [ret] = canon_future_write(FutureType(None), consumer_opts, thread, wfut21, 0xdeadbeef)
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi1)
     assert(consumer_mem[retp+4] == Subtask.State.RETURNED)
@@ -923,14 +923,14 @@ def core_consumer(thread, args):
       [ret] = canon_thread_yield(True, thread)
       assert(ret == 0)
       retp = 0
-      [ret] = canon_waitable_set_poll(True, (consumer_mem, 'i32'), thread, seti, retp)
+      [ret] = canon_waitable_set_poll(True, MemInst(consumer_mem, 'i32'), thread, seti, retp)
       assert(ret == EventCode.NONE)
 
     [ret] = canon_future_write(FutureType(None), consumer_opts, thread, wfut13, 0xdeadbeef)
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi2)
     assert(consumer_mem[retp+4] == Subtask.State.RETURNED)
@@ -947,7 +947,7 @@ def core_consumer(thread, args):
     assert(ret == CopyResult.COMPLETED)
 
     retp = 0
-    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp+0] == subi3)
     assert(consumer_mem[retp+4] == Subtask.State.RETURNED)
@@ -1005,7 +1005,7 @@ def core_caller(thread, args):
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, subi, seti)
     retp3 = 12
-    [event] = canon_waitable_set_wait(True, (caller_mem, 'i32'), thread, seti, retp3)
+    [event] = canon_waitable_set_wait(True, MemInst(caller_mem, 'i32'), thread, seti, retp3)
     assert(event == EventCode.SUBTASK)
     assert(caller_mem[retp3+0] == subi)
     assert(caller_mem[retp3+4] == Subtask.State.RETURNED)
@@ -1072,7 +1072,7 @@ def consumer(thread, args):
       [ret] = canon_thread_yield(True, thread)
       assert(ret == 0)
       retp = 8
-      [event] = canon_waitable_set_poll(True, (consumer_heap.memory, 'i32'), thread, seti, retp)
+      [event] = canon_waitable_set_poll(True, MemInst(consumer_heap.memory, 'i32'), thread, seti, retp)
       if event == EventCode.NONE:
         continue
       assert(event == EventCode.SUBTASK)
@@ -1158,7 +1158,7 @@ def consumer(thread, args):
     remain = [subi1, subi2]
     while remain:
       retp = 8
-      [event] = canon_waitable_set_wait(True, (consumer_heap.memory, 'i32'), thread, seti, retp)
+      [event] = canon_waitable_set_wait(True, MemInst(consumer_heap.memory, 'i32'), thread, seti, retp)
       assert(event == EventCode.SUBTASK)
       assert(consumer_heap.memory[retp+4] == Subtask.State.RETURNED)
       subi = consumer_heap.memory[retp]
@@ -1224,14 +1224,14 @@ def core_func(thread, args):
     fut1.set()
 
     retp = lower_heap.realloc(0,0,8,4)
-    [event] = canon_waitable_set_wait(True, (lower_heap.memory, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(lower_heap.memory, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(lower_heap.memory[retp] == subi1)
     assert(lower_heap.memory[retp+4] == Subtask.State.RETURNED)
 
     fut2.set()
 
-    [event] = canon_waitable_set_wait(True, (lower_heap.memory, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(lower_heap.memory, 'i32'), thread, seti, retp)
     assert(event == EventCode.SUBTASK)
     assert(lower_heap.memory[retp] == subi2)
     assert(lower_heap.memory[retp+4] == Subtask.State.RETURNED)
@@ -1548,7 +1548,7 @@ def core_func(thread, args):
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi1, seti)
     definitions.throw_it = True
-    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp) ##
+    [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp) ##
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi1)
     result,n = unpack_result(mem[retp+4])
@@ -1564,7 +1564,7 @@ def core_func(thread, args):
     assert(ret == definitions.BLOCKED)
     host_import_incoming.set_remain(100)
     [] = canon_waitable_join(thread, wsi3, seti)
-    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[retp+0] == wsi3)
     result,n = unpack_result(mem[retp+4])
@@ -1576,7 +1576,7 @@ def core_func(thread, args):
     assert(ret == definitions.BLOCKED)
     dst_stream.set_remain(100)
     [] = canon_waitable_join(thread, wsi2, seti)
-    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[retp+0] == wsi2)
     result,n = unpack_result(mem[retp+4])
@@ -1595,7 +1595,7 @@ def core_func(thread, args):
     [ret] = canon_stream_read(StreamType(U8Type()), opts, thread, rsi4, 0, 4)
     assert(ret == definitions.BLOCKED)
     [] = canon_waitable_join(thread, rsi4, seti)
-    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi4)
     result,n = unpack_result(mem[retp+4])
@@ -1719,7 +1719,7 @@ def core_func(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi)
     result,n = unpack_result(mem[retp+4])
@@ -1740,7 +1740,7 @@ def core_func(thread, args):
     assert(ret == definitions.BLOCKED)
     dst.set_remain(4)
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[retp+0] == wsi)
     result,n = unpack_result(mem[retp+4])
@@ -1801,7 +1801,7 @@ def core_func1(thread, args):
     retp = 16
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, (mem1, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem1, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem1[retp+0] == wsi)
     result,n = unpack_result(mem1[retp+4])
@@ -1812,7 +1812,7 @@ def core_func1(thread, args):
 
     fut4.set()
 
-    [event] = canon_waitable_set_wait(True, (mem1, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem1, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem1[retp+0] == wsi)
     assert(mem1[retp+4] == 0)
@@ -1850,7 +1850,7 @@ def core_func2(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, (mem2, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem2, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem2[retp+0] == rsi)
     result,n = unpack_result(mem2[retp+4])
@@ -1878,7 +1878,7 @@ def core_func2(thread, args):
     [ret] = canon_stream_read(StreamType(U8Type()), opts2, thread, rsi, 12345, 0)
     assert(ret == definitions.BLOCKED)
 
-    [event] = canon_waitable_set_wait(True, (mem2, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem2, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem2[retp+0] == rsi)
     p2 = int.from_bytes(mem2[retp+4 : retp+8], 'little', signed=False)
@@ -1924,7 +1924,7 @@ def core_func1(thread, args):
     retp = 16
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, (mem1, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem1, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem1[retp+0] == wsi)
     result,n = unpack_result(mem1[retp+4])
@@ -1961,7 +1961,7 @@ def core_func2(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, (mem2, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem2, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem2[retp+0] == rsi)
     result,n = unpack_result(mem2[retp+4])
@@ -2078,7 +2078,7 @@ def core_func(thread, args):
     host_source.unblock_cancel()
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rsi, seti)
-    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.STREAM_READ)
     assert(mem[retp+0] == rsi)
     result,n = unpack_result(mem[retp+4])
@@ -2183,7 +2183,7 @@ def core_func(thread, args):
 
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, rfi, seti)
-    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, retp)
+    [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp)
     assert(event == EventCode.FUTURE_READ)
     assert(mem[retp+0] == rfi)
     assert(mem[retp+4] == CopyResult.COMPLETED)
@@ -2249,7 +2249,7 @@ def core_callee1(thread, args):
   def core_callee2(thread, args):
     [x] = args
     [si] = canon_waitable_set_new(thread)
-    [ret] = canon_waitable_set_wait(True, (callee_heap.memory, 'i32'), thread, si, 0)
+    [ret] = canon_waitable_set_wait(True, MemInst(callee_heap.memory, 'i32'), thread, si, 0)
     assert(ret == EventCode.TASK_CANCELLED)
     match x:
       case 1:
@@ -2296,9 +2296,9 @@ def core_callee4(thread, args):
     except Trap:
       pass
     [seti] = canon_waitable_set_new(thread)
-    [result] = canon_waitable_set_wait(True, (callee_heap.memory, 'i32'), thread, seti, 0)
+    [result] = canon_waitable_set_wait(True, MemInst(callee_heap.memory, 'i32'), thread, seti, 0)
     assert(result == EventCode.TASK_CANCELLED)
-    [result] = canon_waitable_set_poll(True, (callee_heap.memory, 'i32'), thread, seti, 0)
+    [result] = canon_waitable_set_poll(True, MemInst(callee_heap.memory, 'i32'), thread, seti, 0)
     assert(result == EventCode.NONE)
     [] = canon_task_cancel(thread)
     return []
@@ -2433,7 +2433,7 @@ def core_caller(thread, args):
     assert(caller_heap.memory[0] == 13)
     [] = canon_waitable_join(thread, subi3, seti)
     retp = 8
-    [ret] = canon_waitable_set_wait(True, (caller_heap.memory, 'i32'), thread, seti, retp)
+    [ret] = canon_waitable_set_wait(True, MemInst(caller_heap.memory, 'i32'), thread, seti, retp)
     assert(ret == EventCode.SUBTASK)
     assert(caller_heap.memory[retp+0] == subi3)
     assert(caller_heap.memory[retp+4] == Subtask.State.RETURNED)
@@ -2452,7 +2452,7 @@ def core_caller(thread, args):
     assert(caller_heap.memory[0] == 13)
     [] = canon_waitable_join(thread, subi4, seti)
     retp = 8
-    [ret] = canon_waitable_set_wait(True, (caller_heap.memory, 'i32'), thread, seti, retp)
+    [ret] = canon_waitable_set_wait(True, MemInst(caller_heap.memory, 'i32'), thread, seti, retp)
     assert(ret == EventCode.SUBTASK)
     assert(caller_heap.memory[retp+0] == subi4)
     assert(caller_heap.memory[retp+4] == Subtask.State.CANCELLED_BEFORE_RETURNED)
@@ -2494,7 +2494,7 @@ def core_caller(thread, args):
     host_fut4.set()
     [] = canon_waitable_join(thread, subi, seti)
     waitretp = 4
-    [event] = canon_waitable_set_wait(True, (caller_heap.memory, 'i32'), thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, MemInst(caller_heap.memory, 'i32'), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(caller_heap.memory[waitretp] == subi)
     assert(caller_heap.memory[waitretp+4] == Subtask.State.CANCELLED_BEFORE_RETURNED)
@@ -2510,7 +2510,7 @@ def core_caller(thread, args):
     host_fut5.set()
     [] = canon_waitable_join(thread, subi, seti)
     waitretp = 4
-    [event] = canon_waitable_set_wait(True, (caller_heap.memory, 'i32'), thread, seti, waitretp)
+    [event] = canon_waitable_set_wait(True, MemInst(caller_heap.memory, 'i32'), thread, seti, waitretp)
     assert(event == EventCode.SUBTASK)
     assert(caller_heap.memory[waitretp] == subi)
     assert(caller_heap.memory[waitretp+4] == Subtask.State.RETURNED)
@@ -2525,7 +2525,7 @@ def core_caller(thread, args):
     assert(ret == definitions.BLOCKED)
 
     [] = canon_waitable_join(thread, subi, seti)
-    [event] = canon_waitable_set_wait(True, (caller_heap.memory, 'i32'), thread, seti, 4)
+    [event] = canon_waitable_set_wait(True, MemInst(caller_heap.memory, 'i32'), thread, seti, 4)
     assert(event == EventCode.SUBTASK)
     assert(caller_heap.memory[0] == 45)
     assert(caller_heap.memory[4] == subi)
@@ -2572,7 +2572,7 @@ def core_func(thread, args):
     [] = canon_future_drop_readable(FutureType(elemt), thread, rfi)
 
     [] = canon_waitable_join(thread, wfi, seti)
-    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, 0)
+    [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, 0)
     assert(event == EventCode.FUTURE_WRITE)
     assert(mem[0] == wfi)
     assert(mem[4] == CopyResult.COMPLETED)
@@ -2592,7 +2592,7 @@ def core_func(thread, args):
     [] = canon_stream_drop_readable(StreamType(elemt), thread, rsi)
 
     [] = canon_waitable_join(thread, wsi, seti)
-    [event] = canon_waitable_set_wait(True, (mem, 'i32'), thread, seti, 0)
+    [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, 0)
     assert(event == EventCode.STREAM_WRITE)
     assert(mem[0] == wsi)
     result,n = unpack_result(mem[4])
@@ -2783,14 +2783,14 @@ def core_consumer(thread, args):
     retp3 = 16
     [seti] = canon_waitable_set_new(thread)
     [] = canon_waitable_join(thread, subi1, seti)
-    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp3)
+    [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp3)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp3] == subi1)
     assert(consumer_mem[retp3+4] == Subtask.State.RETURNED)
     assert(consumer_mem[retp1] == 42)
 
     [] = canon_waitable_join(thread, subi2, seti)
-    [event] = canon_waitable_set_wait(True, (consumer_mem, 'i32'), thread, seti, retp3)
+    [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp3)
     assert(event == EventCode.SUBTASK)
     assert(consumer_mem[retp3] == subi2)
     assert(consumer_mem[retp3+4] == Subtask.State.RETURNED)

From e06bda6b30736044e2ece2e6813217669aa345f6 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Tue, 24 Mar 2026 14:24:32 +0000
Subject: [PATCH 15/25] inline memory defaults in tests

---
 design/mvp/canonical-abi/run_tests.py | 87 ++++++++++++---------------
 1 file changed, 38 insertions(+), 49 deletions(-)

diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index cc32e0ea..9ee37a09 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -35,20 +35,9 @@ def realloc(self, original_ptr, original_size, alignment, new_size):
     self.memory[ret : ret + original_size] = self.memory[original_ptr : original_ptr + original_size]
     return ret
 
-_DEFAULT_MEMORY = MemInst(bytearray(), 'i32')
-_DEFAULT_MEMORY_64 = MemInst(bytearray(), 'i64')
-
-def mk_opts(memory = None, encoding = 'utf8', realloc = None, post_return = None, sync_task_return = False, async_ = False, addr_type = 'i32'):
+def mk_opts(memory = MemInst(bytearray(), 'i32'), encoding = 'utf8', realloc = None, post_return = None, sync_task_return = False, async_ = False):
   opts = CanonicalOptions()
-  if memory is None:
-    if addr_type == 'i32':
-      opts.memory = _DEFAULT_MEMORY
-    elif addr_type == 'i64':
-      opts.memory = _DEFAULT_MEMORY_64
-    else:
-      assert False, "Invalid address type: {}".format(addr_type)
-  else:
-    opts.memory = MemInst(memory, addr_type)
+  opts.memory = memory
   opts.string_encoding = encoding
   opts.realloc = realloc
   opts.post_return = post_return
@@ -57,8 +46,8 @@ def mk_opts(memory = None, encoding = 'utf8', realloc = None, post_return = None
   opts.callback = None
   return opts
 
-def mk_cx(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None, addr_type = 'i32'):
-  opts = mk_opts(memory, encoding, realloc, post_return, addr_type=addr_type)
+def mk_cx(memory = MemInst(bytearray(), 'i32'), encoding = 'utf8', realloc = None, post_return = None):
+  opts = mk_opts(memory, encoding, realloc, post_return)
   inst = ComponentInstance(Store())
   return LiftLowerContext(opts, inst)
 
@@ -143,7 +132,7 @@ def test_name():
   heap = Heap(5*len(cx.opts.memory.bytes))
   if dst_encoding is None:
     dst_encoding = cx.opts.string_encoding
-  cx = mk_cx(heap.memory, dst_encoding, heap.realloc, addr_type=cx.opts.memory.ptr_type())
+  cx = mk_cx(MemInst(heap.memory, cx.opts.memory.ptr_type()), dst_encoding, heap.realloc)
   lowered_vals = lower_flat(cx, v, lower_t)
 
   vi = CoreValueIter(lowered_vals)
@@ -218,7 +207,7 @@ def test_nan32(inbits, outbits):
     assert(encode_float_as_i32(f) == outbits)
   else:
     assert(not math.isnan(origf) or math.isnan(f))
-  cx = mk_cx(int.to_bytes(inbits, 4, 'little'))
+  cx = mk_cx(MemInst(int.to_bytes(inbits, 4, 'little'), 'i32'))
   f = load(cx, 0, F32Type())
   if definitions.DETERMINISTIC_PROFILE:
     assert(encode_float_as_i32(f) == outbits)
@@ -232,7 +221,7 @@ def test_nan64(inbits, outbits):
     assert(encode_float_as_i64(f) == outbits)
   else:
     assert(not math.isnan(origf) or math.isnan(f))
-  cx = mk_cx(int.to_bytes(inbits, 8, 'little'))
+  cx = mk_cx(MemInst(int.to_bytes(inbits, 8, 'little'), 'i32'))
   f = load(cx, 0, F64Type())
   if definitions.DETERMINISTIC_PROFILE:
     assert(encode_float_as_i64(f) == outbits)
@@ -257,7 +246,7 @@ def test_nan64(inbits, outbits):
 def test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type='i32'):
   heap = Heap(len(encoded))
   heap.memory[:] = encoded[:]
-  cx = mk_cx(heap.memory, src_encoding, addr_type=addr_type)
+  cx = mk_cx(MemInst(heap.memory, addr_type), src_encoding)
   v = (s, src_encoding, tagged_code_units)
   test(StringType(), [0, tagged_code_units], v, cx, dst_encoding)
 
@@ -295,7 +284,7 @@ def test_string(src_encoding, dst_encoding, s, addr_type='i32'):
 
 def test_heap(t, expect, args, byte_array, addr_type='i32'):
   heap = Heap(byte_array)
-  cx = mk_cx(heap.memory, addr_type=addr_type)
+  cx = mk_cx(MemInst(heap.memory, addr_type))
   test(t, args, expect, cx)
 
 # Empty record types are not permitted yet.
@@ -401,7 +390,7 @@ def test_heap(t, expect, args, byte_array, addr_type='i32'):
           [0xff,0xff,0xff,0xff, 0,0,0,0])
 
 def test_flatten(t, params, results, addr_type='i32'):
-  opts = mk_opts(addr_type=addr_type)
+  opts = mk_opts(MemInst(bytearray(), addr_type))
   expect = CoreFuncType(params, results)
 
   if len(params) > definitions.MAX_FLAT_PARAMS:
@@ -442,7 +431,7 @@ def callee(thread, x):
       return x
 
     callee_heap = Heap(1000)
-    callee_opts = mk_opts(callee_heap.memory, 'utf8', callee_heap.realloc, addr_type=addr_type)
+    callee_opts = mk_opts(MemInst(callee_heap.memory, addr_type), 'utf8', callee_heap.realloc)
     callee_inst = ComponentInstance(store)
 
     got = None
@@ -589,7 +578,7 @@ def on_resolve(results):
 
 def test_async_to_async():
   producer_heap = Heap(10)
-  producer_opts = mk_opts(producer_heap.memory)
+  producer_opts = mk_opts(MemInst(producer_heap.memory, 'i32'))
   producer_opts.async_ = True
 
   store = Store()
@@ -628,7 +617,7 @@ def core_blocking_producer(thread, args):
   blocking_callee = partial(canon_lift, producer_opts, producer_inst, blocking_ft, core_blocking_producer)
 
   consumer_heap = Heap(20)
-  consumer_opts = mk_opts(consumer_heap.memory)
+  consumer_opts = mk_opts(MemInst(consumer_heap.memory, 'i32'))
   consumer_opts.async_ = True
 
   def consumer(thread, args):
@@ -840,7 +829,7 @@ def core_sync_callee(thread, args):
   consumer_inst = ComponentInstance(store)
   consumer_ft = FuncType([], [], async_ = True)
   consumer_mem = bytearray(24)
-  consumer_opts = mk_opts(consumer_mem, async_ = True)
+  consumer_opts = mk_opts(MemInst(consumer_mem, 'i32'), async_ = True)
   def core_consumer(thread, args):
     assert(len(args) == 0)
 
@@ -982,7 +971,7 @@ def core_callee2(thread, args):
   caller_inst = ComponentInstance(store)
   caller_ft = FuncType([], [], async_ = True)
   caller_mem = bytearray(24)
-  caller_opts = mk_opts(memory = caller_mem, async_ = True)
+  caller_opts = mk_opts(memory = MemInst(caller_mem, 'i32'), async_ = True)
   def core_caller(thread, args):
     assert(len(args) == 0)
 
@@ -1042,7 +1031,7 @@ def producer2_core(thread, args):
   producer2 = partial(canon_lift, producer_opts, producer_inst, producer_ft, producer2_core)
 
   consumer_heap = Heap(20)
-  consumer_opts = mk_opts(consumer_heap.memory)
+  consumer_opts = mk_opts(MemInst(consumer_heap.memory, 'i32'))
   consumer_opts.async_ = True
 
   consumer_ft = FuncType([],[U8Type()], async_ = True)
@@ -1131,7 +1120,7 @@ def producer2_core(thread, args):
   producer2 = partial(canon_lift, producer_opts, producer_inst, producer_ft, producer2_core)
 
   consumer_heap = Heap(20)
-  consumer_opts = mk_opts(consumer_heap.memory, async_ = True)
+  consumer_opts = mk_opts(MemInst(consumer_heap.memory, 'i32'), async_ = True)
 
   consumer_ft = FuncType([],[U8Type()], async_ = True)
   def consumer(thread, args):
@@ -1204,7 +1193,7 @@ def core_hostcall_pre(fut, thread, args):
   hostcall2 = partial(canon_lift, hostcall_opts, hostcall_inst, ft, core_hostcall2)
 
   lower_heap = Heap(20)
-  lower_opts = mk_opts(lower_heap.memory)
+  lower_opts = mk_opts(MemInst(lower_heap.memory, 'i32'))
   lower_opts.async_ = True
 
   def core_func(thread, args):
@@ -1409,8 +1398,8 @@ def test_eager_stream_completion():
   ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())])
   inst = ComponentInstance(store)
   mem = bytearray(20)
-  opts = mk_opts(memory=mem, async_=True)
-  sync_opts = mk_opts(memory=mem, async_=False)
+  opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True)
+  sync_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=False)
 
   def host_import(caller, on_start, on_resolve):
     args = on_start()
@@ -1492,8 +1481,8 @@ def test_async_stream_ops():
   ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())])
   inst = ComponentInstance(store)
   mem = bytearray(24)
-  opts = mk_opts(memory=mem, async_=True)
-  sync_opts = mk_opts(memory=mem, async_=False)
+  opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True)
+  sync_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=False)
 
   host_import_incoming = None
   host_import_outgoing = None
@@ -1642,7 +1631,7 @@ def test_receive_own_stream():
   store = Store()
   inst = ComponentInstance(store)
   mem = bytearray(20)
-  opts = mk_opts(memory=mem, async_=True)
+  opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True)
 
   host_ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())])
   def host_import(caller, on_start, on_resolve):
@@ -1680,7 +1669,7 @@ def on_resolve(results): assert(len(results) == 0)
 def test_host_partial_reads_writes():
   store = Store()
   mem = bytearray(20)
-  opts = mk_opts(memory=mem, async_=True)
+  opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True)
 
   src = HostSource(U8Type(), [1,2,3,4], chunk=2, destroy_if_empty = False)
   source_ft = FuncType([], [StreamType(U8Type())])
@@ -1766,7 +1755,7 @@ def test_wasm_to_wasm_stream():
 
   inst1 = ComponentInstance(store)
   mem1 = bytearray(24)
-  opts1 = mk_opts(memory=mem1, async_=True)
+  opts1 = mk_opts(memory=MemInst(mem1, 'i32'), async_=True)
   ft1 = FuncType([], [StreamType(U8Type())])
   def core_func1(thread, args):
     assert(not args)
@@ -1831,7 +1820,7 @@ def core_func1(thread, args):
   inst2 = ComponentInstance(store)
   heap2 = Heap(24)
   mem2 = heap2.memory
-  opts2 = mk_opts(memory=heap2.memory, realloc=heap2.realloc, async_=True)
+  opts2 = mk_opts(memory=MemInst(heap2.memory, 'i32'), realloc=heap2.realloc, async_=True)
   ft2 = FuncType([], [])
   def core_func2(thread, args):
     assert(not args)
@@ -1897,7 +1886,7 @@ def test_wasm_to_wasm_stream_empty():
 
   inst1 = ComponentInstance(store)
   mem1 = bytearray(24)
-  opts1 = mk_opts(memory=mem1, async_=True)
+  opts1 = mk_opts(memory=MemInst(mem1, 'i32'), async_=True)
   ft1 = FuncType([], [StreamType(None)])
   def core_func1(thread, args):
     assert(not args)
@@ -1942,7 +1931,7 @@ def core_func1(thread, args):
   inst2 = ComponentInstance(store)
   heap2 = Heap(10)
   mem2 = heap2.memory
-  opts2 = mk_opts(memory=heap2.memory, realloc=heap2.realloc, async_=True)
+  opts2 = mk_opts(memory=MemInst(heap2.memory, 'i32'), realloc=heap2.realloc, async_=True)
   ft2 = FuncType([], [])
   def core_func2(thread, args):
     assert(not args)
@@ -1992,7 +1981,7 @@ def test_cancel_copy():
   store = Store()
   inst = ComponentInstance(store)
   mem = bytearray(24)
-  lower_opts = mk_opts(memory=mem, async_=True)
+  lower_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True)
 
   host_ft1 = FuncType([StreamType(U8Type())],[])
   host_sink = None
@@ -2147,7 +2136,7 @@ def test_futures():
   store = Store()
   inst = ComponentInstance(store)
   mem = bytearray(24)
-  lower_opts = mk_opts(memory=mem, async_=True)
+  lower_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True)
 
   host_ft1 = FuncType([FutureType(U8Type())],[FutureType(U8Type())])
   def host_func(caller, on_start, on_resolve):
@@ -2238,8 +2227,8 @@ def test_cancel_subtask():
   ft = FuncType([U8Type()], [U8Type()], async_ = True)
 
   callee_heap = Heap(10)
-  callee_opts = mk_opts(callee_heap.memory, async_ = True)
-  sync_callee_opts = mk_opts(callee_heap.memory, async_ = False)
+  callee_opts = mk_opts(MemInst(callee_heap.memory, 'i32'), async_ = True)
+  sync_callee_opts = mk_opts(MemInst(callee_heap.memory, 'i32'), async_ = False)
   callee_inst = ComponentInstance(store)
 
   def core_callee1(thread, args):
@@ -2374,7 +2363,7 @@ def core_callee6(thread, args):
   callee6 = partial(canon_lift, callee_opts, callee_inst, ft, core_callee6)
 
   caller_heap = Heap(20)
-  caller_opts = mk_opts(caller_heap.memory, async_ = True)
+  caller_opts = mk_opts(MemInst(caller_heap.memory, 'i32'), async_ = True)
   caller_inst = ComponentInstance(store)
 
   def core_caller(thread, args):
@@ -2554,8 +2543,8 @@ def test_self_copy(elemt):
   store = Store()
   inst = ComponentInstance(store)
   mem = bytearray(40)
-  sync_opts = mk_opts(memory=mem, async_=False)
-  async_opts = mk_opts(memory=mem, async_=True)
+  sync_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=False)
+  async_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True)
 
   ft = FuncType([], [], async_ = True)
   def core_func(thread, args):
@@ -2609,7 +2598,7 @@ def core_func(thread, args):
 def test_async_flat_params():
   store = Store()
   heap = Heap(1000)
-  opts = mk_opts(heap.memory, 'utf8', heap.realloc, async_ = True)
+  opts = mk_opts(MemInst(heap.memory, 'i32'), 'utf8', heap.realloc, async_ = True)
 
   ft1 = FuncType([F32Type(), F64Type(), U32Type(), S64Type()],[])
   def f1(caller, on_start, on_resolve):
@@ -2659,7 +2648,7 @@ def test_threads():
   store = Store()
   inst = ComponentInstance(store)
   mem = bytearray(8)
-  opts = mk_opts(memory = mem)
+  opts = mk_opts(memory = MemInst(mem, 'i32'))
 
   ftbl = Table()
   ft = CoreFuncType(['i32'],[])
@@ -2762,7 +2751,7 @@ def core_producer_callback2(thread, args):
   consumer_inst = ComponentInstance(store)
   consumer_ft = FuncType([], [], async_ = True)
   consumer_mem = bytearray(24)
-  consumer_opts = mk_opts(consumer_mem, async_ = True)
+  consumer_opts = mk_opts(MemInst(consumer_mem, 'i32'), async_ = True)
 
   def core_consumer(thread, args):
     assert(len(args) == 0)

From 0270ee3fdcfe52379e4e73a2d9f2c356ec3142e4 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Tue, 24 Mar 2026 14:29:05 +0000
Subject: [PATCH 16/25] revert max string length to a const

---
 design/mvp/CanonicalABI.md              | 55 +++++++++++++------------
 design/mvp/canonical-abi/definitions.py | 34 +++++++--------
 design/mvp/canonical-abi/run_tests.py   |  2 +-
 3 files changed, 46 insertions(+), 45 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 4a6eed58..d5b6085a 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -2123,10 +2123,13 @@ def convert_i32_to_char(cx, i):
 ```
 
 Strings are loaded from two pointer-sized values: a pointer (offset in linear
-memory) and a number of [code units]. There are three supported string
-encodings in [`canonopt`]: [UTF-8], [UTF-16] and `latin1+utf16`. This last
-option allows a *dynamic* choice between [Latin-1] and UTF-16, indicated by
-the high bit of the second pointer-sized value. String values include their
+memory) and a number of [code units]. There are three supported string encodings
+in [`canonopt`]: [UTF-8], [UTF-16] and `latin1+utf16`. This last option allows a
+*dynamic* choice between [Latin-1] and UTF-16, indicated by bit 31 (`1 << 31`)
+of the second pointer-sized value. The number of code units in a string is
+limited to fit in 31 bits, leaving bit 31 free as the flag. This limit is
+enforced even on 64-bit memories so that 64-bit components don't define
+interfaces which 32-bit components couldn't handle. String values include their
 original encoding and length in tagged code units as a "hint" that enables
 `store_string` (defined below) to make better up-front allocation size choices
 in many cases. Thus, the value produced by `load_string` isn't simply a Python
@@ -2140,8 +2143,7 @@ def load_string(cx, ptr) -> String:
   tagged_code_units = load_int(cx, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
   return load_string_from_range(cx, begin, tagged_code_units)
 
-def utf16_tag(opts):
-  return 1 << (opts.memory.ptr_size() * 8 - 1)
+UTF16_TAG = 1 << 31
 
 def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   match cx.opts.string_encoding:
@@ -2155,8 +2157,8 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
       encoding = 'utf-16-le'
     case 'latin1+utf16':
       alignment = 2
-      if bool(tagged_code_units & utf16_tag(cx.opts)):
-        byte_length = 2 * (tagged_code_units ^ utf16_tag(cx.opts))
+      if bool(tagged_code_units & UTF16_TAG):
+        byte_length = 2 * (tagged_code_units ^ UTF16_TAG)
         encoding = 'utf-16-le'
       else:
         byte_length = tagged_code_units
@@ -2431,7 +2433,7 @@ original encoding and number of source [code units]. From this hint data,
 
 We start with a case analysis to enumerate all the meaningful encoding
 combinations, subdividing the `latin1+utf16` encoding into either `latin1` or
-`utf16` based on the `utf16_tag` flag set by `load_string`:
+`utf16` based on the `UTF16_TAG` flag set by `load_string`:
 ```python
 def store_string(cx, v: String, ptr):
   begin, tagged_code_units = store_string_into_range(cx, v)
@@ -2442,9 +2444,9 @@ def store_string_into_range(cx, v: String):
   src, src_encoding, src_tagged_code_units = v
 
   if src_encoding == 'latin1+utf16':
-    if bool(src_tagged_code_units & utf16_tag(cx.opts)):
+    if bool(src_tagged_code_units & UTF16_TAG):
       src_simple_encoding = 'utf16'
-      src_code_units = src_tagged_code_units ^ utf16_tag(cx.opts)
+      src_code_units = src_tagged_code_units ^ UTF16_TAG
     else:
       src_simple_encoding = 'latin1'
       src_code_units = src_tagged_code_units
@@ -2477,12 +2479,11 @@ The simplest 4 cases above can compute the exact destination size and then copy
 with a simply loop (that possibly inflates Latin-1 to UTF-16 by injecting a 0
 byte after every Latin-1 byte).
 ```python
-def max_string_byte_length(opts):
-  return (1 << (opts.memory.ptr_size() * 8 - 1)) - 1
+MAX_STRING_BYTE_LENGTH = (1 << 31) - 1
 
 def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment, dst_encoding):
   dst_byte_length = dst_code_unit_size * src_code_units
-  trap_if(dst_byte_length > max_string_byte_length(cx.opts))
+  trap_if(dst_byte_length > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
   trap_if(ptr + dst_byte_length > len(cx.opts.memory.bytes))
@@ -2491,8 +2492,8 @@ def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment
   cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
   return (ptr, src_code_units)
 ```
-The `max_string_byte_length` function ensures that the high bit of a
-string's number of code units is never set, keeping it clear for `utf16_tag`.
+The `MAX_STRING_BYTE_LENGTH` constant ensures that bit 31 of a string's number
+of code units is never set, keeping that bit clear for `UTF16_TAG`.
 
 The 2 cases of transcoding into UTF-8 share an algorithm that starts by
 optimistically assuming that each code unit of the source string fits in a
@@ -2508,14 +2509,14 @@ def store_latin1_to_utf8(cx, src, src_code_units):
   return store_string_to_utf8(cx, src, src_code_units, worst_case_size)
 
 def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
-  assert(src_code_units <= max_string_byte_length(cx.opts))
+  assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 1, src_code_units)
   trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
   for i,code_point in enumerate(src):
     if ord(code_point) < 2**7:
       cx.opts.memory.bytes[ptr + i] = ord(code_point)
     else:
-      trap_if(worst_case_size > max_string_byte_length(cx.opts))
+      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
       trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
       encoded = src.encode('utf-8')
@@ -2534,7 +2535,7 @@ if multiple UTF-8 bytes were collapsed into a single 2-byte UTF-16 code unit:
 ```python
 def store_utf8_to_utf16(cx, src, src_code_units):
   worst_case_size = 2 * src_code_units
-  trap_if(worst_case_size > max_string_byte_length(cx.opts))
+  trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
@@ -2558,7 +2559,7 @@ after every Latin-1 byte (iterating in reverse to avoid clobbering later
 bytes):
 ```python
 def store_string_to_latin1_or_utf16(cx, src, src_code_units):
-  assert(src_code_units <= max_string_byte_length(cx.opts))
+  assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, src_code_units)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
@@ -2569,7 +2570,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
-      trap_if(worst_case_size > max_string_byte_length(cx.opts))
+      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
       trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
@@ -2582,7 +2583,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
         ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
         trap_if(ptr != align_to(ptr, 2))
         trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
-      tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
+      tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
       return (ptr, tagged_code_units)
   if dst_byte_length < src_code_units:
     ptr = cx.opts.realloc(ptr, src_code_units, 2, dst_byte_length)
@@ -2604,14 +2605,14 @@ inexpensively fused with the UTF-16 validate+copy loop.)
 ```python
 def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   src_byte_length = 2 * src_code_units
-  trap_if(src_byte_length > max_string_byte_length(cx.opts))
+  trap_if(src_byte_length > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + src_byte_length > len(cx.opts.memory.bytes))
   encoded = src.encode('utf-16-le')
   cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
   if any(ord(c) >= (1 << 8) for c in src):
-    tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
+    tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
     return (ptr, tagged_code_units)
   latin1_size = int(len(encoded) / 2)
   for i in range(latin1_size):
@@ -2631,7 +2632,9 @@ def lower_error_context(cx, v):
 Lists and records are stored by recursively storing their elements and
 are symmetric to the loading functions. Unlike strings, lists can
 simply allocate based on the up-front knowledge of length and static
-element size.
+element size. Storing a list that exceeds the size of a 32-bit memory traps even
+when storing on a 64-bit platform to avoid having interfaces that 32-bit
+components can't use.
 ```python
 def store_list(cx, v, ptr, elem_type, maybe_length):
   if maybe_length is not None:
@@ -2644,7 +2647,7 @@ def store_list(cx, v, ptr, elem_type, maybe_length):
 
 def store_list_into_range(cx, v, elem_type):
   byte_length = len(v) * elem_size(elem_type, cx.opts)
-  trap_if(byte_length >= (1 << (cx.opts.memory.ptr_size() * 8)))
+  trap_if(byte_length >= (1 << 32))
   ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
   trap_if(ptr + byte_length > len(cx.opts.memory.bytes))
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 178b0fc3..c06ee968 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -1267,8 +1267,7 @@ def load_string(cx, ptr) -> String:
   tagged_code_units = load_int(cx, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
   return load_string_from_range(cx, begin, tagged_code_units)
 
-def utf16_tag(opts):
-  return 1 << (opts.memory.ptr_size() * 8 - 1)
+UTF16_TAG = 1 << 31
 
 def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   match cx.opts.string_encoding:
@@ -1282,8 +1281,8 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
       encoding = 'utf-16-le'
     case 'latin1+utf16':
       alignment = 2
-      if bool(tagged_code_units & utf16_tag(cx.opts)):
-        byte_length = 2 * (tagged_code_units ^ utf16_tag(cx.opts))
+      if bool(tagged_code_units & UTF16_TAG):
+        byte_length = 2 * (tagged_code_units ^ UTF16_TAG)
         encoding = 'utf-16-le'
       else:
         byte_length = tagged_code_units
@@ -1464,9 +1463,9 @@ def store_string_into_range(cx, v: String):
   src, src_encoding, src_tagged_code_units = v
 
   if src_encoding == 'latin1+utf16':
-    if bool(src_tagged_code_units & utf16_tag(cx.opts)):
+    if bool(src_tagged_code_units & UTF16_TAG):
       src_simple_encoding = 'utf16'
-      src_code_units = src_tagged_code_units ^ utf16_tag(cx.opts)
+      src_code_units = src_tagged_code_units ^ UTF16_TAG
     else:
       src_simple_encoding = 'latin1'
       src_code_units = src_tagged_code_units
@@ -1494,12 +1493,11 @@ def store_string_into_range(cx, v: String):
             case 'latin1'   : return store_string_copy(cx, src, src_code_units, 1, 2, 'latin-1')
             case 'utf16'    : return store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units)
 
-def max_string_byte_length(opts):
-  return (1 << (opts.memory.ptr_size() * 8 - 1)) - 1
+MAX_STRING_BYTE_LENGTH = (1 << 31) - 1
 
 def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment, dst_encoding):
   dst_byte_length = dst_code_unit_size * src_code_units
-  trap_if(dst_byte_length > max_string_byte_length(cx.opts))
+  trap_if(dst_byte_length > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
   trap_if(ptr + dst_byte_length > len(cx.opts.memory.bytes))
@@ -1517,14 +1515,14 @@ def store_latin1_to_utf8(cx, src, src_code_units):
   return store_string_to_utf8(cx, src, src_code_units, worst_case_size)
 
 def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
-  assert(src_code_units <= max_string_byte_length(cx.opts))
+  assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 1, src_code_units)
   trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
   for i,code_point in enumerate(src):
     if ord(code_point) < 2**7:
       cx.opts.memory.bytes[ptr + i] = ord(code_point)
     else:
-      trap_if(worst_case_size > max_string_byte_length(cx.opts))
+      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
       trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
       encoded = src.encode('utf-8')
@@ -1537,7 +1535,7 @@ def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
 
 def store_utf8_to_utf16(cx, src, src_code_units):
   worst_case_size = 2 * src_code_units
-  trap_if(worst_case_size > max_string_byte_length(cx.opts))
+  trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
@@ -1551,7 +1549,7 @@ def store_utf8_to_utf16(cx, src, src_code_units):
   return (ptr, code_units)
 
 def store_string_to_latin1_or_utf16(cx, src, src_code_units):
-  assert(src_code_units <= max_string_byte_length(cx.opts))
+  assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, src_code_units)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
@@ -1562,7 +1560,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
-      trap_if(worst_case_size > max_string_byte_length(cx.opts))
+      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
       trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
@@ -1575,7 +1573,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
         ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
         trap_if(ptr != align_to(ptr, 2))
         trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
-      tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
+      tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
       return (ptr, tagged_code_units)
   if dst_byte_length < src_code_units:
     ptr = cx.opts.realloc(ptr, src_code_units, 2, dst_byte_length)
@@ -1585,14 +1583,14 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
 
 def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   src_byte_length = 2 * src_code_units
-  trap_if(src_byte_length > max_string_byte_length(cx.opts))
+  trap_if(src_byte_length > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + src_byte_length > len(cx.opts.memory.bytes))
   encoded = src.encode('utf-16-le')
   cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
   if any(ord(c) >= (1 << 8) for c in src):
-    tagged_code_units = int(len(encoded) / 2) | utf16_tag(cx.opts)
+    tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
     return (ptr, tagged_code_units)
   latin1_size = int(len(encoded) / 2)
   for i in range(latin1_size):
@@ -1615,7 +1613,7 @@ def store_list(cx, v, ptr, elem_type, maybe_length):
 
 def store_list_into_range(cx, v, elem_type):
   byte_length = len(v) * elem_size(elem_type, cx.opts)
-  trap_if(byte_length >= (1 << (cx.opts.memory.ptr_size() * 8)))
+  trap_if(byte_length >= (1 << 32))
   ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
   trap_if(ptr + byte_length > len(cx.opts.memory.bytes))
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 9ee37a09..94af80c2 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -267,7 +267,7 @@ def test_string(src_encoding, dst_encoding, s, addr_type='i32'):
     except UnicodeEncodeError:
       pass
     encoded = s.encode('utf-16-le')
-    tagged_code_units = int(len(encoded) / 2) | utf16_tag(LiftLowerOptions(memory=MemInst(bytearray(), addr_type)))
+    tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
     test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type)
 
 encodings = ['utf8', 'utf16', 'latin1+utf16']

From 97d6301eb7945f5a60400fde71c333e8183e2ca4 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Tue, 24 Mar 2026 16:36:02 +0000
Subject: [PATCH 17/25] replace PTR with memory.addrtype

---
 design/mvp/CanonicalABI.md | 15 +++----
 design/mvp/Concurrency.md  |  6 +--
 design/mvp/Explainer.md    | 80 +++++++++++++++++++-------------------
 3 files changed, 51 insertions(+), 50 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index d5b6085a..d1c5c56e 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -3217,12 +3217,11 @@ specifying `string-encoding=utf8` twice is an error. Each individual option, if
 present, is validated as such:
 
 * `string-encoding=N` - can be passed at most once, regardless of `N`.
-* `memory` - this is a subtype of `(memory 1)` or `(memory i64 1)`. In the rest
-  of the explainer, `PTR` will refer to either `i32` or `i64` core Wasm types
-  as determined by the type of this `memory`.
-* `realloc` - the function has type `(func (param PTR PTR PTR PTR) (result PTR))`
-  where `PTR` is `i32` or `i64` as described above.
-* if `realloc` is present, then `memory` must be present
+* `memory` - this is a subtype of `(memory 1)` or `(memory i64 1)`.
+* `realloc` - the function has type `(func (param addr addr addr addr) (result addr))`
+  where `addr` is the address type (`i32` or `i64`) coming from the [`memory type`]
+  of the `memory` canonopt.
+* If `realloc` is present then `memory` must be present.
 * `post-return` - only allowed on [`canon lift`](#canon-lift), which has rules
   for validation
 * 🔀 `async` - cannot be present with `post-return`
@@ -4269,7 +4268,9 @@ context switches. Next, the stream's `state` is updated based on the result
 being delivered to core wasm so that, once a stream end has been notified that
 the other end dropped, calling anything other than `stream.drop-*` traps.
 Lastly, `stream_event` packs the `CopyResult` and number of elements copied up
-until this point into a single `PTR`-sized payload for core wasm.
+until this point into a single `i32` or `i64`-sized payload for core wasm. The
+size is determined by the `addrtype` coming from the [`memory type`] of the
+`memory` immediate.
 ```python
   def stream_event(result, reclaim_buffer):
     reclaim_buffer()
diff --git a/design/mvp/Concurrency.md b/design/mvp/Concurrency.md
index a0202cf5..6fe3318e 100644
--- a/design/mvp/Concurrency.md
+++ b/design/mvp/Concurrency.md
@@ -957,7 +957,7 @@ and the asynchronous ABI has the signature:
 (func (param $f i32) (param $out-ptr i32) (result i32))
 ```
 where `$f` is the index of a future (not a pointer to one) while while
-`$out-ptr` is a pointer to a linear memory location that will receive an `i32` 
+`$out-ptr` is a pointer to a linear memory location that will receive an `i32`
 index.
 
 For the runtime semantics of this `i32` index, see `lift_stream`,
@@ -1040,8 +1040,8 @@ must also be exported with signature:
 
 The `(result i32)` has the same interpretation as the stackless export function
 and the runtime will repeatedly call the callback until a value of `0` is
-returned. The `i32` parameters describe what happened that caused the
-callback to be called again.
+returned. The `i32` parameters describe what happened that caused the callback
+to be called again.
 
 For a complete description of how async exports work, see [`canon_lift`] in the
 Canonical ABI Explainer.
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index d7e2a076..80fe9766 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1300,19 +1300,18 @@ default is `utf8`. It is a validation error to include more than one
 The `(memory ...)` option specifies the memory that the Canonical ABI will
 use to load and store values. If the Canonical ABI needs to load or store,
 validation requires this option to be present (there is no default). The types
-of lowered functions may also depend on whether this memory is a 32-bit or
-64-bit memory if pointers are transitively contained in parameters or results.
-In what follows the notation `PTR` will refer to the core Wasm type `i32` or
-`i64` corresponding to the type of the `(memory ...)` option.
+of lowered functions may also depend on the [`core:memory-type`] of this memory,
+specifically its [`core:address-type`] (indicated by `memory.addrtype`), if pointers
+are transitively contained in parameters or results.
 
 The `(realloc ...)` option specifies a core function that is validated to
 have the following core function type:
 ```wat
-(func (param $originalPtr PTR)
-      (param $originalSize PTR)
-      (param $alignment PTR)
-      (param $newSize PTR)
-      (result PTR))
+(func (param $originalPtr memory.addrtype)
+      (param $originalSize memory.addrtype)
+      (param $alignment memory.addrtype)
+      (param $newSize memory.addrtype)
+      (result memory.addrtype))
 ```
 The Canonical ABI will use `realloc` both to allocate (passing `0` for the first
 two parameters) and reallocate. If the Canonical ABI needs `realloc`, validation
@@ -1339,10 +1338,9 @@ validated to have the following core function type:
 ```wat
 (func (param $ctx i32)
       (param $event i32)
-      (param $payload PTR)
+      (param $payload i32)
       (result $done i32))
 ```
-where `PTR` is determined by the `memory` canonopt as described above.
 Again, see the [concurrency explainer] for more details.
 
 Based on this description of the AST, the [Canonical ABI explainer] gives a
@@ -1675,10 +1673,10 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 
 ###### 🔀 `waitable-set.wait`
 
-| Synopsis                   |                                                |
-| -------------------------- | ---------------------------------------------- |
-| Approximate WIT signature  | `func<cancellable?>(s: waitable-set) -> event` |
-| Canonical ABI signature    | `[s:i32 payload-addr:PTR] -> [event-code:i32]` |
+| Synopsis                   |                                                            |
+| -------------------------- | ---------------------------------------------------------- |
+| Approximate WIT signature  | `func<cancellable?,memory>(s: waitable-set) -> event`      |
+| Canonical ABI signature    | `[s:i32 payload-addr:memory.addrtype] -> [event-code:i32]` |
 
 where `event` is defined in WIT as:
 ```wit
@@ -1740,10 +1738,10 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and
 
 ###### 🔀 `waitable-set.poll`
 
-| Synopsis                   |                                                |
-| -------------------------- | ---------------------------------------------- |
-| Approximate WIT signature  | `func<cancellable?>(s: waitable-set) -> event` |
-| Canonical ABI signature    | `[s:i32 payload-addr:PTR] -> [event-code:i32]` |
+| Synopsis                   |                                                            |
+| -------------------------- | ---------------------------------------------------------- |
+| Approximate WIT signature  | `func<cancellable?,memory>(s: waitable-set) -> event`      |
+| Canonical ABI signature    | `[s:i32 payload-addr:memory.addrtype] -> [event-code:i32]` |
 
 where `event` is defined as in [`waitable-set.wait`](#-waitable-setwait).
 
@@ -1857,11 +1855,11 @@ For details, see [Streams and Futures] in the concurrency explainer and
 
 ###### 🔀 `stream.read` and `stream.write`
 
-| Synopsis                                     |                                                                                                 |
-| -------------------------------------------- | ----------------------------------------------------------------------------------------------- |
-| Approximate WIT signature for `stream.read`  | `func<stream<T?>>(e: readable-stream-end<T?>, b: writable-buffer<T>?) -> option<stream-result>` |
-| Approximate WIT signature for `stream.write` | `func<stream<T?>>(e: writable-stream-end<T?>, b: readable-buffer<T>?) -> option<stream-result>` |
-| Canonical ABI signature                      | `[stream-end:i32 ptr:PTR num:PTR] -> [PTR]`                                               |
+| Synopsis                                     |                                                                                                             |
+| -------------------------------------------- | ------------------------------------------------------------------------------------------------------ |
+| Approximate WIT signature for `stream.read`  | `func<stream<T?>,memory>(e: readable-stream-end<T?>, b: writable-buffer<T>?) -> option<stream-result>` |
+| Approximate WIT signature for `stream.write` | `func<stream<T?>,memory>(e: writable-stream-end<T?>, b: readable-buffer<T>?) -> option<stream-result>` |
+| Canonical ABI signature                      | `[stream-end:i32 ptr:memory.addrtype num:memory.addrtype] -> [memory.addrtype]`                        |
 
 where `stream-result` is defined in WIT as:
 ```wit
@@ -1917,10 +1915,10 @@ any subsequent operation on the stream other than `stream.drop-{readable,writabl
 traps.
 
 In the Canonical ABI, the `{readable,writable}-stream-end` is passed as an
-`i32` index into the component instance's table followed by a pair of `PTR`s
+`i32` index into the component instance's table followed by a pair of `memory.addrtype`s
 describing the linear memory offset and size-in-elements of the
 `{readable,writable}-buffer<T>`. The `option<stream-result>` return value is
-bit-packed into a single `PTR` where:
+bit-packed into a single `memory.addrtype` where:
 * all-ones represents `none`.
 * Otherwise, the `result` is in the low 4 bits and the `progress` is in the
   remaining high bits.
@@ -1930,11 +1928,11 @@ For details, see [Streams and Futures] in the concurrency explainer and
 
 ###### 🔀 `future.read` and `future.write`
 
-| Synopsis                                     |                                                                                                          |
-| -------------------------------------------- | -------------------------------------------------------------------------------------------------------- |
-| Approximate WIT signature for `future.read`  | `func<future<T?>>(e: readable-future-end<T?>, b: writable-buffer<T; 1>?) -> option<future-read-result>`  |
-| Approximate WIT signature for `future.write` | `func<future<T?>>(e: writable-future-end<T?>, v: readable-buffer<T; 1>?) -> option<future-write-result>` |
-| Canonical ABI signature                      | `[readable-future-end:i32 ptr:PTR] -> [i32]`                                                           |
+| Synopsis                                     |                                                                                                                 |
+| -------------------------------------------- | --------------------------------------------------------------------------------------------------------------- |
+| Approximate WIT signature for `future.read`  | `func<future<T?>,memory>(e: readable-future-end<T?>, b: writable-buffer<T; 1>?) -> option<future-read-result>`  |
+| Approximate WIT signature for `future.write` | `func<future<T?>,memory>(e: writable-future-end<T?>, v: readable-buffer<T; 1>?) -> option<future-write-result>` |
+| Canonical ABI signature                      | `[readable-future-end:i32 ptr:memory.addrtype] -> [i32]`                                                        |
 
 where `future-{read,write}-result` are defined in WIT as:
 ```wit
@@ -1985,7 +1983,7 @@ called before successfully writing a value.
 
 In the Canonical ABI, the `{readable,writable}-future-end` is passed as an
 `i32` index into the component instance's table followed by a single
-`PTR` describing the linear memory offset of the
+`memory.addrtype` describing the linear memory offset of the
 `{readable,writable}-buffer<T; 1>`. The `option<future-{read,write}-result>`
 return value is bit-packed into the single `i32` return value where all-ones
 represents `none`. And, `future-read-result.cancelled` is encoded
@@ -2254,10 +2252,10 @@ explainer.
 
 ###### 📝 `error-context.new`
 
-| Synopsis                         |                                          |
-| -------------------------------- | ---------------------------------------- |
-| Approximate WIT signature        | `func(message: string) -> error-context` |
-| Canonical ABI signature          | `[ptr:PTR len:PTR] -> [i32]`         |
+| Synopsis                         |                                                      |
+| -------------------------------- | ---------------------------------------------------- |
+| Approximate WIT signature        | `func<memory>(message: string) -> error-context`     |
+| Canonical ABI signature          | `[ptr:memory.addrtype len:memory.addrtype] -> [i32]` |
 
 The `error-context.new` built-in returns a new `error-context` value. The given
 string is non-deterministically transformed to produce the `error-context`'s
@@ -2270,10 +2268,10 @@ For details, see [`canon_error_context_new`] in the Canonical ABI explainer.
 
 ###### 📝 `error-context.debug-message`
 
-| Synopsis                         |                                         |
-| -------------------------------- | --------------------------------------- |
-| Approximate WIT signature        | `func(errctx: error-context) -> string` |
-| Canonical ABI signature          | `[errctxi:i32 ptr:PTR] -> []`         |
+| Synopsis                         |                                                 |
+| -------------------------------- | ----------------------------------------------- |
+| Approximate WIT signature        | `func<memory>(errctx: error-context) -> string` |
+| Canonical ABI signature          | `[errctxi:i32 ptr:memory.addrtype] -> []`       |
 
 The `error-context.debug-message` built-in returns the
 [debug message](#error-context-type) of the given `error-context`.
@@ -3176,6 +3174,8 @@ For some use-case-focused, worked examples, see:
 [func-import-abbrev]: https://webassembly.github.io/spec/core/text/modules.html#text-func-abbrev
 [`core:version`]: https://webassembly.github.io/spec/core/binary/modules.html#binary-version
 [`core:tableidx`]: https://webassembly.github.io/spec/core/syntax/modules.html#syntax-tableidx
+[`core:address-type`]: https://webassembly.github.io/spec/core/syntax/types.html#address-types
+[`core:memory-type`]: https://webassembly.github.io/spec/core/syntax/types.html#memory-types
 [`core:table-type`]: https://webassembly.github.io/spec/core/syntax/types.html#table-types
 
 [Embedder]: https://webassembly.github.io/spec/core/appendix/embedding.html

From 322c1bad4f3a274eda7a30c5f2289e5194f82ee6 Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Wed, 25 Mar 2026 07:25:16 +0000
Subject: [PATCH 18/25] note on buffer size

---
 design/mvp/CanonicalABI.md | 9 ++++++---
 design/mvp/Concurrency.md  | 4 ----
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index d1c5c56e..9289dcac 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -1336,8 +1336,9 @@ that returns how many `t` values may still be read or written. Buffers mostly
 hide their original/complete size. However, zero-length buffers need to be
 treated specially (particularly when a zero-length read rendezvous with a
 zero-length write), so there is a special query for detecting whether a buffer
-is zero-length. Based on this, buffers are represented by the following 3
-abstract Python classes:
+is zero-length. Internally, buffers do have a maximum length of `2^28 - 1` which
+is independent of the type of memory backing the buffer. Based on this, buffers
+are represented by the following 3 abstract Python classes:
 ```python
 class Buffer:
   MAX_LENGTH = 2**28 - 1
@@ -4270,7 +4271,9 @@ the other end dropped, calling anything other than `stream.drop-*` traps.
 Lastly, `stream_event` packs the `CopyResult` and number of elements copied up
 until this point into a single `i32` or `i64`-sized payload for core wasm. The
 size is determined by the `addrtype` coming from the [`memory type`] of the
-`memory` immediate.
+`memory` immediate. Note that even though the number of elements copied is
+packed into an `addrtype`, the maximum length of the buffer is fixed at `2^28 - 1`
+independently of the `addrtype`.
 ```python
   def stream_event(result, reclaim_buffer):
     reclaim_buffer()
diff --git a/design/mvp/Concurrency.md b/design/mvp/Concurrency.md
index 6fe3318e..13f30243 100644
--- a/design/mvp/Concurrency.md
+++ b/design/mvp/Concurrency.md
@@ -436,10 +436,6 @@ stackless async ABI is used, returning the "exit" code to the event loop. This
 non-reuse of thread-local storage between distinct export calls avoids what
 would otherwise be a likely source of TLS-related memory leaks.
 
-When [wasm-gc] is integrated into the Canonical ABI, `context.{get,set}` will be
-relaxed so that these integral context values can serve as indices into
-guest-managed tables of typed GC references.
-
 Since the same mutable thread-local storage cells are shared by all core wasm
 running under the same thread in the same component, the cells' contents must
 be carefully coordinated in the same way as native code has to carefully

From f834bbbdd4f51ff4f70b867d09bb01abb1d7e34e Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Wed, 25 Mar 2026 08:19:12 +0000
Subject: [PATCH 19/25] clarify address type

---
 design/mvp/CanonicalABI.md | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 9289dcac..9227df44 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -1319,7 +1319,7 @@ been allowed to resolve and explicitly relinquish any borrowed handles.
 
 A "buffer" is an abstract region of memory that can either be read-from or
 written-to. This region of memory can either be owned by the host or by wasm.
-Currently wasm memory is always 32-bit linear memory, but soon 64-bit and GC
+Currently wasm memory is always 32-bit or 64-bit linear memory, but soon GC
 memory will be added. Thus, buffers provide an abstraction over at least 4
 different "kinds" of memory.
 
@@ -4203,8 +4203,8 @@ For canonical definitions:
 In addition to [general validation of `$opts`](#canonopt-validation) validation
 specifies:
 * `$f` is given type `(func (param i32 T T) (result T))` where `T` is `i32` or
-  `i64` as determined by the `memory` from `$opts` (or `i32` by default if no 
-  `memory` is present).
+  `i64` as determined by the address type of `memory` from `$opts` (or `i32` by
+  default if no `memory` is present).
 * `$stream_t` must be a type of the form `(stream $t?)`
 * If `$t` is present:
   * [`lower($t)` above](#canonopt-validation) defines required options for `stream.write`
@@ -4324,8 +4324,8 @@ For canonical definitions:
 In addition to [general validation of `$opts`](#canonopt-validation) validation
 specifies:
 * `$f` is given type `(func (param i32 T) (result i32))` where `T` is `i32` or
-  `i64` as determined by the `memory` from `$opts` (or `i32` by default if no 
-  `memory` is present).
+  `i64` as determined by the address type of `memory` from `$opts` (or `i32`
+  by default if no `memory` is present).
 * `$future_t` must be a type of the form `(future $t?)`
 * If `$t` is present:
   * [`lift($t)` above](#canonopt-validation) defines required options for `future.read`
@@ -4750,7 +4750,7 @@ For a canonical definition:
 validation specifies:
 * `$f` is given type `(func (param $ptr) (param $units) (result i32))` 
   where `$ptr` and `$units` are both `i32` or `i64` as determined by
-  the `memory` field in `$opts`.
+  the address type of the `memory` field in `$opts`.
 * `async` is not present
 * `memory` must be present
 
@@ -4792,7 +4792,7 @@ For a canonical definition:
 ```
 validation specifies:
 * `$f` is given type `(func (param i32) (param $ptr))` where `$ptr` is `i32` or `i64`
-  as determined by the `memory` from `$opts`
+  as determined by the address type of `memory` from `$opts`
 * `async` is not present
 * `memory` must be present
 * `realloc` must be present

From 8ab246a6857908df0fde640d9c7f16853e86ebbb Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Wed, 25 Mar 2026 09:25:46 +0000
Subject: [PATCH 20/25] mixed context types behavior

---
 design/mvp/CanonicalABI.md              |  9 +++++++--
 design/mvp/Concurrency.md               | 21 ++++++++++++++-------
 design/mvp/Explainer.md                 | 14 ++++++++++++++
 design/mvp/canonical-abi/definitions.py |  7 ++++++-
 4 files changed, 41 insertions(+), 10 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 9227df44..da642ee6 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -3752,12 +3752,17 @@ validation specifies:
 * `$f` is given type `(func (result $t))`
 
 Calling `$f` invokes the following function, which reads the [thread-local
-storage] of the [current thread]:
+storage] of the [current thread] (taking only the low 32-bits if `$t` is `i32`):
 ```python
 def canon_context_get(t, i, thread):
+  MASK_32BIT = 1 << 32 - 1
+
   assert(t == 'i32' or t == 'i64')
   assert(i < Thread.CONTEXT_LENGTH)
-  return [thread.context[i]]
+  result = thread.context[i]
+  if t == 'i32':
+    result &= MASK_32BIT
+  return [result]
 ```
 
 
diff --git a/design/mvp/Concurrency.md b/design/mvp/Concurrency.md
index 13f30243..2a49aeb5 100644
--- a/design/mvp/Concurrency.md
+++ b/design/mvp/Concurrency.md
@@ -414,17 +414,24 @@ current thread's thread-local storage can be read and written from core wasm
 code by calling the [`context.get`] and [`context.set`] built-ins.
 
 The thread-local storage array's length is currently fixed to contain exactly 2
-`i32`s or `i64`s with the goal of allowing this array to be stored inline in
-whatever existing runtime data structure is already efficiently reachable from
-ambient compiled wasm code. Because module instantiation is declarative in the
-Component Model, the imported `context.{get,set}` built-ins can be inlined by
-the core wasm compiler as-if they were instructions, allowing the generated
-machine code to be a single load or store. This makes thread-local storage a
-natural place to store:
+`i64`s with the goal of allowing this array to be stored inline in whatever
+existing runtime data structure is already efficiently reachable from ambient
+compiled wasm code. Because module instantiation is declarative in the Component
+Model, the imported `context.{get,set}` built-ins can be inlined by the core
+wasm compiler as-if they were instructions, allowing the generated machine code
+to be a single load or store. This makes thread-local storage a natural place to
+store:
 1. a pointer to the linear-memory "shadow stack" pointer
 2. a pointer to a struct used by the runtime to implement the language's
    thread-local features
 
+Both `context.get` and `context.set` take an immediate of `i32` or `i64` that
+selects the result or parameter type, respectively. `context.set i32` zeroes
+the high 32 bits of the stored value and `context.get i32` reads only the low
+32 bits of the stored value. Generally it is expected that 32-bit components
+always use the `i32` immediate and 64-bit components always use the `i64`
+immediate, but mixing these calls is still valid.
+
 When threads are created explicitly by `thread.new-indirect`, the lifetime of
 the thread-local storage array ends when the function passed to
 `thread.new-indirect` returns and thus any linear-memory allocations associated
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 80fe9766..8e005fd4 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1564,6 +1564,13 @@ The `context.get` built-in returns the `i`th element of the [current thread]'s
 than 2 and `T` to be `i32` or `i64`, but these restrictions may be relaxed in
 the future.
 
+Mixing `i32` and `i64` results in truncating or zero-extending the stored
+values:
+* If `context.get i32 i` is called after `context.set i64 i v`,
+  only the low 32 bits are read (returning `i32.wrap_i64 v`).
+* If `context.get i64 i` is called after `context.set i32 i v`,
+  the upper 32 bits read as zero (returning `i64.extend_i32_u v`).
+
 For details, see [Thread-Local Storage] in the concurrency explainer and
 [`canon_context_get`] in the Canonical ABI explainer.
 
@@ -1579,6 +1586,13 @@ The `context.set` built-in sets the `i`th element of the [current thread]'s
 `i` to be less than 2 and `T` to be `i32` or `i64`, but these restrictions may
 be relaxed in the future.
 
+Mixing `i32` and `i64` results in truncating or zero-extending the stored
+values:
+* If `context.get i32 i` is called after `context.set i64 i v`,
+  only the low 32 bits are read (returning `i32.wrap_i64 v`).
+* If `context.get i64 i` is called after `context.set i32 i v`,
+  the upper 32 bits read as zero (returning `i64.extend_i32_u v`).
+
 For details, see [Thread-Local Storage] in the concurrency explainer and
 [`canon_context_set`] in the Canonical ABI explainer.
 
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index c06ee968..c783df40 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -2195,9 +2195,14 @@ def canon_resource_rep(rt, thread, i):
 ### 🔀 `canon context.get`
 
 def canon_context_get(t, i, thread):
+  MASK_32BIT = 1 << 32 - 1
+
   assert(t == 'i32' or t == 'i64')
   assert(i < Thread.CONTEXT_LENGTH)
-  return [thread.context[i]]
+  result = thread.context[i]
+  if t == 'i32':
+    result &= MASK_32BIT
+  return [result]
 
 ### 🔀 `canon context.set`
 

From 4fd7d8bd19b544dcfc6f6154119984435203d29c Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Wed, 25 Mar 2026 13:26:51 +0000
Subject: [PATCH 21/25] fix 32 bit mask

---
 design/mvp/canonical-abi/definitions.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index c783df40..4080fd8a 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -2195,7 +2195,7 @@ def canon_resource_rep(rt, thread, i):
 ### 🔀 `canon context.get`
 
 def canon_context_get(t, i, thread):
-  MASK_32BIT = 1 << 32 - 1
+  MASK_32BIT = (1 << 32) - 1
 
   assert(t == 'i32' or t == 'i64')
   assert(i < Thread.CONTEXT_LENGTH)

From 50d68b6549b0ac851fc8ca62570776e44a32209e Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Wed, 25 Mar 2026 14:01:36 +0000
Subject: [PATCH 22/25] transparent indexing on MemInst

---
 design/mvp/CanonicalABI.md              | 77 +++++++++++++-----------
 design/mvp/canonical-abi/definitions.py | 78 ++++++++++++++-----------
 design/mvp/canonical-abi/run_tests.py   |  2 +-
 3 files changed, 87 insertions(+), 70 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index da642ee6..3ea4873c 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -248,6 +248,15 @@ class MemInst:
   bytes: bytearray
   addrtype: Literal['i32', 'i64']
 
+  def __getitem__(self, i):
+    return self.bytes[i]
+
+  def __setitem__(self, i, v):
+    self.bytes[i] = v
+
+  def __len__(self):
+    return len(self.bytes)
+
   def ptr_type(self):
     return self.addrtype
 
@@ -1381,7 +1390,7 @@ class BufferGuestImpl(Buffer):
     trap_if(length > Buffer.MAX_LENGTH)
     if t and length > 0:
       trap_if(ptr != align_to(ptr, alignment(t, cx.opts)))
-      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory.bytes))
+      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory))
     self.cx = cx
     self.t = t
     self.ptr = ptr
@@ -2033,7 +2042,7 @@ the top-level case analysis:
 ```python
 def load(cx, ptr, t):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory.bytes))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : return convert_int_to_bool(load_int(cx, ptr, 1))
     case U8Type()           : return load_int(cx, ptr, 1)
@@ -2063,7 +2072,7 @@ Integers are loaded directly from memory, with their high-order bit interpreted
 according to the signedness of the type.
 ```python
 def load_int(cx, ptr, nbytes, signed = False):
-  return int.from_bytes(cx.opts.memory.bytes[ptr : ptr+nbytes], 'little', signed = signed)
+  return int.from_bytes(cx.opts.memory[ptr : ptr+nbytes], 'little', signed = signed)
 ```
 
 Integer-to-boolean conversions treats `0` as `false` and all other bit-patterns
@@ -2166,9 +2175,9 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
         encoding = 'latin-1'
 
   trap_if(ptr != align_to(ptr, alignment))
-  trap_if(ptr + byte_length > len(cx.opts.memory.bytes))
+  trap_if(ptr + byte_length > len(cx.opts.memory))
   try:
-    s = cx.opts.memory.bytes[ptr : ptr+byte_length].decode(encoding)
+    s = cx.opts.memory[ptr : ptr+byte_length].decode(encoding)
   except UnicodeError:
     trap()
 
@@ -2195,7 +2204,7 @@ def load_list(cx, ptr, elem_type, maybe_length):
 
 def load_list_from_range(cx, ptr, length, elem_type):
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory.bytes))
+  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory))
   return load_list_from_valid_range(cx, ptr, length, elem_type)
 
 def load_list_from_valid_range(cx, ptr, length, elem_type):
@@ -2321,7 +2330,7 @@ The `store` function defines how to write a value `v` of a given value type
 ```python
 def store(cx, v, t, ptr):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory.bytes))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : store_int(cx, int(bool(v)), ptr, 1)
     case U8Type()           : store_int(cx, v, ptr, 1)
@@ -2353,7 +2362,7 @@ the `signed` parameter is only present to ensure that the internal range checks
 of `int.to_bytes` are satisfied.
 ```python
 def store_int(cx, v, ptr, nbytes, signed = False):
-  cx.opts.memory.bytes[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
+  cx.opts.memory[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
 ```
 
 Floats are stored directly into memory, with the sign and payload bits of NaN
@@ -2487,10 +2496,10 @@ def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment
   trap_if(dst_byte_length > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
-  trap_if(ptr + dst_byte_length > len(cx.opts.memory.bytes))
+  trap_if(ptr + dst_byte_length > len(cx.opts.memory))
   encoded = src.encode(dst_encoding)
   assert(dst_byte_length == len(encoded))
-  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
   return (ptr, src_code_units)
 ```
 The `MAX_STRING_BYTE_LENGTH` constant ensures that the high bit of a
@@ -2512,19 +2521,19 @@ def store_latin1_to_utf8(cx, src, src_code_units):
 def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
   assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 1, src_code_units)
-  trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
+  trap_if(ptr + src_code_units > len(cx.opts.memory))
   for i,code_point in enumerate(src):
     if ord(code_point) < 2**7:
-      cx.opts.memory.bytes[ptr + i] = ord(code_point)
+      cx.opts.memory[ptr + i] = ord(code_point)
     else:
       trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
-      trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory))
       encoded = src.encode('utf-8')
-      cx.opts.memory.bytes[ptr+i : ptr+len(encoded)] = encoded[i : ]
+      cx.opts.memory[ptr+i : ptr+len(encoded)] = encoded[i : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 1, len(encoded))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory))
       return (ptr, len(encoded))
   return (ptr, src_code_units)
 ```
@@ -2539,13 +2548,13 @@ def store_utf8_to_utf16(cx, src, src_code_units):
   trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
+  trap_if(ptr + worst_case_size > len(cx.opts.memory))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
   if len(encoded) < worst_case_size:
     ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
+    trap_if(ptr + len(encoded) > len(cx.opts.memory))
   code_units = int(len(encoded) / 2)
   return (ptr, code_units)
 ```
@@ -2563,33 +2572,33 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
   assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, src_code_units)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
+  trap_if(ptr + src_code_units > len(cx.opts.memory))
   dst_byte_length = 0
   for usv in src:
     if ord(usv) < (1 << 8):
-      cx.opts.memory.bytes[ptr + dst_byte_length] = ord(usv)
+      cx.opts.memory[ptr + dst_byte_length] = ord(usv)
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
       trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
-      trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory))
       for j in range(dst_byte_length-1, -1, -1):
-        cx.opts.memory.bytes[ptr + 2*j] = cx.opts.memory.bytes[ptr + j]
-        cx.opts.memory.bytes[ptr + 2*j + 1] = 0
+        cx.opts.memory[ptr + 2*j] = cx.opts.memory[ptr + j]
+        cx.opts.memory[ptr + 2*j + 1] = 0
       encoded = src.encode('utf-16-le')
-      cx.opts.memory.bytes[ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
+      cx.opts.memory[ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
         trap_if(ptr != align_to(ptr, 2))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory))
       tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
       return (ptr, tagged_code_units)
   if dst_byte_length < src_code_units:
     ptr = cx.opts.realloc(ptr, src_code_units, 2, dst_byte_length)
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + dst_byte_length > len(cx.opts.memory.bytes))
+    trap_if(ptr + dst_byte_length > len(cx.opts.memory))
   return (ptr, dst_byte_length)
 ```
 
@@ -2609,17 +2618,17 @@ def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   trap_if(src_byte_length > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_byte_length > len(cx.opts.memory.bytes))
+  trap_if(ptr + src_byte_length > len(cx.opts.memory))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
   if any(ord(c) >= (1 << 8) for c in src):
     tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
     return (ptr, tagged_code_units)
   latin1_size = int(len(encoded) / 2)
   for i in range(latin1_size):
-    cx.opts.memory.bytes[ptr + i] = cx.opts.memory.bytes[ptr + 2*i]
+    cx.opts.memory[ptr + i] = cx.opts.memory[ptr + 2*i]
   ptr = cx.opts.realloc(ptr, src_byte_length, 1, latin1_size)
-  trap_if(ptr + latin1_size > len(cx.opts.memory.bytes))
+  trap_if(ptr + latin1_size > len(cx.opts.memory))
   return (ptr, latin1_size)
 ```
 
@@ -2651,7 +2660,7 @@ def store_list_into_range(cx, v, elem_type):
   trap_if(byte_length >= (1 << 32))
   ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + byte_length > len(cx.opts.memory.bytes))
+  trap_if(ptr + byte_length > len(cx.opts.memory))
   store_list_into_valid_range(cx, v, ptr, elem_type)
   return (ptr, len(v))
 
@@ -3162,7 +3171,7 @@ def lift_flat_values(cx, max_flat, vi, ts):
     ptr = vi.next(cx.opts.memory.ptr_type())
     tuple_type = TupleType(ts)
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory.bytes))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
     return list(load(cx, ptr, tuple_type).values())
   else:
     return [ lift_flat(cx, vi, t) for t in ts ]
@@ -3188,7 +3197,7 @@ def lower_flat_values(cx, max_flat, vs, ts, out_param = None):
       ptr = out_param.next(cx.opts.memory.ptr_type())
       flat_vals = []
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory.bytes))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
     store(cx, tuple_value, tuple_type, ptr)
   else:
     flat_vals = []
@@ -3755,7 +3764,7 @@ Calling `$f` invokes the following function, which reads the [thread-local
 storage] of the [current thread] (taking only the low 32-bits if `$t` is `i32`):
 ```python
 def canon_context_get(t, i, thread):
-  MASK_32BIT = 1 << 32 - 1
+  MASK_32BIT = (1 << 32) - 1
 
   assert(t == 'i32' or t == 'i64')
   assert(i < Thread.CONTEXT_LENGTH)
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 4080fd8a..7395210a 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -229,11 +229,21 @@ def __init__(self, opts, inst, borrow_scope = None):
 
 
 ### Canonical ABI Options
+
 @dataclass
 class MemInst:
   bytes: bytearray
   addrtype: Literal['i32', 'i64']
 
+  def __getitem__(self, i):
+    return self.bytes[i]
+
+  def __setitem__(self, i, v):
+    self.bytes[i] = v
+
+  def __len__(self):
+    return len(self.bytes)
+
   def ptr_type(self):
     return self.addrtype
 
@@ -246,7 +256,6 @@ def equal(lhs, rhs):
     return lhs.bytes == rhs.bytes and \
            lhs.addrtype == rhs.addrtype
 
-
 @dataclass
 class LiftOptions:
   string_encoding: str = 'utf8'
@@ -256,7 +265,6 @@ def equal(lhs, rhs):
     return lhs.string_encoding == rhs.string_encoding and \
            lhs.memory is rhs.memory
 
-
 @dataclass
 class LiftLowerOptions(LiftOptions):
   realloc: Optional[Callable] = None
@@ -794,7 +802,7 @@ def __init__(self, t, cx, ptr, length):
     trap_if(length > Buffer.MAX_LENGTH)
     if t and length > 0:
       trap_if(ptr != align_to(ptr, alignment(t, cx.opts)))
-      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory.bytes))
+      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory))
     self.cx = cx
     self.t = t
     self.ptr = ptr
@@ -1194,7 +1202,7 @@ def elem_size_flags(labels):
 
 def load(cx, ptr, t):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory.bytes))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : return convert_int_to_bool(load_int(cx, ptr, 1))
     case U8Type()           : return load_int(cx, ptr, 1)
@@ -1220,7 +1228,7 @@ def load(cx, ptr, t):
     case FutureType(t)      : return lift_future(cx, load_int(cx, ptr, 4), t)
 
 def load_int(cx, ptr, nbytes, signed = False):
-  return int.from_bytes(cx.opts.memory.bytes[ptr : ptr+nbytes], 'little', signed = signed)
+  return int.from_bytes(cx.opts.memory[ptr : ptr+nbytes], 'little', signed = signed)
 
 def convert_int_to_bool(i):
   assert(i >= 0)
@@ -1289,9 +1297,9 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
         encoding = 'latin-1'
 
   trap_if(ptr != align_to(ptr, alignment))
-  trap_if(ptr + byte_length > len(cx.opts.memory.bytes))
+  trap_if(ptr + byte_length > len(cx.opts.memory))
   try:
-    s = cx.opts.memory.bytes[ptr : ptr+byte_length].decode(encoding)
+    s = cx.opts.memory[ptr : ptr+byte_length].decode(encoding)
   except UnicodeError:
     trap()
 
@@ -1311,7 +1319,7 @@ def load_list(cx, ptr, elem_type, maybe_length):
 
 def load_list_from_range(cx, ptr, length, elem_type):
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory.bytes))
+  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory))
   return load_list_from_valid_range(cx, ptr, length, elem_type)
 
 def load_list_from_valid_range(cx, ptr, length, elem_type):
@@ -1384,7 +1392,7 @@ def lift_async_value(ReadableEndT, cx, i, t):
 
 def store(cx, v, t, ptr):
   assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory.bytes))
+  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : store_int(cx, int(bool(v)), ptr, 1)
     case U8Type()           : store_int(cx, v, ptr, 1)
@@ -1410,7 +1418,7 @@ def store(cx, v, t, ptr):
     case FutureType(t)      : store_int(cx, lower_future(cx, v, t), ptr, 4)
 
 def store_int(cx, v, ptr, nbytes, signed = False):
-  cx.opts.memory.bytes[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
+  cx.opts.memory[ptr : ptr+nbytes] = int.to_bytes(v, nbytes, 'little', signed = signed)
 
 def maybe_scramble_nan32(f):
   if math.isnan(f):
@@ -1500,10 +1508,10 @@ def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment
   trap_if(dst_byte_length > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
-  trap_if(ptr + dst_byte_length > len(cx.opts.memory.bytes))
+  trap_if(ptr + dst_byte_length > len(cx.opts.memory))
   encoded = src.encode(dst_encoding)
   assert(dst_byte_length == len(encoded))
-  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
   return (ptr, src_code_units)
 
 def store_utf16_to_utf8(cx, src, src_code_units):
@@ -1517,19 +1525,19 @@ def store_latin1_to_utf8(cx, src, src_code_units):
 def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
   assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 1, src_code_units)
-  trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
+  trap_if(ptr + src_code_units > len(cx.opts.memory))
   for i,code_point in enumerate(src):
     if ord(code_point) < 2**7:
-      cx.opts.memory.bytes[ptr + i] = ord(code_point)
+      cx.opts.memory[ptr + i] = ord(code_point)
     else:
       trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
-      trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory))
       encoded = src.encode('utf-8')
-      cx.opts.memory.bytes[ptr+i : ptr+len(encoded)] = encoded[i : ]
+      cx.opts.memory[ptr+i : ptr+len(encoded)] = encoded[i : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 1, len(encoded))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory))
       return (ptr, len(encoded))
   return (ptr, src_code_units)
 
@@ -1538,13 +1546,13 @@ def store_utf8_to_utf16(cx, src, src_code_units):
   trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
+  trap_if(ptr + worst_case_size > len(cx.opts.memory))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
   if len(encoded) < worst_case_size:
     ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
+    trap_if(ptr + len(encoded) > len(cx.opts.memory))
   code_units = int(len(encoded) / 2)
   return (ptr, code_units)
 
@@ -1552,33 +1560,33 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
   assert(src_code_units <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, src_code_units)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_code_units > len(cx.opts.memory.bytes))
+  trap_if(ptr + src_code_units > len(cx.opts.memory))
   dst_byte_length = 0
   for usv in src:
     if ord(usv) < (1 << 8):
-      cx.opts.memory.bytes[ptr + dst_byte_length] = ord(usv)
+      cx.opts.memory[ptr + dst_byte_length] = ord(usv)
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
       trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
-      trap_if(ptr + worst_case_size > len(cx.opts.memory.bytes))
+      trap_if(ptr + worst_case_size > len(cx.opts.memory))
       for j in range(dst_byte_length-1, -1, -1):
-        cx.opts.memory.bytes[ptr + 2*j] = cx.opts.memory.bytes[ptr + j]
-        cx.opts.memory.bytes[ptr + 2*j + 1] = 0
+        cx.opts.memory[ptr + 2*j] = cx.opts.memory[ptr + j]
+        cx.opts.memory[ptr + 2*j + 1] = 0
       encoded = src.encode('utf-16-le')
-      cx.opts.memory.bytes[ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
+      cx.opts.memory[ptr+2*dst_byte_length : ptr+len(encoded)] = encoded[2*dst_byte_length : ]
       if worst_case_size > len(encoded):
         ptr = cx.opts.realloc(ptr, worst_case_size, 2, len(encoded))
         trap_if(ptr != align_to(ptr, 2))
-        trap_if(ptr + len(encoded) > len(cx.opts.memory.bytes))
+        trap_if(ptr + len(encoded) > len(cx.opts.memory))
       tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
       return (ptr, tagged_code_units)
   if dst_byte_length < src_code_units:
     ptr = cx.opts.realloc(ptr, src_code_units, 2, dst_byte_length)
     trap_if(ptr != align_to(ptr, 2))
-    trap_if(ptr + dst_byte_length > len(cx.opts.memory.bytes))
+    trap_if(ptr + dst_byte_length > len(cx.opts.memory))
   return (ptr, dst_byte_length)
 
 def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
@@ -1586,17 +1594,17 @@ def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   trap_if(src_byte_length > MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
-  trap_if(ptr + src_byte_length > len(cx.opts.memory.bytes))
+  trap_if(ptr + src_byte_length > len(cx.opts.memory))
   encoded = src.encode('utf-16-le')
-  cx.opts.memory.bytes[ptr : ptr+len(encoded)] = encoded
+  cx.opts.memory[ptr : ptr+len(encoded)] = encoded
   if any(ord(c) >= (1 << 8) for c in src):
     tagged_code_units = int(len(encoded) / 2) | UTF16_TAG
     return (ptr, tagged_code_units)
   latin1_size = int(len(encoded) / 2)
   for i in range(latin1_size):
-    cx.opts.memory.bytes[ptr + i] = cx.opts.memory.bytes[ptr + 2*i]
+    cx.opts.memory[ptr + i] = cx.opts.memory[ptr + 2*i]
   ptr = cx.opts.realloc(ptr, src_byte_length, 1, latin1_size)
-  trap_if(ptr + latin1_size > len(cx.opts.memory.bytes))
+  trap_if(ptr + latin1_size > len(cx.opts.memory))
   return (ptr, latin1_size)
 
 def lower_error_context(cx, v):
@@ -1616,7 +1624,7 @@ def store_list_into_range(cx, v, elem_type):
   trap_if(byte_length >= (1 << 32))
   ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
   trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + byte_length > len(cx.opts.memory.bytes))
+  trap_if(ptr + byte_length > len(cx.opts.memory))
   store_list_into_valid_range(cx, v, ptr, elem_type)
   return (ptr, len(v))
 
@@ -1966,7 +1974,7 @@ def lift_flat_values(cx, max_flat, vi, ts):
     ptr = vi.next(cx.opts.memory.ptr_type())
     tuple_type = TupleType(ts)
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory.bytes))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
     return list(load(cx, ptr, tuple_type).values())
   else:
     return [ lift_flat(cx, vi, t) for t in ts ]
@@ -1984,7 +1992,7 @@ def lower_flat_values(cx, max_flat, vs, ts, out_param = None):
       ptr = out_param.next(cx.opts.memory.ptr_type())
       flat_vals = []
     trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory.bytes))
+    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
     store(cx, tuple_value, tuple_type, ptr)
   else:
     flat_vals = []
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 94af80c2..4432553f 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -129,7 +129,7 @@ def test_name():
   if lower_v is None:
     lower_v = v
 
-  heap = Heap(5*len(cx.opts.memory.bytes))
+  heap = Heap(5*len(cx.opts.memory))
   if dst_encoding is None:
     dst_encoding = cx.opts.string_encoding
   cx = mk_cx(MemInst(heap.memory, cx.opts.memory.ptr_type()), dst_encoding, heap.realloc)

From c0ad66254eb62d3bd07871af97beb7e0b611511b Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Wed, 25 Mar 2026 14:16:45 +0000
Subject: [PATCH 23/25] undo old change

---
 design/mvp/CanonicalABI.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index 3ea4873c..d90441e5 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -4386,7 +4386,7 @@ state (in which the only valid operation is to call `future.drop-*`) on
 read/written at most once and futures are only passed to other components in a
 state where they are ready to be read/written. Another important difference is
 that, since the buffer length is always implied by the `CopyResult`, the number
-of elements copied is not packed in the high bits; they're always zero.
+of elements copied is not packed in the high 28 bits; they're always zero.
 ```python
   def future_event(result):
     assert((buffer.remain() == 0) == (result == CopyResult.COMPLETED))

From 70e876ef5609182d097a934127be6557e03ef95d Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Wed, 25 Mar 2026 14:29:18 +0000
Subject: [PATCH 24/25] revert some small changes

---
 design/mvp/Concurrency.md | 14 +++++++-------
 design/mvp/Explainer.md   | 24 ++++++++++++------------
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/design/mvp/Concurrency.md b/design/mvp/Concurrency.md
index 2a49aeb5..d0eab4c4 100644
--- a/design/mvp/Concurrency.md
+++ b/design/mvp/Concurrency.md
@@ -413,14 +413,14 @@ Each thread contains a distinct mutable **thread-local storage** array. The
 current thread's thread-local storage can be read and written from core wasm
 code by calling the [`context.get`] and [`context.set`] built-ins.
 
-The thread-local storage array's length is currently fixed to contain exactly 2
-`i64`s with the goal of allowing this array to be stored inline in whatever
+The thread-local storage array's length is currently fixed to contain exactly
+2 `i64`s with the goal of allowing this array to be stored inline in whatever
 existing runtime data structure is already efficiently reachable from ambient
-compiled wasm code. Because module instantiation is declarative in the Component
-Model, the imported `context.{get,set}` built-ins can be inlined by the core
-wasm compiler as-if they were instructions, allowing the generated machine code
-to be a single load or store. This makes thread-local storage a natural place to
-store:
+compiled wasm code. Because module instantiation is declarative in the
+Component Model, the imported `context.{get,set}` built-ins can be inlined by
+the core wasm compiler as-if they were instructions, allowing the generated
+machine code to be a single load or store. This makes thread-local storage a
+natural place to store:
 1. a pointer to the linear-memory "shadow stack" pointer
 2. a pointer to a struct used by the runtime to implement the language's
    thread-local features
diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md
index 8e005fd4..80e81a0c 100644
--- a/design/mvp/Explainer.md
+++ b/design/mvp/Explainer.md
@@ -1313,9 +1313,9 @@ have the following core function type:
       (param $newSize memory.addrtype)
       (result memory.addrtype))
 ```
-The Canonical ABI will use `realloc` both to allocate (passing `0` for the first
-two parameters) and reallocate. If the Canonical ABI needs `realloc`, validation
-requires this option to be present (there is no default).
+The Canonical ABI will use `realloc` both to allocate (passing `0` for the
+first two parameters) and reallocate. If the Canonical ABI needs `realloc`,
+validation requires this option to be present (there is no default).
 
 The `(post-return ...)` option may only be present in `canon lift` when
 `async` is not present and specifies a core function to be called with the
@@ -1480,10 +1480,10 @@ The `resource.new` built-in creates a new resource (of resource type `T`) with
 resource. Validation only allows `resource.rep T` to be used within the
 component that defined `T`.
 
-In the Canonical ABI, `T.rep` is defined to be the `$rep` in the `(type $T
-(resource (rep $rep) ...))` type definition that defined `T`. While it's
-designed to allow different types in the future, it is currently fixed to be
-`i32` or `i64`.
+In the Canonical ABI, `T.rep` is defined to be the `$rep` in the
+`(type $T (resource (rep $rep) ...))` type definition that defined `T`. While
+it's designed to allow different types in the future, it is currently
+limited to `i32` or `i64`.
 
 For details, see [`canon_resource_new`] in the Canonical ABI explainer.
 
@@ -1515,7 +1515,7 @@ resource type `T`) pointed to by the handle `t`. Validation only allows
 In the Canonical ABI, `T.rep` is defined to be the `$rep` in the
 `(type $T (resource (rep $rep) ...))` type definition that defined `T`. While
 it's designed to allow different types in the future, it is currently
-fixed to be `i32` or `i64`.
+limited to `i32` or `i64`.
 
 As an example, the following component imports the `resource.new` built-in,
 allowing it to create and return new resources to its client:
@@ -1869,7 +1869,7 @@ For details, see [Streams and Futures] in the concurrency explainer and
 
 ###### 🔀 `stream.read` and `stream.write`
 
-| Synopsis                                     |                                                                                                             |
+| Synopsis                                     |                                                                                                        |
 | -------------------------------------------- | ------------------------------------------------------------------------------------------------------ |
 | Approximate WIT signature for `stream.read`  | `func<stream<T?>,memory>(e: readable-stream-end<T?>, b: writable-buffer<T>?) -> option<stream-result>` |
 | Approximate WIT signature for `stream.write` | `func<stream<T?>,memory>(e: writable-stream-end<T?>, b: readable-buffer<T>?) -> option<stream-result>` |
@@ -2216,9 +2216,9 @@ For details, see [Thread Built-ins] in the concurrency explainer and
 
 ###### 🧵② `thread.spawn-ref`
 
-| Synopsis                   |                                                                    |
-| -------------------------- | ------------------------------------------------------------------ |
-| Approximate WIT signature  | `func<shared?,FuncT>(f: FuncT, c: FuncT.params[0]) -> bool`        |
+| Synopsis                   |                                                                                            |
+| -------------------------- | ------------------------------------------------------------------------------------------ |
+| Approximate WIT signature  | `func<shared?,FuncT>(f: FuncT, c: FuncT.params[0]) -> bool`                                |
 | Canonical ABI signature    | `shared? [f:(ref null (shared (func (param FuncT.params[0]))) c:FuncT.params[0]] -> [i32]` |
 
 The `thread.spawn-ref` built-in is an optimization, fusing a call to

From 520cf2572ae63be0c5ba9c989212fbad384d52ec Mon Sep 17 00:00:00 2001
From: Adam Bratschi-Kaye <adam.bratschikaye@dfinity.org>
Date: Thu, 26 Mar 2026 08:59:15 +0000
Subject: [PATCH 25/25] check list and string lengths when loading

---
 design/mvp/CanonicalABI.md              | 189 ++++++++++++++----------
 design/mvp/canonical-abi/definitions.py | 169 ++++++++++++---------
 design/mvp/canonical-abi/run_tests.py   | 104 +++++++++++++
 3 files changed, 316 insertions(+), 146 deletions(-)

diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md
index d90441e5..93c068d1 100644
--- a/design/mvp/CanonicalABI.md
+++ b/design/mvp/CanonicalABI.md
@@ -243,6 +243,13 @@ The `MemInst` class represents a core WebAssembly [`memory` instance], with
 `bytes` corresponding to the memory's bytes and `addrtype` coming from the
 [`memory type`].
 ```python
+def ptr_size(ptr_type):
+  match ptr_type:
+    case 'i32':
+      return 4
+    case 'i64':
+      return 8
+
 @dataclass
 class MemInst:
   bytes: bytearray
@@ -261,9 +268,7 @@ class MemInst:
     return self.addrtype
 
   def ptr_size(self):
-    match self.ptr_type():
-      case 'i32': return 4
-      case 'i64': return 8
+    return ptr_size(self.ptr_type())
 
   def equal(lhs, rhs):
     return lhs.bytes == rhs.bytes and \
@@ -1389,8 +1394,8 @@ class BufferGuestImpl(Buffer):
   def __init__(self, t, cx, ptr, length):
     trap_if(length > Buffer.MAX_LENGTH)
     if t and length > 0:
-      trap_if(ptr != align_to(ptr, alignment(t, cx.opts)))
-      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory))
+      trap_if(ptr != align_to(ptr, alignment(t, cx.opts.memory.ptr_type())))
+      trap_if(ptr + length * elem_size(t, cx.opts.memory.ptr_type()) > len(cx.opts.memory))
     self.cx = cx
     self.t = t
     self.ptr = ptr
@@ -1408,7 +1413,7 @@ class ReadableBufferGuestImpl(BufferGuestImpl):
     assert(n <= self.remain())
     if self.t:
       vs = load_list_from_valid_range(self.cx, self.ptr, n, self.t)
-      self.ptr += n * elem_size(self.t, self.cx.opts)
+      self.ptr += n * elem_size(self.t, self.cx.opts.memory.ptr_type())
     else:
       vs = n * [()]
     self.progress += n
@@ -1419,7 +1424,7 @@ class WritableBufferGuestImpl(BufferGuestImpl, WritableBuffer):
     assert(len(vs) <= self.remain())
     if self.t:
       store_list_into_valid_range(self.cx, vs, self.ptr, self.t)
-      self.ptr += len(vs) * elem_size(self.t, self.cx.opts)
+      self.ptr += len(vs) * elem_size(self.t, self.cx.opts.memory.ptr_type())
     else:
       assert(all(v == () for v in vs))
     self.progress += len(vs)
@@ -1894,7 +1899,7 @@ Each value type is assigned an [alignment] which is used by subsequent
 Canonical ABI definitions. Presenting the definition of `alignment` piecewise,
 we start with the top-level case analysis:
 ```python
-def alignment(t, opts):
+def alignment(t, ptr_type):
   match despecialize(t):
     case BoolType()                  : return 1
     case S8Type() | U8Type()         : return 1
@@ -1904,11 +1909,11 @@ def alignment(t, opts):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return opts.memory.ptr_size()
+    case StringType()                : return ptr_size(ptr_type)
     case ErrorContextType()          : return 4
-    case ListType(t, l)              : return alignment_list(t, l, opts)
-    case RecordType(fields)          : return alignment_record(fields, opts)
-    case VariantType(cases)          : return alignment_variant(cases, opts)
+    case ListType(t, l)              : return alignment_list(t, l, ptr_type)
+    case RecordType(fields)          : return alignment_record(fields, ptr_type)
+    case VariantType(cases)          : return alignment_variant(cases, ptr_type)
     case FlagsType(labels)           : return alignment_flags(labels)
     case OwnType() | BorrowType()    : return 4
     case StreamType() | FutureType() : return 4
@@ -1917,18 +1922,18 @@ def alignment(t, opts):
 List alignment is the same as tuple alignment when the length is fixed and
 otherwise uses the alignment of pointers.
 ```python
-def alignment_list(elem_type, maybe_length, opts):
+def alignment_list(elem_type, maybe_length, ptr_type):
   if maybe_length is not None:
-    return alignment(elem_type, opts)
-  return opts.memory.ptr_size()
+    return alignment(elem_type, ptr_type)
+  return ptr_size(ptr_type)
 ```
 
 Record alignment is tuple alignment, with the definitions split for reuse below:
 ```python
-def alignment_record(fields, opts):
+def alignment_record(fields, ptr_type):
   a = 1
   for f in fields:
-    a = max(a, alignment(f.t, opts))
+    a = max(a, alignment(f.t, ptr_type))
   return a
 ```
 
@@ -1938,8 +1943,8 @@ covering the number of cases in the variant (with cases numbered in order from
 compact representations of variants in memory. This smallest integer type is
 selected by the following function, used above and below:
 ```python
-def alignment_variant(cases, opts):
-  return max(alignment(discriminant_type(cases), opts), max_case_alignment(cases, opts))
+def alignment_variant(cases, ptr_type):
+  return max(alignment(discriminant_type(cases), ptr_type), max_case_alignment(cases, ptr_type))
 
 def discriminant_type(cases):
   n = len(cases)
@@ -1950,11 +1955,11 @@ def discriminant_type(cases):
     case 2: return U16Type()
     case 3: return U32Type()
 
-def max_case_alignment(cases, opts):
+def max_case_alignment(cases, ptr_type):
   a = 1
   for c in cases:
     if c.t is not None:
-      a = max(a, alignment(c.t, opts))
+      a = max(a, alignment(c.t, ptr_type))
   return a
 ```
 
@@ -1980,7 +1985,7 @@ maps well to languages which represent `list`s as random-access arrays. Empty
 types, such as records with no fields, are not permitted, to avoid
 complications in source languages.
 ```python
-def elem_size(t, opts):
+def elem_size(t, ptr_type):
   match despecialize(t):
     case BoolType()                  : return 1
     case S8Type() | U8Type()         : return 1
@@ -1990,40 +1995,48 @@ def elem_size(t, opts):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return 2 * opts.memory.ptr_size()
+    case StringType()                : return 2 * ptr_size(ptr_type)
     case ErrorContextType()          : return 4
-    case ListType(t, l)              : return elem_size_list(t, l, opts)
-    case RecordType(fields)          : return elem_size_record(fields, opts)
-    case VariantType(cases)          : return elem_size_variant(cases, opts)
+    case ListType(t, l)              : return elem_size_list(t, l, ptr_type)
+    case RecordType(fields)          : return elem_size_record(fields, ptr_type)
+    case VariantType(cases)          : return elem_size_variant(cases, ptr_type)
     case FlagsType(labels)           : return elem_size_flags(labels)
     case OwnType() | BorrowType()    : return 4
     case StreamType() | FutureType() : return 4
 
-def elem_size_list(elem_type, maybe_length, opts):
+def worst_case_elem_size(t, ptr_type):
+  if ptr_type is None:
+    return elem_size(t, ptr_type)
+  result = elem_size(t, ptr_type)
+  other_ptr_type = 'i32' if ptr_type == 'i64' else 'i64'
+  result = max(result, elem_size(t, other_ptr_type))
+  return result
+
+def elem_size_list(elem_type, maybe_length, ptr_type):
   if maybe_length is not None:
-    return maybe_length * elem_size(elem_type, opts)
-  return 2 * opts.memory.ptr_size()
+    return maybe_length * elem_size(elem_type, ptr_type)
+  return 2 * ptr_size(ptr_type)
 
-def elem_size_record(fields, opts):
+def elem_size_record(fields, ptr_type):
   s = 0
   for f in fields:
-    s = align_to(s, alignment(f.t, opts))
-    s += elem_size(f.t, opts)
+    s = align_to(s, alignment(f.t, ptr_type))
+    s += elem_size(f.t, ptr_type)
   assert(s > 0)
-  return align_to(s, alignment_record(fields, opts))
+  return align_to(s, alignment_record(fields, ptr_type))
 
 def align_to(ptr, alignment):
   return math.ceil(ptr / alignment) * alignment
 
-def elem_size_variant(cases, opts):
-  s = elem_size(discriminant_type(cases), opts)
-  s = align_to(s, max_case_alignment(cases, opts))
+def elem_size_variant(cases, ptr_type):
+  s = elem_size(discriminant_type(cases), ptr_type)
+  s = align_to(s, max_case_alignment(cases, ptr_type))
   cs = 0
   for c in cases:
     if c.t is not None:
-      cs = max(cs, elem_size(c.t, opts))
+      cs = max(cs, elem_size(c.t, ptr_type))
   s += cs
-  return align_to(s, alignment_variant(cases, opts))
+  return align_to(s, alignment_variant(cases, ptr_type))
 
 def elem_size_flags(labels):
   n = len(labels)
@@ -2041,8 +2054,8 @@ as a Python value. Presenting the definition of `load` piecewise, we start with
 the top-level case analysis:
 ```python
 def load(cx, ptr, t):
-  assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
+  assert(ptr == align_to(ptr, alignment(t, cx.opts.memory.ptr_type())))
+  assert(ptr + elem_size(t, cx.opts.memory.ptr_type()) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : return convert_int_to_bool(load_int(cx, ptr, 1))
     case U8Type()           : return load_int(cx, ptr, 1)
@@ -2145,6 +2158,17 @@ original encoding and length in tagged code units as a "hint" that enables
 in many cases. Thus, the value produced by `load_string` isn't simply a Python
 `str`, but a *tuple* containing a `str`, the original encoding and the number
 of source code units.
+
+The `MAX_STRING_BYTE_LENGTH` constant ensures that the high bit of a
+string's number of code units is never set, keeping it clear for `UTF16_TAG`.
+
+Since the byte length of a string depends on the encoding, we estimate the
+worst case length across all encodings when loading the string and trap if the
+maximum length might be exceeded. Generally the worst case length comes from
+encoding in UTF-16 where byte length could be twice the number of code units.
+But if the original encoding was UTF-16 the byte length may be up to 3 times the
+number of code units when encoding in UTF-8 if there are code points at 2^7 or
+higher.
 ```python
 String = tuple[str, str, int]
 
@@ -2155,6 +2179,16 @@ def load_string(cx, ptr) -> String:
 
 UTF16_TAG = 1 << 31
 
+def worst_case_string_byte_length(string : String):
+  (s, encoding, tagged_code_units) = string
+  if encoding == 'utf16' or (encoding == 'latin1+utf16' and (tagged_code_units & UTF16_TAG)):
+    for code_point in s:
+      if ord(code_point) >= 2 ** 7:
+        return 3 * (tagged_code_units & ~UTF16_TAG)
+  return 2 * (tagged_code_units & ~UTF16_TAG)
+
+MAX_STRING_BYTE_LENGTH = (1 << 31) - 1
+
 def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   match cx.opts.string_encoding:
     case 'utf8':
@@ -2181,7 +2215,10 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   except UnicodeError:
     trap()
 
-  return (s, cx.opts.string_encoding, tagged_code_units)
+  string = (s, cx.opts.string_encoding, tagged_code_units)
+  trap_if(worst_case_string_byte_length(string) > MAX_STRING_BYTE_LENGTH)
+
+  return string
 ```
 
 Error context values are lifted directly from the current component instance's
@@ -2193,8 +2230,14 @@ def lift_error_context(cx, i):
   return errctx
 ```
 
-Lists and records are loaded by recursively loading their elements/fields:
+Lists and records are loaded by recursively loading their elements/fields. The
+byte length of a list is limited to fit in a 32-bit memory. When loading a list,
+we check that its worst-case byte length doesn't exceed that limit under
+any pointer type and trap if the length could exceed the maximum limit. This
+ensures that interfaces can be used by both 32-bit and 64-bit components.
 ```python
+MAX_LIST_BYTE_LENGTH = (1 << 32) - 1
+
 def load_list(cx, ptr, elem_type, maybe_length):
   if maybe_length is not None:
     return load_list_from_valid_range(cx, ptr, maybe_length, elem_type)
@@ -2203,22 +2246,23 @@ def load_list(cx, ptr, elem_type, maybe_length):
   return load_list_from_range(cx, begin, length, elem_type)
 
 def load_list_from_range(cx, ptr, length, elem_type):
-  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory))
+  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts.memory.ptr_type())))
+  trap_if(ptr + length * elem_size(elem_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory))
   return load_list_from_valid_range(cx, ptr, length, elem_type)
 
 def load_list_from_valid_range(cx, ptr, length, elem_type):
+  trap_if(length * worst_case_elem_size(elem_type, cx.opts.memory.ptr_type()) > MAX_LIST_BYTE_LENGTH)
   a = []
   for i in range(length):
-    a.append(load(cx, ptr + i * elem_size(elem_type, cx.opts), elem_type))
+    a.append(load(cx, ptr + i * elem_size(elem_type, cx.opts.memory.ptr_type()), elem_type))
   return a
 
 def load_record(cx, ptr, fields):
   record = {}
   for field in fields:
-    ptr = align_to(ptr, alignment(field.t, cx.opts))
+    ptr = align_to(ptr, alignment(field.t, cx.opts.memory.ptr_type()))
     record[field.label] = load(cx, ptr, field.t)
-    ptr += elem_size(field.t, cx.opts)
+    ptr += elem_size(field.t, cx.opts.memory.ptr_type())
   return record
 ```
 As a technical detail: the `align_to` in the loop in `load_record` is
@@ -2232,12 +2276,12 @@ implementation can build the appropriate index tables at compile-time so that
 variant-passing is always O(1) and not involving string operations.
 ```python
 def load_variant(cx, ptr, cases):
-  disc_size = elem_size(discriminant_type(cases), cx.opts)
+  disc_size = elem_size(discriminant_type(cases), cx.opts.memory.ptr_type())
   case_index = load_int(cx, ptr, disc_size)
   ptr += disc_size
   trap_if(case_index >= len(cases))
   c = cases[case_index]
-  ptr = align_to(ptr, max_case_alignment(cases, cx.opts))
+  ptr = align_to(ptr, max_case_alignment(cases, cx.opts.memory.ptr_type()))
   if c.t is None:
     return { c.label: None }
   return { c.label: load(cx, ptr, c.t) }
@@ -2329,8 +2373,8 @@ The `store` function defines how to write a value `v` of a given value type
 `store` piecewise, we start with the top-level case analysis:
 ```python
 def store(cx, v, t, ptr):
-  assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
+  assert(ptr == align_to(ptr, alignment(t, cx.opts.memory.ptr_type())))
+  assert(ptr + elem_size(t, cx.opts.memory.ptr_type()) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : store_int(cx, int(bool(v)), ptr, 1)
     case U8Type()           : store_int(cx, v, ptr, 1)
@@ -2489,11 +2533,9 @@ The simplest 4 cases above can compute the exact destination size and then copy
 with a simply loop (that possibly inflates Latin-1 to UTF-16 by injecting a 0
 byte after every Latin-1 byte).
 ```python
-MAX_STRING_BYTE_LENGTH = (1 << 31) - 1
-
 def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment, dst_encoding):
   dst_byte_length = dst_code_unit_size * src_code_units
-  trap_if(dst_byte_length > MAX_STRING_BYTE_LENGTH)
+  assert(dst_byte_length <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
   trap_if(ptr + dst_byte_length > len(cx.opts.memory))
@@ -2502,9 +2544,6 @@ def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment
   cx.opts.memory[ptr : ptr+len(encoded)] = encoded
   return (ptr, src_code_units)
 ```
-The `MAX_STRING_BYTE_LENGTH` constant ensures that the high bit of a
-string's number of code units is never set, keeping it clear for `UTF16_TAG`.
-
 The 2 cases of transcoding into UTF-8 share an algorithm that starts by
 optimistically assuming that each code unit of the source string fits in a
 single UTF-8 byte and then, failing that, reallocates to a worst-case size,
@@ -2526,7 +2565,7 @@ def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
     if ord(code_point) < 2**7:
       cx.opts.memory[ptr + i] = ord(code_point)
     else:
-      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+      assert(worst_case_size <= MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
       trap_if(ptr + worst_case_size > len(cx.opts.memory))
       encoded = src.encode('utf-8')
@@ -2545,7 +2584,7 @@ if multiple UTF-8 bytes were collapsed into a single 2-byte UTF-16 code unit:
 ```python
 def store_utf8_to_utf16(cx, src, src_code_units):
   worst_case_size = 2 * src_code_units
-  trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+  assert(worst_case_size <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + worst_case_size > len(cx.opts.memory))
@@ -2580,7 +2619,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
-      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+      assert(worst_case_size <= MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
       trap_if(ptr + worst_case_size > len(cx.opts.memory))
@@ -2615,7 +2654,7 @@ inexpensively fused with the UTF-16 validate+copy loop.)
 ```python
 def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   src_byte_length = 2 * src_code_units
-  trap_if(src_byte_length > MAX_STRING_BYTE_LENGTH)
+  assert(src_byte_length <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + src_byte_length > len(cx.opts.memory))
@@ -2656,23 +2695,23 @@ def store_list(cx, v, ptr, elem_type, maybe_length):
   store_int(cx, length, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
 
 def store_list_into_range(cx, v, elem_type):
-  byte_length = len(v) * elem_size(elem_type, cx.opts)
-  trap_if(byte_length >= (1 << 32))
-  ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
-  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
+  byte_length = len(v) * elem_size(elem_type, cx.opts.memory.ptr_type())
+  assert(byte_length <= MAX_LIST_BYTE_LENGTH)
+  ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts.memory.ptr_type()), byte_length)
+  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts.memory.ptr_type())))
   trap_if(ptr + byte_length > len(cx.opts.memory))
   store_list_into_valid_range(cx, v, ptr, elem_type)
   return (ptr, len(v))
 
 def store_list_into_valid_range(cx, v, ptr, elem_type):
   for i,e in enumerate(v):
-    store(cx, e, elem_type, ptr + i * elem_size(elem_type, cx.opts))
+    store(cx, e, elem_type, ptr + i * elem_size(elem_type, cx.opts.memory.ptr_type()))
 
 def store_record(cx, v, ptr, fields):
   for f in fields:
-    ptr = align_to(ptr, alignment(f.t, cx.opts))
+    ptr = align_to(ptr, alignment(f.t, cx.opts.memory.ptr_type()))
     store(cx, v[f.label], f.t, ptr)
-    ptr += elem_size(f.t, cx.opts)
+    ptr += elem_size(f.t, cx.opts.memory.ptr_type())
 ```
 
 Variant values are represented as Python dictionaries containing exactly one
@@ -2685,10 +2724,10 @@ indices.
 ```python
 def store_variant(cx, v, ptr, cases):
   case_index, case_value = match_case(v, cases)
-  disc_size = elem_size(discriminant_type(cases), cx.opts)
+  disc_size = elem_size(discriminant_type(cases), cx.opts.memory.ptr_type())
   store_int(cx, case_index, ptr, disc_size)
   ptr += disc_size
-  ptr = align_to(ptr, max_case_alignment(cases, cx.opts))
+  ptr = align_to(ptr, max_case_alignment(cases, cx.opts.memory.ptr_type()))
   c = cases[case_index]
   if c.t is not None:
     store(cx, case_value, c.t, ptr)
@@ -3170,8 +3209,8 @@ def lift_flat_values(cx, max_flat, vi, ts):
   if len(flat_types) > max_flat:
     ptr = vi.next(cx.opts.memory.ptr_type())
     tuple_type = TupleType(ts)
-    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
+    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts.memory.ptr_type())))
+    trap_if(ptr + elem_size(tuple_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory))
     return list(load(cx, ptr, tuple_type).values())
   else:
     return [ lift_flat(cx, vi, t) for t in ts ]
@@ -3191,13 +3230,13 @@ def lower_flat_values(cx, max_flat, vs, ts, out_param = None):
     tuple_type = TupleType(ts)
     tuple_value = {str(i): v for i,v in enumerate(vs)}
     if out_param is None:
-      ptr = cx.opts.realloc(0, 0, alignment(tuple_type, cx.opts), elem_size(tuple_type, cx.opts))
+      ptr = cx.opts.realloc(0, 0, alignment(tuple_type, cx.opts.memory.ptr_type()), elem_size(tuple_type, cx.opts.memory.ptr_type()))
       flat_vals = [ptr]
     else:
       ptr = out_param.next(cx.opts.memory.ptr_type())
       flat_vals = []
-    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
+    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts.memory.ptr_type())))
+    trap_if(ptr + elem_size(tuple_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory))
     store(cx, tuple_value, tuple_type, ptr)
   else:
     flat_vals = []
diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py
index 7395210a..16dcdd4d 100644
--- a/design/mvp/canonical-abi/definitions.py
+++ b/design/mvp/canonical-abi/definitions.py
@@ -230,6 +230,13 @@ def __init__(self, opts, inst, borrow_scope = None):
 
 ### Canonical ABI Options
 
+def ptr_size(ptr_type):
+  match ptr_type:
+    case 'i32':
+      return 4
+    case 'i64':
+      return 8
+
 @dataclass
 class MemInst:
   bytes: bytearray
@@ -248,9 +255,7 @@ def ptr_type(self):
     return self.addrtype
 
   def ptr_size(self):
-    match self.ptr_type():
-      case 'i32': return 4
-      case 'i64': return 8
+    return ptr_size(self.ptr_type())
 
   def equal(lhs, rhs):
     return lhs.bytes == rhs.bytes and \
@@ -801,8 +806,8 @@ class BufferGuestImpl(Buffer):
   def __init__(self, t, cx, ptr, length):
     trap_if(length > Buffer.MAX_LENGTH)
     if t and length > 0:
-      trap_if(ptr != align_to(ptr, alignment(t, cx.opts)))
-      trap_if(ptr + length * elem_size(t, cx.opts) > len(cx.opts.memory))
+      trap_if(ptr != align_to(ptr, alignment(t, cx.opts.memory.ptr_type())))
+      trap_if(ptr + length * elem_size(t, cx.opts.memory.ptr_type()) > len(cx.opts.memory))
     self.cx = cx
     self.t = t
     self.ptr = ptr
@@ -820,7 +825,7 @@ def read(self, n):
     assert(n <= self.remain())
     if self.t:
       vs = load_list_from_valid_range(self.cx, self.ptr, n, self.t)
-      self.ptr += n * elem_size(self.t, self.cx.opts)
+      self.ptr += n * elem_size(self.t, self.cx.opts.memory.ptr_type())
     else:
       vs = n * [()]
     self.progress += n
@@ -831,7 +836,7 @@ def write(self, vs):
     assert(len(vs) <= self.remain())
     if self.t:
       store_list_into_valid_range(self.cx, vs, self.ptr, self.t)
-      self.ptr += len(vs) * elem_size(self.t, self.cx.opts)
+      self.ptr += len(vs) * elem_size(self.t, self.cx.opts.memory.ptr_type())
     else:
       assert(all(v == () for v in vs))
     self.progress += len(vs)
@@ -1088,7 +1093,7 @@ def contains(t, p):
 
 ### Alignment
 
-def alignment(t, opts):
+def alignment(t, ptr_type):
   match despecialize(t):
     case BoolType()                  : return 1
     case S8Type() | U8Type()         : return 1
@@ -1098,28 +1103,28 @@ def alignment(t, opts):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return opts.memory.ptr_size()
+    case StringType()                : return ptr_size(ptr_type)
     case ErrorContextType()          : return 4
-    case ListType(t, l)              : return alignment_list(t, l, opts)
-    case RecordType(fields)          : return alignment_record(fields, opts)
-    case VariantType(cases)          : return alignment_variant(cases, opts)
+    case ListType(t, l)              : return alignment_list(t, l, ptr_type)
+    case RecordType(fields)          : return alignment_record(fields, ptr_type)
+    case VariantType(cases)          : return alignment_variant(cases, ptr_type)
     case FlagsType(labels)           : return alignment_flags(labels)
     case OwnType() | BorrowType()    : return 4
     case StreamType() | FutureType() : return 4
 
-def alignment_list(elem_type, maybe_length, opts):
+def alignment_list(elem_type, maybe_length, ptr_type):
   if maybe_length is not None:
-    return alignment(elem_type, opts)
-  return opts.memory.ptr_size()
+    return alignment(elem_type, ptr_type)
+  return ptr_size(ptr_type)
 
-def alignment_record(fields, opts):
+def alignment_record(fields, ptr_type):
   a = 1
   for f in fields:
-    a = max(a, alignment(f.t, opts))
+    a = max(a, alignment(f.t, ptr_type))
   return a
 
-def alignment_variant(cases, opts):
-  return max(alignment(discriminant_type(cases), opts), max_case_alignment(cases, opts))
+def alignment_variant(cases, ptr_type):
+  return max(alignment(discriminant_type(cases), ptr_type), max_case_alignment(cases, ptr_type))
 
 def discriminant_type(cases):
   n = len(cases)
@@ -1130,11 +1135,11 @@ def discriminant_type(cases):
     case 2: return U16Type()
     case 3: return U32Type()
 
-def max_case_alignment(cases, opts):
+def max_case_alignment(cases, ptr_type):
   a = 1
   for c in cases:
     if c.t is not None:
-      a = max(a, alignment(c.t, opts))
+      a = max(a, alignment(c.t, ptr_type))
   return a
 
 def alignment_flags(labels):
@@ -1146,7 +1151,7 @@ def alignment_flags(labels):
 
 ### Element Size
 
-def elem_size(t, opts):
+def elem_size(t, ptr_type):
   match despecialize(t):
     case BoolType()                  : return 1
     case S8Type() | U8Type()         : return 1
@@ -1156,40 +1161,48 @@ def elem_size(t, opts):
     case F32Type()                   : return 4
     case F64Type()                   : return 8
     case CharType()                  : return 4
-    case StringType()                : return 2 * opts.memory.ptr_size()
+    case StringType()                : return 2 * ptr_size(ptr_type)
     case ErrorContextType()          : return 4
-    case ListType(t, l)              : return elem_size_list(t, l, opts)
-    case RecordType(fields)          : return elem_size_record(fields, opts)
-    case VariantType(cases)          : return elem_size_variant(cases, opts)
+    case ListType(t, l)              : return elem_size_list(t, l, ptr_type)
+    case RecordType(fields)          : return elem_size_record(fields, ptr_type)
+    case VariantType(cases)          : return elem_size_variant(cases, ptr_type)
     case FlagsType(labels)           : return elem_size_flags(labels)
     case OwnType() | BorrowType()    : return 4
     case StreamType() | FutureType() : return 4
 
-def elem_size_list(elem_type, maybe_length, opts):
+def worst_case_elem_size(t, ptr_type):
+  if ptr_type is None:
+    return elem_size(t, ptr_type)
+  result = elem_size(t, ptr_type)
+  other_ptr_type = 'i32' if ptr_type == 'i64' else 'i64'
+  result = max(result, elem_size(t, other_ptr_type))
+  return result
+
+def elem_size_list(elem_type, maybe_length, ptr_type):
   if maybe_length is not None:
-    return maybe_length * elem_size(elem_type, opts)
-  return 2 * opts.memory.ptr_size()
+    return maybe_length * elem_size(elem_type, ptr_type)
+  return 2 * ptr_size(ptr_type)
 
-def elem_size_record(fields, opts):
+def elem_size_record(fields, ptr_type):
   s = 0
   for f in fields:
-    s = align_to(s, alignment(f.t, opts))
-    s += elem_size(f.t, opts)
+    s = align_to(s, alignment(f.t, ptr_type))
+    s += elem_size(f.t, ptr_type)
   assert(s > 0)
-  return align_to(s, alignment_record(fields, opts))
+  return align_to(s, alignment_record(fields, ptr_type))
 
 def align_to(ptr, alignment):
   return math.ceil(ptr / alignment) * alignment
 
-def elem_size_variant(cases, opts):
-  s = elem_size(discriminant_type(cases), opts)
-  s = align_to(s, max_case_alignment(cases, opts))
+def elem_size_variant(cases, ptr_type):
+  s = elem_size(discriminant_type(cases), ptr_type)
+  s = align_to(s, max_case_alignment(cases, ptr_type))
   cs = 0
   for c in cases:
     if c.t is not None:
-      cs = max(cs, elem_size(c.t, opts))
+      cs = max(cs, elem_size(c.t, ptr_type))
   s += cs
-  return align_to(s, alignment_variant(cases, opts))
+  return align_to(s, alignment_variant(cases, ptr_type))
 
 def elem_size_flags(labels):
   n = len(labels)
@@ -1201,8 +1214,8 @@ def elem_size_flags(labels):
 ### Loading
 
 def load(cx, ptr, t):
-  assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
+  assert(ptr == align_to(ptr, alignment(t, cx.opts.memory.ptr_type())))
+  assert(ptr + elem_size(t, cx.opts.memory.ptr_type()) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : return convert_int_to_bool(load_int(cx, ptr, 1))
     case U8Type()           : return load_int(cx, ptr, 1)
@@ -1277,6 +1290,16 @@ def load_string(cx, ptr) -> String:
 
 UTF16_TAG = 1 << 31
 
+def worst_case_string_byte_length(string : String):
+  (s, encoding, tagged_code_units) = string
+  # Only a tagged 'latin1+utf16' length carries UTF16_TAG; never mask other encodings' lengths.
+  tagged = encoding == 'latin1+utf16' and bool(tagged_code_units & UTF16_TAG)
+  code_units = tagged_code_units ^ UTF16_TAG if tagged else tagged_code_units
+  non_ascii_utf16 = (encoding == 'utf16' or tagged) and any(ord(c) >= 2 ** 7 for c in s)
+  return (3 if non_ascii_utf16 else 2) * code_units  # 3 bytes/unit only for non-ASCII UTF-16
+
+MAX_STRING_BYTE_LENGTH = (1 << 31) - 1
+
 def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   match cx.opts.string_encoding:
     case 'utf8':
@@ -1303,13 +1326,18 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String:
   except UnicodeError:
     trap()
 
-  return (s, cx.opts.string_encoding, tagged_code_units)
+  string = (s, cx.opts.string_encoding, tagged_code_units)
+  trap_if(worst_case_string_byte_length(string) > MAX_STRING_BYTE_LENGTH)
+
+  return string
 
 def lift_error_context(cx, i):
   errctx = cx.inst.handles.get(i)
   trap_if(not isinstance(errctx, ErrorContext))
   return errctx
 
+MAX_LIST_BYTE_LENGTH = (1 << 32) - 1
+
 def load_list(cx, ptr, elem_type, maybe_length):
   if maybe_length is not None:
     return load_list_from_valid_range(cx, ptr, maybe_length, elem_type)
@@ -1318,31 +1346,32 @@ def load_list(cx, ptr, elem_type, maybe_length):
   return load_list_from_range(cx, begin, length, elem_type)
 
 def load_list_from_range(cx, ptr, length, elem_type):
-  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
-  trap_if(ptr + length * elem_size(elem_type, cx.opts) > len(cx.opts.memory))
+  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts.memory.ptr_type())))
+  trap_if(ptr + length * elem_size(elem_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory))
   return load_list_from_valid_range(cx, ptr, length, elem_type)
 
 def load_list_from_valid_range(cx, ptr, length, elem_type):
+  trap_if(length * worst_case_elem_size(elem_type, cx.opts.memory.ptr_type()) > MAX_LIST_BYTE_LENGTH)
   a = []
   for i in range(length):
-    a.append(load(cx, ptr + i * elem_size(elem_type, cx.opts), elem_type))
+    a.append(load(cx, ptr + i * elem_size(elem_type, cx.opts.memory.ptr_type()), elem_type))
   return a
 
 def load_record(cx, ptr, fields):
   record = {}
   for field in fields:
-    ptr = align_to(ptr, alignment(field.t, cx.opts))
+    ptr = align_to(ptr, alignment(field.t, cx.opts.memory.ptr_type()))
     record[field.label] = load(cx, ptr, field.t)
-    ptr += elem_size(field.t, cx.opts)
+    ptr += elem_size(field.t, cx.opts.memory.ptr_type())
   return record
 
 def load_variant(cx, ptr, cases):
-  disc_size = elem_size(discriminant_type(cases), cx.opts)
+  disc_size = elem_size(discriminant_type(cases), cx.opts.memory.ptr_type())
   case_index = load_int(cx, ptr, disc_size)
   ptr += disc_size
   trap_if(case_index >= len(cases))
   c = cases[case_index]
-  ptr = align_to(ptr, max_case_alignment(cases, cx.opts))
+  ptr = align_to(ptr, max_case_alignment(cases, cx.opts.memory.ptr_type()))
   if c.t is None:
     return { c.label: None }
   return { c.label: load(cx, ptr, c.t) }
@@ -1391,8 +1420,8 @@ def lift_async_value(ReadableEndT, cx, i, t):
 ### Storing
 
 def store(cx, v, t, ptr):
-  assert(ptr == align_to(ptr, alignment(t, cx.opts)))
-  assert(ptr + elem_size(t, cx.opts) <= len(cx.opts.memory))
+  assert(ptr == align_to(ptr, alignment(t, cx.opts.memory.ptr_type())))
+  assert(ptr + elem_size(t, cx.opts.memory.ptr_type()) <= len(cx.opts.memory))
   match despecialize(t):
     case BoolType()         : store_int(cx, int(bool(v)), ptr, 1)
     case U8Type()           : store_int(cx, v, ptr, 1)
@@ -1501,11 +1530,9 @@ def store_string_into_range(cx, v: String):
             case 'latin1'   : return store_string_copy(cx, src, src_code_units, 1, 2, 'latin-1')
             case 'utf16'    : return store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units)
 
-MAX_STRING_BYTE_LENGTH = (1 << 31) - 1
-
 def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment, dst_encoding):
   dst_byte_length = dst_code_unit_size * src_code_units
-  trap_if(dst_byte_length > MAX_STRING_BYTE_LENGTH)
+  assert(dst_byte_length <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length)
   trap_if(ptr != align_to(ptr, dst_alignment))
   trap_if(ptr + dst_byte_length > len(cx.opts.memory))
@@ -1530,7 +1557,7 @@ def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
     if ord(code_point) < 2**7:
       cx.opts.memory[ptr + i] = ord(code_point)
     else:
-      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+      assert(worst_case_size <= MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size)
       trap_if(ptr + worst_case_size > len(cx.opts.memory))
       encoded = src.encode('utf-8')
@@ -1543,7 +1570,7 @@ def store_string_to_utf8(cx, src, src_code_units, worst_case_size):
 
 def store_utf8_to_utf16(cx, src, src_code_units):
   worst_case_size = 2 * src_code_units
-  trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+  assert(worst_case_size <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, worst_case_size)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + worst_case_size > len(cx.opts.memory))
@@ -1568,7 +1595,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
       dst_byte_length += 1
     else:
       worst_case_size = 2 * src_code_units
-      trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH)
+      assert(worst_case_size <= MAX_STRING_BYTE_LENGTH)
       ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size)
       trap_if(ptr != align_to(ptr, 2))
       trap_if(ptr + worst_case_size > len(cx.opts.memory))
@@ -1591,7 +1618,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units):
 
 def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units):
   src_byte_length = 2 * src_code_units
-  trap_if(src_byte_length > MAX_STRING_BYTE_LENGTH)
+  assert(src_byte_length <= MAX_STRING_BYTE_LENGTH)
   ptr = cx.opts.realloc(0, 0, 2, src_byte_length)
   trap_if(ptr != align_to(ptr, 2))
   trap_if(ptr + src_byte_length > len(cx.opts.memory))
@@ -1620,30 +1647,30 @@ def store_list(cx, v, ptr, elem_type, maybe_length):
   store_int(cx, length, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size())
 
 def store_list_into_range(cx, v, elem_type):
-  byte_length = len(v) * elem_size(elem_type, cx.opts)
-  trap_if(byte_length >= (1 << 32))
-  ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts), byte_length)
-  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts)))
+  byte_length = len(v) * elem_size(elem_type, cx.opts.memory.ptr_type())
+  assert(byte_length <= MAX_LIST_BYTE_LENGTH)
+  ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts.memory.ptr_type()), byte_length)
+  trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts.memory.ptr_type())))
   trap_if(ptr + byte_length > len(cx.opts.memory))
   store_list_into_valid_range(cx, v, ptr, elem_type)
   return (ptr, len(v))
 
 def store_list_into_valid_range(cx, v, ptr, elem_type):
   for i,e in enumerate(v):
-    store(cx, e, elem_type, ptr + i * elem_size(elem_type, cx.opts))
+    store(cx, e, elem_type, ptr + i * elem_size(elem_type, cx.opts.memory.ptr_type()))
 
 def store_record(cx, v, ptr, fields):
   for f in fields:
-    ptr = align_to(ptr, alignment(f.t, cx.opts))
+    ptr = align_to(ptr, alignment(f.t, cx.opts.memory.ptr_type()))
     store(cx, v[f.label], f.t, ptr)
-    ptr += elem_size(f.t, cx.opts)
+    ptr += elem_size(f.t, cx.opts.memory.ptr_type())
 
 def store_variant(cx, v, ptr, cases):
   case_index, case_value = match_case(v, cases)
-  disc_size = elem_size(discriminant_type(cases), cx.opts)
+  disc_size = elem_size(discriminant_type(cases), cx.opts.memory.ptr_type())
   store_int(cx, case_index, ptr, disc_size)
   ptr += disc_size
-  ptr = align_to(ptr, max_case_alignment(cases, cx.opts))
+  ptr = align_to(ptr, max_case_alignment(cases, cx.opts.memory.ptr_type()))
   c = cases[case_index]
   if c.t is not None:
     store(cx, case_value, c.t, ptr)
@@ -1973,8 +2000,8 @@ def lift_flat_values(cx, max_flat, vi, ts):
   if len(flat_types) > max_flat:
     ptr = vi.next(cx.opts.memory.ptr_type())
     tuple_type = TupleType(ts)
-    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
+    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts.memory.ptr_type())))
+    trap_if(ptr + elem_size(tuple_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory))
     return list(load(cx, ptr, tuple_type).values())
   else:
     return [ lift_flat(cx, vi, t) for t in ts ]
@@ -1986,13 +2013,13 @@ def lower_flat_values(cx, max_flat, vs, ts, out_param = None):
     tuple_type = TupleType(ts)
     tuple_value = {str(i): v for i,v in enumerate(vs)}
     if out_param is None:
-      ptr = cx.opts.realloc(0, 0, alignment(tuple_type, cx.opts), elem_size(tuple_type, cx.opts))
+      ptr = cx.opts.realloc(0, 0, alignment(tuple_type, cx.opts.memory.ptr_type()), elem_size(tuple_type, cx.opts.memory.ptr_type()))
       flat_vals = [ptr]
     else:
       ptr = out_param.next(cx.opts.memory.ptr_type())
       flat_vals = []
-    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts)))
-    trap_if(ptr + elem_size(tuple_type, cx.opts) > len(cx.opts.memory))
+    trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts.memory.ptr_type())))
+    trap_if(ptr + elem_size(tuple_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory))
     store(cx, tuple_value, tuple_type, ptr)
   else:
     flat_vals = []
diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py
index 4432553f..5f5d067f 100644
--- a/design/mvp/canonical-abi/run_tests.py
+++ b/design/mvp/canonical-abi/run_tests.py
@@ -389,6 +389,7 @@ def test_heap(t, expect, args, byte_array, addr_type='i32'):
 test_heap(t, v, [0,2],
           [0xff,0xff,0xff,0xff, 0,0,0,0])
 
+
 def test_flatten(t, params, results, addr_type='i32'):
   opts = mk_opts(MemInst(bytearray(), addr_type))
   expect = CoreFuncType(params, results)
@@ -465,6 +466,107 @@ def on_resolve(result):
       test_roundtrip(t, v, addr_type=addr_type)
 
 
+def assert_trap_on_load_string(src_encoding, s, tagged_code_units, encoded):
+  ptr_offset = 8
+  memory = bytearray(ptr_offset + len(encoded))
+  memory[0:4] = int.to_bytes(ptr_offset, 4, 'little')
+  memory[4:8] = int.to_bytes(tagged_code_units, 4, 'little')
+  memory[ptr_offset:] = encoded
+  cx = mk_cx(MemInst(memory, 'i32'), src_encoding)
+  try:
+    load(cx, 0, StringType())
+    fail("expected trap loading {!r} as {}".format(s, src_encoding))
+  except Trap:
+    pass
+
+def test_string_byte_length_limit():
+  saved = definitions.MAX_STRING_BYTE_LENGTH
+  try:
+    definitions.MAX_STRING_BYTE_LENGTH = 20
+
+    # Loading from UTF-8: 10 bytes will succeed, 11 bytes will trap on load
+    for dst in encodings:
+      test_string('utf8', dst, 'helloworld')
+    assert_trap_on_load_string('utf8', 'hello world', 11, b'hello world')
+
+    # Loading from UTF-16 all ASCII: 10 code units will succeed, 11 will trap on
+    # load
+    for dst in encodings:
+      test_string('utf16', dst, 'abcdefghij')
+    assert_trap_on_load_string('utf16', 'abcdefghijk', 11,
+                        'abcdefghijk'.encode('utf-16-le'))
+
+    # UTF-16 non-ASCII: 6 code units will succeed, 7 will trap on load
+    for dst in encodings:
+      test_string('utf16', dst, 'ab\u0100def')
+    assert_trap_on_load_string('utf16', '\u0100abcdef', 7,
+                        '\u0100abcdef'.encode('utf-16-le'))
+
+    # Latin1+utf16 (latin1): 10 bytes will succeed, 11 will trap on load
+    for dst in encodings:
+      test_string('latin1+utf16', dst, 'helloworld')
+    assert_trap_on_load_string('latin1+utf16', 'hello world', 11,
+                        b'hello world')
+
+    # Latin1+utf16 (utf16 variant, non-ASCII): 6 code units will succeed, 7
+    # will trap on load
+    for dst in encodings:
+      test_string('latin1+utf16', dst, '\u0100abcde')
+    assert_trap_on_load_string('latin1+utf16', '\u0100abcdef', 7 | UTF16_TAG,
+                        '\u0100abcdef'.encode('utf-16-le'))
+
+  finally:
+    definitions.MAX_STRING_BYTE_LENGTH = saved
+
+def test_list_byte_length_limit():
+  saved = definitions.MAX_LIST_BYTE_LENGTH
+  try:
+    definitions.MAX_LIST_BYTE_LENGTH = 20
+
+    # This list has the same size under all pointer types
+    for addr_type in ['i32', 'i64']:
+      # five U32's fit in 20 bytes
+      test_heap(ListType(U32Type()), [1,2,3,4,5], [0, 5],
+                [1,0,0,0, 2,0,0,0, 3,0,0,0, 4,0,0,0, 5,0,0,0], addr_type)
+      # six U32's exceed the limit
+      test_heap(ListType(U32Type()), None, [0, 6],
+                [1,0,0,0, 2,0,0,0, 3,0,0,0, 4,0,0,0, 5,0,0,0, 6,0,0,0], addr_type)
+
+    # A list of strings has 8 bytes per entry in i32 and 16 bytes per entry in
+    # i64. The limit uses the worst case (16), so for both pointer types a list
+    # of length 1 can be loaded, but a list of length 2 hits the limit.
+    test_heap(ListType(StringType()), [mk_str("hi")], [0, 1],
+              [8,0,0,0, 2,0,0,0, ord('h'), ord('i')], 'i32')
+    test_heap(ListType(StringType()), [mk_str("hi")], [0, 1],
+              [16,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, ord('h'), ord('i')], 'i64')
+
+    test_heap(ListType(StringType()), None, [0, 2],
+              [16,0,0,0, 2,0,0,0, 18,0,0,0, 2,0,0,0,
+                ord('h'),ord('i'),ord('a'),ord('b')], 'i32')
+    test_heap(ListType(StringType()), None, [0, 2],
+              [32,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0,
+                34,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0,
+                ord('h'),ord('i'),ord('a'),ord('b')], 'i64')
+
+    # Similarly, a list of lists of U8's has 8 bytes per entry in i32 and 16
+    # bytes per entry in i64, so a list of length 1 can be loaded, but a list
+    # of length 2 hits the limit.
+    test_heap(ListType(ListType(U8Type())), [[3,4,5]], [0, 1],
+              [8,0,0,0, 3,0,0,0, 3, 4, 5], 'i32')
+    test_heap(ListType(ListType(U8Type())), [[3,4,5]], [0, 1],
+              [16,0,0,0,0,0,0,0, 3,0,0,0,0,0,0,0, 3, 4, 5], 'i64')
+    test_heap(ListType(ListType(U8Type())), None, [0, 2],
+              [16,0,0,0, 2,0,0,0, 18,0,0,0, 3,0,0,0,
+               1,2,3,4,5], 'i32')
+    test_heap(ListType(ListType(U8Type())), None, [0, 2],
+              [32,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0,
+                34,0,0,0,0,0,0,0, 3,0,0,0,0,0,0,0,
+                1,2,3,4,5], 'i64')
+
+  finally:
+    definitions.MAX_LIST_BYTE_LENGTH = saved
+
+
 def test_handles():
   before = definitions.MAX_FLAT_RESULTS
   definitions.MAX_FLAT_RESULTS = 16
@@ -2829,6 +2931,8 @@ def mk_task(supertask, inst):
 
 
 test_roundtrips()
+test_string_byte_length_limit()
+test_list_byte_length_limit()
 test_handles()
 test_async_to_async()
 test_async_callback()