diff --git a/design/mvp/CanonicalABI.md b/design/mvp/CanonicalABI.md index 32fb81fd..93c068d1 100644 --- a/design/mvp/CanonicalABI.md +++ b/design/mvp/CanonicalABI.md @@ -239,13 +239,51 @@ that can be set on various Canonical ABI definitions. The default values of the Python fields are the default values when the associated `canonopt` is not present in the binary or text format definition. +The `MemInst` class represents a core WebAssembly [`memory` instance], with +`bytes` corresponding to the memory's bytes and `addrtype` coming from the +[`memory type`]. +```python +def ptr_size(ptr_type): + match ptr_type: + case 'i32': + return 4 + case 'i64': + return 8 + +@dataclass +class MemInst: + bytes: bytearray + addrtype: Literal['i32', 'i64'] + + def __getitem__(self, i): + return self.bytes[i] + + def __setitem__(self, i, v): + self.bytes[i] = v + + def __len__(self): + return len(self.bytes) + + def ptr_type(self): + return self.addrtype + + def ptr_size(self): + return ptr_size(self.ptr_type()) + + def equal(lhs, rhs): + return lhs.bytes == rhs.bytes and \ + lhs.addrtype == rhs.addrtype +``` +The `ptr_type` and `ptr_size` methods return the core value type and byte +size of memory pointers. + The `LiftOptions` class contains the subset of [`canonopt`] which are needed when lifting individual parameters and results: ```python @dataclass class LiftOptions: string_encoding: str = 'utf8' - memory: Optional[bytearray] = None + memory: Optional[MemInst] = None def equal(lhs, rhs): return lhs.string_encoding == rhs.string_encoding and \ @@ -1295,7 +1333,7 @@ been allowed to resolve and explicitly relinquish any borrowed handles. A "buffer" is an abstract region of memory that can either be read-from or written-to. This region of memory can either be owned by the host or by wasm. -Currently wasm memory is always 32-bit linear memory, but soon 64-bit and GC +Currently wasm memory is always 32-bit or 64-bit linear memory, but soon GC memory will be added. 
Thus, buffers provide an abstraction over at least 4 different "kinds" of memory. @@ -1312,8 +1350,9 @@ that returns how many `t` values may still be read or written. Buffers mostly hide their original/complete size. However, zero-length buffers need to be treated specially (particularly when a zero-length read rendezvous with a zero-length write), so there is a special query for detecting whether a buffer -is zero-length. Based on this, buffers are represented by the following 3 -abstract Python classes: +is zero-length. Internally, buffers do have a maximum length of `2^28 - 1` which +is independent of the type of memory backing the buffer. Based on this, buffers +are represented by the following 3 abstract Python classes: ```python class Buffer: MAX_LENGTH = 2**28 - 1 @@ -1355,8 +1394,8 @@ class BufferGuestImpl(Buffer): def __init__(self, t, cx, ptr, length): trap_if(length > Buffer.MAX_LENGTH) if t and length > 0: - trap_if(ptr != align_to(ptr, alignment(t))) - trap_if(ptr + length * elem_size(t) > len(cx.opts.memory)) + trap_if(ptr != align_to(ptr, alignment(t, cx.opts.memory.ptr_type()))) + trap_if(ptr + length * elem_size(t, cx.opts.memory.ptr_type()) > len(cx.opts.memory)) self.cx = cx self.t = t self.ptr = ptr @@ -1374,7 +1413,7 @@ class ReadableBufferGuestImpl(BufferGuestImpl): assert(n <= self.remain()) if self.t: vs = load_list_from_valid_range(self.cx, self.ptr, n, self.t) - self.ptr += n * elem_size(self.t) + self.ptr += n * elem_size(self.t, self.cx.opts.memory.ptr_type()) else: vs = n * [()] self.progress += n @@ -1385,7 +1424,7 @@ class WritableBufferGuestImpl(BufferGuestImpl, WritableBuffer): assert(len(vs) <= self.remain()) if self.t: store_list_into_valid_range(self.cx, vs, self.ptr, self.t) - self.ptr += len(vs) * elem_size(self.t) + self.ptr += len(vs) * elem_size(self.t, self.cx.opts.memory.ptr_type()) else: assert(all(v == () for v in vs)) self.progress += len(vs) @@ -1860,7 +1899,7 @@ Each value type is assigned an [alignment] which is used 
by subsequent Canonical ABI definitions. Presenting the definition of `alignment` piecewise, we start with the top-level case analysis: ```python -def alignment(t): +def alignment(t, ptr_type): match despecialize(t): case BoolType() : return 1 case S8Type() | U8Type() : return 1 @@ -1870,11 +1909,11 @@ def alignment(t): case F32Type() : return 4 case F64Type() : return 8 case CharType() : return 4 - case StringType() : return 4 + case StringType() : return ptr_size(ptr_type) case ErrorContextType() : return 4 - case ListType(t, l) : return alignment_list(t, l) - case RecordType(fields) : return alignment_record(fields) - case VariantType(cases) : return alignment_variant(cases) + case ListType(t, l) : return alignment_list(t, l, ptr_type) + case RecordType(fields) : return alignment_record(fields, ptr_type) + case VariantType(cases) : return alignment_variant(cases, ptr_type) case FlagsType(labels) : return alignment_flags(labels) case OwnType() | BorrowType() : return 4 case StreamType() | FutureType() : return 4 @@ -1883,18 +1922,18 @@ def alignment(t): List alignment is the same as tuple alignment when the length is fixed and otherwise uses the alignment of pointers. ```python -def alignment_list(elem_type, maybe_length): +def alignment_list(elem_type, maybe_length, ptr_type): if maybe_length is not None: - return alignment(elem_type) - return 4 + return alignment(elem_type, ptr_type) + return ptr_size(ptr_type) ``` Record alignment is tuple alignment, with the definitions split for reuse below: ```python -def alignment_record(fields): +def alignment_record(fields, ptr_type): a = 1 for f in fields: - a = max(a, alignment(f.t)) + a = max(a, alignment(f.t, ptr_type)) return a ``` @@ -1904,8 +1943,8 @@ covering the number of cases in the variant (with cases numbered in order from compact representations of variants in memory. 
This smallest integer type is selected by the following function, used above and below: ```python -def alignment_variant(cases): - return max(alignment(discriminant_type(cases)), max_case_alignment(cases)) +def alignment_variant(cases, ptr_type): + return max(alignment(discriminant_type(cases), ptr_type), max_case_alignment(cases, ptr_type)) def discriminant_type(cases): n = len(cases) @@ -1916,11 +1955,11 @@ def discriminant_type(cases): case 2: return U16Type() case 3: return U32Type() -def max_case_alignment(cases): +def max_case_alignment(cases, ptr_type): a = 1 for c in cases: if c.t is not None: - a = max(a, alignment(c.t)) + a = max(a, alignment(c.t, ptr_type)) return a ``` @@ -1946,7 +1985,7 @@ maps well to languages which represent `list`s as random-access arrays. Empty types, such as records with no fields, are not permitted, to avoid complications in source languages. ```python -def elem_size(t): +def elem_size(t, ptr_type): match despecialize(t): case BoolType() : return 1 case S8Type() | U8Type() : return 1 @@ -1956,40 +1995,48 @@ def elem_size(t): case F32Type() : return 4 case F64Type() : return 8 case CharType() : return 4 - case StringType() : return 8 + case StringType() : return 2 * ptr_size(ptr_type) case ErrorContextType() : return 4 - case ListType(t, l) : return elem_size_list(t, l) - case RecordType(fields) : return elem_size_record(fields) - case VariantType(cases) : return elem_size_variant(cases) + case ListType(t, l) : return elem_size_list(t, l, ptr_type) + case RecordType(fields) : return elem_size_record(fields, ptr_type) + case VariantType(cases) : return elem_size_variant(cases, ptr_type) case FlagsType(labels) : return elem_size_flags(labels) case OwnType() | BorrowType() : return 4 case StreamType() | FutureType() : return 4 -def elem_size_list(elem_type, maybe_length): +def worst_case_elem_size(t, ptr_type): + if ptr_type is None: + return elem_size(t, ptr_type) + result = elem_size(t, ptr_type) + other_ptr_type = 'i32' if 
ptr_type == 'i64' else 'i64' + result = max(result, elem_size(t, other_ptr_type)) + return result + +def elem_size_list(elem_type, maybe_length, ptr_type): if maybe_length is not None: - return maybe_length * elem_size(elem_type) - return 8 + return maybe_length * elem_size(elem_type, ptr_type) + return 2 * ptr_size(ptr_type) -def elem_size_record(fields): +def elem_size_record(fields, ptr_type): s = 0 for f in fields: - s = align_to(s, alignment(f.t)) - s += elem_size(f.t) + s = align_to(s, alignment(f.t, ptr_type)) + s += elem_size(f.t, ptr_type) assert(s > 0) - return align_to(s, alignment_record(fields)) + return align_to(s, alignment_record(fields, ptr_type)) def align_to(ptr, alignment): return math.ceil(ptr / alignment) * alignment -def elem_size_variant(cases): - s = elem_size(discriminant_type(cases)) - s = align_to(s, max_case_alignment(cases)) +def elem_size_variant(cases, ptr_type): + s = elem_size(discriminant_type(cases), ptr_type) + s = align_to(s, max_case_alignment(cases, ptr_type)) cs = 0 for c in cases: if c.t is not None: - cs = max(cs, elem_size(c.t)) + cs = max(cs, elem_size(c.t, ptr_type)) s += cs - return align_to(s, alignment_variant(cases)) + return align_to(s, alignment_variant(cases, ptr_type)) def elem_size_flags(labels): n = len(labels) @@ -2007,8 +2054,8 @@ as a Python value. 
Presenting the definition of `load` piecewise, we start with the top-level case analysis: ```python def load(cx, ptr, t): - assert(ptr == align_to(ptr, alignment(t))) - assert(ptr + elem_size(t) <= len(cx.opts.memory)) + assert(ptr == align_to(ptr, alignment(t, cx.opts.memory.ptr_type()))) + assert(ptr + elem_size(t, cx.opts.memory.ptr_type()) <= len(cx.opts.memory)) match despecialize(t): case BoolType() : return convert_int_to_bool(load_int(cx, ptr, 1)) case U8Type() : return load_int(cx, ptr, 1) @@ -2098,25 +2145,50 @@ def convert_i32_to_char(cx, i): return chr(i) ``` -Strings are loaded from two `i32` values: a pointer (offset in linear memory) -and a number of [code units]. There are three supported string encodings in -[`canonopt`]: [UTF-8], [UTF-16] and `latin1+utf16`. This last options allows a -*dynamic* choice between [Latin-1] and UTF-16, indicated by the high bit of the -second `i32`. String values include their original encoding and length in -tagged code units as a "hint" that enables `store_string` (defined below) to -make better up-front allocation size choices in many cases. Thus, the value -produced by `load_string` isn't simply a Python `str`, but a *tuple* containing -a `str`, the original encoding and the number of source code units. +Strings are loaded from two pointer-sized values: a pointer (offset in linear +memory) and a number of [code units]. There are three supported string encodings +in [`canonopt`]: [UTF-8], [UTF-16] and `latin1+utf16`. This last option allows a +*dynamic* choice between [Latin-1] and UTF-16, indicated by the 32nd bit of the +second pointer-sized value. The length of a string is limited so that the number +of code units fits in 31 bits (leaving the 32nd bit free as the flag). This +maximum length is enforced even on 64-bit memories to ensure they don't define +interfaces which 32-bit components couldn't handle. 
String values include their +original encoding and length in tagged code units as a "hint" that enables +`store_string` (defined below) to make better up-front allocation size choices +in many cases. Thus, the value produced by `load_string` isn't simply a Python +`str`, but a *tuple* containing a `str`, the original encoding and the number +of source code units. + +The `MAX_STRING_BYTE_LENGTH` constant ensures that the high bit of a +string's number of code units is never set, keeping it clear for `UTF16_TAG`. + +Since this byte length of a string depends on the encoding, we estimate the +worst case length across all encodings when loading the string and trap if the +maximum length might be exceeded. Generally the worst case length comes from +encoding in UTF-16 where byte length could be twice the number of code units. +But if the original encoding was UTF-16 the byte length may be up to 3 times the +number of code units when encoding in UTF-8 if there are code points at 2^7 or +higher. 
```python String = tuple[str, str, int] def load_string(cx, ptr) -> String: - begin = load_int(cx, ptr, 4) - tagged_code_units = load_int(cx, ptr + 4, 4) + begin = load_int(cx, ptr, cx.opts.memory.ptr_size()) + tagged_code_units = load_int(cx, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size()) return load_string_from_range(cx, begin, tagged_code_units) UTF16_TAG = 1 << 31 +def worst_case_string_byte_length(string : String): + (s, encoding, tagged_code_units) = string + if encoding == 'utf16' or (encoding == 'latin1+utf16' and (tagged_code_units & UTF16_TAG)): + for code_point in s: + if ord(code_point) >= 2 ** 7: + return 3 * (tagged_code_units & ~UTF16_TAG) + return 2 * (tagged_code_units & ~UTF16_TAG) + +MAX_STRING_BYTE_LENGTH = (1 << 31) - 1 + def load_string_from_range(cx, ptr, tagged_code_units) -> String: match cx.opts.string_encoding: case 'utf8': @@ -2143,7 +2215,10 @@ def load_string_from_range(cx, ptr, tagged_code_units) -> String: except UnicodeError: trap() - return (s, cx.opts.string_encoding, tagged_code_units) + string = (s, cx.opts.string_encoding, tagged_code_units) + trap_if(worst_case_string_byte_length(string) > MAX_STRING_BYTE_LENGTH) + + return string ``` Error context values are lifted directly from the current component instance's @@ -2155,32 +2230,39 @@ def lift_error_context(cx, i): return errctx ``` -Lists and records are loaded by recursively loading their elements/fields: +Lists and records are loaded by recursively loading their elements/fields. The +byte length of a list is limited to fit in a 32-bit memory. When loading a list, +we check the what it's worst case byte length doesn't exceed that limit under +any pointer type and trap if the length could exceed the maximum limit. This +ensures that interfaces can be used by both 32-bit and 64-bit components. 
```python +MAX_LIST_BYTE_LENGTH = (1 << 32) - 1 + def load_list(cx, ptr, elem_type, maybe_length): if maybe_length is not None: return load_list_from_valid_range(cx, ptr, maybe_length, elem_type) - begin = load_int(cx, ptr, 4) - length = load_int(cx, ptr + 4, 4) + begin = load_int(cx, ptr, cx.opts.memory.ptr_size()) + length = load_int(cx, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size()) return load_list_from_range(cx, begin, length, elem_type) def load_list_from_range(cx, ptr, length, elem_type): - trap_if(ptr != align_to(ptr, alignment(elem_type))) - trap_if(ptr + length * elem_size(elem_type) > len(cx.opts.memory)) + trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts.memory.ptr_type()))) + trap_if(ptr + length * elem_size(elem_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory)) return load_list_from_valid_range(cx, ptr, length, elem_type) def load_list_from_valid_range(cx, ptr, length, elem_type): + trap_if(length * worst_case_elem_size(elem_type, cx.opts.memory.ptr_type()) > MAX_LIST_BYTE_LENGTH) a = [] for i in range(length): - a.append(load(cx, ptr + i * elem_size(elem_type), elem_type)) + a.append(load(cx, ptr + i * elem_size(elem_type, cx.opts.memory.ptr_type()), elem_type)) return a def load_record(cx, ptr, fields): record = {} for field in fields: - ptr = align_to(ptr, alignment(field.t)) + ptr = align_to(ptr, alignment(field.t, cx.opts.memory.ptr_type())) record[field.label] = load(cx, ptr, field.t) - ptr += elem_size(field.t) + ptr += elem_size(field.t, cx.opts.memory.ptr_type()) return record ``` As a technical detail: the `align_to` in the loop in `load_record` is @@ -2194,12 +2276,12 @@ implementation can build the appropriate index tables at compile-time so that variant-passing is always O(1) and not involving string operations. 
```python def load_variant(cx, ptr, cases): - disc_size = elem_size(discriminant_type(cases)) + disc_size = elem_size(discriminant_type(cases), cx.opts.memory.ptr_type()) case_index = load_int(cx, ptr, disc_size) ptr += disc_size trap_if(case_index >= len(cases)) c = cases[case_index] - ptr = align_to(ptr, max_case_alignment(cases)) + ptr = align_to(ptr, max_case_alignment(cases, cx.opts.memory.ptr_type())) if c.t is None: return { c.label: None } return { c.label: load(cx, ptr, c.t) } @@ -2291,8 +2373,8 @@ The `store` function defines how to write a value `v` of a given value type `store` piecewise, we start with the top-level case analysis: ```python def store(cx, v, t, ptr): - assert(ptr == align_to(ptr, alignment(t))) - assert(ptr + elem_size(t) <= len(cx.opts.memory)) + assert(ptr == align_to(ptr, alignment(t, cx.opts.memory.ptr_type()))) + assert(ptr + elem_size(t, cx.opts.memory.ptr_type()) <= len(cx.opts.memory)) match despecialize(t): case BoolType() : store_int(cx, int(bool(v)), ptr, 1) case U8Type() : store_int(cx, v, ptr, 1) @@ -2409,8 +2491,8 @@ combinations, subdividing the `latin1+utf16` encoding into either `latin1` or ```python def store_string(cx, v: String, ptr): begin, tagged_code_units = store_string_into_range(cx, v) - store_int(cx, begin, ptr, 4) - store_int(cx, tagged_code_units, ptr + 4, 4) + store_int(cx, begin, ptr, cx.opts.memory.ptr_size()) + store_int(cx, tagged_code_units, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size()) def store_string_into_range(cx, v: String): src, src_encoding, src_tagged_code_units = v @@ -2451,11 +2533,9 @@ The simplest 4 cases above can compute the exact destination size and then copy with a simply loop (that possibly inflates Latin-1 to UTF-16 by injecting a 0 byte after every Latin-1 byte). 
```python -MAX_STRING_BYTE_LENGTH = (1 << 31) - 1 - def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment, dst_encoding): dst_byte_length = dst_code_unit_size * src_code_units - trap_if(dst_byte_length > MAX_STRING_BYTE_LENGTH) + assert(dst_byte_length <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length) trap_if(ptr != align_to(ptr, dst_alignment)) trap_if(ptr + dst_byte_length > len(cx.opts.memory)) @@ -2464,9 +2544,6 @@ def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment cx.opts.memory[ptr : ptr+len(encoded)] = encoded return (ptr, src_code_units) ``` -The choice of `MAX_STRING_BYTE_LENGTH` constant ensures that the high bit of a -string's number of code units is never set, keeping it clear for `UTF16_TAG`. - The 2 cases of transcoding into UTF-8 share an algorithm that starts by optimistically assuming that each code unit of the source string fits in a single UTF-8 byte and then, failing that, reallocates to a worst-case size, @@ -2488,7 +2565,7 @@ def store_string_to_utf8(cx, src, src_code_units, worst_case_size): if ord(code_point) < 2**7: cx.opts.memory[ptr + i] = ord(code_point) else: - trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH) + assert(worst_case_size <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size) trap_if(ptr + worst_case_size > len(cx.opts.memory)) encoded = src.encode('utf-8') @@ -2507,7 +2584,7 @@ if multiple UTF-8 bytes were collapsed into a single 2-byte UTF-16 code unit: ```python def store_utf8_to_utf16(cx, src, src_code_units): worst_case_size = 2 * src_code_units - trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH) + assert(worst_case_size <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(0, 0, 2, worst_case_size) trap_if(ptr != align_to(ptr, 2)) trap_if(ptr + worst_case_size > len(cx.opts.memory)) @@ -2542,7 +2619,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units): dst_byte_length += 1 else: 
worst_case_size = 2 * src_code_units - trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH) + assert(worst_case_size <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size) trap_if(ptr != align_to(ptr, 2)) trap_if(ptr + worst_case_size > len(cx.opts.memory)) @@ -2577,7 +2654,7 @@ inexpensively fused with the UTF-16 validate+copy loop.) ```python def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units): src_byte_length = 2 * src_code_units - trap_if(src_byte_length > MAX_STRING_BYTE_LENGTH) + assert(src_byte_length <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(0, 0, 2, src_byte_length) trap_if(ptr != align_to(ptr, 2)) trap_if(ptr + src_byte_length > len(cx.opts.memory)) @@ -2604,7 +2681,9 @@ def lower_error_context(cx, v): Lists and records are stored by recursively storing their elements and are symmetric to the loading functions. Unlike strings, lists can simply allocate based on the up-front knowledge of length and static -element size. +element size. Storing a list that exceeds the size of a 32-bit memory traps even +when storing on 64-bit platform to avoid having interfaces that 32-bit +components can't use. 
```python def store_list(cx, v, ptr, elem_type, maybe_length): if maybe_length is not None: @@ -2612,27 +2691,27 @@ def store_list(cx, v, ptr, elem_type, maybe_length): store_list_into_valid_range(cx, v, ptr, elem_type) return begin, length = store_list_into_range(cx, v, elem_type) - store_int(cx, begin, ptr, 4) - store_int(cx, length, ptr + 4, 4) + store_int(cx, begin, ptr, cx.opts.memory.ptr_size()) + store_int(cx, length, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size()) def store_list_into_range(cx, v, elem_type): - byte_length = len(v) * elem_size(elem_type) - trap_if(byte_length >= (1 << 32)) - ptr = cx.opts.realloc(0, 0, alignment(elem_type), byte_length) - trap_if(ptr != align_to(ptr, alignment(elem_type))) + byte_length = len(v) * elem_size(elem_type, cx.opts.memory.ptr_type()) + assert(byte_length <= MAX_LIST_BYTE_LENGTH) + ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts.memory.ptr_type()), byte_length) + trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts.memory.ptr_type()))) trap_if(ptr + byte_length > len(cx.opts.memory)) store_list_into_valid_range(cx, v, ptr, elem_type) return (ptr, len(v)) def store_list_into_valid_range(cx, v, ptr, elem_type): for i,e in enumerate(v): - store(cx, e, elem_type, ptr + i * elem_size(elem_type)) + store(cx, e, elem_type, ptr + i * elem_size(elem_type, cx.opts.memory.ptr_type())) def store_record(cx, v, ptr, fields): for f in fields: - ptr = align_to(ptr, alignment(f.t)) + ptr = align_to(ptr, alignment(f.t, cx.opts.memory.ptr_type())) store(cx, v[f.label], f.t, ptr) - ptr += elem_size(f.t) + ptr += elem_size(f.t, cx.opts.memory.ptr_type()) ``` Variant values are represented as Python dictionaries containing exactly one @@ -2645,10 +2724,10 @@ indices. 
```python def store_variant(cx, v, ptr, cases): case_index, case_value = match_case(v, cases) - disc_size = elem_size(discriminant_type(cases)) + disc_size = elem_size(discriminant_type(cases), cx.opts.memory.ptr_type()) store_int(cx, case_index, ptr, disc_size) ptr += disc_size - ptr = align_to(ptr, max_case_alignment(cases)) + ptr = align_to(ptr, max_case_alignment(cases, cx.opts.memory.ptr_type())) c = cases[case_index] if c.t is not None: store(cx, case_value, c.t, ptr) @@ -2752,38 +2831,38 @@ MAX_FLAT_ASYNC_PARAMS = 4 MAX_FLAT_RESULTS = 1 def flatten_functype(opts, ft, context): - flat_params = flatten_types(ft.param_types()) - flat_results = flatten_types(ft.result_type()) + flat_params = flatten_types(ft.param_types(), opts) + flat_results = flatten_types(ft.result_type(), opts) if not opts.async_: if len(flat_params) > MAX_FLAT_PARAMS: - flat_params = ['i32'] + flat_params = [opts.memory.ptr_type()] if len(flat_results) > MAX_FLAT_RESULTS: match context: case 'lift': - flat_results = ['i32'] + flat_results = [opts.memory.ptr_type()] case 'lower': - flat_params += ['i32'] + flat_params += [opts.memory.ptr_type()] flat_results = [] return CoreFuncType(flat_params, flat_results) else: match context: case 'lift': if len(flat_params) > MAX_FLAT_PARAMS: - flat_params = ['i32'] + flat_params = [opts.memory.ptr_type()] if opts.callback: flat_results = ['i32'] else: flat_results = [] case 'lower': if len(flat_params) > MAX_FLAT_ASYNC_PARAMS: - flat_params = ['i32'] + flat_params = [opts.memory.ptr_type()] if len(flat_results) > 0: - flat_params += ['i32'] + flat_params += [opts.memory.ptr_type()] flat_results = ['i32'] return CoreFuncType(flat_params, flat_results) -def flatten_types(ts): - return [ft for t in ts for ft in flatten_type(t)] +def flatten_types(ts, opts): + return [ft for t in ts for ft in flatten_type(t, opts)] ``` As shown here, the core signatures `async` functions use a lower limit on the maximum number of parameters (1) and results (0) passed as 
scalars before @@ -2792,7 +2871,7 @@ falling back to passing through memory. Presenting the definition of `flatten_type` piecewise, we start with the top-level case analysis: ```python -def flatten_type(t): +def flatten_type(t, opts): match despecialize(t): case BoolType() : return ['i32'] case U8Type() | U16Type() | U32Type() : return ['i32'] @@ -2801,11 +2880,11 @@ def flatten_type(t): case F32Type() : return ['f32'] case F64Type() : return ['f64'] case CharType() : return ['i32'] - case StringType() : return ['i32', 'i32'] + case StringType() : return [opts.memory.ptr_type(), opts.memory.ptr_type()] case ErrorContextType() : return ['i32'] - case ListType(t, l) : return flatten_list(t, l) - case RecordType(fields) : return flatten_record(fields) - case VariantType(cases) : return flatten_variant(cases) + case ListType(t, l) : return flatten_list(t, l, opts) + case RecordType(fields) : return flatten_record(fields, opts) + case VariantType(cases) : return flatten_variant(cases, opts) case FlagsType(labels) : return ['i32'] case OwnType() | BorrowType() : return ['i32'] case StreamType() | FutureType() : return ['i32'] @@ -2814,18 +2893,18 @@ def flatten_type(t): List flattening of a fixed-length list uses the same flattening as a tuple (via `flatten_record` below). ```python -def flatten_list(elem_type, maybe_length): +def flatten_list(elem_type, maybe_length, opts): if maybe_length is not None: - return flatten_type(elem_type) * maybe_length - return ['i32', 'i32'] + return flatten_type(elem_type, opts) * maybe_length + return [opts.memory.ptr_type(), opts.memory.ptr_type()] ``` Record flattening simply flattens each field in sequence. 
```python -def flatten_record(fields): +def flatten_record(fields, opts): flat = [] for f in fields: - flat += flatten_type(f.t) + flat += flatten_type(f.t, opts) return flat ``` @@ -2838,16 +2917,16 @@ case, all flattened variants are passed with the same static set of core types, which may involve, e.g., reinterpreting an `f32` as an `i32` or zero-extending an `i32` into an `i64`. ```python -def flatten_variant(cases): +def flatten_variant(cases, opts): flat = [] for c in cases: if c.t is not None: - for i,ft in enumerate(flatten_type(c.t)): + for i,ft in enumerate(flatten_type(c.t, opts)): if i < len(flat): flat[i] = join(flat[i], ft) else: flat.append(ft) - return flatten_type(discriminant_type(cases)) + flat + return flatten_type(discriminant_type(cases), opts) + flat def join(a, b): if a == b: return a @@ -2938,13 +3017,13 @@ def lift_flat_signed(vi, core_width, t_width): The contents of strings and variable-length lists are stored in memory so lifting these types is essentially the same as loading them from memory; the -only difference is that the pointer and length come from `i32` values instead -of from linear memory. Fixed-length lists are lifted the same way as a +only difference is that the pointer and length come from ptr-sized values +instead of from linear memory. Fixed-length lists are lifted the same way as a tuple (via `lift_flat_record` below). 
```python def lift_flat_string(cx, vi): - ptr = vi.next('i32') - packed_length = vi.next('i32') + ptr = vi.next(cx.opts.memory.ptr_type()) + packed_length = vi.next(cx.opts.memory.ptr_type()) return load_string_from_range(cx, ptr, packed_length) def lift_flat_list(cx, vi, elem_type, maybe_length): @@ -2953,8 +3032,8 @@ def lift_flat_list(cx, vi, elem_type, maybe_length): for i in range(maybe_length): a.append(lift_flat(cx, vi, elem_type)) return a - ptr = vi.next('i32') - length = vi.next('i32') + ptr = vi.next(cx.opts.memory.ptr_type()) + length = vi.next(cx.opts.memory.ptr_type()) return load_list_from_range(cx, ptr, length, elem_type) ``` @@ -2975,7 +3054,7 @@ reinterprets between the different types appropriately and also traps if the high bits of an `i64` are set for a 32-bit type: ```python def lift_flat_variant(cx, vi, cases): - flat_types = flatten_variant(cases) + flat_types = flatten_variant(cases, cx.opts) assert(flat_types.pop(0) == 'i32') case_index = vi.next('i32') trap_if(case_index >= len(cases)) @@ -3092,14 +3171,14 @@ manually coercing the otherwise-incompatible type pairings allowed by `join`: ```python def lower_flat_variant(cx, v, cases): case_index, case_value = match_case(v, cases) - flat_types = flatten_variant(cases) + flat_types = flatten_variant(cases, cx.opts) assert(flat_types.pop(0) == 'i32') c = cases[case_index] if c.t is None: payload = [] else: payload = lower_flat(cx, case_value, c.t) - for i,(fv,have) in enumerate(zip(payload, flatten_type(c.t))): + for i,(fv,have) in enumerate(zip(payload, flatten_type(c.t, cx.opts))): want = flat_types.pop(0) match (have, want): case ('f32', 'i32') : payload[i] = encode_float_as_i32(fv) @@ -3126,12 +3205,12 @@ parameters or results (given by the `CoreValueIter` `vi`) into a tuple of component-level values with types `ts`. 
```python def lift_flat_values(cx, max_flat, vi, ts): - flat_types = flatten_types(ts) + flat_types = flatten_types(ts, cx.opts) if len(flat_types) > max_flat: - ptr = vi.next('i32') + ptr = vi.next(cx.opts.memory.ptr_type()) tuple_type = TupleType(ts) - trap_if(ptr != align_to(ptr, alignment(tuple_type))) - trap_if(ptr + elem_size(tuple_type) > len(cx.opts.memory)) + trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts.memory.ptr_type()))) + trap_if(ptr + elem_size(tuple_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory)) return list(load(cx, ptr, tuple_type).values()) else: return [ lift_flat(cx, vi, t) for t in ts ] @@ -3146,18 +3225,18 @@ out-param: ```python def lower_flat_values(cx, max_flat, vs, ts, out_param = None): cx.inst.may_leave = False - flat_types = flatten_types(ts) + flat_types = flatten_types(ts, cx.opts) if len(flat_types) > max_flat: tuple_type = TupleType(ts) tuple_value = {str(i): v for i,v in enumerate(vs)} if out_param is None: - ptr = cx.opts.realloc(0, 0, alignment(tuple_type), elem_size(tuple_type)) + ptr = cx.opts.realloc(0, 0, alignment(tuple_type, cx.opts.memory.ptr_type()), elem_size(tuple_type, cx.opts.memory.ptr_type())) flat_vals = [ptr] else: - ptr = out_param.next('i32') + ptr = out_param.next(cx.opts.memory.ptr_type()) flat_vals = [] - trap_if(ptr != align_to(ptr, alignment(tuple_type))) - trap_if(ptr + elem_size(tuple_type) > len(cx.opts.memory)) + trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts.memory.ptr_type()))) + trap_if(ptr + elem_size(tuple_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory)) store(cx, tuple_value, tuple_type, ptr) else: flat_vals = [] @@ -3187,9 +3266,11 @@ specifying `string-encoding=utf8` twice is an error. Each individual option, if present, is validated as such: * `string-encoding=N` - can be passed at most once, regardless of `N`. 
-* `memory` - this is a subtype of `(memory 1)` -* `realloc` - the function has type `(func (param i32 i32 i32 i32) (result i32))` -* if `realloc` is present, then `memory` must be present +* `memory` - this is a subtype of `(memory 1)` or `(memory i64 1)`. +* `realloc` - the function has type `(func (param addr addr addr addr) (result addr))` + where `addr` is the address type (`i32` or `i64`) coming from the [`memory type`] + of the `memory` canonopt. +* If `realloc` is present then `memory` must be present. * `post-return` - only allowed on [`canon lift`](#canon-lift), which has rules for validation * ๐Ÿ”€ `async` - cannot be present with `post-return` @@ -3612,7 +3693,7 @@ For a canonical definition: validation specifies: * `$rt` must refer to locally-defined (not imported) resource type * `$f` is given type `(func (param $rt.rep) (result i32))`, where `$rt.rep` is - currently fixed to be `i32`. + `i32` or `i64`. Calling `$f` invokes the following function, which adds an owning handle containing the given resource representation to the current component @@ -3691,7 +3772,7 @@ For a canonical definition: validation specifies: * `$rt` must refer to a locally-defined (not imported) resource type * `$f` is given type `(func (param i32) (result $rt.rep))`, where `$rt.rep` is - currently fixed to be `i32`. + `i32` or `i64`. Calling `$f` invokes the following function, which extracts the resource representation from the handle in the current component instance's `handles` @@ -3714,17 +3795,22 @@ For a canonical definition: (canon context.get $t $i (core func $f)) ``` validation specifies: -* `$t` must be `i32` (for now; see [here][thread-local storage]) +* `$t` must be `i32` or `i64` (see [here][thread-local storage]). 
* `$i` must be less than `Thread.CONTEXT_LENGTH` (`2`) -* `$f` is given type `(func (result i32))` +* `$f` is given type `(func (result $t))` Calling `$f` invokes the following function, which reads the [thread-local -storage] of the [current thread]: +storage] of the [current thread] (taking only the low 32-bits if `$t` is `i32`): ```python def canon_context_get(t, i, thread): - assert(t == 'i32') + MASK_32BIT = (1 << 32) - 1 + + assert(t == 'i32' or t == 'i64') assert(i < Thread.CONTEXT_LENGTH) - return [thread.context[i]] + result = thread.context[i] + if t == 'i32': + result &= MASK_32BIT + return [result] ``` @@ -3735,15 +3821,15 @@ For a canonical definition: (canon context.set $t $i (core func $f)) ``` validation specifies: -* `$t` must be `i32` (for now; see [here][thread-local storage]) +* `$t` must be `i32` or `i64` (see [here][thread-local storage]) * `$i` must be less than `Thread.CONTEXT_LENGTH` (`2`) -* `$f` is given type `(func (param $v i32))` +* `$f` is given type `(func (param $v $t))` Calling `$f` invokes the following function, which writes to the [thread-local storage] of the [current thread]: ```python def canon_context_set(t, i, thread, v): - assert(t == 'i32') + assert(t == 'i32' or t == 'i64') assert(i < Thread.CONTEXT_LENGTH) thread.context[i] = v return [] @@ -3907,7 +3993,8 @@ For a canonical definition: (canon waitable-set.wait $cancellable? (memory $mem) (core func $f)) ``` validation specifies: -* `$f` is given type `(func (param $si) (param $ptr i32) (result i32))` +* `$f` is given type `(func (param $si i32) (param $ptr) (result i32))` where + `$ptr` is the address type of `$mem`. Calling `$f` invokes the following function which waits for progress to be made on a `Waitable` in the given waitable set (indicated by index `$si`) and then @@ -3950,7 +4037,8 @@ For a canonical definition: (canon waitable-set.poll $cancellable? 
(memory $mem) (core func $f)) ``` validation specifies: -* `$f` is given type `(func (param $si i32) (param $ptr i32) (result i32))` +* `$f` is given type `(func (param $si i32) (param $ptr) (result i32))` where + `$ptr` is the address type of `$mem`. Calling `$f` invokes the following function, which either returns an event that was pending on one of the waitables in the given waitable set (the same way as @@ -4167,7 +4255,9 @@ For canonical definitions: ``` In addition to [general validation of `$opts`](#canonopt-validation) validation specifies: -* `$f` is given type `(func (param i32 i32 i32) (result i32))` +* `$f` is given type `(func (param i32 T T) (result T))` where `T` is `i32` or + `i64` as determined by the address type of `memory` from `$opts` (or `i32` by + default if no `memory` is present). * `$stream_t` must be a type of the form `(stream $t?)` * If `$t` is present: * [`lower($t)` above](#canonopt-validation) defines required options for `stream.write` @@ -4232,7 +4322,11 @@ context switches. Next, the stream's `state` is updated based on the result being delivered to core wasm so that, once a stream end has been notified that the other end dropped, calling anything other than `stream.drop-*` traps. Lastly, `stream_event` packs the `CopyResult` and number of elements copied up -until this point into a single `i32` payload for core wasm. +until this point into a single `i32` or `i64`-sized payload for core wasm. The +size is determined by the `addrtype` coming from the [`memory type`] of the +`memory` immediate. Note that even though the number of elements copied is +packed into an `addrtype`, the maximum length of the buffer is fixed at `2^28 - 1` +independently of the `addrtype`. 
```python def stream_event(result, reclaim_buffer): reclaim_buffer() @@ -4282,7 +4376,9 @@ For canonical definitions: ``` In addition to [general validation of `$opts`](#canonopt-validation) validation specifies: -* `$f` is given type `(func (param i32 i32) (result i32))` +* `$f` is given type `(func (param i32 T) (result i32))` where `T` is `i32` or + `i64` as determined by the address type of `memory` from `$opts` (or `i32` + by default if no `memory` is present). * `$future_t` must be a type of the form `(future $t?)` * If `$t` is present: * [`lift($t)` above](#canonopt-validation) defines required options for `future.read` @@ -4505,9 +4601,12 @@ For a canonical definition: (canon thread.new-indirect $ft $ftbl (core func $new_indirect)) ``` validation specifies -* `$ft` must refer to the type `(func (param $c i32))` +* `$ft` must refer to the type `(func (param $c))` where `$c` is `i32` or + `i64`. * `$ftbl` must refer to a table whose element type matches `funcref` -* `$new_indirect` is given type `(func (param $fi i32) (param $c i32) (result i32))` +* `$new_indirect` is given type `(func (param $fi) (param $c) (result i32))` + where `$fi` is `i32` or `i64` as determined by `$ftbl`'s table type and + `$c` has the same type as the parameter in `$ft`. 
Calling `$new_indirect` invokes the following function which reads a `funcref` from `$ftbl` (trapping if out-of-bounds, null or the wrong type), calls the @@ -4522,7 +4621,7 @@ class CoreFuncRef: def canon_thread_new_indirect(ft, ftbl: Table[CoreFuncRef], thread, fi, c): trap_if(not thread.task.inst.may_leave) f = ftbl.get(fi) - assert(ft == CoreFuncType(['i32'], [])) + assert(ft == CoreFuncType(['i32'], []) or ft == CoreFuncType(['i64'], [])) trap_if(f.t != ft) def thread_func(thread): [] = call_and_trap_on_throw(f.callee, thread, [c]) @@ -4702,7 +4801,9 @@ For a canonical definition: (canon error-context.new $opts (core func $f)) ``` validation specifies: -* `$f` is given type `(func (param i32 i32) (result i32))` +* `$f` is given type `(func (param $ptr) (param $units) (result i32))` + where `$ptr` and `$units` are both `i32` or `i64` as determined by + the address type of the `memory` field in `$opts`. * `async` is not present * `memory` must be present @@ -4743,7 +4844,8 @@ For a canonical definition: (canon error-context.debug-message $opts (core func $f)) ``` validation specifies: -* `$f` is given type `(func (param i32 i32))` +* `$f` is given type `(func (param i32) (param $ptr))` where `$ptr` is `i32` or `i64` + as determined by the address type of `memory` from `$opts` * `async` is not present * `memory` must be present * `realloc` must be present @@ -4762,8 +4864,9 @@ def canon_error_context_debug_message(opts, thread, i, ptr): store_string(cx, errctx.debug_message, ptr) return [] ``` -Note that `ptr` points to an 8-byte region of memory into which will be stored -the pointer and length of the debug string (allocated via `opts.realloc`). +Note that `ptr` points to a region of memory (8 bytes for memory32, 16 bytes +for memory64) into which will be stored the pointer and length of the debug +string (allocated via `opts.realloc`). ### ๐Ÿ“ `canon error-context.drop` @@ -4793,9 +4896,10 @@ For a canonical definition: (canon thread.spawn-ref shared? 
$ft (core func $spawn_ref)) ``` validation specifies: -* `$ft` must refer to the type `(shared? (func (param $c i32)))` (see explanation below) +* `$ft` must refer to the type `(shared? (func (param $c)))` where `$c` has + type `i32` or `i64`. * `$spawn_ref` is given type - `(shared? (func (param $f (ref null $ft)) (param $c i32) (result $e i32)))` + `(shared? (func (param $f (ref null $ft)) (param $c) (result $e i32)))` When the `shared` immediate is not present, the spawned thread is *cooperative*, only switching at specific program points. When the `shared` @@ -4804,7 +4908,7 @@ parallel with all other threads. > Note: ideally, a thread could be spawned with [arbitrary thread parameters]. > Currently, that would require additional work in the toolchain to support so, -> for simplicity, the current proposal simply fixes a single `i32` parameter +> for simplicity, the current proposal simply fixes a single `i32` or `i64` parameter > type. However, `thread.spawn-ref` could be extended to allow arbitrary thread > parameters in the future, once it's concretely beneficial to the toolchain. > The inclusion of `$ft` ensures backwards compatibility for when arbitrary @@ -4834,12 +4938,13 @@ For a canonical definition: (canon thread.spawn-indirect shared? $ft $tbl (core func $spawn_indirect)) ``` validation specifies: -* `$ft` must refer to the type `(shared? (func (param $c i32)))` is allowed - (see explanation in `thread.spawn-ref` above) +* `$ft` must refer to the type `(shared? (func (param $c)))` + where `$c` is either `i32` or `i64`. * `$tbl` must refer to a shared table whose element type matches `(ref null (shared? func))` * `$spawn_indirect` is given type - `(shared? (func (param $i i32) (param $c i32) (result $e i32)))` + `(shared? 
(func (param $i) (param $c) (result $e i32)))` where `$i` is + `i32` or `i64` determined by `$tbl`'s table type When the `shared` immediate is not present, the spawned thread is *cooperative*, only switching at specific program points. When the `shared` @@ -4932,6 +5037,8 @@ def canon_thread_available_parallelism(): [`memaddr`]: https://webassembly.github.io/spec/core/exec/runtime.html#syntax-memaddr [`memaddrs` table]: https://webassembly.github.io/spec/core/exec/runtime.html#syntax-moduleinst [`memidx`]: https://webassembly.github.io/spec/core/syntax/modules.html#syntax-memidx +[`memory` instance]: https://webassembly.github.io/spec/core/exec/runtime.html#memory-instances +[`memory type`]: https://webassembly.github.io/spec/core/syntax/types.html#memory-types [Alignment]: https://en.wikipedia.org/wiki/Data_structure_alignment [UTF-8]: https://en.wikipedia.org/wiki/UTF-8 diff --git a/design/mvp/Concurrency.md b/design/mvp/Concurrency.md index 6c9c5f6f..d0eab4c4 100644 --- a/design/mvp/Concurrency.md +++ b/design/mvp/Concurrency.md @@ -151,7 +151,7 @@ use cases mentioned in the [goals](#goals). Until the Core WebAssembly [shared-everything-threads] proposal allows Core WebAssembly function types to be annotated with `shared`, `thread.new-indirect` -can only call non-`shared` functions (via `i32` `(table funcref)` index, just +can only call non-`shared` functions (via `(table funcref)` index, just like `call_indirect`) and thus currently all threads must execute [cooperatively] in a sequentially-interleaved fashion, switching between threads only at explicit program points just like (and implementable via) a @@ -371,7 +371,7 @@ New threads are created with the [`thread.new-indirect`] built-in. As mentioned thread which is why threads and tasks are N:1. `thread.new-indirect` adds a new thread to the component instance's threads table and returns the `i32` index of this table entry to the Core WebAssembly caller. 
Like [`pthread_create`], -`thread.new-indirect` takes a Core WebAssembly function (via `i32` index into a +`thread.new-indirect` takes a Core WebAssembly function (via index into a `funcref` table) and a "closure" parameter to pass to the function when called on the new thread. However, unlike `pthread_create`, the new thread is initially in a "suspended" state and must be explicitly "resumed" using one of @@ -414,7 +414,7 @@ current thread's thread-local storage can be read and written from core wasm code by calling the [`context.get`] and [`context.set`] built-ins. The thread-local storage array's length is currently fixed to contain exactly -2 `i32`s with the goal of allowing this array to be stored inline in whatever +2 `i64`s with the goal of allowing this array to be stored inline in whatever existing runtime data structure is already efficiently reachable from ambient compiled wasm code. Because module instantiation is declarative in the Component Model, the imported `context.{get,set}` built-ins can be inlined by @@ -425,6 +425,13 @@ natural place to store: 2. a pointer to a struct used by the runtime to implement the language's thread-local features +Both of `context.{get,set}` take an immediate argument of `i32` or `i64` to +indicate the return or argument type. `context.set i32` will zero the high +bits of the stored value and `context.get i32` will only read the low bits of +the stored value. Generally it is expected that 32-bit components always use +the `i32` immediate and 64-bit components always use the `i64` immediate, but +mixing these calls is still valid. + When threads are created explicitly by `thread.new-indirect`, the lifetime of the thread-local storage array ends when the function passed to `thread.new-indirect` returns and thus any linear-memory allocations associated @@ -436,12 +443,6 @@ stackless async ABI is used, returning the "exit" code to the event loop. 
This non-reuse of thread-local storage between distinct export calls avoids what would otherwise be a likely source of TLS-related memory leaks. -When [memory64] is integrated into the Component Model's Canonical ABI, -`context.{get,set}` will be backwards-compatibly relaxed to allow `i64` -pointers (overlaying the `i32` values like hardware 32/64-bit registers). When -[wasm-gc] is integrated, these integral context values can serve as indices -into guest-managed tables of typed GC references. - Since the same mutable thread-local storage cells are shared by all core wasm running under the same thread in the same component, the cells' contents must be carefully coordinated in the same way as native code has to carefully @@ -886,7 +887,7 @@ world w { import quux: async func(t: list) -> string; } ``` -the default/synchronous lowered import function signatures are: +the default/synchronous lowered import function signatures (assuming 32-bit memories) are: ```wat ;; sync (func $foo (param $s-ptr i32) (param $s-len i32) (result i32)) diff --git a/design/mvp/Explainer.md b/design/mvp/Explainer.md index d8c0bdd1..80e81a0c 100644 --- a/design/mvp/Explainer.md +++ b/design/mvp/Explainer.md @@ -1299,16 +1299,19 @@ default is `utf8`. It is a validation error to include more than one The `(memory ...)` option specifies the memory that the Canonical ABI will use to load and store values. If the Canonical ABI needs to load or store, -validation requires this option to be present (there is no default). +validation requires this option to be present (there is no default). The types +of lowered functions may also depend on the [`core:memory-type`] of this memory, +specifically its [`core:address-type`] (indicated by `memory.addrtype`), if pointers +are transitively contained in parameters or results.
The `(realloc ...)` option specifies a core function that is validated to have the following core function type: ```wat -(func (param $originalPtr i32) - (param $originalSize i32) - (param $alignment i32) - (param $newSize i32) - (result i32)) +(func (param $originalPtr memory.addrtype) + (param $originalSize memory.addrtype) + (param $alignment memory.addrtype) + (param $newSize memory.addrtype) + (result memory.addrtype)) ``` The Canonical ABI will use `realloc` both to allocate (passing `0` for the first two parameters) and reallocate. If the Canonical ABI needs `realloc`, @@ -1452,9 +1455,9 @@ canon ::= ... | (canon thread.new-indirect (core func ?)) ๐Ÿงต | (canon thread.switch-to cancellable? (core func ?)) ๐Ÿงต | (canon thread.suspend cancellable? (core func ?)) ๐Ÿงต - | (canon thread.resume-later (core func ?) ๐Ÿงต - | (canon thread.yield-to cancellable? (core func ?) ๐Ÿงต - | (canon thread.yield cancellable? (core func ?) ๐Ÿงต + | (canon thread.resume-later (core func ?)) ๐Ÿงต + | (canon thread.yield-to cancellable? (core func ?)) ๐Ÿงต + | (canon thread.yield cancellable? (core func ?)) ๐Ÿงต | (canon error-context.new * (core func ?)) ๐Ÿ“ | (canon error-context.debug-message * (core func ?)) ๐Ÿ“ | (canon error-context.drop (core func ?)) ๐Ÿ“ @@ -1470,7 +1473,7 @@ canon ::= ... | Synopsis | | | -------------------------- | -------------------------- | | Approximate WIT signature | `func(rep: T.rep) -> T` | -| Canonical ABI signature | `[rep:i32] -> [i32]` | +| Canonical ABI signature | `[rep: T.rep] -> [i32]` | The `resource.new` built-in creates a new resource (of resource type `T`) with `rep` as its representation, and returns a new handle pointing to the new @@ -1480,7 +1483,7 @@ component that defined `T`. In the Canonical ABI, `T.rep` is defined to be the `$rep` in the `(type $T (resource (rep $rep) ...))` type definition that defined `T`. While it's designed to allow different types in the future, it is currently -hard-coded to always be `i32`. 
+limited to `i32` or `i64`. For details, see [`canon_resource_new`] in the Canonical ABI explainer. @@ -1503,7 +1506,7 @@ For details, see [`canon_resource_drop`] in the Canonical ABI explainer. | Synopsis | | | -------------------------- | ------------------------ | | Approximate WIT signature | `func(t: T) -> T.rep` | -| Canonical ABI signature | `[t:i32] -> [i32]` | +| Canonical ABI signature | `[t:i32] -> [T.rep]` | The `resource.rep` built-in returns the representation of the resource (with resource type `T`) pointed to by the handle `t`. Validation only allows @@ -1512,7 +1515,7 @@ resource type `T`) pointed to by the handle `t`. Validation only allows In the Canonical ABI, `T.rep` is defined to be the `$rep` in the `(type $T (resource (rep $rep) ...))` type definition that defined `T`. While it's designed to allow different types in the future, it is currently -hard-coded to always be `i32`. +limited to `i32` or `i64`. As an example, the following component imports the `resource.new` built-in, allowing it to create and return new resources to its client: @@ -1554,12 +1557,19 @@ See the [concurrency explainer] for background. | Synopsis | | | -------------------------- | ------------------ | | Approximate WIT signature | `func() -> T` | -| Canonical ABI signature | `[] -> [i32]` | +| Canonical ABI signature | `[] -> [T]` | The `context.get` built-in returns the `i`th element of the [current thread]'s [thread-local storage] array. Validation currently restricts `i` to be less -than 2 and `t` to be `i32`, but these restrictions may be relaxed in the -future. +than 2 and `T` to be `i32` or `i64`, but these restrictions may be relaxed in +the future. + +Mixing `i32` and `i64` results in truncating or unsigned extending the +stored values: +* If `context.get i32 i` is called after `context.set i64 i v`, + only the low 32-bits are read (returning `i32.wrap_i64 v`). 
+* If `context.get i64 i` is called after `context.set i32 i v`, + the upper 32-bits will be zero (returning `i64.extend_i32_u v`). For details, see [Thread-Local Storage] in the concurrency explainer and [`canon_context_get`] in the Canonical ABI explainer. @@ -1569,12 +1579,19 @@ For details, see [Thread-Local Storage] in the concurrency explainer and | Synopsis | | | -------------------------- | ----------------- | | Approximate WIT signature | `func(v: T)` | -| Canonical ABI signature | `[i32] -> []` | +| Canonical ABI signature | `[T] -> []` | The `context.set` built-in sets the `i`th element of the [current thread]'s [thread-local storage] array to the value `v`. Validation currently restricts -`i` to be less than 2 and `t` to be `i32`, but these restrictions may be -relaxed in the future. +`i` to be less than 2 and `T` to be `i32` or `i64`, but these restrictions may +be relaxed in the future. + +Mixing `i32` and `i64` results in truncating or unsigned extending the +stored values: +* If `context.get i32 i` is called after `context.set i64 i v`, + only the low 32-bits are read (returning `i32.wrap_i64 v`). +* If `context.get i64 i` is called after `context.set i32 i v`, + the upper 32-bits will be zero (returning `i64.extend_i32_u v`). For details, see [Thread-Local Storage] in the concurrency explainer and [`canon_context_set`] in the Canonical ABI explainer.
@@ -1670,10 +1687,10 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and ###### ๐Ÿ”€ `waitable-set.wait` -| Synopsis | | -| -------------------------- | ---------------------------------------------- | -| Approximate WIT signature | `func(s: waitable-set) -> event` | -| Canonical ABI signature | `[s:i32 payload-addr:i32] -> [event-code:i32]` | +| Synopsis | | +| -------------------------- | ---------------------------------------------------------- | +| Approximate WIT signature | `func(s: waitable-set) -> event` | +| Canonical ABI signature | `[s:i32 payload-addr:memory.addrtype] -> [event-code:i32]` | where `event` is defined in WIT as: ```wit @@ -1735,10 +1752,10 @@ For details, see [Waitables and Waitable Sets] in the concurrency explainer and ###### ๐Ÿ”€ `waitable-set.poll` -| Synopsis | | -| -------------------------- | ---------------------------------------------- | -| Approximate WIT signature | `func(s: waitable-set) -> event` | -| Canonical ABI signature | `[s:i32 payload-addr:i32] -> [event-code:i32]` | +| Synopsis | | +| -------------------------- | ---------------------------------------------------------- | +| Approximate WIT signature | `func(s: waitable-set) -> event` | +| Canonical ABI signature | `[s:i32 payload-addr:memory.addrtype] -> [event-code:i32]` | where `event` is defined as in [`waitable-set.wait`](#-waitable-setwait). @@ -1852,11 +1869,11 @@ For details, see [Streams and Futures] in the concurrency explainer and ###### ๐Ÿ”€ `stream.read` and `stream.write` -| Synopsis | | -| -------------------------------------------- | ----------------------------------------------------------------------------------------------- | -| Approximate WIT signature for `stream.read` | `func>(e: readable-stream-end, b: writable-buffer?) -> option` | -| Approximate WIT signature for `stream.write` | `func>(e: writable-stream-end, b: readable-buffer?) 
-> option` | -| Canonical ABI signature | `[stream-end:i32 ptr:i32 num:i32] -> [i32]` | +| Synopsis | | +| -------------------------------------------- | ------------------------------------------------------------------------------------------------------ | +| Approximate WIT signature for `stream.read` | `func,memory>(e: readable-stream-end, b: writable-buffer?) -> option` | +| Approximate WIT signature for `stream.write` | `func,memory>(e: writable-stream-end, b: readable-buffer?) -> option` | +| Canonical ABI signature | `[stream-end:i32 ptr:memory.addrtype num:memory.addrtype] -> [memory.addrtype]` | where `stream-result` is defined in WIT as: ```wit @@ -1912,24 +1929,24 @@ any subsequent operation on the stream other than `stream.drop-{readable,writabl traps. In the Canonical ABI, the `{readable,writable}-stream-end` is passed as an -`i32` index into the component instance's table followed by a pair of `i32`s +`i32` index into the component instance's table followed by a pair of `memory.addrtype`s describing the linear memory offset and size-in-elements of the `{readable,writable}-buffer`. The `option` return value is -bit-packed into a single `i32` where: -* `0xffff_ffff` represents `none`. +bit-packed into a single `memory.addrtype` where: +* all-ones represents `none`. * Otherwise, the `result` is in the low 4 bits and the `progress` is in the - high 28 bits. + remaining high bits. For details, see [Streams and Futures] in the concurrency explainer and [`canon_stream_read`] in the Canonical ABI explainer. ###### ๐Ÿ”€ `future.read` and `future.write` -| Synopsis | | -| -------------------------------------------- | -------------------------------------------------------------------------------------------------------- | -| Approximate WIT signature for `future.read` | `func>(e: readable-future-end, b: writable-buffer?) -> option` | -| Approximate WIT signature for `future.write` | `func>(e: writable-future-end, v: readable-buffer?) 
-> option` | -| Canonical ABI signature | `[readable-future-end:i32 ptr:i32] -> [i32]` | +| Synopsis | | +| -------------------------------------------- | --------------------------------------------------------------------------------------------------------------- | +| Approximate WIT signature for `future.read` | `func,memory>(e: readable-future-end, b: writable-buffer?) -> option` | +| Approximate WIT signature for `future.write` | `func,memory>(e: writable-future-end, v: readable-buffer?) -> option` | +| Canonical ABI signature | `[readable-future-end:i32 ptr:memory.addrtype] -> [i32]` | where `future-{read,write}-result` are defined in WIT as: ```wit @@ -1980,10 +1997,10 @@ called before successfully writing a value. In the Canonical ABI, the `{readable,writable}-future-end` is passed as an `i32` index into the component instance's table followed by a single -`i32` describing the linear memory offset of the +`memory.addrtype` describing the linear memory offset of the `{readable,writable}-buffer`. The `option` -return value is bit-packed into the single `i32` return value where -`0xffff_ffff` represents `none`. And, `future-read-result.cancelled` is encoded +return value is bit-packed into the single `i32` return value where all-ones +represents `none`. And, `future-read-result.cancelled` is encoded as the value of `future-write-result.cancelled`, rather than the value implied by the `enum` definition above. 
@@ -2054,21 +2071,22 @@ For details, see [Thread Built-ins] in the concurrency explainer and ###### ๐Ÿงต `thread.new-indirect` -| Synopsis | | -| -------------------------- | ------------------------------------------------------------- | -| Approximate WIT signature | `func(fi: u32, c: FuncT.params[0]) -> thread` | -| Canonical ABI signature | `[fi:i32 c:i32] -> [i32]` | +| Synopsis | | +| -------------------------- | --------------------------------------------------------------------- | +| Approximate WIT signature | `func(fi: table.addrtype, c: FuncT.params[0]) -> thread` | +| Canonical ABI signature | `[fi:table.addrtype c: FuncT.params[0]] -> [i32]` | The `thread.new-indirect` built-in adds a new thread to the current component instance's table, returning the index of the new thread. The function table supplied via [`core:tableidx`] is indexed by the `fi` operand and then dynamically checked to match the type `FuncT` (in the same manner as -`call_indirect`). Lastly, the indexed function is called in the new thread -with `c` as its first and only parameter. +`call_indirect`). Here the `table.addrtype` is either `i32` or `i64` as +determined by the [`core:table-type`] of the table. Lastly, the indexed function +is called in the new thread with `c` as its first and only parameter. -Currently, `FuncT` must be `(func (param i32))` and thus `c` must always be an -`i32`, but this restriction can be loosened in the future as the Canonical -ABI is extended for [memory64] and [GC]. +Currently, `FuncT` must be `(func (param i32))` or `(func (param i64))` and thus +`c` must always be an `i32` or `i64`, but this restriction can be loosened in +the future as the Canonical ABI is extended for [GC]. 
As explained in the [concurrency explainer], a thread created by `thread.new-indirect` is initially in a suspended state and must be resumed @@ -2151,7 +2169,7 @@ For details, see [Thread Built-ins] in the concurrency explainer and | Synopsis | | | -------------------------- | ------------------------------- | | Approximate WIT signature | `func(t: thread)` | -| Canonical ABI signature | `[t:i32] -> [suspend-result]` | +| Canonical ABI signature | `[t:i32] -> [i32]` | The `thread.yield-to` built-in immediately resumes execution of the thread `t`, (trapping if `t` is not in a "suspended" state) leaving the [current thread] in @@ -2198,10 +2216,10 @@ For details, see [Thread Built-ins] in the concurrency explainer and ###### ๐Ÿงตโ‘ก `thread.spawn-ref` -| Synopsis | | -| -------------------------- | ------------------------------------------------------------------ | -| Approximate WIT signature | `func(f: FuncT, c: FuncT.params[0]) -> bool` | -| Canonical ABI signature | `shared? [f:(ref null (shared (func (param i32))) c:i32] -> [i32]` | +| Synopsis | | +| -------------------------- | ------------------------------------------------------------------------------------------ | +| Approximate WIT signature | `func(f: FuncT, c: FuncT.params[0]) -> bool` | +| Canonical ABI signature | `shared? [f:(ref null (shared (func (param FuncT.params[0]))) c:FuncT.params[0]] -> [i32]` | The `thread.spawn-ref` built-in is an optimization, fusing a call to `thread.new_ref` (assuming `thread.new_ref` was added as part of adding a @@ -2213,10 +2231,10 @@ For details, see [`canon_thread_spawn_ref`] in the Canonical ABI explainer. ###### ๐Ÿงตโ‘ก `thread.spawn-indirect` -| Synopsis | | -| -------------------------- | ------------------------------------------------------------------ | -| Approximate WIT signature | `func(i: u32, c: FuncT.params[0]) -> bool` | -| Canonical ABI signature | `shared? 
[i:i32 c:i32] -> [i32]` | +| Synopsis | | +| -------------------------- | -------------------------------------------------------------------------- | +| Approximate WIT signature | `func(i: table.addrtype, c: FuncT.params[0]) -> bool` | +| Canonical ABI signature | `shared? [i:table.addrtype c:FuncT.params[0]] -> [i32]` | The `thread.spawn-indirect` built-in is an optimization, fusing a call to [`thread.new-indirect`](#-threadnew-indirect) with a call to @@ -2248,10 +2266,10 @@ explainer. ###### ๐Ÿ“ `error-context.new` -| Synopsis | | -| -------------------------------- | ---------------------------------------- | -| Approximate WIT signature | `func(message: string) -> error-context` | -| Canonical ABI signature | `[ptr:i32 len:i32] -> [i32]` | +| Synopsis | | +| -------------------------------- | ---------------------------------------------------- | +| Approximate WIT signature | `func(message: string) -> error-context` | +| Canonical ABI signature | `[ptr:memory.addrtype len:memory.addrtype] -> [i32]` | The `error-context.new` built-in returns a new `error-context` value. The given string is non-deterministically transformed to produce the `error-context`'s @@ -2264,17 +2282,17 @@ For details, see [`canon_error_context_new`] in the Canonical ABI explainer. ###### ๐Ÿ“ `error-context.debug-message` -| Synopsis | | -| -------------------------------- | --------------------------------------- | -| Approximate WIT signature | `func(errctx: error-context) -> string` | -| Canonical ABI signature | `[errctxi:i32 ptr:i32] -> []` | +| Synopsis | | +| -------------------------------- | ----------------------------------------------- | +| Approximate WIT signature | `func(errctx: error-context) -> string` | +| Canonical ABI signature | `[errctxi:i32 ptr:memory.addrtype] -> []` | The `error-context.debug-message` built-in returns the [debug message](#error-context-type) of the given `error-context`. 
-In the Canonical ABI, it writes the debug message into `ptr` as an 8-byte -(`ptr`, `length`) pair, according to the Canonical ABI for `string`, given the -`*` immediates. +In the Canonical ABI, it writes the debug message into `ptr` as an 8-byte or +16-byte (`ptr`, `length`) pair, according to the Canonical ABI for `string`, +given the `*` immediates. For details, see [`canon_error_context_debug_message`] in the Canonical ABI explainer. @@ -3170,6 +3188,9 @@ For some use-case-focused, worked examples, see: [func-import-abbrev]: https://webassembly.github.io/spec/core/text/modules.html#text-func-abbrev [`core:version`]: https://webassembly.github.io/spec/core/binary/modules.html#binary-version [`core:tableidx`]: https://webassembly.github.io/spec/core/syntax/modules.html#syntax-tableidx +[`core:address-type`]: https://webassembly.github.io/spec/core/syntax/types.html#address-types +[`core:memory-type`]: https://webassembly.github.io/spec/core/syntax/types.html#memory-types +[`core:table-type`]: https://webassembly.github.io/spec/core/syntax/types.html#table-types [Embedder]: https://webassembly.github.io/spec/core/appendix/embedding.html [`module_instantiate`]: https://webassembly.github.io/spec/core/appendix/embedding.html#mathrm-module-instantiate-xref-exec-runtime-syntax-store-mathit-store-xref-syntax-modules-syntax-module-mathit-module-xref-exec-runtime-syntax-externval-mathit-externval-ast-xref-exec-runtime-syntax-store-mathit-store-xref-exec-runtime-syntax-moduleinst-mathit-moduleinst-xref-appendix-embedding-embed-error-mathit-error diff --git a/design/mvp/canonical-abi/definitions.py b/design/mvp/canonical-abi/definitions.py index 32db6db0..16dcdd4d 100644 --- a/design/mvp/canonical-abi/definitions.py +++ b/design/mvp/canonical-abi/definitions.py @@ -230,10 +230,41 @@ def __init__(self, opts, inst, borrow_scope = None): ### Canonical ABI Options +def ptr_size(ptr_type): + match ptr_type: + case 'i32': + return 4 + case 'i64': + return 8 + +@dataclass +class 
MemInst: + bytes: bytearray + addrtype: Literal['i32', 'i64'] + + def __getitem__(self, i): + return self.bytes[i] + + def __setitem__(self, i, v): + self.bytes[i] = v + + def __len__(self): + return len(self.bytes) + + def ptr_type(self): + return self.addrtype + + def ptr_size(self): + return ptr_size(self.ptr_type()) + + def equal(lhs, rhs): + return lhs.bytes == rhs.bytes and \ + lhs.addrtype == rhs.addrtype + @dataclass class LiftOptions: string_encoding: str = 'utf8' - memory: Optional[bytearray] = None + memory: Optional[MemInst] = None def equal(lhs, rhs): return lhs.string_encoding == rhs.string_encoding and \ @@ -775,8 +806,8 @@ class BufferGuestImpl(Buffer): def __init__(self, t, cx, ptr, length): trap_if(length > Buffer.MAX_LENGTH) if t and length > 0: - trap_if(ptr != align_to(ptr, alignment(t))) - trap_if(ptr + length * elem_size(t) > len(cx.opts.memory)) + trap_if(ptr != align_to(ptr, alignment(t, cx.opts.memory.ptr_type()))) + trap_if(ptr + length * elem_size(t, cx.opts.memory.ptr_type()) > len(cx.opts.memory)) self.cx = cx self.t = t self.ptr = ptr @@ -794,7 +825,7 @@ def read(self, n): assert(n <= self.remain()) if self.t: vs = load_list_from_valid_range(self.cx, self.ptr, n, self.t) - self.ptr += n * elem_size(self.t) + self.ptr += n * elem_size(self.t, self.cx.opts.memory.ptr_type()) else: vs = n * [()] self.progress += n @@ -805,7 +836,7 @@ def write(self, vs): assert(len(vs) <= self.remain()) if self.t: store_list_into_valid_range(self.cx, vs, self.ptr, self.t) - self.ptr += len(vs) * elem_size(self.t) + self.ptr += len(vs) * elem_size(self.t, self.cx.opts.memory.ptr_type()) else: assert(all(v == () for v in vs)) self.progress += len(vs) @@ -1062,7 +1093,7 @@ def contains(t, p): ### Alignment -def alignment(t): +def alignment(t, ptr_type): match despecialize(t): case BoolType() : return 1 case S8Type() | U8Type() : return 1 @@ -1072,28 +1103,28 @@ def alignment(t): case F32Type() : return 4 case F64Type() : return 8 case CharType() : return 4 
- case StringType() : return 4 + case StringType() : return ptr_size(ptr_type) case ErrorContextType() : return 4 - case ListType(t, l) : return alignment_list(t, l) - case RecordType(fields) : return alignment_record(fields) - case VariantType(cases) : return alignment_variant(cases) + case ListType(t, l) : return alignment_list(t, l, ptr_type) + case RecordType(fields) : return alignment_record(fields, ptr_type) + case VariantType(cases) : return alignment_variant(cases, ptr_type) case FlagsType(labels) : return alignment_flags(labels) case OwnType() | BorrowType() : return 4 case StreamType() | FutureType() : return 4 -def alignment_list(elem_type, maybe_length): +def alignment_list(elem_type, maybe_length, ptr_type): if maybe_length is not None: - return alignment(elem_type) - return 4 + return alignment(elem_type, ptr_type) + return ptr_size(ptr_type) -def alignment_record(fields): +def alignment_record(fields, ptr_type): a = 1 for f in fields: - a = max(a, alignment(f.t)) + a = max(a, alignment(f.t, ptr_type)) return a -def alignment_variant(cases): - return max(alignment(discriminant_type(cases)), max_case_alignment(cases)) +def alignment_variant(cases, ptr_type): + return max(alignment(discriminant_type(cases), ptr_type), max_case_alignment(cases, ptr_type)) def discriminant_type(cases): n = len(cases) @@ -1104,11 +1135,11 @@ def discriminant_type(cases): case 2: return U16Type() case 3: return U32Type() -def max_case_alignment(cases): +def max_case_alignment(cases, ptr_type): a = 1 for c in cases: if c.t is not None: - a = max(a, alignment(c.t)) + a = max(a, alignment(c.t, ptr_type)) return a def alignment_flags(labels): @@ -1120,7 +1151,7 @@ def alignment_flags(labels): ### Element Size -def elem_size(t): +def elem_size(t, ptr_type): match despecialize(t): case BoolType() : return 1 case S8Type() | U8Type() : return 1 @@ -1130,40 +1161,48 @@ def elem_size(t): case F32Type() : return 4 case F64Type() : return 8 case CharType() : return 4 - case 
def worst_case_elem_size(t, ptr_type):
  """Return the largest elem_size(t, ...) over all possible address types.

  Used to bound list byte lengths independently of which memory (32-bit
  or 64-bit) ends up backing the data. A `ptr_type` of None means there
  is no memory in play, in which case there is only one size to report.
  """
  if ptr_type is None:
    return elem_size(t, ptr_type)
  other_ptr_type = 'i32' if ptr_type == 'i64' else 'i64'
  return max(elem_size(t, ptr_type), elem_size(t, other_ptr_type))

def elem_size_list(elem_type, maybe_length, ptr_type):
  # Fixed-length lists are stored inline; variable-length lists are a
  # (ptr, length) pair of pointer-sized fields.
  if maybe_length is not None:
    return maybe_length * elem_size(elem_type, ptr_type)
  return 2 * ptr_size(ptr_type)

def elem_size_record(fields, ptr_type):
  """Byte size of a record: each field aligned in order, then the total
  padded up to the record's own alignment."""
  s = 0
  for f in fields:
    s = align_to(s, alignment(f.t, ptr_type))
    s += elem_size(f.t, ptr_type)
  assert(s > 0)
  return align_to(s, alignment_record(fields, ptr_type))

def align_to(ptr, alignment):
  """Round `ptr` up to the next multiple of `alignment`.

  Fix: uses exact integer ceiling division. The previous
  `math.ceil(ptr / alignment)` routed through a float and silently lost
  precision for pointers above 2**53 — which is reachable now that
  64-bit address types are supported.
  """
  return -(-ptr // alignment) * alignment
# A lifted string is carried as (text, source-encoding, tagged code units),
# where the top bit of the code-unit count (UTF16_TAG) marks a
# latin1+utf16 string that is actually UTF-16 encoded.
String = tuple[str, str, int]

UTF16_TAG = 1 << 31

def worst_case_string_byte_length(string : String):
  """Return an upper bound on the byte length of `string` after lowering
  into any of the canonical string encodings.

  For UTF-16 sources (including tagged latin1+utf16), a code unit can
  grow to at most 3 bytes of UTF-8 when the string contains any
  non-ASCII code point, and at most 2 bytes otherwise. For byte-oriented
  sources (utf8 / untagged latin1), the worst case is lowering into
  UTF-16, which at most doubles the code-unit count.
  """
  (s, encoding, tagged_code_units) = string
  if encoding == 'utf16' or (encoding == 'latin1+utf16' and (tagged_code_units & UTF16_TAG)):
    for code_point in s:
      if ord(code_point) >= 2 ** 7:
        return 3 * (tagged_code_units & ~UTF16_TAG)
    return 2 * (tagged_code_units & ~UTF16_TAG)
  # Fix: the original fell off the end here and returned None for utf8 and
  # untagged latin1+utf16 sources, so the MAX_STRING_BYTE_LENGTH trap check
  # raised a TypeError instead of computing a bound. The sound bound is the
  # UTF-16 lowering (2 bytes per source code unit), matching the
  # `assert(worst_case_size <= MAX_STRING_BYTE_LENGTH)` checks in the
  # store_string_* paths.
  return 2 * tagged_code_units

MAX_STRING_BYTE_LENGTH = (1 << 31) - 1
- 1 + def load_list(cx, ptr, elem_type, maybe_length): if maybe_length is not None: return load_list_from_valid_range(cx, ptr, maybe_length, elem_type) - begin = load_int(cx, ptr, 4) - length = load_int(cx, ptr + 4, 4) + begin = load_int(cx, ptr, cx.opts.memory.ptr_size()) + length = load_int(cx, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size()) return load_list_from_range(cx, begin, length, elem_type) def load_list_from_range(cx, ptr, length, elem_type): - trap_if(ptr != align_to(ptr, alignment(elem_type))) - trap_if(ptr + length * elem_size(elem_type) > len(cx.opts.memory)) + trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts.memory.ptr_type()))) + trap_if(ptr + length * elem_size(elem_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory)) return load_list_from_valid_range(cx, ptr, length, elem_type) def load_list_from_valid_range(cx, ptr, length, elem_type): + trap_if(length * worst_case_elem_size(elem_type, cx.opts.memory.ptr_type()) > MAX_LIST_BYTE_LENGTH) a = [] for i in range(length): - a.append(load(cx, ptr + i * elem_size(elem_type), elem_type)) + a.append(load(cx, ptr + i * elem_size(elem_type, cx.opts.memory.ptr_type()), elem_type)) return a def load_record(cx, ptr, fields): record = {} for field in fields: - ptr = align_to(ptr, alignment(field.t)) + ptr = align_to(ptr, alignment(field.t, cx.opts.memory.ptr_type())) record[field.label] = load(cx, ptr, field.t) - ptr += elem_size(field.t) + ptr += elem_size(field.t, cx.opts.memory.ptr_type()) return record def load_variant(cx, ptr, cases): - disc_size = elem_size(discriminant_type(cases)) + disc_size = elem_size(discriminant_type(cases), cx.opts.memory.ptr_type()) case_index = load_int(cx, ptr, disc_size) ptr += disc_size trap_if(case_index >= len(cases)) c = cases[case_index] - ptr = align_to(ptr, max_case_alignment(cases)) + ptr = align_to(ptr, max_case_alignment(cases, cx.opts.memory.ptr_type())) if c.t is None: return { c.label: None } return { c.label: load(cx, ptr, c.t) } @@ 
-1365,8 +1420,8 @@ def lift_async_value(ReadableEndT, cx, i, t): ### Storing def store(cx, v, t, ptr): - assert(ptr == align_to(ptr, alignment(t))) - assert(ptr + elem_size(t) <= len(cx.opts.memory)) + assert(ptr == align_to(ptr, alignment(t, cx.opts.memory.ptr_type()))) + assert(ptr + elem_size(t, cx.opts.memory.ptr_type()) <= len(cx.opts.memory)) match despecialize(t): case BoolType() : store_int(cx, int(bool(v)), ptr, 1) case U8Type() : store_int(cx, v, ptr, 1) @@ -1438,8 +1493,8 @@ def char_to_i32(c): def store_string(cx, v: String, ptr): begin, tagged_code_units = store_string_into_range(cx, v) - store_int(cx, begin, ptr, 4) - store_int(cx, tagged_code_units, ptr + 4, 4) + store_int(cx, begin, ptr, cx.opts.memory.ptr_size()) + store_int(cx, tagged_code_units, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size()) def store_string_into_range(cx, v: String): src, src_encoding, src_tagged_code_units = v @@ -1475,11 +1530,9 @@ def store_string_into_range(cx, v: String): case 'latin1' : return store_string_copy(cx, src, src_code_units, 1, 2, 'latin-1') case 'utf16' : return store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units) -MAX_STRING_BYTE_LENGTH = (1 << 31) - 1 - def store_string_copy(cx, src, src_code_units, dst_code_unit_size, dst_alignment, dst_encoding): dst_byte_length = dst_code_unit_size * src_code_units - trap_if(dst_byte_length > MAX_STRING_BYTE_LENGTH) + assert(dst_byte_length <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(0, 0, dst_alignment, dst_byte_length) trap_if(ptr != align_to(ptr, dst_alignment)) trap_if(ptr + dst_byte_length > len(cx.opts.memory)) @@ -1504,7 +1557,7 @@ def store_string_to_utf8(cx, src, src_code_units, worst_case_size): if ord(code_point) < 2**7: cx.opts.memory[ptr + i] = ord(code_point) else: - trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH) + assert(worst_case_size <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(ptr, src_code_units, 1, worst_case_size) trap_if(ptr + worst_case_size > 
len(cx.opts.memory)) encoded = src.encode('utf-8') @@ -1517,7 +1570,7 @@ def store_string_to_utf8(cx, src, src_code_units, worst_case_size): def store_utf8_to_utf16(cx, src, src_code_units): worst_case_size = 2 * src_code_units - trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH) + assert(worst_case_size <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(0, 0, 2, worst_case_size) trap_if(ptr != align_to(ptr, 2)) trap_if(ptr + worst_case_size > len(cx.opts.memory)) @@ -1542,7 +1595,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units): dst_byte_length += 1 else: worst_case_size = 2 * src_code_units - trap_if(worst_case_size > MAX_STRING_BYTE_LENGTH) + assert(worst_case_size <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(ptr, src_code_units, 2, worst_case_size) trap_if(ptr != align_to(ptr, 2)) trap_if(ptr + worst_case_size > len(cx.opts.memory)) @@ -1565,7 +1618,7 @@ def store_string_to_latin1_or_utf16(cx, src, src_code_units): def store_probably_utf16_to_latin1_or_utf16(cx, src, src_code_units): src_byte_length = 2 * src_code_units - trap_if(src_byte_length > MAX_STRING_BYTE_LENGTH) + assert(src_byte_length <= MAX_STRING_BYTE_LENGTH) ptr = cx.opts.realloc(0, 0, 2, src_byte_length) trap_if(ptr != align_to(ptr, 2)) trap_if(ptr + src_byte_length > len(cx.opts.memory)) @@ -1590,34 +1643,34 @@ def store_list(cx, v, ptr, elem_type, maybe_length): store_list_into_valid_range(cx, v, ptr, elem_type) return begin, length = store_list_into_range(cx, v, elem_type) - store_int(cx, begin, ptr, 4) - store_int(cx, length, ptr + 4, 4) + store_int(cx, begin, ptr, cx.opts.memory.ptr_size()) + store_int(cx, length, ptr + cx.opts.memory.ptr_size(), cx.opts.memory.ptr_size()) def store_list_into_range(cx, v, elem_type): - byte_length = len(v) * elem_size(elem_type) - trap_if(byte_length >= (1 << 32)) - ptr = cx.opts.realloc(0, 0, alignment(elem_type), byte_length) - trap_if(ptr != align_to(ptr, alignment(elem_type))) + byte_length = len(v) * elem_size(elem_type, 
cx.opts.memory.ptr_type()) + assert(byte_length <= MAX_LIST_BYTE_LENGTH) + ptr = cx.opts.realloc(0, 0, alignment(elem_type, cx.opts.memory.ptr_type()), byte_length) + trap_if(ptr != align_to(ptr, alignment(elem_type, cx.opts.memory.ptr_type()))) trap_if(ptr + byte_length > len(cx.opts.memory)) store_list_into_valid_range(cx, v, ptr, elem_type) return (ptr, len(v)) def store_list_into_valid_range(cx, v, ptr, elem_type): for i,e in enumerate(v): - store(cx, e, elem_type, ptr + i * elem_size(elem_type)) + store(cx, e, elem_type, ptr + i * elem_size(elem_type, cx.opts.memory.ptr_type())) def store_record(cx, v, ptr, fields): for f in fields: - ptr = align_to(ptr, alignment(f.t)) + ptr = align_to(ptr, alignment(f.t, cx.opts.memory.ptr_type())) store(cx, v[f.label], f.t, ptr) - ptr += elem_size(f.t) + ptr += elem_size(f.t, cx.opts.memory.ptr_type()) def store_variant(cx, v, ptr, cases): case_index, case_value = match_case(v, cases) - disc_size = elem_size(discriminant_type(cases)) + disc_size = elem_size(discriminant_type(cases), cx.opts.memory.ptr_type()) store_int(cx, case_index, ptr, disc_size) ptr += disc_size - ptr = align_to(ptr, max_case_alignment(cases)) + ptr = align_to(ptr, max_case_alignment(cases, cx.opts.memory.ptr_type())) c = cases[case_index] if c.t is not None: store(cx, case_value, c.t, ptr) @@ -1669,40 +1722,40 @@ def lower_future(cx, v, t): MAX_FLAT_RESULTS = 1 def flatten_functype(opts, ft, context): - flat_params = flatten_types(ft.param_types()) - flat_results = flatten_types(ft.result_type()) + flat_params = flatten_types(ft.param_types(), opts) + flat_results = flatten_types(ft.result_type(), opts) if not opts.async_: if len(flat_params) > MAX_FLAT_PARAMS: - flat_params = ['i32'] + flat_params = [opts.memory.ptr_type()] if len(flat_results) > MAX_FLAT_RESULTS: match context: case 'lift': - flat_results = ['i32'] + flat_results = [opts.memory.ptr_type()] case 'lower': - flat_params += ['i32'] + flat_params += [opts.memory.ptr_type()] flat_results 
= [] return CoreFuncType(flat_params, flat_results) else: match context: case 'lift': if len(flat_params) > MAX_FLAT_PARAMS: - flat_params = ['i32'] + flat_params = [opts.memory.ptr_type()] if opts.callback: flat_results = ['i32'] else: flat_results = [] case 'lower': if len(flat_params) > MAX_FLAT_ASYNC_PARAMS: - flat_params = ['i32'] + flat_params = [opts.memory.ptr_type()] if len(flat_results) > 0: - flat_params += ['i32'] + flat_params += [opts.memory.ptr_type()] flat_results = ['i32'] return CoreFuncType(flat_params, flat_results) -def flatten_types(ts): - return [ft for t in ts for ft in flatten_type(t)] +def flatten_types(ts, opts): + return [ft for t in ts for ft in flatten_type(t, opts)] -def flatten_type(t): +def flatten_type(t, opts): match despecialize(t): case BoolType() : return ['i32'] case U8Type() | U16Type() | U32Type() : return ['i32'] @@ -1711,36 +1764,36 @@ def flatten_type(t): case F32Type() : return ['f32'] case F64Type() : return ['f64'] case CharType() : return ['i32'] - case StringType() : return ['i32', 'i32'] + case StringType() : return [opts.memory.ptr_type(), opts.memory.ptr_type()] case ErrorContextType() : return ['i32'] - case ListType(t, l) : return flatten_list(t, l) - case RecordType(fields) : return flatten_record(fields) - case VariantType(cases) : return flatten_variant(cases) + case ListType(t, l) : return flatten_list(t, l, opts) + case RecordType(fields) : return flatten_record(fields, opts) + case VariantType(cases) : return flatten_variant(cases, opts) case FlagsType(labels) : return ['i32'] case OwnType() | BorrowType() : return ['i32'] case StreamType() | FutureType() : return ['i32'] -def flatten_list(elem_type, maybe_length): +def flatten_list(elem_type, maybe_length, opts): if maybe_length is not None: - return flatten_type(elem_type) * maybe_length - return ['i32', 'i32'] + return flatten_type(elem_type, opts) * maybe_length + return [opts.memory.ptr_type(), opts.memory.ptr_type()] -def flatten_record(fields): +def 
flatten_record(fields, opts): flat = [] for f in fields: - flat += flatten_type(f.t) + flat += flatten_type(f.t, opts) return flat -def flatten_variant(cases): +def flatten_variant(cases, opts): flat = [] for c in cases: if c.t is not None: - for i,ft in enumerate(flatten_type(c.t)): + for i,ft in enumerate(flatten_type(c.t, opts)): if i < len(flat): flat[i] = join(flat[i], ft) else: flat.append(ft) - return flatten_type(discriminant_type(cases)) + flat + return flatten_type(discriminant_type(cases), opts) + flat def join(a, b): if a == b: return a @@ -1810,8 +1863,8 @@ def lift_flat_signed(vi, core_width, t_width): return i def lift_flat_string(cx, vi): - ptr = vi.next('i32') - packed_length = vi.next('i32') + ptr = vi.next(cx.opts.memory.ptr_type()) + packed_length = vi.next(cx.opts.memory.ptr_type()) return load_string_from_range(cx, ptr, packed_length) def lift_flat_list(cx, vi, elem_type, maybe_length): @@ -1820,8 +1873,8 @@ def lift_flat_list(cx, vi, elem_type, maybe_length): for i in range(maybe_length): a.append(lift_flat(cx, vi, elem_type)) return a - ptr = vi.next('i32') - length = vi.next('i32') + ptr = vi.next(cx.opts.memory.ptr_type()) + length = vi.next(cx.opts.memory.ptr_type()) return load_list_from_range(cx, ptr, length, elem_type) def lift_flat_record(cx, vi, fields): @@ -1831,7 +1884,7 @@ def lift_flat_record(cx, vi, fields): return record def lift_flat_variant(cx, vi, cases): - flat_types = flatten_variant(cases) + flat_types = flatten_variant(cases, cx.opts) assert(flat_types.pop(0) == 'i32') case_index = vi.next('i32') trap_if(case_index >= len(cases)) @@ -1917,14 +1970,14 @@ def lower_flat_record(cx, v, fields): def lower_flat_variant(cx, v, cases): case_index, case_value = match_case(v, cases) - flat_types = flatten_variant(cases) + flat_types = flatten_variant(cases, cx.opts) assert(flat_types.pop(0) == 'i32') c = cases[case_index] if c.t is None: payload = [] else: payload = lower_flat(cx, case_value, c.t) - for i,(fv,have) in 
enumerate(zip(payload, flatten_type(c.t))): + for i,(fv,have) in enumerate(zip(payload, flatten_type(c.t, cx.opts))): want = flat_types.pop(0) match (have, want): case ('f32', 'i32') : payload[i] = encode_float_as_i32(fv) @@ -1943,30 +1996,30 @@ def lower_flat_flags(v, labels): ### Lifting and Lowering Values def lift_flat_values(cx, max_flat, vi, ts): - flat_types = flatten_types(ts) + flat_types = flatten_types(ts, cx.opts) if len(flat_types) > max_flat: - ptr = vi.next('i32') + ptr = vi.next(cx.opts.memory.ptr_type()) tuple_type = TupleType(ts) - trap_if(ptr != align_to(ptr, alignment(tuple_type))) - trap_if(ptr + elem_size(tuple_type) > len(cx.opts.memory)) + trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts.memory.ptr_type()))) + trap_if(ptr + elem_size(tuple_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory)) return list(load(cx, ptr, tuple_type).values()) else: return [ lift_flat(cx, vi, t) for t in ts ] def lower_flat_values(cx, max_flat, vs, ts, out_param = None): cx.inst.may_leave = False - flat_types = flatten_types(ts) + flat_types = flatten_types(ts, cx.opts) if len(flat_types) > max_flat: tuple_type = TupleType(ts) tuple_value = {str(i): v for i,v in enumerate(vs)} if out_param is None: - ptr = cx.opts.realloc(0, 0, alignment(tuple_type), elem_size(tuple_type)) + ptr = cx.opts.realloc(0, 0, alignment(tuple_type, cx.opts.memory.ptr_type()), elem_size(tuple_type, cx.opts.memory.ptr_type())) flat_vals = [ptr] else: - ptr = out_param.next('i32') + ptr = out_param.next(cx.opts.memory.ptr_type()) flat_vals = [] - trap_if(ptr != align_to(ptr, alignment(tuple_type))) - trap_if(ptr + elem_size(tuple_type) > len(cx.opts.memory)) + trap_if(ptr != align_to(ptr, alignment(tuple_type, cx.opts.memory.ptr_type()))) + trap_if(ptr + elem_size(tuple_type, cx.opts.memory.ptr_type()) > len(cx.opts.memory)) store(cx, tuple_value, tuple_type, ptr) else: flat_vals = [] @@ -2177,14 +2230,19 @@ def canon_resource_rep(rt, thread, i): ### ๐Ÿ”€ `canon context.get` 
def canon_context_get(t, i, thread): - assert(t == 'i32') + MASK_32BIT = (1 << 32) - 1 + + assert(t == 'i32' or t == 'i64') assert(i < Thread.CONTEXT_LENGTH) - return [thread.context[i]] + result = thread.context[i] + if t == 'i32': + result &= MASK_32BIT + return [result] ### ๐Ÿ”€ `canon context.set` def canon_context_set(t, i, thread, v): - assert(t == 'i32') + assert(t == 'i32' or t == 'i64') assert(i < Thread.CONTEXT_LENGTH) thread.context[i] = v return [] @@ -2516,7 +2574,7 @@ class CoreFuncRef: def canon_thread_new_indirect(ft, ftbl: Table[CoreFuncRef], thread, fi, c): trap_if(not thread.task.inst.may_leave) f = ftbl.get(fi) - assert(ft == CoreFuncType(['i32'], [])) + assert(ft == CoreFuncType(['i32'], []) or ft == CoreFuncType(['i64'], [])) trap_if(f.t != ft) def thread_func(thread): [] = call_and_trap_on_throw(f.callee, thread, [c]) diff --git a/design/mvp/canonical-abi/run_tests.py b/design/mvp/canonical-abi/run_tests.py index cd7ee74a..5f5d067f 100644 --- a/design/mvp/canonical-abi/run_tests.py +++ b/design/mvp/canonical-abi/run_tests.py @@ -35,7 +35,7 @@ def realloc(self, original_ptr, original_size, alignment, new_size): self.memory[ret : ret + original_size] = self.memory[original_ptr : original_ptr + original_size] return ret -def mk_opts(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None, sync_task_return = False, async_ = False): +def mk_opts(memory = MemInst(bytearray(), 'i32'), encoding = 'utf8', realloc = None, post_return = None, sync_task_return = False, async_ = False): opts = CanonicalOptions() opts.memory = memory opts.string_encoding = encoding @@ -46,7 +46,7 @@ def mk_opts(memory = bytearray(), encoding = 'utf8', realloc = None, post_return opts.callback = None return opts -def mk_cx(memory = bytearray(), encoding = 'utf8', realloc = None, post_return = None): +def mk_cx(memory = MemInst(bytearray(), 'i32'), encoding = 'utf8', realloc = None, post_return = None): opts = mk_opts(memory, encoding, realloc, 
post_return) inst = ComponentInstance(Store()) return LiftLowerContext(opts, inst) @@ -132,7 +132,7 @@ def test_name(): heap = Heap(5*len(cx.opts.memory)) if dst_encoding is None: dst_encoding = cx.opts.string_encoding - cx = mk_cx(heap.memory, dst_encoding, heap.realloc) + cx = mk_cx(MemInst(heap.memory, cx.opts.memory.ptr_type()), dst_encoding, heap.realloc) lowered_vals = lower_flat(cx, v, lower_t) vi = CoreValueIter(lowered_vals) @@ -207,7 +207,7 @@ def test_nan32(inbits, outbits): assert(encode_float_as_i32(f) == outbits) else: assert(not math.isnan(origf) or math.isnan(f)) - cx = mk_cx(int.to_bytes(inbits, 4, 'little')) + cx = mk_cx(MemInst(int.to_bytes(inbits, 4, 'little'), 'i32')) f = load(cx, 0, F32Type()) if definitions.DETERMINISTIC_PROFILE: assert(encode_float_as_i32(f) == outbits) @@ -221,7 +221,7 @@ def test_nan64(inbits, outbits): assert(encode_float_as_i64(f) == outbits) else: assert(not math.isnan(origf) or math.isnan(f)) - cx = mk_cx(int.to_bytes(inbits, 8, 'little')) + cx = mk_cx(MemInst(int.to_bytes(inbits, 8, 'little'), 'i32')) f = load(cx, 0, F64Type()) if definitions.DETERMINISTIC_PROFILE: assert(encode_float_as_i64(f) == outbits) @@ -243,32 +243,32 @@ def test_nan64(inbits, outbits): test_nan64(0x7ff0000000000000, 0x7ff0000000000000) test_nan64(0x3ff0000000000000, 0x3ff0000000000000) -def test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units): +def test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type='i32'): heap = Heap(len(encoded)) heap.memory[:] = encoded[:] - cx = mk_cx(heap.memory, src_encoding) + cx = mk_cx(MemInst(heap.memory, addr_type), src_encoding) v = (s, src_encoding, tagged_code_units) test(StringType(), [0, tagged_code_units], v, cx, dst_encoding) -def test_string(src_encoding, dst_encoding, s): +def test_string(src_encoding, dst_encoding, s, addr_type='i32'): if src_encoding == 'utf8': encoded = s.encode('utf-8') tagged_code_units = len(encoded) - 
test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units) + test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type) elif src_encoding == 'utf16': encoded = s.encode('utf-16-le') tagged_code_units = int(len(encoded) / 2) - test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units) + test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type) elif src_encoding == 'latin1+utf16': try: encoded = s.encode('latin-1') tagged_code_units = len(encoded) - test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units) + test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type) except UnicodeEncodeError: pass encoded = s.encode('utf-16-le') tagged_code_units = int(len(encoded) / 2) | UTF16_TAG - test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units) + test_string_internal(src_encoding, dst_encoding, s, encoded, tagged_code_units, addr_type) encodings = ['utf8', 'utf16', 'latin1+utf16'] @@ -276,14 +276,15 @@ def test_string(src_encoding, dst_encoding, s): '\u01ffy', 'xy\u01ff', 'a\ud7ffb', 'a\u02ff\u03ff\u04ffbc', '\uf123', '\uf123\uf123abc', 'abcdef\uf123'] -for src_encoding in encodings: - for dst_encoding in encodings: - for s in fun_strings: - test_string(src_encoding, dst_encoding, s) +for addr_type in ['i32', 'i64']: + for src_encoding in encodings: + for dst_encoding in encodings: + for s in fun_strings: + test_string(src_encoding, dst_encoding, s, addr_type) -def test_heap(t, expect, args, byte_array): +def test_heap(t, expect, args, byte_array, addr_type='i32'): heap = Heap(byte_array) - cx = mk_cx(heap.memory) + cx = mk_cx(MemInst(heap.memory, addr_type)) test(t, args, expect, cx) # Empty record types are not permitted yet. 
@@ -309,15 +310,34 @@ def test_heap(t, expect, args, byte_array): test_heap(ListType(StringType()), [mk_str("hi"),mk_str("wat")], [0,2], [16,0,0,0, 2,0,0,0, 21,0,0,0, 3,0,0,0, ord('h'), ord('i'), 0xf,0xf,0xf, ord('w'), ord('a'), ord('t')]) +test_heap(ListType(StringType()), [mk_str("hi"),mk_str("wat")], [0,2], + [32,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, + 37,0,0,0,0,0,0,0, 3,0,0,0,0,0,0,0, + ord('h'), ord('i'), 0xf,0xf,0xf, ord('w'), ord('a'), ord('t')], + addr_type='i64') test_heap(ListType(ListType(U8Type())), [[3,4,5],[],[6,7]], [0,3], [24,0,0,0, 3,0,0,0, 0,0,0,0, 0,0,0,0, 27,0,0,0, 2,0,0,0, 3,4,5, 6,7]) +test_heap(ListType(ListType(U8Type())), [[3,4,5],[],[6,7]], [0,3], + [48,0,0,0,0,0,0,0, 3,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0, + 51,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, + 3,4,5, 6,7], + addr_type='i64') test_heap(ListType(ListType(U16Type())), [[5,6]], [0,1], [8,0,0,0, 2,0,0,0, 5,0, 6,0]) +test_heap(ListType(ListType(U16Type())), [[5,6]], [0,1], + [16,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, + 5,0, 6,0], + addr_type='i64') test_heap(ListType(ListType(U16Type())), None, [0,1], [9,0,0,0, 2,0,0,0, 0, 5,0, 6,0]) +test_heap(ListType(ListType(U16Type())), None, [0,1], + [17,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, + 0, 5,0, 6,0], + addr_type='i64') test_heap(ListType(ListType(U8Type(),2)), [[1,2],[3,4]], [0,2], [1,2, 3,4]) test_heap(ListType(ListType(U32Type(),2)), [[1,2],[3,4]], [0,2], @@ -369,21 +389,23 @@ def test_heap(t, expect, args, byte_array): test_heap(t, v, [0,2], [0xff,0xff,0xff,0xff, 0,0,0,0]) -def test_flatten(t, params, results): + +def test_flatten(t, params, results, addr_type='i32'): + opts = mk_opts(MemInst(bytearray(), addr_type)) expect = CoreFuncType(params, results) if len(params) > definitions.MAX_FLAT_PARAMS: - expect.params = ['i32'] + expect.params = [addr_type] if len(results) > definitions.MAX_FLAT_RESULTS: - expect.results = ['i32'] - got = flatten_functype(CanonicalOptions(), t, 'lift') + expect.results = [addr_type] + got = flatten_functype(opts, t, 
'lift') assert(got == expect) if len(results) > definitions.MAX_FLAT_RESULTS: - expect.params += ['i32'] + expect.params += [addr_type] expect.results = [] - got = flatten_functype(CanonicalOptions(), t, 'lower') + got = flatten_functype(opts, t, 'lower') assert(got == expect) test_flatten(FuncType([U8Type(),F32Type(),F64Type()],[]), ['i32','f32','f64'], []) @@ -393,11 +415,13 @@ def test_flatten(t, params, results): test_flatten(FuncType([U8Type(),F32Type(),F64Type()],[TupleType([F32Type(),F32Type()])]), ['i32','f32','f64'], ['f32','f32']) test_flatten(FuncType([U8Type(),F32Type(),F64Type()],[F32Type(),F32Type()]), ['i32','f32','f64'], ['f32','f32']) test_flatten(FuncType([U8Type() for _ in range(17)],[]), ['i32' for _ in range(17)], []) +test_flatten(FuncType([U8Type() for _ in range(17)],[]), ['i32' for _ in range(17)], [], addr_type='i64') test_flatten(FuncType([U8Type() for _ in range(17)],[TupleType([U8Type(),U8Type()])]), ['i32' for _ in range(17)], ['i32','i32']) +test_flatten(FuncType([U8Type() for _ in range(17)],[TupleType([U8Type(),U8Type()])]), ['i32' for _ in range(17)], ['i32','i32'], addr_type='i64') def test_roundtrips(): - def test_roundtrip(t, v): + def test_roundtrip(t, v, addr_type='i32'): before = definitions.MAX_FLAT_RESULTS definitions.MAX_FLAT_RESULTS = 16 @@ -408,9 +432,8 @@ def callee(thread, x): return x callee_heap = Heap(1000) - callee_opts = mk_opts(callee_heap.memory, 'utf8', callee_heap.realloc) + callee_opts = mk_opts(MemInst(callee_heap.memory, addr_type), 'utf8', callee_heap.realloc) callee_inst = ComponentInstance(store) - lifted_callee = partial(canon_lift, callee_opts, callee_inst, ft, callee) got = None def on_start(): @@ -425,17 +448,123 @@ def on_resolve(result): definitions.MAX_FLAT_RESULTS = before - test_roundtrip(S8Type(), -1) - test_roundtrip(TupleType([U16Type(),U16Type()]), mk_tup(3,4)) - test_roundtrip(ListType(StringType()), [mk_str("hello there")]) - test_roundtrip(ListType(ListType(StringType())), 
[[mk_str("one"),mk_str("two")],[mk_str("three")]]) - test_roundtrip(ListType(OptionType(TupleType([StringType(),U16Type()]))), [{'some':mk_tup(mk_str("answer"),42)}]) - test_roundtrip(VariantType([CaseType('x', TupleType([U32Type(),U32Type(),U32Type(),U32Type(), - U32Type(),U32Type(),U32Type(),U32Type(), - U32Type(),U32Type(),U32Type(),U32Type(), - U32Type(),U32Type(),U32Type(),U32Type(), - StringType()]))]), - {'x': mk_tup(1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, mk_str("wat"))}) + cases = [ + (S8Type(), -1), + (TupleType([U16Type(),U16Type()]), mk_tup(3,4)), + (ListType(StringType()), [mk_str("hello there")]), + (ListType(ListType(StringType())), [[mk_str("one"),mk_str("two")],[mk_str("three")]]), + (ListType(OptionType(TupleType([StringType(),U16Type()]))), [{'some':mk_tup(mk_str("answer"),42)}]), + (VariantType([CaseType('x', TupleType([U32Type(),U32Type(),U32Type(),U32Type(), + U32Type(),U32Type(),U32Type(),U32Type(), + U32Type(),U32Type(),U32Type(),U32Type(), + U32Type(),U32Type(),U32Type(),U32Type(), + StringType()]))]), + {'x': mk_tup(1,2,3,4, 5,6,7,8, 9,10,11,12, 13,14,15,16, mk_str("wat"))}), + ] + for addr_type in ['i32', 'i64']: + for t, v in cases: + test_roundtrip(t, v, addr_type=addr_type) + + +def assert_trap_on_load_string(src_encoding, s, tagged_code_units, encoded): + ptr_offset = 8 + memory = bytearray(ptr_offset + len(encoded)) + memory[0:4] = int.to_bytes(ptr_offset, 4, 'little') + memory[4:8] = int.to_bytes(tagged_code_units, 4, 'little') + memory[ptr_offset:] = encoded + cx = mk_cx(MemInst(memory, 'i32'), src_encoding) + try: + load(cx, 0, StringType()) + fail("expected trap loading {!r} as {}".format(s, src_encoding)) + except Trap: + pass + +def test_string_byte_length_limit(): + saved = definitions.MAX_STRING_BYTE_LENGTH + try: + definitions.MAX_STRING_BYTE_LENGTH = 20 + + # Loading from UTF-8: 10 bytes will succeed, 11 bytes will trap on load + for dst in encodings: + test_string('utf8', dst, 'helloworld') + 
assert_trap_on_load_string('utf8', 'hello world', 11, b'hello world') + + # Loading from UTF-16 all ASCII: 10 code units will succeed, 11 will trap on + # load + for dst in encodings: + test_string('utf16', dst, 'abcdefghij') + assert_trap_on_load_string('utf16', 'abcdefghijk', 11, + 'abcdefghijk'.encode('utf-16-le')) + + # UTF-16 non-ASCII: 6 code units will succeed, 7 will trap on load + for dst in encodings: + test_string('utf16', dst, 'ab\u0100def') + assert_trap_on_load_string('utf16', '\u0100abcdef', 7, + '\u0100abcdef'.encode('utf-16-le')) + + # Latin1+utf16 (latin1): 10 bytes will succeed, 11 will trap on load + for dst in encodings: + test_string('latin1+utf16', dst, 'helloworld') + assert_trap_on_load_string('latin1+utf16', 'hello world', 11, + b'hello world') + + # Latin1+utf16 (utf16 variant, non-ASCII): 6 code units will succeed, 7 + # will trap on load + for dst in encodings: + test_string('latin1+utf16', dst, '\u0100abcde') + assert_trap_on_load_string('latin1+utf16', '\u0100abcdef', 7 | UTF16_TAG, + '\u0100abcdef'.encode('utf-16-le')) + + finally: + definitions.MAX_STRING_BYTE_LENGTH = saved + +def test_list_byte_length_limit(): + saved = definitions.MAX_LIST_BYTE_LENGTH + try: + definitions.MAX_LIST_BYTE_LENGTH = 20 + + # This list has the same size under all pointer types + for addr_type in ['i32', 'i64']: + # five U32's fit in 20 bytes + test_heap(ListType(U32Type()), [1,2,3,4,5], [0, 5], + [1,0,0,0, 2,0,0,0, 3,0,0,0, 4,0,0,0, 5,0,0,0], addr_type) + # six U32's exceed the limit + test_heap(ListType(U32Type()), None, [0, 6], + [1,0,0,0, 2,0,0,0, 3,0,0,0, 4,0,0,0, 5,0,0,0, 6,0,0,0], addr_type) + + # A list of strings has 8 bytes per entry in i32 and 16 bytes per entry in + # i64. So a list of length 1 can be loaded, but a list of length 2 hits the + # limit.
+ test_heap(ListType(StringType()), [mk_str("hi")], [0, 1], + [8,0,0,0, 2,0,0,0, ord('h'), ord('i')], 'i32') + test_heap(ListType(StringType()), [mk_str("hi")], [0, 1], + [16,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, ord('h'), ord('i')], 'i64') + + test_heap(ListType(StringType()), None, [0, 2], + [16,0,0,0, 2,0,0,0, 18,0,0,0, 2,0,0,0, + ord('h'),ord('i'),ord('a'),ord('b')], 'i32') + test_heap(ListType(StringType()), None, [0, 2], + [32,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, + 34,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, + ord('h'),ord('i'),ord('a'),ord('b')], 'i64') + + # Similarly a list of lists of U8's has 8 bytes per entry in i32 and 16 + # bytes per entry in i64. So a list of length 1 can be loaded, but a list + # of length 2 hits the limit. + test_heap(ListType(ListType(U8Type())), [[3,4,5]], [0, 1], + [8,0,0,0, 3,0,0,0, 3, 4, 5], 'i32') + test_heap(ListType(ListType(U8Type())), [[3,4,5]], [0, 1], + [16,0,0,0,0,0,0,0, 3,0,0,0,0,0,0,0, 3, 4, 5], 'i64') + test_heap(ListType(ListType(U8Type())), None, [0, 2], + [16,0,0,0, 2,0,0,0, 18,0,0,0, 3,0,0,0, + 1,2,3,4,5], 'i32') + test_heap(ListType(ListType(U8Type())), None, [0, 2], + [32,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, + 34,0,0,0,0,0,0,0, 3,0,0,0,0,0,0,0, + 1,2,3,4,5], 'i64') + + finally: + definitions.MAX_LIST_BYTE_LENGTH = saved def test_handles(): @@ -551,7 +680,7 @@ def on_resolve(results): def test_async_to_async(): producer_heap = Heap(10) - producer_opts = mk_opts(producer_heap.memory) + producer_opts = mk_opts(MemInst(producer_heap.memory, 'i32')) producer_opts.async_ = True store = Store() @@ -590,7 +719,7 @@ def core_blocking_producer(thread, args): blocking_callee = partial(canon_lift, producer_opts, producer_inst, blocking_ft, core_blocking_producer) consumer_heap = Heap(20) - consumer_opts = mk_opts(consumer_heap.memory) + consumer_opts = mk_opts(MemInst(consumer_heap.memory, 'i32')) consumer_opts.async_ = True def consumer(thread, args): @@ -617,21 +746,21 @@ def consumer(thread, args): fut1_1.set() waitretp = 
consumer_heap.realloc(0, 0, 8, 4) - [event] = canon_waitable_set_wait(True, consumer_heap.memory, thread, seti, waitretp) + [event] = canon_waitable_set_wait(True, MemInst(consumer_heap.memory, 'i32'), thread, seti, waitretp) assert(event == EventCode.SUBTASK) assert(consumer_heap.memory[waitretp] == subi1) assert(consumer_heap.memory[waitretp+4] == Subtask.State.RETURNED) [] = canon_subtask_drop(thread, subi1) fut1_2.set() - [event] = canon_waitable_set_wait(True, consumer_heap.memory, thread, seti, waitretp) + [event] = canon_waitable_set_wait(True, MemInst(consumer_heap.memory, 'i32'), thread, seti, waitretp) assert(event == EventCode.SUBTASK) assert(consumer_heap.memory[waitretp] == subi2) assert(consumer_heap.memory[waitretp+4] == Subtask.State.STARTED) assert(consumer_heap.memory[retp] == 13) fut2.set() - [event] = canon_waitable_set_wait(True, consumer_heap.memory, thread, seti, waitretp) + [event] = canon_waitable_set_wait(True, MemInst(consumer_heap.memory, 'i32'), thread, seti, waitretp) assert(event == EventCode.SUBTASK) assert(consumer_heap.memory[waitretp] == subi2) assert(consumer_heap.memory[waitretp+4] == Subtask.State.RETURNED) @@ -802,7 +931,7 @@ def core_sync_callee(thread, args): consumer_inst = ComponentInstance(store) consumer_ft = FuncType([], [], async_ = True) consumer_mem = bytearray(24) - consumer_opts = mk_opts(consumer_mem, async_ = True) + consumer_opts = mk_opts(MemInst(consumer_mem, 'i32'), async_ = True) def core_consumer(thread, args): assert(len(args) == 0) @@ -852,7 +981,7 @@ def core_consumer(thread, args): assert(ret == CopyResult.COMPLETED) retp = 0 - [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp) assert(event == EventCode.SUBTASK) assert(consumer_mem[retp+0] == subi2) assert(consumer_mem[retp+4] == Subtask.State.STARTED) @@ -864,14 +993,14 @@ def core_consumer(thread, args): [ret] = canon_thread_yield(True, 
thread) assert(ret == 0) retp = 0 - [ret] = canon_waitable_set_poll(True, consumer_mem, thread, seti, retp) + [ret] = canon_waitable_set_poll(True, MemInst(consumer_mem, 'i32'), thread, seti, retp) assert(ret == EventCode.NONE) [ret] = canon_future_write(FutureType(None), consumer_opts, thread, wfut21, 0xdeadbeef) assert(ret == CopyResult.COMPLETED) retp = 0 - [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp) assert(event == EventCode.SUBTASK) assert(consumer_mem[retp+0] == subi1) assert(consumer_mem[retp+4] == Subtask.State.RETURNED) @@ -885,14 +1014,14 @@ def core_consumer(thread, args): [ret] = canon_thread_yield(True, thread) assert(ret == 0) retp = 0 - [ret] = canon_waitable_set_poll(True, consumer_mem, thread, seti, retp) + [ret] = canon_waitable_set_poll(True, MemInst(consumer_mem, 'i32'), thread, seti, retp) assert(ret == EventCode.NONE) [ret] = canon_future_write(FutureType(None), consumer_opts, thread, wfut13, 0xdeadbeef) assert(ret == CopyResult.COMPLETED) retp = 0 - [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp) assert(event == EventCode.SUBTASK) assert(consumer_mem[retp+0] == subi2) assert(consumer_mem[retp+4] == Subtask.State.RETURNED) @@ -909,7 +1038,7 @@ def core_consumer(thread, args): assert(ret == CopyResult.COMPLETED) retp = 0 - [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp) assert(event == EventCode.SUBTASK) assert(consumer_mem[retp+0] == subi3) assert(consumer_mem[retp+4] == Subtask.State.RETURNED) @@ -944,7 +1073,7 @@ def core_callee2(thread, args): caller_inst = ComponentInstance(store) caller_ft = FuncType([], [], async_ = True) caller_mem = bytearray(24) - caller_opts = mk_opts(memory = 
caller_mem, async_ = True) + caller_opts = mk_opts(memory = MemInst(caller_mem, 'i32'), async_ = True) def core_caller(thread, args): assert(len(args) == 0) @@ -967,7 +1096,7 @@ def core_caller(thread, args): [seti] = canon_waitable_set_new(thread) [] = canon_waitable_join(thread, subi, seti) retp3 = 12 - [event] = canon_waitable_set_wait(True, caller_mem, thread, seti, retp3) + [event] = canon_waitable_set_wait(True, MemInst(caller_mem, 'i32'), thread, seti, retp3) assert(event == EventCode.SUBTASK) assert(caller_mem[retp3+0] == subi) assert(caller_mem[retp3+4] == Subtask.State.RETURNED) @@ -1004,7 +1133,7 @@ def producer2_core(thread, args): producer2 = partial(canon_lift, producer_opts, producer_inst, producer_ft, producer2_core) consumer_heap = Heap(20) - consumer_opts = mk_opts(consumer_heap.memory) + consumer_opts = mk_opts(MemInst(consumer_heap.memory, 'i32')) consumer_opts.async_ = True consumer_ft = FuncType([],[U8Type()], async_ = True) @@ -1034,7 +1163,7 @@ def consumer(thread, args): [ret] = canon_thread_yield(True, thread) assert(ret == 0) retp = 8 - [event] = canon_waitable_set_poll(True, consumer_heap.memory, thread, seti, retp) + [event] = canon_waitable_set_poll(True, MemInst(consumer_heap.memory, 'i32'), thread, seti, retp) if event == EventCode.NONE: continue assert(event == EventCode.SUBTASK) @@ -1093,7 +1222,7 @@ def producer2_core(thread, args): producer2 = partial(canon_lift, producer_opts, producer_inst, producer_ft, producer2_core) consumer_heap = Heap(20) - consumer_opts = mk_opts(consumer_heap.memory, async_ = True) + consumer_opts = mk_opts(MemInst(consumer_heap.memory, 'i32'), async_ = True) consumer_ft = FuncType([],[U8Type()], async_ = True) def consumer(thread, args): @@ -1120,7 +1249,7 @@ def consumer(thread, args): remain = [subi1, subi2] while remain: retp = 8 - [event] = canon_waitable_set_wait(True, consumer_heap.memory, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(consumer_heap.memory, 'i32'), thread, 
seti, retp) assert(event == EventCode.SUBTASK) assert(consumer_heap.memory[retp+4] == Subtask.State.RETURNED) subi = consumer_heap.memory[retp] @@ -1166,7 +1295,7 @@ def core_hostcall_pre(fut, thread, args): hostcall2 = partial(canon_lift, hostcall_opts, hostcall_inst, ft, core_hostcall2) lower_heap = Heap(20) - lower_opts = mk_opts(lower_heap.memory) + lower_opts = mk_opts(MemInst(lower_heap.memory, 'i32')) lower_opts.async_ = True def core_func(thread, args): @@ -1186,14 +1315,14 @@ def core_func(thread, args): fut1.set() retp = lower_heap.realloc(0,0,8,4) - [event] = canon_waitable_set_wait(True, lower_heap.memory, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(lower_heap.memory, 'i32'), thread, seti, retp) assert(event == EventCode.SUBTASK) assert(lower_heap.memory[retp] == subi1) assert(lower_heap.memory[retp+4] == Subtask.State.RETURNED) fut2.set() - [event] = canon_waitable_set_wait(True, lower_heap.memory, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(lower_heap.memory, 'i32'), thread, seti, retp) assert(event == EventCode.SUBTASK) assert(lower_heap.memory[retp] == subi2) assert(lower_heap.memory[retp+4] == Subtask.State.RETURNED) @@ -1371,8 +1500,8 @@ def test_eager_stream_completion(): ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())]) inst = ComponentInstance(store) mem = bytearray(20) - opts = mk_opts(memory=mem, async_=True) - sync_opts = mk_opts(memory=mem, async_=False) + opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True) + sync_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=False) def host_import(caller, on_start, on_resolve): args = on_start() @@ -1454,8 +1583,8 @@ def test_async_stream_ops(): ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())]) inst = ComponentInstance(store) mem = bytearray(24) - opts = mk_opts(memory=mem, async_=True) - sync_opts = mk_opts(memory=mem, async_=False) + opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True) + sync_opts = 
mk_opts(memory=MemInst(mem, 'i32'), async_=False) host_import_incoming = None host_import_outgoing = None @@ -1510,7 +1639,7 @@ def core_func(thread, args): [seti] = canon_waitable_set_new(thread) [] = canon_waitable_join(thread, rsi1, seti) definitions.throw_it = True - [event] = canon_waitable_set_wait(True, mem, thread, seti, retp) ## + [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp) ## assert(event == EventCode.STREAM_READ) assert(mem[retp+0] == rsi1) result,n = unpack_result(mem[retp+4]) @@ -1526,7 +1655,7 @@ def core_func(thread, args): assert(ret == definitions.BLOCKED) host_import_incoming.set_remain(100) [] = canon_waitable_join(thread, wsi3, seti) - [event] = canon_waitable_set_wait(True, mem, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_WRITE) assert(mem[retp+0] == wsi3) result,n = unpack_result(mem[retp+4]) @@ -1538,7 +1667,7 @@ def core_func(thread, args): assert(ret == definitions.BLOCKED) dst_stream.set_remain(100) [] = canon_waitable_join(thread, wsi2, seti) - [event] = canon_waitable_set_wait(True, mem, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_WRITE) assert(mem[retp+0] == wsi2) result,n = unpack_result(mem[retp+4]) @@ -1557,7 +1686,7 @@ def core_func(thread, args): [ret] = canon_stream_read(StreamType(U8Type()), opts, thread, rsi4, 0, 4) assert(ret == definitions.BLOCKED) [] = canon_waitable_join(thread, rsi4, seti) - [event] = canon_waitable_set_wait(True, mem, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_READ) assert(mem[retp+0] == rsi4) result,n = unpack_result(mem[retp+4]) @@ -1604,7 +1733,7 @@ def test_receive_own_stream(): store = Store() inst = ComponentInstance(store) mem = bytearray(20) - opts = mk_opts(memory=mem, async_=True) + opts 
= mk_opts(memory=MemInst(mem, 'i32'), async_=True) host_ft = FuncType([StreamType(U8Type())], [StreamType(U8Type())]) def host_import(caller, on_start, on_resolve): @@ -1642,7 +1771,7 @@ def on_resolve(results): assert(len(results) == 0) def test_host_partial_reads_writes(): store = Store() mem = bytearray(20) - opts = mk_opts(memory=mem, async_=True) + opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True) src = HostSource(U8Type(), [1,2,3,4], chunk=2, destroy_if_empty = False) source_ft = FuncType([], [StreamType(U8Type())]) @@ -1681,7 +1810,7 @@ def core_func(thread, args): [seti] = canon_waitable_set_new(thread) [] = canon_waitable_join(thread, rsi, seti) - [event] = canon_waitable_set_wait(True, mem, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_READ) assert(mem[retp+0] == rsi) result,n = unpack_result(mem[retp+4]) @@ -1702,7 +1831,7 @@ def core_func(thread, args): assert(ret == definitions.BLOCKED) dst.set_remain(4) [] = canon_waitable_join(thread, wsi, seti) - [event] = canon_waitable_set_wait(True, mem, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_WRITE) assert(mem[retp+0] == wsi) result,n = unpack_result(mem[retp+4]) @@ -1728,7 +1857,7 @@ def test_wasm_to_wasm_stream(): inst1 = ComponentInstance(store) mem1 = bytearray(24) - opts1 = mk_opts(memory=mem1, async_=True) + opts1 = mk_opts(memory=MemInst(mem1, 'i32'), async_=True) ft1 = FuncType([], [StreamType(U8Type())]) def core_func1(thread, args): assert(not args) @@ -1763,7 +1892,7 @@ def core_func1(thread, args): retp = 16 [seti] = canon_waitable_set_new(thread) [] = canon_waitable_join(thread, wsi, seti) - [event] = canon_waitable_set_wait(True, mem1, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem1, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_WRITE) assert(mem1[retp+0] == wsi) 
result,n = unpack_result(mem1[retp+4]) @@ -1774,7 +1903,7 @@ def core_func1(thread, args): fut4.set() - [event] = canon_waitable_set_wait(True, mem1, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem1, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_WRITE) assert(mem1[retp+0] == wsi) assert(mem1[retp+4] == 0) @@ -1793,7 +1922,7 @@ def core_func1(thread, args): inst2 = ComponentInstance(store) heap2 = Heap(24) mem2 = heap2.memory - opts2 = mk_opts(memory=heap2.memory, realloc=heap2.realloc, async_=True) + opts2 = mk_opts(memory=MemInst(heap2.memory, 'i32'), realloc=heap2.realloc, async_=True) ft2 = FuncType([], []) def core_func2(thread, args): assert(not args) @@ -1812,7 +1941,7 @@ def core_func2(thread, args): [seti] = canon_waitable_set_new(thread) [] = canon_waitable_join(thread, rsi, seti) - [event] = canon_waitable_set_wait(True, mem2, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem2, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_READ) assert(mem2[retp+0] == rsi) result,n = unpack_result(mem2[retp+4]) @@ -1840,7 +1969,7 @@ def core_func2(thread, args): [ret] = canon_stream_read(StreamType(U8Type()), opts2, thread, rsi, 12345, 0) assert(ret == definitions.BLOCKED) - [event] = canon_waitable_set_wait(True, mem2, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem2, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_READ) assert(mem2[retp+0] == rsi) p2 = int.from_bytes(mem2[retp+4 : retp+8], 'little', signed=False) @@ -1859,7 +1988,7 @@ def test_wasm_to_wasm_stream_empty(): inst1 = ComponentInstance(store) mem1 = bytearray(24) - opts1 = mk_opts(memory=mem1, async_=True) + opts1 = mk_opts(memory=MemInst(mem1, 'i32'), async_=True) ft1 = FuncType([], [StreamType(None)]) def core_func1(thread, args): assert(not args) @@ -1886,7 +2015,7 @@ def core_func1(thread, args): retp = 16 [seti] = canon_waitable_set_new(thread) [] = canon_waitable_join(thread, wsi, 
seti) - [event] = canon_waitable_set_wait(True, mem1, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem1, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_WRITE) assert(mem1[retp+0] == wsi) result,n = unpack_result(mem1[retp+4]) @@ -1904,7 +2033,7 @@ def core_func1(thread, args): inst2 = ComponentInstance(store) heap2 = Heap(10) mem2 = heap2.memory - opts2 = mk_opts(memory=heap2.memory, realloc=heap2.realloc, async_=True) + opts2 = mk_opts(memory=MemInst(heap2.memory, 'i32'), realloc=heap2.realloc, async_=True) ft2 = FuncType([], []) def core_func2(thread, args): assert(not args) @@ -1923,7 +2052,7 @@ def core_func2(thread, args): [seti] = canon_waitable_set_new(thread) [] = canon_waitable_join(thread, rsi, seti) - [event] = canon_waitable_set_wait(True, mem2, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem2, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_READ) assert(mem2[retp+0] == rsi) result,n = unpack_result(mem2[retp+4]) @@ -1954,7 +2083,7 @@ def test_cancel_copy(): store = Store() inst = ComponentInstance(store) mem = bytearray(24) - lower_opts = mk_opts(memory=mem, async_=True) + lower_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True) host_ft1 = FuncType([StreamType(U8Type())],[]) host_sink = None @@ -2040,7 +2169,7 @@ def core_func(thread, args): host_source.unblock_cancel() [seti] = canon_waitable_set_new(thread) [] = canon_waitable_join(thread, rsi, seti) - [event] = canon_waitable_set_wait(True, mem, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp) assert(event == EventCode.STREAM_READ) assert(mem[retp+0] == rsi) result,n = unpack_result(mem[retp+4]) @@ -2109,7 +2238,7 @@ def test_futures(): store = Store() inst = ComponentInstance(store) mem = bytearray(24) - lower_opts = mk_opts(memory=mem, async_=True) + lower_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True) host_ft1 = 
FuncType([FutureType(U8Type())],[FutureType(U8Type())]) def host_func(caller, on_start, on_resolve): @@ -2145,7 +2274,7 @@ def core_func(thread, args): [seti] = canon_waitable_set_new(thread) [] = canon_waitable_join(thread, rfi, seti) - [event] = canon_waitable_set_wait(True, mem, thread, seti, retp) + [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, retp) assert(event == EventCode.FUTURE_READ) assert(mem[retp+0] == rfi) assert(mem[retp+4] == CopyResult.COMPLETED) @@ -2200,8 +2329,8 @@ def test_cancel_subtask(): ft = FuncType([U8Type()], [U8Type()], async_ = True) callee_heap = Heap(10) - callee_opts = mk_opts(callee_heap.memory, async_ = True) - sync_callee_opts = mk_opts(callee_heap.memory, async_ = False) + callee_opts = mk_opts(MemInst(callee_heap.memory, 'i32'), async_ = True) + sync_callee_opts = mk_opts(MemInst(callee_heap.memory, 'i32'), async_ = False) callee_inst = ComponentInstance(store) def core_callee1(thread, args): @@ -2211,7 +2340,7 @@ def core_callee1(thread, args): def core_callee2(thread, args): [x] = args [si] = canon_waitable_set_new(thread) - [ret] = canon_waitable_set_wait(True, callee_heap.memory, thread, si, 0) + [ret] = canon_waitable_set_wait(True, MemInst(callee_heap.memory, 'i32'), thread, si, 0) assert(ret == EventCode.TASK_CANCELLED) match x: case 1: @@ -2258,9 +2387,9 @@ def core_callee4(thread, args): except Trap: pass [seti] = canon_waitable_set_new(thread) - [result] = canon_waitable_set_wait(True, callee_heap.memory, thread, seti, 0) + [result] = canon_waitable_set_wait(True, MemInst(callee_heap.memory, 'i32'), thread, seti, 0) assert(result == EventCode.TASK_CANCELLED) - [result] = canon_waitable_set_poll(True, callee_heap.memory, thread, seti, 0) + [result] = canon_waitable_set_poll(True, MemInst(callee_heap.memory, 'i32'), thread, seti, 0) assert(result == EventCode.NONE) [] = canon_task_cancel(thread) return [] @@ -2336,7 +2465,7 @@ def core_callee6(thread, args): callee6 = partial(canon_lift, 
callee_opts, callee_inst, ft, core_callee6) caller_heap = Heap(20) - caller_opts = mk_opts(caller_heap.memory, async_ = True) + caller_opts = mk_opts(MemInst(caller_heap.memory, 'i32'), async_ = True) caller_inst = ComponentInstance(store) def core_caller(thread, args): @@ -2395,7 +2524,7 @@ def core_caller(thread, args): assert(caller_heap.memory[0] == 13) [] = canon_waitable_join(thread, subi3, seti) retp = 8 - [ret] = canon_waitable_set_wait(True, caller_heap.memory, thread, seti, retp) + [ret] = canon_waitable_set_wait(True, MemInst(caller_heap.memory, 'i32'), thread, seti, retp) assert(ret == EventCode.SUBTASK) assert(caller_heap.memory[retp+0] == subi3) assert(caller_heap.memory[retp+4] == Subtask.State.RETURNED) @@ -2414,7 +2543,7 @@ def core_caller(thread, args): assert(caller_heap.memory[0] == 13) [] = canon_waitable_join(thread, subi4, seti) retp = 8 - [ret] = canon_waitable_set_wait(True, caller_heap.memory, thread, seti, retp) + [ret] = canon_waitable_set_wait(True, MemInst(caller_heap.memory, 'i32'), thread, seti, retp) assert(ret == EventCode.SUBTASK) assert(caller_heap.memory[retp+0] == subi4) assert(caller_heap.memory[retp+4] == Subtask.State.CANCELLED_BEFORE_RETURNED) @@ -2456,7 +2585,7 @@ def core_caller(thread, args): host_fut4.set() [] = canon_waitable_join(thread, subi, seti) waitretp = 4 - [event] = canon_waitable_set_wait(True, caller_heap.memory, thread, seti, waitretp) + [event] = canon_waitable_set_wait(True, MemInst(caller_heap.memory, 'i32'), thread, seti, waitretp) assert(event == EventCode.SUBTASK) assert(caller_heap.memory[waitretp] == subi) assert(caller_heap.memory[waitretp+4] == Subtask.State.CANCELLED_BEFORE_RETURNED) @@ -2472,7 +2601,7 @@ def core_caller(thread, args): host_fut5.set() [] = canon_waitable_join(thread, subi, seti) waitretp = 4 - [event] = canon_waitable_set_wait(True, caller_heap.memory, thread, seti, waitretp) + [event] = canon_waitable_set_wait(True, MemInst(caller_heap.memory, 'i32'), thread, seti, waitretp) 
assert(event == EventCode.SUBTASK) assert(caller_heap.memory[waitretp] == subi) assert(caller_heap.memory[waitretp+4] == Subtask.State.RETURNED) @@ -2487,7 +2616,7 @@ def core_caller(thread, args): assert(ret == definitions.BLOCKED) [] = canon_waitable_join(thread, subi, seti) - [event] = canon_waitable_set_wait(True, caller_heap.memory, thread, seti, 4) + [event] = canon_waitable_set_wait(True, MemInst(caller_heap.memory, 'i32'), thread, seti, 4) assert(event == EventCode.SUBTASK) assert(caller_heap.memory[0] == 45) assert(caller_heap.memory[4] == subi) @@ -2516,8 +2645,8 @@ def test_self_copy(elemt): store = Store() inst = ComponentInstance(store) mem = bytearray(40) - sync_opts = mk_opts(memory=mem, async_=False) - async_opts = mk_opts(memory=mem, async_=True) + sync_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=False) + async_opts = mk_opts(memory=MemInst(mem, 'i32'), async_=True) ft = FuncType([], [], async_ = True) def core_func(thread, args): @@ -2534,7 +2663,7 @@ def core_func(thread, args): [] = canon_future_drop_readable(FutureType(elemt), thread, rfi) [] = canon_waitable_join(thread, wfi, seti) - [event] = canon_waitable_set_wait(True, mem, thread, seti, 0) + [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, 0) assert(event == EventCode.FUTURE_WRITE) assert(mem[0] == wfi) assert(mem[4] == CopyResult.COMPLETED) @@ -2554,7 +2683,7 @@ def core_func(thread, args): [] = canon_stream_drop_readable(StreamType(elemt), thread, rsi) [] = canon_waitable_join(thread, wsi, seti) - [event] = canon_waitable_set_wait(True, mem, thread, seti, 0) + [event] = canon_waitable_set_wait(True, MemInst(mem, 'i32'), thread, seti, 0) assert(event == EventCode.STREAM_WRITE) assert(mem[0] == wsi) result,n = unpack_result(mem[4]) @@ -2571,7 +2700,7 @@ def core_func(thread, args): def test_async_flat_params(): store = Store() heap = Heap(1000) - opts = mk_opts(heap.memory, 'utf8', heap.realloc, async_ = True) + opts = mk_opts(MemInst(heap.memory, 'i32'), 
'utf8', heap.realloc, async_ = True) ft1 = FuncType([F32Type(), F64Type(), U32Type(), S64Type()],[]) def f1(caller, on_start, on_resolve): @@ -2621,7 +2750,7 @@ def test_threads(): store = Store() inst = ComponentInstance(store) mem = bytearray(8) - opts = mk_opts(memory = mem) + opts = mk_opts(memory = MemInst(mem, 'i32')) ftbl = Table() ft = CoreFuncType(['i32'],[]) @@ -2724,7 +2853,7 @@ def core_producer_callback2(thread, args): consumer_inst = ComponentInstance(store) consumer_ft = FuncType([], [], async_ = True) consumer_mem = bytearray(24) - consumer_opts = mk_opts(consumer_mem, async_ = True) + consumer_opts = mk_opts(MemInst(consumer_mem, 'i32'), async_ = True) def core_consumer(thread, args): assert(len(args) == 0) @@ -2745,14 +2874,14 @@ def core_consumer(thread, args): retp3 = 16 [seti] = canon_waitable_set_new(thread) [] = canon_waitable_join(thread, subi1, seti) - [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp3) + [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp3) assert(event == EventCode.SUBTASK) assert(consumer_mem[retp3] == subi1) assert(consumer_mem[retp3+4] == Subtask.State.RETURNED) assert(consumer_mem[retp1] == 42) [] = canon_waitable_join(thread, subi2, seti) - [event] = canon_waitable_set_wait(True, consumer_mem, thread, seti, retp3) + [event] = canon_waitable_set_wait(True, MemInst(consumer_mem, 'i32'), thread, seti, retp3) assert(event == EventCode.SUBTASK) assert(consumer_mem[retp3] == subi2) assert(consumer_mem[retp3+4] == Subtask.State.RETURNED) @@ -2802,6 +2931,8 @@ def mk_task(supertask, inst): test_roundtrips() +test_string_byte_length_limit() +test_list_byte_length_limit() test_handles() test_async_to_async() test_async_callback() diff --git a/test/wasm-tools/memory64.wast b/test/wasm-tools/memory64.wast index 0ec55341..a72eb3b3 100644 --- a/test/wasm-tools/memory64.wast +++ b/test/wasm-tools/memory64.wast @@ -42,13 +42,14 @@ (core instance (instantiate $B (with "" 
(instance (export "" (table $m)))))) ) -(assert_invalid - (component - (import "x" (func $x (param "x" string))) - (core module $A - (memory (export "m") i64 1)) - (core instance $A (instantiate $A)) - (alias core export $A "m" (core memory $m)) - (core func (canon lower (func $x) (memory $m))) +(component + (import "x" (func $x (param "x" string))) + (core module $A + (memory (export "m") i64 1) + (func (export "realloc") (param i64 i64 i64 i64) (result i64) unreachable) ) - "canonical ABI memory is not a 32-bit linear memory") + (core instance $A (instantiate $A)) + (alias core export $A "m" (core memory $m)) + (core func $realloc (alias core export $A "realloc")) + (core func (canon lower (func $x) (memory $m) (realloc (func $realloc)))) +)