WebAssembly · brendandahl · Apr 7, 2026 · stevenfontanella · Apr 7, 2026 · stevenfontanella
@@ -550,6 +550,8 @@
     ("f16x8.convert_i16x8_s",    "makeUnary(UnaryOp::ConvertSVecI16x8ToVecF16x8)"),
     ("f16x8.convert_i16x8_u",    "makeUnary(UnaryOp::ConvertUVecI16x8ToVecF16x8)"),
     ("f32x4.promote_low_f16x8",  "makeUnary(UnaryOp::PromoteLowVecF16x8ToVecF32x4)"),
+    ("f16x8.demote_f32x4_zero",  "makeUnary(UnaryOp::DemoteZeroVecF32x4ToVecF16x8)"),
+    ("f16x8.demote_f64x2_zero",  "makeUnary(UnaryOp::DemoteZeroVecF64x2ToVecF16x8)"),
     ("f16x8.madd",               "makeSIMDTernary(SIMDTernaryOp::MaddVecF16x8)"),
     ("f16x8.nmadd",              "makeSIMDTernary(SIMDTernaryOp::NmaddVecF16x8)"),
 

@@ -505,12 +505,34 @@ switch (buf[0]) {
               default: goto parse_error;
             }
           }
-          case 'd':
-            if (op == "f16x8.div"sv) {
-              CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::DivVecF16x8));
-              return Ok{};
+          case 'd': {
+            switch (buf[7]) {
+              case 'e': {
+                switch (buf[14]) {
+                  case '3':
+                    if (op == "f16x8.demote_f32x4_zero"sv) {
+                      CHECK_ERR(makeUnary(ctx, pos, annotations, UnaryOp::DemoteZeroVecF32x4ToVecF16x8));
+                      return Ok{};
+                    }
+                    goto parse_error;
+                  case '6':
+                    if (op == "f16x8.demote_f64x2_zero"sv) {
+                      CHECK_ERR(makeUnary(ctx, pos, annotations, UnaryOp::DemoteZeroVecF64x2ToVecF16x8));
+                      return Ok{};
+                    }
+                    goto parse_error;
+                  default: goto parse_error;
+                }
+              }
+              case 'i':
+                if (op == "f16x8.div"sv) {
+                  CHECK_ERR(makeBinary(ctx, pos, annotations, BinaryOp::DivVecF16x8));
+                  return Ok{};
+                }
+                goto parse_error;
+              default: goto parse_error;
             }
-            goto parse_error;
+          }
           case 'e': {
             switch (buf[7]) {
               case 'q':

@@ -448,6 +448,8 @@ template<typename Subtype> struct ChildTyper : OverriddenVisitor<Subtype> {
       case ConvertSVecI16x8ToVecF16x8:
       case ConvertUVecI16x8ToVecF16x8:
       case PromoteLowVecF16x8ToVecF32x4:
+      case DemoteZeroVecF32x4ToVecF16x8:
+      case DemoteZeroVecF64x2ToVecF16x8:
       case AnyTrueVec128:
       case AllTrueVecI8x16:
       case AllTrueVecI16x8:

@@ -285,6 +285,8 @@ struct CostAnalyzer : public OverriddenVisitor<CostAnalyzer, CostType> {
       case ConvertSVecI16x8ToVecF16x8:
       case ConvertUVecI16x8ToVecF16x8:
       case PromoteLowVecF16x8ToVecF32x4:
+      case DemoteZeroVecF32x4ToVecF16x8:
+      case DemoteZeroVecF64x2ToVecF16x8:
         ret = 1;
         break;
       case InvalidUnary:

@@ -724,6 +724,8 @@ class Literal {
   Literal demoteZeroToF32x4() const;
   Literal promoteLowToF64x2() const;
   Literal promoteLowF16x8ToF32x4() const;
+  Literal demoteZeroF32x4ToF16x8() const;
+  Literal demoteZeroF64x2ToF16x8() const;
   Literal truncSatToSI16x8() const;
   Literal truncSatToUI16x8() const;
   Literal convertSToF16x8() const;

@@ -1404,6 +1404,12 @@ struct PrintExpressionContents
       case PromoteLowVecF16x8ToVecF32x4:
         o << "f32x4.promote_low_f16x8";
         break;
+      case DemoteZeroVecF32x4ToVecF16x8:
+        o << "f16x8.demote_f32x4_zero";
+        break;
+      case DemoteZeroVecF64x2ToVecF16x8:
+        o << "f16x8.demote_f64x2_zero";
+        break;
       case InvalidUnary:
         WASM_UNREACHABLE("unvalid unary operator");
     }

@@ -1126,6 +1126,8 @@ enum ASTNodes {
   I16x8TruncSatF16x8U = 0x146,
   F16x8ConvertI16x8S = 0x147,
   F16x8ConvertI16x8U = 0x148,
+  F16x8DemoteF32x4Zero = 0x149,
+  F16x8DemoteF64x2Zero = 0x14a,
   F32x4PromoteLowF16x8 = 0x14b,
 
   // bulk memory opcodes

@@ -1166,6 +1166,10 @@ class ExpressionRunner : public OverriddenVisitor<SubType, Flow> {
         return value.convertUToF16x8();
       case PromoteLowVecF16x8ToVecF32x4:
         return value.promoteLowF16x8ToF32x4();
+      case DemoteZeroVecF32x4ToVecF16x8:
+        return value.demoteZeroF32x4ToF16x8();
+      case DemoteZeroVecF64x2ToVecF16x8:
+        return value.demoteZeroF64x2ToF16x8();
       case InvalidUnary:
         WASM_UNREACHABLE("invalid unary op");
     }

@@ -252,6 +252,8 @@ enum UnaryOp {
   ConvertSVecI16x8ToVecF16x8,
   ConvertUVecI16x8ToVecF16x8,
   PromoteLowVecF16x8ToVecF32x4,
+  DemoteZeroVecF32x4ToVecF16x8,
+  DemoteZeroVecF64x2ToVecF16x8,
 
   InvalidUnary
 };

@@ -2912,6 +2912,30 @@ Literal Literal::truncSatZeroUToI32x4() const {
 Literal Literal::demoteZeroToF32x4() const {
   return unary_zero<4, &Literal::getLanesF64x2, &Literal::demote>(*this);
 }
+Literal Literal::demoteZeroF32x4ToF16x8() const {
+  // Output lanes 0-3 receive the four f32 input lanes passed through
+  // fp16_ieee_from_fp32_value; output lanes 4-7 are set to zero.
+  const auto wide = getLanesF32x4();
+  LaneArray<8> narrow;
+  for (size_t lane = 0; lane < 8; ++lane) {
+    if (lane < 4) {
+      narrow[lane] = Literal(fp16_ieee_from_fp32_value(wide[lane].getf32()));
+    } else {
+      narrow[lane] = Literal(int32_t(0));
+    }
+  }
+  return Literal(narrow);
+}
+
+// Rounds the f64 held in |lane| to f16 precision with a single
+// round-to-nearest, ties-to-even step and returns the f16 bit pattern.
+//
+// Narrowing f64 -> f32 -> f16 rounds twice: an f64 just above an f16 midpoint
+// (e.g. 1 + 2^-11 + 2^-40) collapses onto the midpoint in f32, and the tie is
+// then resolved to the even neighbour (1.0) instead of the correct 1 + 2^-10.
+// To avoid that, finite nonzero magnitudes below 2^16 are first snapped onto
+// the f16 grid in double arithmetic, after which the narrowing is exact.
+// Assumes the default round-to-nearest floating-point mode.
+static uint16_t demoteF64LaneToF16Bits(const Literal& lane) {
+  const double value = lane.getf64();
+  const double magnitude = value < 0 ? -value : value;
+  // Zeros (whose sign must survive), NaNs, infinities and magnitudes of at
+  // least 2^16 are not affected by the double rounding, so they keep using
+  // the f32 route.
+  if (!(magnitude > 0 && magnitude < 65536.0)) {
+    return fp16_ieee_from_fp32_value(lane.demote().getf32());
+  }
+  // Spacing of f16 values around |magnitude|: 2^-24 for subnormals and the
+  // lowest normal binade [2^-14, 2^-13), doubling with every binade above.
+  double spacing = 1.0 / 16777216.0; // 2^-24
+  for (double binadeEnd = 1.0 / 8192.0; magnitude >= binadeEnd;
+       binadeEnd *= 2) {
+    spacing *= 2;
+  }
+  // |magnitude| is below 2^11 * spacing, so after adding spacing * 2^52 the
+  // sum lies in a binade whose double ulp equals |spacing|: the addition
+  // itself rounds to a multiple of |spacing| with ties-to-even, and the
+  // subtraction is exact. The volatile forces the sum to be stored as a real
+  // double so it is not folded away or kept in a wider register.
+  const double shifter = spacing * 4503599627370496.0; // 2^52
+  volatile double shifted = magnitude + shifter;
+  const double snapped = shifted - shifter;
+  // |snapped| is a multiple of |spacing| no larger than 2^16, so it fits in a
+  // float exactly; 2^16 itself is out of f16 range and becomes infinity.
+  return fp16_ieee_from_fp32_value(
+    static_cast<float>(value < 0 ? -snapped : snapped));
+}
+
+// Narrows the two f64 lanes into output lanes 0-1 as f16 bit patterns (using a
+// single rounding per lane) and sets output lanes 2-7 to zero.
+Literal Literal::demoteZeroF64x2ToF16x8() const {
+  auto lanes = getLanesF64x2();
+  LaneArray<8> result;
+  for (size_t i = 0; i < 2; ++i) {
+    result[i] = Literal(int32_t(demoteF64LaneToF16Bits(lanes[i])));
+  }
+  for (size_t i = 2; i < 8; ++i) {
+    result[i] = Literal(int32_t(0));
+  }
+  return Literal(result);
+}
+
 Literal Literal::promoteLowToF64x2() const {
   return extendF32<LaneOrder::Low>(*this);
 }

@@ -4474,6 +4474,10 @@ Result<> WasmBinaryReader::readInst() {
           return builder.makeUnary(ConvertSVecI16x8ToVecF16x8);
         case BinaryConsts::F16x8ConvertI16x8U:
           return builder.makeUnary(ConvertUVecI16x8ToVecF16x8);
+        case BinaryConsts::F16x8DemoteF32x4Zero:
+          return builder.makeUnary(DemoteZeroVecF32x4ToVecF16x8);
+        case BinaryConsts::F16x8DemoteF64x2Zero:
+          return builder.makeUnary(DemoteZeroVecF64x2ToVecF16x8);
         case BinaryConsts::F32x4PromoteLowF16x8:
           return builder.makeUnary(PromoteLowVecF16x8ToVecF32x4);
         case BinaryConsts::I8x16ExtractLaneS:

@@ -1459,6 +1459,14 @@ void BinaryInstWriter::visitUnary(Unary* curr) {
       o << static_cast<int8_t>(BinaryConsts::SIMDPrefix)
         << U32LEB(BinaryConsts::F16x8ConvertI16x8U);
       break;
+    case DemoteZeroVecF32x4ToVecF16x8:
+      o << static_cast<int8_t>(BinaryConsts::SIMDPrefix)
+        << U32LEB(BinaryConsts::F16x8DemoteF32x4Zero);
+      break;
+    case DemoteZeroVecF64x2ToVecF16x8:
+      o << static_cast<int8_t>(BinaryConsts::SIMDPrefix)
+        << U32LEB(BinaryConsts::F16x8DemoteF64x2Zero);
+      break;
     case PromoteLowVecF16x8ToVecF32x4:
       o << static_cast<int8_t>(BinaryConsts::SIMDPrefix)
         << U32LEB(BinaryConsts::F32x4PromoteLowF16x8);

@@ -2381,6 +2381,8 @@ void FunctionValidator::visitUnary(Unary* curr) {
     case DemoteZeroVecF64x2ToVecF32x4:
     case PromoteLowVecF32x4ToVecF64x2:
     case PromoteLowVecF16x8ToVecF32x4:
+    case DemoteZeroVecF32x4ToVecF16x8:
+    case DemoteZeroVecF64x2ToVecF16x8:
     case RelaxedTruncSVecF32x4ToVecI32x4:
     case RelaxedTruncUVecF32x4ToVecI32x4:
     case RelaxedTruncZeroSVecF64x2ToVecI32x4:

@@ -715,6 +715,8 @@ void Unary::finalize() {
     case ConvertSVecI16x8ToVecF16x8:
     case ConvertUVecI16x8ToVecF16x8:
     case PromoteLowVecF16x8ToVecF32x4:
+    case DemoteZeroVecF32x4ToVecF16x8:
+    case DemoteZeroVecF64x2ToVecF16x8:
       type = Type::v128;
       break;
     case AnyTrueVec128:

@@ -613,6 +613,36 @@
    (local.get $0)
   )
  )
+ ;; CHECK-TEXT:      (func $f16x8.demote_f32x4_zero (type $1) (param $0 v128) (result v128)
+ ;; CHECK-TEXT-NEXT:  (f16x8.demote_f32x4_zero
+ ;; CHECK-TEXT-NEXT:   (local.get $0)
+ ;; CHECK-TEXT-NEXT:  )
+ ;; CHECK-TEXT-NEXT: )
+ ;; CHECK-BIN:      (func $f16x8.demote_f32x4_zero (type $1) (param $0 v128) (result v128)
+ ;; CHECK-BIN-NEXT:  (f16x8.demote_f32x4_zero
+ ;; CHECK-BIN-NEXT:   (local.get $0)
+ ;; CHECK-BIN-NEXT:  )
+ ;; CHECK-BIN-NEXT: )
+ (func $f16x8.demote_f32x4_zero (param $0 v128) (result v128)
+  (f16x8.demote_f32x4_zero
+   (local.get $0)
+  )
+ )
+ ;; CHECK-TEXT:      (func $f16x8.demote_f64x2_zero (type $1) (param $0 v128) (result v128)
+ ;; CHECK-TEXT-NEXT:  (f16x8.demote_f64x2_zero
+ ;; CHECK-TEXT-NEXT:   (local.get $0)
+ ;; CHECK-TEXT-NEXT:  )
+ ;; CHECK-TEXT-NEXT: )
+ ;; CHECK-BIN:      (func $f16x8.demote_f64x2_zero (type $1) (param $0 v128) (result v128)
+ ;; CHECK-BIN-NEXT:  (f16x8.demote_f64x2_zero
+ ;; CHECK-BIN-NEXT:   (local.get $0)
+ ;; CHECK-BIN-NEXT:  )
+ ;; CHECK-BIN-NEXT: )
+ (func $f16x8.demote_f64x2_zero (param $0 v128) (result v128)
+  (f16x8.demote_f64x2_zero
+   (local.get $0)
+  )
+ )
 )
 ;; CHECK-BIN-NODEBUG:      (type $0 (func (param v128 v128) (result v128)))
 
@@ -849,3 +879,15 @@
 ;; CHECK-BIN-NODEBUG-NEXT:   (local.get $0)
 ;; CHECK-BIN-NODEBUG-NEXT:  )
 ;; CHECK-BIN-NODEBUG-NEXT: )
+
+;; CHECK-BIN-NODEBUG:      (func $33 (type $1) (param $0 v128) (result v128)
+;; CHECK-BIN-NODEBUG-NEXT:  (f16x8.demote_f32x4_zero
+;; CHECK-BIN-NODEBUG-NEXT:   (local.get $0)
+;; CHECK-BIN-NODEBUG-NEXT:  )
+;; CHECK-BIN-NODEBUG-NEXT: )
+
+;; CHECK-BIN-NODEBUG:      (func $34 (type $1) (param $0 v128) (result v128)
+;; CHECK-BIN-NODEBUG-NEXT:  (f16x8.demote_f64x2_zero
+;; CHECK-BIN-NODEBUG-NEXT:   (local.get $0)
+;; CHECK-BIN-NODEBUG-NEXT:  )
+;; CHECK-BIN-NODEBUG-NEXT: )
@@ -39,6 +39,8 @@
   (func (export "f16x8.convert_i16x8_s") (param $0 v128) (result v128) (f16x8.convert_i16x8_s (local.get $0)))
   (func (export "f16x8.convert_i16x8_u") (param $0 v128) (result v128) (f16x8.convert_i16x8_u (local.get $0)))
   (func (export "f32x4.promote_low_f16x8") (param $0 v128) (result v128) (f32x4.promote_low_f16x8 (local.get $0)))
+  (func (export "f16x8.demote_f32x4_zero") (param $0 v128) (result v128) (f16x8.demote_f32x4_zero (local.get $0)))
+  (func (export "f16x8.demote_f64x2_zero") (param $0 v128) (result v128) (f16x8.demote_f64x2_zero (local.get $0)))
   ;; Multiple operation tests:
   (func (export "splat_replace") (result v128) (f16x8.replace_lane 0 (f16x8.splat (f32.const 1)) (f32.const 99))
  )
@@ -268,3 +270,55 @@
     (v128.const i16x8 0x0001 0      0 0 0 0 0 0))
     ;;                2^-24
     (v128.const i32x4 0x33800000 0      0 0))
+
+(assert_return (invoke "f16x8.demote_f32x4_zero"
+    ;;                1.0        2.0        3.0        4.0
+    (v128.const i32x4 0x3f800000 0x40000000 0x40400000 0x40800000))
+    ;;                1.0    2.0    3.0    4.0    0 0 0 0
+    (v128.const i16x8 0x3c00 0x4000 0x4200 0x4400 0 0 0 0))
+
+(assert_return (invoke "f16x8.demote_f64x2_zero"
+    ;;                1.0                2.0
+    (v128.const i64x2 0x3ff0000000000000 0x4000000000000000))
+    ;;                1.0    2.0    0 0 0 0 0 0
+    (v128.const i16x8 0x3c00 0x4000 0 0 0 0 0 0))
+
+;; Edge cases: Infinities, NaNs, Zeros
+(assert_return (invoke "f16x8.demote_f32x4_zero"
+    ;;                inf        -inf       nan        -0.0
+    (v128.const i32x4 0x7f800000 0xff800000 0x7fc00000 0x80000000))
+    ;;                inf    -inf   nan    -0.0   0 0 0 0
+    (v128.const i16x8 0x7c00 0xfc00 0x7e00 0x8000 0 0 0 0))
+
+;; Edge cases: Overflow
+(assert_return (invoke "f16x8.demote_f32x4_zero"
+    ;;                1e5        -1e5       65504      -65504
+    (v128.const i32x4 0x47c35000 0xc7c35000 0x477fe000 0xc77fe000))
+    ;;                inf    -inf   65504  -65504 0 0 0 0
+    (v128.const i16x8 0x7c00 0xfc00 0x7bff 0xfbff 0 0 0 0))
+
+;; Edge cases: Infinities, NaNs, Zeros
+(assert_return (invoke "f16x8.demote_f64x2_zero"
+    ;;                inf                -inf
+    (v128.const i64x2 0x7ff0000000000000 0xfff0000000000000))
+    ;;                inf    -inf   0 0 0 0 0 0
+    (v128.const i16x8 0x7c00 0xfc00 0 0 0 0 0 0))
+
+(assert_return (invoke "f16x8.demote_f64x2_zero"
+    ;;                nan                -0.0
+    (v128.const i64x2 0x7ff8000000000000 0x8000000000000000))
+    ;;                nan    -0.0   0 0 0 0 0 0
+    (v128.const i16x8 0x7e00 0x8000 0 0 0 0 0 0))
+
+;; Edge cases: Overflow
+(assert_return (invoke "f16x8.demote_f64x2_zero"
+    ;;                1e5                -1e5
+    (v128.const i64x2 0x40f86a0000000000 0xc0f86a0000000000))
+    ;;                inf    -inf   0 0 0 0 0 0
+    (v128.const i16x8 0x7c00 0xfc00 0 0 0 0 0 0))
+
+(assert_return (invoke "f16x8.demote_f64x2_zero"
+    ;;                65504              -65504
+    (v128.const i64x2 0x40effc0000000000 0xc0effc0000000000))
+    ;;                65504  -65504 0 0 0 0 0 0
+    (v128.const i16x8 0x7bff 0xfbff 0 0 0 0 0 0))