Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions mlir/include/mlir/Dialect/DXSA/IR/DXSADoubleArithOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -305,4 +305,52 @@ def DXSA_DRcpSat : DXSA_UnaryOp<"drcp_sat"> {
}];
}

//===----------------------------------------------------------------------===//
// dxsa.dfma
//===----------------------------------------------------------------------===//

// Non-saturating double-precision multiply-add with one destination and three
// source operands. The binary parser produces this op for
// D3D11_1_SB_OPCODE_DFMA tokens whose saturate bit is clear; tokens with the
// bit set become DXSA_DFmaSat instead. The op may carry a `precise` component
// mask, and source operands may carry a negate modifier.
def DXSA_DFma : DXSA_MultiplyAddOp<"dfma"> {
let summary = "component-wise double-precision multiply-add";
let description = [{
The `dxsa.dfma` operation computes the component-wise double-precision
multiply-add `$dst = $lhs * $rhs + $acc`. Each operand holds a vector of
doubles, one double per `xy` and `zw` component pair.

Because each double spans a component pair, the destination write mask must
be `<x, y>`, `<z, w>`, or `<x, y, z, w>`, and each source swizzle must be
one of `<x, y, z, w>`, `<x, y, x, y>`, `<z, w, x, y>`, or `<z, w, z, w>`.

Example:

```mlir
dxsa.dfma r<0>, r<1>, r<2>, r<3>
dxsa.dfma r<1, <x, y>>, r<1, <x, y, x, y>>, r<1, <z, w, z, w>>, r<2, <x, y, x, y>>
```
}];
}

//===----------------------------------------------------------------------===//
// dxsa.dfma_sat
//===----------------------------------------------------------------------===//

// Saturating variant of DXSA_DFma: same operands and operand constraints, but
// each result component is clamped to [0.0, 1.0] before being written. The
// binary parser produces this op for D3D11_1_SB_OPCODE_DFMA tokens whose
// saturate bit is set.
def DXSA_DFmaSat : DXSA_MultiplyAddOp<"dfma_sat"> {
let summary = "component-wise double-precision multiply-add, saturated to [0, 1]";
let description = [{
The `dxsa.dfma_sat` operation computes the component-wise double-precision
multiply-add `$lhs * $rhs + $acc`, clamps each result component to
`[0.0, 1.0]`, and writes it to `$dst`. Each operand holds a vector of
doubles, one double per `xy` and `zw` component pair.

Because each double spans a component pair, the destination write mask must
be `<x, y>`, `<z, w>`, or `<x, y, z, w>`, and each source swizzle must be
one of `<x, y, z, w>`, `<x, y, x, y>`, `<z, w, x, y>`, or `<z, w, z, w>`.

Example:

```mlir
dxsa.dfma_sat r<0>, r<1>, r<2>, r<3>
dxsa.dfma_sat r<1, <x, y>>, r<1, <x, y, x, y>>, r<1, <z, w, z, w>>, r<2, <x, y, x, y>>
```
}];
}

#endif // MLIR_DIALECT_DXSA_IR_DXSADOUBLEARITHOPS
2 changes: 2 additions & 0 deletions mlir/lib/Target/DXSA/BinaryParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2572,6 +2572,8 @@ class Parser {
return SATURABLE_OP(DMul, 1, 2, HasPreciseAttr::Yes);
case D3D11_1_SB_OPCODE_DDIV:
return SATURABLE_OP(DDiv, 1, 2, HasPreciseAttr::Yes);
case D3D11_1_SB_OPCODE_DFMA:
return SATURABLE_OP(DFma, 1, 3, HasPreciseAttr::Yes);
case D3D11_1_SB_OPCODE_DRCP:
return SATURABLE_OP(DRcp, 1, 1, HasPreciseAttr::Yes);
// Other instructions
Expand Down
84 changes: 84 additions & 0 deletions mlir/test/Target/DXSA/double_arith_ops.test
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,87 @@
// CHECK-NEXT: dxsa.drcp_sat r<0>, r<1>
// CHECK-NEXT: }
0x050020d4, 0x001000f2, 0x00000000, 0x00100e46, 0x00000001

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma precise <x, y, z, w> r<0>, r<0>, r<1>, r<2>
// CHECK-NEXT: }
0x097800D3, 0x001000F2, 0x00000000, 0x00100E46, 0x00000000, 0x00100E46, 0x00000001, 0x00100E46, 0x00000002

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat precise <x, y, z, w> r<1>, r<1>, r<2>, r<3>
// CHECK-NEXT: }
0x097820D3, 0x001000F2, 0x00000001, 0x00100E46, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma r<1>, r<1>, r<2>, r<3>
// CHECK-NEXT: }
0x090000D3, 0x001000F2, 0x00000001, 0x00100E46, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat r<1>, r<1>, r<2>, r<3>
// CHECK-NEXT: }
0x090020D3, 0x001000F2, 0x00000001, 0x00100E46, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma r<1>, -r<1>, r<2>, r<3>
// CHECK-NEXT: }
0x0A0000D3, 0x001000F2, 0x00000001, 0x80100E46, 0x00000041, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat r<1>, -r<1>, r<2>, r<3>
// CHECK-NEXT: }
0x0A0020D3, 0x001000F2, 0x00000001, 0x80100E46, 0x00000041, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma r<1>, -r<1>, r<2>, -r<3>
// CHECK-NEXT: }
0x0B0000D3, 0x001000F2, 0x00000001, 0x80100E46, 0x00000041, 0x00000001, 0x00100E46, 0x00000002, 0x80100E46, 0x00000041, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat r<1>, -r<1>, r<2>, -r<3>
// CHECK-NEXT: }
0x0B0020D3, 0x001000F2, 0x00000001, 0x80100E46, 0x00000041, 0x00000001, 0x00100E46, 0x00000002, 0x80100E46, 0x00000041, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma r<1>, r<1, <z, w, x, y>>, r<2>, r<3>
// CHECK-NEXT: }
0x090000D3, 0x001000F2, 0x00000001, 0x001004E6, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat r<1>, r<1, <z, w, x, y>>, r<2>, r<3>
// CHECK-NEXT: }
0x090020D3, 0x001000F2, 0x00000001, 0x001004E6, 0x00000001, 0x00100E46, 0x00000002, 0x00100E46, 0x00000003

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma r<1, <x, y>>, r<1, <x, y, x, y>>, r<1, <z, w, z, w>>, r<2, <x, y, x, y>>
// CHECK-NEXT: }
0x090000D3, 0x00100032, 0x00000001, 0x00100446, 0x00000001, 0x00100EE6, 0x00000001, 0x00100446, 0x00000002

// -----

// CHECK-LABEL: dxsa.module {
// CHECK-NEXT: dxsa.dfma_sat r<1, <x, y>>, r<1, <x, y, x, y>>, r<1, <z, w, z, w>>, r<2, <x, y, x, y>>
// CHECK-NEXT: }
0x090020D3, 0x00100032, 0x00000001, 0x00100446, 0x00000001, 0x00100EE6, 0x00000001, 0x00100446, 0x00000002