Skip to content

Commit 73e9e3a

Browse files
apivovarovyuxuanchen1997
authored andcommitted
[MLIR] Add f8E4M3 IEEE 754 type (#97118)
Summary: This PR adds the `f8E4M3` type to MLIR. The `f8E4M3` type follows the IEEE 754 convention: ```c f8E4M3 (IEEE 754) - Exponent bias: 7 - Maximum stored exponent value: 14 (binary 1110) - Maximum unbiased exponent value: 14 - 7 = 7 - Minimum stored exponent value: 1 (binary 0001) - Minimum unbiased exponent value: 1 − 7 = −6 - Precision specifies the total number of bits used for the significand (mantissa), including the implicit leading integer bit = 3 + 1 = 4 - Follows IEEE 754 conventions for representation of special values - Has Positive and Negative zero - Has Positive and Negative infinity - Has NaNs Additional details: - Max exp (unbiased): 7 - Min exp (unbiased): -6 - Infinities (+/-): S.1111.000 - Zeros (+/-): S.0000.000 - NaNs: S.1111.{001, 010, 011, 100, 101, 110, 111} - Max normal number: S.1110.111 = +/-2^(7) x (1 + 0.875) = +/-240 - Min normal number: S.0001.000 = +/-2^(-6) - Max subnormal number: S.0000.111 = +/-2^(-6) x 0.875 = +/-2^(-9) x 7 - Min subnormal number: S.0000.001 = +/-2^(-6) x 0.125 = +/-2^(-9) ``` Related PRs: - [PR-97179](#97179) [APFloat] Add support for f8E4M3 IEEE 754 type Test Plan: Reviewers: Subscribers: Tasks: Tags: Differential Revision: https://phabricator.intern.facebook.com/D60251173
1 parent 8207ae2 commit 73e9e3a

File tree

24 files changed

+136
-10
lines changed

24 files changed

+136
-10
lines changed

mlir/include/mlir-c/BuiltinTypes.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,16 @@ MLIR_CAPI_EXPORTED bool mlirTypeIsAFloat8E5M2(MlirType type);
8989
/// context.
9090
MLIR_CAPI_EXPORTED MlirType mlirFloat8E5M2TypeGet(MlirContext ctx);
9191

92+
/// Returns the typeID of a Float8E4M3 type.
93+
MLIR_CAPI_EXPORTED MlirTypeID mlirFloat8E4M3TypeGetTypeID(void);
94+
95+
/// Checks whether the given type is an f8E4M3 type.
96+
MLIR_CAPI_EXPORTED bool mlirTypeIsAFloat8E4M3(MlirType type);
97+
98+
/// Creates an f8E4M3 type in the given context. The type is owned by the
99+
/// context.
100+
MLIR_CAPI_EXPORTED MlirType mlirFloat8E4M3TypeGet(MlirContext ctx);
101+
92102
/// Returns the typeID of a Float8E4M3FN type.
93103
MLIR_CAPI_EXPORTED MlirTypeID mlirFloat8E4M3FNTypeGetTypeID(void);
94104

mlir/include/mlir/IR/Builders.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class Builder {
6161

6262
// Types.
6363
FloatType getFloat8E5M2Type();
64+
FloatType getFloat8E4M3Type();
6465
FloatType getFloat8E4M3FNType();
6566
FloatType getFloat8E5M2FNUZType();
6667
FloatType getFloat8E4M3FNUZType();

mlir/include/mlir/IR/BuiltinTypes.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,7 @@ class FloatType : public Type {
6161
static FloatType getF80(MLIRContext *ctx);
6262
static FloatType getF128(MLIRContext *ctx);
6363
static FloatType getFloat8E5M2(MLIRContext *ctx);
64+
static FloatType getFloat8E4M3(MLIRContext *ctx);
6465
static FloatType getFloat8E4M3FN(MLIRContext *ctx);
6566
static FloatType getFloat8E5M2FNUZ(MLIRContext *ctx);
6667
static FloatType getFloat8E4M3FNUZ(MLIRContext *ctx);
@@ -410,16 +411,20 @@ inline bool BaseMemRefType::isValidElementType(Type type) {
410411
}
411412

412413
inline bool FloatType::classof(Type type) {
413-
return llvm::isa<Float8E5M2Type, Float8E4M3FNType, Float8E5M2FNUZType,
414-
Float8E4M3FNUZType, Float8E4M3B11FNUZType, BFloat16Type,
415-
Float16Type, FloatTF32Type, Float32Type, Float64Type,
416-
Float80Type, Float128Type>(type);
414+
return llvm::isa<
415+
Float8E5M2Type, Float8E4M3Type, Float8E4M3FNType, Float8E5M2FNUZType,
416+
Float8E4M3FNUZType, Float8E4M3B11FNUZType, BFloat16Type, Float16Type,
417+
FloatTF32Type, Float32Type, Float64Type, Float80Type, Float128Type>(type);
417418
}
418419

419420
inline FloatType FloatType::getFloat8E5M2(MLIRContext *ctx) {
420421
return Float8E5M2Type::get(ctx);
421422
}
422423

424+
inline FloatType FloatType::getFloat8E4M3(MLIRContext *ctx) {
425+
return Float8E4M3Type::get(ctx);
426+
}
427+
423428
inline FloatType FloatType::getFloat8E4M3FN(MLIRContext *ctx) {
424429
return Float8E4M3FNType::get(ctx);
425430
}

mlir/include/mlir/IR/BuiltinTypes.td

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,25 @@ def Builtin_Float8E5M2 : Builtin_FloatType<"Float8E5M2", "f8E5M2"> {
106106
}];
107107
}
108108

109+
//===----------------------------------------------------------------------===//
110+
// Float8E4M3Type
111+
112+
def Builtin_Float8E4M3 : Builtin_FloatType<"Float8E4M3", "f8E4M3"> {
113+
let summary = "8-bit floating point with 3 bit mantissa";
114+
let description = [{
115+
An 8-bit floating point type with 1 sign bit, 4 bits exponent and 3 bits
116+
mantissa. This is not a standard type as defined by IEEE-754, but it
117+
follows similar conventions with the following characteristics:
118+
119+
* bit encoding: S1E4M3
120+
* exponent bias: 7
121+
* infinities: supported with exponent set to all 1s and mantissa 0s
122+
* NaNs: supported with exponent bits set to all 1s and mantissa of
123+
(001, 010, 011, 100, 101, 110, 111)
124+
* denormals when exponent is 0
125+
}];
126+
}
127+
109128
//===----------------------------------------------------------------------===//
110129
// Float8E4M3FNType
111130

mlir/include/mlir/IR/CommonTypeConstraints.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -334,6 +334,8 @@ def F8E4M3FN : Type<CPred<"$_self.isFloat8E4M3FN()">, "f8E4M3FN type">,
334334
BuildableType<"$_builder.getFloat8E4M3FNType()">;
335335
def F8E5M2 : Type<CPred<"$_self.isFloat8E5M2()">, "f8E5M2 type">,
336336
BuildableType<"$_builder.getFloat8E5M2Type()">;
337+
def F8E4M3 : Type<CPred<"$_self.isFloat8E4M3()">, "f8E4M3 type">,
338+
BuildableType<"$_builder.getFloat8E4M3Type()">;
337339
def F8E4M3FNUZ : Type<CPred<"$_self.isFloat8E4M3FNUZ()">, "f8E4M3FNUZ type">,
338340
BuildableType<"$_builder.getFloat8E4M3FNUZType()">;
339341
def F8E4M3B11FNUZ : Type<CPred<"$_self.isFloat8E4M3B11FNUZ()">, "f8E4M3B11FNUZ type">,

mlir/include/mlir/IR/Types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ class Type {
126126
// derived types should use isa/dyn_cast.
127127
bool isIndex() const;
128128
bool isFloat8E5M2() const;
129+
bool isFloat8E4M3() const;
129130
bool isFloat8E4M3FN() const;
130131
bool isFloat8E5M2FNUZ() const;
131132
bool isFloat8E4M3FNUZ() const;

mlir/lib/AsmParser/TokenKinds.def

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,7 @@ TOK_KEYWORD(f32)
9595
TOK_KEYWORD(f64)
9696
TOK_KEYWORD(f80)
9797
TOK_KEYWORD(f8E5M2)
98+
TOK_KEYWORD(f8E4M3)
9899
TOK_KEYWORD(f8E4M3FN)
99100
TOK_KEYWORD(f8E5M2FNUZ)
100101
TOK_KEYWORD(f8E4M3FNUZ)

mlir/lib/AsmParser/TypeParser.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ OptionalParseResult Parser::parseOptionalType(Type &type) {
4040
case Token::kw_vector:
4141
case Token::inttype:
4242
case Token::kw_f8E5M2:
43+
case Token::kw_f8E4M3:
4344
case Token::kw_f8E4M3FN:
4445
case Token::kw_f8E5M2FNUZ:
4546
case Token::kw_f8E4M3FNUZ:
@@ -304,6 +305,9 @@ Type Parser::parseNonFunctionType() {
304305
case Token::kw_f8E5M2:
305306
consumeToken(Token::kw_f8E5M2);
306307
return builder.getFloat8E5M2Type();
308+
case Token::kw_f8E4M3:
309+
consumeToken(Token::kw_f8E4M3);
310+
return builder.getFloat8E4M3Type();
307311
case Token::kw_f8E4M3FN:
308312
consumeToken(Token::kw_f8E4M3FN);
309313
return builder.getFloat8E4M3FNType();

mlir/lib/Bindings/Python/IRTypes.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ class PyFloat8E4M3FNType
143143
}
144144
};
145145

146-
/// Floating Point Type subclass - Float8M5E2Type.
146+
/// Floating Point Type subclass - Float8E5M2Type.
147147
class PyFloat8E5M2Type : public PyConcreteType<PyFloat8E5M2Type, PyFloatType> {
148148
public:
149149
static constexpr IsAFunctionTy isaFunction = mlirTypeIsAFloat8E5M2;
@@ -163,6 +163,26 @@ class PyFloat8E5M2Type : public PyConcreteType<PyFloat8E5M2Type, PyFloatType> {
163163
}
164164
};
165165

166+
/// Floating Point Type subclass - Float8E4M3Type.
167+
class PyFloat8E4M3Type : public PyConcreteType<PyFloat8E4M3Type, PyFloatType> {
168+
public:
169+
static constexpr IsAFunctionTy isaFunction = mlirTypeIsAFloat8E4M3;
170+
static constexpr GetTypeIDFunctionTy getTypeIdFunction =
171+
mlirFloat8E4M3TypeGetTypeID;
172+
static constexpr const char *pyClassName = "Float8E4M3Type";
173+
using PyConcreteType::PyConcreteType;
174+
175+
static void bindDerived(ClassTy &c) {
176+
c.def_static(
177+
"get",
178+
[](DefaultingPyMlirContext context) {
179+
MlirType t = mlirFloat8E4M3TypeGet(context->get());
180+
return PyFloat8E4M3Type(context->getRef(), t);
181+
},
182+
py::arg("context") = py::none(), "Create a float8_e4m3 type.");
183+
}
184+
};
185+
166186
/// Floating Point Type subclass - Float8E4M3FNUZType.
167187
class PyFloat8E4M3FNUZType
168188
: public PyConcreteType<PyFloat8E4M3FNUZType, PyFloatType> {
@@ -840,6 +860,7 @@ void mlir::python::populateIRTypes(py::module &m) {
840860
PyIndexType::bind(m);
841861
PyFloat8E4M3FNType::bind(m);
842862
PyFloat8E5M2Type::bind(m);
863+
PyFloat8E4M3Type::bind(m);
843864
PyFloat8E4M3FNUZType::bind(m);
844865
PyFloat8E4M3B11FNUZType::bind(m);
845866
PyFloat8E5M2FNUZType::bind(m);

mlir/lib/CAPI/IR/BuiltinTypes.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,18 @@ MlirType mlirFloat8E5M2TypeGet(MlirContext ctx) {
9797
return wrap(FloatType::getFloat8E5M2(unwrap(ctx)));
9898
}
9999

100+
MlirTypeID mlirFloat8E4M3TypeGetTypeID() {
101+
return wrap(Float8E4M3Type::getTypeID());
102+
}
103+
104+
bool mlirTypeIsAFloat8E4M3(MlirType type) {
105+
return unwrap(type).isFloat8E4M3();
106+
}
107+
108+
MlirType mlirFloat8E4M3TypeGet(MlirContext ctx) {
109+
return wrap(FloatType::getFloat8E4M3(unwrap(ctx)));
110+
}
111+
100112
MlirTypeID mlirFloat8E4M3FNTypeGetTypeID() {
101113
return wrap(Float8E4M3FNType::getTypeID());
102114
}

0 commit comments

Comments
 (0)