Skip to content

Commit 632f70a

Browse files
committed
Revert "Revert "Enable quant save/load through prepack fn registration (#3078)""
This reverts commit c6ea20b.
1 parent 674b51c commit 632f70a

File tree

11 files changed

+87
-235
lines changed

11 files changed

+87
-235
lines changed

csrc/gpu/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,6 @@ if (USE_PROFILER)
150150
list(APPEND IPEX_COMPILE_DEFINITIONS "USE_PROFILER")
151151
endif()
152152

153-
if (BUILD_JIT_QUANTIZATION_SAVE)
154-
list(APPEND IPEX_COMPILE_DEFINITIONS "BUILD_JIT_QUANTIZATION_SAVE")
155-
endif()
156-
157153
if (USE_SPLIT_FP64_LOOPS)
158154
list(APPEND IPEX_COMPILE_DEFINITIONS "USE_SPLIT_FP64_LOOPS")
159155
endif()

csrc/gpu/aten/operators/QConv_prepack.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <oneDNN/oneDNN.h>
33
#include <runtime/Utils.h>
44

5+
#include <ATen/native/quantized/PackedParams.h>
56
#include "comm/ParamUtils.h"
67

78
#include <quantized/QUtils.h>
@@ -123,3 +124,18 @@ TORCH_LIBRARY_IMPL(quantized, XPU, m) {
123124

124125
} // namespace AtenIpexTypeQuantizedXPU
125126
} // namespace at
127+
128+
int init_prepack_fn() {
129+
register_prepack<2>(
130+
at::QEngine::QXPU,
131+
at::AtenIpexTypeQuantizedXPU::PackedConvWeightQDPCPP<2>::prepack);
132+
register_prepack<3>(
133+
at::QEngine::QXPU,
134+
at::AtenIpexTypeQuantizedXPU::PackedConvWeightQDPCPP<3>::prepack);
135+
register_linear_prepack(
136+
at::QEngine::QXPU,
137+
at::AtenIpexTypeQuantizedXPU::PackedLinearWeightQDPCPP::prepack);
138+
return 1;
139+
}
140+
141+
auto xpu_prepack = init_prepack_fn();

csrc/gpu/aten/quantized/QTensor.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,11 @@ Tensor& set_(
131131
auto* self_ = self.unsafeGetTensorImpl();
132132
self_->set_storage_keep_dtype(storage);
133133
self_->set_storage_offset(storage_offset);
134-
self_->set_sizes_and_strides(sizes, strides);
134+
if (strides.data() == nullptr) {
135+
self_->set_sizes_contiguous(sizes);
136+
} else {
137+
self_->set_sizes_and_strides(sizes, strides);
138+
}
135139
return self;
136140
}
137141

csrc/gpu/aten/quantized/QUtils.cpp

Lines changed: 0 additions & 102 deletions
This file was deleted.

csrc/gpu/aten/quantized/QUtils.h

Lines changed: 1 addition & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
namespace xpu {
2222
namespace dpcpp {
23+
2324
// Note: [Opaque u8 tensor]
2425
// Due to the difference between oneDNN and PyTorch u8 quantization, we quant
2526
// tensor with kQUint8 and 128 zp to memory::data_type::s8 and 0 zp inside. This
@@ -326,93 +327,3 @@ struct PackedLinearWeightQDPCPP : public LinearPackedParamsBase {
326327

327328
} // namespace AtenIpexTypeQuantizedXPU
328329
} // namespace at
329-
330-
#ifdef BUILD_JIT_QUANTIZATION_SAVE
331-
332-
// Repeat torch type definition here again
333-
using ConvParamsSerializationTypeV2 = std::tuple<
334-
// version, for versions 2 and up
335-
std::string,
336-
// non-optional tensors
337-
std::vector<at::Tensor>,
338-
// optional tensors
339-
std::vector<c10::optional<at::Tensor>>>;
340-
using ConvParamsSerializationTypeV3 = std::tuple<
341-
// version, int for versions 3 and up
342-
int64_t,
343-
// configuration values
344-
std::vector<int64_t>,
345-
// optional tensors
346-
std::vector<c10::optional<at::Tensor>>>;
347-
348-
using ConvParamsSerializationType = ConvParamsSerializationTypeV2;
349-
350-
template <uint32_t kSpatialDim>
351-
c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> deserialize_conv_dpcpp(
352-
ConvParamsSerializationTypeV3 state) {
353-
int64_t version;
354-
std::vector<int64_t> config_vals;
355-
std::vector<c10::optional<at::Tensor>> tensors;
356-
357-
std::tie(version, config_vals, tensors) = state;
358-
TORCH_INTERNAL_ASSERT(
359-
version == 3, "Unexpected serialized qconv version: ", version);
360-
361-
TORCH_CHECK(tensors.size() == 3, "Wrong number of tensors", tensors.size());
362-
c10::optional<at::Tensor> weight = tensors[1];
363-
c10::optional<at::Tensor> bias = tensors[2];
364-
TORCH_INTERNAL_ASSERT(
365-
weight, "Weight should always be present in serialized qconv.");
366-
367-
torch::List<int64_t> stride, padding, output_padding, dilation;
368-
// skip kSpatialDim
369-
int idx = 1;
370-
for (const auto i : c10::irange(kSpatialDim)) {
371-
(void)i; // Suppress unused variable
372-
stride.emplace_back(config_vals.at(idx));
373-
idx++;
374-
}
375-
for (const auto i : c10::irange(kSpatialDim)) {
376-
(void)i; // Suppress unused variable
377-
padding.emplace_back(config_vals.at(idx));
378-
idx++;
379-
}
380-
for (const auto i : c10::irange(kSpatialDim)) {
381-
(void)i; // Suppress unused variable
382-
dilation.emplace_back(config_vals.at(idx));
383-
idx++;
384-
}
385-
for (const auto i : c10::irange(kSpatialDim)) {
386-
(void)i; // Suppress unused variable
387-
output_padding.emplace_back(config_vals.at(idx));
388-
idx++;
389-
}
390-
int64_t groups = config_vals.at(idx);
391-
idx++;
392-
int64_t flags = config_vals.at(idx);
393-
idx++;
394-
TORCH_INTERNAL_ASSERT(
395-
idx == static_cast<int64_t>(config_vals.size()),
396-
"Unexpected length of config_vals, expected ",
397-
idx,
398-
" got ",
399-
config_vals.size());
400-
401-
bool transpose = flags & (1 << 0);
402-
403-
int64_t other_flags = flags & ~(1 << 0);
404-
TORCH_INTERNAL_ASSERT(
405-
other_flags == 0, "Unexpected flags set in ", flags, ".");
406-
407-
return at::AtenIpexTypeQuantizedXPU::PackedConvWeightQDPCPP<kSpatialDim>::
408-
prepack(
409-
weight.value(),
410-
bias,
411-
stride,
412-
padding,
413-
output_padding,
414-
dilation,
415-
groups,
416-
transpose);
417-
}
418-
#endif

csrc/gpu/utils/Settings.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <ATen/native/quantized/PackedParams.h>
12
#include <oneDNN/Runtime.h>
23
#include <runtime/Device.h>
34
#include <utils/Settings.h>
@@ -292,14 +293,6 @@ bool Settings::is_channels_last_1d_enabled() const {
292293
#endif
293294
}
294295

295-
bool Settings::is_jit_quantization_save_enabled() const {
296-
#if defined(BUILD_JIT_QUANTIZATION_SAVE)
297-
return true;
298-
#else
299-
return false;
300-
#endif
301-
}
302-
303296
bool Settings::is_xetla_enabled() const {
304297
#if defined(USE_XETLA)
305298
return true;

csrc/gpu/utils/Settings.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ class IPEX_API Settings final {
7272
bool is_multi_context_enabled() const;
7373

7474
bool is_channels_last_1d_enabled() const;
75-
bool is_jit_quantization_save_enabled() const;
7675
bool is_xetla_enabled() const;
7776

7877
bool is_simple_trace_enabled() const;

intel_extension_for_pytorch/csrc/xpu/Module.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -677,10 +677,6 @@ void init_xpu_module(pybind11::module& m) {
677677
return Settings::I().is_multi_context_enabled();
678678
});
679679

680-
m.def("_is_jit_quantization_save_enabled", []() {
681-
return Settings::I().is_jit_quantization_save_enabled();
682-
});
683-
684680
m.def("_is_channels_last_1d_enabled", []() {
685681
return Settings::I().is_channels_last_1d_enabled();
686682
});

intel_extension_for_pytorch/xpu/utils.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -310,10 +310,6 @@ def disable_tile_as_device():
310310
################################################################
311311

312312

313-
def has_jit_quantization_save():
314-
return _C._is_jit_quantization_save_enabled()
315-
316-
317313
def has_xetla():
318314
return _C._is_xetla_enabled()
319315

0 commit comments

Comments (0)