Skip to content

Commit 632f70a

Browse files
committed
Revert "Revert "Enable quant save/load through prepack fn registration (#3078)""
This reverts commit c6ea20b.
1 parent 674b51c commit 632f70a

File tree

11 files changed

+87
-235
lines changed

11 files changed

+87
-235
lines changed

csrc/gpu/CMakeLists.txt

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -150,10 +150,6 @@ if (USE_PROFILER)
150150
list(APPEND IPEX_COMPILE_DEFINITIONS "USE_PROFILER")
151151
endif()
152152

153-
if (BUILD_JIT_QUANTIZATION_SAVE)
154-
list(APPEND IPEX_COMPILE_DEFINITIONS "BUILD_JIT_QUANTIZATION_SAVE")
155-
endif()
156-
157153
if (USE_SPLIT_FP64_LOOPS)
158154
list(APPEND IPEX_COMPILE_DEFINITIONS "USE_SPLIT_FP64_LOOPS")
159155
endif()

csrc/gpu/aten/operators/QConv_prepack.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
#include <oneDNN/oneDNN.h>
33
#include <runtime/Utils.h>
44

5+
#include <ATen/native/quantized/PackedParams.h>
56
#include "comm/ParamUtils.h"
67

78
#include <quantized/QUtils.h>
@@ -123,3 +124,18 @@ TORCH_LIBRARY_IMPL(quantized, XPU, m) {
123124

124125
} // namespace AtenIpexTypeQuantizedXPU
125126
} // namespace at
127+
128+
int init_prepack_fn() {
129+
register_prepack<2>(
130+
at::QEngine::QXPU,
131+
at::AtenIpexTypeQuantizedXPU::PackedConvWeightQDPCPP<2>::prepack);
132+
register_prepack<3>(
133+
at::QEngine::QXPU,
134+
at::AtenIpexTypeQuantizedXPU::PackedConvWeightQDPCPP<3>::prepack);
135+
register_linear_prepack(
136+
at::QEngine::QXPU,
137+
at::AtenIpexTypeQuantizedXPU::PackedLinearWeightQDPCPP::prepack);
138+
return 1;
139+
}
140+
141+
auto xpu_prepack = init_prepack_fn();

csrc/gpu/aten/quantized/QTensor.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,11 @@ Tensor& set_(
131131
auto* self_ = self.unsafeGetTensorImpl();
132132
self_->set_storage_keep_dtype(storage);
133133
self_->set_storage_offset(storage_offset);
134-
self_->set_sizes_and_strides(sizes, strides);
134+
if (strides.data() == nullptr) {
135+
self_->set_sizes_contiguous(sizes);
136+
} else {
137+
self_->set_sizes_and_strides(sizes, strides);
138+
}
135139
return self;
136140
}
137141

csrc/gpu/aten/quantized/QUtils.cpp

Lines changed: 0 additions & 102 deletions
This file was deleted.

csrc/gpu/aten/quantized/QUtils.h

Lines changed: 1 addition & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
namespace xpu {
2222
namespace dpcpp {
23+
2324
// Note: [Opaque u8 tensor]
2425
// Due to the difference between oneDNN and PyTorch u8 quantization, we quant
2526
// tensor with kQUint8 and 128 zp to memory::data_type::s8 and 0 zp inside. This
@@ -326,93 +327,3 @@ struct PackedLinearWeightQDPCPP : public LinearPackedParamsBase {
326327

327328
} // namespace AtenIpexTypeQuantizedXPU
328329
} // namespace at
329-
330-
#ifdef BUILD_JIT_QUANTIZATION_SAVE
331-
332-
// Repeat torch type definition here again
333-
using ConvParamsSerializationTypeV2 = std::tuple<
334-
// version, for versions 2 and up
335-
std::string,
336-
// non-optional tensors
337-
std::vector<at::Tensor>,
338-
// optional tensors
339-
std::vector<c10::optional<at::Tensor>>>;
340-
using ConvParamsSerializationTypeV3 = std::tuple<
341-
// version, int for versions 3 and up
342-
int64_t,
343-
// configuration values
344-
std::vector<int64_t>,
345-
// optional tensors
346-
std::vector<c10::optional<at::Tensor>>>;
347-
348-
using ConvParamsSerializationType = ConvParamsSerializationTypeV2;
349-
350-
template <uint32_t kSpatialDim>
351-
c10::intrusive_ptr<ConvPackedParamsBase<kSpatialDim>> deserialize_conv_dpcpp(
352-
ConvParamsSerializationTypeV3 state) {
353-
int64_t version;
354-
std::vector<int64_t> config_vals;
355-
std::vector<c10::optional<at::Tensor>> tensors;
356-
357-
std::tie(version, config_vals, tensors) = state;
358-
TORCH_INTERNAL_ASSERT(
359-
version == 3, "Unexpected serialized qconv version: ", version);
360-
361-
TORCH_CHECK(tensors.size() == 3, "Wrong number of tensors", tensors.size());
362-
c10::optional<at::Tensor> weight = tensors[1];
363-
c10::optional<at::Tensor> bias = tensors[2];
364-
TORCH_INTERNAL_ASSERT(
365-
weight, "Weight should always be present in serialized qconv.");
366-
367-
torch::List<int64_t> stride, padding, output_padding, dilation;
368-
// skip kSpatialDim
369-
int idx = 1;
370-
for (const auto i : c10::irange(kSpatialDim)) {
371-
(void)i; // Suppress unused variable
372-
stride.emplace_back(config_vals.at(idx));
373-
idx++;
374-
}
375-
for (const auto i : c10::irange(kSpatialDim)) {
376-
(void)i; // Suppress unused variable
377-
padding.emplace_back(config_vals.at(idx));
378-
idx++;
379-
}
380-
for (const auto i : c10::irange(kSpatialDim)) {
381-
(void)i; // Suppress unused variable
382-
dilation.emplace_back(config_vals.at(idx));
383-
idx++;
384-
}
385-
for (const auto i : c10::irange(kSpatialDim)) {
386-
(void)i; // Suppress unused variable
387-
output_padding.emplace_back(config_vals.at(idx));
388-
idx++;
389-
}
390-
int64_t groups = config_vals.at(idx);
391-
idx++;
392-
int64_t flags = config_vals.at(idx);
393-
idx++;
394-
TORCH_INTERNAL_ASSERT(
395-
idx == static_cast<int64_t>(config_vals.size()),
396-
"Unexpected length of config_vals, expected ",
397-
idx,
398-
" got ",
399-
config_vals.size());
400-
401-
bool transpose = flags & (1 << 0);
402-
403-
int64_t other_flags = flags & ~(1 << 0);
404-
TORCH_INTERNAL_ASSERT(
405-
other_flags == 0, "Unexpected flags set in ", flags, ".");
406-
407-
return at::AtenIpexTypeQuantizedXPU::PackedConvWeightQDPCPP<kSpatialDim>::
408-
prepack(
409-
weight.value(),
410-
bias,
411-
stride,
412-
padding,
413-
output_padding,
414-
dilation,
415-
groups,
416-
transpose);
417-
}
418-
#endif

csrc/gpu/utils/Settings.cpp

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
#include <ATen/native/quantized/PackedParams.h>
12
#include <oneDNN/Runtime.h>
23
#include <runtime/Device.h>
34
#include <utils/Settings.h>
@@ -292,14 +293,6 @@ bool Settings::is_channels_last_1d_enabled() const {
292293
#endif
293294
}
294295

295-
bool Settings::is_jit_quantization_save_enabled() const {
296-
#if defined(BUILD_JIT_QUANTIZATION_SAVE)
297-
return true;
298-
#else
299-
return false;
300-
#endif
301-
}
302-
303296
bool Settings::is_xetla_enabled() const {
304297
#if defined(USE_XETLA)
305298
return true;

csrc/gpu/utils/Settings.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,6 @@ class IPEX_API Settings final {
7272
bool is_multi_context_enabled() const;
7373

7474
bool is_channels_last_1d_enabled() const;
75-
bool is_jit_quantization_save_enabled() const;
7675
bool is_xetla_enabled() const;
7776

7877
bool is_simple_trace_enabled() const;

intel_extension_for_pytorch/csrc/xpu/Module.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -677,10 +677,6 @@ void init_xpu_module(pybind11::module& m) {
677677
return Settings::I().is_multi_context_enabled();
678678
});
679679

680-
m.def("_is_jit_quantization_save_enabled", []() {
681-
return Settings::I().is_jit_quantization_save_enabled();
682-
});
683-
684680
m.def("_is_channels_last_1d_enabled", []() {
685681
return Settings::I().is_channels_last_1d_enabled();
686682
});

intel_extension_for_pytorch/xpu/utils.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -310,10 +310,6 @@ def disable_tile_as_device():
310310
################################################################
311311

312312

313-
def has_jit_quantization_save():
314-
return _C._is_jit_quantization_save_enabled()
315-
316-
317313
def has_xetla():
318314
return _C._is_xetla_enabled()
319315

0 commit comments

Comments (0)