|
158 | 158 | },
159 | 159 | {
160 | 160 | "cell_type": "code",
161 |     | - "execution_count": 6,
162 |     | - "metadata": {},
163 |     | - "outputs": [
164 |     | - {
165 |     | - "name": "stdout",
166 |     | - "output_type": "stream",
167 |     | - "text": [
168 |     | - "Cloning into 'deform_conv2d_onnx_exporter'...\n",
169 |     | - "remote: Enumerating objects: 205, done.\u001b[K\n",
170 |     | - "remote: Counting objects: 100% (7/7), done.\u001b[K\n",
171 |     | - "remote: Total 205 (delta 6), reused 6 (delta 6), pack-reused 198 (from 1)\u001b[K\n",
172 |     | - "Receiving objects: 100% (205/205), 36.21 KiB | 170.00 KiB/s, done.\n",
173 |     | - "Resolving deltas: 100% (102/102), done.\n"
174 |     | - ]
175 |     | - }
176 |     | - ],
177 |     | - "source": [
178 |     | - "!git clone https://github.com/masamitsu-murase/deform_conv2d_onnx_exporter\n",
179 |     | - "%cp deform_conv2d_onnx_exporter/src/deform_conv2d_onnx_exporter.py .\n",
180 |     | - "!rm -rf deform_conv2d_onnx_exporter"
181 |     | - ]
182 |     | - },
183 |     | - {
184 |     | - "cell_type": "code",
185 |     | - "execution_count": 7,
    | 161 | + "execution_count": null,
186 | 162 | "metadata": {},
187 | 163 | "outputs": [],
188 | 164 | "source": [
189 |     | - "with open('deform_conv2d_onnx_exporter.py') as fp:\n",
190 |     | - "    file_lines = fp.read()\n",
    | 165 | + "from torch.onnx.symbolic_helper import parse_args\n",
    | 166 | + "from torch.onnx import register_custom_op_symbolic\n",
191 | 167 | "\n",
192 |     | - "file_lines = file_lines.replace(\n",
193 |     | - "    \"return sym_help._get_tensor_dim_size(tensor, dim)\",\n",
194 |     | - "    '''\n",
195 |     | - "    tensor_dim_size = sym_help._get_tensor_dim_size(tensor, dim)\n",
196 |     | - "    if tensor_dim_size == None and (dim == 2 or dim == 3):\n",
197 |     | - "        import typing\n",
198 |     | - "        from torch import _C\n",
199 | 168 | "\n",
200 |     | - "        x_type = typing.cast(_C.TensorType, tensor.type())\n",
201 |     | - "        x_strides = x_type.strides()\n",
    | 169 | + "@parse_args(\n",
    | 170 | + "    \"v\",  # arg0: input (tensor)\n",
    | 171 | + "    \"v\",  # arg1: weight (tensor)\n",
    | 172 | + "    \"v\",  # arg2: offset (tensor)\n",
    | 173 | + "    \"v\",  # arg3: mask (tensor)\n",
    | 174 | + "    \"v\",  # arg4: bias (tensor)\n",
    | 175 | + "    \"i\",  # arg5: stride_h\n",
    | 176 | + "    \"i\",  # arg6: stride_w\n",
    | 177 | + "    \"i\",  # arg7: pad_h\n",
    | 178 | + "    \"i\",  # arg8: pad_w\n",
    | 179 | + "    \"i\",  # arg9: dilation_h\n",
    | 180 | + "    \"i\",  # arg10: dilation_w\n",
    | 181 | + "    \"i\",  # arg11: groups\n",
    | 182 | + "    \"i\",  # arg12: deform_groups\n",
    | 183 | + "    \"b\",  # arg13: use_mask (bool)\n",
    | 184 | + ")\n",
    | 185 | + "def symbolic_deform_conv_19(\n",
    | 186 | + "    g,\n",
    | 187 | + "    input,\n",
    | 188 | + "    weight,\n",
    | 189 | + "    offset,\n",
    | 190 | + "    mask,\n",
    | 191 | + "    bias,\n",
    | 192 | + "    stride_h,\n",
    | 193 | + "    stride_w,\n",
    | 194 | + "    pad_h,\n",
    | 195 | + "    pad_w,\n",
    | 196 | + "    dilation_h,\n",
    | 197 | + "    dilation_w,\n",
    | 198 | + "    groups,\n",
    | 199 | + "    deform_groups,\n",
    | 200 | + "    use_mask,\n",
    | 201 | + "):\n",
    | 202 | + "    # Re-assemble the scalar arguments into the list attributes ONNX expects.\n",
    | 203 | + "    strides = [stride_h, stride_w]\n",
    | 204 | + "    pads = [pad_h, pad_w, pad_h, pad_w]\n",
    | 205 | + "    dilations = [dilation_h, dilation_w]\n",
202 | 206 | "\n",
203 |     | - "        tensor_dim_size = x_strides[2] if dim == 3 else x_strides[1] // x_strides[2]\n",
204 |     | - "    elif tensor_dim_size == None and (dim == 0):\n",
205 |     | - "        import typing\n",
206 |     | - "        from torch import _C\n",
    | 207 | + "    # If bias were None, it could be replaced with an empty constant, e.g.:\n",
    | 208 | + "    # if bias.node().kind() == \"prim::Constant\" and bias.node()[\"value\"] is None:\n",
    | 209 | + "    #     bias = g.op(\"Constant\", value_t=torch.tensor([], dtype=torch.float32))\n",
    | 210 | + "    #\n",
    | 211 | + "    # For this model bias is a real tensor (shape [256]), so that path is not needed.\n",
207 | 212 | "\n",
208 |     | - "        x_type = typing.cast(_C.TensorType, tensor.type())\n",
209 |     | - "        x_strides = x_type.strides()\n",
210 |     | - "        tensor_dim_size = x_strides[3]\n",
    | 213 | + "    # The same holds for mask here; add an analogous check if a None mask\n",
    | 214 | + "    # ever needs to be handled.\n",
    | 215 | + "\n",
    | 216 | + "    # Emit the standard ONNX DeformConv op (introduced in opset 19).\n",
    | 217 | + "    # It lives in the default domain, so no domain prefix is needed.\n",
    | 218 | + "    return g.op(\n",
    | 219 | + "        \"DeformConv\",\n",
    | 220 | + "        input,\n",
    | 221 | + "        weight,\n",
    | 222 | + "        offset,\n",
    | 223 | + "        bias,\n",
    | 224 | + "        mask,\n",
    | 225 | + "        strides_i=strides,\n",
    | 226 | + "        pads_i=pads,\n",
    | 227 | + "        dilations_i=dilations,\n",
    | 228 | + "        group_i=groups,\n",
    | 229 | + "        offset_group_i=deform_groups,\n",
    | 230 | + "        # use_mask is not forwarded: the standard DeformConv op has no such attribute.\n",
    | 231 | + "    )\n",
211 | 232 | "\n",
212 |     | - "    return tensor_dim_size\n",
213 |     | - "    ''',\n",
214 |     | - ")\n",
215 | 233 | "\n",
216 |     | - "with open('deform_conv2d_onnx_exporter.py', mode=\"w\") as fp:\n",
217 |     | - "    fp.write(file_lines)"
    | 234 | + "register_custom_op_symbolic(\n",
    | 235 | + "    \"torchvision::deform_conv2d\",  # name of the torchvision custom op\n",
    | 236 | + "    symbolic_deform_conv_19,\n",
    | 237 | + "    opset_version=19,\n",
    | 238 | + ")"
218 | 239 | ]
219 | 240 | },
220 | 241 | {
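As a quick sanity check for the hunk above, the freshly registered symbolic can be exercised on a bare DeformConv2d layer before exporting the full model. The sketch below is illustrative only: the layer sizes, the offset/mask shapes, and the deform_check.onnx file name are made up, and it assumes the onnx package is installed and the registration cell has already run. It exports at opset 19 and checks that a DeformConv node actually appears in the graph.

import torch
import onnx
from torchvision.ops import DeformConv2d

# Tiny stand-in layer; all sizes here are arbitrary.
layer = DeformConv2d(3, 8, kernel_size=3, padding=1).eval()

x = torch.randn(1, 3, 16, 16)
# offset carries 2 * kH * kW channels per offset group -> 2 * 3 * 3 = 18
offset = torch.randn(1, 18, 16, 16)
# mask carries kH * kW channels per offset group -> 9
mask = torch.rand(1, 9, 16, 16)

torch.onnx.export(layer, (x, offset, mask), "deform_check.onnx", opset_version=19)

graph = onnx.load("deform_check.onnx").graph
assert any(node.op_type == "DeformConv" for node in graph.node), "custom symbolic was not used"
print("DeformConv node found in the exported graph")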
|
|
265 | 286 | ],
266 | 287 | "source": [
267 | 288 | "from torchvision.ops.deform_conv import DeformConv2d\n",
268 |     | - "import deform_conv2d_onnx_exporter\n",
269 | 289 | "\n",
270 |     | - "# register deform_conv2d operator\n",
271 |     | - "deform_conv2d_onnx_exporter.register_deform_conv2d_onnx_op()\n",
272 | 290 | "\n",
273 |     | - "def convert_to_onnx(net, file_name='output.onnx', input_shape=(1024, 1024), device=device):\n",
    | 291 | + "def convert_to_onnx(\n",
    | 292 | + "    net, file_name=\"output.onnx\", input_shape=(1024, 1024), device=device\n",
    | 293 | + "):\n",
274 | 294 | "    input = torch.randn(1, 3, input_shape[0], input_shape[1]).to(device)\n",
275 | 295 | "\n",
276 |     | - "    input_layer_names = ['input_image']\n",
277 |     | - "    output_layer_names = ['output_image']\n",
    | 296 | + "    input_layer_names = [\"input_image\"]\n",
    | 297 | + "    output_layer_names = [\"output_image\"]\n",
278 | 298 | "\n",
279 | 299 | "    torch.onnx.export(\n",
280 | 300 | "        net,\n",
281 | 301 | "        input,\n",
282 | 302 | "        file_name,\n",
283 | 303 | "        verbose=False,\n",
284 |     | - "        opset_version=17,\n",
    | 304 | + "        opset_version=20,\n",
285 | 305 | "        input_names=input_layer_names,\n",
286 | 306 | "        output_names=output_layer_names,\n",
    | 307 | + "        dynamic_axes={\"input_image\": [0]},\n",
287 | 308 | "    )\n",
288 |     | - "convert_to_onnx(birefnet, weights_file.replace('.pth', '.onnx'), input_shape=(1024, 1024), device=device)"
    | 309 | + "\n",
    | 310 | + "\n",
    | 311 | + "convert_to_onnx(\n",
    | 312 | + "    birefnet,\n",
    | 313 | + "    weights_file.replace(\".pth\", \".onnx\"),\n",
    | 314 | + "    input_shape=(1024, 1024),\n",
    | 315 | + "    device=device,\n",
    | 316 | + ")\n"
289 | 317 | ]
290 | 318 | },
291 | 319 | {
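Once the export has run, the dynamic batch axis and the DeformConv node can be checked end to end. The sketch below is a hypothetical follow-up, not part of the notebook: it assumes onnxruntime is installed and that the runtime build actually implements the opset-19 DeformConv operator, and it uses the default output.onnx name as a stand-in for whatever weights_file.replace(".pth", ".onnx") produced. input_image and output_image are the tensor names set in the export call above.

import numpy as np
import onnxruntime as ort

# Placeholder path; substitute the file written by convert_to_onnx above.
session = ort.InferenceSession("output.onnx", providers=["CPUExecutionProvider"])

# dynamic_axes={"input_image": [0]} left the batch dimension symbolic,
# so batch sizes other than 1 should be accepted as well.
for batch in (1, 2):
    x = np.random.rand(batch, 3, 1024, 1024).astype(np.float32)
    (out,) = session.run(["output_image"], {"input_image": x})
    print(batch, out.shape)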
|
|
451 | 479 | "name": "python",
452 | 480 | "nbconvert_exporter": "python",
453 | 481 | "pygments_lexer": "ipython3",
454 |     | - "version": "3.9.20"
    | 482 | + "version": "3.12.0"
455 | 483 | }
456 | 484 | },
457 | 485 | "nbformat": 4,
|
|