Add support for SD3 testing along with a refactor of the suite. #266

Status: Open · wants to merge 14 commits into base: main · Changes from all commits
28 changes: 16 additions & 12 deletions .github/workflows/test_iree.yml
@@ -101,15 +101,17 @@ jobs:
           - name: cpu_llvm_task
             runs-on: nodai-amdgpu-w7900-x86-64
             models-config-file: config_pytorch_models_cpu_llvm_task.json
-            sdxl-prompt-encoder-config-file: config_sdxl_prompt_encoder_cpu_llvm_task.json
-            sdxl-unet-config-file: config_sdxl_scheduled_unet_cpu_llvm_task.json
-            sdxl-vae-decode-config-file: config_sdxl_vae_decode_cpu_llvm_task.json
+            sdxl-prompt-encoder-config-file: config_sd_prompt_encoder_cpu_llvm_task.json
+            sdxl-unet-config-file: config_sd_scheduled_unet_cpu_llvm_task.json
+            sdxl-vae-decode-config-file: config_sd_vae_decode_cpu_llvm_task.json
+            backend: cpu
           - name: gpu_mi250_rocm
             runs-on: nodai-amdgpu-mi250-x86-64
             models-config-file: config_gpu_rocm_models.json
-            sdxl-prompt-encoder-config-file: config_sdxl_prompt_encoder_gpu_rocm.json
-            sdxl-unet-config-file: config_sdxl_scheduled_unet_gpu_rocm.json
-            sdxl-vae-decode-config-file: config_sdxl_vae_decode_gpu_rocm.json
+            sdxl-prompt-encoder-config-file: config_sd_prompt_encoder_gpu_rocm.json
+            sdxl-unet-config-file: config_sd_scheduled_unet_gpu_rocm.json
+            sdxl-vae-decode-config-file: config_sd_vae_decode_gpu_rocm.json
+            backend: rocm
     env:
       VENV_DIR: ${{ github.workspace }}/.venv
       IREE_TEST_FILES: ~/iree_tests_cache
@@ -163,12 +165,12 @@ jobs:
             --durations=0 \
             --config-files=${MODELS_CONFIG_FILE_PATH}

-      - name: "Running real weights SDXL prompt encoder tests"
+      - name: "Running real weights SDXL + SD3 prompt encoder tests"
         id: prompt_encoder
         if: ${{ !cancelled() }}
         run: |
           source ${VENV_DIR}/bin/activate
-          pytest iree_tests/pytorch/models/sdxl-prompt-encoder-tank \
+          pytest iree_tests/pytorch/models/sd-clip \
             -rpfE \
             -k real_weights \
             --no-skip-tests-missing-files \
@@ -178,12 +180,12 @@
             --durations=0 \
             --config-files=${SDXL_PROMPT_ENCODER_CONFIG_FILE_PATH}

-      - name: "Running real weights SDXL scheduled unet tests"
+      - name: "Running real weights SDXL + SD3 scheduled unet/mmdit tests"
         id: unet
         if: ${{ !cancelled() }}
         run: |
           source ${VENV_DIR}/bin/activate
-          pytest iree_tests/pytorch/models/sdxl-scheduled-unet-3-tank \
+          pytest iree_tests/pytorch/models/sd-unet \
             -rpfE \
             -k real_weights \
             --no-skip-tests-missing-files \
@@ -192,13 +194,15 @@
             --timeout=1200 \
             --durations=0 \
             --config-files=${SDXL_UNET_CONFIG_FILE_PATH}
+        env:
+          IREE_TEST_BACKEND: ${{ matrix.backend }}

-      - name: "Running real weights SDXL vae decode tests"
+      - name: "Running real weights SDXL + SD3 vae decode tests"
         id: vae
         if: ${{ !cancelled() }}
         run: |
           source ${VENV_DIR}/bin/activate
-          pytest iree_tests/pytorch/models/sdxl-vae-decode-tank \
+          pytest iree_tests/pytorch/models/sd-vae \
             -rpfE \
             -k real_weights \
             --no-skip-tests-missing-files \
6 changes: 3 additions & 3 deletions iree_tests/benchmarks/benchmark_sdxl_rocm.py
@@ -12,9 +12,9 @@

 benchmark_dir = os.path.dirname(os.path.realpath(__file__))
 iree_root = os.path.dirname(benchmark_dir)
-prompt_encoder_dir = f"{iree_root}/pytorch/models/sdxl-prompt-encoder-tank"
-scheduled_unet_dir = f"{iree_root}/pytorch/models/sdxl-scheduled-unet-3-tank"
-vae_decode_dir = f"{iree_root}/pytorch/models/sdxl-vae-decode-tank"
+prompt_encoder_dir = f"{iree_root}/pytorch/models/sd-clip/sdxl-prompt-encoder-tank"
+scheduled_unet_dir = f"{iree_root}/pytorch/models/sd-unet/sdxl-scheduled-unet-3-tank"
+vae_decode_dir = f"{iree_root}/pytorch/models/sd-vae/sdxl-vae-decode-tank"

def run_iree_command(args: Sequence[str] = ()):
command = "Exec:", " ".join(args)
5 changes: 4 additions & 1 deletion iree_tests/configs/config_gpu_rocm_models.json
@@ -12,7 +12,10 @@
"skip_compile_tests": [
"sdxl-scheduled-unet-3-tank",
"sdxl-vae-decode-tank",
"sdxl-prompt-encoder-tank"
"sdxl-prompt-encoder-tank",
"sd3-mmdit",
"sd3-vae-decode",
"sd3-prompt-encoder"
Comment on lines 12 to +18 — Member:

If this is going to exclude all these special models they should be moved to a different subdirectory.

The config_*_models configs were intended to test a set of models that were imported into the test suite in a uniform way, and each of these models is special in some way.
],
"skip_run_tests": [],
"expected_compile_failures": [
5 changes: 4 additions & 1 deletion iree_tests/configs/config_pytorch_models_cpu_llvm_task.json
@@ -10,7 +10,10 @@
"skip_compile_tests": [
"sdxl-scheduled-unet-3-tank",
"sdxl-vae-decode-tank",
"sdxl-prompt-encoder-tank"
"sdxl-prompt-encoder-tank",
"sd3-mmdit",
"sd3-vae-decode",
"sd3-prompt-encoder"
],
"skip_run_tests": [],
"expected_compile_failures": [
20 changes: 20 additions & 0 deletions iree_tests/configs/config_sd_prompt_encoder_cpu_llvm_task.json
@@ -0,0 +1,20 @@
{
"config_name": "cpu_llvm_task",
"iree_compile_flags" : [
"--iree-hal-target-backends=llvm-cpu",
"--iree-llvmcpu-target-cpu-features=host",
"--iree-llvmcpu-fail-on-out-of-bounds-stack-allocation=false",
"--iree-llvmcpu-distribution-size=32",
"--iree-opt-const-eval=false",
"--iree-llvmcpu-enable-ukernels=all",
"--iree-global-opt-enable-quantized-matmul-reassociation"
],
"iree_run_module_flags": [
"--device=local-task",
"--parameters=model=real_weights.irpa"
],
"skip_compile_tests": [],
"skip_run_tests": [],
"expected_compile_failures": [],
"expected_run_failures": []
}
@@ -18,17 +18,12 @@
 ],
 "iree_run_module_flags": [
   "--device=hip",
-  "--parameters=model=real_weights.irpa",
-  "--input=1x64xi64=@inference_input.0.bin",
-  "--input=1x64xi64=@inference_input.1.bin",
-  "--input=1x64xi64=@inference_input.2.bin",
-  "--input=1x64xi64=@inference_input.3.bin",
-  "--expected_output=2x64x2048xf16=@inference_output.0.bin",
-  "--expected_output=2x1280xf16=@inference_output.1.bin",
-  "--expected_f16_threshold=1.0f"
+  "--parameters=model=real_weights.irpa"
 ],
 "skip_compile_tests": [],
 "skip_run_tests": [],
-"expected_compile_failures": [],
+"expected_compile_failures": [
+  "sd3-prompt-encoder"
+],
 "expected_run_failures": []
 }
23 changes: 23 additions & 0 deletions iree_tests/configs/config_sd_scheduled_unet_cpu_llvm_task.json
@@ -0,0 +1,23 @@
{
"config_name": "cpu_llvm_task",
"iree_compile_flags" : [
"--iree-hal-target-backends=llvm-cpu",
"--iree-llvmcpu-target-cpu-features=host",
"--iree-llvmcpu-fail-on-out-of-bounds-stack-allocation=false",
"--iree-llvmcpu-distribution-size=32",
"--iree-opt-const-eval=false",
"--iree-llvmcpu-enable-ukernels=all",
"--iree-global-opt-enable-quantized-matmul-reassociation"
],
"iree_run_module_flags": [
"--device=local-task",
"--parameters=model=real_weights.irpa"
],
"skip_compile_tests": [],
"skip_run_tests": [],
"expected_compile_failures": [
"sdxl-scheduled-unet-3-tank",
"sd3-mmdit"
],
"expected_run_failures": []
}
@@ -21,17 +21,12 @@
 ],
 "iree_run_module_flags": [
   "--device=hip",
-  "--parameters=model=real_weights.irpa",
-  "--module=sdxl_scheduled_unet_pipeline_fp16_rocm.vmfb",
-  "--input=1x4x128x128xf16=@inference_input.0.bin",
-  "--input=2x64x2048xf16=@inference_input.1.bin",
-  "--input=2x1280xf16=@inference_input.2.bin",
-  "--input=1xf16=@inference_input.3.bin",
-  "--expected_output=1x4x128x128xf16=@inference_output.0.bin",
-  "--expected_f16_threshold=0.7f"
+  "--parameters=model=real_weights.irpa"
 ],
 "skip_compile_tests": [],
 "skip_run_tests": [],
-"expected_compile_failures": [],
+"expected_compile_failures": [
+  "sd3-mmdit"
+],
 "expected_run_failures": []
 }
@@ -2,13 +2,17 @@
"config_name": "cpu_llvm_task",
"iree_compile_flags" : [
"--iree-hal-target-backends=llvm-cpu",
"--iree-llvmcpu-target-cpu-features=host"
"--iree-llvmcpu-target-cpu-features=host",
"--iree-llvmcpu-fail-on-out-of-bounds-stack-allocation=false",
"--iree-llvmcpu-distribution-size=32",
"--iree-opt-const-eval=false",
"--iree-llvmcpu-enable-ukernels=all",
"--iree-global-opt-enable-quantized-matmul-reassociation"
],
"iree_run_module_flags": [
"--device=local-task",
"--parameters=model=real_weights.irpa",
"--input=1x4x128x128xf16=@inference_input.0.bin",
"--expected_output=1x3x1024x1024xf16=@inference_output.0.bin",
"--expected_f32_threshold=0.01f",
"--expected_f16_threshold=0.02f"
],
"skip_compile_tests": [],
@@ -16,8 +16,7 @@
"iree_run_module_flags": [
"--device=hip",
"--parameters=model=real_weights.irpa",
"--input=1x4x128x128xf16=@inference_input.0.bin",
"--expected_output=1x3x1024x1024xf16=@inference_output.0.bin",
"--expected_f32_threshold=0.6f",
"--expected_f16_threshold=0.4f"
],
"skip_compile_tests": [],
22 changes: 0 additions & 22 deletions iree_tests/configs/config_sdxl_prompt_encoder_cpu_llvm_task.json

This file was deleted.

24 changes: 0 additions & 24 deletions iree_tests/configs/config_sdxl_scheduled_unet_cpu_llvm_task.json

This file was deleted.

18 changes: 16 additions & 2 deletions iree_tests/conftest.py
@@ -319,7 +319,12 @@ def __init__(self, spec, **kwargs):

         self.run_args = ["iree-run-module", f"--module={vmfb_name}"]
         self.run_args.extend(self.spec.iree_run_module_flags)
-        self.run_args.append(f"--flagfile={self.spec.data_flagfile_name}")
+
+        # expand data flag file, so beter for logging and can use environment variables
+        flag_file_path = f"{self.test_cwd}/{self.spec.data_flagfile_name}"
+        file = open(flag_file_path)
+        for line in file:
+            self.run_args.append(line.rstrip())
Comment on lines +323 to +327 — Member:

Comment style: start with an uppercase character, end with a period. Also fix typo and adjust wording.

Suggested change:

-        # expand data flag file, so beter for logging and can use environment variables
+        # Expand data flag files to make logs explicit.
+        # Tools accept `--flagfile=/path/to/flagfile` but logs are easier
+        # to read with explicit `--flag1=value1 --flag2=value2` flags.
         flag_file_path = f"{self.test_cwd}/{self.spec.data_flagfile_name}"
         file = open(flag_file_path)
         for line in file:
             self.run_args.append(line.rstrip())

Note that flagfiles are a requirement for some commands and environments. For example, certain terminals have character length limits around 512 or so characters for commands, and putting flags in files works around that.
     def runtest(self):
         # TODO(scotttodd): log files needed by the test (remote files / git LFS)
@@ -385,6 +390,8 @@ def test_compile(self):
         compile_env["IREE_TEST_PATH_EXTENSION"] = os.getenv(
             "IREE_TEST_PATH_EXTENSION", default=str(self.test_cwd)
         )
+
+        # expand environment variable for logging
         path_extension = compile_env["IREE_TEST_PATH_EXTENSION"]
         cmd = subprocess.list2cmdline(self.compile_args)
         cmd = cmd.replace("${IREE_TEST_PATH_EXTENSION}", f"{path_extension}")
@@ -401,8 +408,15 @@

     def test_run(self):
         run_env = os.environ.copy()
-        cmd = subprocess.list2cmdline(self.run_args)
+        run_env["IREE_TEST_BACKEND"] = os.getenv(
+            "IREE_TEST_BACKEND", default="none"
+        )
+
+        # expand environment variable for logging
+        backend = run_env["IREE_TEST_BACKEND"]
+        cmd = subprocess.list2cmdline(self.run_args)
+        cmd = cmd.replace("${IREE_TEST_BACKEND}", f"{backend}")
Comment on lines +411 to +418 — Member:

This is too sketchy IMO. Flagfiles should be runnable as-is and this is adding an extra indirection that will be too difficult to reproduce outside of a CI environment. Any tests needing this behavior should be using a mechanism other than this conftest.py.

Member:
Let me elaborate a bit...

This iree_tests subproject is home to:

  • model sources
  • test inputs
  • test outputs
  • utilities for converting into the test suite format
  • utilities for helping run test suites
  • simple examples of test suite instantiations

For large test suites following a standardized style (ONNX unit tests, ONNX models, StableHLO models, JAX programs, etc.), iree_tests/conftest.py and the config files there give us a lightweight way to define test cases that can be piped through iree-compile -> iree-run-module using a shared set of flags for each test configuration (compile options + run options).

For SDXL, SD3, llama, and other models that we're giving special attention, we should be testing both the out-of-the-box import -> compile -> run path that fits that mold and a curated path like https://github.com/iree-org/iree/blob/main/experimental/regression_suite/tests/pregenerated/test_llama2.py or https://github.com/nod-ai/sdxl-scripts.

  • We can keep a separation between the model sources here (including any scripts needed to get from frameworks, safetensors files, requirements.txt files, etc. down to .mlir and .irpa files) and downstream test instantiations - we'll just end up with more (I'm assuming Python) code downstream to set up exactly what we want tested.

At the point where a model needs a carve-out in a config.json file, an environment variable, a nonstandard file (spec file), or changes to conftest.py, it is too complex/different and should be given its own separate test.

  • We saw some confusion today on Discord when one of these special tests was newly passing here. It took several of us some time to find which config file needed updating, since the test was running as part of "sdxl_scheduled_unet" and not "pytorch_models".

We can share test code between the standardized path and custom model tests where it makes sense to do so. In particular, the "compile a program" and "run a program" parts could be fixtures (as they are in https://github.com/iree-org/iree/blob/main/experimental/regression_suite/ireers/fixtures.py). We should have a common way for those stages to run, with the same logging format and the same error messages when tests are unexpectedly passing, newly failing, etc.
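For illustration, a minimal sketch of what such shared compile/run fixtures could look like (the fixture names, flags, and test body are assumptions loosely modeled on the ireers/fixtures.py idea, not the actual API):

# Hypothetical sketch: the "compile a program" and "run a program" stages as
# pytest fixtures. Names and flags here are illustrative assumptions.
import subprocess

import pytest


@pytest.fixture
def iree_compile(tmp_path):
    """Returns a helper that compiles a source program into a .vmfb module."""

    def _compile(source_path, flags):
        vmfb_path = tmp_path / "module.vmfb"
        cmd = ["iree-compile", str(source_path), *flags, "-o", str(vmfb_path)]
        subprocess.run(cmd, check=True)
        return vmfb_path

    return _compile


@pytest.fixture
def iree_run_module():
    """Returns a helper that runs a compiled module with the given flags."""

    def _run(vmfb_path, flags):
        cmd = ["iree-run-module", f"--module={vmfb_path}", *flags]
        return subprocess.run(cmd, check=True, capture_output=True, text=True)

    return _run


def test_sd3_mmdit_cpu(iree_compile, iree_run_module):
    # Flags mirror the CPU config files in this PR.
    vmfb = iree_compile(
        "pytorch/models/sd-unet/sd3-mmdit/model.mlirbc",
        ["--iree-hal-target-backends=llvm-cpu"],
    )
    iree_run_module(
        vmfb,
        ["--device=local-task", "--parameters=model=real_weights.irpa"],
    )

Keeping both stages as fixtures would let the standardized conftest.py path and the curated model tests share one logging format and one set of pass/fail messages.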

Contributor Author:
Yeah, this makes sense. I will take a look at an alternate path for the custom models.

Contributor Author:
We can still use the same downloading utilities, though, to fetch all the sources, right (download_remote_files.py)? We'll just have a different test_cases.json for the script to parse in the custom path?

Member:
We can keep the current downloading, or we could follow the fetch_source_fixture style in https://github.com/iree-org/iree/blob/6f178692f11d356a284eba915f2cf1d067d92a69/experimental/regression_suite/tests/pregenerated/test_llama2.py#L21-L24. If we're explicitly setting up test cases like in that file, then we could just fully script it all there, with no need for a separate .json file.

IDK, and I can't take a deep context switch right now to think through the design.

I'd start with that test_llama2.py and adapt it to the separate-repo model (test sources, inputs, and outputs in one repo, test configurations in another).
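For reference, a rough sketch of that fetch-style approach (the helper name and cache layout are assumptions, not the actual fetch_source_fixture API):

# Hypothetical sketch: download a remote test artifact into a local cache
# once, in the spirit of the fetch_source_fixture style linked above.
import urllib.request
from pathlib import Path

CACHE_DIR = Path.home() / "iree_tests_cache"  # Matches IREE_TEST_FILES above.


def fetch_remote_file(url: str) -> Path:
    """Downloads url into CACHE_DIR if missing and returns the local path."""
    local_path = CACHE_DIR / url.rsplit("/", maxsplit=1)[-1]
    if not local_path.exists():
        CACHE_DIR.mkdir(parents=True, exist_ok=True)
        urllib.request.urlretrieve(url, str(local_path))
    return local_path


# Example with one of the SD3 prompt encoder files from test_cases.json below:
weights = fetch_remote_file(
    "https://sharkpublic.blob.core.windows.net/sharkpublic/sai/sd3-prompt-encoder/real_weights.irpa"
)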

Contributor Author:
Yeah, no worries. I'd rather stick to a unified downloading process (making use of the same tools) to a cache in the test suite, and unified compile/run through a fixture as you suggested, so we are using the same tools across the whole repo where we can. I will proceed with that for now.


         # TODO(scotttodd): expand flagfile(s)
         logging.getLogger().info(
             f"Launching run command:\n"  #
Git LFS file not shown
@@ -0,0 +1,9 @@
--input=1x77x2xi64=@inference_input.0.bin
--input=1x77x2xi64=@inference_input.1.bin
--input=1x77x2xi64=@inference_input.2.bin
--input=1x77x2xi64=@inference_input.3.bin
--input=1x77x2xi64=@inference_input.4.bin
--input=1x77x2xi64=@inference_input.5.bin
--expected_output=2x154x4096xf32=@inference_output.0.bin
--expected_output=2x2048xf32=@inference_output.1.bin
--expected_f32_threshold=0.15f
@@ -0,0 +1,7 @@
--input="1x77x2xi64"
--input="1x77x2xi64"
--input="1x77x2xi64"
--input="1x77x2xi64"
--input="1x77x2xi64"
--input="1x77x2xi64"
--parameters=splats.irpa
Git LFS file not shown
@@ -0,0 +1,25 @@
{
"file_format": "test_cases_v0",
"test_cases": [
{
"name": "splats",
"runtime_flagfile": "splat_data_flags.txt",
"remote_files": []
},
{
"name": "real_weights",
"runtime_flagfile": "real_weights_data_flags.txt",
"remote_files": [
"https://sharkpublic.blob.core.windows.net/sharkpublic/sai/sd3-prompt-encoder/inference_input.0.bin",
"https://sharkpublic.blob.core.windows.net/sharkpublic/sai/sd3-prompt-encoder/inference_input.1.bin",
"https://sharkpublic.blob.core.windows.net/sharkpublic/sai/sd3-prompt-encoder/inference_input.2.bin",
"https://sharkpublic.blob.core.windows.net/sharkpublic/sai/sd3-prompt-encoder/inference_input.3.bin",
"https://sharkpublic.blob.core.windows.net/sharkpublic/sai/sd3-prompt-encoder/inference_input.4.bin",
"https://sharkpublic.blob.core.windows.net/sharkpublic/sai/sd3-prompt-encoder/inference_input.5.bin",
"https://sharkpublic.blob.core.windows.net/sharkpublic/sai/sd3-prompt-encoder/inference_output.0.bin",
"https://sharkpublic.blob.core.windows.net/sharkpublic/sai/sd3-prompt-encoder/inference_output.1.bin",
"https://sharkpublic.blob.core.windows.net/sharkpublic/sai/sd3-prompt-encoder/real_weights.irpa"
]
}
]
}
@@ -0,0 +1,7 @@
--input=1x64xi64=@inference_input.0.bin
--input=1x64xi64=@inference_input.1.bin
--input=1x64xi64=@inference_input.2.bin
--input=1x64xi64=@inference_input.3.bin
--expected_output=2x64x2048xf16=@inference_output.0.bin
--expected_output=2x1280xf16=@inference_output.1.bin
--expected_f16_threshold=1.0f
3 changes: 3 additions & 0 deletions iree_tests/pytorch/models/sd-unet/sd3-mmdit/model.mlirbc
Git LFS file not shown
@@ -0,0 +1,7 @@
--parameters=model=real_weights.irpa
--input=2x16x128x128xf16=@inference_input.0.bin
--input=2x154x4096xf16=@inference_input.1.bin
--input=2x2048xf16=@inference_input.2.bin
--input=1xf16=@inference_input.3.bin
--expected_output=2x16x128x128xf32=@inference_output.0.bin
--expected_f16_threshold=1.0f
@@ -0,0 +1,5 @@
--input="2x16x128x128xf16"
--input="2x154x4096xf16"
--input="2x2048xf16"
--input="1xf16"
--parameters=splats.irpa
3 changes: 3 additions & 0 deletions iree_tests/pytorch/models/sd-unet/sd3-mmdit/splats.irpa
Git LFS file not shown