BEGIN 1782126387.6915982 EXEC /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build /usr/local/therock-tools/bin/cmake -E env --unset=ROCM_PATH --unset=ROCM_DIR --unset=HIP_PATH --unset=HIP_DIR -- /usr/local/therock-tools/bin/cmake --build /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build 41.6 [1/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.o 41.8 [2/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.o 42.3 [3/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.o 42.4 [4/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.o 42.7 [5/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.o 44.2 [6/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.o 44.6 [7/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.o 45.2 [8/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.o 51.6 [9/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.o 52.2 [10/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.o 52.3 [11/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.o 54.1 [12/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.o 55.2 [13/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.o 56.4 [14/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.o 56.8 [15/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.o 56.8 [16/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.o 57.1 [17/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.o 57.2 [18/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.o 57.8 [19/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.o 58.3 [20/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.o 58.8 [21/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.o 59.1 [22/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.o 59.5 [23/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.o 59.9 [24/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.o 60.1 [25/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.o 60.3 [26/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.o 60.4 [27/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.o 61.3 [28/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.o 92.1 [29/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.o 96.8 [30/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.o 97.8 [31/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.o 98.6 [32/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.o 98.7 [33/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.o 98.7 [34/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.o 99.6 [35/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.o 99.8 [36/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.o 100.6 [37/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.o 103.0 [38/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.o 103.9 [39/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.o 107.3 [40/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.o 149.3 [41/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.o 151.4 [42/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.o 154.2 [43/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.o 154.3 [44/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.o 157.0 [45/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.o 166.3 [46/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.o 186.2 [47/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.o 197.1 [48/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.o 203.7 [49/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.o 204.3 [50/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.o 205.7 [51/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.o 205.9 [52/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.o 208.3 [53/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.o 210.7 [54/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.o 210.9 [55/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.o 221.9 [56/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.o 231.4 [57/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.o 233.2 [58/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.o 241.9 [59/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.o 259.3 [60/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.o 274.5 [61/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.o 294.6 [62/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.o 295.0 [63/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.o 309.4 [64/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.o 311.8 [65/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.o 311.9 [66/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.o 315.8 [67/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.o 319.2 [68/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.o 320.9 [69/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.o 325.2 [70/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.o 335.6 [71/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.o 336.9 [72/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.o 337.0 [73/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.o 338.5 [74/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.o 340.5 [75/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.o 343.2 [76/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.o 344.5 [77/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.o 351.2 [78/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.o 351.8 [79/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.o 355.8 [80/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.o 356.4 [81/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.o 371.0 [82/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.o 378.7 [83/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.o 382.8 [84/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.o 384.1 [85/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.o 385.7 [86/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.o 386.8 [87/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.o 388.5 [88/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.o 400.7 [89/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.o 412.6 [90/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.o 412.8 [91/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_kknn_instance.cpp.o 415.2 [92/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.o 424.3 [93/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.o 430.1 [94/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.o 433.3 [95/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.o 442.1 [96/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.o 445.9 [97/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_kkn_instance.cpp.o 448.4 [98/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp.o 450.6 [99/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.o 452.2 [100/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp.o 463.7 [101/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_kknn_instance.cpp.o 469.9 [102/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.o 470.7 [103/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.o 473.2 [104/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.o 492.7 [105/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.o 493.5 [106/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.o 497.7 [107/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.o 514.0 [108/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.o 529.5 [109/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.o 544.9 [110/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp.o 546.9 [111/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.o 561.7 [112/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp.o 579.0 [113/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_knnn_instance.cpp.o 595.1 [114/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_kkn_instance.cpp.o 596.2 [115/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.o 601.7 [116/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_kknn_instance.cpp.o 604.4 [117/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.o 622.9 [118/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.o 653.0 [119/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_knnn_instance.cpp.o 655.1 [120/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.o 655.6 [121/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_mnnn_instance.cpp.o 656.4 [122/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_knn_instance.cpp.o 657.9 [123/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_mknn_instance.cpp.o 671.4 [124/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_kknn_instance.cpp.o 684.1 [125/1412] Building CXX object library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeFiles/device_elementwise_normalization_instance.dir/device_elementwise_normalization_f16_instance.cpp.o 692.8 [126/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.o 693.8 [127/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.o 698.8 [128/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_mkn_instance.cpp.o 712.1 [129/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_knn_instance.cpp.o 716.6 [130/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_mnnn_instance.cpp.o 725.6 [131/1412] Building CXX object library/src/tensor_operation_instance/gpu/elementwise/CMakeFiles/device_elementwise_instance.dir/device_normalize_instance.cpp.o 726.0 [132/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.o 727.1 [133/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.o 744.0 [134/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeFiles/device_conv2d_fwd_bias_relu_instance.dir/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp.o 750.4 [135/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp.o 761.9 [136/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_mknn_instance.cpp.o 765.4 [137/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeFiles/device_conv2d_fwd_bias_relu_add_instance.dir/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp.o 765.7 [138/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.o 778.8 [139/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_knnn_instance.cpp.o 779.4 [140/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.o 784.9 [141/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_mnn_instance.cpp.o 795.9 [142/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_mnn_instance.cpp.o 799.5 [143/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_mkn_instance.cpp.o 819.0 [144/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.o 823.3 [145/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_comp_mnkpadding_instance.cpp.o 824.7 [146/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_comp_default_instance.cpp.o 842.1 [147/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_mknn_instance.cpp.o 871.6 [148/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_comp_default_instance.cpp.o 886.1 [149/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.o 907.5 [150/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.o 926.3 [151/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_comp_mnkpadding_instance.cpp.o 929.1 [152/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_knnn_instance.cpp.o 943.5 [153/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_mnnn_instance.cpp.o 984.9 [154/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_add.cpp.o 989.0 [155/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp.o 991.3 [156/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_add.cpp.o 991.6 [157/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp.o 992.1 [158/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_kkn_instance.cpp.o 1019.7 [159/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp.o 1029.7 [160/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp.o 1037.0 [161/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp.o 1055.9 [162/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_add.cpp.o 1073.3 [163/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_add.cpp.o 1085.6 [164/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_mknn_instance.cpp.o 1093.4 [165/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp.o 1122.7 [166/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp.o 1123.1 [167/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp.o 1129.9 [168/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp.o 1132.6 [169/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp.o 1133.4 [170/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_add.cpp.o 1143.2 [171/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_max.cpp.o 1143.7 [172/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp.o 1152.0 [173/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp.o 1153.4 [174/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_mnnn_instance.cpp.o 1153.9 [175/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_min.cpp.o 1166.8 [176/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp.o 1170.9 [177/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_add.cpp.o 1173.1 [178/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i32_i8_add.cpp.o 1193.0 [179/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp.o 1211.9 [180/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp.o 1243.2 [181/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_odd_mn_instance.cpp.o 1245.8 [182/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp.o 1248.6 [183/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_add.cpp.o 1259.2 [184/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_kkn_instance.cpp.o 1259.8 [185/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_min.cpp.o 1262.4 [186/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_knn_instance.cpp.o 1263.6 [187/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_add.cpp.o 1269.5 [188/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp.o 1269.7 [189/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp.o 1272.2 [190/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_max.cpp.o 1274.0 [191/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i32_i8_add.cpp.o 1279.0 [192/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp.o 1288.7 [193/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_odd_mn_instance.cpp.o 1290.1 [194/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp.o 1292.4 [195/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v1_default_instance.cpp.o 1298.8 [196/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_max.cpp.o 1306.6 [197/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp.o 1317.3 [198/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp.o 1318.6 [199/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_min.cpp.o 1324.9 [200/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_min.cpp.o 1330.9 [201/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_add.cpp.o 1353.3 [202/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp.o 1363.5 [203/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp.o 1365.4 [204/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_max.cpp.o 1368.9 [205/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_min.cpp.o 1369.7 [206/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp.o 1382.1 [207/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_add.cpp.o 1386.4 [208/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp.o 1399.9 [209/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp.o 1403.4 [210/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v1_default_instance.cpp.o 1406.9 [211/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_max.cpp.o 1414.5 [212/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp.o 1416.2 [213/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp.o 1417.7 [214/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.o 1421.9 [215/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v2_default_instance.cpp.o 1429.5 [216/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp.o 1431.7 [217/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_min.cpp.o 1446.6 [218/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_max.cpp.o 1447.1 [219/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_min.cpp.o 1448.4 [220/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_mkn_instance.cpp.o 1452.9 [221/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_knn_instance.cpp.o 1476.9 [222/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_max.cpp.o 1494.6 [223/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v1_mnkpadding_instance.cpp.o 1500.4 [224/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_min.cpp.o 1508.8 [225/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_mnn_instance.cpp.o 1512.2 [226/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_mnn_instance.cpp.o 1523.4 [227/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_min.cpp.o 1530.8 [228/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v2_default_instance.cpp.o 1534.0 [229/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce2.cpp.o 1540.5 [230/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp.o 1541.9 [231/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce2.cpp.o 1544.3 [232/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v2_mnkpadding_instance.cpp.o 1546.6 [233/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v2_mnkpadding_instance.cpp.o 1547.3 [234/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce1.cpp.o 1548.3 [235/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_m_instance.cpp.o 1548.7 [236/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce1.cpp.o 1555.4 [237/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_max.cpp.o 1559.1 [238/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce3.cpp.o 1562.1 [239/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce3.cpp.o 1564.6 [240/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_comp_mnkpadding_instance.cpp.o 1570.1 [241/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce3.cpp.o 1575.6 [242/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce4.cpp.o 1581.5 [243/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v1_mnkpadding_instance.cpp.o 1585.0 [244/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce1.cpp.o 1586.2 [245/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce1.cpp.o 1590.1 [246/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce3.cpp.o 1592.8 [247/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce2.cpp.o 1599.4 [248/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_comp_mnkpadding_instance.cpp.o 1605.4 [249/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_m_instance.cpp.o 1606.8 [250/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce2.cpp.o 1608.5 [251/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_max.cpp.o 1615.0 [252/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp.o 1621.3 [253/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instance.cpp.o 1626.3 [254/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp.o 1626.3 [255/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce4.cpp.o 1636.7 [256/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_mkn_instance.cpp.o 1638.9 [257/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp.o 1639.1 [258/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp.o 1644.4 [259/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp.o 1650.6 [260/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_comp_default_instance.cpp.o 1651.2 [261/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instance.cpp.o 1654.5 [262/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 1663.2 [263/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp.o 1664.5 [264/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instance.cpp.o 1666.9 [265/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp.o 1668.8 [266/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp.o 1672.6 [267/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp.o 1674.8 [268/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp.o 1678.5 [269/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp.o 1680.6 [270/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.o 1685.3 [271/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 1686.3 [272/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 1691.4 [273/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_comp_large_tensors_instance.cpp.o 1699.1 [274/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp.o 1708.9 [275/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_comp_default_instance.cpp.o 1714.7 [276/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp.o 1717.4 [277/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_min.cpp.o 1729.6 [278/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_n_instance.cpp.o 1737.4 [279/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_comp_instance.cpp.o 1746.5 [280/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_1.cpp.o 1750.6 [281/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 1751.1 [282/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_0.cpp.o 1758.8 [283/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_2.cpp.o 1760.0 [284/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 1766.2 [285/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_max.cpp.o 1768.2 [286/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_3.cpp.o 1770.6 [287/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_4.cpp.o 1779.1 [288/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 1782.8 [289/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_6.cpp.o 1788.2 [290/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 1791.7 [291/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instance.cpp.o 1791.8 [292/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 1794.9 [293/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 1795.5 [294/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_8.cpp.o 1806.3 [295/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instance.cpp.o 1809.4 [296/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_20.cpp.o 1809.8 [297/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_11.cpp.o 1810.8 [298/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_10.cpp.o 1814.0 [299/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances.cpp.o 1816.0 [300/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instance.cpp.o 1818.4 [301/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 1824.3 [302/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_12.cpp.o 1824.6 [303/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_13.cpp.o 1831.4 [304/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_16.cpp.o 1832.8 [305/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_mn_instance.cpp.o 1832.9 [306/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_14.cpp.o 1836.6 [307/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_15.cpp.o 1839.7 [308/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_19.cpp.o 1840.1 [309/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_mn_instance.cpp.o 1842.6 [310/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp.o 1844.3 [311/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_17.cpp.o 1847.3 [312/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_18.cpp.o 1862.6 [313/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_2.cpp.o 1867.7 [314/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_1.cpp.o 1869.1 [315/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_0.cpp.o 1871.6 [316/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_3.cpp.o 1873.7 [317/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances.cpp.o 1876.4 [318/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_4.cpp.o 1878.0 [319/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_20.cpp.o 1880.8 [320/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_6.cpp.o 1881.1 [321/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_11.cpp.o 1883.0 [322/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_8.cpp.o 1884.8 [323/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_13.cpp.o 1888.4 [324/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_10.cpp.o 1901.5 [325/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_12.cpp.o 1911.4 [326/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1912.4 [327/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_18.cpp.o 1913.3 [328/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_n_instance.cpp.o 1915.1 [329/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_16.cpp.o 1915.8 [330/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_14.cpp.o 1917.9 [331/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_15.cpp.o 1919.9 [332/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1922.4 [333/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_19.cpp.o 1922.6 [334/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_17.cpp.o 1922.9 [335/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1931.3 [336/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2007.4 [337/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instance.cpp.o 2025.1 [338/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2029.4 [339/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_16_16_instance.cpp.o 2030.7 [340/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instance.cpp.o 2031.8 [341/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_5.cpp.o 2039.8 [342/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2058.4 [343/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_f32_large_tensors_instance.cpp.o 2092.3 [344/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_bf16_large_tensors_instance.cpp.o 2100.1 [345/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_wmma_nhwgc_gkyxc_nhwgk_bf16_pipev1_instance.cpp.o 2103.9 [346/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2115.8 [347/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_wmma_nhwgc_gkyxc_nhwgk_f16_pipev1_instance.cpp.o 2121.8 [348/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_f16_large_tensors_instance.cpp.o 2123.9 [349/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2127.5 [350/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_optimized_loads_instance.cpp.o 2142.0 [351/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_5.cpp.o 2148.5 [352/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp.o 2224.0 [353/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_16_16_instance.cpp.o 2226.4 [354/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_16_16_instance.cpp.o 2233.4 [355/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_f16_16_16_instance.cpp.o 2236.6 [356/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load.cpp.o 2250.4 [357/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_bf16_16_16_instance.cpp.o 2296.4 [358/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instance.cpp.o 2331.2 [359/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2339.1 [360/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2341.5 [361/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_f32_bf16_instance.cpp.o 2355.6 [362/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_large_tensors_instance.cpp.o 2357.1 [363/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_large_tensors_instance.cpp.o 2364.2 [364/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.o 2397.2 [365/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp.o 2404.1 [366/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 2404.5 [367/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 2407.4 [368/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load.cpp.o 2410.8 [369/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_wavelet_pad0_instance.cpp.o 2412.7 [370/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_nongroup_ported_f16_instance.cpp.o 2413.1 [371/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_wavelet_default_instance.cpp.o 2424.9 [372/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 2425.2 [373/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_nongroup_ported_bf16_instance.cpp.o 2434.9 [374/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp.o 2435.1 [375/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2437.3 [376/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_optimized_loads_instance.cpp.o 2443.0 [377/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_wavelet_4w2_default_instance.cpp.o 2444.0 [378/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2448.6 [379/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_7.cpp.o 2452.1 [380/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 2455.4 [381/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2456.0 [382/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_wavelet_4w2_pad0_instance.cpp.o 2456.6 [383/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_wavelet_pad0_instance.cpp.o 2459.9 [384/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_wavelet_4w2_pad0_instance.cpp.o 2461.6 [385/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_wavelet_default_instance.cpp.o 2462.7 [386/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_wavelet_4w2_default_instance.cpp.o 2463.3 [387/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_pipev2_instance.cpp.o 2477.0 [388/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_optimized_loads_instance.cpp.o 2484.6 [389/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_pipev5_instance.cpp.o 2489.9 [390/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 2491.6 [391/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_pad0_pipev2_instance.cpp.o 2493.3 [392/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_pad0_pipev5_instance.cpp.o 2523.0 [393/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 2523.2 [394/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_7.cpp.o 2553.3 [395/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 2571.8 [396/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 2580.2 [397/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 2627.0 [398/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 2631.6 [399/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_2x_instance.cpp.o 2640.5 [400/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 2642.7 [401/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp.o 2645.5 [402/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_wave_transfer_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2646.9 [403/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 2681.5 [404/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_generic_instance.cpp.o 2694.8 [405/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 2697.7 [406/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_generic_instance.cpp.o 2716.3 [407/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_9.cpp.o 2720.1 [408/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_comp_instance.cpp.o 2727.3 [409/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_wave_transfer_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2750.9 [410/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_16x16_instance.cpp.o 2756.1 [411/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev5_irregular_instance.cpp.o 2763.0 [412/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev5_irregular_instance.cpp.o 2763.1 [413/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 2769.0 [414/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2774.7 [415/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 2775.9 [416/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 2781.9 [417/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_9.cpp.o 2805.3 [418/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2807.4 [419/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 2816.5 [420/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_part2_instance.cpp.o 2834.3 [421/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev1_part2_instance.cpp.o 2838.2 [422/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev1_part2_instance.cpp.o 2840.7 [423/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp.o 2843.1 [424/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev2_irregular_instance.cpp.o 2851.4 [425/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2859.1 [426/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 2862.4 [427/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2870.1 [428/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 2885.7 [429/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 2887.4 [430/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev2_irregular_instance.cpp.o 2888.0 [431/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2906.5 [432/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 2917.2 [433/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_direct_load_instance.cpp.o 2920.6 [434/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.o 2936.3 [435/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev1_instance.cpp.o 2946.5 [436/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.o 2957.3 [437/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev1_instance.cpp.o 2961.8 [438/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 2999.2 [439/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_2x_instance.cpp.o 2999.5 [440/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instance.cpp.o 3011.8 [441/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3012.5 [442/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp.o 3013.4 [443/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_pad0_pipev5_instance.cpp.o 3020.8 [444/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_pipev5_instance.cpp.o 3021.4 [445/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_pad0_pipev5_instance.cpp.o 3022.0 [446/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp.o 3052.6 [447/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 3054.9 [448/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_comp_instance.cpp.o 3066.6 [449/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp.o 3066.8 [450/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_generic_instance.cpp.o 3075.9 [451/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_generic_instance.cpp.o 3096.3 [452/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 3103.1 [453/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.o 3108.5 [454/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_16x16_instance.cpp.o 3108.6 [455/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 3110.6 [456/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3111.5 [457/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3112.0 [458/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 3115.3 [459/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3135.2 [460/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_pipev5_instance.cpp.o 3140.5 [461/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 3167.0 [462/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instance.cpp.o 3175.1 [463/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_intra_instance.cpp.o 3183.6 [464/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_inter_instance.cpp.o 3189.1 [465/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 3192.1 [466/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3197.6 [467/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 3199.2 [468/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_large_tensors_instance.cpp.o 3206.6 [469/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_part2_instance.cpp.o 3224.8 [470/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3225.3 [471/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances.cpp.o 3240.6 [472/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_pad0_pipev2_instance.cpp.o 3240.9 [473/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 3250.9 [474/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 3251.0 [475/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 3259.2 [476/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_0.cpp.o 3267.1 [477/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_1.cpp.o 3269.1 [478/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_direct_load_instance.cpp.o 3271.5 [479/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances_shard_0.cpp.o 3276.4 [480/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 3278.3 [481/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 3281.8 [482/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_2.cpp.o 3288.2 [483/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_3.cpp.o 3293.3 [484/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp.o 3299.0 [485/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances.cpp.o 3300.3 [486/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3308.3 [487/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_4.cpp.o 3318.1 [488/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_5.cpp.o 3321.6 [489/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_6.cpp.o 3326.6 [490/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances.cpp.o 3328.2 [491/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_8.cpp.o 3330.7 [492/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_pad0_pipev2_instance.cpp.o 3331.7 [493/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_10.cpp.o 3336.1 [494/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instances.cpp.o 3336.9 [495/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_9.cpp.o 3339.8 [496/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instances.cpp.o 3344.6 [497/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_7.cpp.o 3348.0 [498/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_1.cpp.o 3354.9 [499/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_0.cpp.o 3358.1 [500/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instances_shard_0.cpp.o 3363.0 [501/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_4.cpp.o 3363.3 [502/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_pipev2_instance.cpp.o 3364.9 [503/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3366.4 [504/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_pipev2_instance.cpp.o 3367.5 [505/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_2.cpp.o 3370.8 [506/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_3.cpp.o 3371.8 [507/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3376.1 [508/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instances_shard_0.cpp.o 3383.0 [509/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_0.cpp.o 3388.8 [510/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3392.0 [511/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances.cpp.o 3394.5 [512/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_1.cpp.o 3395.7 [513/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 3404.3 [514/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances.cpp.o 3405.4 [515/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_2.cpp.o 3409.3 [516/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_4.cpp.o 3410.3 [517/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_8.cpp.o 3414.8 [518/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_3.cpp.o 3418.7 [519/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_5.cpp.o 3419.8 [520/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_7.cpp.o 3420.9 [521/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3421.8 [522/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_6.cpp.o 3422.4 [523/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances.cpp.o 3426.0 [524/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3426.0 [525/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_9.cpp.o 3427.9 [526/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_10.cpp.o 3428.0 [527/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_0.cpp.o 3431.5 [528/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_11.cpp.o 3436.6 [529/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_1.cpp.o 3444.9 [530/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_2.cpp.o 3448.3 [531/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_inter_instance.cpp.o 3448.8 [532/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_3.cpp.o 3452.3 [533/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 3458.4 [534/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 3463.6 [535/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev5_instance.cpp.o 3464.8 [536/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_3.cpp.o 3465.4 [537/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev5_instance.cpp.o 3466.9 [538/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 3467.4 [539/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_4.cpp.o 3467.9 [540/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 3468.5 [541/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_5.cpp.o 3471.9 [542/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_8.cpp.o 3477.1 [543/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_6.cpp.o 3477.2 [544/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_11.cpp.o 3477.3 [545/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances.cpp.o 3478.5 [546/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_wmma_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3478.9 [547/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_10.cpp.o 3479.6 [548/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3485.0 [549/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_13.cpp.o 3488.1 [550/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_7.cpp.o 3488.2 [551/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 3489.5 [552/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_intra_instance.cpp.o 3490.8 [553/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_12.cpp.o 3491.8 [554/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_9.cpp.o 3498.9 [555/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_14.cpp.o 3500.2 [556/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_15.cpp.o 3516.9 [557/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 3521.0 [558/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances.cpp.o 3521.0 [559/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 3521.6 [560/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_3.cpp.o 3521.9 [561/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_3.cpp.o 3522.6 [562/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_2.cpp.o 3523.6 [563/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 3528.8 [564/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 3532.1 [565/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_14.cpp.o 3532.2 [566/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_9.cpp.o 3532.4 [567/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_7.cpp.o 3533.0 [568/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_1.cpp.o 3534.5 [569/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_4.cpp.o 3534.8 [570/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_6.cpp.o 3537.7 [571/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_5.cpp.o 3537.8 [572/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_12.cpp.o 3539.6 [573/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_8.cpp.o 3542.5 [574/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_10.cpp.o 3543.3 [575/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_11.cpp.o 3547.9 [576/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_13.cpp.o 3549.4 [577/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_15.cpp.o 3551.1 [578/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 3551.9 [579/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_0.cpp.o 3555.9 [580/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_2.cpp.o 3557.0 [581/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_2.cpp.o 3557.1 [582/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 3558.0 [583/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_1.cpp.o 3560.9 [584/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 3566.1 [585/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_0.cpp.o 3569.5 [586/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_3.cpp.o 3570.6 [587/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 3571.2 [588/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 3572.4 [589/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_3.cpp.o 3576.9 [590/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 3582.8 [591/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_8.cpp.o 3582.9 [592/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 3584.7 [593/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_1.cpp.o 3584.9 [594/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_4.cpp.o 3588.7 [595/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 3589.2 [596/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 3589.6 [597/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_6.cpp.o 3590.2 [598/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_9.cpp.o 3590.6 [599/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 3592.3 [600/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_2.cpp.o 3592.4 [601/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_5.cpp.o 3593.8 [602/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_7.cpp.o 3594.6 [603/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_11.cpp.o 3595.0 [604/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_10.cpp.o 3595.6 [605/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 3596.2 [606/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_0.cpp.o 3599.6 [607/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 3600.1 [608/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 3602.3 [609/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_14.cpp.o 3602.9 [610/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_12.cpp.o 3603.7 [611/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_13.cpp.o 3605.1 [612/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 3606.8 [613/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 3608.5 [614/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_15.cpp.o 3612.9 [615/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 3621.5 [616/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances.cpp.o 3623.8 [617/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_1.cpp.o 3625.9 [618/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_7.cpp.o 3633.4 [619/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_6.cpp.o 3637.0 [620/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_4.cpp.o 3637.7 [621/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_10.cpp.o 3638.9 [622/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_3.cpp.o 3639.0 [623/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_2.cpp.o 3640.8 [624/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_8.cpp.o 3642.7 [625/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_0.cpp.o 3643.0 [626/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances.cpp.o 3643.8 [627/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_9.cpp.o 3646.0 [628/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_5.cpp.o 3647.8 [629/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 3658.3 [630/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_11.cpp.o 3661.7 [631/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_16.cpp.o 3662.2 [632/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 3663.6 [633/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_2.cpp.o 3663.9 [634/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_0.cpp.o 3666.3 [635/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_1.cpp.o 3668.4 [636/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_10.cpp.o 3668.6 [637/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp.o 3670.0 [638/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_11.cpp.o 3670.3 [639/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_3.cpp.o 3672.0 [640/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_6.cpp.o 3674.0 [641/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_12.cpp.o 3674.7 [642/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_8.cpp.o 3675.8 [643/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_4.cpp.o 3677.5 [644/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_18.cpp.o 3678.4 [645/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_14.cpp.o 3678.5 [646/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_19.cpp.o 3678.7 [647/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_15.cpp.o 3679.9 [648/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_13.cpp.o 3685.1 [649/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_1.cpp.o 3686.2 [650/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_17.cpp.o 3690.3 [651/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_0.cpp.o 3691.5 [652/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 3694.0 [653/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_2.cpp.o 3694.4 [654/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_3.cpp.o 3694.8 [655/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_8.cpp.o 3696.0 [656/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp.o 3696.1 [657/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances.cpp.o 3696.3 [658/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 3698.2 [659/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_11.cpp.o 3699.9 [660/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_12.cpp.o 3700.2 [661/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_4.cpp.o 3700.8 [662/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_6.cpp.o 3701.1 [663/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 3703.3 [664/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances.cpp.o 3707.0 [665/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_13.cpp.o 3708.4 [666/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_18.cpp.o 3709.7 [667/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_10.cpp.o 3711.0 [668/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_14.cpp.o 3711.6 [669/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_1.cpp.o 3712.0 [670/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_16.cpp.o 3714.3 [671/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_19.cpp.o 3714.5 [672/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_15.cpp.o 3714.7 [673/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_2.cpp.o 3717.2 [674/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_17.cpp.o 3717.4 [675/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_8.cpp.o 3718.1 [676/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_4.cpp.o 3719.3 [677/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_7.cpp.o 3720.2 [678/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_9.cpp.o 3720.6 [679/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_10.cpp.o 3720.9 [680/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_3.cpp.o 3722.4 [681/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_6.cpp.o 3726.2 [682/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_5.cpp.o 3726.7 [683/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_1.cpp.o 3728.1 [684/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_0.cpp.o 3728.4 [685/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_0.cpp.o 3729.2 [686/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_11.cpp.o 3734.3 [687/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances.cpp.o 3735.4 [688/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_2.cpp.o 3740.7 [689/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_3.cpp.o 3742.5 [690/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_12.cpp.o 3744.0 [691/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_10.cpp.o 3744.5 [692/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_8.cpp.o 3744.5 [693/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_11.cpp.o 3745.3 [694/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_13.cpp.o 3748.2 [695/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_6.cpp.o 3748.4 [696/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_4.cpp.o 3753.9 [697/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 3754.2 [698/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_15.cpp.o 3754.9 [699/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_2.cpp.o 3755.7 [700/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_14.cpp.o 3757.3 [701/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_16.cpp.o 3758.3 [702/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_18.cpp.o 3759.1 [703/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_17.cpp.o 3760.3 [704/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_1.cpp.o 3760.9 [705/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_19.cpp.o 3761.0 [706/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_0.cpp.o 3764.3 [707/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances.cpp.o 3765.2 [708/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_11.cpp.o 3766.2 [709/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_10.cpp.o 3766.3 [710/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_3.cpp.o 3768.4 [711/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_14.cpp.o 3769.0 [712/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_6.cpp.o 3769.4 [713/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_8.cpp.o 3770.3 [714/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_12.cpp.o 3772.1 [715/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_13.cpp.o 3773.4 [716/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_4.cpp.o 3776.0 [717/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_15.cpp.o 3777.8 [718/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_17.cpp.o 3779.1 [719/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_16.cpp.o 3781.8 [720/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_18.cpp.o 3783.6 [721/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_19.cpp.o 3784.2 [722/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances.cpp.o 3786.5 [723/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_1.cpp.o 3786.6 [724/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_0.cpp.o 3787.6 [725/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_3.cpp.o 3789.2 [726/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_2.cpp.o 3792.1 [727/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 3793.1 [728/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp.o 3796.5 [729/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_9.cpp.o 3796.8 [730/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_10.cpp.o 3797.4 [731/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_8.cpp.o 3797.5 [732/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_6.cpp.o 3799.3 [733/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_5.cpp.o 3799.3 [734/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_4.cpp.o 3799.8 [735/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_11.cpp.o 3801.7 [736/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_13.cpp.o 3802.2 [737/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_14.cpp.o 3802.5 [738/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_12.cpp.o 3802.9 [739/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_2.cpp.o 3804.4 [740/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_15.cpp.o 3804.9 [741/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_1.cpp.o 3807.8 [742/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances.cpp.o 3808.7 [743/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_3.cpp.o 3811.0 [744/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_0.cpp.o 3813.5 [745/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_8.cpp.o 3813.8 [746/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 3815.4 [747/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_10.cpp.o 3815.7 [748/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_5.cpp.o 3816.9 [749/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_6.cpp.o 3817.5 [750/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_7.cpp.o 3818.9 [751/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_4.cpp.o 3818.9 [752/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 3819.3 [753/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_9.cpp.o 3820.6 [754/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_11.cpp.o 3823.2 [755/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_5.cpp.o 3824.5 [756/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 3825.1 [757/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_13.cpp.o 3825.6 [758/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_14.cpp.o 3827.6 [759/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_12.cpp.o 3830.7 [760/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_15.cpp.o 3836.3 [761/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 3840.3 [762/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_5.cpp.o 3842.1 [763/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_2.cpp.o 3842.3 [764/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 3844.7 [765/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 3845.6 [766/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 3847.4 [767/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3849.2 [768/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 3850.3 [769/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 3857.4 [770/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_large_tensors_instance.cpp.o 3862.0 [771/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_7.cpp.o 3862.5 [772/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_9.cpp.o 3876.5 [773/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_large_tensors_instance.cpp.o 3876.6 [774/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp.o 3877.7 [775/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_5.cpp.o 3879.5 [776/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 3881.6 [777/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_7.cpp.o 3882.2 [778/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 3882.3 [779/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_9.cpp.o 3890.0 [780/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_7.cpp.o 3891.8 [781/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instance.cpp.o 3895.4 [782/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_7.cpp.o 3895.6 [783/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instance.cpp.o 3899.1 [784/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_5.cpp.o 3899.9 [785/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 3900.8 [786/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev2_instance.cpp.o 3908.6 [787/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3919.3 [788/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_7.cpp.o 3919.7 [789/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_9.cpp.o 3924.1 [790/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_9.cpp.o 3924.9 [791/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_large_tensors_instance.cpp.o 3929.3 [792/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev2_instance.cpp.o 3938.9 [793/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 3939.6 [794/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3944.7 [795/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 3946.7 [796/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 3956.3 [797/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 3969.8 [798/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 3971.7 [799/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3972.7 [800/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 3973.2 [801/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 3975.0 [802/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 3985.8 [803/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 3989.7 [804/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 3991.6 [805/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instance.cpp.o 3999.9 [806/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_2.cpp.o 4009.6 [807/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances.cpp.o 4010.7 [808/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_11.cpp.o 4012.9 [809/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_1.cpp.o 4022.1 [810/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_3.cpp.o 4025.9 [811/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_fp8_instance.cpp.o 4030.8 [812/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4037.1 [813/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 4040.3 [814/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp.o 4045.5 [815/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf8_instance.cpp.o 4047.0 [816/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_9.cpp.o 4050.0 [817/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_4.cpp.o 4066.7 [818/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_fp8_bf8_instance.cpp.o 4066.9 [819/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_1.cpp.o 4067.8 [820/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf8_fp8_instance.cpp.o 4073.0 [821/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances.cpp.o 4077.1 [822/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 4078.8 [823/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_2.cpp.o 4087.6 [824/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_6.cpp.o 4089.1 [825/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_4.cpp.o 4089.4 [826/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_3.cpp.o 4095.5 [827/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_5.cpp.o 4096.0 [828/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 4105.6 [829/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_7.cpp.o 4119.3 [830/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_8.cpp.o 4123.4 [831/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4124.9 [832/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4127.8 [833/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4130.7 [834/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4130.9 [835/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 4136.7 [836/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4170.7 [837/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_10.cpp.o 4174.1 [838/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_large_tensors_instance.cpp.o 4185.0 [839/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4191.4 [840/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_8.cpp.o 4207.5 [841/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_0.cpp.o 4210.8 [842/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_6.cpp.o 4217.4 [843/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instance.cpp.o 4228.5 [844/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16_16_instance.cpp.o 4249.5 [845/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instance.cpp.o 4255.6 [846/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 4261.0 [847/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_7.cpp.o 4264.6 [848/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.o 4266.8 [849/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_0.cpp.o 4273.0 [850/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_v3_ndhwgc_gkzyxc_ndhwgk_f32_large_tensors_instance.cpp.o 4277.2 [851/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_5.cpp.o 4279.6 [852/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_large_tensors_instance.cpp.o 4294.1 [853/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_10.cpp.o 4298.8 [854/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_optimized_loads_instance.cpp.o 4307.5 [855/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_v3_ndhwgc_gkzyxc_ndhwgk_bf16_large_tensors_instance.cpp.o 4311.8 [856/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_11.cpp.o 4329.4 [857/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_9.cpp.o 4349.6 [858/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_v3_ndhwgc_gkzyxc_ndhwgk_f16_large_tensors_instance.cpp.o 4349.8 [859/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_wmma_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_instance.cpp.o 4365.1 [860/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_wmma_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_instance.cpp.o 4383.1 [861/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4397.5 [862/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4414.4 [863/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16_16_instance.cpp.o 4420.7 [864/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 4451.6 [865/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16_16_instance.cpp.o 4470.7 [866/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_wavelet_default_instance.cpp.o 4474.2 [867/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_wavelet_pad0_instance.cpp.o 4490.6 [868/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4496.1 [869/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_bf16_16_16_instance.cpp.o 4511.9 [870/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_wavelet_default_instance.cpp.o 4524.6 [871/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_wavelet_pad0_instance.cpp.o 4529.5 [872/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_f16_16_16_instance.cpp.o 4541.3 [873/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev2_instance.cpp.o 4552.2 [874/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev5_instance.cpp.o 4562.3 [875/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev2_instance.cpp.o 4562.5 [876/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.o 4573.4 [877/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp.o 4573.6 [878/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev5_instance.cpp.o 4586.2 [879/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4591.6 [880/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4597.1 [881/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4599.5 [882/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.o 4602.4 [883/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_large_tensors_instance.cpp.o 4605.0 [884/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_large_tensors_instance.cpp.o 4613.4 [885/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_optimized_loads_instance.cpp.o 4626.3 [886/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_optimized_loads_instance.cpp.o 4631.5 [887/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 4632.6 [888/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4633.8 [889/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 4640.3 [890/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 4656.3 [891/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp.o 4666.1 [892/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 4692.0 [893/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 4718.4 [894/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 4735.0 [895/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4741.0 [896/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 4741.2 [897/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 4755.3 [898/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4777.8 [899/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 4810.7 [900/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 4851.4 [901/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 4893.5 [902/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev5_irregular_instance.cpp.o 4911.2 [903/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_generic_instance.cpp.o 4935.8 [904/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 4937.1 [905/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev2_irregular_instance.cpp.o 4937.6 [906/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_part2_instance.cpp.o 4945.5 [907/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 4957.6 [908/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_part2_instance.cpp.o 4958.7 [909/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev5_irregular_instance.cpp.o 4959.3 [910/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pad0_pipev5_instance.cpp.o 4980.9 [911/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pad0_pipev5_instance.cpp.o 5013.0 [912/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_generic_instance.cpp.o 5022.8 [913/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_16x16_instance.cpp.o 5028.6 [914/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_instance.cpp.o 5036.0 [915/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_comp_instance.cpp.o 5037.8 [916/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_instance.cpp.o 5038.5 [917/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev2_irregular_instance.cpp.o 5051.8 [918/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5064.6 [919/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 5082.0 [920/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5086.3 [921/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.o 5093.6 [922/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 5106.4 [923/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5112.2 [924/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp.o 5114.7 [925/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 5120.3 [926/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pad0_pipev2_instance.cpp.o 5125.1 [927/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 5135.1 [928/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp.o 5138.3 [929/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 5141.0 [930/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_pipev5_instance.cpp.o 5148.9 [931/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pad0_pipev2_instance.cpp.o 5154.6 [932/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 5159.8 [933/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5171.2 [934/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_pipev5_instance.cpp.o 5182.8 [935/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 5184.0 [936/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.o 5197.6 [937/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp.o 5204.0 [938/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5210.4 [939/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5218.4 [940/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instance.cpp.o 5223.8 [941/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 5232.4 [942/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp.o 5247.9 [943/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instance.cpp.o 5250.7 [944/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5259.6 [945/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp.o 5261.6 [946/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 5261.8 [947/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_large_tensors_instance.cpp.o 5267.3 [948/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 5277.9 [949/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5297.2 [950/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 5299.8 [951/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_comp_instance.cpp.o 5311.9 [952/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_16x16_instance.cpp.o 5313.9 [953/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 5315.4 [954/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 5318.2 [955/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.o 5320.4 [956/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5336.2 [957/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_generic_instance.cpp.o 5348.2 [958/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_generic_instance.cpp.o 5366.9 [959/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5368.5 [960/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 5371.1 [961/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5388.8 [962/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 5402.0 [963/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_2x_instances.cpp.o 5408.2 [964/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 5411.6 [965/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 5416.2 [966/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp.o 5421.3 [967/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp.o 5424.4 [968/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_intra_instance.cpp.o 5426.4 [969/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_2x_instances_shard_0.cpp.o 5434.7 [970/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_inter_instance.cpp.o 5435.0 [971/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 5439.2 [972/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp.o 5445.8 [973/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_1.cpp.o 5447.6 [974/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_0.cpp.o 5453.8 [975/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_2.cpp.o 5454.4 [976/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_pipev2_instance.cpp.o 5455.0 [977/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 5456.0 [978/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 5468.3 [979/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances.cpp.o 5469.2 [980/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_pipev2_instance.cpp.o 5469.6 [981/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 5491.8 [982/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances.cpp.o 5492.2 [983/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_4.cpp.o 5495.3 [984/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_3.cpp.o 5496.6 [985/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_6.cpp.o 5501.8 [986/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 5502.0 [987/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_2x_instances.cpp.o 5502.7 [988/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_5.cpp.o 5505.0 [989/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5505.2 [990/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_9.cpp.o 5506.7 [991/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances.cpp.o 5511.5 [992/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_10.cpp.o 5512.3 [993/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_7.cpp.o 5516.1 [994/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_0.cpp.o 5518.7 [995/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_8.cpp.o 5526.2 [996/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_1.cpp.o 5529.0 [997/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_2x_instances_shard_0.cpp.o 5531.2 [998/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5533.9 [999/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_4.cpp.o 5537.6 [1000/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5539.0 [1001/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_2.cpp.o 5539.2 [1002/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_3.cpp.o 5545.2 [1003/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5546.8 [1004/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_0.cpp.o 5550.8 [1005/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances.cpp.o 5551.4 [1006/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_0.cpp.o 5559.8 [1007/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_1.cpp.o 5565.5 [1008/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5567.9 [1009/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances.cpp.o 5570.0 [1010/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_5.cpp.o 5573.7 [1011/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances.cpp.o 5573.8 [1012/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_2.cpp.o 5576.1 [1013/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_7.cpp.o 5579.5 [1014/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_3.cpp.o 5582.2 [1015/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_8.cpp.o 5584.5 [1016/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_4.cpp.o 5585.9 [1017/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_10.cpp.o 5587.7 [1018/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_9.cpp.o 5592.3 [1019/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_1.cpp.o 5592.8 [1020/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_3.cpp.o 5594.8 [1021/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_0.cpp.o 5595.0 [1022/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5595.6 [1023/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_6.cpp.o 5601.9 [1024/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_11.cpp.o 5604.4 [1025/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 5607.2 [1026/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_2.cpp.o 5613.6 [1027/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev5_instance.cpp.o 5627.8 [1028/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 5629.2 [1029/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 5632.5 [1030/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_4.cpp.o 5632.9 [1031/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_5.cpp.o 5635.5 [1032/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_3.cpp.o 5638.3 [1033/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 5640.4 [1034/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances.cpp.o 5648.3 [1035/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_6.cpp.o 5651.3 [1036/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_14.cpp.o 5653.2 [1037/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_8.cpp.o 5653.4 [1038/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev5_instance.cpp.o 5654.0 [1039/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_13.cpp.o 5654.4 [1040/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_11.cpp.o 5654.9 [1041/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_15.cpp.o 5654.9 [1042/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_12.cpp.o 5661.5 [1043/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_7.cpp.o 5663.3 [1044/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_9.cpp.o 5665.5 [1045/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 5666.9 [1046/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_3.cpp.o 5668.8 [1047/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_10.cpp.o 5681.3 [1048/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 5695.5 [1049/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 5696.8 [1050/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_intra_instance.cpp.o 5697.9 [1051/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_6.cpp.o 5702.4 [1052/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances.cpp.o 5703.1 [1053/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_3.cpp.o 5704.4 [1054/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_5.cpp.o 5706.2 [1055/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_4.cpp.o 5706.7 [1056/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 5708.2 [1057/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 5709.5 [1058/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_2.cpp.o 5712.2 [1059/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_8.cpp.o 5714.3 [1060/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_1.cpp.o 5714.6 [1061/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_11.cpp.o 5717.1 [1062/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_inter_instance.cpp.o 5719.4 [1063/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_13.cpp.o 5720.0 [1064/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_7.cpp.o 5720.4 [1065/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_15.cpp.o 5720.5 [1066/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 5721.2 [1067/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_12.cpp.o 5721.5 [1068/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_10.cpp.o 5721.8 [1069/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_9.cpp.o 5725.0 [1070/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_14.cpp.o 5731.1 [1071/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_0.cpp.o 5731.1 [1072/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_2.cpp.o 5732.8 [1073/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_0.cpp.o 5733.0 [1074/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_1.cpp.o 5740.6 [1075/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_3.cpp.o 5750.5 [1076/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 5752.2 [1077/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_2.cpp.o 5755.6 [1078/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 5757.1 [1079/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 5761.5 [1080/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_2.cpp.o 5763.5 [1081/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 5763.9 [1082/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_3.cpp.o 5764.3 [1083/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_5.cpp.o 5765.0 [1084/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 5765.6 [1085/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 5767.3 [1086/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_4.cpp.o 5767.8 [1087/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_0.cpp.o 5768.2 [1088/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_6.cpp.o 5769.2 [1089/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 5770.0 [1090/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_10.cpp.o 5772.4 [1091/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_8.cpp.o 5773.0 [1092/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 5775.5 [1093/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_11.cpp.o 5777.0 [1094/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_7.cpp.o 5777.2 [1095/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_9.cpp.o 5777.2 [1096/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 5777.9 [1097/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_1.cpp.o 5779.1 [1098/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 5779.3 [1099/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 5780.7 [1100/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_12.cpp.o 5783.2 [1101/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_13.cpp.o 5783.9 [1102/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_15.cpp.o 5784.3 [1103/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 5784.3 [1104/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 5785.5 [1105/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 5788.2 [1106/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 5788.4 [1107/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_14.cpp.o 5789.6 [1108/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 5807.8 [1109/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 5815.4 [1110/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances.cpp.o 5824.8 [1111/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_0.cpp.o 5826.9 [1112/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_1.cpp.o 5827.9 [1113/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_2.cpp.o 5829.6 [1114/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 5837.6 [1115/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_3.cpp.o 5840.0 [1116/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_4.cpp.o 5843.1 [1117/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_12.cpp.o 5843.9 [1118/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_11.cpp.o 5844.6 [1119/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_14.cpp.o 5847.3 [1120/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_0.cpp.o 5847.5 [1121/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_8.cpp.o 5848.1 [1122/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_13.cpp.o 5850.2 [1123/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_18.cpp.o 5854.6 [1124/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_2.cpp.o 5854.7 [1125/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_19.cpp.o 5855.3 [1126/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_1.cpp.o 5855.8 [1127/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_10.cpp.o 5856.2 [1128/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_6.cpp.o 5857.0 [1129/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_16.cpp.o 5857.3 [1130/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_15.cpp.o 5859.3 [1131/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_17.cpp.o 5864.1 [1132/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_11.cpp.o 5864.3 [1133/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_3.cpp.o 5866.0 [1134/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_8.cpp.o 5868.1 [1135/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_10.cpp.o 5870.1 [1136/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_4.cpp.o 5873.4 [1137/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_6.cpp.o 5880.7 [1138/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances.cpp.o 5883.9 [1139/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_12.cpp.o 5890.9 [1140/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_13.cpp.o 5900.2 [1141/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_14.cpp.o 5901.9 [1142/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_17.cpp.o 5903.4 [1143/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_1.cpp.o 5905.0 [1144/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_15.cpp.o 5906.3 [1145/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_16.cpp.o 5913.7 [1146/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances.cpp.o 5914.3 [1147/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_18.cpp.o 5918.2 [1148/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 5919.2 [1149/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_19.cpp.o 5921.3 [1150/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_0.cpp.o 5926.7 [1151/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_2.cpp.o 5927.3 [1152/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_13.cpp.o 5928.7 [1153/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 5931.5 [1154/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_3.cpp.o 5934.1 [1155/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_11.cpp.o 5934.8 [1156/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp.o 5935.6 [1157/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_6.cpp.o 5936.5 [1158/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_8.cpp.o 5936.9 [1159/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_12.cpp.o 5938.4 [1160/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_4.cpp.o 5939.4 [1161/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_18.cpp.o 5942.5 [1162/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_15.cpp.o 5945.1 [1163/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_10.cpp.o 5946.2 [1164/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_16.cpp.o 5946.5 [1165/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_14.cpp.o 5953.9 [1166/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_17.cpp.o 5955.0 [1167/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp.o 5962.8 [1168/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_19.cpp.o 5963.7 [1169/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_0.cpp.o 5964.2 [1170/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_1.cpp.o 5964.7 [1171/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_2.cpp.o 5969.7 [1172/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances.cpp.o 5970.0 [1173/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_3.cpp.o 5970.5 [1174/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 5977.2 [1175/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 5978.0 [1176/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_12.cpp.o 5981.9 [1177/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_11.cpp.o 5990.5 [1178/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_4.cpp.o 5990.6 [1179/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 5992.4 [1180/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_6.cpp.o 5992.8 [1181/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_14.cpp.o 5995.2 [1182/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_10.cpp.o 5998.2 [1183/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 5999.4 [1184/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_18.cpp.o 6001.2 [1185/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_15.cpp.o 6006.2 [1186/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_8.cpp.o 6007.3 [1187/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_16.cpp.o 6007.8 [1188/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_2.cpp.o 6009.0 [1189/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_13.cpp.o 6010.1 [1190/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_1.cpp.o 6011.5 [1191/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_0.cpp.o 6013.0 [1192/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_3.cpp.o 6014.1 [1193/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 6014.9 [1194/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances.cpp.o 6015.4 [1195/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_17.cpp.o 6021.4 [1196/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_19.cpp.o 6025.6 [1197/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_6.cpp.o 6027.6 [1198/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_4.cpp.o 6028.3 [1199/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_10.cpp.o 6032.9 [1200/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 6033.0 [1201/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_9.cpp.o 6035.8 [1202/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_11.cpp.o 6037.3 [1203/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_8.cpp.o 6038.3 [1204/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_5.cpp.o 6045.4 [1205/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_15.cpp.o 6045.8 [1206/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_14.cpp.o 6046.8 [1207/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_12.cpp.o 6046.9 [1208/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances.cpp.o 6054.8 [1209/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_2.cpp.o 6055.1 [1210/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_13.cpp.o 6058.4 [1211/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 6061.0 [1212/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_1.cpp.o 6065.1 [1213/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_0.cpp.o 6067.0 [1214/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_3.cpp.o 6067.6 [1215/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 6068.6 [1216/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_6.cpp.o 6070.5 [1217/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_9.cpp.o 6073.3 [1218/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_5.cpp.o 6075.6 [1219/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_5.cpp.o 6075.7 [1220/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_13.cpp.o 6078.3 [1221/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_4.cpp.o 6078.7 [1222/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 6079.1 [1223/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_10.cpp.o 6083.5 [1224/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_8.cpp.o 6086.9 [1225/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_14.cpp.o 6090.1 [1226/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_11.cpp.o 6091.1 [1227/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_15.cpp.o 6091.8 [1228/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_12.cpp.o 6101.5 [1229/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_large_tensors_instance.cpp.o 6107.9 [1230/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_2.cpp.o 6111.0 [1231/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_7.cpp.o 6111.4 [1232/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 6111.6 [1233/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_5.cpp.o 6114.7 [1234/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_7.cpp.o 6120.7 [1235/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_large_tensors_instance.cpp.o 6122.1 [1236/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 6142.9 [1237/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 6157.0 [1238/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp.o 6159.7 [1239/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_9.cpp.o 6170.1 [1240/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_5.cpp.o 6171.9 [1241/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 6172.5 [1242/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6178.4 [1243/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 6181.7 [1244/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 6184.0 [1245/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_7.cpp.o 6193.2 [1246/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_9.cpp.o 6212.4 [1247/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp.o 6218.0 [1248/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_7.cpp.o 6218.5 [1249/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_f8_instance.cpp.o 6226.2 [1250/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 6234.6 [1251/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_5.cpp.o 6236.0 [1252/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 6238.9 [1253/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 6240.3 [1254/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_add/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_add_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_add_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 6242.2 [1255/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 6243.7 [1256/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 6243.8 [1257/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_combconvscale_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 6243.8 [1258/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6243.8 [1259/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 6244.8 [1260/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 6249.7 [1261/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_f8_bf8_instance.cpp.o 6254.1 [1262/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_9.cpp.o 6257.6 [1263/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convinvscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convinvscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convinvscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 6259.2 [1264/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_large_tensors_instance.cpp.o 6259.8 [1265/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_7.cpp.o 6260.4 [1266/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_7.cpp.o 6262.0 [1267/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_instance.cpp.o 6262.0 [1268/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_combconvscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 6297.9 [1269/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 6305.3 [1270/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_9.cpp.o 6306.5 [1271/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 6309.6 [1272/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 6310.5 [1273/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6310.7 [1274/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 6319.4 [1275/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 6323.2 [1276/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 6325.4 [1277/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 6327.7 [1278/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 6327.7 [1279/1412] cd /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build && /usr/local/therock-tools/bin/cmake -E rm -f /__w/rockrel/rockrel/build/ml-libs/composable_kernel/stamp/stage.stamp 6336.4 [1280/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 6340.5 [1281/1412] Building CXX object library/src/utility/CMakeFiles/utility.dir/device_memory.cpp.o 6342.2 [1282/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 6342.7 [1283/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 6343.6 [1284/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev2_instance.cpp.o 6345.6 [1285/1412] Building CXX object library/src/utility/CMakeFiles/utility.dir/host_tensor.cpp.o 6345.9 [1286/1412] Building CXX object library/src/utility/CMakeFiles/utility.dir/convolution_parameter.cpp.o 6373.2 [1287/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 6374.1 [1288/1412] Linking CXX static library lib/libdevice_contraction_operations.a 6374.2 [1289/1412] Linking CXX static library lib/libdevice_other_operations.a 6382.6 [1290/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev2_instance.cpp.o 6388.4 [1291/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6392.2 [1292/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 6395.6 [1293/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6409.4 [1294/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6425.9 [1295/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 6431.4 [1296/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 6438.3 [1297/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6438.5 [1298/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 6459.5 [1299/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 6468.6 [1300/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 6489.2 [1301/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 6534.2 [1302/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.o 6535.8 [1303/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 6537.4 [1304/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 6567.5 [1305/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_large_tensors_instance.cpp.o 6569.6 [1306/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 6582.9 [1307/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 6584.9 [1308/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 6597.4 [1309/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 6611.5 [1310/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 6615.2 [1311/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 6659.8 [1312/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 6699.1 [1313/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 6699.6 [1314/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 6774.5 [1315/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.o 6798.9 [1316/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 6822.2 [1317/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp.o 6850.6 [1318/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.o 6895.0 [1319/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 6895.7 [1320/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.o 6905.1 [1321/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 6907.1 [1322/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 6908.9 [1323/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 6921.0 [1324/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 6931.1 [1325/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 6955.4 [1326/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 6959.0 [1327/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 6970.5 [1328/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 6987.4 [1329/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.o 6991.8 [1330/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.o 7003.8 [1331/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.o 7007.2 [1332/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.o 7008.4 [1333/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 7023.6 [1334/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.o 7049.5 [1335/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_default_instance.cpp.o 7053.5 [1336/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7076.5 [1337/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 7110.5 [1338/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.o 7113.5 [1339/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_mnpadding_instance.cpp.o 7114.1 [1340/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 7126.4 [1341/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 7138.1 [1342/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_kpadding_instance.cpp.o 7139.6 [1343/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 7154.9 [1344/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 7155.6 [1345/1412] Linking CXX static library lib/libdevice_reduction_operations.a 7155.7 [1346/1412] Linking CXX static library lib/libutility.a 7170.8 [1347/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 7176.3 [1348/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 7177.2 [1349/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 7179.6 [1350/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 7199.6 [1351/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 7202.4 [1352/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 7217.5 [1353/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp.o 7219.7 [1354/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 7224.3 [1355/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp.o 7228.2 [1356/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 7234.1 [1357/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_default_instance.cpp.o 7248.8 [1358/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 7262.3 [1359/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7266.3 [1360/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 7270.0 [1361/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 7289.7 [1362/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 7297.1 [1363/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 7304.6 [1364/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 7312.7 [1365/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 7315.3 [1366/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp.o 7318.5 [1367/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_mnpadding_instance.cpp.o 7319.4 [1368/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp.o 7330.5 [1369/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_kpadding_instance.cpp.o 7332.2 [1370/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 7359.5 [1371/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 7418.9 [1372/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 7440.9 [1373/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 7445.4 [1374/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_kpadding_instance.cpp.o 7460.6 [1375/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_mnpadding_instance.cpp.o 7463.8 [1376/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_default_instance.cpp.o 7471.4 [1377/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 7507.9 [1378/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7527.1 [1379/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 7539.6 [1380/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_default_instance.cpp.o 7540.7 [1381/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 7556.7 [1382/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 7558.9 [1383/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7594.8 [1384/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.o 7601.8 [1385/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7606.3 [1386/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 7608.3 [1387/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_default_instance.cpp.o 7609.5 [1388/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 7611.2 [1389/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_default_instance.cpp.o 7611.8 [1390/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.o 7618.0 [1391/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_default_instance.cpp.o 7624.5 [1392/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 7625.3 [1393/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.o 7631.5 [1394/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_kpadding_instance.cpp.o 7638.8 [1395/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.o 7639.6 [1396/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 7653.1 [1397/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_mnkpadding_instance.cpp.o 7699.2 [1398/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7701.7 [1399/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7749.1 [1400/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7793.3 [1401/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 7805.9 [1402/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.o 7842.9 [1403/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 7878.8 [1404/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.o 7930.7 [1405/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_f16_f16_f16/device_gemm_wmma_universal_f16_f16_f16_mk_kn_mn_comp_default_instance.cpp.o 7930.7 [1406/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_bf16_bf16_bf16/device_gemm_wmma_universal_bf16_bf16_bf16_mk_kn_mn_comp_default_instance.cpp.o 7936.5 [1407/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.o 7944.5 [1408/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.o 7997.5 [1409/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 8033.6 [1410/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_bf16_i8_bf16/device_gemm_wmma_universal_bf16_i8_bf16_mk_kn_mn_comp_default_instance.cpp.o 8058.3 [1411/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 8060.4 [1412/1412] Linking CXX static library lib/libdevice_conv_operations.a END 1782134448.0802882 8060.388689994812 0