BEGIN 1782125758.3492646 EXEC /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build /usr/local/therock-tools/bin/cmake -E env --unset=ROCM_PATH --unset=ROCM_DIR --unset=HIP_PATH --unset=HIP_DIR -- /usr/local/therock-tools/bin/cmake --build /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build 31.1 [1/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.o 31.2 [2/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.o 31.9 [3/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.o 33.1 [4/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.o 33.5 [5/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.o 33.9 [6/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.o 34.1 [7/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.o 34.1 [8/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.o 38.6 [9/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.o 40.8 [10/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.o 41.8 [11/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.o 42.6 [12/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.o 43.5 [13/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.o 44.3 [14/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.o 44.6 [15/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.o 45.4 [16/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.o 45.7 [17/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.o 46.7 [18/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.o 46.7 [19/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.o 46.8 [20/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.o 46.8 [21/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.o 46.9 [22/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.o 47.3 [23/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.o 47.5 [24/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.o 47.7 [25/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.o 48.0 [26/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.o 48.9 [27/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.o 51.0 [28/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.o 73.9 [29/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.o 75.7 [30/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.o 76.4 [31/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.o 77.2 [32/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.o 77.4 [33/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.o 79.3 [34/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.o 80.0 [35/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.o 81.8 [36/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.o 82.0 [37/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.o 83.1 [38/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.o 83.3 [39/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.o 84.9 [40/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.o 108.9 [41/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.o 111.7 [42/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.o 114.6 [43/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.o 114.7 [44/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.o 117.0 [45/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.o 121.8 [46/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.o 122.2 [47/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.o 124.0 [48/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.o 126.8 [49/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.o 128.4 [50/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.o 141.6 [51/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.o 147.1 [52/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.o 150.0 [53/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.o 154.1 [54/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.o 156.1 [55/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.o 157.4 [56/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.o 158.1 [57/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.o 169.2 [58/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.o 170.0 [59/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.o 172.3 [60/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.o 173.7 [61/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.o 176.6 [62/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.o 180.6 [63/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.o 182.2 [64/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.o 186.3 [65/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.o 186.8 [66/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.o 189.4 [67/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.o 191.8 [68/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.o 195.0 [69/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.o 195.4 [70/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.o 196.8 [71/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.o 199.7 [72/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.o 204.5 [73/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.o 207.2 [74/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.o 210.0 [75/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.o 212.4 [76/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.o 215.0 [77/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.o 218.5 [78/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_kknn_instance.cpp.o 218.8 [79/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.o 219.0 [80/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.o 220.0 [81/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.o 222.9 [82/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.o 224.4 [83/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.o 234.0 [84/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.o 235.3 [85/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.o 236.5 [86/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.o 237.5 [87/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.o 238.5 [88/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.o 239.2 [89/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.o 240.4 [90/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.o 243.9 [91/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.o 247.6 [92/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.o 248.4 [93/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_kknn_instance.cpp.o 248.6 [94/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.o 249.5 [95/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.o 250.0 [96/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.o 250.3 [97/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.o 253.5 [98/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.o 254.9 [99/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.o 256.1 [100/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_kkn_instance.cpp.o 262.4 [101/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.o 267.9 [102/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.o 269.6 [103/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_knnn_instance.cpp.o 270.7 [104/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.o 271.5 [105/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_kknn_instance.cpp.o 278.2 [106/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp.o 279.1 [107/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.o 279.9 [108/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp.o 286.2 [109/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.o 290.7 [110/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_mknn_instance.cpp.o 296.3 [111/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_mnnn_instance.cpp.o 301.3 [112/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.o 305.2 [113/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp.o 305.5 [114/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_knnn_instance.cpp.o 306.1 [115/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_kkn_instance.cpp.o 307.1 [116/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_knn_instance.cpp.o 307.9 [117/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_kknn_instance.cpp.o 310.5 [118/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp.o 313.3 [119/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.o 315.3 [120/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.o 316.9 [121/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.o 317.7 [122/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.o 318.8 [123/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_mknn_instance.cpp.o 321.1 [124/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.o 321.2 [125/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/2D/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_mnnn_instance.cpp.o 324.4 [126/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.o 328.5 [127/1412] Building CXX object library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeFiles/device_elementwise_normalization_instance.dir/device_elementwise_normalization_f16_instance.cpp.o 338.3 [128/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_knnn_instance.cpp.o 338.9 [129/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.o 343.8 [130/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.o 351.1 [131/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_mknn_instance.cpp.o 352.6 [132/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.o 353.5 [133/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_knnn_instance.cpp.o 355.3 [134/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.o 359.0 [135/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_mkn_instance.cpp.o 359.8 [136/1412] Building CXX object library/src/tensor_operation_instance/gpu/elementwise/CMakeFiles/device_elementwise_instance.dir/device_normalize_instance.cpp.o 364.1 [137/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_mnn_instance.cpp.o 368.7 [138/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_knn_instance.cpp.o 369.1 [139/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_mkn_instance.cpp.o 372.3 [140/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_f16_mnnn_instance.cpp.o 372.9 [141/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.o 374.6 [142/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeFiles/device_conv2d_fwd_bias_relu_instance.dir/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp.o 375.2 [143/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp.o 377.6 [144/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_comp_default_instance.cpp.o 378.8 [145/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.o 383.2 [146/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.o 384.6 [147/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeFiles/device_conv2d_fwd_bias_relu_add_instance.dir/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp.o 384.9 [148/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.o 386.7 [149/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_comp_mnkpadding_instance.cpp.o 391.1 [150/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/2D/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_mnn_instance.cpp.o 405.3 [151/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_comp_default_instance.cpp.o 410.0 [152/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_kkn_instance.cpp.o 410.1 [153/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_comp_mnkpadding_instance.cpp.o 419.7 [154/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_mknn_instance.cpp.o 425.8 [155/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.o 428.3 [156/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_kkn_instance.cpp.o 436.0 [157/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeFiles/device_contraction_bilinear_instance.dir/6D/device_contraction_bilinear_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_bf16_mnnn_instance.cpp.o 452.0 [158/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_add.cpp.o 452.5 [159/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp.o 454.8 [160/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_knn_instance.cpp.o 459.0 [161/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp.o 463.2 [162/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp.o 464.8 [163/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_add.cpp.o 470.4 [164/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp.o 472.9 [165/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp.o 474.2 [166/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_min.cpp.o 477.3 [167/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_add.cpp.o 477.7 [168/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_odd_mn_instance.cpp.o 480.1 [169/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_max.cpp.o 483.9 [170/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_odd_mn_instance.cpp.o 488.4 [171/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp.o 488.6 [172/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_add.cpp.o 489.4 [173/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_add.cpp.o 491.0 [174/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp.o 492.7 [175/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.o 494.3 [176/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_mkn_instance.cpp.o 494.5 [177/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp.o 495.3 [178/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp.o 498.3 [179/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_f16_f16_f16_mnn_instance.cpp.o 499.4 [180/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp.o 501.2 [181/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp.o 501.5 [182/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_add.cpp.o 503.6 [183/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeFiles/device_conv1d_bwd_data_instance.dir/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp.o 503.7 [184/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp.o 503.9 [185/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp.o 505.1 [186/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp.o 506.6 [187/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i32_i8_add.cpp.o 512.9 [188/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_add.cpp.o 515.5 [189/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp.o 519.1 [190/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_knn_instance.cpp.o 523.0 [191/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp.o 523.2 [192/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp.o 526.8 [193/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_min.cpp.o 531.1 [194/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_add.cpp.o 531.8 [195/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp.o 533.6 [196/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp.o 537.0 [197/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_add.cpp.o 537.5 [198/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_max.cpp.o 541.1 [199/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v1_default_instance.cpp.o 542.0 [200/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp.o 542.4 [201/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_comp_mnkpadding_instance.cpp.o 544.2 [202/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i32_i8_add.cpp.o 545.0 [203/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v2_default_instance.cpp.o 547.9 [204/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp.o 548.7 [205/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp.o 554.1 [206/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_add.cpp.o 556.4 [207/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_mkn_instance.cpp.o 558.3 [208/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v1_mnkpadding_instance.cpp.o 560.9 [209/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_mem_v2_mnkpadding_instance.cpp.o 562.6 [210/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp.o 564.3 [211/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp.o 567.5 [212/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp.o 569.0 [213/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_max.cpp.o 569.7 [214/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp.o 572.5 [215/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_f64_f64_f64_min.cpp.o 578.4 [216/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_max.cpp.o 579.5 [217/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp.o 583.1 [218/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_min.cpp.o 585.0 [219/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v1_mnkpadding_instance.cpp.o 585.4 [220/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp.o 588.6 [221/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_min.cpp.o 589.8 [222/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v1_default_instance.cpp.o 591.5 [223/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_threadwise_b16_f32_b16_max.cpp.o 599.4 [224/1412] Building CXX object library/src/tensor_operation_instance/gpu/contraction_scale/CMakeFiles/device_contraction_scale_instance.dir/6D/device_contraction_scale_m6_n6_k6_xdl_c_shuffle_bf16_bf16_bf16_mnn_instance.cpp.o 612.7 [225/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v2_default_instance.cpp.o 619.0 [226/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_max.cpp.o 619.7 [227/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_mem_v2_mnkpadding_instance.cpp.o 619.9 [228/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_m_instance.cpp.o 622.4 [229/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_comp_mnkpadding_instance.cpp.o 628.0 [230/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_wmma_f16_f16_f16_exp_comp_default_instance.cpp.o 634.8 [231/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_m_instance.cpp.o 640.1 [232/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp.o 642.9 [233/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce3.cpp.o 647.7 [234/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce3.cpp.o 648.5 [235/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeFiles/device_conv2d_bwd_data_instance.dir/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.o 649.9 [236/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f16_f16_f16_min.cpp.o 651.0 [237/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce1.cpp.o 657.2 [238/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce2.cpp.o 657.6 [239/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce2.cpp.o 663.0 [240/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce2.cpp.o 664.3 [241/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce1.cpp.o 667.6 [242/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce1.cpp.o 669.6 [243/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank3_reduce2.cpp.o 674.3 [244/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce4.cpp.o 677.6 [245/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_min.cpp.o 680.0 [246/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce1.cpp.o 680.0 [247/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeFiles/device_conv2d_fwd_instance.dir/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp.o 681.9 [248/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_max.cpp.o 684.7 [249/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_wmma/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_wmma_bf16_bf16_bf16_exp_comp_default_instance.cpp.o 686.1 [250/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f16_f16_instance_rank4_reduce4.cpp.o 697.2 [251/1412] Building CXX object library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeFiles/device_conv3d_bwd_data_instance.dir/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp.o 702.6 [252/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank3_reduce3.cpp.o 706.4 [253/1412] Building CXX object library/src/tensor_operation_instance/gpu/softmax/CMakeFiles/device_softmax_instance.dir/device_softmax_f32_f32_instance_rank4_reduce3.cpp.o 707.8 [254/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instance.cpp.o 722.8 [255/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instance.cpp.o 731.0 [256/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp.o 734.4 [257/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_mn_instance.cpp.o 737.3 [258/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp.o 742.6 [259/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_n_instance.cpp.o 743.7 [260/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_min.cpp.o 745.5 [261/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp.o 746.5 [262/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instance.cpp.o 749.4 [263/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 750.0 [264/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_max.cpp.o 751.7 [265/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 753.9 [266/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/bf16_bf16_bf16/device_grouped_convnd_bwd_weight_xdl_bf16_bf16_bf16_exp_odd_mn_instance.cpp.o 755.4 [267/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f64_f64_f64_min.cpp.o 755.6 [268/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_comp_large_tensors_instance.cpp.o 761.6 [269/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_max.cpp.o 776.2 [270/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 776.3 [271/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp.o 781.2 [272/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp.o 781.4 [273/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 783.1 [274/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instance.cpp.o 788.0 [275/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp.o 790.5 [276/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp.o 790.6 [277/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 793.0 [278/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp.o 794.2 [279/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp.o 794.6 [280/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp.o 795.0 [281/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp.o 795.0 [282/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 797.8 [283/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_1.cpp.o 802.1 [284/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 803.4 [285/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instance.cpp.o 804.2 [286/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_convnd_bwd_weight/CMakeFiles/device_grouped_convnd_bwd_weight_instance.dir/explicit_xdl/fp16_fp16_fp16/device_grouped_convnd_bwd_weight_xdl_f16_f16_f16_exp_odd_n_instance.cpp.o 804.3 [287/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_0.cpp.o 805.9 [288/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_min.cpp.o 808.6 [289/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp.o 812.6 [290/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_2.cpp.o 814.6 [291/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_3.cpp.o 816.4 [292/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 818.6 [293/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_comp_instance.cpp.o 820.5 [294/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_6.cpp.o 822.3 [295/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_max.cpp.o 822.9 [296/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_20.cpp.o 823.2 [297/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances.cpp.o 823.5 [298/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_11.cpp.o 824.2 [299/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_8.cpp.o 824.6 [300/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp.o 824.9 [301/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instance.cpp.o 826.4 [302/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 826.5 [303/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_4.cpp.o 826.7 [304/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_10.cpp.o 830.2 [305/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_12.cpp.o 833.4 [306/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_13.cpp.o 833.5 [307/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_14.cpp.o 834.6 [308/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_16.cpp.o 835.4 [309/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_15.cpp.o 837.0 [310/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_19.cpp.o 838.9 [311/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 838.9 [312/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 840.8 [313/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_18.cpp.o 842.0 [314/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 843.4 [315/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_17.cpp.o 845.4 [316/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 846.3 [317/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_1.cpp.o 847.6 [318/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 849.1 [319/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_2.cpp.o 851.4 [320/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_0.cpp.o 851.6 [321/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_20.cpp.o 854.0 [322/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_8.cpp.o 854.8 [323/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances.cpp.o 855.5 [324/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_3.cpp.o 856.1 [325/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_11.cpp.o 858.3 [326/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_10.cpp.o 858.6 [327/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_4.cpp.o 859.9 [328/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_12.cpp.o 861.1 [329/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_13.cpp.o 864.2 [330/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_6.cpp.o 864.8 [331/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_14.cpp.o 866.7 [332/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_18.cpp.o 867.1 [333/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_15.cpp.o 869.1 [334/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_19.cpp.o 869.2 [335/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_16.cpp.o 869.8 [336/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_17.cpp.o 888.6 [337/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 889.0 [338/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_5.cpp.o 916.8 [339/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instance.cpp.o 924.7 [340/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 928.5 [341/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 933.2 [342/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instance.cpp.o 933.9 [343/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 940.7 [344/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instance.cpp.o 943.7 [345/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_5.cpp.o 948.4 [346/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_16_16_instance.cpp.o 949.0 [347/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_f32_large_tensors_instance.cpp.o 953.1 [348/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_wmma_nhwgc_gkyxc_nhwgk_f16_pipev1_instance.cpp.o 955.6 [349/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp.o 959.2 [350/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_bf16_large_tensors_instance.cpp.o 967.8 [351/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_wmma_nhwgc_gkyxc_nhwgk_bf16_pipev1_instance.cpp.o 969.6 [352/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_v3_nhwgc_gkyxc_nhwgk_f16_large_tensors_instance.cpp.o 972.0 [353/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_nongroup_ported_f16_instance.cpp.o 975.1 [354/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.o 990.2 [355/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_comp_large_tensors_instance.cpp.o 1003.6 [356/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp.o 1003.9 [357/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_large_tensors_instance.cpp.o 1022.5 [358/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_optimized_loads_instance.cpp.o 1037.3 [359/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_16_16_instance.cpp.o 1046.8 [360/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_16_16_instance.cpp.o 1046.9 [361/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load.cpp.o 1049.4 [362/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_nongroup_ported_bf16_instance.cpp.o 1053.1 [363/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_bf16_16_16_instance.cpp.o 1053.3 [364/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1059.8 [365/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_f16_16_16_instance.cpp.o 1078.7 [366/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 1096.7 [367/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_f32_bf16_instance.cpp.o 1097.5 [368/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1097.9 [369/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_wavelet_default_instance.cpp.o 1098.0 [370/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_wavelet_pad0_instance.cpp.o 1101.3 [371/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_wavelet_4w2_default_instance.cpp.o 1103.9 [372/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1114.8 [373/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load.cpp.o 1120.9 [374/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1124.2 [375/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_wavelet_4w2_pad0_instance.cpp.o 1125.3 [376/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_7.cpp.o 1138.7 [377/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_wavelet_pad0_instance.cpp.o 1139.2 [378/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 1139.8 [379/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_wavelet_4w2_default_instance.cpp.o 1140.2 [380/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_wavelet_4w2_pad0_instance.cpp.o 1144.9 [381/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 1146.6 [382/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_pipev5_instance.cpp.o 1147.2 [383/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_wavelet_default_instance.cpp.o 1151.9 [384/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_pipev2_instance.cpp.o 1159.1 [385/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1161.7 [386/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_pad0_pipev2_instance.cpp.o 1166.8 [387/1412] Building CXX object library/src/tensor_operation_instance/gpu/reduce/CMakeFiles/device_reduce_instance.dir/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp.o 1166.9 [388/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 1168.7 [389/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_7.cpp.o 1172.7 [390/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_pad0_pipev5_instance.cpp.o 1186.2 [391/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 1186.6 [392/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_optimized_loads_instance.cpp.o 1215.0 [393/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 1224.5 [394/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_optimized_loads_instance.cpp.o 1224.9 [395/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_inter_instances_shard_9.cpp.o 1250.8 [396/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 1258.0 [397/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 1262.5 [398/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 1286.9 [399/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_generic_instance.cpp.o 1287.7 [400/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 1289.6 [401/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_int8_mem_intra_instances_shard_9.cpp.o 1301.9 [402/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_generic_instance.cpp.o 1307.3 [403/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_2x_instance.cpp.o 1307.6 [404/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp.o 1310.2 [405/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_wave_transfer_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1337.8 [406/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 1338.2 [407/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.o 1339.6 [408/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_comp_instance.cpp.o 1345.2 [409/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_wave_transfer_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1358.5 [410/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 1360.9 [411/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_16x16_instance.cpp.o 1364.4 [412/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 1383.8 [413/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp.o 1384.0 [414/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 1388.6 [415/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1389.0 [416/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp.o 1390.5 [417/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1393.3 [418/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1394.2 [419/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1396.3 [420/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.o 1403.3 [421/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev5_irregular_instance.cpp.o 1411.0 [422/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev1_part2_instance.cpp.o 1412.3 [423/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev5_irregular_instance.cpp.o 1412.8 [424/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev1_part2_instance.cpp.o 1413.3 [425/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.o 1413.8 [426/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_part2_instance.cpp.o 1425.9 [427/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1430.6 [428/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 1431.1 [429/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_pipev5_instance.cpp.o 1434.3 [430/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp.o 1435.5 [431/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev2_irregular_instance.cpp.o 1438.9 [432/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instance.cpp.o 1439.5 [433/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1439.6 [434/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1441.4 [435/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev1_instance.cpp.o 1443.6 [436/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev2_irregular_instance.cpp.o 1444.5 [437/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_pipev5_instance.cpp.o 1452.4 [438/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev1_instance.cpp.o 1456.5 [439/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_pad0_pipev5_instance.cpp.o 1462.1 [440/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1462.6 [441/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1466.7 [442/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1474.9 [443/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_pad0_pipev5_instance.cpp.o 1489.2 [444/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_instance.cpp.o 1489.6 [445/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 1492.8 [446/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1499.1 [447/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_2x_instance.cpp.o 1501.1 [448/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instance.cpp.o 1513.9 [449/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_v3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1516.3 [450/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_direct_load_instance.cpp.o 1540.9 [451/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_comp_instance.cpp.o 1541.2 [452/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_16x16_instance.cpp.o 1542.3 [453/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1548.4 [454/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_default_large_tensors_instance.cpp.o 1551.3 [455/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_generic_instance.cpp.o 1553.7 [456/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1553.9 [457/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 1555.3 [458/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_generic_instance.cpp.o 1555.8 [459/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/large_tensor/device_grouped_conv2d_fwd_bias_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1556.7 [460/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instance.cpp.o 1565.8 [461/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1569.0 [462/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instance.cpp.o 1577.0 [463/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instance.cpp.o 1579.3 [464/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_pad0_pipev2_instance.cpp.o 1594.4 [465/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1598.0 [466/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_comp_part2_instance.cpp.o 1600.5 [467/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances.cpp.o 1603.7 [468/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1610.7 [469/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_16x16_instance.cpp.o 1614.7 [470/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/comp/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instance.cpp.o 1615.9 [471/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 1619.2 [472/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_2x_instances_shard_0.cpp.o 1623.6 [473/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_pad0_pipev2_instance.cpp.o 1625.4 [474/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_instance.cpp.o 1627.6 [475/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_inter_instance.cpp.o 1627.8 [476/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_pipev2_instance.cpp.o 1628.1 [477/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instance.cpp.o 1628.5 [478/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1632.6 [479/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1645.3 [480/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_0.cpp.o 1647.9 [481/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_2.cpp.o 1648.5 [482/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_intra_instance.cpp.o 1650.2 [483/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances.cpp.o 1652.7 [484/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_1.cpp.o 1654.3 [485/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_pipev2_instance.cpp.o 1661.8 [486/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_3.cpp.o 1675.5 [487/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances.cpp.o 1680.6 [488/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instances.cpp.o 1681.1 [489/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_direct_load_instance.cpp.o 1681.2 [490/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_4.cpp.o 1684.3 [491/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_9.cpp.o 1685.1 [492/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1687.1 [493/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_1.cpp.o 1689.0 [494/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_5.cpp.o 1690.6 [495/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_10.cpp.o 1690.8 [496/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_7.cpp.o 1692.6 [497/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instances.cpp.o 1695.3 [498/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_8.cpp.o 1696.0 [499/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_instances_shard_6.cpp.o 1698.6 [500/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1698.8 [501/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1700.2 [502/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_0.cpp.o 1700.8 [503/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_2.cpp.o 1704.1 [504/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1706.9 [505/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_instances_shard_0.cpp.o 1709.8 [506/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1709.9 [507/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_bn_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 1709.9 [508/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/large_tensor/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1715.4 [509/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_2x_instances_shard_0.cpp.o 1717.4 [510/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1717.6 [511/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_3.cpp.o 1730.0 [512/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_comp_part2_instances_shard_4.cpp.o 1735.8 [513/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances.cpp.o 1742.4 [514/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances.cpp.o 1742.9 [515/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances.cpp.o 1750.1 [516/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_5.cpp.o 1750.3 [517/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_0.cpp.o 1756.0 [518/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_6.cpp.o 1756.4 [519/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev5_instance.cpp.o 1757.3 [520/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_1.cpp.o 1758.0 [521/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_0.cpp.o 1761.0 [522/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_3.cpp.o 1762.1 [523/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_2.cpp.o 1764.6 [524/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_2.cpp.o 1765.9 [525/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_1.cpp.o 1766.7 [526/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_10.cpp.o 1768.4 [527/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_7.cpp.o 1768.7 [528/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 1769.1 [529/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_comp_instances_shard_3.cpp.o 1771.5 [530/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_8.cpp.o 1772.2 [531/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev5_instance.cpp.o 1773.0 [532/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_4.cpp.o 1774.1 [533/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_11.cpp.o 1774.7 [534/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_3.cpp.o 1774.9 [535/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 1777.8 [536/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/comp/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_comp_part2_instances_shard_9.cpp.o 1781.6 [537/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/merged_groups/device_grouped_conv2d_fwd_bias_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_fp16_instance.cpp.o 1784.2 [538/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 1785.2 [539/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_2.cpp.o 1787.4 [540/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 1788.8 [541/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_5.cpp.o 1789.0 [542/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 1790.7 [543/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_8.cpp.o 1792.0 [544/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_4.cpp.o 1794.3 [545/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_3.cpp.o 1795.8 [546/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances.cpp.o 1796.3 [547/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_10.cpp.o 1797.7 [548/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_7.cpp.o 1799.7 [549/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_6.cpp.o 1805.5 [550/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_11.cpp.o 1809.8 [551/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_9.cpp.o 1810.5 [552/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_13.cpp.o 1813.9 [553/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_12.cpp.o 1814.2 [554/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 1814.3 [555/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_0.cpp.o 1814.4 [556/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 1814.7 [557/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 1814.9 [558/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_inter_instance.cpp.o 1816.7 [559/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_16x16_instances_shard_1.cpp.o 1817.0 [560/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_14.cpp.o 1817.1 [561/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_instances_shard_15.cpp.o 1818.4 [562/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 1819.7 [563/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/wmma/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_wmma_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 1825.8 [564/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_3.cpp.o 1826.7 [565/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_4.cpp.o 1829.4 [566/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp32_mem_intra_instance.cpp.o 1829.7 [567/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_8.cpp.o 1830.6 [568/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances.cpp.o 1830.9 [569/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 1833.3 [570/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_6.cpp.o 1833.3 [571/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_5.cpp.o 1833.5 [572/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 1833.7 [573/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_7.cpp.o 1833.7 [574/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_10.cpp.o 1834.0 [575/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 1834.3 [576/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_11.cpp.o 1835.6 [577/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_13.cpp.o 1837.1 [578/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_9.cpp.o 1837.3 [579/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_12.cpp.o 1837.6 [580/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_1.cpp.o 1837.7 [581/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_14.cpp.o 1840.3 [582/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_instances_shard_15.cpp.o 1842.7 [583/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_0.cpp.o 1842.7 [584/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_3.cpp.o 1842.9 [585/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_16x16_instances_shard_2.cpp.o 1846.9 [586/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_2.cpp.o 1849.1 [587/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 1850.0 [588/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_3.cpp.o 1850.3 [589/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 1850.8 [590/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_5.cpp.o 1851.3 [591/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_1.cpp.o 1851.3 [592/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 1851.8 [593/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 1853.5 [594/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 1854.0 [595/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_8.cpp.o 1854.5 [596/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_7.cpp.o 1854.7 [597/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_9.cpp.o 1855.0 [598/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 1855.1 [599/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_2.cpp.o 1855.6 [600/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 1855.7 [601/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_11.cpp.o 1855.8 [602/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_12.cpp.o 1856.8 [603/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_4.cpp.o 1857.0 [604/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_16x16_instances_shard_0.cpp.o 1857.6 [605/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_6.cpp.o 1858.4 [606/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_10.cpp.o 1860.1 [607/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 1860.2 [608/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 1860.2 [609/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 1860.3 [610/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_15.cpp.o 1860.8 [611/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 1862.0 [612/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 1862.3 [613/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 1863.5 [614/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 1865.1 [615/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 1865.2 [616/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_13.cpp.o 1867.1 [617/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 1867.1 [618/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_1.cpp.o 1869.1 [619/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_large_tensor_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 1869.8 [620/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_instances_shard_14.cpp.o 1871.1 [621/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances.cpp.o 1873.9 [622/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_4.cpp.o 1878.7 [623/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_7.cpp.o 1879.9 [624/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp.o 1880.0 [625/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_3.cpp.o 1880.7 [626/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_9.cpp.o 1880.8 [627/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_6.cpp.o 1881.4 [628/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_2.cpp.o 1884.1 [629/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_5.cpp.o 1884.4 [630/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_0.cpp.o 1885.1 [631/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_8.cpp.o 1885.6 [632/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_10.cpp.o 1886.0 [633/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances.cpp.o 1886.8 [634/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_2.cpp.o 1888.1 [635/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_11.cpp.o 1889.2 [636/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_direct_load_instances_shard_11.cpp.o 1890.2 [637/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_0.cpp.o 1891.2 [638/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_1.cpp.o 1891.9 [639/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_3.cpp.o 1894.1 [640/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_18.cpp.o 1895.0 [641/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_6.cpp.o 1895.0 [642/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_12.cpp.o 1895.0 [643/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 1895.7 [644/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_8.cpp.o 1896.1 [645/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_13.cpp.o 1896.5 [646/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_1.cpp.o 1897.3 [647/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_14.cpp.o 1899.6 [648/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_19.cpp.o 1899.7 [649/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_4.cpp.o 1900.0 [650/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_10.cpp.o 1900.1 [651/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_15.cpp.o 1900.3 [652/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_2.cpp.o 1900.7 [653/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_17.cpp.o 1902.2 [654/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_0.cpp.o 1902.3 [655/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_3.cpp.o 1902.4 [656/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_16.cpp.o 1906.0 [657/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_4.cpp.o 1906.7 [658/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances.cpp.o 1911.3 [659/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_6.cpp.o 1912.3 [660/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_10.cpp.o 1915.1 [661/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_11.cpp.o 1915.3 [662/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_13.cpp.o 1915.7 [663/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_16.cpp.o 1915.9 [664/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_14.cpp.o 1915.9 [665/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp.o 1916.2 [666/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_8.cpp.o 1917.4 [667/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_18.cpp.o 1917.7 [668/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_12.cpp.o 1917.9 [669/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances.cpp.o 1918.1 [670/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_15.cpp.o 1918.1 [671/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_17.cpp.o 1919.3 [672/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_1.cpp.o 1920.5 [673/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_2.cpp.o 1923.9 [674/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_19.cpp.o 1924.5 [675/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_8.cpp.o 1924.8 [676/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_3.cpp.o 1925.1 [677/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_10.cpp.o 1925.4 [678/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_7.cpp.o 1925.6 [679/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_9.cpp.o 1927.2 [680/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_6.cpp.o 1927.8 [681/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_11.cpp.o 1928.0 [682/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_5.cpp.o 1928.7 [683/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 1929.2 [684/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_4.cpp.o 1930.0 [685/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_2.cpp.o 1930.0 [686/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_direct_load_instances_shard_0.cpp.o 1931.4 [687/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_1.cpp.o 1931.8 [688/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_large_tensors_instance.cpp.o 1932.8 [689/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances.cpp.o 1933.3 [690/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_pipev2_instance.cpp.o 1934.5 [691/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_4.cpp.o 1935.2 [692/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_0.cpp.o 1935.7 [693/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_3.cpp.o 1939.0 [694/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_8.cpp.o 1939.9 [695/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_11.cpp.o 1940.2 [696/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_bf16_large_tensors_instance.cpp.o 1942.0 [697/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_6.cpp.o 1943.9 [698/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instance.cpp.o 1946.6 [699/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_10.cpp.o 1946.7 [700/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_18.cpp.o 1948.1 [701/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_1.cpp.o 1949.5 [702/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_15.cpp.o 1949.8 [703/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances.cpp.o 1951.7 [704/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_5.cpp.o 1952.0 [705/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_14.cpp.o 1952.1 [706/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_12.cpp.o 1954.6 [707/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_2.cpp.o 1954.6 [708/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_16.cpp.o 1955.2 [709/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_3.cpp.o 1955.2 [710/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_17.cpp.o 1955.3 [711/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_19.cpp.o 1957.2 [712/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_11.cpp.o 1957.8 [713/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_10.cpp.o 1958.0 [714/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 1958.1 [715/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_13.cpp.o 1958.1 [716/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_8.cpp.o 1960.0 [717/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_0.cpp.o 1960.1 [718/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_4.cpp.o 1960.2 [719/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_12.cpp.o 1962.7 [720/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_19.cpp.o 1962.9 [721/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_18.cpp.o 1963.2 [722/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_6.cpp.o 1964.4 [723/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_17.cpp.o 1965.9 [724/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_13.cpp.o 1965.9 [725/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_7.cpp.o 1966.5 [726/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_16.cpp.o 1966.7 [727/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_2.cpp.o 1966.8 [728/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_3.cpp.o 1967.8 [729/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_0.cpp.o 1968.6 [730/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_14.cpp.o 1969.1 [731/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_5.cpp.o 1969.4 [732/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances.cpp.o 1969.7 [733/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_15.cpp.o 1969.9 [734/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_1.cpp.o 1971.9 [735/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_6.cpp.o 1973.0 [736/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_5.cpp.o 1975.1 [737/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_9.cpp.o 1976.7 [738/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_4.cpp.o 1978.5 [739/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_10.cpp.o 1980.0 [740/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_8.cpp.o 1980.3 [741/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances.cpp.o 1980.5 [742/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_inter_instance.cpp.o 1980.6 [743/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instances_shard_9.cpp.o 1981.8 [744/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.o 1982.2 [745/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_11.cpp.o 1982.5 [746/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances.cpp.o 1982.5 [747/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_12.cpp.o 1982.6 [748/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_14.cpp.o 1982.9 [749/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_15.cpp.o 1983.3 [750/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_13.cpp.o 1983.6 [751/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_2.cpp.o 1984.3 [752/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_fp16_mem_intra_instance.cpp.o 1984.7 [753/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_7.cpp.o 1985.5 [754/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_3.cpp.o 1985.6 [755/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances.cpp.o 1987.2 [756/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_0.cpp.o 1987.8 [757/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_two_stage_xdl_nhwgc_gkyxc_nhwgk_f16_pipev2_instance.cpp.o 1988.0 [758/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_1.cpp.o 1988.6 [759/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances.cpp.o 1989.1 [760/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/xdl/mem/device_grouped_conv2d_fwd_bias_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_inter_instance.cpp.o 1989.2 [761/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_10.cpp.o 1989.4 [762/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_11.cpp.o 1989.7 [763/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_9.cpp.o 1989.9 [764/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_8.cpp.o 1990.8 [765/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_5.cpp.o 1991.2 [766/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_6.cpp.o 1991.9 [767/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_4.cpp.o 1992.7 [768/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_13.cpp.o 1994.4 [769/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_14.cpp.o 1994.6 [770/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_15.cpp.o 1995.0 [771/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_7.cpp.o 1995.0 [772/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_12.cpp.o 1996.8 [773/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part2.cpp.o 2001.6 [774/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_2.cpp.o 2002.2 [775/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_1.cpp.o 2003.4 [776/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f32_instances_shard_0.cpp.o 2003.6 [777/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_5.cpp.o 2003.9 [778/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part4.cpp.o 2006.1 [779/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2008.1 [780/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_bf16_mem_intra_instances_shard_9.cpp.o 2008.2 [781/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f32_instance.cpp.o 2009.2 [782/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_int8_instance.cpp.o 2010.1 [783/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/xdl/device_grouped_conv2d_fwd_xdl_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2021.2 [784/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_5.cpp.o 2021.5 [785/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_inter_instances_shard_7.cpp.o 2024.7 [786/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_inter_instances_shard_9.cpp.o 2025.9 [787/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_default_large_tensors_instance.cpp.o 2026.2 [788/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part2.cpp.o 2027.0 [789/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_7.cpp.o 2027.4 [790/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2029.3 [791/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instance.cpp.o 2032.9 [792/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_weight_nhwgc_instance.dir/xdl/nhwgc_gkyxc_nhwgk/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_default_large_tensors_instance.cpp.o 2034.5 [793/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instance.cpp.o 2035.1 [794/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f32_mem_intra_instances_shard_7.cpp.o 2043.9 [795/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/mem/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_nhwgc_gkyxc_nhwgk_f16_mem_intra_instances_shard_9.cpp.o 2053.0 [796/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part4.cpp.o 2057.1 [797/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2059.9 [798/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_large_tensors_instance.cpp.o 2062.4 [799/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_1.cpp.o 2071.2 [800/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_2.cpp.o 2082.0 [801/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_dynamic_op_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2082.2 [802/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 2082.6 [803/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_2.cpp.o 2085.0 [804/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part3.cpp.o 2088.1 [805/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_1.cpp.o 2089.1 [806/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instance.cpp.o 2089.1 [807/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_f16_instances_shard_0.cpp.o 2089.7 [808/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.o 2090.4 [809/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_1.cpp.o 2100.3 [810/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_11.cpp.o 2105.8 [811/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_2.cpp.o 2107.8 [812/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_3.cpp.o 2108.6 [813/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_dynamic_op/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_dynamic_op_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_wmma_dynamic_op_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2111.8 [814/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances.cpp.o 2112.4 [815/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2116.8 [816/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_4.cpp.o 2117.4 [817/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 2125.5 [818/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_bnorm_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_bnorm_clamp_nhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv2d_fwd_bias_bn_clamp_xdl_merged_groups_nhwgc_gkyxc_nhwgk_bf16_instances_shard_0.cpp.o 2127.8 [819/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_f16_instance_part1.cpp.o 2128.5 [820/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_9.cpp.o 2132.6 [821/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2136.4 [822/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2137.9 [823/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2139.2 [824/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp.o 2140.4 [825/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances.cpp.o 2146.7 [826/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_1.cpp.o 2147.4 [827/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_7.cpp.o 2147.8 [828/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2148.2 [829/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_2.cpp.o 2149.9 [830/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_fp8_instance.cpp.o 2151.1 [831/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_5.cpp.o 2151.6 [832/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_4.cpp.o 2153.7 [833/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_6.cpp.o 2154.8 [834/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_3.cpp.o 2156.5 [835/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2161.0 [836/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part3.cpp.o 2166.5 [837/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_8.cpp.o 2167.5 [838/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf8_instance.cpp.o 2178.2 [839/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_fp8_bf8_instance.cpp.o 2178.9 [840/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf8_fp8_instance.cpp.o 2179.7 [841/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2186.7 [842/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_10.cpp.o 2187.7 [843/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_8.cpp.o 2189.8 [844/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_0.cpp.o 2208.8 [845/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_7.cpp.o 2220.3 [846/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_6.cpp.o 2226.8 [847/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_5.cpp.o 2237.9 [848/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd_bias_clamp/nhwgc/CMakeFiles/device_grouped_conv2d_fwd_bias_clamp_nhwgc_instance.dir/wmma/device_grouped_conv2d_fwd_bias_clamp_wmma_cshufflev3_nhwgc_gkyxc_nhwgk_bf16_instance_part1.cpp.o 2241.4 [849/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instance.cpp.o 2244.4 [850/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instance.cpp.o 2251.4 [851/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16_16_instance.cpp.o 2264.5 [852/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_11.cpp.o 2265.1 [853/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_0.cpp.o 2265.1 [854/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_10.cpp.o 2280.4 [855/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_v3_ndhwgc_gkzyxc_ndhwgk_f32_large_tensors_instance.cpp.o 2282.7 [856/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_9.cpp.o 2283.9 [857/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_wmma_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_instance.cpp.o 2298.1 [858/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_wmma_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_instance.cpp.o 2299.3 [859/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_v3_ndhwgc_gkzyxc_ndhwgk_bf16_large_tensors_instance.cpp.o 2304.5 [860/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_v3_ndhwgc_gkzyxc_ndhwgk_f16_large_tensors_instance.cpp.o 2307.2 [861/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_optimized_loads_instance.cpp.o 2313.1 [862/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16_16_instance.cpp.o 2328.6 [863/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16_16_instance.cpp.o 2330.2 [864/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 2338.8 [865/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2340.5 [866/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2342.2 [867/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.o 2352.4 [868/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.o 2352.6 [869/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_f16_16_16_instance.cpp.o 2353.5 [870/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2353.9 [871/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_bf16_16_16_instance.cpp.o 2355.0 [872/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_wavelet_default_instance.cpp.o 2356.5 [873/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_wavelet_pad0_instance.cpp.o 2358.3 [874/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp.o 2359.7 [875/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2362.8 [876/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_wavelet_pad0_instance.cpp.o 2363.8 [877/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2365.1 [878/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_wavelet_default_instance.cpp.o 2370.2 [879/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev2_instance.cpp.o 2377.3 [880/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_pipev5_instance.cpp.o 2377.5 [881/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev2_instance.cpp.o 2377.8 [882/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp.o 2377.9 [883/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2378.3 [884/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_large_tensors_instance.cpp.o 2378.8 [885/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_pad0_pipev5_instance.cpp.o 2384.5 [886/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_large_tensors_instance.cpp.o 2407.8 [887/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_optimized_loads_instance.cpp.o 2408.7 [888/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 2411.4 [889/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2413.6 [890/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/nhwgc/CMakeFiles/device_grouped_conv2d_bwd_data_nhwgc_instance.dir/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.o 2419.9 [891/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 2422.3 [892/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_optimized_loads_instance.cpp.o 2437.3 [893/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 2447.7 [894/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 2455.0 [895/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2455.5 [896/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 2465.6 [897/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 2466.4 [898/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev2_irregular_instance.cpp.o 2472.3 [899/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev5_irregular_instance.cpp.o 2478.2 [900/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 2478.4 [901/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2479.2 [902/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 2482.3 [903/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 2487.3 [904/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_part2_instance.cpp.o 2492.0 [905/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev1_instance.cpp.o 2492.5 [906/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pad0_pipev5_instance.cpp.o 2495.3 [907/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 2497.2 [908/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_part2_instance.cpp.o 2505.2 [909/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.o 2512.2 [910/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev1_instance.cpp.o 2514.0 [911/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pad0_pipev5_instance.cpp.o 2515.8 [912/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev5_irregular_instance.cpp.o 2518.1 [913/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.o 2521.9 [914/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev2_irregular_instance.cpp.o 2521.9 [915/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp.o 2523.2 [916/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_generic_instance.cpp.o 2527.6 [917/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_comp_instance.cpp.o 2535.8 [918/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_16x16_instance.cpp.o 2535.9 [919/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_generic_instance.cpp.o 2536.2 [920/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pad0_pipev2_instance.cpp.o 2539.4 [921/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_pipev5_instance.cpp.o 2541.4 [922/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp.o 2541.5 [923/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2542.3 [924/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 2542.5 [925/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 2543.1 [926/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 2546.4 [927/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2551.3 [928/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.o 2552.8 [929/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_pipev5_instance.cpp.o 2556.9 [930/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2558.9 [931/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pad0_pipev2_instance.cpp.o 2559.0 [932/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp.o 2559.7 [933/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 2567.2 [934/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 2567.6 [935/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2570.0 [936/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 2572.0 [937/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 2575.3 [938/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instance.cpp.o 2576.4 [939/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp.o 2579.1 [940/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_default_large_tensors_instance.cpp.o 2579.6 [941/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instance.cpp.o 2581.9 [942/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp.o 2584.2 [943/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 2586.0 [944/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2586.5 [945/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 2588.4 [946/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2593.0 [947/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 2596.6 [948/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2608.0 [949/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2610.0 [950/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 2613.1 [951/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 2619.9 [952/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2620.3 [953/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 2621.8 [954/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 2624.1 [955/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2627.6 [956/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/large_tensor/device_grouped_conv3d_fwd_bias_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 2628.8 [957/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_16x16_instance.cpp.o 2628.9 [958/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_comp_instance.cpp.o 2630.8 [959/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_generic_instance.cpp.o 2631.4 [960/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_generic_instance.cpp.o 2632.0 [961/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_pipev2_instance.cpp.o 2641.4 [962/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_pipev2_instance.cpp.o 2645.3 [963/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_2x_instances.cpp.o 2654.1 [964/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 2656.6 [965/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 2658.5 [966/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_2x_instances_shard_0.cpp.o 2659.0 [967/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 2659.1 [968/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_0.cpp.o 2661.5 [969/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_16x16_instance.cpp.o 2662.1 [970/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_1.cpp.o 2665.6 [971/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev5_instance.cpp.o 2673.3 [972/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_2.cpp.o 2675.4 [973/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instance.cpp.o 2676.4 [974/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_intra_instance.cpp.o 2677.7 [975/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2684.9 [976/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_inter_instance.cpp.o 2690.8 [977/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_instance.cpp.o 2691.3 [978/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 2693.3 [979/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_instance.cpp.o 2693.4 [980/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances.cpp.o 2696.4 [981/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_4.cpp.o 2697.2 [982/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_3.cpp.o 2697.5 [983/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev5_instance.cpp.o 2698.2 [984/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_comp_instance.cpp.o 2705.2 [985/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/merged_groups/device_grouped_conv3d_fwd_bias_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2711.5 [986/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/comp/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instance.cpp.o 2711.9 [987/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_8.cpp.o 2715.4 [988/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 2716.8 [989/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_2x_instances.cpp.o 2718.5 [990/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances.cpp.o 2720.3 [991/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances.cpp.o 2720.8 [992/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2722.9 [993/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2724.7 [994/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_7.cpp.o 2725.4 [995/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_5.cpp.o 2726.0 [996/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_9.cpp.o 2728.5 [997/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_6.cpp.o 2729.0 [998/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_instances_shard_10.cpp.o 2730.8 [999/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2733.3 [1000/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_3.cpp.o 2737.8 [1001/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 2740.3 [1002/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_2.cpp.o 2740.8 [1003/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_4.cpp.o 2744.5 [1004/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_1.cpp.o 2744.7 [1005/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_comp_part2_instances_shard_0.cpp.o 2748.8 [1006/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/large_tensor/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2755.4 [1007/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_2x_instances_shard_0.cpp.o 2756.2 [1008/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_2.cpp.o 2758.2 [1009/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_1.cpp.o 2759.4 [1010/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances.cpp.o 2759.4 [1011/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances.cpp.o 2762.0 [1012/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_instances_shard_0.cpp.o 2762.3 [1013/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_0.cpp.o 2774.2 [1014/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_3.cpp.o 2779.7 [1015/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances.cpp.o 2786.6 [1016/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_4.cpp.o 2787.5 [1017/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_5.cpp.o 2802.2 [1018/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_8.cpp.o 2803.2 [1019/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_10.cpp.o 2805.3 [1020/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_2.cpp.o 2806.4 [1021/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_9.cpp.o 2807.3 [1022/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_7.cpp.o 2807.5 [1023/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_3.cpp.o 2811.7 [1024/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_0.cpp.o 2812.8 [1025/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_6.cpp.o 2814.3 [1026/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_comp_instances_shard_1.cpp.o 2819.1 [1027/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/comp/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_part2_instances_shard_11.cpp.o 2830.0 [1028/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 2831.2 [1029/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_7.cpp.o 2834.5 [1030/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_9.cpp.o 2834.6 [1031/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 2839.9 [1032/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_5.cpp.o 2841.3 [1033/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_4.cpp.o 2841.9 [1034/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 2844.4 [1035/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/wmma/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2846.1 [1036/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 2846.7 [1037/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_6.cpp.o 2849.0 [1038/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_3.cpp.o 2849.3 [1039/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_8.cpp.o 2851.4 [1040/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances.cpp.o 2853.8 [1041/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_10.cpp.o 2861.0 [1042/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_inter_instance.cpp.o 2865.8 [1043/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_11.cpp.o 2873.7 [1044/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_12.cpp.o 2874.8 [1045/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_14.cpp.o 2877.7 [1046/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_3.cpp.o 2881.9 [1047/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_15.cpp.o 2884.2 [1048/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_2.cpp.o 2887.6 [1049/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp32_mem_intra_instance.cpp.o 2892.4 [1050/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 2893.2 [1051/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_13.cpp.o 2897.3 [1052/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_1.cpp.o 2897.7 [1053/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 2906.6 [1054/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 2910.2 [1055/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_4.cpp.o 2911.4 [1056/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_16x16_instances_shard_0.cpp.o 2916.4 [1057/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_11.cpp.o 2918.6 [1058/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 2919.8 [1059/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_5.cpp.o 2920.5 [1060/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_3.cpp.o 2922.4 [1061/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_7.cpp.o 2926.1 [1062/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_3.cpp.o 2926.6 [1063/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_12.cpp.o 2927.2 [1064/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_6.cpp.o 2927.5 [1065/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances.cpp.o 2928.9 [1066/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_10.cpp.o 2931.4 [1067/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_9.cpp.o 2933.3 [1068/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 2934.5 [1069/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_14.cpp.o 2937.3 [1070/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_8.cpp.o 2942.1 [1071/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_1.cpp.o 2942.2 [1072/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_13.cpp.o 2943.5 [1073/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 2943.8 [1074/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_15.cpp.o 2947.4 [1075/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_2.cpp.o 2947.7 [1076/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_0.cpp.o 2954.4 [1077/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_16x16_instances_shard_1.cpp.o 2954.8 [1078/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 2957.4 [1079/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_2.cpp.o 2961.7 [1080/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_3.cpp.o 2962.1 [1081/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 2963.7 [1082/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_2.cpp.o 2969.8 [1083/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 2972.4 [1084/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 2973.4 [1085/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 2977.6 [1086/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 2979.2 [1087/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 2979.3 [1088/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_4.cpp.o 2979.7 [1089/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_16x16_instances_shard_0.cpp.o 2981.9 [1090/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_5.cpp.o 2982.5 [1091/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_6.cpp.o 2986.2 [1092/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_11.cpp.o 2986.8 [1093/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 2987.2 [1094/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 2987.9 [1095/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 2989.9 [1096/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_7.cpp.o 2990.7 [1097/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_10.cpp.o 2992.6 [1098/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_8.cpp.o 3000.0 [1099/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 3001.6 [1100/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_9.cpp.o 3003.8 [1101/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 3004.6 [1102/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_13.cpp.o 3005.4 [1103/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 3006.3 [1104/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_15.cpp.o 3007.0 [1105/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 3007.2 [1106/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 3007.5 [1107/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 3009.9 [1108/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_14.cpp.o 3010.4 [1109/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_12.cpp.o 3011.8 [1110/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 3017.7 [1111/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/large_tensor/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_large_tensor_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 3023.8 [1112/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances.cpp.o 3034.1 [1113/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_0.cpp.o 3044.7 [1114/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_2.cpp.o 3048.2 [1115/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_3.cpp.o 3049.5 [1116/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 3050.3 [1117/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_1.cpp.o 3064.3 [1118/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_18.cpp.o 3066.4 [1119/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_17.cpp.o 3068.0 [1120/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_15.cpp.o 3068.6 [1121/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_6.cpp.o 3068.9 [1122/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_4.cpp.o 3069.1 [1123/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 3070.7 [1124/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_12.cpp.o 3071.3 [1125/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_11.cpp.o 3073.1 [1126/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_19.cpp.o 3075.4 [1127/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_16.cpp.o 3076.1 [1128/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_8.cpp.o 3076.3 [1129/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_8.cpp.o 3077.4 [1130/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_14.cpp.o 3077.9 [1131/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_1.cpp.o 3078.5 [1132/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 3078.6 [1133/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_13.cpp.o 3081.3 [1134/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_0.cpp.o 3081.3 [1135/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_10.cpp.o 3093.7 [1136/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_2.cpp.o 3095.2 [1137/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances.cpp.o 3095.5 [1138/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_6.cpp.o 3096.5 [1139/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_10.cpp.o 3098.1 [1140/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_4.cpp.o 3101.7 [1141/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_11.cpp.o 3102.4 [1142/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_12.cpp.o 3104.2 [1143/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp.o 3106.4 [1144/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_13.cpp.o 3109.6 [1145/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_15.cpp.o 3110.2 [1146/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_3.cpp.o 3118.2 [1147/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_16.cpp.o 3122.5 [1148/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_18.cpp.o 3123.4 [1149/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_14.cpp.o 3133.5 [1150/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_2.cpp.o 3141.9 [1151/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_1.cpp.o 3142.0 [1152/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_0.cpp.o 3146.4 [1153/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances.cpp.o 3146.6 [1154/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 3152.7 [1155/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_17.cpp.o 3156.0 [1156/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_19.cpp.o 3157.2 [1157/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 3157.4 [1158/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_3.cpp.o 3164.4 [1159/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_10.cpp.o 3169.3 [1160/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 3170.8 [1161/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_4.cpp.o 3171.0 [1162/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_15.cpp.o 3174.1 [1163/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_14.cpp.o 3174.6 [1164/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp.o 3174.8 [1165/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_16.cpp.o 3178.6 [1166/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_11.cpp.o 3179.2 [1167/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_18.cpp.o 3179.3 [1168/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_6.cpp.o 3180.0 [1169/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_2.cpp.o 3180.2 [1170/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_8.cpp.o 3182.1 [1171/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_17.cpp.o 3182.8 [1172/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_13.cpp.o 3183.7 [1173/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_12.cpp.o 3184.2 [1174/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_4.cpp.o 3187.9 [1175/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_1.cpp.o 3193.8 [1176/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_0.cpp.o 3195.1 [1177/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_large_tensors_instance.cpp.o 3203.0 [1178/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_3.cpp.o 3204.3 [1179/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_19.cpp.o 3206.9 [1180/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances.cpp.o 3207.9 [1181/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_large_tensors_instance.cpp.o 3210.1 [1182/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_10.cpp.o 3220.6 [1183/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_6.cpp.o 3222.1 [1184/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_8.cpp.o 3227.8 [1185/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_13.cpp.o 3231.7 [1186/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances.cpp.o 3231.9 [1187/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_11.cpp.o 3233.8 [1188/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_15.cpp.o 3238.0 [1189/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_16.cpp.o 3238.9 [1190/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_12.cpp.o 3239.7 [1191/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_19.cpp.o 3241.5 [1192/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_18.cpp.o 3245.0 [1193/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_17.cpp.o 3246.9 [1194/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_14.cpp.o 3253.4 [1195/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_f16_pipev2_instance.cpp.o 3254.2 [1196/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_5.cpp.o 3254.4 [1197/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_6.cpp.o 3257.4 [1198/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_7.cpp.o 3260.2 [1199/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.o 3260.8 [1200/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_11.cpp.o 3260.9 [1201/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_1.cpp.o 3263.3 [1202/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_15.cpp.o 3263.4 [1203/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_0.cpp.o 3263.9 [1204/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 3265.3 [1205/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_2.cpp.o 3266.2 [1206/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_9.cpp.o 3266.4 [1207/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_14.cpp.o 3267.6 [1208/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_7.cpp.o 3268.1 [1209/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_3.cpp.o 3268.2 [1210/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_0.cpp.o 3269.6 [1211/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_10.cpp.o 3270.6 [1212/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_12.cpp.o 3272.5 [1213/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_two_stage_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_pipev2_instance.cpp.o 3274.9 [1214/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_4.cpp.o 3276.0 [1215/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_8.cpp.o 3276.5 [1216/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_13.cpp.o 3277.6 [1217/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_1.cpp.o 3282.0 [1218/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances.cpp.o 3284.7 [1219/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_5.cpp.o 3284.7 [1220/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_inter_instance.cpp.o 3284.7 [1221/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_2.cpp.o 3285.7 [1222/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instance.cpp.o 3285.8 [1223/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_3.cpp.o 3288.3 [1224/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances.cpp.o 3288.8 [1225/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances.cpp.o 3290.2 [1226/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_9.cpp.o 3291.7 [1227/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_4.cpp.o 3292.5 [1228/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_5.cpp.o 3295.8 [1229/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances.cpp.o 3296.1 [1230/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_10.cpp.o 3301.1 [1231/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_8.cpp.o 3302.0 [1232/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_6.cpp.o 3304.2 [1233/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_14.cpp.o 3309.3 [1234/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_13.cpp.o 3310.1 [1235/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_5.cpp.o 3311.5 [1236/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_11.cpp.o 3314.4 [1237/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_15.cpp.o 3316.7 [1238/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_5.cpp.o 3316.7 [1239/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instance.cpp.o 3319.0 [1240/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_12.cpp.o 3328.9 [1241/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_intra_instances_shard_9.cpp.o 3330.4 [1242/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_2.cpp.o 3336.8 [1243/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_1.cpp.o 3337.8 [1244/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f32_instances_shard_0.cpp.o 3347.5 [1245/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_7.cpp.o 3348.3 [1246/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3348.8 [1247/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3350.6 [1248/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_5.cpp.o 3352.6 [1249/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_mem_inter_instances_shard_9.cpp.o 3353.3 [1250/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3353.4 [1251/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 3355.6 [1252/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_7.cpp.o 3359.7 [1253/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_dynamic_op_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 3363.9 [1254/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/xdl/mem/device_grouped_conv3d_fwd_bias_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_fp16_mem_intra_instance.cpp.o 3364.5 [1255/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_inter_instances_shard_9.cpp.o 3375.4 [1256/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_inter_instances_shard_7.cpp.o 3382.7 [1257/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_default_large_tensors_instance.cpp.o 3405.7 [1258/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f32_mem_intra_instances_shard_7.cpp.o 3408.7 [1259/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3410.4 [1260/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/mem/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_ndhwgc_gkzyxc_ndhwgk_f16_mem_intra_instances_shard_9.cpp.o 3413.4 [1261/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_2.cpp.o 3415.0 [1262/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 3418.2 [1263/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 3421.3 [1264/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3428.0 [1265/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 3430.3 [1266/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_instance.cpp.o 3430.5 [1267/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3430.7 [1268/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convinvscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convinvscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convinvscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 3431.3 [1269/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3432.4 [1270/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3434.8 [1271/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_combconvscale_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 3435.8 [1272/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 3436.2 [1273/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 3437.5 [1274/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_add/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_add_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_add_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 3440.6 [1275/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3441.6 [1276/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_1.cpp.o 3445.7 [1277/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3447.0 [1278/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.o 3447.1 [1279/1412] cd /__w/rockrel/rockrel/build/ml-libs/composable_kernel/build && /usr/local/therock-tools/bin/cmake -E rm -f /__w/rockrel/rockrel/build/ml-libs/composable_kernel/stamp/stage.stamp 3448.4 [1280/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_1.cpp.o 3452.5 [1281/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_bf8_f8_instance.cpp.o 3454.2 [1282/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 3454.6 [1283/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_ndhwgc_instance.dir/xdl/ndhwgc_gkzyxc_ndhwgk/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_default_large_tensors_instance.cpp.o 3454.9 [1284/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_combconvscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_f8_f32_instance.cpp.o 3455.1 [1285/1412] Building CXX object library/src/utility/CMakeFiles/utility.dir/device_memory.cpp.o 3456.2 [1286/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_ndhwgc_gkzyxc_ndhwgk_f8_bf8_instance.cpp.o 3460.0 [1287/1412] Building CXX object library/src/utility/CMakeFiles/utility.dir/convolution_parameter.cpp.o 3460.1 [1288/1412] Building CXX object library/src/utility/CMakeFiles/utility.dir/host_tensor.cpp.o 3460.2 [1289/1412] Linking CXX static library lib/libdevice_other_operations.a 3460.4 [1290/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_2.cpp.o 3461.2 [1291/1412] Linking CXX static library lib/libdevice_contraction_operations.a 3462.1 [1292/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_convscale_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_convscale_relu_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_convscale_relu_ndhwgc_gkzyxc_ndhwgk_f8_instance.cpp.o 3470.0 [1293/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_dynamic_op_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3490.4 [1294/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_bf16_instances_shard_0.cpp.o 3496.1 [1295/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp.o 3503.0 [1296/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3503.5 [1297/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 3507.0 [1298/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_bnorm_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_bnorm_clamp_ndhwgc_instance.dir/generated/xdl/merged_groups/device_grouped_conv3d_fwd_bias_bn_clamp_xdl_merged_groups_ndhwgc_gkzyxc_ndhwgk_f16_instances_shard_0.cpp.o 3507.7 [1299/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_dynamic_op/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_dynamic_op_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_dynamic_op_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3509.5 [1300/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3514.2 [1301/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.o 3518.7 [1302/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 3522.3 [1303/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3532.9 [1304/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_scaleadd_relu_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3563.6 [1305/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 3589.0 [1306/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 3593.8 [1307/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3624.9 [1308/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3629.1 [1309/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3632.0 [1310/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bias_clamp/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bias_clamp_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_bias_clamp_wmma_cshufflev3_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 3659.9 [1311/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 3662.5 [1312/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3672.8 [1313/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 3684.0 [1314/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 3696.2 [1315/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3715.8 [1316/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.o 3716.3 [1317/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.o 3749.2 [1318/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 3749.5 [1319/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 3771.2 [1320/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 3772.5 [1321/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 3772.7 [1322/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 3782.4 [1323/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.o 3796.9 [1324/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 3807.8 [1325/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.o 3827.7 [1326/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.o 3829.5 [1327/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.o 3838.4 [1328/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_f32_bf16_instance.cpp.o 3842.5 [1329/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.o 3847.4 [1330/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.o 3868.6 [1331/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.o 3901.4 [1332/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.o 3903.3 [1333/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 3907.6 [1334/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_weight_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3910.8 [1335/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 3912.4 [1336/1412] Building CXX object library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeFiles/device_batched_gemm_reduce_instance.dir/device_batched_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.o 3914.1 [1337/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_default_instance.cpp.o 3917.0 [1338/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 3917.5 [1339/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3919.1 [1340/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.o 3923.2 [1341/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 3925.4 [1342/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 3925.5 [1343/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 3936.9 [1344/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 3937.9 [1345/1412] Linking CXX static library lib/libdevice_reduction_operations.a 3938.0 [1346/1412] Linking CXX static library lib/libutility.a 3941.9 [1347/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 3944.9 [1348/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.o 3945.9 [1349/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_kpadding_instance.cpp.o 3953.8 [1350/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 3959.2 [1351/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeFiles/device_gemm_reduce_instance.dir/device_gemm_reduce_wmma_cshuffle_v3_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 3960.3 [1352/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_mnpadding_instance.cpp.o 3965.3 [1353/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 3966.2 [1354/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 3981.2 [1355/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_wmma_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 3991.0 [1356/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part4.cpp.o 3999.1 [1357/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeFiles/device_gemm_bias_add_reduce_instance.dir/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.o 4002.9 [1358/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part3.cpp.o 4005.3 [1359/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_default_instance.cpp.o 4022.5 [1360/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part2.cpp.o 4040.1 [1361/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_kpadding_instance.cpp.o 4040.3 [1362/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part3.cpp.o 4059.6 [1363/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part4.cpp.o 4061.4 [1364/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_mnpadding_instance.cpp.o 4068.9 [1365/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_mnpadding_instance.cpp.o 4078.2 [1366/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_default_instance.cpp.o 4083.4 [1367/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_kpadding_instance.cpp.o 4087.3 [1368/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp.o 4087.8 [1369/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 4092.4 [1370/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_comp_mnkpadding_instance.cpp.o 4099.5 [1371/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp.o 4101.1 [1372/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance_part1.cpp.o 4140.9 [1373/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 4146.2 [1374/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp.o 4152.2 [1375/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_scaleadd_ab_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_cshufflev3_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 4165.2 [1376/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp.o 4177.7 [1377/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4179.6 [1378/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part1.cpp.o 4186.4 [1379/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_default_instance.cpp.o 4220.7 [1380/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_fwd_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_fwd_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance_part2.cpp.o 4227.2 [1381/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 4242.0 [1382/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4249.4 [1383/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.o 4255.0 [1384/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.o 4267.7 [1385/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_default_instance.cpp.o 4268.0 [1386/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_i8_bf16/device_gemm_xdl_universal_bf16_i8_bf16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 4270.7 [1387/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_kpadding_instance.cpp.o 4274.1 [1388/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_default_instance.cpp.o 4294.5 [1389/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 4298.7 [1390/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.o 4300.6 [1391/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_wmma_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.o 4302.3 [1392/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_default_instance.cpp.o 4321.3 [1393/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_data_wmma_v3_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4322.4 [1394/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v1_mnkpadding_instance.cpp.o 4323.6 [1395/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_kpadding_instance.cpp.o 4330.5 [1396/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_bf16_bf16_bf16/device_gemm_xdl_universal_bf16_bf16_bf16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 4349.7 [1397/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_xdl_universal_f16_f16_f16/device_gemm_xdl_universal_f16_f16_f16_mk_kn_mn_mem_v2_mnkpadding_instance.cpp.o 4387.2 [1398/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4396.4 [1399/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_bilinear_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4411.7 [1400/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_data_scale_ndhwgc_instance.dir/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4486.7 [1401/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.o 4546.9 [1402/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.o 4553.0 [1403/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4556.4 [1404/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_scale/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_scale_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4582.9 [1405/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_bf16_bf16_bf16/device_gemm_wmma_universal_bf16_bf16_bf16_mk_kn_mn_comp_default_instance.cpp.o 4584.6 [1406/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.o 4593.3 [1407/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_f16_f16_f16/device_gemm_wmma_universal_f16_f16_f16_mk_kn_mn_comp_default_instance.cpp.o 4630.0 [1408/1412] Building CXX object library/src/tensor_operation_instance/gpu/quantization/CMakeFiles/device_quantization_instance.dir/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.o 4704.4 [1409/1412] Building CXX object library/src/tensor_operation_instance/gpu/gemm_universal_reduce/CMakeFiles/device_gemm_universal_reduce_instance.dir/device_gemm_wmma_universal_bf16_i8_bf16/device_gemm_wmma_universal_bf16_i8_bf16_mk_kn_mn_comp_default_instance.cpp.o 4800.2 [1410/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.o 4827.4 [1411/1412] Building CXX object library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight_bilinear/ndhwgc/CMakeFiles/device_grouped_conv3d_bwd_weight_bilinear_ndhwgc_instance.dir/wmma/device_grouped_conv3d_bwd_weight_wmma_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.o 4829.9 [1412/1412] Linking CXX static library lib/libdevice_conv_operations.a END 1782130588.2890766 4829.939811944962 0