Spaces:
Runtime error
Runtime error
// Copyright (C) 2018-2022 Intel Corporation | |
// SPDX-License-Identifier: Apache-2.0 | |
// | |
__kernel void reorg_chw( | |
__global const half *restrict src, | |
__global half *restrict dst, | |
int W, | |
int H, | |
int C, | |
int stride) | |
{ | |
__local half local_src[8 * 1024]; | |
__local half local_dst[8 * 1024]; | |
event_t e1 = async_work_group_copy_2D2D( | |
local_src, // dst | |
src + get_group_id(1) * W * stride | |
+ get_group_id(0) * W * stride * stride, // src | |
W * stride, // num_elements_per_line, | |
get_local_size(0), // num_lines, | |
W * stride * (stride * get_num_groups(0) - 1), // src_line_stride, | |
0, // dst_line_stride, | |
0); | |
wait_group_events(1, &e1); | |
const int c = get_local_id(0); | |
const int stride_x = get_local_id(1); | |
const int srcIdx = stride_x + c * W * stride; | |
const int dstIdx = stride_x * W * get_local_size(0) + c * W; | |
int x = 0; | |
for (; x <= W - 8; x += 8) { | |
half8 data = (half8){ | |
local_src[srcIdx + (x + 0) * stride], | |
local_src[srcIdx + (x + 1) * stride], | |
local_src[srcIdx + (x + 2) * stride], | |
local_src[srcIdx + (x + 3) * stride], | |
local_src[srcIdx + (x + 4) * stride], | |
local_src[srcIdx + (x + 5) * stride], | |
local_src[srcIdx + (x + 6) * stride], | |
local_src[srcIdx + (x + 7) * stride]}; | |
*((__local half8 *)(&local_dst[dstIdx + x])) = data; | |
} | |
for (; x < W; x++) { | |
local_dst[dstIdx + x] = local_src[srcIdx + x * stride]; | |
} | |
barrier(CLK_LOCAL_MEM_FENCE); | |
event_t e2 = async_work_group_copy_2D2D( | |
dst + get_group_id(0) * W | |
+ get_group_id(1) * W * stride * get_global_size(0), // dst | |
local_dst, // src | |
W, // num_elements_per_line | |
get_local_size(0) * stride, // num_lines | |
0, // src_line_stride | |
W * (get_num_groups(0) - 1), // dst_line_stride | |
0); | |
wait_group_events(1, &e2); | |
} | |