37 #include "NE10_imgproc.h"
39 #include "unit_test_common.h"
/* Side length of the kernel grid that boxfilter_get_kernels fills with
 * every (x, y) combination. */
#define BASIC_KERNEL_SIZE 5
/* Number of entries in the BASIC_KERNEL_SIZE x BASIC_KERNEL_SIZE grid.
 * The expansion is parenthesized so it stays a single operand inside any
 * surrounding expression (division, subtraction, and so on). */
#define KERNEL_COUNT (BASIC_KERNEL_SIZE * BASIC_KERNEL_SIZE)
/*
 * Computes a PSNR figure between two uint8 buffers.
 * The squared element-wise difference is accumulated over buffSize elements,
 * normalized, and converted with 10 * log10 (max^2 / mse) where max is 255.
 *
 * pRef     - reference buffer.
 * pTest    - buffer compared against the reference.
 * buffSize - number of elements iterated; also used as a divisor below.
 */
44 ne10_float32_t cal_psnr_uint8_rgba (
const ne10_uint8_t *pRef,
45 const ne10_uint8_t *pTest,
46 const ne10_uint32_t buffSize)
/* Accumulate in double precision; max is the peak value of a uint8 sample. */
48 ne10_float64_t mse = 0.0, max = 255.0;
50 ne10_float32_t psnr_value;
52 for (i = 0; i < buffSize; i++)
/* The uint8 operands are promoted to int, so the difference may be negative
 * before it is squared. */
54 mse += (pRef[i] - pTest[i]) * (pRef[i] - pTest[i]);
/* NOTE(review): the sum covers buffSize elements but is divided by
 * buffSize * 4 - confirm whether buffSize is meant to count bytes or pixels. */
56 mse = mse / buffSize / 4;
/* NOTE(review): identical buffers give mse == 0, so this divides by zero in
 * floating point - confirm that the resulting value is what callers expect. */
57 psnr_value = 10 * log10 (max * max / mse);
/*
 * Returns a pseudo-random integer obtained by scaling rand() and offsetting
 * the result by min.
 *
 * NOTE(review): diff is defined on a line not shown here - presumably
 * max - min; confirm.
 * NOTE(review): when rand() returns RAND_MAX the expression evaluates to
 * min + diff + 1; if diff is max - min that is one past max - confirm whether
 * the upper bound is meant to be inclusive.
 */
61 int rand_range (
int min,
int max)
/* Scale in double precision, add min, then truncate to int. */
64 return (
int) ( ( (double) (diff + 1) / RAND_MAX) * rand() + min);
/* NOTE(review): the enclosing function header is not shown here - presumably
 * this is the body of valid_kernels; confirm. */
/* Visit each of the size kernel entries. */
70 for (i = 0; i < size; i++)
/* A kernel whose width or height is below 1 is singled out; what happens to
 * it is on lines not shown here. */
72 if (kernels[i].x < 1 || kernels[i].y < 1)
/*
 * Compares two images and prints a diagnostic when they differ too much.
 * Visible steps: the image pointers, size, stride and channel count are
 * asserted to be non-zero; a difference matrix of ne10_int32_t is allocated;
 * differing points are counted with diff_count; a PSNR is computed with
 * cal_psnr_uint8_rgba. The diagnostic is printed only when at least one point
 * differs AND the PSNR is below PSNR_THRESHOLD.
 *
 * NOTE(review): several parameters and every return statement are on lines
 * not shown here.
 */
83 int valid_result (
const ne10_uint8_t *image1,
84 const ne10_uint8_t *image2,
86 ne10_int32_t src_stride,
/* Preconditions: both images present and all dimensions non-zero. */
89 assert ((image1 != 0) && (image2 != 0));
90 assert ((src_sz.x != 0) && (src_sz.y != 0)
91 && (src_stride != 0) && (channel != 0));
/* Scratch matrix holding ne10_int32_t entries. */
93 ne10_int32_t *diff_mat = (ne10_int32_t *) malloc (
sizeof (ne10_int32_t)
/* Bytes per row of the difference matrix. */
97 ne10_int32_t diff_mat_stride =
sizeof (ne10_int32_t) * channel * src_sz.x;
/* Allocation-failure diagnostic.
 * NOTE(review): the message mentions kernels although the buffer is diff_mat,
 * and %d is paired with an argument that starts with a sizeof expression
 * (type size_t) - %zu would match; confirm against the full call. */
101 printf (
"**ERROR**: allocating %d bytes memory for kernels fails!",
102 sizeof (ne10_int32_t)
/* Number of differing points reported by diff_count. */
118 ne10_int32_t diff_nu = diff_count ( (
const ne10_int32_t *) diff_mat,
125 ne10_float32_t psnr_value = cal_psnr_uint8_rgba (image1,
/* Differences are tolerated as long as the PSNR stays at or above the
 * threshold. */
130 if (diff_nu != 0 && psnr_value < PSNR_THRESHOLD)
132 printf (
"\ndifferent point is:%d\t PSNR value is:%f\n",
133 diff_nu, psnr_value);
/*
 * Builds the list of kernel sizes to test, stored through kernels_ptr with
 * the entry count written to *size.
 *  - max_kernel_length below BASIC_KERNEL_SIZE: every (x, y) with both values
 *    in 1..max_kernel_length.
 *  - max_kernel_length equal to BASIC_KERNEL_SIZE: the full
 *    BASIC_KERNEL_SIZE x BASIC_KERNEL_SIZE grid.
 *  - max_kernel_length above BASIC_KERNEL_SIZE: the full grid plus three
 *    extreme kernels (max x 1, 1 x max, max x max).
 *
 * NOTE(review): the remaining parameters and the allocation of *kernels_ptr
 * are on lines not shown here; presumably the caller frees the array - confirm.
 */
142 void boxfilter_get_kernels (
size_t max_kernel_length,
/* Decide how many entries the list holds. */
147 if (max_kernel_length > BASIC_KERNEL_SIZE)
/* Full grid plus the three extreme kernels appended at the end. */
149 *size = KERNEL_COUNT + 3;
151 else if (max_kernel_length < BASIC_KERNEL_SIZE)
153 *size = max_kernel_length * max_kernel_length;
157 *size = KERNEL_COUNT;
/* Allocation check for the kernel array. */
161 if (*kernels_ptr == 0)
163 printf (
"**ERROR**: allocating %d bytes memory for kernels fails!\n",
167 int x, y, first_part_size;
/* The grid side is the smaller of max_kernel_length and BASIC_KERNEL_SIZE. */
169 if (max_kernel_length < BASIC_KERNEL_SIZE)
171 first_part_size = max_kernel_length;
175 first_part_size = BASIC_KERNEL_SIZE;
/* Fill the grid row by row; kernel dimensions are 1-based, hence the + 1. */
178 for (x = 0; x < first_part_size; x++)
180 for (y = 0; y < first_part_size; y++)
182 (*kernels_ptr) [x * first_part_size + y].x = x + 1;
183 (*kernels_ptr) [x * first_part_size + y].y = y + 1;
/* The last three slots hold the extreme shapes: wide, tall, and square. */
192 if (max_kernel_length > BASIC_KERNEL_SIZE)
194 (*kernels_ptr) [*size - 3].x = max_kernel_length;
195 (*kernels_ptr) [*size - 3].y = 1;
196 (*kernels_ptr) [*size - 2].x = 1;
197 (*kernels_ptr) [*size - 2].y = max_kernel_length;
198 (*kernels_ptr) [*size - 1].x = max_kernel_length;
199 (*kernels_ptr) [*size - 1].y = max_kernel_length;
/* Sanity-check the generated list. */
202 assert (valid_kernels (*kernels_ptr, *size) == NE10_OK);
/*
 * Allocates an image buffer with NE10_MALLOC and fills every byte with a
 * value from rand_range (0, 255). The buffer is returned through img.
 *
 * img    - receives the pointer to the new buffer.
 * src_sz - image dimensions; the byte count is x * y * 4.
 */
205 void create_rgba8888_image (ne10_uint8_t **img,
ne10_size_t src_sz)
/* NOTE(review): with || this only fails when both dimensions are zero; a
 * single zero dimension passes and yields size == 0 - confirm whether && was
 * intended. */
207 assert ( (src_sz.x != 0) || (src_sz.y != 0));
/* Total byte count: four bytes per pixel. */
209 int size =
sizeof (ne10_uint8_t) * src_sz.x * src_sz.y * 4;
211 *img = (ne10_uint8_t *) NE10_MALLOC (
sizeof (ne10_uint8_t) *
/* Fill the whole buffer with pseudo-random bytes. */
215 for (i = 0; i < size; i++)
217 * (*img + i) = rand_range (0, 255);
/* NOTE(review): this NULL check comes after the fill loop has already written
 * through *img, so a failed allocation would be dereferenced first - confirm
 * the intended order. */
220 assert (*img != NULL);
/*
 * Runs the box filter conformance check for one image size.
 * Visible steps: derive a maximum kernel length from the image size and cap
 * it at 127, obtain the kernel list from boxfilter_get_kernels, create the
 * source and the two destination buffers, then for each kernel print its size
 * and assert on valid_result.
 *
 * NOTE(review): the filter calls, the cleanup and the return value are on
 * lines not shown here.
 */
223 int boxfilter_conformance_test (
ne10_size_t src_sz)
/* NOTE(review): with || this only fails when both dimensions are zero. */
225 assert ( (src_sz.x != 0) || (src_sz.y != 0));
227 printf (
"\ntest boxfilter on image with size:%d x %d:\n",
/* NOTE(review): the rest of this expression is not shown - presumably it
 * selects the smaller of the two dimensions; confirm. */
230 int max_kernel_length = src_sz.x < src_sz.y ?
/* Cap the kernel length at (1 << 7) - 1, i.e. 127. */
232 max_kernel_length = max_kernel_length < ( (1 << 7) - 1) ?
233 max_kernel_length : ( (1 << 7) - 1);
237 boxfilter_get_kernels (max_kernel_length, &kernels, &kernels_size);
/* All three buffers come from create_rgba8888_image, so the destination
 * buffers also start out filled with random bytes. */
239 ne10_uint8_t *src, *neon_dst, *c_dst;
240 create_rgba8888_image (&src, src_sz);
241 create_rgba8888_image (&neon_dst, src_sz);
242 create_rgba8888_image (&c_dst, src_sz);
/* Row stride in bytes: four bytes per pixel. */
243 ne10_int32_t stride = src_sz.x * 4 *
sizeof (ne10_uint8_t);
/* One pass per kernel size. */
246 for (i = 0; i < kernels_size; i++)
248 printf (
"test kernel size(%d x %d):",
249 kernels[i].x, kernels[i].y);
/* The test asserts on the outcome of valid_result for the two outputs. */
264 assert_true (valid_result (c_dst,
/*
 * Measures the box filter for one image size and reports average tick counts
 * through c_ticks and neon_ticks.
 *
 * NOTE(review): the remaining parameters, the declarations of ticks and
 * run_loop, and the timed calls inside both loops are on lines not shown
 * here.
 */
279 void boxfilter_performance_test (
ne10_size_t img_size,
281 long int *neon_ticks,
/* Source and destination buffers, all filled by create_rgba8888_image. */
286 ne10_uint8_t *src, *neon_dst, *c_dst;
287 create_rgba8888_image (&src, img_size);
288 create_rgba8888_image (&neon_dst, img_size);
289 create_rgba8888_image (&c_dst, img_size);
/* Row stride in bytes: four bytes per pixel. */
290 ne10_int32_t stride = img_size.x * 4 *
sizeof (ne10_uint8_t);
/* First timed loop; its average is stored in *c_ticks. */
294 for (i = 0; i < run_loop; i++)
305 *c_ticks = ticks / run_loop;
/* Second timed loop; its average is stored in *neon_ticks. */
308 for (i = 0; i < run_loop; i++)
319 *neon_ticks = ticks / run_loop;
/*
 * Performance case: runs boxfilter_performance_test for every combination of
 * the image sizes and kernel sizes listed below and prints the result with
 * ne10_performance_print.
 */
322 void test_boxfilter_performance_case()
/* Image sizes to measure. */
324 ne10_size_t img_sizes[] = {{240, 320}, {480, 320}, {960, 1280},
325 {1200, 1600}, {2000, 2000}
/* Square kernels to measure. */
327 ne10_size_t kernel_sizes[] = {{3, 3}, {5, 5}, {7, 7}, {9, 9}};
329 int i, j, n_img, n_kernel;
/* Element counts of the two tables. */
330 n_img =
sizeof (img_sizes) /
sizeof (img_sizes[0]);
331 n_kernel =
sizeof (kernel_sizes) /
sizeof (kernel_sizes[0]);
332 long int neon_ticks, c_ticks;
/* Cartesian product: every image size with every kernel size. */
335 for (i = 0; i < n_img; i++)
337 for (j = 0; j < n_kernel; j++)
339 boxfilter_performance_test (img_sizes[i],
347 img_sizes[i].x, img_sizes[i].y,
348 kernel_sizes[j].x, kernel_sizes[j].y);
350 ne10_performance_print (UBUNTU_COMMAND_LINE,
/*
 * Smoke case: runs boxfilter_conformance_test once for each entry of a table
 * of image sizes.
 * NOTE(review): the end of the img_sizes initializer is on lines not shown
 * here.
 */
358 void test_boxfilter_smoke_case()
360 ne10_size_t img_sizes[] = {{1, 1}, {2, 2}, {8, 3}, {10, 19},
/* Element count of the table. */
363 int n =
sizeof (img_sizes) /
sizeof (img_sizes[0]);
365 for (i = 0; i < n; i++)
367 boxfilter_conformance_test (img_sizes[i]);
/*
 * Regression case: runs boxfilter_conformance_test once for each entry of a
 * table of image sizes ranging from 1 x 1 up to 1200 x 1600.
 * NOTE(review): the table uses {969, 1280} where the performance case uses
 * {960, 1280} - confirm whether the odd width is intentional.
 */
372 void test_boxfilter_regression_case()
374 ne10_size_t img_sizes[] = {{1, 1}, {2, 2}, {8, 3}, {10, 19},
375 {239, 319}, {240, 320}, {480, 640},
376 {969, 1280}, {1200, 1600}
/* Element count of the table. */
378 int n =
sizeof (img_sizes) /
sizeof (img_sizes[0]);
380 for (i = 0; i < n; i++)
382 boxfilter_conformance_test (img_sizes[i]);
/*
 * Test entry point: runs whichever of the smoke, regression and performance
 * cases are enabled at compile time through SMOKE_TEST, REGRESSION_TEST and
 * PERFORMANCE_TEST.
 */
386 void test_boxfilter()
/* Compiled in only when SMOKE_TEST is defined. */
388 #if defined (SMOKE_TEST)
389 test_boxfilter_smoke_case();
/* Compiled in only when REGRESSION_TEST is defined. */
392 #if defined (REGRESSION_TEST)
393 test_boxfilter_regression_case();
/* Compiled in only when PERFORMANCE_TEST is defined. */
396 #if defined PERFORMANCE_TEST
397 test_boxfilter_performance_case();
/* Per-test setup hook: points ne10_log_buffer_ptr back at ne10_log_buffer. */
401 static void my_test_setup (
void)
403 ne10_log_buffer_ptr = ne10_log_buffer;
/*
 * Fixture entry point for the box filter tests: starts the fixture, registers
 * my_test_setup as the setup hook, and runs test_boxfilter.
 * NOTE(review): the matching fixture-end call is on lines not shown here.
 */
406 void test_fixture_boxfilter (
void)
408 test_fixture_start();
410 fixture_setup (my_test_setup);
412 run_test (test_boxfilter);
void ne10_img_boxfilter_rgba8888_neon(const ne10_uint8_t *src, ne10_uint8_t *dst, ne10_size_t src_size, ne10_int32_t src_stride, ne10_int32_t dst_stride, ne10_size_t kernel_size)
NEON-optimized box filter for RGBA8888 images.
void ne10_img_boxfilter_rgba8888_c(const ne10_uint8_t *src, ne10_uint8_t *dst, ne10_size_t src_size, ne10_int32_t src_stride, ne10_int32_t dst_stride, ne10_size_t kernel_size)
Box filter for RGBA8888 images (plain C implementation).