yuv-to-rgb.c is not correct though it is faster

2022-01-29 20:45:53 +08:00 · 2022-01-29 20:45:53 +08:00 · 9de2944d6b
commit 9de2944d6b
parent bf9badb10e
6 changed files with 1473 additions and 13 deletions
--- a/src/connection.ts
+++ b/src/connection.ts
@ -239,7 +239,6 @@ export default class Connection {

  draw(frame: any) {
    this._draw?.(frame);
-    // globals.I420ToARGB(frame);
  }

  close() {
--- a/src/globals.js
+++ b/src/globals.js
@ -328,17 +328,18 @@ export function I420ToARGB(yb) {
    vPtr = malloc(n);
  }
  HEAPU8.set(yb.v.bytes, vPtr);
-  const w = yb.format.width;
-  const h = yb.format.height;
+  const w = yb.format.displayWidth;
+  const h = yb.format.displayHeight;
  n = w * h * 4;
  if (outPtrLen != n) {
    if (outPtr) free(outPtr);
    outPtrLen = n;
    outPtr = malloc(n);
+    HEAPU8.fill(255, outPtr, outPtr + n);
  }
  // const res = wasmExports.I420ToARGB(yPtr, yb.y.stride, uPtr, yb.u.stride, vPtr, yb.v.stride, outPtr, w * 4, w, h);
-  const res = wasmExports.AVX_YUV_to_RGBA(outPtr, yPtr, uPtr, vPtr, w, h);
-  // const res = wasmExports.yuv420_rgb24_std(w, h, yPtr, uPtr, vPtr, yb.y.stride, yb.v.stride, outPtr, w * 4, 0);
+  // const res = wasmExports.AVX_YUV_to_ARGB(outPtr, yPtr, yb.y.stride, uPtr, yb.u.stride, vPtr, yb.v.stride, w, h);
+  const res = wasmExports.yuv420_rgb24_std(w, h, yPtr, uPtr, vPtr, yb.y.stride, yb.v.stride, outPtr, w * 4, 1);
  const out = HEAPU8.slice(outPtr, outPtr + n);
  /*
  testSpeed[1] += new Date().getTime() - tm0;
--- a/yuv-to-rgb.c
+++ b/yuv-to-rgb.c
@ -45,7 +45,7 @@ static int foo;
 static int frame;

 void
-AVX_YUV_to_RGBA(unsigned char *dst, unsigned char *y, unsigned char* u, unsigned char* v, int width, int height) {
+AVX_YUV_to_ARGB(unsigned char *dst, unsigned char *y, int ystride, unsigned char* u, int ustride, unsigned char* v, int vstride, int width, int height) {
    int             r, g, b;
    unsigned char   *uline, *vline;
    int             w, h;
@ -54,27 +54,27 @@ AVX_YUV_to_RGBA(unsigned char *dst, unsigned char *y, unsigned char* u, unsigned
        initialized = !0;
        build_tables();
    }
-    int half_width = width / 2;
    // Loop the image, taking into account sub-sample for the chroma channels
    for (h = 0; h < height; h++) {
        uline = u;
        vline = v;
-        for (w = 0; w < width; w++, y++) {
+        for (w = 0; w < width; w++) {
            r = *y + T1[*vline];
            g = *y + T2[*vline] + T3[*uline];
            b = *y + T4[*uline];
-            *dst++ = clamp(r);     // 16-bit to 8-bit, chuck precision
+            *dst++ = clamp(b);     // 16-bit to 8-bit, chuck precision
            *dst++ = clamp(g);
-            *dst++ = clamp(b);
-            *dst++ = 255;
+            *dst++ = clamp(r);
+            ++dst;
            if (w & 0x01) {
                uline++;
                vline++;
            }
        }
+        y += ystride;
        if (h & 0x01) {
-            u += half_width;
-            v += half_width;
+            u += ustride;
+            v += vstride;
        }
    }
 }
--- a/yuv.wasm
+++ b/yuv.wasm
--- a/yuv_rgb.c
+++ b/yuv_rgb.c
--- a/yuv_rgb.h
+++ b/yuv_rgb.h
@ -0,0 +1,155 @@
+// Copyright 2016 Adrien Descamps
+// Distributed under BSD 3-Clause License
+
+// Provide optimized functions to convert images from 8-bit yuv420 to rgb24 format
+
+// There are a few slightly different variations of the YCbCr color space with different parameters that 
+// change the conversion matrix.
+// The three most common YCbCr color spaces, defined by the BT.601, BT.709 and JPEG standards, are implemented here.
+// See the respective standards for details
+// The matrix values used are derived from http://www.equasys.de/colorconversion.html
+
+// YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a factor of 2.
+// For conversion from yuv to rgb, no interpolation is done, and the same UV values are used for 4 rgb pixels. This
+// is suboptimal for image quality, but by far the fastest method.
+
+// For all methods, width and height should be even; if not, the last row/column of the result image won't be affected.
+// For sse methods, if the width is not divisible by 32, the last (width%32) pixels of each line won't be affected.
+
+#include <stdint.h>
+
+typedef enum
+{
+	YCBCR_JPEG,
+	YCBCR_601,
+	YCBCR_709
+} YCbCrType;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// yuv to rgb, standard c implementation
+void yuv420_rgb24_std(
+	uint32_t width, uint32_t height, 
+	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
+	uint8_t *rgb, uint32_t rgb_stride, 
+	YCbCrType yuv_type);
+
+// yuv to rgb, yuv in nv12 semi planar format
+void nv12_rgb24_std(
+	uint32_t width, uint32_t height,
+	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
+	uint8_t *rgb, uint32_t rgb_stride,
+	YCbCrType yuv_type);
+
+// yuv to rgb, yuv in nv21 semi planar format
+void nv21_rgb24_std(
+	uint32_t width, uint32_t height,
+	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
+	uint8_t *rgb, uint32_t rgb_stride,
+	YCbCrType yuv_type);
+
+// yuv to rgb, sse implementation
+// pointers must be 16 byte aligned, and strides must be divisible by 16
+void yuv420_rgb24_sse(
+	uint32_t width, uint32_t height, 
+	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
+	uint8_t *rgb, uint32_t rgb_stride, 
+	YCbCrType yuv_type);
+
+// yuv to rgb, sse implementation
+// pointers do not need to be 16 byte aligned
+void yuv420_rgb24_sseu(
+	uint32_t width, uint32_t height, 
+	const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
+	uint8_t *rgb, uint32_t rgb_stride, 
+	YCbCrType yuv_type);
+
+// yuv nv12 to rgb, sse implementation
+// pointers must be 16 byte aligned, and strides must be divisible by 16
+void nv12_rgb24_sse(
+	uint32_t width, uint32_t height, 
+	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride, 
+	uint8_t *rgb, uint32_t rgb_stride, 
+	YCbCrType yuv_type);
+
+// yuv nv12 to rgb, sse implementation
+// pointers do not need to be 16 byte aligned
+void nv12_rgb24_sseu(
+	uint32_t width, uint32_t height, 
+	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride, 
+	uint8_t *rgb, uint32_t rgb_stride, 
+	YCbCrType yuv_type);
+
+// yuv nv21 to rgb, sse implementation
+// pointers must be 16 byte aligned, and strides must be divisible by 16
+void nv21_rgb24_sse(
+	uint32_t width, uint32_t height, 
+	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride, 
+	uint8_t *rgb, uint32_t rgb_stride, 
+	YCbCrType yuv_type);
+
+// yuv nv21 to rgb, sse implementation
+// pointers do not need to be 16 byte aligned
+void nv21_rgb24_sseu(
+	uint32_t width, uint32_t height, 
+	const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride, 
+	uint8_t *rgb, uint32_t rgb_stride, 
+	YCbCrType yuv_type);
+
+
+
+
+// rgb to yuv, standard c implementation
+void rgb24_yuv420_std(
+	uint32_t width, uint32_t height, 
+	const uint8_t *rgb, uint32_t rgb_stride, 
+	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
+	YCbCrType yuv_type);
+
+// rgb to yuv, sse implementation
+// pointers must be 16 byte aligned, and strides must be divisible by 16
+void rgb24_yuv420_sse(
+	uint32_t width, uint32_t height, 
+	const uint8_t *rgb, uint32_t rgb_stride, 
+	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
+	YCbCrType yuv_type);
+
+// rgb to yuv, sse implementation
+// pointers do not need to be 16 byte aligned
+void rgb24_yuv420_sseu(
+	uint32_t width, uint32_t height, 
+	const uint8_t *rgb, uint32_t rgb_stride, 
+	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
+	YCbCrType yuv_type);
+
+// rgba to yuv, standard c implementation
+// alpha channel is ignored
+void rgb32_yuv420_std(
+	uint32_t width, uint32_t height, 
+	const uint8_t *rgba, uint32_t rgba_stride, 
+	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
+	YCbCrType yuv_type);
+
+// rgba to yuv, sse implementation
+// pointers must be 16 byte aligned, and strides must be divisible by 16
+// alpha channel is ignored
+void rgb32_yuv420_sse(
+	uint32_t width, uint32_t height, 
+	const uint8_t *rgba, uint32_t rgba_stride, 
+	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
+	YCbCrType yuv_type);
+
+// rgba to yuv, sse implementation
+// pointers do not need to be 16 byte aligned
+// alpha channel is ignored
+void rgb32_yuv420_sseu(
+	uint32_t width, uint32_t height, 
+	const uint8_t *rgba, uint32_t rgba_stride, 
+	uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride, 
+	YCbCrType yuv_type);
+
+#ifdef __cplusplus
+}
+#endif