yuv-to-rgb.c is not correct though it is faster

This commit is contained in:
rustdesk 2022-01-29 20:45:53 +08:00
parent bf9badb10e
commit 9de2944d6b
6 changed files with 1473 additions and 13 deletions

View File

@ -239,7 +239,6 @@ export default class Connection {
draw(frame: any) {
this._draw?.(frame);
// globals.I420ToARGB(frame);
}
close() {

View File

@ -328,17 +328,18 @@ export function I420ToARGB(yb) {
vPtr = malloc(n);
}
HEAPU8.set(yb.v.bytes, vPtr);
const w = yb.format.width;
const h = yb.format.height;
const w = yb.format.displayWidth;
const h = yb.format.displayHeight;
n = w * h * 4;
if (outPtrLen != n) {
if (outPtr) free(outPtr);
outPtrLen = n;
outPtr = malloc(n);
HEAPU8.fill(255, outPtr, outPtr + n);
}
// const res = wasmExports.I420ToARGB(yPtr, yb.y.stride, uPtr, yb.u.stride, vPtr, yb.v.stride, outPtr, w * 4, w, h);
const res = wasmExports.AVX_YUV_to_RGBA(outPtr, yPtr, uPtr, vPtr, w, h);
// const res = wasmExports.yuv420_rgb24_std(w, h, yPtr, uPtr, vPtr, yb.y.stride, yb.v.stride, outPtr, w * 4, 0);
// const res = wasmExports.AVX_YUV_to_ARGB(outPtr, yPtr, yb.y.stride, uPtr, yb.u.stride, vPtr, yb.v.stride, w, h);
const res = wasmExports.yuv420_rgb24_std(w, h, yPtr, uPtr, vPtr, yb.y.stride, yb.v.stride, outPtr, w * 4, 1);
const out = HEAPU8.slice(outPtr, outPtr + n);
/*
testSpeed[1] += new Date().getTime() - tm0;

View File

@ -45,7 +45,7 @@ static int foo;
static int frame;
void
AVX_YUV_to_RGBA(unsigned char *dst, unsigned char *y, unsigned char* u, unsigned char* v, int width, int height) {
AVX_YUV_to_ARGB(unsigned char *dst, unsigned char *y, int ystride, unsigned char* u, int ustride, unsigned char* v, int vstride, int width, int height) {
int r, g, b;
unsigned char *uline, *vline;
int w, h;
@ -54,27 +54,27 @@ AVX_YUV_to_RGBA(unsigned char *dst, unsigned char *y, unsigned char* u, unsigned
initialized = !0;
build_tables();
}
int half_width = width / 2;
// Loop the image, taking into account sub-sample for the chroma channels
for (h = 0; h < height; h++) {
uline = u;
vline = v;
for (w = 0; w < width; w++, y++) {
for (w = 0; w < width; w++) {
r = *y + T1[*vline];
g = *y + T2[*vline] + T3[*uline];
b = *y + T4[*uline];
*dst++ = clamp(r); // 16-bit to 8-bit, chuck precision
*dst++ = clamp(b); // 16-bit to 8-bit, chuck precision
*dst++ = clamp(g);
*dst++ = clamp(b);
*dst++ = 255;
*dst++ = clamp(r);
++dst;
if (w & 0x01) {
uline++;
vline++;
}
}
y += ystride;
if (h & 0x01) {
u += half_width;
v += half_width;
u += ustride;
v += vstride;
}
}
}

BIN
yuv.wasm

Binary file not shown.

1305
yuv_rgb.c Normal file

File diff suppressed because it is too large Load Diff

155
yuv_rgb.h Normal file
View File

@ -0,0 +1,155 @@
// Copyright 2016 Adrien Descamps
// Distributed under BSD 3-Clause License
// Provide optimized functions to convert images from 8bits yuv420 to rgb24 format
// There are a few slightly different variations of the YCbCr color space with different parameters that
// change the conversion matrix.
// The three most common YCbCr color spaces, defined by the BT.601, BT.709 and JPEG standards, are implemented here.
// See the respective standards for details
// The matrix values used are derived from http://www.equasys.de/colorconversion.html
// YUV420 is stored as three separate channels, with U and V (Cb and Cr) subsampled by a factor of 2.
// For conversion from yuv to rgb, no interpolation is done, and the same UV values are used for 4 rgb pixels. This
// is suboptimal for image quality, but by far the fastest method.
// For all methods, width and height should be even; if not, the last row/column of the result image won't be affected.
// For sse methods, if the width is not divisible by 32, the last (width%32) pixels of each line won't be affected.
#include <stdint.h>
// Selects which YCbCr variant (and therefore which conversion matrix) a
// conversion routine uses: JPEG, BT.601 or BT.709.
//
// The numeric values are written out explicitly because some callers pass the
// selector as a raw integer (0 or 1) instead of using the enumerator names, so
// the mapping must not change silently if enumerators are ever reordered.
//
// The guard keeps the enumerators from being redeclared (a compile error) when
// this header ends up included more than once in the same translation unit.
#ifndef YUV_RGB_YCBCR_TYPE_DEFINED
#define YUV_RGB_YCBCR_TYPE_DEFINED
typedef enum
{
    YCBCR_JPEG = 0,
    YCBCR_601 = 1,
    YCBCR_709 = 2
} YCbCrType;
#endif
#ifdef __cplusplus
extern "C" {
#endif
// yuv to rgb, standard c implementation
// Reads three separate input planes (y, u, v) and writes the converted pixels to rgb.
// y_stride applies to the y plane; the single uv_stride is shared by the u and v planes.
// yuv_type picks the YCbCr variant (YCBCR_JPEG, YCBCR_601 or YCBCR_709).
// Per the notes at the top of this header, width and height should be even.
// NOTE(review): strides are presumably byte counts per row - confirm against the implementation.
void yuv420_rgb24_std(
uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType yuv_type);
// yuv to rgb, yuv in nv12 semi planar format
// Standard c implementation: takes a y plane plus one uv pointer for the chroma data
// (instead of separate u and v planes) and writes the converted pixels to rgb.
// yuv_type picks the YCbCr variant (YCBCR_JPEG, YCBCR_601 or YCBCR_709).
// NOTE(review): strides are presumably byte counts per row - confirm against the implementation.
void nv12_rgb24_std(
uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType yuv_type);
// yuv to rgb, yuv in nv21 semi planar format
// Standard c implementation: takes a y plane plus one uv pointer for the chroma data
// (instead of separate u and v planes) and writes the converted pixels to rgb.
// yuv_type picks the YCbCr variant (YCBCR_JPEG, YCBCR_601 or YCBCR_709).
// NOTE(review): strides are presumably byte counts per row - confirm against the implementation.
void nv21_rgb24_std(
uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType yuv_type);
// yuv to rgb, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
// Same parameter list as yuv420_rgb24_std: separate y, u, v input planes, rgb output.
// Per the notes at the top of this header, if the width is not divisible by 32 the
// last (width%32) pixels of each line are left untouched.
void yuv420_rgb24_sse(
uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType yuv_type);
// yuv to rgb, sse implementation
// pointers do not need to be 16 byte aligned
// Same parameter list as yuv420_rgb24_std: separate y, u, v input planes, rgb output.
// Per the notes at the top of this header, if the width is not divisible by 32 the
// last (width%32) pixels of each line are left untouched.
void yuv420_rgb24_sseu(
uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *u, const uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType yuv_type);
// yuv nv12 to rgb, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
// Same parameter list as nv12_rgb24_std: y plane plus one uv pointer in, rgb out.
// Per the notes at the top of this header, if the width is not divisible by 32 the
// last (width%32) pixels of each line are left untouched.
void nv12_rgb24_sse(
uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType yuv_type);
// yuv nv12 to rgb, sse implementation
// pointers do not need to be 16 byte aligned
// Same parameter list as nv12_rgb24_std: y plane plus one uv pointer in, rgb out.
// Per the notes at the top of this header, if the width is not divisible by 32 the
// last (width%32) pixels of each line are left untouched.
void nv12_rgb24_sseu(
uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType yuv_type);
// yuv nv21 to rgb, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
// Same parameter list as nv21_rgb24_std: y plane plus one uv pointer in, rgb out.
// Per the notes at the top of this header, if the width is not divisible by 32 the
// last (width%32) pixels of each line are left untouched.
void nv21_rgb24_sse(
uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType yuv_type);
// yuv nv21 to rgb, sse implementation
// pointers do not need to be 16 byte aligned
// Same parameter list as nv21_rgb24_std: y plane plus one uv pointer in, rgb out.
// Per the notes at the top of this header, if the width is not divisible by 32 the
// last (width%32) pixels of each line are left untouched.
void nv21_rgb24_sseu(
uint32_t width, uint32_t height,
const uint8_t *y, const uint8_t *uv, uint32_t y_stride, uint32_t uv_stride,
uint8_t *rgb, uint32_t rgb_stride,
YCbCrType yuv_type);
// rgb to yuv, standard c implementation
// Reads pixels from rgb and writes three separate output planes: y, u and v.
// y_stride applies to the y plane; the single uv_stride is shared by the u and v planes.
// yuv_type picks the YCbCr variant (YCBCR_JPEG, YCBCR_601 or YCBCR_709).
// NOTE(review): strides are presumably byte counts per row - confirm against the implementation.
void rgb24_yuv420_std(
uint32_t width, uint32_t height,
const uint8_t *rgb, uint32_t rgb_stride,
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
YCbCrType yuv_type);
// rgb to yuv, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
// Same parameter list as rgb24_yuv420_std: rgb input, separate y, u, v output planes.
void rgb24_yuv420_sse(
uint32_t width, uint32_t height,
const uint8_t *rgb, uint32_t rgb_stride,
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
YCbCrType yuv_type);
// rgb to yuv, sse implementation
// pointers do not need to be 16 byte aligned
// Same parameter list as rgb24_yuv420_std: rgb input, separate y, u, v output planes.
void rgb24_yuv420_sseu(
uint32_t width, uint32_t height,
const uint8_t *rgb, uint32_t rgb_stride,
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
YCbCrType yuv_type);
// rgba to yuv, standard c implementation
// alpha channel is ignored
// Reads pixels from rgba and writes three separate output planes: y, u and v.
// y_stride applies to the y plane; the single uv_stride is shared by the u and v planes.
// yuv_type picks the YCbCr variant (YCBCR_JPEG, YCBCR_601 or YCBCR_709).
// NOTE(review): strides are presumably byte counts per row - confirm against the implementation.
void rgb32_yuv420_std(
uint32_t width, uint32_t height,
const uint8_t *rgba, uint32_t rgba_stride,
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
YCbCrType yuv_type);
// rgba to yuv, sse implementation
// pointers must be 16 byte aligned, and strides must be divisible by 16
// alpha channel is ignored
// Same parameter list as rgb32_yuv420_std: rgba input, separate y, u, v output planes.
void rgb32_yuv420_sse(
uint32_t width, uint32_t height,
const uint8_t *rgba, uint32_t rgba_stride,
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
YCbCrType yuv_type);
// rgba to yuv, sse implementation
// pointers do not need to be 16 byte aligned
// alpha channel is ignored
// Same parameter list as rgb32_yuv420_std: rgba input, separate y, u, v output planes.
void rgb32_yuv420_sseu(
uint32_t width, uint32_t height,
const uint8_t *rgba, uint32_t rgba_stride,
uint8_t *y, uint8_t *u, uint8_t *v, uint32_t y_stride, uint32_t uv_stride,
YCbCrType yuv_type);
#ifdef __cplusplus
}
#endif