diff --git a/libs/scrap/src/common/aom.rs b/libs/scrap/src/common/aom.rs index 975a82d64..169e8d38e 100644 --- a/libs/scrap/src/common/aom.rs +++ b/libs/scrap/src/common/aom.rs @@ -106,7 +106,7 @@ mod webrtc { // Overwrite default config with input encoder settings & RTC-relevant values. c.g_w = cfg.width; c.g_h = cfg.height; - c.g_threads = codec_thread_num() as _; + c.g_threads = codec_thread_num(64) as _; c.g_timebase.num = 1; c.g_timebase.den = kRtpTicksPerSecond; c.g_input_bit_depth = kBitDepth; @@ -452,7 +452,7 @@ impl AomDecoder { let i = call_aom_ptr!(aom_codec_av1_dx()); let mut ctx = Default::default(); let cfg = aom_codec_dec_cfg_t { - threads: codec_thread_num() as _, + threads: codec_thread_num(64) as _, w: 0, h: 0, allow_lowbitdepth: 1, diff --git a/libs/scrap/src/common/codec.rs b/libs/scrap/src/common/codec.rs index c89442b5f..32ebf2dbc 100644 --- a/libs/scrap/src/common/codec.rs +++ b/libs/scrap/src/common/codec.rs @@ -566,15 +566,18 @@ pub fn base_bitrate(width: u32, height: u32) -> u32 { base_bitrate } -pub fn codec_thread_num() -> usize { +pub fn codec_thread_num(limit: usize) -> usize { let max: usize = num_cpus::get(); let mut res; let info; + let mut s = System::new(); + s.refresh_memory(); + let memory = s.available_memory() / 1024 / 1024 / 1024; #[cfg(windows)] { res = 0; let percent = hbb_common::platform::windows::cpu_uage_one_minute(); - info = format!("cpu usage:{:?}", percent); + info = format!("cpu usage: {:?}", percent); if let Some(pecent) = percent { if pecent < 100.0 { res = ((100.0 - pecent) * (max as f64) / 200.0).round() as usize; @@ -583,24 +586,41 @@ pub fn codec_thread_num() -> usize { } #[cfg(not(windows))] { - let mut s = System::new(); s.refresh_cpu_usage(); // https://man7.org/linux/man-pages/man3/getloadavg.3.html let avg = s.load_average(); - info = format!("cpu loadavg:{}", avg.one); + info = format!("cpu loadavg: {}", avg.one); res = (((max as f64) - avg.one) * 0.5).round() as usize; } res = std::cmp::min(res, max / 2); - if res == 0 { - res = 1; - } + res = std::cmp::min(res, memory as usize / 2); + // Use common thread count + res = match res { + _ if res >= 64 => 64, + _ if res >= 32 => 32, + _ if res >= 16 => 16, + _ if res >= 8 => 8, + _ if res >= 4 => 4, + _ if res >= 2 => 2, + _ => 1, + }; + // https://aomedia.googlesource.com/aom/+/refs/heads/main/av1/av1_cx_iface.c#677 + // https://aomedia.googlesource.com/aom/+/refs/heads/main/aom_util/aom_thread.h#26 + // https://chromium.googlesource.com/webm/libvpx/+/refs/heads/main/vp8/vp8_cx_iface.c#148 + // https://chromium.googlesource.com/webm/libvpx/+/refs/heads/main/vp9/vp9_cx_iface.c#190 + // https://github.com/FFmpeg/FFmpeg/blob/7c16bf0829802534004326c8e65fb6cdbdb634fa/libavcodec/pthread.c#L65 + // https://github.com/FFmpeg/FFmpeg/blob/7c16bf0829802534004326c8e65fb6cdbdb634fa/libavcodec/pthread_internal.h#L26 + // libaom: MAX_NUM_THREADS = 64 + // libvpx: MAX_NUM_THREADS = 64 + // ffmpeg: MAX_AUTO_THREADS = 16 + res = std::cmp::min(res, limit); // avoid frequent log let log = match THREAD_LOG_TIME.lock().unwrap().clone() { Some(instant) => instant.elapsed().as_secs() > 1, None => true, }; if log { - log::info!("cpu num: {max}, {info}, codec thread: {res}"); + log::info!("cpu num: {max}, {info}, available memory: {memory}G, codec thread: {res}"); *THREAD_LOG_TIME.lock().unwrap() = Some(Instant::now()); } res diff --git a/libs/scrap/src/common/hwcodec.rs b/libs/scrap/src/common/hwcodec.rs index 1b7f84fe6..2aeceb37e 100644 --- a/libs/scrap/src/common/hwcodec.rs +++ b/libs/scrap/src/common/hwcodec.rs @@ -63,7 +63,7 @@ impl EncoderApi for HwEncoder { gop, quality: DEFAULT_HW_QUALITY, rc: DEFAULT_RC, - thread_count: codec_thread_num() as _, // ffmpeg's thread_count is used for cpu + thread_count: codec_thread_num(16) as _, // ffmpeg's thread_count is used for cpu }; let format = match Encoder::format_from_name(config.name.clone()) { Ok(format) => format, @@ -235,7 +235,7 @@ impl HwDecoder { let ctx = DecodeContext { name: info.name.clone(), device_type: info.hwdevice.clone(), - thread_count: codec_thread_num() as _, + thread_count: codec_thread_num(16) as _, }; match Decoder::new(ctx) { Ok(decoder) => Ok(HwDecoder { decoder, info }), diff --git a/libs/scrap/src/common/vpxcodec.rs b/libs/scrap/src/common/vpxcodec.rs index 19ede9bba..63154bb1f 100644 --- a/libs/scrap/src/common/vpxcodec.rs +++ b/libs/scrap/src/common/vpxcodec.rs @@ -73,7 +73,7 @@ impl EncoderApi for VpxEncoder { // When the data buffer falls below this percentage of fullness, a dropped frame is indicated. Set the threshold to zero (0) to disable this feature. // In dynamic scenes, low bitrate gets low fps while high bitrate gets high fps. c.rc_dropframe_thresh = 25; - c.g_threads = codec_thread_num() as _; + c.g_threads = codec_thread_num(64) as _; c.g_error_resilient = VPX_ERROR_RESILIENT_DEFAULT; // https://developers.google.com/media/vp9/bitrate-modes/ // Constant Bitrate mode (CBR) is recommended for live streaming with VP9. @@ -450,7 +450,7 @@ impl VpxDecoder { }; let mut ctx = Default::default(); let cfg = vpx_codec_dec_cfg_t { - threads: codec_thread_num() as _, + threads: codec_thread_num(64) as _, w: 0, h: 0, };