Created: March 12, 2026 07:51
-
-
Save hjanuschka/76c4bd303876ac22675c1f3c58b4f6bd to your computer and use it in GitHub Desktop.
JXL decoder: use frame scanner + seeking for animations, progressive flush
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| diff --git a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc | |
| index 5cadd5f76b2b7..995f304d6fffa 100644 | |
| --- a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc | |
| +++ b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.cc | |
| @@ -16,20 +16,21 @@ | |
| namespace blink { | |
| using jxl_rs::jxl_rs_decoder_create; | |
| +using jxl_rs::jxl_rs_frame_scanner_create; | |
| using jxl_rs::jxl_rs_signature_check; | |
| using jxl_rs::JxlRsBasicInfo; | |
| using jxl_rs::JxlRsDecoder; | |
| using jxl_rs::JxlRsFrameHeader; | |
| +using jxl_rs::JxlRsFrameScanner; | |
| using jxl_rs::JxlRsPixelFormat; | |
| using jxl_rs::JxlRsProcessResult; | |
| using jxl_rs::JxlRsStatus; | |
| +using jxl_rs::JxlRsVisibleFrameInfo; | |
| namespace { | |
| -// The maximum number of decoded samples we allow. This helps prevent resource | |
| -// exhaustion from malicious files. The jxl-rs API counts pixels * channels, | |
| -// so an RGBA image counts 4 samples per pixel. JPEG XL codestream level 5 | |
| -// limits specify ~268M pixels, so we allow ~1B samples to support that. | |
| +// The maximum number of decoded samples we allow. JPEG XL codestream level 5 | |
| +// limits specify ~268M pixels; we allow ~1B samples for RGBA. | |
| constexpr uint64_t kMaxDecodedPixels = 1024ULL * 1024 * 1024; | |
| } // namespace | |
| @@ -72,6 +73,118 @@ bool JXLImageDecoder::MatchesJXLSignature( | |
| rust::Slice<const uint8_t>(data.data(), data.size())); | |
| } | |
| +// --------------------------------------------------------------------------- | |
| +// Frame scanning (no pixel decoding) | |
| +// --------------------------------------------------------------------------- | |
| + | |
| +void JXLImageDecoder::ScanFrames() { | |
| + if (scanner_done_) { | |
| + return; | |
| + } | |
| + | |
| + if (!scanner_.has_value()) { | |
| + scanner_ = jxl_rs_frame_scanner_create(kMaxDecodedPixels); | |
| + } | |
| + | |
| + FastSharedBufferReader reader(data_.get()); | |
| + size_t data_size = reader.size(); | |
| + size_t remaining = data_size - scanner_input_offset_; | |
| + | |
| + if (remaining == 0 && !IsAllDataReceived()) { | |
| + return; | |
| + } | |
| + | |
| + Vector<uint8_t> chunk_buffer; | |
| + if (remaining > 0) { | |
| + chunk_buffer.resize(remaining); | |
| + } | |
| + auto data_span = remaining > 0 | |
| + ? reader.GetConsecutiveData(scanner_input_offset_, | |
| + remaining, | |
| + base::span(chunk_buffer)) | |
| + : base::span<const uint8_t>(); | |
| + | |
| + bool all_input = | |
| + IsAllDataReceived() && (scanner_input_offset_ + remaining >= data_size); | |
| + rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size()); | |
| + | |
| + JxlRsProcessResult result = (*scanner_)->feed(input_slice, all_input); | |
| + | |
| + if (result.status == JxlRsStatus::Error) { | |
| + SetFailed(); | |
| + return; | |
| + } | |
| + | |
| + scanner_input_offset_ += result.bytes_consumed; | |
| + | |
| + if (result.status == JxlRsStatus::Success) { | |
| + scanner_done_ = true; | |
| + } | |
| + | |
| + // Extract basic info from scanner if not yet available. | |
| + if (!have_basic_info_ && (*scanner_)->has_basic_info()) { | |
| + basic_info_ = (*scanner_)->get_basic_info(); | |
| + | |
| + if (!SetSize(basic_info_.width, basic_info_.height)) { | |
| + return; | |
| + } | |
| + | |
| + if (basic_info_.bits_per_sample > 8) { | |
| + is_high_bit_depth_ = true; | |
| + } | |
| + | |
| + decode_to_half_float_ = | |
| + ImageIsHighBitDepth() && | |
| + high_bit_depth_decoding_option_ == kHighBitDepthToHalfFloat; | |
| + | |
| + if (!IgnoresColorSpace()) { | |
| + auto icc_data = (*scanner_)->get_icc_profile(); | |
| + if (!icc_data.empty()) { | |
| + auto profile = ColorProfile::Create(icc_data); | |
| + if (profile) { | |
| + SetEmbeddedColorProfile(std::move(profile)); | |
| + } | |
| + } | |
| + } | |
| + | |
| + if (basic_info_.bits_per_sample == 8 && !basic_info_.is_grayscale && | |
| + !basic_info_.have_animation && !basic_info_.has_alpha) { | |
| + static constexpr char kType[] = "Jxl"; | |
| + update_bpp_histogram_callback_ = | |
| + CrossThreadBindOnce(&UpdateBppHistogram<kType>); | |
| + } | |
| + | |
| + have_basic_info_ = true; | |
| + } | |
| + | |
| + // Update frame_seek_info_ from the scanner's discovered frames. | |
| + size_t scanned_count = (*scanner_)->frame_count(); | |
| + base::TimeDelta cumulative_time; | |
| + | |
| + if (!frame_seek_info_.empty()) { | |
| + const auto& last = frame_seek_info_.back(); | |
| + cumulative_time = last.timestamp + last.duration; | |
| + } | |
| + | |
| + for (size_t i = frame_seek_info_.size(); i < scanned_count; i++) { | |
| + JxlRsVisibleFrameInfo info = (*scanner_)->get_frame_info(i); | |
| + FrameSeekInfo seek; | |
| + seek.duration = base::Milliseconds(info.duration_ms); | |
| + seek.timestamp = cumulative_time; | |
| + seek.is_keyframe = info.is_keyframe; | |
| + seek.decode_start_file_offset = info.decode_start_file_offset; | |
| + seek.remaining_in_box = info.remaining_in_box; | |
| + seek.visible_frames_to_skip = info.visible_frames_to_skip; | |
| + | |
| + cumulative_time += seek.duration; | |
| + frame_seek_info_.push_back(seek); | |
| + } | |
| +} | |
| + | |
| +// --------------------------------------------------------------------------- | |
| +// ImageDecoder overrides | |
| +// --------------------------------------------------------------------------- | |
| + | |
| void JXLImageDecoder::DecodeSize() { | |
| Decode(0, /*only_size=*/true); | |
| } | |
| @@ -86,21 +199,12 @@ wtf_size_t JXLImageDecoder::DecodeFrameCount() { | |
| return 1; | |
| } | |
| - // If we have received all the data, we must produce the correct | |
| - // frame count. Thus, we always decode all the data we have. | |
| - // TODO(veluca): for long animations, this will currently decode | |
| - // the entire file, using a large amount of memory and CPU time. | |
| - // Avoid doing that once jxl-rs supports seeking and/or frame | |
| - // skipping. | |
| - while (decoder_state_ != DecoderState::kDone) { | |
| - size_t offset_pre = input_offset_; | |
| - size_t decoded_frames_pre = num_decoded_frames_; | |
| - Decode(num_decoded_frames_, /*only_size=*/false); | |
| - // Exit the loop if the image is corrupted or we didn't make any progress. | |
| - if (Failed() || (offset_pre == input_offset_ && | |
| - num_decoded_frames_ == decoded_frames_pre)) { | |
| - break; | |
| - } | |
| + // Use the lightweight scanner to discover frames without decoding pixels. | |
| + ScanFrames(); | |
| + | |
| + // Resize the frame buffer cache to match discovered frames. | |
| + if (frame_seek_info_.size() > frame_buffer_cache_.size()) { | |
| + frame_buffer_cache_.resize(frame_seek_info_.size()); | |
| } | |
| return frame_buffer_cache_.size(); | |
| @@ -120,10 +224,8 @@ void JXLImageDecoder::InitializeNewFrame(wtf_size_t index) { | |
| buffer.SetOriginalFrameRect(gfx::Rect(Size())); | |
| buffer.SetRequiredPreviousFrameIndex(kNotFound); | |
| - // Set duration/timestamp if the frame header has been parsed. | |
| - // This is available before the frame is fully decoded. | |
| - if (index < frame_info_.size()) { | |
| - const FrameInfo& info = frame_info_[index]; | |
| + if (index < frame_seek_info_.size()) { | |
| + const FrameSeekInfo& info = frame_seek_info_[index]; | |
| buffer.SetDuration(info.duration); | |
| buffer.SetTimestamp(info.timestamp); | |
| } | |
| @@ -150,40 +252,36 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) { | |
| } | |
| } | |
| - FastSharedBufferReader reader(data_.get()); | |
| - size_t data_size = reader.size(); | |
| - | |
| - // Handle animation loop rewind. | |
| - if (decoder_.has_value() && !only_size && basic_info_.have_animation) { | |
| - bool frame_already_cached = | |
| - index < frame_buffer_cache_.size() && | |
| - frame_buffer_cache_[index].GetStatus() == ImageFrame::kFrameComplete; | |
| - | |
| - if (!frame_already_cached && index < num_decoded_frames_) { | |
| - (*decoder_)->rewind(); | |
| - decoder_state_ = DecoderState::kInitial; | |
| - num_decoded_frames_ = 0; | |
| - input_offset_ = 0; | |
| - // Keep basic_info_ and have_basic_info_ since the stream hasn't changed. | |
| + // For animation frames that need seeking (not the next sequential frame), | |
| + // use the seek path. | |
| + if (!only_size && have_basic_info_ && basic_info_.have_animation && | |
| + index != num_decoded_frames_) { | |
| + // Ensure we have seek info for this frame. | |
| + if (index >= frame_seek_info_.size()) { | |
| + ScanFrames(); | |
| + if (Failed() || index >= frame_seek_info_.size()) { | |
| + return; | |
| + } | |
| } | |
| + SeekAndDecodeFrame(index); | |
| + return; | |
| } | |
| - // Create decoder if needed. Pass premultiply_alpha_ so jxl-rs handles | |
| - // premultiplication natively (faster and handles alpha_associated correctly). | |
| + FastSharedBufferReader reader(data_.get()); | |
| + size_t data_size = reader.size(); | |
| + | |
| + // Create decoder if needed. | |
| if (!decoder_.has_value()) { | |
| decoder_ = jxl_rs_decoder_create(kMaxDecodedPixels, premultiply_alpha_); | |
| } | |
| // Process until we get what we need. | |
| for (;;) { | |
| - size_t remaining_size = data_size - input_offset_; | |
| - // When all data is received, process it all at once for efficiency. | |
| - // Only use smaller chunks for true progressive loading (streaming data). | |
| + size_t remaining_size = data_size - decoder_input_offset_; | |
| size_t chunk_size; | |
| if (IsAllDataReceived()) { | |
| - chunk_size = remaining_size; // Process all available data | |
| + chunk_size = remaining_size; | |
| } else { | |
| - // Progressive streaming: use smaller chunks to allow partial rendering | |
| constexpr size_t kMaxChunkSize = 64 * 1024; | |
| chunk_size = std::min(remaining_size, kMaxChunkSize); | |
| } | |
| @@ -192,12 +290,13 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) { | |
| Vector<uint8_t> chunk_buffer; | |
| if (chunk_size > 0) { | |
| chunk_buffer.resize(chunk_size); | |
| - data_span = reader.GetConsecutiveData(input_offset_, chunk_size, | |
| + data_span = reader.GetConsecutiveData(decoder_input_offset_, chunk_size, | |
| base::span(chunk_buffer)); | |
| } | |
| bool all_input = | |
| - IsAllDataReceived() && (input_offset_ + chunk_size >= data_size); | |
| + IsAllDataReceived() && | |
| + (decoder_input_offset_ + chunk_size >= data_size); | |
| rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size()); | |
| switch (decoder_state_) { | |
| @@ -210,16 +309,16 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) { | |
| return; | |
| } | |
| if (result.status == JxlRsStatus::NeedMoreInput) { | |
| - input_offset_ += result.bytes_consumed; | |
| + decoder_input_offset_ += result.bytes_consumed; | |
| if (all_input) { | |
| SetFailed(); | |
| } | |
| return; | |
| } | |
| - // Success - got basic info | |
| + // Success - got basic info. | |
| basic_info_ = (*decoder_)->get_basic_info(); | |
| - input_offset_ += result.bytes_consumed; | |
| + decoder_input_offset_ += result.bytes_consumed; | |
| if (!SetSize(basic_info_.width, basic_info_.height)) { | |
| return; | |
| @@ -233,12 +332,12 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) { | |
| ImageIsHighBitDepth() && | |
| high_bit_depth_decoding_option_ == kHighBitDepthToHalfFloat; | |
| - // Set pixel format on decoder. | |
| - // Use native 8-bit ordering for kN32, and RGBA F16 for half float. | |
| #if SK_PMCOLOR_BYTE_ORDER(B, G, R, A) | |
| - constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Bgra8; | |
| + constexpr JxlRsPixelFormat kNativePixelFormat = | |
| + JxlRsPixelFormat::Bgra8; | |
| #elif SK_PMCOLOR_BYTE_ORDER(R, G, B, A) | |
| - constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Rgba8; | |
| + constexpr JxlRsPixelFormat kNativePixelFormat = | |
| + JxlRsPixelFormat::Rgba8; | |
| #else | |
| #error "Unsupported Skia pixel order" | |
| #endif | |
| @@ -248,7 +347,6 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) { | |
| (*decoder_)->set_pixel_format(pixel_format, | |
| basic_info_.num_extra_channels); | |
| - // Extract ICC color profile. | |
| if (!IgnoresColorSpace()) { | |
| auto icc_data = (*decoder_)->get_icc_profile(); | |
| if (!icc_data.empty()) { | |
| @@ -259,8 +357,6 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) { | |
| } | |
| } | |
| - // Record bpp information only for 8-bit, color, still images without | |
| - // alpha. | |
| if (!have_basic_info_ && basic_info_.bits_per_sample == 8 && | |
| !basic_info_.is_grayscale && !basic_info_.have_animation && | |
| !basic_info_.has_alpha) { | |
| @@ -287,31 +383,27 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) { | |
| return; | |
| } | |
| if (result.status == JxlRsStatus::NeedMoreInput) { | |
| - input_offset_ += result.bytes_consumed; | |
| + decoder_input_offset_ += result.bytes_consumed; | |
| return; | |
| } | |
| - input_offset_ += result.bytes_consumed; | |
| + decoder_input_offset_ += result.bytes_consumed; | |
| - // Successfully parsed a frame header - increment discovered count. | |
| JxlRsFrameHeader header = (*decoder_)->get_frame_header(); | |
| if (basic_info_.have_animation) { | |
| wtf_size_t frame_idx = num_decoded_frames_; | |
| - FrameInfo info; | |
| - info.duration = base::Milliseconds(header.duration_ms); | |
| - info.timestamp = base::TimeDelta(); | |
| - | |
| - if (frame_idx > 0 && frame_idx - 1 < frame_info_.size()) { | |
| - const FrameInfo& prev = frame_info_[frame_idx - 1]; | |
| - info.timestamp = prev.timestamp + prev.duration; | |
| - } | |
| - if (frame_idx < frame_info_.size()) { | |
| - frame_info_[frame_idx] = info; | |
| - } else { | |
| - CHECK_EQ(frame_idx, frame_info_.size()); | |
| - frame_info_.push_back(info); | |
| + // Update frame_seek_info_ if we don't have it yet from the scanner. | |
| + if (frame_idx >= frame_seek_info_.size()) { | |
| + FrameSeekInfo info; | |
| + info.duration = base::Milliseconds(header.duration_ms); | |
| + info.timestamp = base::TimeDelta(); | |
| + if (frame_idx > 0 && frame_idx - 1 < frame_seek_info_.size()) { | |
| + const FrameSeekInfo& prev = frame_seek_info_[frame_idx - 1]; | |
| + info.timestamp = prev.timestamp + prev.duration; | |
| + } | |
| + frame_seek_info_.push_back(info); | |
| } | |
| } | |
| @@ -322,23 +414,12 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) { | |
| case DecoderState::kHaveFrameHeader: { | |
| wtf_size_t frame_index = num_decoded_frames_; | |
| - // Ensure frame buffer cache is large enough. | |
| if (frame_buffer_cache_.size() <= frame_index) { | |
| frame_buffer_cache_.resize(frame_index + 1); | |
| } | |
| ImageFrame& frame = frame_buffer_cache_[frame_index]; | |
| if (frame.GetStatus() == ImageFrame::kFrameEmpty) { | |
| - // We call InitializeNewFrame manually here because JXLImageDecoder, | |
| - // unlike other image decoder classes, handles the frame buffer cache | |
| - // in the decode loop. This happens because decoding the frame count | |
| - // also fully renders the frames - when we switch to lightweight | |
| - // decoding for frame count + decoding individual frames via seeking, | |
| - // we will likely be able to remove this call. | |
| - // | |
| - // IMPORTANT: InitializeNewFrame() must run before InitFrameBuffer(), | |
| - // so the base class allocates the correct backing store (e.g. | |
| - // RGBA_F16 for high bit depth + half float). | |
| InitializeNewFrame(frame_index); | |
| if (!InitFrameBuffer(frame_index)) { | |
| SetFailed(); | |
| @@ -351,7 +432,6 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) { | |
| const uint32_t width = basic_info_.width; | |
| const uint32_t height = basic_info_.height; | |
| - // Get direct access to the frame buffer's backing store. | |
| const SkBitmap& bitmap = frame.Bitmap(); | |
| uint8_t* frame_pixels = static_cast<uint8_t*>(bitmap.getPixels()); | |
| size_t row_stride = bitmap.rowBytes(); | |
| @@ -361,12 +441,9 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) { | |
| return; | |
| } | |
| - // Calculate buffer size for the decoder. | |
| size_t buffer_size = row_stride * height; | |
| rust::Slice<uint8_t> output_slice(frame_pixels, buffer_size); | |
| - // Decode directly into the frame buffer. | |
| - // Premultiplication is handled by jxl-rs based on premultiply_alpha_. | |
| JxlRsProcessResult result = (*decoder_)->decode_frame_with_stride( | |
| input_slice, all_input, output_slice, width, height, row_stride); | |
| @@ -375,56 +452,244 @@ void JXLImageDecoder::Decode(wtf_size_t index, bool only_size) { | |
| return; | |
| } | |
| if (result.status == JxlRsStatus::NeedMoreInput) { | |
| - // Update offset with consumed bytes for progressive decoding. | |
| - input_offset_ += result.bytes_consumed; | |
| - | |
| - // Signal that pixels may have changed for progressive rendering. | |
| - // TODO(veluca): set the frame status to kFramePartial if and only | |
| - // if jxl-rs signals that some data has been painted (jxl-rs | |
| - // does not yet expose this functionality, nor does it do | |
| - // progressive rendering properly). | |
| - frame.SetStatus(ImageFrame::kFramePartial); | |
| - frame.SetPixelsChanged(true); | |
| + decoder_input_offset_ += result.bytes_consumed; | |
| + | |
| + // Progressive flush: render whatever pixels are available. | |
| + JxlRsProcessResult flush_result = (*decoder_)->flush_pixels( | |
| + output_slice, width, height, row_stride); | |
| + if (flush_result.status == JxlRsStatus::Success) { | |
| + frame.SetPixelsChanged(true); | |
| + frame.SetStatus(ImageFrame::kFramePartial); | |
| + } | |
| + | |
| if (all_input) { | |
| SetFailed(); | |
| } | |
| return; | |
| } | |
| - input_offset_ += result.bytes_consumed; | |
| + decoder_input_offset_ += result.bytes_consumed; | |
| frame.SetPixelsChanged(true); | |
| frame.SetStatus(ImageFrame::kFrameComplete); | |
| - if (frame_index < frame_info_.size()) { | |
| - const FrameInfo& info = frame_info_[frame_index]; | |
| + if (frame_index < frame_seek_info_.size()) { | |
| + const FrameSeekInfo& info = frame_seek_info_[frame_index]; | |
| frame.SetDuration(info.duration); | |
| frame.SetTimestamp(info.timestamp); | |
| } | |
| num_decoded_frames_++; | |
| - // Record bpp histogram for still images when fully decoded. | |
| if (IsAllDataReceived() && update_bpp_histogram_callback_) { | |
| std::move(update_bpp_histogram_callback_).Run(Size(), data_->size()); | |
| } | |
| if ((*decoder_)->has_more_frames()) { | |
| - // Go back to waiting for next frame header. | |
| decoder_state_ = DecoderState::kHaveBasicInfo; | |
| } else { | |
| decoder_state_ = DecoderState::kDone; | |
| } | |
| - // Check if we've decoded the requested frame. | |
| if (frame_index >= index) { | |
| return; | |
| } | |
| break; | |
| } | |
| case DecoderState::kDone: | |
| - break; | |
| + return; | |
| + } | |
| + } | |
| +} | |
| + | |
| +// --------------------------------------------------------------------------- | |
| +// Seek-based animation frame decode | |
| +// --------------------------------------------------------------------------- | |
| + | |
| +void JXLImageDecoder::SeekAndDecodeFrame(wtf_size_t index) { | |
| + CHECK_LT(index, frame_seek_info_.size()); | |
| + const FrameSeekInfo& seek = frame_seek_info_[index]; | |
| + | |
| + // Create a fresh decoder for seeking. The decoder needs to have basic info | |
| + // parsed before we can seek. | |
| + if (!decoder_.has_value()) { | |
| + decoder_ = jxl_rs_decoder_create(kMaxDecodedPixels, premultiply_alpha_); | |
| + decoder_state_ = DecoderState::kInitial; | |
| + decoder_input_offset_ = 0; | |
| + } | |
| + | |
| + FastSharedBufferReader reader(data_.get()); | |
| + size_t data_size = reader.size(); | |
| + bool all_input = IsAllDataReceived(); | |
| + | |
| + // Ensure decoder has basic info. | |
| + if (decoder_state_ == DecoderState::kInitial) { | |
| + size_t remaining = data_size - decoder_input_offset_; | |
| + Vector<uint8_t> chunk_buffer; | |
| + if (remaining > 0) { | |
| + chunk_buffer.resize(remaining); | |
| + } | |
| + auto data_span = | |
| + remaining > 0 | |
| + ? reader.GetConsecutiveData(decoder_input_offset_, remaining, | |
| + base::span(chunk_buffer)) | |
| + : base::span<const uint8_t>(); | |
| + rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size()); | |
| + | |
| + JxlRsProcessResult result = | |
| + (*decoder_)->parse_basic_info(input_slice, all_input); | |
| + if (result.status != JxlRsStatus::Success) { | |
| + if (result.status == JxlRsStatus::Error || all_input) { | |
| + SetFailed(); | |
| + } | |
| + return; | |
| + } | |
| + decoder_input_offset_ += result.bytes_consumed; | |
| + | |
| + // Configure pixel format. | |
| +#if SK_PMCOLOR_BYTE_ORDER(B, G, R, A) | |
| + constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Bgra8; | |
| +#elif SK_PMCOLOR_BYTE_ORDER(R, G, B, A) | |
| + constexpr JxlRsPixelFormat kNativePixelFormat = JxlRsPixelFormat::Rgba8; | |
| +#else | |
| +#error "Unsupported Skia pixel order" | |
| +#endif | |
| + JxlRsPixelFormat pixel_format = | |
| + decode_to_half_float_ ? JxlRsPixelFormat::RgbaF16 | |
| + : kNativePixelFormat; | |
| + (*decoder_)->set_pixel_format(pixel_format, basic_info_.num_extra_channels); | |
| + decoder_state_ = DecoderState::kHaveBasicInfo; | |
| + } | |
| + | |
| + // Seek to the frame's decode start position. | |
| + (*decoder_)->seek_to_frame(seek.remaining_in_box); | |
| + size_t input_offset = seek.decode_start_file_offset; | |
| + | |
| + // Skip preceding visible frames if needed. | |
| + for (size_t i = 0; i < seek.visible_frames_to_skip; i++) { | |
| + size_t remaining = data_size - input_offset; | |
| + Vector<uint8_t> chunk_buffer; | |
| + if (remaining > 0) { | |
| + chunk_buffer.resize(remaining); | |
| + } | |
| + auto data_span = | |
| + remaining > 0 | |
| + ? reader.GetConsecutiveData(input_offset, remaining, | |
| + base::span(chunk_buffer)) | |
| + : base::span<const uint8_t>(); | |
| + rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size()); | |
| + | |
| + JxlRsProcessResult result = | |
| + (*decoder_)->skip_visible_frame(input_slice, all_input); | |
| + if (result.status != JxlRsStatus::Success) { | |
| + if (result.status == JxlRsStatus::Error || all_input) { | |
| + SetFailed(); | |
| + } | |
| + return; | |
| + } | |
| + input_offset += result.bytes_consumed; | |
| + } | |
| + | |
| + // Ensure frame buffer cache is large enough. | |
| + if (frame_buffer_cache_.size() <= index) { | |
| + frame_buffer_cache_.resize(index + 1); | |
| + } | |
| + | |
| + ImageFrame& frame = frame_buffer_cache_[index]; | |
| + if (frame.GetStatus() == ImageFrame::kFrameEmpty) { | |
| + InitializeNewFrame(index); | |
| + if (!InitFrameBuffer(index)) { | |
| + SetFailed(); | |
| + return; | |
| + } | |
| + } | |
| + | |
| + frame.SetHasAlpha(basic_info_.has_alpha); | |
| + | |
| + const uint32_t width = basic_info_.width; | |
| + const uint32_t height = basic_info_.height; | |
| + | |
| + const SkBitmap& bitmap = frame.Bitmap(); | |
| + uint8_t* frame_pixels = static_cast<uint8_t*>(bitmap.getPixels()); | |
| + size_t row_stride = bitmap.rowBytes(); | |
| + | |
| + if (!frame_pixels) { | |
| + SetFailed(); | |
| + return; | |
| + } | |
| + | |
| + size_t buffer_size = row_stride * height; | |
| + rust::Slice<uint8_t> output_slice(frame_pixels, buffer_size); | |
| + | |
| + // Parse frame header. | |
| + { | |
| + size_t remaining = data_size - input_offset; | |
| + Vector<uint8_t> chunk_buffer; | |
| + if (remaining > 0) { | |
| + chunk_buffer.resize(remaining); | |
| + } | |
| + auto data_span = | |
| + remaining > 0 | |
| + ? reader.GetConsecutiveData(input_offset, remaining, | |
| + base::span(chunk_buffer)) | |
| + : base::span<const uint8_t>(); | |
| + rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size()); | |
| + | |
| + JxlRsProcessResult result = | |
| + (*decoder_)->parse_frame_header(input_slice, all_input); | |
| + if (result.status != JxlRsStatus::Success) { | |
| + if (result.status == JxlRsStatus::Error || all_input) { | |
| + SetFailed(); | |
| + } | |
| + return; | |
| + } | |
| + input_offset += result.bytes_consumed; | |
| + } | |
| + | |
| + // Decode pixels. | |
| + { | |
| + size_t remaining = data_size - input_offset; | |
| + Vector<uint8_t> chunk_buffer; | |
| + if (remaining > 0) { | |
| + chunk_buffer.resize(remaining); | |
| } | |
| + auto data_span = | |
| + remaining > 0 | |
| + ? reader.GetConsecutiveData(input_offset, remaining, | |
| + base::span(chunk_buffer)) | |
| + : base::span<const uint8_t>(); | |
| + rust::Slice<const uint8_t> input_slice(data_span.data(), data_span.size()); | |
| + | |
| + JxlRsProcessResult result = (*decoder_)->decode_frame_with_stride( | |
| + input_slice, all_input, output_slice, width, height, row_stride); | |
| + | |
| + if (result.status == JxlRsStatus::Error) { | |
| + SetFailed(); | |
| + return; | |
| + } | |
| + if (result.status == JxlRsStatus::NeedMoreInput) { | |
| + JxlRsProcessResult flush_result = | |
| + (*decoder_)->flush_pixels(output_slice, width, height, row_stride); | |
| + if (flush_result.status == JxlRsStatus::Success) { | |
| + frame.SetPixelsChanged(true); | |
| + frame.SetStatus(ImageFrame::kFramePartial); | |
| + } | |
| + if (all_input) { | |
| + SetFailed(); | |
| + } | |
| + return; | |
| + } | |
| + input_offset += result.bytes_consumed; | |
| } | |
| + | |
| + frame.SetPixelsChanged(true); | |
| + frame.SetStatus(ImageFrame::kFrameComplete); | |
| + frame.SetDuration(seek.duration); | |
| + frame.SetTimestamp(seek.timestamp); | |
| + | |
| + // After seeking, the decoder is in an indeterminate state for sequential | |
| + // decode. Mark it so the next sequential decode will either continue | |
| + // from the right place or seek again. | |
| + decoder_state_ = DecoderState::kHaveBasicInfo; | |
| } | |
| bool JXLImageDecoder::CanReusePreviousFrameBuffer( | |
| @@ -441,20 +706,15 @@ bool JXLImageDecoder::FrameIsReceivedAtIndex(wtf_size_t index) const { | |
| std::optional<base::TimeDelta> JXLImageDecoder::FrameTimestampAtIndex( | |
| wtf_size_t index) const { | |
| - // Use frame_info_ which is populated at header parsing time, | |
| - // not frame_buffer_cache_ which is only set after decoding. | |
| - if (index < frame_info_.size()) { | |
| - return frame_info_[index].timestamp; | |
| + if (index < frame_seek_info_.size()) { | |
| + return frame_seek_info_[index].timestamp; | |
| } | |
| return std::nullopt; | |
| } | |
| base::TimeDelta JXLImageDecoder::FrameDurationAtIndex(wtf_size_t index) const { | |
| - // Durations are available in frame_info_ for all discovered frames. | |
| - // Frame discovery happens in DecodeFrameCount() which is called by | |
| - // FrameCount() whenever new data arrives. | |
| - if (index < frame_info_.size()) { | |
| - return frame_info_[index].duration; | |
| + if (index < frame_seek_info_.size()) { | |
| + return frame_seek_info_[index].duration; | |
| } | |
| return base::TimeDelta(); | |
| } | |
| @@ -472,13 +732,8 @@ int JXLImageDecoder::RepetitionCount() const { | |
| wtf_size_t JXLImageDecoder::ClearCacheExceptFrame( | |
| wtf_size_t clear_except_frame) { | |
| - if (basic_info_.have_animation) { | |
| - // TODO(veluca): jxl-rs does not (yet) support seeking to specific frames. | |
| - // For now, deal with this by disallowing clearing the cache. | |
| - | |
| - return 0; | |
| - } | |
| - | |
| + // With frame seeking support, we can clear cached frames and re-decode | |
| + // them on demand by seeking to the appropriate offset. | |
| return ImageDecoder::ClearCacheExceptFrame(clear_except_frame); | |
| } | |
| diff --git a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h | |
| index 1a3f502fdba83..b09bfda992e2f 100644 | |
| --- a/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h | |
| +++ b/third_party/blink/renderer/platform/image-decoders/jxl/jxl_image_decoder.h | |
| @@ -45,10 +45,11 @@ class PLATFORM_EXPORT JXLImageDecoder final : public ImageDecoder { | |
| static bool MatchesJXLSignature(const FastSharedBufferReader& fast_reader); | |
| private: | |
| - // C++-managed Rust Box for JxlRsDecoder. | |
| + // C++-managed Rust Box types. | |
| using JxlRsDecoderPtr = rust::Box<jxl_rs::JxlRsDecoder>; | |
| + using JxlRsScannerPtr = rust::Box<jxl_rs::JxlRsFrameScanner>; | |
| - // Decoder state machine. | |
| + // Decoder state machine for the pixel decoder. | |
| enum class DecoderState { | |
| kInitial, // Waiting for basic info | |
| kHaveBasicInfo, // Have basic info, waiting for frame header | |
| @@ -56,10 +57,14 @@ class PLATFORM_EXPORT JXLImageDecoder final : public ImageDecoder { | |
| kDone // Decoding is done | |
| }; | |
| - // Frame information tracked during decoding. | |
| - struct FrameInfo { | |
| + // Seek info for a visible frame, cached from the scanner. | |
| + struct FrameSeekInfo { | |
| base::TimeDelta duration; | |
| base::TimeDelta timestamp; | |
| + bool is_keyframe = false; | |
| + size_t decode_start_file_offset = 0; | |
| + uint64_t remaining_in_box = 0; | |
| + size_t visible_frames_to_skip = 0; | |
| }; | |
| // ImageDecoder: | |
| @@ -72,29 +77,39 @@ class PLATFORM_EXPORT JXLImageDecoder final : public ImageDecoder { | |
| // Internal decode function that optionally stops after metadata. | |
| void Decode(wtf_size_t index, bool only_size); | |
| - // Eagerly decode all animation frames upfront. | |
| - void DecodeAllFrames(); | |
| + // Run the frame scanner to discover frame metadata without decoding pixels. | |
| + void ScanFrames(); | |
| + | |
| + // Seek the pixel decoder to the target frame and decode it. | |
| + void SeekAndDecodeFrame(wtf_size_t index); | |
| // Converts JXL pixel format to Skia color type. | |
| SkColorType GetSkColorType() const; | |
| - // Decoder state. | |
| + // Lightweight frame scanner -- discovers frame count, durations, and seek | |
| + // offsets without decoding any pixels. | |
| + std::optional<JxlRsScannerPtr> scanner_; | |
| + size_t scanner_input_offset_ = 0; | |
| + bool scanner_done_ = false; | |
| + | |
| + // Full pixel decoder with state machine. | |
| std::optional<JxlRsDecoderPtr> decoder_; | |
| DecoderState decoder_state_ = DecoderState::kInitial; | |
| + size_t decoder_input_offset_ = 0; | |
| + wtf_size_t num_decoded_frames_ = 0; | |
| + | |
| + // Cached metadata. | |
| jxl_rs::JxlRsBasicInfo basic_info_{}; | |
| bool have_basic_info_ = false; | |
| - wtf_size_t num_decoded_frames_ = 0; // Frames whose pixels we've decoded. | |
| - size_t input_offset_ = 0; // Current position in input stream. | |
| - // Animation frame tracking. | |
| - Vector<FrameInfo> frame_info_; | |
| + // Per-frame seek info populated by the scanner. | |
| + Vector<FrameSeekInfo> frame_seek_info_; | |
| // Color management. | |
| bool is_high_bit_depth_ = false; | |
| bool decode_to_half_float_ = false; | |
| - // Used to call UpdateBppHistogram<"Jxl">() at most once to record the | |
| - // bits-per-pixel value of the image when the image is successfully decoded. | |
| + // Used to call UpdateBppHistogram<"Jxl">() at most once. | |
| CrossThreadOnceFunction<void(gfx::Size, size_t)> | |
| update_bpp_histogram_callback_; | |
| }; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs | |
| index 2c3ce9855618b..ba55581559ced 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/color.rs | |
| @@ -937,7 +937,29 @@ impl JxlColorEncoding { | |
| } | |
| } else { | |
| match self { | |
| - JxlColorEncoding::XYB { .. } => todo!("implement A2B0 and B2A0 tags"), | |
| + JxlColorEncoding::XYB { .. } => { | |
| + // Create A2B0 tag for XYB color space | |
| + let a2b0_start = tags_data.len() as u32; | |
| + create_icc_lut_atob_tag_for_xyb(&mut tags_data)?; | |
| + pad_to_4_byte_boundary(&mut tags_data); | |
| + let a2b0_size = (tags_data.len() as u32) - a2b0_start; | |
| + collected_tags.push(TagInfo { | |
| + signature: *b"A2B0", | |
| + offset_in_tags_blob: a2b0_start, | |
| + size_unpadded: a2b0_size, | |
| + }); | |
| + | |
| + // Create B2A0 tag (no-op, required by Apple software) | |
| + let b2a0_start = tags_data.len() as u32; | |
| + create_icc_noop_btoa_tag(&mut tags_data)?; | |
| + pad_to_4_byte_boundary(&mut tags_data); | |
| + let b2a0_size = (tags_data.len() as u32) - b2a0_start; | |
| + collected_tags.push(TagInfo { | |
| + signature: *b"B2A0", | |
| + offset_in_tags_blob: b2a0_start, | |
| + size_unpadded: b2a0_size, | |
| + }); | |
| + } | |
| JxlColorEncoding::RgbColorSpace { | |
| transfer_function, .. | |
| } | |
| @@ -2047,6 +2069,108 @@ fn tone_map_pixel( | |
| ]) | |
| } | |
| +/// Create mAB A2B0 tag for XYB color space. | |
| +fn create_icc_lut_atob_tag_for_xyb(tags: &mut Vec<u8>) -> Result<(), Error> { | |
| + use super::xyb_constants::*; | |
| + use byteorder::{BigEndian, WriteBytesExt}; | |
| + | |
| + // Tag signature: 'mAB ' | |
| + tags.extend_from_slice(b"mAB "); | |
| + // 4 reserved bytes set to 0 | |
| + tags.write_u32::<BigEndian>(0) | |
| + .map_err(|_| Error::InvalidIccStream)?; | |
| + // Number of input channels | |
| + tags.push(3); | |
| + // Number of output channels | |
| + tags.push(3); | |
| + // 2 reserved bytes for padding | |
| + tags.write_u16::<BigEndian>(0) | |
| + .map_err(|_| Error::InvalidIccStream)?; | |
| + | |
| + // Offsets (calculated based on structure size) | |
| + // offset to first B curve: 32 | |
| + tags.write_u32::<BigEndian>(32) | |
| + .map_err(|_| Error::InvalidIccStream)?; | |
| + // offset to matrix: 244 | |
| + tags.write_u32::<BigEndian>(244) | |
| + .map_err(|_| Error::InvalidIccStream)?; | |
| + // offset to first M curve: 148 | |
| + tags.write_u32::<BigEndian>(148) | |
| + .map_err(|_| Error::InvalidIccStream)?; | |
| + // offset to CLUT: 80 | |
| + tags.write_u32::<BigEndian>(80) | |
| + .map_err(|_| Error::InvalidIccStream)?; | |
| + // offset to first A curve (reuse linear B curves): 32 | |
| + tags.write_u32::<BigEndian>(32) | |
| + .map_err(|_| Error::InvalidIccStream)?; | |
| + | |
| + // offset = 32: B curves (3 identity/linear curves) | |
| + // Each curve is 16 bytes: 'para' (4) + reserved (4) + function type (2) + reserved (2) + gamma (4) | |
| + // For type 0: Y = X^gamma, with gamma = 1.0 (identity) | |
| + for _ in 0..3 { | |
| + create_icc_curv_para_tag(tags, &[1.0], 0)?; | |
| + } | |
| + | |
| + // offset = 80: CLUT | |
| + // 16 bytes for grid points (only first 3 used, rest 0) | |
| + for i in 0..16 { | |
| + tags.push(if i < 3 { 2 } else { 0 }); | |
| + } | |
| + // precision = 2 (16-bit) | |
| + tags.push(2); | |
| + // 3 bytes padding | |
| + tags.push(0); | |
| + tags.write_u16::<BigEndian>(0) | |
| + .map_err(|_| Error::InvalidIccStream)?; | |
| + | |
| + // 2x2x2x3 entries of 2 bytes each = 48 bytes | |
| + let cube = unscaled_a2b_cube_full(); | |
| + for row_x in &cube { | |
| + for row_y in row_x { | |
| + for out_f in row_y { | |
| + for &val_f in out_f { | |
| + let val = (65535.0 * val_f).round().clamp(0.0, 65535.0) as u16; | |
| + tags.write_u16::<BigEndian>(val) | |
| + .map_err(|_| Error::InvalidIccStream)?; | |
| + } | |
| + } | |
| + } | |
| + } | |
| + | |
| + // offset = 148: M curves (3 parametric curves) | |
| + // Type 3 parametric curve: Y = (aX + b)^gamma for X >= d, else Y = cX | |
| + // Each curve: 12 + 5*4 = 32 bytes | |
| + let scale = xyb_scale(); | |
| + for i in 0..3 { | |
| + let b = -XYB_OFFSET[i] - NEG_OPSIN_ABSORBANCE_BIAS_RGB[i].cbrt(); | |
| + let params = [ | |
| + 3.0, // gamma | |
| + 1.0 / scale[i], // a | |
| + b, // b | |
| + 0.0, // c (unused) | |
| + (-b * scale[i]).max(0.0), // d (make skcms happy) | |
| + ]; | |
| + create_icc_curv_para_tag(tags, ¶ms, 3)?; | |
| + } | |
| + | |
| + // offset = 244: Matrix (12 values as s15Fixed16) | |
| + // 9 matrix values + 3 intercepts = 12 * 4 = 48 bytes | |
| + for v in XYB_ICC_MATRIX { | |
| + append_s15_fixed_16(tags, v as f32)?; | |
| + } | |
| + | |
| + // Intercepts | |
| + for i in 0..3 { | |
| + let mut intercept: f64 = 0.0; | |
| + for j in 0..3 { | |
| + intercept += XYB_ICC_MATRIX[i * 3 + j] * (NEG_OPSIN_ABSORBANCE_BIAS_RGB[j] as f64); | |
| + } | |
| + append_s15_fixed_16(tags, intercept as f32)?; | |
| + } | |
| + | |
| + Ok(()) | |
| +} | |
| + | |
| /// Create mft1 (8-bit LUT) A2B0 tag for HDR tone mapping. | |
| fn create_icc_lut_atob_tag_for_hdr( | |
| transfer_function: &JxlTransferFunction, | |
| @@ -2642,4 +2766,17 @@ mod test { | |
| assert!(!rgb.same_color_encoding(&gray)); | |
| assert!(!gray.same_color_encoding(&rgb)); | |
| } | |
| + | |
| + /// Verify XYB color profiles generate valid ICC profiles with A2B0/B2A0 tags. | |
| + #[test] | |
| + fn test_xyb_icc_profile_generation() { | |
| + let xyb = JxlColorProfile::Simple(JxlColorEncoding::XYB { | |
| + rendering_intent: RenderingIntent::Perceptual, | |
| + }); | |
| + | |
| + let icc = xyb.try_as_icc().expect("XYB should generate ICC profile"); | |
| + assert!(!icc.is_empty()); | |
| + assert!(icc.windows(4).any(|w| w == b"mAB ")); | |
| + assert!(icc.windows(4).any(|w| w == b"mBA ")); | |
| + } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs | |
| index 5debb0c1df1d6..966033c5a5d50 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/data_types.rs | |
| @@ -40,6 +40,13 @@ impl JxlColorType { | |
| Self::Rgba | Self::Bgra => false, | |
| } | |
| } | |
| + pub fn add_alpha(&self) -> Self { | |
| + match self { | |
| + Self::Grayscale | Self::GrayscaleAlpha => Self::GrayscaleAlpha, | |
| + Self::Rgb | Self::Rgba => Self::Rgba, | |
| + Self::Bgr | Self::Bgra => Self::Bgra, | |
| + } | |
| + } | |
| } | |
| #[derive(Clone, Copy, Debug, PartialEq, Eq)] | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs | |
| index d74b0ffa18e06..848adce67153f 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/decoder.rs | |
| @@ -9,7 +9,7 @@ use super::{ | |
| }; | |
| #[cfg(test)] | |
| use crate::frame::Frame; | |
| -use crate::{api::JxlFrameHeader, error::Result}; | |
| +use crate::{api::JxlFrameHeader, container::frame_index::FrameIndexBox, error::Result}; | |
| use states::*; | |
| use std::marker::PhantomData; | |
| @@ -35,6 +35,42 @@ pub struct JxlDecoder<State: JxlState> { | |
| #[cfg(test)] | |
| pub type FrameCallback = dyn FnMut(&Frame, usize) -> Result<()>; | |
| +/// Information about a single visible frame discovered while decoding. | |
| +#[derive(Debug, Clone, PartialEq)] | |
| +pub struct VisibleFrameInfo { | |
| + /// Zero-based index among visible frames. | |
| + pub index: usize, | |
| + /// Duration in milliseconds (0 for still images or the last frame). | |
| + pub duration_ms: f64, | |
| + /// Duration in raw ticks from the animation header. | |
| + pub duration_ticks: u32, | |
| + /// Byte offset of this frame's header in the input file. | |
| + pub(crate) file_offset: usize, | |
| + /// Whether this is the last frame in the codestream. | |
| + pub is_last: bool, | |
| + /// Whether this frame is a seek-keyframe for visible-frame playback. | |
| + /// | |
| + /// This is equivalent to `seek_target.visible_frames_to_skip == 0`. | |
| + pub is_keyframe: bool, | |
| + /// Precomputed seek inputs for this visible frame. | |
| + pub seek_target: VisibleFrameSeekTarget, | |
| + /// Frame name, if any. | |
| + pub name: String, | |
| +} | |
| + | |
| +/// Computed seek inputs for a target visible frame. | |
| +#[derive(Debug, Clone, Copy, PartialEq, Eq)] | |
| +pub struct VisibleFrameSeekTarget { | |
| + /// File byte offset to start feeding input from. | |
| + pub decode_start_file_offset: usize, | |
| + /// Remaining codestream bytes in the current container box at the seek | |
| + /// point. Pass this to [`JxlDecoder::start_new_frame`]. | |
| + pub remaining_in_box: u64, | |
| + /// Number of visible frames to skip after seek-start before decoding the | |
| + /// requested target frame. | |
| + pub visible_frames_to_skip: usize, | |
| +} | |
| + | |
| impl<S: JxlState> JxlDecoder<S> { | |
| fn wrap_inner(inner: Box<JxlDecoderInner>) -> Self { | |
| Self { | |
| @@ -54,6 +90,26 @@ impl<S: JxlState> JxlDecoder<S> { | |
| self.inner.decoded_frames() | |
| } | |
| + /// Returns the parsed frame index box, if the file contained one. | |
| + /// | |
| + /// The frame index box (`jxli`) is an optional part of the JXL container | |
| + /// format that provides a seek table for animated files, listing keyframe | |
| + /// byte offsets, timestamps, and frame counts. | |
| + /// | |
| + /// TODO(veluca): Provide a higher-level frame-index API aligned with | |
| + /// `scanned_frames()` / `VisibleFrameInfo` seek metadata. | |
| + pub fn frame_index(&self) -> Option<&FrameIndexBox> { | |
| + self.inner.frame_index() | |
| + } | |
| + | |
| + /// Returns visible frame info entries collected so far. | |
| + /// | |
| + /// When `JxlDecoderOptions::scan_frames_only` is enabled this is the | |
| + /// primary output of decoding. | |
| + pub fn scanned_frames(&self) -> &[VisibleFrameInfo] { | |
| + self.inner.scanned_frames() | |
| + } | |
| + | |
| /// Rewinds a decoder to the start of the file, allowing past frames to be displayed again. | |
| pub fn rewind(mut self) -> JxlDecoder<Initialized> { | |
| self.inner.rewind(); | |
| @@ -93,8 +149,6 @@ impl JxlDecoder<Initialized> { | |
| } | |
| impl JxlDecoder<WithImageInfo> { | |
| - // TODO(veluca): once frame skipping is implemented properly, expose that in the API. | |
| - | |
| /// Obtains the image's basic information. | |
| pub fn basic_info(&self) -> &JxlBasicInfo { | |
| self.inner.basic_info().unwrap() | |
| @@ -116,10 +170,15 @@ impl JxlDecoder<WithImageInfo> { | |
| self.inner.set_output_color_profile(profile) | |
| } | |
| + /// Retrieves the current pixel format for output buffers. | |
| pub fn current_pixel_format(&self) -> &JxlPixelFormat { | |
| self.inner.current_pixel_format().unwrap() | |
| } | |
| + /// Specifies pixel format for output buffers. | |
| + /// | |
| + /// Setting this may also change output color profile in some cases, if the profile was not set | |
| + /// manually before. | |
| pub fn set_pixel_format(&mut self, pixel_format: JxlPixelFormat) { | |
| self.inner.set_pixel_format(pixel_format); | |
| } | |
| @@ -132,10 +191,54 @@ impl JxlDecoder<WithImageInfo> { | |
| Ok(self.map_inner_processing_result(inner_result)) | |
| } | |
| + /// Draws all the pixels we have data for. This is useful for, e.g., previewing LF frames. | |
| + /// | |
| + /// Note: see `process` for alignment requirements for the buffer data. | |
| + pub fn flush_pixels(&mut self, buffers: &mut [JxlOutputBuffer<'_>]) -> Result<()> { | |
| + self.inner.flush_pixels(buffers) | |
| + } | |
| + | |
| pub fn has_more_frames(&self) -> bool { | |
| self.inner.has_more_frames() | |
| } | |
| + /// Resets frame-level decoder state to prepare for decoding a new frame. | |
| + /// | |
| + /// This clears intermediate buffers (frame header, TOC, section data) while | |
| + /// preserving image-level state (file header, color profiles, pixel format, | |
| + /// reference frames). The box parser is restored to the correct | |
| + /// mid-codestream state using `remaining_in_box`, so the next `process()` | |
| + /// call correctly parses a new frame header from the input. | |
| + /// | |
| + /// # Arguments | |
| + /// | |
| + /// * `seek_target` -- from `VisibleFrameInfo::seek_target`. | |
| + /// Includes both the box-parser state (`remaining_in_box`) and the input | |
| + /// resume offset (`decode_start_file_offset`). | |
| + /// | |
| + /// After calling this, provide raw file input starting from | |
| + /// `seek_target.decode_start_file_offset`. | |
| + /// | |
| + /// # Example | |
| + /// | |
| + /// ```rust,ignore | |
| + /// // 1. Scan frame info using the regular decoder API. | |
| + /// let options = JxlDecoderOptions { | |
| + /// scan_frames_only: true, | |
| + /// ..Default::default() | |
| + /// }; | |
| + /// let decoder = JxlDecoder::<states::Initialized>::new(options); | |
| + /// // ...advance decoder and call `scanned_frames()`... | |
| + /// | |
| + /// // 2. Seek to frame N (bare codestream). | |
| + /// let target = &frames[n]; | |
| + /// decoder.start_new_frame(target.seek_target); | |
| + /// // 3. Provide input from target.seek_target.decode_start_file_offset and process(). | |
| + /// ``` | |
| + pub fn start_new_frame(&mut self, seek_target: VisibleFrameSeekTarget) { | |
| + self.inner.start_new_frame(seek_target.remaining_in_box); | |
| + } | |
| + | |
| #[cfg(test)] | |
| pub(crate) fn set_use_simple_pipeline(&mut self, u: bool) { | |
| self.inner.set_use_simple_pipeline(u); | |
| @@ -143,7 +246,17 @@ impl JxlDecoder<WithImageInfo> { | |
| } | |
| impl JxlDecoder<WithFrameInfo> { | |
| - /// Skip the current frame. | |
| + /// Skip the current frame without decoding pixels. | |
| + /// | |
| + /// This reads section data from the input to advance past the frame, but | |
| + /// does not render pixels. Reference frames that may be needed by later | |
| + /// frames are still decoded internally. | |
| + /// | |
| + /// For efficient frame seeking in animations, enable | |
| + /// `JxlDecoderOptions::scan_frames_only` and use | |
| + /// [`scanned_frames`](JxlDecoder::scanned_frames), then | |
| + /// [`start_new_frame`](JxlDecoder::start_new_frame) to jump directly to a | |
| + /// target frame. | |
| pub fn skip_frame( | |
| mut self, | |
| input: &mut impl JxlBitstreamInput, | |
| @@ -191,7 +304,6 @@ pub(crate) mod tests { | |
| use crate::api::{JxlDataFormat, JxlDecoderOptions}; | |
| use crate::error::Error; | |
| use crate::image::{Image, Rect}; | |
| - use crate::util::test::assert_almost_abs_eq_coords; | |
| use jxl_macros::for_each_test_file; | |
| use std::path::Path; | |
| @@ -202,6 +314,7 @@ pub(crate) mod tests { | |
| &std::fs::read("resources/test/green_queen_vardct_e3.jxl").unwrap(), | |
| u.arbitrary::<u8>().unwrap() as usize + 1, | |
| false, | |
| + false, | |
| None, | |
| ) | |
| .unwrap(); | |
| @@ -214,6 +327,7 @@ pub(crate) mod tests { | |
| mut input: &[u8], | |
| chunk_size: usize, | |
| use_simple_pipeline: bool, | |
| + do_flush: bool, | |
| callback: Option<Box<dyn FnMut(&Frame, usize) -> Result<(), Error>>>, | |
| ) -> Result<(usize, Vec<Vec<Image<f32>>>), Error> { | |
| let options = JxlDecoderOptions::default(); | |
| @@ -226,7 +340,7 @@ pub(crate) mod tests { | |
| let mut chunk_input = &input[0..0]; | |
| macro_rules! advance_decoder { | |
| - ($decoder: ident $(, $extra_arg: expr)?) => { | |
| + ($decoder: ident $(, $extra_arg: expr)? $(; $flush_arg: expr)?) => { | |
| loop { | |
| chunk_input = | |
| &input[..(chunk_input.len().saturating_add(chunk_size)).min(input.len())]; | |
| @@ -236,6 +350,12 @@ pub(crate) mod tests { | |
| match process_result.unwrap() { | |
| ProcessingResult::Complete { result } => break result, | |
| ProcessingResult::NeedsMoreInput { fallback, .. } => { | |
| + $( | |
| + let mut fallback = fallback; | |
| + if do_flush && !input.is_empty() { | |
| + fallback.flush_pixels($flush_arg)?; | |
| + } | |
| + )? | |
| if input.is_empty() { | |
| panic!("Unexpected end of input"); | |
| } | |
| @@ -281,9 +401,6 @@ pub(crate) mod tests { | |
| let mut frames = vec![]; | |
| loop { | |
| - // Process until we have frame info | |
| - let mut decoder_with_frame_info = advance_decoder!(decoder_with_image_info); | |
| - | |
| // First channel is interleaved. | |
| let mut buffers = vec![Image::new_with_value( | |
| (buffer_width * num_channels, buffer_height), | |
| @@ -313,7 +430,11 @@ pub(crate) mod tests { | |
| }) | |
| .collect(); | |
| - decoder_with_image_info = advance_decoder!(decoder_with_frame_info, &mut api_buffers); | |
| + // Process until we have frame info | |
| + let mut decoder_with_frame_info = | |
| + advance_decoder!(decoder_with_image_info; &mut api_buffers); | |
| + decoder_with_image_info = | |
| + advance_decoder!(decoder_with_frame_info, &mut api_buffers; &mut api_buffers); | |
| // All pixels should have been overwritten, so they should no longer be NaNs. | |
| for buf in buffers.iter() { | |
| @@ -341,76 +462,108 @@ pub(crate) mod tests { | |
| } | |
| fn decode_test_file(path: &Path) -> Result<(), Error> { | |
| - decode(&std::fs::read(path)?, usize::MAX, false, None)?; | |
| + decode(&std::fs::read(path)?, usize::MAX, false, false, None)?; | |
| Ok(()) | |
| } | |
| for_each_test_file!(decode_test_file); | |
| fn decode_test_file_chunks(path: &Path) -> Result<(), Error> { | |
| - decode(&std::fs::read(path)?, 1, false, None)?; | |
| + decode(&std::fs::read(path)?, 1, false, false, None)?; | |
| Ok(()) | |
| } | |
| for_each_test_file!(decode_test_file_chunks); | |
| + fn compare_frames( | |
| + path: &Path, | |
| + fc: usize, | |
| + f: &[Image<f32>], | |
| + sf: &[Image<f32>], | |
| + ) -> Result<(), Error> { | |
| + assert_eq!( | |
| + f.len(), | |
| + sf.len(), | |
| + "Frame {fc} has different channels counts", | |
| + ); | |
| + for (c, (b, sb)) in f.iter().zip(sf.iter()).enumerate() { | |
| + assert_eq!( | |
| + b.size(), | |
| + sb.size(), | |
| + "Channel {c} in frame {fc} has different sizes", | |
| + ); | |
| + let sz = b.size(); | |
| + if false { | |
| + let f = std::fs::File::create(Path::new("/tmp/").join(format!( | |
| + "{}_diff_chan{c}.pbm", | |
| + path.as_os_str().to_string_lossy().replace("/", "_") | |
| + )))?; | |
| + use std::io::Write; | |
| + let mut f = std::io::BufWriter::new(f); | |
| + writeln!(f, "P1\n{} {}", sz.0, sz.1)?; | |
| + for y in 0..sz.1 { | |
| + for x in 0..sz.0 { | |
| + if (b.row(y)[x] - sb.row(y)[x]).abs() > 1e-8 { | |
| + write!(f, "1")?; | |
| + } else { | |
| + write!(f, "0")?; | |
| + } | |
| + } | |
| + } | |
| + drop(f); | |
| + } | |
| + for y in 0..sz.1 { | |
| + for x in 0..sz.0 { | |
| + assert_eq!( | |
| + b.row(y)[x], | |
| + sb.row(y)[x], | |
| + "Pixels differ at position ({x}, {y}), channel {c}" | |
| + ); | |
| + } | |
| + } | |
| + } | |
| + Ok(()) | |
| + } | |
| + | |
| fn compare_pipelines(path: &Path) -> Result<(), Error> { | |
| let file = std::fs::read(path)?; | |
| - let simple_frames = decode(&file, usize::MAX, true, None)?.1; | |
| - let frames = decode(&file, usize::MAX, false, None)?.1; | |
| + let simple_frames = decode(&file, usize::MAX, true, false, None)?.1; | |
| + let frames = decode(&file, usize::MAX, false, false, None)?.1; | |
| assert_eq!(frames.len(), simple_frames.len()); | |
| for (fc, (f, sf)) in frames | |
| .into_iter() | |
| .zip(simple_frames.into_iter()) | |
| .enumerate() | |
| { | |
| - assert_eq!( | |
| - f.len(), | |
| - sf.len(), | |
| - "Frame {fc} has different channels counts", | |
| - ); | |
| - for (c, (b, sb)) in f.into_iter().zip(sf.into_iter()).enumerate() { | |
| - assert_eq!( | |
| - b.size(), | |
| - sb.size(), | |
| - "Channel {c} in frame {fc} has different sizes", | |
| - ); | |
| - // TODO(veluca): This check actually succeeds if we disable SIMD. | |
| - // With SIMD, the exact output of computations in epf.rs appear to depend on the | |
| - // lane that the computation was done in (???). We should investigate this. | |
| - // b.as_rect().check_equal(sb.as_rect()); | |
| - let sz = b.size(); | |
| - if false { | |
| - let f = std::fs::File::create(Path::new("/tmp/").join(format!( | |
| - "{}_diff_chan{c}.pbm", | |
| - path.as_os_str().to_string_lossy().replace("/", "_") | |
| - )))?; | |
| - use std::io::Write; | |
| - let mut f = std::io::BufWriter::new(f); | |
| - writeln!(f, "P1\n{} {}", sz.0, sz.1)?; | |
| - for y in 0..sz.1 { | |
| - for x in 0..sz.0 { | |
| - if (b.row(y)[x] - sb.row(y)[x]).abs() > 1e-8 { | |
| - write!(f, "1")?; | |
| - } else { | |
| - write!(f, "0")?; | |
| - } | |
| - } | |
| - } | |
| - drop(f); | |
| - } | |
| - for y in 0..sz.1 { | |
| - for x in 0..sz.0 { | |
| - assert_almost_abs_eq_coords(b.row(y)[x], sb.row(y)[x], 1e-5, (x, y), c); | |
| - } | |
| - } | |
| - } | |
| + compare_frames(path, fc, &f, &sf)?; | |
| } | |
| Ok(()) | |
| } | |
| for_each_test_file!(compare_pipelines); | |
| + fn compare_incremental(path: &Path) -> Result<(), Error> { | |
| + let file = std::fs::read(path).unwrap(); | |
| + // One-shot decode | |
| + let (_, one_shot_frames) = decode(&file, usize::MAX, false, false, None)?; | |
| + // Incremental decode with arbitrary flushes. | |
| + let (_, frames) = decode(&file, 123, false, true, None)?; | |
| + | |
| + // Compare one_shot_frames and frames | |
| + assert_eq!(one_shot_frames.len(), frames.len()); | |
| + for (fc, (f, sf)) in frames | |
| + .into_iter() | |
| + .zip(one_shot_frames.into_iter()) | |
| + .enumerate() | |
| + { | |
| + compare_frames(path, fc, &f, &sf)?; | |
| + } | |
| + | |
| + Ok(()) | |
| + } | |
| + | |
| + for_each_test_file!(compare_incremental); | |
| + | |
| #[test] | |
| fn test_preview_size_none_for_regular_files() { | |
| let file = std::fs::read("resources/test/basic.jxl").unwrap(); | |
| @@ -539,6 +692,55 @@ pub(crate) mod tests { | |
| assert!(result.is_err()); | |
| } | |
| + #[test] | |
| + fn test_default_output_tf_by_pixel_format() { | |
| + use crate::api::{JxlColorEncoding, JxlTransferFunction}; | |
| + | |
| + // Using test image with ICC profile to trigger default transfer function path | |
| + let file = std::fs::read("resources/test/lossy_with_icc.jxl").unwrap(); | |
| + let options = JxlDecoderOptions::default(); | |
| + let mut decoder = JxlDecoder::<states::Initialized>::new(options); | |
| + let mut input = file.as_slice(); | |
| + let mut decoder = loop { | |
| + match decoder.process(&mut input).unwrap() { | |
| + ProcessingResult::Complete { result } => break result, | |
| + ProcessingResult::NeedsMoreInput { fallback, .. } => decoder = fallback, | |
| + } | |
| + }; | |
| + | |
| + // Output data format will default to F32, so output color profile will be linear sRGB | |
| + assert_eq!( | |
| + *decoder.output_color_profile().transfer_function().unwrap(), | |
| + JxlTransferFunction::Linear, | |
| + ); | |
| + | |
| + // Integer data format will set output color profile to sRGB | |
| + decoder.set_pixel_format(JxlPixelFormat::rgba8(0)); | |
| + assert_eq!( | |
| + *decoder.output_color_profile().transfer_function().unwrap(), | |
| + JxlTransferFunction::SRGB, | |
| + ); | |
| + | |
| + decoder.set_pixel_format(JxlPixelFormat::rgba_f16(0)); | |
| + assert_eq!( | |
| + *decoder.output_color_profile().transfer_function().unwrap(), | |
| + JxlTransferFunction::Linear, | |
| + ); | |
| + | |
| + decoder.set_pixel_format(JxlPixelFormat::rgba16(0)); | |
| + assert_eq!( | |
| + *decoder.output_color_profile().transfer_function().unwrap(), | |
| + JxlTransferFunction::SRGB, | |
| + ); | |
| + | |
| + // Once output color profile is set by user, it will remain as is regardless of what pixel | |
| + // format is set | |
| + let profile = JxlColorProfile::Simple(JxlColorEncoding::srgb(false)); | |
| + decoder.set_output_color_profile(profile.clone()).unwrap(); | |
| + decoder.set_pixel_format(JxlPixelFormat::rgba_f16(0)); | |
| + assert!(decoder.output_color_profile() == &profile); | |
| + } | |
| + | |
| #[test] | |
| fn test_fill_opaque_alpha_both_pipelines() { | |
| use crate::api::{JxlColorType, JxlDataFormat, JxlPixelFormat}; | |
| @@ -1230,7 +1432,7 @@ pub(crate) mod tests { | |
| // The test passes if it doesn't panic with "attempt to add with overflow" | |
| // It's OK if it returns an error or panics with "Unexpected end of input" | |
| let result = panic::catch_unwind(|| { | |
| - let _ = decode(data, 1024, false, None); | |
| + let _ = decode(data, 1024, false, false, None); | |
| }); | |
| // If it panicked, make sure it wasn't an overflow panic | |
| @@ -1247,4 +1449,538 @@ pub(crate) mod tests { | |
| ); | |
| } | |
| } | |
| + | |
| + fn make_box(ty: &[u8; 4], content: &[u8]) -> Vec<u8> { | |
| + let len = (8 + content.len()) as u32; | |
| + let mut buf = Vec::new(); | |
| + buf.extend(len.to_be_bytes()); | |
| + buf.extend(ty); | |
| + buf.extend(content); | |
| + buf | |
| + } | |
| + | |
| + fn add_container_header(container: &mut Vec<u8>) { | |
| + // JXL signature box | |
| + let sig = [ | |
| + 0x00, 0x00, 0x00, 0x0c, 0x4a, 0x58, 0x4c, 0x20, 0x0d, 0x0a, 0x87, 0x0a, | |
| + ]; | |
| + // ftyp box | |
| + let ftyp = make_box(b"ftyp", b"jxl \x00\x00\x00\x00jxl "); | |
| + container.extend(&sig); | |
| + container.extend(&ftyp); | |
| + } | |
| + | |
| + /// Helper to wrap a bare codestream in a JXL container with a jxli frame index box. | |
| + fn wrap_with_frame_index( | |
| + codestream: &[u8], | |
| + tnum: u32, | |
| + tden: u32, | |
| + entries: &[(u64, u64, u64)], // (OFF_delta, T, F) | |
| + ) -> Vec<u8> { | |
| + use crate::util::test::build_frame_index_content; | |
| + | |
| + let jxli_content = build_frame_index_content(tnum, tden, entries); | |
| + | |
| + let jxli = make_box(b"jxli", &jxli_content); | |
| + let jxlc = make_box(b"jxlc", codestream); | |
| + | |
| + let mut container = Vec::new(); | |
| + add_container_header(&mut container); | |
| + container.extend(&jxli); | |
| + container.extend(&jxlc); | |
| + container | |
| + } | |
| + | |
| + /// Helper to wrap a bare codestream in a container split across jxlp boxes. | |
| + /// | |
| + /// `chunk_starts` are codestream offsets where each new jxlp chunk begins. | |
| + fn wrap_with_jxlp_chunks(codestream: &[u8], chunk_starts: &[usize]) -> Vec<u8> { | |
| + let mut starts = chunk_starts.to_vec(); | |
| + starts.sort_unstable(); | |
| + starts.dedup(); | |
| + if starts.first().copied() != Some(0) { | |
| + starts.insert(0, 0); | |
| + } | |
| + if starts.last().copied() != Some(codestream.len()) { | |
| + starts.push(codestream.len()); | |
| + } | |
| + assert!(starts.len() >= 2); | |
| + | |
| + let mut container = Vec::new(); | |
| + add_container_header(&mut container); | |
| + | |
| + let num_chunks = starts.len() - 1; | |
| + for i in 0..num_chunks { | |
| + let begin = starts[i]; | |
| + let end = starts[i + 1]; | |
| + assert!(begin <= end && end <= codestream.len()); | |
| + | |
| + let mut payload = Vec::with_capacity(4 + (end - begin)); | |
| + let mut index = i as u32; | |
| + if i + 1 == num_chunks { | |
| + index |= 0x8000_0000; | |
| + } | |
| + payload.extend(index.to_be_bytes()); | |
| + payload.extend(&codestream[begin..end]); | |
| + container.extend(make_box(b"jxlp", &payload)); | |
| + } | |
| + | |
| + container | |
| + } | |
| + | |
| + #[test] | |
| + fn test_frame_index_parsed_from_container() { | |
| + // Read a bare animation codestream and wrap it in a container with a jxli box. | |
| + let codestream = | |
| + std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap(); | |
| + | |
| + // Create synthetic frame index entries (delta offsets). | |
| + // These are synthetic -- we don't know real frame offsets, but we can verify parsing. | |
| + let entries = vec![ | |
| + (0u64, 100u64, 1u64), // Frame 0 at offset 0 | |
| + (500, 100, 1), // Frame 1 at offset 500 | |
| + (600, 100, 1), // Frame 2 at offset 1100 | |
| + ]; | |
| + | |
| + let container = wrap_with_frame_index(&codestream, 1, 1000, &entries); | |
| + | |
| + // Decode with a large chunk size so the jxli box is fully consumed. | |
| + let options = JxlDecoderOptions::default(); | |
| + let mut dec = JxlDecoder::<states::Initialized>::new(options); | |
| + let mut input: &[u8] = &container; | |
| + let dec = loop { | |
| + match dec.process(&mut input).unwrap() { | |
| + ProcessingResult::Complete { result } => break result, | |
| + ProcessingResult::NeedsMoreInput { fallback, .. } => { | |
| + if input.is_empty() { | |
| + panic!("Unexpected end of input"); | |
| + } | |
| + dec = fallback; | |
| + } | |
| + } | |
| + }; | |
| + | |
| + // Check that frame index was parsed. | |
| + let fi = dec.frame_index().expect("frame_index should be Some"); | |
| + assert_eq!(fi.num_frames(), 3); | |
| + assert_eq!(fi.tnum, 1); | |
| + assert_eq!(fi.tden.get(), 1000); | |
| + // Verify absolute offsets (accumulated from deltas) | |
| + assert_eq!(fi.entries[0].codestream_offset, 0); | |
| + assert_eq!(fi.entries[1].codestream_offset, 500); | |
| + assert_eq!(fi.entries[2].codestream_offset, 1100); | |
| + assert_eq!(fi.entries[0].duration_ticks, 100); | |
| + assert_eq!(fi.entries[2].frame_count, 1); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_frame_index_none_for_bare_codestream() { | |
| + // A bare codestream has no container, so no frame index. | |
| + let data = | |
| + std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap(); | |
| + let options = JxlDecoderOptions::default(); | |
| + let mut dec = JxlDecoder::<states::Initialized>::new(options); | |
| + let mut input: &[u8] = &data; | |
| + let dec = loop { | |
| + match dec.process(&mut input).unwrap() { | |
| + ProcessingResult::Complete { result } => break result, | |
| + ProcessingResult::NeedsMoreInput { fallback, .. } => { | |
| + if input.is_empty() { | |
| + panic!("Unexpected end of input"); | |
| + } | |
| + dec = fallback; | |
| + } | |
| + } | |
| + }; | |
| + assert!(dec.frame_index().is_none()); | |
| + } | |
| + | |
| + fn scan_frames_with_decoder(mut input: &[u8], chunk_size: usize) -> Vec<VisibleFrameInfo> { | |
| + let mut chunk_input = &input[0..0]; | |
| + let options = JxlDecoderOptions { | |
| + scan_frames_only: true, | |
| + skip_preview: false, | |
| + ..Default::default() | |
| + }; | |
| + let mut initialized_decoder = JxlDecoder::<states::Initialized>::new(options); | |
| + | |
| + macro_rules! advance_process { | |
| + ($decoder: ident) => { | |
| + loop { | |
| + chunk_input = | |
| + &input[..(chunk_input.len().saturating_add(chunk_size)).min(input.len())]; | |
| + let available_before = chunk_input.len(); | |
| + let process_result = $decoder.process(&mut chunk_input); | |
| + input = &input[(available_before - chunk_input.len())..]; | |
| + match process_result.unwrap() { | |
| + ProcessingResult::Complete { result } => break result, | |
| + ProcessingResult::NeedsMoreInput { fallback, .. } => { | |
| + if input.is_empty() { | |
| + panic!("Unexpected end of input"); | |
| + } | |
| + $decoder = fallback; | |
| + } | |
| + } | |
| + } | |
| + }; | |
| + } | |
| + | |
| + macro_rules! advance_skip { | |
| + ($decoder: ident) => { | |
| + loop { | |
| + chunk_input = | |
| + &input[..(chunk_input.len().saturating_add(chunk_size)).min(input.len())]; | |
| + let available_before = chunk_input.len(); | |
| + let process_result = $decoder.skip_frame(&mut chunk_input); | |
| + input = &input[(available_before - chunk_input.len())..]; | |
| + match process_result.unwrap() { | |
| + ProcessingResult::Complete { result } => break result, | |
| + ProcessingResult::NeedsMoreInput { fallback, .. } => { | |
| + if input.is_empty() { | |
| + panic!("Unexpected end of input"); | |
| + } | |
| + $decoder = fallback; | |
| + } | |
| + } | |
| + } | |
| + }; | |
| + } | |
| + | |
| + let mut decoder_with_image_info = advance_process!(initialized_decoder); | |
| + | |
| + if !decoder_with_image_info.has_more_frames() { | |
| + return decoder_with_image_info.scanned_frames().to_vec(); | |
| + } | |
| + | |
| + loop { | |
| + let mut decoder_with_frame_info = advance_process!(decoder_with_image_info); | |
| + decoder_with_image_info = advance_skip!(decoder_with_frame_info); | |
| + if !decoder_with_image_info.has_more_frames() { | |
| + break; | |
| + } | |
| + } | |
| + | |
| + decoder_with_image_info.scanned_frames().to_vec() | |
| + } | |
| + | |
| + fn assert_start_new_frame_matches_sequential(data: &[u8], expect_bare_codestream: bool) { | |
| + use crate::api::{JxlDataFormat, JxlPixelFormat}; | |
| + use crate::image::{Image, Rect}; | |
| + | |
| + // 1. Scan frame info to get seek offsets. | |
| + let scanned_frames = scan_frames_with_decoder(data, usize::MAX); | |
| + assert!(scanned_frames.len() > 1, "need multiple frames"); | |
| + | |
| + // Compare against second visible frame from regular sequential decode. | |
| + let target_visible_index = 1; | |
| + let seek_target = scanned_frames[target_visible_index].seek_target; | |
| + | |
| + if expect_bare_codestream { | |
| + assert_eq!(seek_target.remaining_in_box, u64::MAX); | |
| + } else { | |
| + assert_ne!(seek_target.remaining_in_box, u64::MAX); | |
| + } | |
| + | |
| + // 2. Decode all frames sequentially and keep the reference frame. | |
| + let (_n, sequential_frames) = decode(data, usize::MAX, false, false, None).unwrap(); | |
| + let expected = &sequential_frames[target_visible_index]; | |
| + | |
| + // 3. Create decoder and parse image info. | |
| + let options = JxlDecoderOptions::default(); | |
| + let decoder = JxlDecoder::<states::Initialized>::new(options); | |
| + let mut input = data; | |
| + | |
| + let ProcessingResult::Complete { | |
| + result: mut decoder, | |
| + } = decoder.process(&mut input).unwrap() | |
| + else { | |
| + panic!("expected Complete with full data"); | |
| + }; | |
| + | |
| + let basic_info = decoder.basic_info().clone(); | |
| + let (width, height) = basic_info.size; | |
| + | |
| + // Match the same requested output format as the sequential helper. | |
| + let default_format = decoder.current_pixel_format().clone(); | |
| + let requested_format = JxlPixelFormat { | |
| + color_type: default_format.color_type, | |
| + color_data_format: Some(JxlDataFormat::f32()), | |
| + extra_channel_format: default_format | |
| + .extra_channel_format | |
| + .iter() | |
| + .map(|_| Some(JxlDataFormat::f32())) | |
| + .collect(), | |
| + }; | |
| + decoder.set_pixel_format(requested_format.clone()); | |
| + | |
| + let channels = requested_format.color_type.samples_per_pixel(); | |
| + let num_ec = requested_format.extra_channel_format.len(); | |
| + | |
| + // 4. Seek to decode-start and advance to the target visible frame. | |
| + decoder.start_new_frame(seek_target); | |
| + let mut input = &data[seek_target.decode_start_file_offset..]; | |
| + | |
| + for _ in 0..seek_target.visible_frames_to_skip { | |
| + let mut decoder_frame = loop { | |
| + match decoder.process(&mut input).unwrap() { | |
| + ProcessingResult::Complete { result } => break result, | |
| + ProcessingResult::NeedsMoreInput { fallback, .. } => { | |
| + decoder = fallback; | |
| + } | |
| + } | |
| + }; | |
| + | |
| + decoder = loop { | |
| + match decoder_frame.skip_frame(&mut input).unwrap() { | |
| + ProcessingResult::Complete { result } => break result, | |
| + ProcessingResult::NeedsMoreInput { fallback, .. } => { | |
| + decoder_frame = fallback; | |
| + } | |
| + } | |
| + }; | |
| + } | |
| + | |
| + let mut decoder_frame = loop { | |
| + match decoder.process(&mut input).unwrap() { | |
| + ProcessingResult::Complete { result } => break result, | |
| + ProcessingResult::NeedsMoreInput { fallback, .. } => { | |
| + decoder = fallback; | |
| + } | |
| + } | |
| + }; | |
| + | |
| + let mut color_buffer = Image::<f32>::new((width * channels, height)).unwrap(); | |
| + let mut ec_buffers: Vec<Image<f32>> = (0..num_ec) | |
| + .map(|_| Image::<f32>::new((width, height)).unwrap()) | |
| + .collect(); | |
| + let mut buffers: Vec<JxlOutputBuffer> = vec![JxlOutputBuffer::from_image_rect_mut( | |
| + color_buffer | |
| + .get_rect_mut(Rect { | |
| + origin: (0, 0), | |
| + size: (width * channels, height), | |
| + }) | |
| + .into_raw(), | |
| + )]; | |
| + for ec in ec_buffers.iter_mut() { | |
| + buffers.push(JxlOutputBuffer::from_image_rect_mut( | |
| + ec.get_rect_mut(Rect { | |
| + origin: (0, 0), | |
| + size: (width, height), | |
| + }) | |
| + .into_raw(), | |
| + )); | |
| + } | |
| + | |
| + let _decoder = loop { | |
| + match decoder_frame.process(&mut input, &mut buffers).unwrap() { | |
| + ProcessingResult::Complete { result } => break result, | |
| + ProcessingResult::NeedsMoreInput { fallback, .. } => { | |
| + decoder_frame = fallback; | |
| + } | |
| + } | |
| + }; | |
| + | |
| + // 5. Compare seek-decoded frame against sequential decode reference. | |
| + let mut seek_decoded = Vec::with_capacity(1 + num_ec); | |
| + seek_decoded.push(color_buffer); | |
| + seek_decoded.extend(ec_buffers); | |
| + compare_frames( | |
| + Path::new("start_new_frame_seek"), | |
| + target_visible_index, | |
| + expected, | |
| + &seek_decoded, | |
| + ) | |
| + .unwrap(); | |
| + } | |
| + | |
| + /// Test that `start_new_frame()` + scanner seek info decodes the same | |
| + /// frame as regular sequential decode for bare codestream input. | |
| + #[test] | |
| + fn test_start_new_frame_bare_codestream() { | |
| + let data = | |
| + std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap(); | |
| + assert_start_new_frame_matches_sequential(&data, true); | |
| + } | |
| + | |
| + /// Test that `start_new_frame()` + scanner seek info also works for boxed input. | |
| + #[test] | |
| + fn test_start_new_frame_boxed_codestream() { | |
| + let codestream = | |
| + std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap(); | |
| + let entries = vec![(0u64, 100u64, 1u64), (500, 100, 1), (600, 100, 1)]; | |
| + let container = wrap_with_frame_index(&codestream, 1, 1000, &entries); | |
| + assert_start_new_frame_matches_sequential(&container, false); | |
| + } | |
| + | |
| + /// Test seek/scanner behavior when codestream data is split across jxlp boxes, | |
| + /// with each visible frame starting in its own chunk. | |
| + #[test] | |
| + fn test_start_new_frame_boxed_jxlp_per_visible_frame() { | |
| + let codestream = | |
| + std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap(); | |
| + | |
| + let scanned_frames = scan_frames_with_decoder(&codestream, usize::MAX); | |
| + assert!(scanned_frames.len() > 1, "need multiple frames"); | |
| + | |
| + let (decoded_frames, _) = decode(&codestream, usize::MAX, false, false, None).unwrap(); | |
| + assert_eq!( | |
| + decoded_frames, | |
| + scanned_frames.len(), | |
| + "test file should have one codestream frame per visible frame", | |
| + ); | |
| + | |
| + let mut chunk_starts: Vec<usize> = scanned_frames.iter().map(|f| f.file_offset).collect(); | |
| + chunk_starts.sort_unstable(); | |
| + chunk_starts.dedup(); | |
| + assert_eq!(chunk_starts.len(), scanned_frames.len()); | |
| + | |
| + let container = wrap_with_jxlp_chunks(&codestream, &chunk_starts); | |
| + assert_start_new_frame_matches_sequential(&container, false); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_scan_still_image() { | |
| + let data = std::fs::read("resources/test/green_queen_vardct_e3.jxl").unwrap(); | |
| + let frames = scan_frames_with_decoder(&data, usize::MAX); | |
| + | |
| + assert_eq!(frames.len(), 1); | |
| + assert!(frames[0].is_last); | |
| + assert!(frames[0].is_keyframe); | |
| + let total_duration_ms: f64 = frames.iter().map(|f| f.duration_ms).sum(); | |
| + assert_eq!(total_duration_ms, 0.0); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_scan_bare_animation() { | |
| + let data = | |
| + std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap(); | |
| + let frames = scan_frames_with_decoder(&data, usize::MAX); | |
| + | |
| + assert!(frames.len() > 1, "expected multiple frames"); | |
| + | |
| + for (i, frame) in frames.iter().enumerate() { | |
| + assert_eq!(frame.index, i); | |
| + } | |
| + | |
| + assert!(frames.last().unwrap().is_last); | |
| + | |
| + assert!(frames[0].is_keyframe); | |
| + assert_eq!( | |
| + frames[0].seek_target.decode_start_file_offset, | |
| + frames[0].file_offset | |
| + ); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_scan_animation_offsets_increase() { | |
| + let data = | |
| + std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap(); | |
| + let frames = scan_frames_with_decoder(&data, usize::MAX); | |
| + | |
| + for i in 1..frames.len() { | |
| + assert!( | |
| + frames[i].file_offset > frames[i - 1].file_offset, | |
| + "frame {} offset {} should be > frame {} offset {}", | |
| + i, | |
| + frames[i].file_offset, | |
| + i - 1, | |
| + frames[i - 1].file_offset, | |
| + ); | |
| + } | |
| + } | |
| + | |
| + #[test] | |
| + fn test_scan_incremental() { | |
| + let data = | |
| + std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap(); | |
| + | |
| + let frames = scan_frames_with_decoder(&data, 128); | |
| + assert!(frames.len() > 1); | |
| + assert!(frames.last().unwrap().is_last); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_scan_keyframe_detection_still() { | |
| + let data = std::fs::read("resources/test/green_queen_vardct_e3.jxl").unwrap(); | |
| + let frames = scan_frames_with_decoder(&data, usize::MAX); | |
| + | |
| + assert_eq!(frames.len(), 1); | |
| + let f = &frames[0]; | |
| + assert!(f.is_keyframe); | |
| + assert_eq!(f.seek_target.decode_start_file_offset, f.file_offset); | |
| + assert_eq!(f.seek_target.visible_frames_to_skip, 0); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_scan_decode_start_file_offset_consistency() { | |
| + let data = | |
| + std::fs::read("resources/test/conformance_test_images/animation_icos4d_5.jxl").unwrap(); | |
| + | |
| + let frames = scan_frames_with_decoder(&data, usize::MAX); | |
| + | |
| + for frame in &frames { | |
| + assert!( | |
| + frame.seek_target.decode_start_file_offset <= frame.file_offset, | |
| + "frame {}: decode_start_file_offset {} > file_offset {}", | |
| + frame.index, | |
| + frame.seek_target.decode_start_file_offset, | |
| + frame.file_offset, | |
| + ); | |
| + assert_eq!( | |
| + frame.is_keyframe, | |
| + frame.seek_target.visible_frames_to_skip == 0, | |
| + "frame {}: keyframe flag should match visible_frames_to_skip", | |
| + frame.index, | |
| + ); | |
| + } | |
| + } | |
| + | |
| + #[test] | |
| + fn test_scan_with_preview() { | |
| + let data = std::fs::read("resources/test/with_preview.jxl"); | |
| + if data.is_err() { | |
| + return; | |
| + } | |
| + let data = data.unwrap(); | |
| + let frames = scan_frames_with_decoder(&data, usize::MAX); | |
| + | |
| + assert!(frames.len() <= 1); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_scan_patches_not_keyframe() { | |
| + let data = std::fs::read("resources/test/grayscale_patches_var_dct.jxl"); | |
| + if data.is_err() { | |
| + return; | |
| + } | |
| + let data = data.unwrap(); | |
| + let frames = scan_frames_with_decoder(&data, usize::MAX); | |
| + | |
| + assert!(!frames.is_empty()); | |
| + } | |
| + | |
| + /// Regression test for Chromium ClusterFuzz issue 474401148. | |
| + #[test] | |
| + fn test_fuzzer_xyb_icc_no_panic() { | |
| + use crate::api::ProcessingResult; | |
| + | |
| + #[rustfmt::skip] | |
| + let data: &[u8] = &[ | |
| + 0xff, 0x0a, 0x01, 0x00, 0x00, 0x04, 0x00, 0x00, | |
| + 0x00, 0x00, 0x00, 0x00, 0x00, 0x11, 0x25, 0x00, | |
| + ]; | |
| + | |
| + let opts = JxlDecoderOptions { | |
| + pixel_limit: Some(1024 * 1024 * 1024), | |
| + ..Default::default() | |
| + }; | |
| + let mut decoder = JxlDecoderInner::new(opts); | |
| + let mut input = data; | |
| + | |
| + if let Ok(ProcessingResult::Complete { .. }) = decoder.process(&mut input, None) | |
| + && let Some(profile) = decoder.output_color_profile() | |
| + { | |
| + let _ = profile.try_as_icc(); | |
| + } | |
| + } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs | |
| index eb66cb3b1f4cf..e2b452cbc81c9 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/box_parser.rs | |
| @@ -3,6 +3,9 @@ | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| +use std::io::IoSliceMut; | |
| + | |
| +use crate::container::frame_index::FrameIndexBox; | |
| use crate::error::{Error, Result}; | |
| use crate::api::{ | |
| @@ -15,6 +18,8 @@ enum ParseState { | |
| BoxNeeded, | |
| CodestreamBox(u64), | |
| SkippableBox(u64), | |
| + /// Buffering a jxli box: (remaining bytes, accumulated content). | |
| + BufferingFrameIndex(u64, Vec<u8>), | |
| } | |
| enum CodestreamBoxType { | |
| @@ -28,6 +33,10 @@ pub(super) struct BoxParser { | |
| pub(super) box_buffer: SmallBuffer, | |
| state: ParseState, | |
| box_type: CodestreamBoxType, | |
| + /// Parsed frame index box, if present in the file. | |
| + pub(super) frame_index: Option<FrameIndexBox>, | |
| + /// Total file bytes consumed from the underlying input. | |
| + pub(super) total_file_consumed: u64, | |
| } | |
| impl BoxParser { | |
| @@ -36,6 +45,8 @@ impl BoxParser { | |
| box_buffer: SmallBuffer::new(128), | |
| state: ParseState::SignatureNeeded, | |
| box_type: CodestreamBoxType::None, | |
| + frame_index: None, | |
| + total_file_consumed: 0, | |
| } | |
| } | |
| @@ -49,7 +60,8 @@ impl BoxParser { | |
| loop { | |
| match self.state.clone() { | |
| ParseState::SignatureNeeded => { | |
| - self.box_buffer.refill(|b| input.read(b), None)?; | |
| + let read = self.box_buffer.refill(|b| input.read(b), None)?; | |
| + self.total_file_consumed += read as u64; | |
| match check_signature_internal(&self.box_buffer)? { | |
| None => return Err(Error::InvalidSignature), | |
| Some(JxlSignatureType::Codestream) => { | |
| @@ -71,7 +83,9 @@ impl BoxParser { | |
| let skipped = if !self.box_buffer.is_empty() { | |
| self.box_buffer.consume(num) | |
| } else { | |
| - input.skip(num)? | |
| + let skipped = input.skip(num)?; | |
| + self.total_file_consumed += skipped as u64; | |
| + skipped | |
| }; | |
| if skipped == 0 { | |
| return Err(Error::OutOfBounds(num)); | |
| @@ -83,8 +97,35 @@ impl BoxParser { | |
| self.state = ParseState::SkippableBox(s); | |
| } | |
| } | |
| + ParseState::BufferingFrameIndex(mut remaining, mut buf) => { | |
| + let num = remaining.min(usize::MAX as u64) as usize; | |
| + if !self.box_buffer.is_empty() { | |
| + let take = num.min(self.box_buffer.len()); | |
| + buf.extend_from_slice(&self.box_buffer[..take]); | |
| + self.box_buffer.consume(take); | |
| + remaining -= take as u64; | |
| + } else { | |
| + let old_len = buf.len(); | |
| + buf.resize(old_len + num, 0); | |
| + let read = input.read(&mut [IoSliceMut::new(&mut buf[old_len..])])?; | |
| + self.total_file_consumed += read as u64; | |
| + if read == 0 { | |
| + return Err(Error::OutOfBounds(num)); | |
| + } | |
| + buf.truncate(old_len + read); | |
| + remaining -= read as u64; | |
| + } | |
| + if remaining == 0 { | |
| + // Parse the buffered frame index box. | |
| + self.frame_index = Some(FrameIndexBox::parse(&buf)?); | |
| + self.state = ParseState::BoxNeeded; | |
| + } else { | |
| + self.state = ParseState::BufferingFrameIndex(remaining, buf); | |
| + } | |
| + } | |
| ParseState::BoxNeeded => { | |
| - self.box_buffer.refill(|b| input.read(b), None)?; | |
| + let read = self.box_buffer.refill(|b| input.read(b), None)?; | |
| + self.total_file_consumed += read as u64; | |
| let min_len = match &self.box_buffer[..] { | |
| [0, 0, 0, 1, ..] => 16, | |
| _ => 8, | |
| @@ -148,6 +189,20 @@ impl BoxParser { | |
| }; | |
| self.state = ParseState::CodestreamBox(content_len); | |
| } | |
| + b"jxli" => { | |
| + if content_len == u64::MAX { | |
| + return Err(Error::InvalidBox); | |
| + } | |
| + // Reasonable size limit for a frame index box (16 MB). | |
| + if content_len > 16 * 1024 * 1024 { | |
| + self.state = ParseState::SkippableBox(content_len); | |
| + } else { | |
| + self.state = ParseState::BufferingFrameIndex( | |
| + content_len, | |
| + Vec::with_capacity(content_len as usize), | |
| + ); | |
| + } | |
| + } | |
| _ => { | |
| self.state = ParseState::SkippableBox(content_len); | |
| } | |
| @@ -158,6 +213,26 @@ impl BoxParser { | |
| } | |
| } | |
| + /// Accounts for file bytes consumed directly by codestream parser reads/skips. | |
| + pub(super) fn mark_file_consumed(&mut self, amount: usize) { | |
| + self.total_file_consumed += amount as u64; | |
| + } | |
| + | |
| + /// Resets the box parser for seeking to a specific codestream position. | |
| + /// | |
| + /// Sets the parser to `CodestreamBox(remaining)` state with cleared | |
| + /// buffers. The caller must provide raw input starting from the file | |
| + /// position that corresponds to the target codestream offset. | |
| + /// | |
| + /// `remaining` is the number of codestream bytes left in the current | |
| + /// box from the target file position. For bare-codestream files this | |
| + /// is `u64::MAX`. | |
| + pub(super) fn reset_for_codestream_seek(&mut self, remaining: u64) { | |
| + self.box_buffer = SmallBuffer::new(128); | |
| + self.state = ParseState::CodestreamBox(remaining); | |
| + // Keep frame_index unchanged. | |
| + } | |
| + | |
| pub(super) fn consume_codestream(&mut self, amount: u64) { | |
| if let ParseState::CodestreamBox(cb) = &mut self.state { | |
| *cb = cb.checked_sub(amount).unwrap(); | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs | |
| index a5b650eacd226..ab645281dfffd 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/mod.rs | |
| @@ -14,8 +14,9 @@ use sections::SectionState; | |
| use crate::api::FrameCallback; | |
| use crate::{ | |
| api::{ | |
| - JxlBasicInfo, JxlBitstreamInput, JxlColorProfile, JxlDecoderOptions, JxlOutputBuffer, | |
| - JxlPixelFormat, | |
| + JxlBasicInfo, JxlBitstreamInput, JxlColorEncoding, JxlColorProfile, JxlDataFormat, | |
| + JxlDecoderOptions, JxlOutputBuffer, JxlPixelFormat, VisibleFrameInfo, | |
| + VisibleFrameSeekTarget, | |
| inner::{box_parser::BoxParser, process::SmallBuffer}, | |
| }, | |
| error::{Error, Result}, | |
| @@ -33,6 +34,13 @@ struct SectionBuffer { | |
| section: Section, | |
| } | |
| +#[derive(Clone, Copy)] | |
| +struct FrameStartInfo { | |
| + file_offset: usize, | |
| + remaining_in_box: u64, | |
| + visible_count_before: usize, | |
| +} | |
| + | |
| pub(super) struct CodestreamParser { | |
| // TODO(veluca): this would probably be cleaner with some kind of state enum. | |
| pub(super) file_header: Option<FileHeader>, | |
| @@ -44,6 +52,9 @@ pub(super) struct CodestreamParser { | |
| pub(super) embedded_color_profile: Option<JxlColorProfile>, | |
| pub(super) output_color_profile: Option<JxlColorProfile>, | |
| pub(super) pixel_format: Option<JxlPixelFormat>, | |
| + xyb_encoded: bool, | |
| + is_gray: bool, | |
| + pub(super) output_color_profile_set_by_user: bool, | |
| // These fields are populated when starting to decode a frame, and cleared once | |
| // the frame is done. | |
| @@ -79,6 +90,27 @@ pub(super) struct CodestreamParser { | |
| header_needed_bytes: Option<u64>, | |
| + // --- Frame info tracking (for frame scanning) --- | |
| + /// Collected visible frame info entries. | |
| + pub(super) scanned_frames: Vec<VisibleFrameInfo>, | |
| + /// Zero-based visible frame index counter. | |
| + visible_frame_index: usize, | |
| + /// File offsets and visibility info for every non-preview frame (visible | |
| + /// and non-visible), in parse order. | |
| + frame_starts: Vec<FrameStartInfo>, | |
| + /// For each reference slot, earliest frame index required to reconstruct | |
| + /// the current contents of that slot. | |
| + reference_slot_decode_start: [Option<usize>; DecoderState::MAX_STORED_FRAMES], | |
| + /// For each LF slot, earliest frame index required to reconstruct the | |
| + /// current contents of that slot. | |
| + lf_slot_decode_start: [Option<usize>; DecoderState::NUM_LF_FRAMES], | |
| + /// File byte offset where the current frame header parse started. | |
| + /// Set when we begin parsing a frame header. | |
| + current_frame_file_offset: usize, | |
| + /// Remaining codestream bytes in the current box at frame start. | |
| + /// Captured alongside `current_frame_file_offset`. | |
| + current_frame_remaining_in_box: u64, | |
| + | |
| #[cfg(test)] | |
| pub frame_callback: Option<Box<FrameCallback>>, | |
| #[cfg(test)] | |
| @@ -96,6 +128,9 @@ impl CodestreamParser { | |
| embedded_color_profile: None, | |
| output_color_profile: None, | |
| pixel_format: None, | |
| + xyb_encoded: false, | |
| + is_gray: false, | |
| + output_color_profile_set_by_user: false, | |
| frame_header: None, | |
| toc_parser: None, | |
| frame: None, | |
| @@ -115,6 +150,13 @@ impl CodestreamParser { | |
| candidate_hf_sections: HashSet::new(), | |
| has_more_frames: true, | |
| header_needed_bytes: None, | |
| + scanned_frames: Vec::new(), | |
| + visible_frame_index: 0, | |
| + frame_starts: Vec::new(), | |
| + reference_slot_decode_start: [None; DecoderState::MAX_STORED_FRAMES], | |
| + lf_slot_decode_start: [None; DecoderState::NUM_LF_FRAMES], | |
| + current_frame_file_offset: 0, | |
| + current_frame_remaining_in_box: u64::MAX, | |
| #[cfg(test)] | |
| frame_callback: None, | |
| #[cfg(test)] | |
| @@ -130,6 +172,125 @@ impl CodestreamParser { | |
| } | |
| } | |
| + /// Record frame info for the just-parsed frame. | |
| + /// Called after process_non_section() creates a Frame, for frame scanning. | |
| + fn record_frame_info(&mut self) { | |
| + let frame = match self.frame.as_ref() { | |
| + Some(f) => f, | |
| + None => return, | |
| + }; | |
| + let header = frame.header(); | |
| + | |
| + let current_frame_index = self.frame_starts.len(); | |
| + let is_visible = header.is_visible(); | |
| + self.frame_starts.push(FrameStartInfo { | |
| + file_offset: self.current_frame_file_offset, | |
| + remaining_in_box: self.current_frame_remaining_in_box, | |
| + visible_count_before: self.visible_frame_index, | |
| + }); | |
| + | |
| + let mut decode_start_frame_index = current_frame_index; | |
| + | |
| + // Track frame dependencies through reference slots. For blending we know | |
| + // exactly which slots are used. For patches we conservatively assume any | |
| + // reference slot may be used. | |
| + let mut used_reference_slots = [false; DecoderState::MAX_STORED_FRAMES]; | |
| + if header.needs_blending() { | |
| + for blending_info in header | |
| + .ec_blending_info | |
| + .iter() | |
| + .chain(std::iter::once(&header.blending_info)) | |
| + { | |
| + let source = blending_info.source as usize; | |
| + assert!( | |
| + source < DecoderState::MAX_STORED_FRAMES, | |
| + "invalid blending source slot {source}, max {}", | |
| + DecoderState::MAX_STORED_FRAMES - 1 | |
| + ); | |
| + used_reference_slots[source] = true; | |
| + } | |
| + } | |
| + if header.has_patches() { | |
| + used_reference_slots.fill(true); | |
| + } | |
| + | |
| + for (slot, used) in used_reference_slots.iter().enumerate() { | |
| + if *used && let Some(dep_start) = self.reference_slot_decode_start[slot] { | |
| + decode_start_frame_index = decode_start_frame_index.min(dep_start); | |
| + } | |
| + } | |
| + | |
| + if header.has_lf_frame() { | |
| + let lf_slot = header.lf_level as usize; | |
| + assert!( | |
| + lf_slot < DecoderState::NUM_LF_FRAMES, | |
| + "invalid lf slot {lf_slot}, max {}", | |
| + DecoderState::NUM_LF_FRAMES - 1 | |
| + ); | |
| + if let Some(dep_start) = self.lf_slot_decode_start[lf_slot] { | |
| + decode_start_frame_index = decode_start_frame_index.min(dep_start); | |
| + } | |
| + } | |
| + | |
| + if is_visible { | |
| + let duration_ticks = header.duration; | |
| + let duration_ms = if let Some(ref anim) = self.animation { | |
| + if anim.tps_numerator > 0 { | |
| + (duration_ticks as f64) * 1000.0 * (anim.tps_denominator as f64) | |
| + / (anim.tps_numerator as f64) | |
| + } else { | |
| + 0.0 | |
| + } | |
| + } else { | |
| + 0.0 | |
| + }; | |
| + | |
| + let decode_start = self.frame_starts[decode_start_frame_index]; | |
| + let seek_target = VisibleFrameSeekTarget { | |
| + decode_start_file_offset: decode_start.file_offset, | |
| + remaining_in_box: decode_start.remaining_in_box, | |
| + visible_frames_to_skip: self | |
| + .visible_frame_index | |
| + .saturating_sub(decode_start.visible_count_before), | |
| + }; | |
| + let is_keyframe = seek_target.visible_frames_to_skip == 0; | |
| + | |
| + self.scanned_frames.push(VisibleFrameInfo { | |
| + index: self.visible_frame_index, | |
| + duration_ms, | |
| + duration_ticks, | |
| + file_offset: self.current_frame_file_offset, | |
| + is_last: header.is_last, | |
| + is_keyframe, | |
| + seek_target, | |
| + name: header.name.clone(), | |
| + }); | |
| + | |
| + self.visible_frame_index += 1; | |
| + } | |
| + | |
| + // Update slot dependency origins after processing this frame. | |
| + if header.can_be_referenced { | |
| + let slot = header.save_as_reference as usize; | |
| + assert!( | |
| + slot < DecoderState::MAX_STORED_FRAMES, | |
| + "invalid save_as_reference slot {slot}, max {}", | |
| + DecoderState::MAX_STORED_FRAMES - 1 | |
| + ); | |
| + self.reference_slot_decode_start[slot] = Some(decode_start_frame_index); | |
| + } | |
| + | |
| + if header.lf_level != 0 { | |
| + let slot = (header.lf_level - 1) as usize; | |
| + assert!( | |
| + slot < DecoderState::NUM_LF_FRAMES, | |
| + "invalid lf save slot {slot}, max {}", | |
| + DecoderState::NUM_LF_FRAMES - 1 | |
| + ); | |
| + self.lf_slot_decode_start[slot] = Some(decode_start_frame_index); | |
| + } | |
| + } | |
| + | |
| /// Returns the number of passes that are fully completed across all groups. | |
| pub(super) fn num_completed_passes(&self) -> usize { | |
| self.section_state.num_completed_passes() | |
| @@ -151,12 +312,41 @@ impl CodestreamParser { | |
| pixel_format | |
| } | |
| + /// Resets frame-level state for seeking to a new frame. | |
| + /// | |
| + /// Preserves: file_header, decoder_state (including reference frames), | |
| + /// basic_info, animation, color profiles, pixel_format, xyb_encoded, | |
| + /// is_gray, output_color_profile_set_by_user, preview_done. | |
| + /// | |
| + /// Clears: frame_header, toc_parser, frame, all section buffers, | |
| + /// non_section_buf, and processing flags. | |
| + pub(super) fn start_new_frame(&mut self) { | |
| + self.frame_header = None; | |
| + self.toc_parser = None; | |
| + self.frame = None; | |
| + self.non_section_buf = SmallBuffer::new(4096); | |
| + self.non_section_bit_offset = 0; | |
| + self.sections.clear(); | |
| + self.ready_section_data = 0; | |
| + self.skip_sections = false; | |
| + self.process_without_output = false; | |
| + self.section_state = SectionState::new(0, 0); | |
| + self.lf_global_section = None; | |
| + self.lf_sections.clear(); | |
| + self.hf_global_section = None; | |
| + self.hf_sections.clear(); | |
| + self.candidate_hf_sections.clear(); | |
| + self.has_more_frames = true; | |
| + self.header_needed_bytes = None; | |
| + } | |
| + | |
| pub(super) fn process( | |
| &mut self, | |
| box_parser: &mut BoxParser, | |
| input: &mut dyn JxlBitstreamInput, | |
| decode_options: &JxlDecoderOptions, | |
| mut output_buffers: Option<&mut [JxlOutputBuffer]>, | |
| + do_flush: bool, | |
| ) -> Result<()> { | |
| if let Some(output_buffers) = &output_buffers { | |
| let px = self.pixel_format.as_ref().unwrap(); | |
| @@ -179,7 +369,11 @@ impl CodestreamParser { | |
| .frame | |
| .as_ref() | |
| .is_some_and(|f| f.header().can_be_referenced); | |
| - if !self.process_without_output && output_buffers.is_none() && !can_be_referenced { | |
| + if decode_options.scan_frames_only | |
| + || (!self.process_without_output | |
| + && output_buffers.is_none() | |
| + && !can_be_referenced) | |
| + { | |
| self.skip_sections = true; | |
| } | |
| @@ -229,7 +423,9 @@ impl CodestreamParser { | |
| let num = if !box_parser.box_buffer.is_empty() { | |
| box_parser.box_buffer.take(buffers) | |
| } else { | |
| - input.read(buffers)? | |
| + let num = input.read(buffers)?; | |
| + box_parser.mark_file_consumed(num); | |
| + num | |
| }; | |
| self.ready_section_data += num; | |
| box_parser.consume_codestream(num as u64); | |
| @@ -238,7 +434,7 @@ impl CodestreamParser { | |
| break; | |
| } | |
| } | |
| - match self.process_sections(decode_options, &mut output_buffers) { | |
| + match self.process_sections(decode_options, &mut output_buffers, do_flush) { | |
| Ok(None) => Ok(()), | |
| Ok(Some(missing)) => Err(Error::OutOfBounds(missing)), | |
| Err(Error::OutOfBounds(_)) => Err(Error::SectionTooShort), | |
| @@ -256,7 +452,9 @@ impl CodestreamParser { | |
| let skipped = if !box_parser.box_buffer.is_empty() { | |
| box_parser.box_buffer.consume(to_skip) | |
| } else { | |
| - input.skip(to_skip)? | |
| + let skipped = input.skip(to_skip)?; | |
| + box_parser.mark_file_consumed(skipped); | |
| + skipped | |
| }; | |
| box_parser.consume_codestream(skipped as u64); | |
| self.ready_section_data += skipped; | |
| @@ -295,25 +493,59 @@ impl CodestreamParser { | |
| } else { | |
| // Trying to read a frame or a file header. | |
| assert!(self.frame.is_none()); | |
| - assert!(self.has_more_frames); | |
| + if !self.has_more_frames { | |
| + // If this is a flush request and the file is complete, we are done. | |
| + // Otherwise, this is an API usage error. | |
| + assert!(do_flush); | |
| + return Ok(()); | |
| + } | |
| + | |
| + // Capture frame-start metadata once before parsing the next | |
| + // frame header. We do this after `get_more_codestream()` so we | |
| + // are robust to the previous frame ending exactly at a box | |
| + // boundary (BoxNeeded -> CodestreamBox transition). | |
| + let mut capture_frame_start = | |
| + self.decoder_state.is_some() && self.frame_header.is_none(); | |
| // Loop to handle incremental parsing (e.g. large ICC profiles) that may need | |
| // multiple buffer refills to complete. | |
| loop { | |
| let available_codestream = match box_parser.get_more_codestream(input) { | |
| Err(Error::OutOfBounds(_)) => 0, | |
| - Ok(c) => c as usize, | |
| + Ok(c) => c, | |
| Err(e) => return Err(e), | |
| }; | |
| + | |
| + if capture_frame_start { | |
| + // total_file_consumed counts bytes read/skipped from | |
| + // raw input. non_section_buf and box_buffer contain | |
| + // unread bytes already accounted for there. | |
| + self.current_frame_file_offset = (box_parser.total_file_consumed as usize) | |
| + .saturating_sub(self.non_section_buf.len()) | |
| + .saturating_sub(box_parser.box_buffer.len()); | |
| + | |
| + // `available_codestream` includes bytes still in | |
| + // box_buffer and not yet in non_section_buf. | |
| + self.current_frame_remaining_in_box = if available_codestream > u64::MAX / 2 | |
| + { | |
| + u64::MAX | |
| + } else { | |
| + available_codestream.saturating_add(self.non_section_buf.len() as u64) | |
| + }; | |
| + capture_frame_start = false; | |
| + } | |
| + | |
| let c = self.non_section_buf.refill( | |
| |buf| { | |
| if !box_parser.box_buffer.is_empty() { | |
| Ok(box_parser.box_buffer.take(buf)) | |
| } else { | |
| - input.read(buf) | |
| + let read = input.read(buf)?; | |
| + box_parser.mark_file_consumed(read); | |
| + Ok(read) | |
| } | |
| }, | |
| - Some(available_codestream), | |
| + Some(available_codestream as usize), | |
| )? as u64; | |
| box_parser.consume_codestream(c); | |
| @@ -379,6 +611,11 @@ impl CodestreamParser { | |
| } | |
| } | |
| + // Record frame info for scanning (after preview check). | |
| + if !is_preview_frame { | |
| + self.record_frame_info(); | |
| + } | |
| + | |
| if self.has_visible_frame() { | |
| // Return to caller if we found visible frame info. | |
| return Ok(()); | |
| @@ -390,4 +627,49 @@ impl CodestreamParser { | |
| } | |
| } | |
| } | |
| + | |
| + pub(super) fn update_default_output_color_profile(&mut self) { | |
| + // Only set default output_color_profile if not already configured by user | |
| + if self.output_color_profile_set_by_user { | |
| + return; | |
| + } | |
| + | |
| + let embedded_color_profile = self.embedded_color_profile.as_ref().unwrap(); | |
| + let pixel_format = self.pixel_format.as_ref().unwrap(); | |
| + | |
| + // Determine default output color profile following libjxl logic: | |
| + // - For XYB: use embedded if can_output_to(), else: | |
| + // - if float samples are requested: linear sRGB, | |
| + // - else: sRGB | |
| + // - For non-XYB: use embedded color profile | |
| + let output_color_profile = if self.xyb_encoded { | |
| + // Use embedded if we can output to it, otherwise fall back to sRGB | |
| + let base_encoding = if embedded_color_profile.can_output_to() { | |
| + match &embedded_color_profile { | |
| + JxlColorProfile::Simple(enc) => enc.clone(), | |
| + JxlColorProfile::Icc(_) => { | |
| + unreachable!("can_output_to returns false for ICC") | |
| + } | |
| + } | |
| + } else { | |
| + let data_format = pixel_format | |
| + .color_data_format | |
| + .unwrap_or(JxlDataFormat::U8 { bit_depth: 8 }); | |
| + let is_float = matches!( | |
| + data_format, | |
| + JxlDataFormat::F32 { .. } | JxlDataFormat::F16 { .. } | |
| + ); | |
| + if is_float { | |
| + JxlColorEncoding::linear_srgb(self.is_gray) | |
| + } else { | |
| + JxlColorEncoding::srgb(self.is_gray) | |
| + } | |
| + }; | |
| + | |
| + JxlColorProfile::Simple(base_encoding) | |
| + } else { | |
| + embedded_color_profile.clone() | |
| + }; | |
| + self.output_color_profile = Some(output_color_profile); | |
| + } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs | |
| index 342fd6729f7a5..48ec9e6b9480f 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/non_section.rs | |
| @@ -117,6 +117,7 @@ impl CodestreamParser { | |
| if self.decoder_state.is_none() && self.embedded_color_profile.is_none() { | |
| let file_header = self.file_header.as_ref().unwrap(); | |
| + | |
| // Parse (or extract from file header) the ICC profile. | |
| let mut br = BitReader::new(&self.non_section_buf); | |
| br.skip_bits(self.non_section_bit_offset as usize)?; | |
| @@ -147,50 +148,17 @@ impl CodestreamParser { | |
| &file_header.image_metadata.color_encoding, | |
| )?) | |
| }; | |
| - // Determine default output color profile following libjxl logic: | |
| - // - For XYB: use embedded if can_output_to(), else linear sRGB fallback | |
| - // - For non-XYB: use embedded color profile | |
| - let output_color_profile = if file_header.image_metadata.xyb_encoded { | |
| - let is_gray = | |
| - file_header.image_metadata.color_encoding.color_space == ColorSpace::Gray; | |
| - | |
| - // Use embedded if we can output to it, otherwise fall back to linear sRGB | |
| - let base_encoding = if embedded_color_profile.can_output_to() { | |
| - match &embedded_color_profile { | |
| - JxlColorProfile::Simple(enc) => enc.clone(), | |
| - JxlColorProfile::Icc(_) => { | |
| - unreachable!("can_output_to returns false for ICC") | |
| - } | |
| - } | |
| - } else { | |
| - JxlColorEncoding::linear_srgb(is_gray) | |
| - }; | |
| - | |
| - JxlColorProfile::Simple(base_encoding) | |
| - } else { | |
| - embedded_color_profile.clone() | |
| - }; | |
| self.embedded_color_profile = Some(embedded_color_profile.clone()); | |
| - // Only set default output_color_profile if not already configured by user | |
| - if self.output_color_profile.is_none() { | |
| - self.output_color_profile = Some(output_color_profile); | |
| - } else { | |
| - // Validate user's output color profile choice (libjxl compatibility) | |
| - // For non-XYB without CMS: only same encoding as embedded is allowed | |
| - let user_profile = self.output_color_profile.as_ref().unwrap(); | |
| - if !file_header.image_metadata.xyb_encoded | |
| - && decode_options.cms.is_none() | |
| - && *user_profile != embedded_color_profile | |
| - { | |
| - return Err(Error::NonXybOutputNoCMS); | |
| - } | |
| - } | |
| + | |
| + let xyb_encoded = file_header.image_metadata.xyb_encoded; | |
| + let is_gray = file_header.image_metadata.color_encoding.color_space == ColorSpace::Gray; | |
| + self.xyb_encoded = xyb_encoded; | |
| + self.is_gray = is_gray; | |
| + | |
| // Only set default pixel_format if not already configured (e.g. via rewind) | |
| if self.pixel_format.is_none() { | |
| self.pixel_format = Some(JxlPixelFormat { | |
| - color_type: if file_header.image_metadata.color_encoding.color_space | |
| - == ColorSpace::Gray | |
| - { | |
| + color_type: if is_gray { | |
| JxlColorType::Grayscale | |
| } else { | |
| JxlColorType::Rgb | |
| @@ -207,6 +175,19 @@ impl CodestreamParser { | |
| }); | |
| } | |
| + if let Some(user_profile) = &self.output_color_profile { | |
| + // Validate user's output color profile choice (libjxl compatibility) | |
| + // For non-XYB without CMS: only same encoding as embedded is allowed | |
| + if !xyb_encoded | |
| + && decode_options.cms.is_none() | |
| + && *user_profile != embedded_color_profile | |
| + { | |
| + return Err(Error::NonXybOutputNoCMS); | |
| + } | |
| + } else { | |
| + self.update_default_output_color_profile(); | |
| + } | |
| + | |
| let mut br = BitReader::new(&self.non_section_buf); | |
| br.skip_bits(self.non_section_bit_offset as usize)?; | |
| br.jump_to_byte_boundary()?; | |
| @@ -298,7 +279,7 @@ impl CodestreamParser { | |
| // Save file_header before creating frame (for preview frame recovery) | |
| self.saved_file_header = self.decoder_state.as_ref().map(|ds| ds.file_header.clone()); | |
| - let frame = Frame::from_header_and_toc( | |
| + let mut frame = Frame::from_header_and_toc( | |
| self.frame_header.take().unwrap(), | |
| toc, | |
| self.decoder_state.take().unwrap(), | |
| @@ -360,6 +341,17 @@ impl CodestreamParser { | |
| self.section_state = | |
| SectionState::new(frame.header().num_lf_groups(), frame.header().num_groups()); | |
| + frame.prepare_render_pipeline( | |
| + self.pixel_format.as_ref().unwrap(), | |
| + decode_options.cms.as_deref(), | |
| + self.embedded_color_profile | |
| + .as_ref() | |
| + .expect("embedded_color_profile should be set before pipeline preparation"), | |
| + self.output_color_profile | |
| + .as_ref() | |
| + .expect("output_color_profile should be set before pipeline preparation"), | |
| + )?; | |
| + | |
| self.frame = Some(frame); | |
| Ok(()) | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs | |
| index ffdb3588b7334..4fc5278c5fc15 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/codestream_parser/sections.rs | |
| @@ -8,15 +8,18 @@ use crate::{ | |
| bit_reader::BitReader, | |
| error::Result, | |
| frame::Section, | |
| + headers::frame_header::{Encoding, FrameType}, | |
| }; | |
| use super::CodestreamParser; | |
| +#[derive(Debug)] | |
| pub(super) struct SectionState { | |
| lf_global_done: bool, | |
| remaining_lf: usize, | |
| hf_global_done: bool, | |
| completed_passes: Vec<u8>, | |
| + lf_global_flush_len: usize, | |
| } | |
| impl SectionState { | |
| @@ -26,6 +29,7 @@ impl SectionState { | |
| remaining_lf: num_lf_groups, | |
| hf_global_done: false, | |
| completed_passes: vec![0; num_groups], | |
| + lf_global_flush_len: 0, | |
| } | |
| } | |
| @@ -41,8 +45,15 @@ impl CodestreamParser { | |
| &mut self, | |
| decode_options: &JxlDecoderOptions, | |
| output_buffers: &mut Option<&mut [JxlOutputBuffer<'_>]>, | |
| + do_flush: bool, | |
| ) -> Result<Option<usize>> { | |
| let frame = self.frame.as_mut().unwrap(); | |
| + | |
| + let output_profile = self | |
| + .output_color_profile | |
| + .as_ref() | |
| + .expect("output_color_profile should be set before pipeline preparation"); | |
| + | |
| let frame_header = frame.header(); | |
| // Dequeue ready sections. | |
| @@ -72,40 +83,81 @@ impl CodestreamParser { | |
| } | |
| let mut processed_section = false; | |
| + let mut called_render_hf = false; | |
| let pixel_format = self.pixel_format.as_ref().unwrap(); | |
| + | |
| + let complete_lf_global; | |
| + let (lf_global, lf_global_is_complete) = if let Some(d) = self.lf_global_section.take() { | |
| + complete_lf_global = d; | |
| + ( | |
| + Some(&complete_lf_global.data[..complete_lf_global.len]), | |
| + true, | |
| + ) | |
| + } else if do_flush | |
| + && self | |
| + .sections | |
| + .front() | |
| + .is_some_and(|s| s.section == Section::LfGlobal) | |
| + && 2 * self.ready_section_data > 3 * self.section_state.lf_global_flush_len | |
| + && frame_header.encoding == Encoding::Modular | |
| + && matches!( | |
| + frame_header.frame_type, | |
| + FrameType::RegularFrame | FrameType::LFFrame | |
| + ) | |
| + { | |
| + self.section_state.lf_global_flush_len = self.ready_section_data; | |
| + ( | |
| + Some(&self.sections[0].data[..self.ready_section_data]), | |
| + false, | |
| + ) | |
| + } else { | |
| + (None, false) | |
| + }; | |
| + | |
| 'process: { | |
| if frame_header.num_groups() == 1 && frame_header.passes.num_passes == 1 { | |
| // Single-group special case. | |
| - let Some(sec) = self.lf_global_section.take() else { | |
| + let Some(buf) = lf_global else { | |
| break 'process; | |
| }; | |
| - assert!(self.sections.is_empty()); | |
| - let mut br = BitReader::new(&sec.data); | |
| - frame.decode_lf_global(&mut br)?; | |
| - frame.decode_lf_group(0, &mut br)?; | |
| - frame.decode_hf_global(&mut br)?; | |
| - frame.prepare_render_pipeline( | |
| - self.pixel_format.as_ref().unwrap(), | |
| - decode_options.cms.as_deref(), | |
| - self.embedded_color_profile | |
| - .as_ref() | |
| - .expect("embedded_color_profile should be set before pipeline preparation"), | |
| - self.output_color_profile | |
| - .as_ref() | |
| - .expect("output_color_profile should be set before pipeline preparation"), | |
| - )?; | |
| - frame.finalize_lf()?; | |
| - frame.decode_and_render_hf_groups( | |
| - output_buffers, | |
| - pixel_format, | |
| - vec![(0, vec![(0, br)])], | |
| - )?; | |
| - processed_section = true; | |
| + assert!(self.sections.is_empty() || !lf_global_is_complete); | |
| + let mut br = BitReader::new(buf); | |
| + let res = (|| -> Result<()> { | |
| + frame.decode_lf_global(&mut br, !lf_global_is_complete)?; | |
| + frame.decode_lf_group(0, &mut br)?; | |
| + frame.decode_hf_global(&mut br)?; | |
| + frame.finalize_lf()?; | |
| + frame.decode_and_render_hf_groups( | |
| + output_buffers, | |
| + pixel_format, | |
| + vec![(0, vec![(0, br)])], | |
| + do_flush, | |
| + output_profile, | |
| + )?; | |
| + called_render_hf = true; | |
| + Ok(()) | |
| + })(); | |
| + match res { | |
| + Ok(_) => { | |
| + processed_section = true; | |
| + } | |
| + Err(_) if !lf_global_is_complete => { | |
| + // Ignore errors if we are doing partial parsing. | |
| + } | |
| + Err(e) => return Err(e), | |
| + } | |
| } else { | |
| - if let Some(lf_global) = self.lf_global_section.take() { | |
| - frame.decode_lf_global(&mut BitReader::new(&lf_global.data))?; | |
| - self.section_state.lf_global_done = true; | |
| - processed_section = true; | |
| + if let Some(buf) = lf_global { | |
| + match frame.decode_lf_global(&mut BitReader::new(buf), !lf_global_is_complete) { | |
| + Ok(_) => { | |
| + self.section_state.lf_global_done = true; | |
| + processed_section = true; | |
| + } | |
| + Err(_) if !lf_global_is_complete => { | |
| + // Ignore errors if we are doing partial parsing. | |
| + } | |
| + Err(e) => return Err(e), | |
| + } | |
| } | |
| if !self.section_state.lf_global_done { | |
| @@ -127,16 +179,6 @@ impl CodestreamParser { | |
| if let Some(hf_global) = self.hf_global_section.take() { | |
| frame.decode_hf_global(&mut BitReader::new(&hf_global.data))?; | |
| - frame.prepare_render_pipeline( | |
| - self.pixel_format.as_ref().unwrap(), | |
| - decode_options.cms.as_deref(), | |
| - self.embedded_color_profile.as_ref().expect( | |
| - "embedded_color_profile should be set before pipeline preparation", | |
| - ), | |
| - self.output_color_profile.as_ref().expect( | |
| - "output_color_profile should be set before pipeline preparation", | |
| - ), | |
| - )?; | |
| frame.finalize_lf()?; | |
| self.section_state.hf_global_done = true; | |
| processed_section = true; | |
| @@ -184,7 +226,14 @@ impl CodestreamParser { | |
| self.candidate_hf_sections.clear(); | |
| } | |
| - frame.decode_and_render_hf_groups(output_buffers, pixel_format, group_readers)?; | |
| + frame.decode_and_render_hf_groups( | |
| + output_buffers, | |
| + pixel_format, | |
| + group_readers, | |
| + do_flush, | |
| + output_profile, | |
| + )?; | |
| + called_render_hf = true; | |
| for g in processed_groups.into_iter() { | |
| for i in 0..self.section_state.completed_passes[g] { | |
| @@ -195,6 +244,16 @@ impl CodestreamParser { | |
| } | |
| } | |
| + if do_flush && !called_render_hf && frame.can_do_early_rendering() { | |
| + frame.decode_and_render_hf_groups( | |
| + output_buffers, | |
| + pixel_format, | |
| + vec![], | |
| + do_flush, | |
| + output_profile, | |
| + )?; | |
| + } | |
| + | |
| if !processed_section { | |
| let data_for_next_section = | |
| self.sections.front().unwrap().len - self.ready_section_data; | |
| @@ -230,7 +289,6 @@ impl CodestreamParser { | |
| if let Some(fh) = self.saved_file_header.take() { | |
| let mut new_state = crate::frame::DecoderState::new(fh); | |
| new_state.render_spotcolors = decode_options.render_spot_colors; | |
| - new_state.enable_output = decode_options.enable_output; | |
| self.decoder_state = Some(new_state); | |
| } | |
| } else { | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs | |
| index 44aa57ce701d8..ac8dd81cb6600 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/mod.rs | |
| @@ -6,11 +6,12 @@ | |
| #[cfg(test)] | |
| use crate::api::FrameCallback; | |
| use crate::{ | |
| - api::JxlFrameHeader, | |
| + api::{JxlFrameHeader, VisibleFrameInfo}, | |
| error::{Error, Result}, | |
| }; | |
| use super::{JxlBasicInfo, JxlColorProfile, JxlDecoderOptions, JxlPixelFormat}; | |
| +use crate::container::frame_index::FrameIndexBox; | |
| use box_parser::BoxParser; | |
| use codestream_parser::CodestreamParser; | |
| @@ -67,6 +68,7 @@ impl JxlDecoderInner { | |
| return Err(Error::ICCOutputNoCMS); | |
| } | |
| self.codestream_parser.output_color_profile = Some(profile); | |
| + self.codestream_parser.output_color_profile_set_by_user = true; | |
| Ok(()) | |
| } | |
| @@ -75,7 +77,10 @@ impl JxlDecoderInner { | |
| } | |
| pub fn set_pixel_format(&mut self, pixel_format: JxlPixelFormat) { | |
| + // TODO(veluca): return an error if we are asking for both planar and | |
| + // interleaved-in-color alpha. | |
| self.codestream_parser.pixel_format = Some(pixel_format); | |
| + self.codestream_parser.update_default_output_color_profile(); | |
| } | |
| pub fn frame_header(&self) -> Option<JxlFrameHeader> { | |
| @@ -131,6 +136,35 @@ impl JxlDecoderInner { | |
| self.codestream_parser.has_more_frames | |
| } | |
| + /// Returns the parsed frame index box, if the file contained one. | |
| + pub fn frame_index(&self) -> Option<&FrameIndexBox> { | |
| + self.box_parser.frame_index.as_ref() | |
| + } | |
| + | |
| + /// Returns visible frame info entries collected during parsing. | |
| + pub fn scanned_frames(&self) -> &[VisibleFrameInfo] { | |
| + &self.codestream_parser.scanned_frames | |
| + } | |
| + | |
| + /// Resets frame-level state to prepare for decoding a new frame. | |
| + /// | |
| + /// Preserves image-level state (file header, decoder state including | |
| + /// reference frames, color profiles, pixel format). Clears frame header, | |
| + /// TOC, section buffers, and restores the box parser to the correct | |
| + /// state so the next `process()` call parses a new frame header. | |
| + /// | |
| + /// `remaining_in_box` comes from | |
| + /// `VisibleFrameInfo::seek_target.remaining_in_box` and tells the box | |
| + /// parser how many codestream bytes remain in the current container box at | |
| + /// the target position. For bare-codestream files this is `u64::MAX`. | |
| + /// | |
| + /// The caller must provide raw file input starting from the target | |
| + /// frame's `seek_target.decode_start_file_offset`. | |
| + pub fn start_new_frame(&mut self, remaining_in_box: u64) { | |
| + self.box_parser.reset_for_codestream_seek(remaining_in_box); | |
| + self.codestream_parser.start_new_frame(); | |
| + } | |
| + | |
| #[cfg(test)] | |
| pub(crate) fn set_use_simple_pipeline(&mut self, u: bool) { | |
| self.codestream_parser.set_use_simple_pipeline(u); | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs | |
| index 50e6fe338613c..ecdb966d757f6 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/inner/process.rs | |
| @@ -127,11 +127,23 @@ impl JxlDecoderInner { | |
| input, | |
| &self.options, | |
| buffers, | |
| + false, | |
| )) | |
| } | |
| /// Draws all the pixels we have data for. | |
| - pub fn flush_pixels(&mut self, _buffers: &mut [JxlOutputBuffer]) -> Result<()> { | |
| - todo!() | |
| + pub fn flush_pixels(&mut self, buffers: &mut [JxlOutputBuffer]) -> Result<()> { | |
| + let mut input: &[u8] = &[]; | |
| + match self.codestream_parser.process( | |
| + &mut self.box_parser, | |
| + &mut input, | |
| + &self.options, | |
| + Some(buffers), | |
| + true, | |
| + ) { | |
| + Ok(()) => Ok(()), | |
| + Err(crate::error::Error::OutOfBounds(_)) => Ok(()), | |
| + Err(e) => Err(e), | |
| + } | |
| } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs | |
| index 5be3ef129622e..18c4b430f8dab 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs | |
| @@ -12,6 +12,7 @@ mod inner; | |
| mod input; | |
| mod options; | |
| mod signature; | |
| +mod xyb_constants; | |
| pub use crate::image::JxlOutputBuffer; | |
| pub use color::*; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs | |
| index 2bff60cda75ba..327a4456dbe70 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs | |
| @@ -22,7 +22,6 @@ pub struct JxlDecoderOptions { | |
| pub desired_intensity_target: Option<f32>, | |
| pub skip_preview: bool, | |
| pub progressive_mode: JxlProgressiveMode, | |
| - pub enable_output: bool, | |
| pub cms: Option<Box<dyn JxlCms>>, | |
| /// Fail decoding images with more than this number of pixels, or with frames with | |
| /// more than this number of pixels. The limit counts the product of pixels and | |
| @@ -40,6 +39,11 @@ pub struct JxlDecoderOptions { | |
| /// This produces premultiplied alpha output, which is useful for compositing. | |
| /// Default: false (output straight alpha) | |
| pub premultiply_output: bool, | |
| + /// If true, only parse frame headers/TOC and skip section decoding. | |
| + /// | |
| + /// This is useful for collecting [`VisibleFrameInfo`](crate::api::VisibleFrameInfo) | |
| + /// via the regular decoder API without producing pixels. | |
| + pub scan_frames_only: bool, | |
| } | |
| impl Default for JxlDecoderOptions { | |
| @@ -51,11 +55,11 @@ impl Default for JxlDecoderOptions { | |
| skip_preview: true, | |
| desired_intensity_target: None, | |
| progressive_mode: JxlProgressiveMode::Pass, | |
| - enable_output: true, | |
| cms: None, | |
| pixel_limit: None, | |
| high_precision: false, | |
| premultiply_output: false, | |
| + scan_frames_only: false, | |
| } | |
| } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs | |
| new file mode 100644 | |
| index 0000000000000..eb9356b228a56 | |
| --- /dev/null | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs | |
| @@ -0,0 +1,86 @@ | |
| +// Copyright (c) the JPEG XL Project Authors. All rights reserved. | |
| +// | |
| +// Use of this source code is governed by a BSD-style | |
| +// license that can be found in the LICENSE file. | |
| + | |
| +//! XYB color space constants (matching libjxl) | |
| +//! | |
| +//! Allow excessive precision as these constants are copied verbatim from libjxl for compatibility | |
| + | |
| +#![allow(clippy::excessive_precision)] | |
| + | |
| +pub const OPSIN_ABSORBANCE_BIAS: f32 = 0.0037930732552754493; | |
| + | |
| +#[allow(dead_code)] | |
| +pub const NEG_OPSIN_ABSORBANCE_BIAS_RGB: [f32; 3] = [ | |
| + -OPSIN_ABSORBANCE_BIAS, | |
| + -OPSIN_ABSORBANCE_BIAS, | |
| + -OPSIN_ABSORBANCE_BIAS, | |
| +]; | |
| + | |
| +const SCALED_XYB_OFFSET: [f32; 3] = [0.015386134, 0.0, 0.27770459]; | |
| +const SCALED_XYB_SCALE: [f32; 3] = [22.995788804, 1.183000077, 1.502141333]; | |
| + | |
| +const fn reciprocal_sum(r1: f32, r2: f32) -> f32 { | |
| + (r1 * r2) / (r1 + r2) | |
| +} | |
| + | |
| +pub const XYB_OFFSET: [f32; 3] = [ | |
| + SCALED_XYB_OFFSET[0] + SCALED_XYB_OFFSET[1], | |
| + SCALED_XYB_OFFSET[1] - SCALED_XYB_OFFSET[0] + (1.0 / SCALED_XYB_SCALE[0]), | |
| + SCALED_XYB_OFFSET[1] + SCALED_XYB_OFFSET[2], | |
| +]; | |
| + | |
| +pub const fn xyb_scale() -> [f32; 3] { | |
| + [ | |
| + reciprocal_sum(SCALED_XYB_SCALE[0], SCALED_XYB_SCALE[1]), | |
| + reciprocal_sum(SCALED_XYB_SCALE[0], SCALED_XYB_SCALE[1]), | |
| + reciprocal_sum(SCALED_XYB_SCALE[1], SCALED_XYB_SCALE[2]), | |
| + ] | |
| +} | |
| + | |
| +const fn xyb_corner(x: usize, y: usize, b: usize, idx: usize) -> f32 { | |
| + let val = match idx { | |
| + 0 => x, | |
| + 1 => y, | |
| + _ => b, | |
| + }; | |
| + (val as f32 / SCALED_XYB_SCALE[idx]) - SCALED_XYB_OFFSET[idx] | |
| +} | |
| + | |
| +const fn scaled_a2b_corner(x: usize, y: usize, b: usize, idx: usize) -> f32 { | |
| + match idx { | |
| + 0 => xyb_corner(x, y, b, 1) + xyb_corner(x, y, b, 0), | |
| + 1 => xyb_corner(x, y, b, 1) - xyb_corner(x, y, b, 0), | |
| + _ => xyb_corner(x, y, b, 2) + xyb_corner(x, y, b, 1), | |
| + } | |
| +} | |
| + | |
| +const fn unscaled_a2b_corner(x: usize, y: usize, b: usize) -> [f32; 3] { | |
| + let scale = xyb_scale(); | |
| + [ | |
| + (scaled_a2b_corner(x, y, b, 0) + XYB_OFFSET[0]) * scale[0], | |
| + (scaled_a2b_corner(x, y, b, 1) + XYB_OFFSET[1]) * scale[1], | |
| + (scaled_a2b_corner(x, y, b, 2) + XYB_OFFSET[2]) * scale[2], | |
| + ] | |
| +} | |
| + | |
| +/// Compute the 2x2x2 CLUT cube for XYB to linear RGB conversion. | |
| +pub const fn unscaled_a2b_cube_full() -> [[[[f32; 3]; 2]; 2]; 2] { | |
| + [ | |
| + [ | |
| + [unscaled_a2b_corner(0, 0, 0), unscaled_a2b_corner(0, 0, 1)], | |
| + [unscaled_a2b_corner(0, 1, 0), unscaled_a2b_corner(0, 1, 1)], | |
| + ], | |
| + [ | |
| + [unscaled_a2b_corner(1, 0, 0), unscaled_a2b_corner(1, 0, 1)], | |
| + [unscaled_a2b_corner(1, 1, 0), unscaled_a2b_corner(1, 1, 1)], | |
| + ], | |
| + ] | |
| +} | |
| + | |
| +/// Matrix for XYB ICC profile (from libjxl). | |
| +pub const XYB_ICC_MATRIX: [f64; 9] = [ | |
| + 1.5170095, -1.1065225, 0.071623, -0.050022, 0.5683655, -0.018344, -1.387676, 1.1145555, | |
| + 0.6857255, | |
| +]; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs | |
| new file mode 100644 | |
| index 0000000000000..8a81b02864676 | |
| --- /dev/null | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs | |
| @@ -0,0 +1,236 @@ | |
| +// Copyright (c) the JPEG XL Project Authors. All rights reserved. | |
| +// | |
| +// Use of this source code is governed by a BSD-style | |
| +// license that can be found in the LICENSE file. | |
| + | |
| +//! Parser for the JPEG XL Frame Index box (`jxli`), as specified in | |
| +//! the JPEG XL container specification. | |
| +//! | |
| +//! The frame index box provides a seek table for animated JXL files, | |
| +//! listing keyframe byte offsets in the codestream, timestamps, and | |
| +//! frame counts. | |
| + | |
| +use std::num::NonZero; | |
| + | |
| +use byteorder::{BigEndian, ReadBytesExt}; | |
| + | |
| +use crate::error::{Error, Result}; | |
| +use crate::icc::read_varint_from_reader; | |
| +use crate::util::NewWithCapacity; | |
| + | |
| +/// A single entry in the frame index. | |
| +#[derive(Debug, Clone, PartialEq, Eq)] | |
| +pub struct FrameIndexEntry { | |
| + /// Absolute byte offset of this keyframe in the codestream. | |
| + /// (Accumulated from the delta-coded OFFi values.) | |
| + pub codestream_offset: u64, | |
| + /// Duration in ticks from this indexed frame to the next indexed frame | |
| + /// (or end of stream for the last entry). A tick lasts TNUM/TDEN seconds. | |
| + pub duration_ticks: u64, | |
| + /// Number of displayed frames from this indexed frame to the next indexed | |
| + /// frame (or end of stream for the last entry). | |
| + pub frame_count: u64, | |
| +} | |
| + | |
| +/// Parsed contents of a Frame Index box (`jxli`). | |
| +#[derive(Debug, Clone, PartialEq, Eq)] | |
| +pub struct FrameIndexBox { | |
| + /// Tick numerator. A tick lasts `tnum / tden` seconds. | |
| + pub tnum: u32, | |
| + /// Tick denominator (non-zero per spec). | |
| + pub tden: NonZero<u32>, | |
| + /// Indexed frame entries. | |
| + pub entries: Vec<FrameIndexEntry>, | |
| +} | |
| + | |
| +impl FrameIndexBox { | |
| + /// Returns the number of indexed frames. | |
| + pub fn num_frames(&self) -> usize { | |
| + self.entries.len() | |
| + } | |
| + | |
| + /// Returns the duration of one tick in seconds. | |
| + pub fn tick_duration_secs(&self) -> f64 { | |
| + self.tnum as f64 / self.tden.get() as f64 | |
| + } | |
| + | |
| + /// Finds the index entry for the keyframe at or before the given | |
| + /// codestream byte offset. | |
| + pub fn entry_for_offset(&self, offset: u64) -> Option<&FrameIndexEntry> { | |
| + // Entries are sorted by codestream_offset (monotonically increasing). | |
| + match self | |
| + .entries | |
| + .binary_search_by_key(&offset, |e| e.codestream_offset) | |
| + { | |
| + Ok(i) => Some(&self.entries[i]), | |
| + Err(0) => None, | |
| + Err(i) => Some(&self.entries[i - 1]), | |
| + } | |
| + } | |
| + | |
| + /// Parse a frame index box from its raw content bytes (after the box header). | |
| + pub fn parse(data: &[u8]) -> Result<Self> { | |
| + let mut reader = data; | |
| + | |
| + let nf = read_varint_from_reader(&mut reader)?; | |
| + if nf > u32::MAX as u64 { | |
| + return Err(Error::InvalidBox); | |
| + } | |
| + let nf = nf as usize; | |
| + | |
| + let tnum = reader | |
| + .read_u32::<BigEndian>() | |
| + .map_err(|_| Error::InvalidBox)?; | |
| + let tden = NonZero::new( | |
| + reader | |
| + .read_u32::<BigEndian>() | |
| + .map_err(|_| Error::InvalidBox)?, | |
| + ) | |
| + .ok_or(Error::InvalidBox)?; | |
| + | |
| + // Each entry requires at least 3 bytes (three varints, min 1 byte each). | |
| + // Cap the pre-allocation to avoid OOM from a crafted NF value. | |
| + // Use new_with_capacity to return Err on allocation failure instead of aborting. | |
| + let mut entries = Vec::new_with_capacity(nf.min(reader.len() / 3))?; | |
| + let mut absolute_offset: u64 = 0; | |
| + | |
| + for _ in 0..nf { | |
| + let off_delta = read_varint_from_reader(&mut reader)?; | |
| + let duration_ticks = read_varint_from_reader(&mut reader)?; | |
| + let frame_count = read_varint_from_reader(&mut reader)?; | |
| + | |
| + absolute_offset = absolute_offset | |
| + .checked_add(off_delta) | |
| + .ok_or(Error::InvalidBox)?; | |
| + | |
| + entries.push(FrameIndexEntry { | |
| + codestream_offset: absolute_offset, | |
| + duration_ticks, | |
| + frame_count, | |
| + }); | |
| + } | |
| + | |
| + Ok(FrameIndexBox { | |
| + tnum, | |
| + tden, | |
| + entries, | |
| + }) | |
| + } | |
| +} | |
| + | |
| +#[cfg(test)] | |
| +mod tests { | |
| + use super::*; | |
| + use crate::util::test::{build_frame_index_content, encode_varint}; | |
| + | |
| + fn build_frame_index(tnum: u32, tden: u32, entries: &[(u64, u64, u64)]) -> Vec<u8> { | |
| + build_frame_index_content(tnum, tden, entries) | |
| + } | |
| + | |
| + #[test] | |
| + fn test_parse_empty_index() { | |
| + let data = build_frame_index(1, 1000, &[]); | |
| + let index = FrameIndexBox::parse(&data).unwrap(); | |
| + assert_eq!(index.num_frames(), 0); | |
| + assert_eq!(index.tnum, 1); | |
| + assert_eq!(index.tden.get(), 1000); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_parse_single_entry() { | |
| + // One frame at offset 0, duration 100 ticks, 1 frame | |
| + let data = build_frame_index(1, 1000, &[(0, 100, 1)]); | |
| + let index = FrameIndexBox::parse(&data).unwrap(); | |
| + assert_eq!(index.num_frames(), 1); | |
| + assert_eq!( | |
| + index.entries[0], | |
| + FrameIndexEntry { | |
| + codestream_offset: 0, | |
| + duration_ticks: 100, | |
| + frame_count: 1, | |
| + } | |
| + ); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_parse_multiple_entries_delta_coding() { | |
| + // Three frames with delta-coded offsets: | |
| + // OFF0=100 (absolute: 100), T0=50, F0=2 | |
| + // OFF1=200 (absolute: 300), T1=50, F1=2 | |
| + // OFF2=150 (absolute: 450), T2=30, F2=1 | |
| + let data = build_frame_index(1, 1000, &[(100, 50, 2), (200, 50, 2), (150, 30, 1)]); | |
| + let index = FrameIndexBox::parse(&data).unwrap(); | |
| + assert_eq!(index.num_frames(), 3); | |
| + assert_eq!(index.entries[0].codestream_offset, 100); | |
| + assert_eq!(index.entries[1].codestream_offset, 300); | |
| + assert_eq!(index.entries[2].codestream_offset, 450); | |
| + assert_eq!(index.entries[0].duration_ticks, 50); | |
| + assert_eq!(index.entries[1].duration_ticks, 50); | |
| + assert_eq!(index.entries[2].duration_ticks, 30); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_parse_large_varint() { | |
| + // Test with a value that requires multiple varint bytes | |
| + let mut data = Vec::new(); | |
| + data.extend(encode_varint(1)); // NF = 1 | |
| + data.extend(1u32.to_be_bytes()); // TNUM | |
| + data.extend(1000u32.to_be_bytes()); // TDEN | |
| + data.extend(encode_varint(0x1234_5678_9ABC)); // large offset | |
| + data.extend(encode_varint(42)); | |
| + data.extend(encode_varint(1)); | |
| + let index = FrameIndexBox::parse(&data).unwrap(); | |
| + assert_eq!(index.entries[0].codestream_offset, 0x1234_5678_9ABC); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_entry_for_offset() { | |
| + let data = build_frame_index(1, 1000, &[(100, 50, 2), (200, 50, 2), (150, 30, 1)]); | |
| + let index = FrameIndexBox::parse(&data).unwrap(); | |
| + // Absolute offsets: 100, 300, 450 | |
| + | |
| + // Before first entry | |
| + assert!(index.entry_for_offset(50).is_none()); | |
| + // Exact match | |
| + assert_eq!(index.entry_for_offset(100).unwrap().codestream_offset, 100); | |
| + // Between entries | |
| + assert_eq!(index.entry_for_offset(200).unwrap().codestream_offset, 100); | |
| + assert_eq!(index.entry_for_offset(350).unwrap().codestream_offset, 300); | |
| + // Exact match on last | |
| + assert_eq!(index.entry_for_offset(450).unwrap().codestream_offset, 450); | |
| + // Past last | |
| + assert_eq!(index.entry_for_offset(999).unwrap().codestream_offset, 450); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_zero_tden_rejected() { | |
| + let data = build_frame_index(1, 0, &[]); | |
| + assert!(FrameIndexBox::parse(&data).is_err()); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_truncated_data() { | |
| + // Just NF=1, no TNUM/TDEN | |
| + let data = encode_varint(1); | |
| + assert!(FrameIndexBox::parse(&data).is_err()); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_huge_nf_no_oom() { | |
| + // Crafted input: NF claims billions of entries but the data is tiny. | |
| + // This must not OOM -- Vec::with_capacity should be bounded by data length. | |
| + let mut data = Vec::new(); | |
| +        data.extend(encode_varint(u32::MAX as u64)); // NF = u32::MAX (~4.3 billion) | |
| + data.extend(1u32.to_be_bytes()); // TNUM | |
| + data.extend(1000u32.to_be_bytes()); // TDEN | |
| + // No actual entry data -- parse should fail gracefully, not OOM. | |
| + assert!(FrameIndexBox::parse(&data).is_err()); | |
| + } | |
| + | |
| + #[test] | |
| + fn test_tick_duration() { | |
| + let data = build_frame_index(1, 1000, &[]); | |
| + let index = FrameIndexBox::parse(&data).unwrap(); | |
| + assert!((index.tick_duration_secs() - 0.001).abs() < 1e-9); | |
| + } | |
| +} | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs | |
| index c6e9e505076b6..65f91b5e57ae0 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs | |
| @@ -6,6 +6,7 @@ | |
| // Originally written for jxl-oxide. | |
| pub mod box_header; | |
| +pub mod frame_index; | |
| pub mod parse; | |
| use box_header::*; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs | |
| index c6b95d1a8ef1f..9da5cbee0388f 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs | |
| @@ -15,6 +15,11 @@ const RLE_MARKER_SYM: u16 = LOG_SUM_PROBS as u16 + 1; | |
| #[derive(Debug)] | |
| struct AnsHistogram { | |
| + // Safety invariant: | |
| + // - log_bucket_size <= LOG_SUM_PROBS | |
| + // - buckets.len() = 2^(LOG_SUM_PROBS - log_bucket_size) | |
| + // This relationship ensures that for any ANS state (12 bits), the bucket index | |
| + // computed as (state & 0xfff) >> log_bucket_size is always < buckets.len() | |
| buckets: Vec<Bucket>, | |
| log_bucket_size: usize, | |
| bucket_mask: u32, | |
| @@ -265,7 +270,7 @@ impl AnsHistogram { | |
| debug_assert!((5..=8).contains(&log_alpha_size)); | |
| let table_size = (1u16 << log_alpha_size) as usize; | |
| // 4 <= log_bucket_size <= 7 | |
| - let log_bucket_size = LOG_SUM_PROBS - log_alpha_size; | |
| + let log_bucket_size = LOG_SUM_PROBS.checked_sub(log_alpha_size).unwrap(); | |
| let bucket_size = 1u16 << log_bucket_size; | |
| let bucket_mask = bucket_size as u32 - 1; | |
| @@ -281,10 +286,9 @@ impl AnsHistogram { | |
| } else { | |
| Self::decode_dist_complex(br, &mut dist)? | |
| }; | |
| - | |
| - if let Some(single_sym_idx) = dist.iter().position(|&d| d == SUM_PROBS) { | |
| - let buckets = dist | |
| - .into_iter() | |
| + let single_symbol = dist.iter().position(|&d| d == SUM_PROBS).map(|x| x as u32); | |
| + let buckets = if let Some(single_sym_idx) = single_symbol { | |
| + dist.into_iter() | |
| .enumerate() | |
| .map(|(i, dist)| Bucket { | |
| dist, | |
| @@ -293,20 +297,19 @@ impl AnsHistogram { | |
| alias_cutoff: 0, | |
| alias_dist_xor: dist ^ SUM_PROBS, | |
| }) | |
| - .collect(); | |
| - return Ok(Self { | |
| - buckets, | |
| - log_bucket_size, | |
| - bucket_mask, | |
| - single_symbol: Some(single_sym_idx as u32), | |
| - }); | |
| - } | |
| + .collect() | |
| + } else { | |
| + Self::build_alias_map(alphabet_size, log_bucket_size, &dist) | |
| + }; | |
| + assert_eq!(buckets.len(), 1 << (LOG_SUM_PROBS - log_bucket_size)); | |
| + // Safety note: log_bucket_size <= LOG_SUM_PROBS by construction, and we | |
| + // just checked that buckets.len() = 2^(LOG_SUM_PROBS - log_bucket_size) | |
| Ok(Self { | |
| - buckets: Self::build_alias_map(alphabet_size, log_bucket_size, &dist), | |
| + buckets, | |
| log_bucket_size, | |
| bucket_mask, | |
| - single_symbol: None, | |
| + single_symbol, | |
| }) | |
| } | |
| @@ -356,7 +359,19 @@ impl AnsHistogram { | |
| let pos = idx & self.bucket_mask; | |
| debug_assert!(self.buckets.len().is_power_of_two()); | |
| - let bucket = self.buckets[i & (self.buckets.len() - 1)]; | |
| + debug_assert!( | |
| + i < self.buckets.len(), | |
| + "bucket index {} out of bounds (len = {})", | |
| + i, | |
| + self.buckets.len() | |
| + ); | |
| + // SAFETY: The struct-level safety invariant (see AnsHistogram::buckets) ensures that | |
| + // buckets.len() = 2^(LOG_SUM_PROBS - log_bucket_size). Since idx = state & 0xfff | |
| + // (12 bits) and i = idx >> log_bucket_size, we have i < buckets.len() always. | |
| + #[allow(unsafe_code)] | |
| + let bucket = unsafe { *self.buckets.get_unchecked(i) }; | |
| +        // Safe bounds-checked alternative (~3% slower for e2 lossless decoding): | |
| + // let bucket = self.buckets[i & (self.buckets.len() - 1)]; | |
| let alias_symbol = bucket.alias_symbol as u32; | |
| let alias_cutoff = bucket.alias_cutoff as u32; | |
| let dist = bucket.dist as u32; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs | |
| index dd23c6247e226..e57a3211aafea 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/decode.rs | |
| @@ -12,6 +12,7 @@ use crate::entropy_coding::huffman::*; | |
| use crate::entropy_coding::hybrid_uint::*; | |
| use crate::error::{Error, Result}; | |
| use crate::headers::encodings::*; | |
| +use crate::util::NewWithCapacity; | |
| use crate::util::tracing_wrappers::*; | |
| pub fn decode_varint16(br: &mut BitReader) -> Result<u16> { | |
| @@ -259,7 +260,7 @@ impl SymbolReader { | |
| min_symbol, | |
| min_length, | |
| dist_multiplier, | |
| - window: Vec::new(), | |
| + window: Vec::new_with_capacity(1 << Lz77State::LOG_WINDOW_SIZE)?, | |
| num_to_copy: 0, | |
| copy_pos: 0, | |
| num_decoded: 0, | |
| @@ -278,30 +279,50 @@ impl SymbolReader { | |
| } | |
| impl SymbolReader { | |
| - #[inline] | |
| - pub fn read_unsigned( | |
| + #[inline(always)] | |
| + pub fn read_unsigned_inline( | |
| &mut self, | |
| histograms: &Histograms, | |
| br: &mut BitReader, | |
| context: usize, | |
| ) -> u32 { | |
| let cluster = histograms.map_context_to_cluster(context); | |
| - self.read_unsigned_clustered(histograms, br, cluster) | |
| + self.read_unsigned_clustered_inline(histograms, br, cluster) | |
| + } | |
| + | |
| + #[inline(never)] | |
| + pub fn read_unsigned( | |
| + &mut self, | |
| + histograms: &Histograms, | |
| + br: &mut BitReader, | |
| + context: usize, | |
| + ) -> u32 { | |
| + self.read_unsigned_inline(histograms, br, context) | |
| } | |
| #[inline(always)] | |
| - pub fn read_signed( | |
| + pub fn read_signed_inline( | |
| &mut self, | |
| histograms: &Histograms, | |
| br: &mut BitReader, | |
| context: usize, | |
| ) -> i32 { | |
| - let unsigned = self.read_unsigned(histograms, br, context); | |
| + let unsigned = self.read_unsigned_inline(histograms, br, context); | |
| unpack_signed(unsigned) | |
| } | |
| - #[inline] | |
| - pub fn read_unsigned_clustered( | |
| + #[inline(never)] | |
| + pub fn read_signed( | |
| + &mut self, | |
| + histograms: &Histograms, | |
| + br: &mut BitReader, | |
| + context: usize, | |
| + ) -> i32 { | |
| + self.read_signed_inline(histograms, br, context) | |
| + } | |
| + | |
| + #[inline(always)] | |
| + pub fn read_unsigned_clustered_inline( | |
| &mut self, | |
| histograms: &Histograms, | |
| br: &mut BitReader, | |
| @@ -382,14 +403,69 @@ impl SymbolReader { | |
| } | |
| } | |
| + #[inline(never)] | |
| + pub fn read_unsigned_clustered( | |
| + &mut self, | |
| + histograms: &Histograms, | |
| + br: &mut BitReader, | |
| + cluster: usize, | |
| + ) -> u32 { | |
| + self.read_unsigned_clustered_inline(histograms, br, cluster) | |
| + } | |
| + | |
| #[inline(always)] | |
| + pub fn read_signed_clustered_inline( | |
| + &mut self, | |
| + histograms: &Histograms, | |
| + br: &mut BitReader, | |
| + cluster: usize, | |
| + ) -> i32 { | |
| + let unsigned = self.read_unsigned_clustered_inline(histograms, br, cluster); | |
| + unpack_signed(unsigned) | |
| + } | |
| + | |
| + #[inline(never)] | |
| pub fn read_signed_clustered( | |
| &mut self, | |
| histograms: &Histograms, | |
| br: &mut BitReader, | |
| cluster: usize, | |
| ) -> i32 { | |
| - let unsigned = self.read_unsigned_clustered(histograms, br, cluster); | |
| + self.read_signed_clustered_inline(histograms, br, cluster) | |
| + } | |
| + | |
| + /// Specialized fast path for when all HybridUint configs are 420. | |
| + /// | |
| + /// # Preconditions | |
| + /// - `histograms.can_use_config_420_fast_path()` must be true (no LZ77, all configs are 420) | |
| + /// - This assumes `SymbolReaderState::None` (verified by debug_assert) | |
| + #[inline(always)] | |
| + pub fn read_unsigned_clustered_config_420( | |
| + &mut self, | |
| + histograms: &Histograms, | |
| + br: &mut BitReader, | |
| + cluster: usize, | |
| + ) -> u32 { | |
| + debug_assert!(matches!(self.state, SymbolReaderState::None)); | |
| + debug_assert!(histograms.can_use_config_420_fast_path()); | |
| + | |
| + let token = match &histograms.codes { | |
| + Codes::Huffman(hc) => hc.read(br, cluster), | |
| + Codes::Ans(ans) => self.ans_reader.read(ans, br, cluster), | |
| + }; | |
| + HybridUint::read_config_420(token, br) | |
| + } | |
| + | |
| + /// Specialized fast path for signed reads when all configs are 420. | |
| + /// See [`read_unsigned_clustered_config_420`] for preconditions. | |
| + #[inline(always)] | |
| + pub fn read_signed_clustered_config_420( | |
| + &mut self, | |
| + histograms: &Histograms, | |
| + br: &mut BitReader, | |
| + cluster: usize, | |
| + ) -> i32 { | |
| + let unsigned = self.read_unsigned_clustered_config_420(histograms, br, cluster); | |
| unpack_signed(unsigned) | |
| } | |
| @@ -553,6 +629,17 @@ impl Histograms { | |
| pub fn num_histograms(&self) -> usize { | |
| *self.context_map.iter().max().unwrap() as usize + 1 | |
| } | |
| + | |
| + pub fn resize(&mut self, num_contexts: usize) { | |
| + self.context_map.resize(num_contexts, 0); | |
| + } | |
| + | |
| + /// Returns true if the config 420 fast path can be safely used. | |
| + /// Config 420: split_exponent=4, msb_in_token=2, lsb_in_token=0 (common pattern) | |
| + /// Requires: all configs are 420 AND LZ77 is disabled | |
| + pub fn can_use_config_420_fast_path(&self) -> bool { | |
| + !self.lz77_params.enabled && self.uint_configs.iter().all(|cfg| cfg.is_config_420()) | |
| + } | |
| } | |
| #[cfg(test)] | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs | |
| index fc6e7f6db4fba..447bca94abefa 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/hybrid_uint.rs | |
| @@ -53,6 +53,33 @@ impl HybridUint { | |
| }) | |
| } | |
| + /// Returns true if this config matches the 420 pattern (common in e3 images): | |
| + /// split_exponent=4, msb_in_token=2, lsb_in_token=0 | |
| + #[inline(always)] | |
| + pub fn is_config_420(&self) -> bool { | |
| + self.split_exponent == 4 | |
| + && self.split_token == 16 | |
| + && self.msb_in_token == 2 | |
| + && self.lsb_in_token == 0 | |
| + } | |
| + | |
| + /// Specialized fast path for 420 config: | |
| + /// split_exponent=4, msb_in_token=2, lsb_in_token=0 | |
| + #[inline(always)] | |
| + pub fn read_config_420(symbol: u32, br: &mut BitReader) -> u32 { | |
| + if symbol < 16 { | |
| + return symbol; | |
| + } | |
| + | |
| + // Equivalent to: 2 + ((symbol - 16) >> 2) | |
| + let nbits = (symbol >> 2) - 2; | |
| + let nbits = nbits & 31; | |
| + let bits = br.read_optimistic(nbits as usize) as u32; | |
| + let hi = (symbol & 3) | 4; | |
| + | |
| + (hi << nbits) | bits | |
| + } | |
| + | |
| #[inline] | |
| pub fn read(&self, symbol: u32, br: &mut BitReader) -> u32 { | |
| if symbol < self.split_token { | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs | |
| index c21679bf8c844..a8e299c70f73e 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/error.rs | |
| @@ -133,16 +133,12 @@ pub enum Error { | |
| // Generic arithmetic overflow. Prefer using other errors if possible. | |
| #[error("Arithmetic overflow")] | |
| ArithmeticOverflow, | |
| - #[error("Empty frame sequence")] | |
| - NoFrames, | |
| #[error( | |
| "Pipeline channel type mismatch: stage {0} channel {1}, expected {2:?} but found {3:?}" | |
| )] | |
| PipelineChannelTypeMismatch(String, usize, DataTypeTag, DataTypeTag), | |
| #[error("Invalid stage {0} after extend stage")] | |
| PipelineInvalidStageAfterExtend(String), | |
| - #[error("Channel {0} was not used in the render pipeline")] | |
| - PipelineChannelUnused(usize), | |
| #[error("Trying to copy rects of different size, src: {0}x{1} dst {2}x{3}")] | |
| CopyOfDifferentSize(usize, usize, usize, usize), | |
| #[error("LF quantization factor is too small: {0}")] | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs | |
| index c28bf701bd2b2..55dcedc8153ad 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/epf.rs | |
| @@ -25,6 +25,15 @@ pub enum SigmaSource { | |
| Constant(f32), | |
| } | |
| +#[allow(clippy::excessive_precision)] | |
| +const INV_SIGMA_NUM: f32 = -1.1715728752538099024; | |
| + | |
| +impl Default for SigmaSource { | |
| + fn default() -> Self { | |
| + Self::Constant(INV_SIGMA_NUM / 2.0) | |
| + } | |
| +} | |
| + | |
| impl SigmaSource { | |
| pub fn new( | |
| frame_header: &FrameHeader, | |
| @@ -32,9 +41,6 @@ impl SigmaSource { | |
| hf_meta: &Option<HfMetadata>, | |
| ) -> Result<Self> { | |
| let rf = &frame_header.restoration_filter; | |
| - #[allow(clippy::excessive_precision)] | |
| - const INV_SIGMA_NUM: f32 = -1.1715728752538099024; | |
| - | |
| if frame_header.encoding == Encoding::VarDCT { | |
| let size_blocks = frame_header.size_blocks(); | |
| let sigma_xsize = size_blocks.0; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs | |
| index b73a2d7306ec2..d981ef3d96bbd 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/patches.rs | |
| @@ -172,6 +172,13 @@ pub struct PatchesDictionary { | |
| } | |
| impl PatchesDictionary { | |
| + pub fn new(num_extra_channels: usize) -> Self { | |
| + Self { | |
| + blendings_stride: num_extra_channels + 1, | |
| + ..Default::default() | |
| + } | |
| + } | |
| + | |
| #[cfg(test)] | |
| pub fn random<R: rand::Rng>( | |
| size: (usize, usize), | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs | |
| index b4a19b95e49e4..3e947d6cf23a3 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/features/spline.rs | |
| @@ -14,8 +14,9 @@ use crate::{ | |
| entropy_coding::decode::{Histograms, SymbolReader, unpack_signed}, | |
| error::{Error, Result}, | |
| frame::color_correlation_map::ColorCorrelationParams, | |
| - util::{CeilLog2, NewWithCapacity, fast_cos, fast_erff, tracing_wrappers::*}, | |
| + util::{CeilLog2, NewWithCapacity, fast_cos, fast_erff_simd, tracing_wrappers::*}, | |
| }; | |
| +use jxl_simd::{F32SimdVec, ScalarDescriptor, SimdDescriptor, simd_function}; | |
| const MAX_NUM_CONTROL_POINTS: u32 = 1 << 20; | |
| const MAX_NUM_CONTROL_POINTS_PER_PIXEL_RATIO: u32 = 2; | |
| const DELTA_LIMIT: i64 = 1 << 30; | |
| @@ -520,7 +521,111 @@ impl Dct32 { | |
| } | |
| } | |
| +#[inline(always)] | |
| +fn draw_segment_inner<D: SimdDescriptor>( | |
| + d: D, | |
| + row: &mut [&mut [f32]], | |
| + row_pos: (usize, usize), | |
| + x_range: (usize, usize), | |
| + segment: &SplineSegment, | |
| +) -> usize { | |
| + let (x_start, x_end) = x_range; | |
| + let (row_x0, y) = row_pos; | |
| + let len = D::F32Vec::LEN; | |
| + if x_start + len > x_end { | |
| + return x_start; | |
| + } | |
| + | |
| + let inv_sigma = D::F32Vec::splat(d, segment.inv_sigma); | |
| + let half = D::F32Vec::splat(d, 0.5); | |
| + let one_over_2s2 = D::F32Vec::splat(d, 0.353_553_38); | |
| + let sigma_over_4_times_intensity = D::F32Vec::splat(d, segment.sigma_over_4_times_intensity); | |
| + let center_x = D::F32Vec::splat(d, segment.center_x); | |
| + let center_y = D::F32Vec::splat(d, segment.center_y); | |
| + let dy = D::F32Vec::splat(d, y as f32) - center_y; | |
| + let dy2 = dy * dy; | |
| + | |
| + let mut x_base_arr = [0.0f32; 16]; | |
| + for (i, val) in x_base_arr.iter_mut().enumerate() { | |
| + *val = i as f32; | |
| + } | |
| + let vx_base = D::F32Vec::load(d, &x_base_arr); | |
| + | |
| + let start_offset = x_start - row_x0; | |
| + let end_offset = x_end - row_x0; | |
| + | |
| + let [r0, r1, r2] = row else { unreachable!() }; | |
| + | |
| + let mut it0 = r0[start_offset..end_offset].chunks_exact_mut(len); | |
| + let mut it1 = r1[start_offset..end_offset].chunks_exact_mut(len); | |
| + let mut it2 = r2[start_offset..end_offset].chunks_exact_mut(len); | |
| + | |
| + let cm0 = D::F32Vec::splat(d, segment.color[0]); | |
| + let cm1 = D::F32Vec::splat(d, segment.color[1]); | |
| + let cm2 = D::F32Vec::splat(d, segment.color[2]); | |
| + | |
| + let num_chunks = (end_offset - start_offset) / len; | |
| + let mut x = x_start; | |
| + for _ in 0..num_chunks { | |
| + let vx = D::F32Vec::splat(d, x as f32) + vx_base; | |
| + let dx = vx - center_x; | |
| + let sqd = dx.mul_add(dx, dy2); | |
| + let distance = sqd.sqrt(); | |
| + | |
| + let arg1 = distance.mul_add(half, one_over_2s2) * inv_sigma; | |
| + let arg2 = distance.mul_add(half, D::F32Vec::splat(d, -0.353_553_38)) * inv_sigma; | |
| + let one_dimensional_factor = fast_erff_simd(d, arg1) - fast_erff_simd(d, arg2); | |
| + let local_intensity = | |
| + sigma_over_4_times_intensity * one_dimensional_factor * one_dimensional_factor; | |
| + | |
| + let c0 = it0.next().unwrap(); | |
| + cm0.mul_add(local_intensity, D::F32Vec::load(d, c0)) | |
| + .store(c0); | |
| + let c1 = it1.next().unwrap(); | |
| + cm1.mul_add(local_intensity, D::F32Vec::load(d, c1)) | |
| + .store(c1); | |
| + let c2 = it2.next().unwrap(); | |
| + cm2.mul_add(local_intensity, D::F32Vec::load(d, c2)) | |
| + .store(c2); | |
| + | |
| + x += len; | |
| + } | |
| + x | |
| +} | |
| + | |
| +simd_function!( | |
| + draw_segment_dispatch, | |
| + d: D, | |
| + fn draw_segment_simd( | |
| + row: &mut [&mut [f32]], | |
| + row_pos: (usize, usize), | |
| + xsize: usize, | |
| + segment: &SplineSegment, | |
| + ) { | |
| + let (x0, y) = row_pos; | |
| + let x1 = x0 + xsize; | |
| + let clamped_x0 = x0.max((segment.center_x - segment.maximum_distance).round() as usize); | |
| + let clamped_x1 = x1.min((segment.center_x + segment.maximum_distance).round() as usize + 1); | |
| + | |
| + if clamped_x1 <= clamped_x0 { | |
| + return; | |
| + } | |
| + | |
| + let x = clamped_x0; | |
| + let x = draw_segment_inner(d, row, (x0, y), (x, clamped_x1), segment); | |
| + let d = d.maybe_downgrade_256bit(); | |
| + let x = draw_segment_inner(d, row, (x0, y), (x, clamped_x1), segment); | |
| + let d = d.maybe_downgrade_128bit(); | |
| + let x = draw_segment_inner(d, row, (x0, y), (x, clamped_x1), segment); | |
| + draw_segment_inner(ScalarDescriptor, row, (x0, y), (x, clamped_x1), segment); | |
| + } | |
| +); | |
| + | |
| impl Splines { | |
| + pub fn is_initialized(&self) -> bool { | |
| + !self.segment_y_start.is_empty() | |
| + } | |
| + | |
| #[cfg(test)] | |
| pub fn create( | |
| quantization_adjustment: i32, | |
| @@ -540,7 +645,7 @@ impl Splines { | |
| let first_segment_index_pos = self.segment_y_start[row_pos.1]; | |
| let last_segment_index_pos = self.segment_y_start[row_pos.1 + 1]; | |
| for segment_index_pos in first_segment_index_pos..last_segment_index_pos { | |
| - self.draw_segment( | |
| + draw_segment_dispatch( | |
| row, | |
| row_pos, | |
| xsize, | |
| @@ -548,48 +653,6 @@ impl Splines { | |
| ); | |
| } | |
| } | |
| - fn draw_segment( | |
| - &self, | |
| - row: &mut [&mut [f32]], | |
| - row_pos: (usize, usize), | |
| - xsize: usize, | |
| - segment: &SplineSegment, | |
| - ) { | |
| - let (x0, y) = row_pos; | |
| - let x1 = x0 + xsize; | |
| - let clamped_x0 = x0.max((segment.center_x - segment.maximum_distance).round() as usize); | |
| - // one-past-the-end | |
| - let clamped_x1 = x1.min((segment.center_x + segment.maximum_distance).round() as usize + 1); | |
| - for x in clamped_x0..clamped_x1 { | |
| - self.draw_segment_at(row, (x, y), x0, segment); | |
| - } | |
| - } | |
| - fn draw_segment_at( | |
| - &self, | |
| - row: &mut [&mut [f32]], | |
| - pixel_pos: (usize, usize), | |
| - row_x0: usize, | |
| - segment: &SplineSegment, | |
| - ) { | |
| - let (x, y) = pixel_pos; | |
| - let inv_sigma = segment.inv_sigma; | |
| - let half = 0.5f32; | |
| - let one_over_2s2 = 0.353_553_38_f32; | |
| - let sigma_over_4_times_intensity = segment.sigma_over_4_times_intensity; | |
| - let dx = x as f32 - segment.center_x; | |
| - let dy = y as f32 - segment.center_y; | |
| - let sqd = dx * dx + dy * dy; | |
| - let distance = sqd.sqrt(); | |
| - let one_dimensional_factor = fast_erff((distance * half + one_over_2s2) * inv_sigma) | |
| - - fast_erff((distance * half - one_over_2s2) * inv_sigma); | |
| - let local_intensity = | |
| - sigma_over_4_times_intensity * one_dimensional_factor * one_dimensional_factor; | |
| - for (channel_index, row) in row.iter_mut().enumerate() { | |
| - let cm = segment.color[channel_index]; | |
| - let inp = row[x - row_x0]; | |
| - row[x - row_x0] = cm * local_intensity + inp; | |
| - } | |
| - } | |
| fn add_segment( | |
| &mut self, | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs | |
| index c48e1e22ddc70..9051f59650354 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/block_context_map.rs | |
| @@ -12,7 +12,11 @@ use crate::{ | |
| }; | |
| pub const NON_ZERO_BUCKETS: usize = 37; | |
| + | |
| +// Supremum of zero_density_context(x, y) + 1, when x + y <= 64. | |
| pub const ZERO_DENSITY_CONTEXT_COUNT: usize = 458; | |
| +// Supremum of zero_density_context(x, y) + 1. | |
| +pub const ZERO_DENSITY_CONTEXT_LIMIT: usize = 474; | |
| pub const COEFF_FREQ_CONTEXT: [usize; 64] = [ | |
| 0xBAD, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, | |
| @@ -27,7 +31,7 @@ pub const COEFF_NUM_NONZERO_CONTEXT: [usize; 64] = [ | |
| 206, 206, 206, 206, 206, 206, | |
| ]; | |
| -#[inline] | |
| +#[inline(always)] | |
| pub fn zero_density_context( | |
| nonzeros_left: usize, | |
| k: usize, | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs | |
| index f2435eb5ddf48..f58b1044de7cd 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs | |
| @@ -3,6 +3,7 @@ | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| +use std::collections::BTreeSet; | |
| use std::sync::Arc; | |
| use super::render::pipeline; | |
| @@ -16,9 +17,14 @@ use super::{ | |
| quantizer::{LfQuantFactors, QuantizerParams}, | |
| }; | |
| use crate::error::Error; | |
| +use crate::features::epf::SigmaSource; | |
| +use crate::frame::block_context_map::{ZERO_DENSITY_CONTEXT_COUNT, ZERO_DENSITY_CONTEXT_LIMIT}; | |
| +use crate::headers::frame_header::FrameType; | |
| #[cfg(test)] | |
| use crate::render::SimpleRenderPipeline; | |
| use crate::render::buffer_splitter::BufferSplitter; | |
| +use crate::util::AtomicRefCell; | |
| +use crate::util::{ShiftRightCeil, mirror}; | |
| use crate::{ | |
| GROUP_DIM, | |
| bit_reader::BitReader, | |
| @@ -39,6 +45,111 @@ use crate::{ | |
| }; | |
| use jxl_transforms::transform_map::*; | |
| +use crate::headers::CustomTransformData; | |
| +use crate::render::RenderPipelineInOutStage; | |
| +use crate::render::stages::Upsample8x; | |
| +use crate::render::{Channels, ChannelsMut}; | |
| + | |
| +fn upsample_lf_group( | |
| + group: usize, | |
| + pixels: &mut [Image<f32>; 3], | |
| + lf_image: &[Image<f32>; 3], | |
| + header: &FrameHeader, | |
| + factors: &CustomTransformData, | |
| +) -> Result<()> { | |
| + let group_dim = header.group_dim(); | |
| + let lf_group_dim = group_dim / 8; | |
| + let (width_groups, _) = header.size_groups(); | |
| + let gx = group % width_groups; | |
| + let gy = group / width_groups; | |
| + | |
| + let upsample = Upsample8x::new(factors, 0); | |
| + let mut state = upsample.init_local_state(0)?.unwrap(); | |
| + | |
| + let max_width = pixels.iter().map(|x| x.size().0).max().unwrap(); | |
| + | |
| + // Temporary buffer for 8 output rows | |
| + // We reuse this buffer for each iteration to minimize allocation | |
| + let mut temp_out_buf: [_; 8] = std::array::from_fn(|_| vec![0.0f32; max_width + 128]); | |
| + | |
| + let mut input_rows_storage: [_; 5] = std::array::from_fn(|_| vec![0.0; max_width / 8 + 32]); | |
| + | |
| + for c in 0..3 { | |
| + let lf_img = &lf_image[c]; | |
| + let out_img = &mut pixels[c]; | |
| + let (out_width, out_height) = out_img.size(); | |
| + | |
| + let vs = header.vshift(c); | |
| + let hs = header.hshift(c); | |
| + | |
| + let lf_group_dim_x = lf_group_dim >> hs; | |
| + let lf_group_dim_y = lf_group_dim >> vs; | |
| + let lf_x0 = gx * lf_group_dim_x; | |
| + let lf_y0 = gy * lf_group_dim_y; | |
| + | |
| + let lf_width = lf_img.size().0.shrc(hs); | |
| + let lf_height = lf_img.size().1.shrc(hs); | |
| + | |
| + let start_x = lf_x0.saturating_sub(2); | |
| + let lf_x1 = (lf_x0 + lf_group_dim_x).min(lf_width); | |
| + let end_x = (lf_x1 + 2).min(lf_width); | |
| + let copy_width = end_x - start_x; | |
| + | |
| + for y in 0..lf_group_dim_y { | |
| + let cy = lf_y0 + y; | |
| + | |
| + for dy in -2..=2 { | |
| + let iy = cy as isize + dy; | |
| + let iy = mirror(iy, lf_height); | |
| + | |
| + let storage = &mut input_rows_storage[(dy + 2) as usize]; | |
| + | |
| + let save_start = if start_x == lf_x0 { 2 } else { 0 }; | |
| + let save_end = save_start + copy_width; | |
| + | |
| + storage[save_start..save_end].copy_from_slice(&lf_img.row(iy)[start_x..end_x]); | |
| + | |
| + if start_x == lf_x0 { | |
| + storage[0] = storage[2 + mirror(-2, copy_width)]; | |
| + storage[1] = storage[2 + mirror(-1, copy_width)]; | |
| + } | |
| + if end_x == lf_x1 { | |
| + storage[save_end] = storage[save_start + mirror(save_end as isize, save_end)]; | |
| + storage[save_end + 1] = | |
| + storage[save_start + mirror(save_end as isize + 1, save_end)]; | |
| + } | |
| + } | |
| + | |
| + let input_rows_refs = input_rows_storage.iter().map(|x| &x[..]).collect(); | |
| + let input_channels = Channels::new(input_rows_refs, 1, 5); | |
| + | |
| + { | |
| + // Prepare output refs | |
| + let output_rows_refs = temp_out_buf.iter_mut().map(|x| &mut x[..]).collect(); | |
| + let mut output_channels = ChannelsMut::new(output_rows_refs, 1, 8); | |
| + | |
| + upsample.process_row_chunk( | |
| + (0, 0), | |
| + lf_x1 - lf_x0, | |
| + &input_channels, | |
| + &mut output_channels, | |
| + Some(state.as_mut()), | |
| + ); | |
| + } | |
| + | |
| + // Copy back to out_img | |
| + let base_y = y * 8; | |
| + for (i, buf) in temp_out_buf.iter().enumerate() { | |
| + let out_y = base_y + i; | |
| + if out_y < out_height { | |
| + out_img.row_mut(out_y)[..out_width].copy_from_slice(&buf[..out_width]); | |
| + } | |
| + } | |
| + } | |
| + } | |
| + Ok(()) | |
| +} | |
| + | |
| impl Frame { | |
| pub fn from_header_and_toc( | |
| frame_header: FrameHeader, | |
| @@ -51,6 +162,9 @@ impl Frame { | |
| } else { | |
| decoder_state.nonvisible_frame_index += 1; | |
| } | |
| + if frame_header.frame_type == FrameType::LFFrame && frame_header.lf_level == 1 { | |
| + decoder_state.lf_frame_was_rendered = false; | |
| + } | |
| let image_metadata = &decoder_state.file_header.image_metadata; | |
| let is_gray = !frame_header.do_ycbcr | |
| && !image_metadata.xyb_encoded | |
| @@ -124,9 +238,13 @@ impl Frame { | |
| None | |
| }; | |
| + let num_extra_channels = image_metadata.extra_channel_info.len(); | |
| + | |
| Ok(Self { | |
| #[cfg(test)] | |
| use_simple_pipeline: decoder_state.use_simple_pipeline, | |
| + last_rendered_pass: vec![None; frame_header.num_groups()], | |
| + incomplete_groups: frame_header.num_groups(), | |
| header: frame_header, | |
| color_channels, | |
| toc, | |
| @@ -139,10 +257,39 @@ impl Frame { | |
| render_pipeline: None, | |
| reference_frame_data, | |
| lf_frame_data, | |
| - lf_global_was_rendered: false, | |
| + was_flushed_once: false, | |
| vardct_buffers: None, | |
| + groups_to_flush: BTreeSet::new(), | |
| + changed_since_last_flush: BTreeSet::new(), | |
| + patches: Arc::new(AtomicRefCell::new(PatchesDictionary::new( | |
| + num_extra_channels, | |
| + ))), | |
| + splines: Arc::new(AtomicRefCell::new(Splines::default())), | |
| + noise: Arc::new(AtomicRefCell::new(Noise::default())), | |
| + lf_quant: Arc::new(AtomicRefCell::new(LfQuantFactors::default())), | |
| + color_correlation_params: Arc::new(AtomicRefCell::new( | |
| + ColorCorrelationParams::default(), | |
| + )), | |
| + epf_sigma: Arc::new(AtomicRefCell::new(SigmaSource::default())), | |
| }) | |
| } | |
| + | |
| + pub fn allow_rendering_before_last_pass(&self) -> bool { | |
| + if self | |
| + .lf_global | |
| + .as_ref() | |
| + .is_none_or(|x| !x.modular_global.can_do_partial_render()) | |
| + { | |
| + return false; | |
| + } | |
| + | |
| + self.header.frame_type == FrameType::RegularFrame | |
| + || (self.header.frame_type == FrameType::LFFrame | |
| + && self.header.lf_level == 1 | |
| + // TODO(veluca): this should probably be "there is no alpha". | |
| + && self.header.num_extra_channels == 0) | |
| + } | |
| + | |
| /// Given a bit reader pointing at the end of the TOC, returns a vector of `BitReader`s, each | |
| /// of which reads a specific section. | |
| pub fn sections<'a>(&self, br: &'a mut BitReader) -> Result<Vec<BitReader<'a>>> { | |
| @@ -166,97 +313,109 @@ impl Frame { | |
| } | |
| Ok(shuffled_ret) | |
| } | |
| + | |
| #[instrument(level = "debug", skip_all)] | |
| - pub fn decode_lf_global(&mut self, br: &mut BitReader) -> Result<()> { | |
| + pub fn decode_lf_global(&mut self, br: &mut BitReader, allow_partial: bool) -> Result<()> { | |
| debug!(section_size = br.total_bits_available()); | |
| - assert!(self.lf_global.is_none()); | |
| - trace!(pos = br.total_bits_read()); | |
| - let patches = if self.header.has_patches() { | |
| - info!("decoding patches"); | |
| - Some(PatchesDictionary::read( | |
| - br, | |
| - self.header.size_padded().0, | |
| - self.header.size_padded().1, | |
| - self.decoder_state.extra_channel_info().len(), | |
| - &self.decoder_state.reference_frames[..], | |
| - )?) | |
| + if let Some(lfg) = &self.lf_global { | |
| + br.skip_bits(lfg.total_bits_read)?; | |
| } else { | |
| - None | |
| - }; | |
| + trace!(pos = br.total_bits_read()); | |
| - let splines = if self.header.has_splines() { | |
| - info!("decoding splines"); | |
| - Some(Splines::read(br, self.header.width * self.header.height)?) | |
| - } else { | |
| - None | |
| - }; | |
| + if self.header.has_patches() { | |
| + info!("decoding patches"); | |
| + let p = PatchesDictionary::read( | |
| + br, | |
| + self.header.size_padded().0, | |
| + self.header.size_padded().1, | |
| + self.decoder_state.extra_channel_info().len(), | |
| + &self.decoder_state.reference_frames[..], | |
| + )?; | |
| + *self.patches.borrow_mut() = p; | |
| + } | |
| - let noise = if self.header.has_noise() { | |
| - info!("decoding noise"); | |
| - Some(Noise::read(br)?) | |
| - } else { | |
| - None | |
| - }; | |
| + if self.header.has_splines() { | |
| + info!("decoding splines"); | |
| + let s = Splines::read(br, self.header.width * self.header.height)?; | |
| + *self.splines.borrow_mut() = s; | |
| + } | |
| - let lf_quant = LfQuantFactors::new(br)?; | |
| - debug!(?lf_quant); | |
| + if self.header.has_noise() { | |
| + info!("decoding noise"); | |
| + let n = Noise::read(br)?; | |
| + *self.noise.borrow_mut() = n; | |
| + } | |
| - let quant_params = if self.header.encoding == Encoding::VarDCT { | |
| - info!("decoding VarDCT quantizer params"); | |
| - Some(QuantizerParams::read(br)?) | |
| - } else { | |
| - None | |
| - }; | |
| - debug!(?quant_params); | |
| + let lf_quant = LfQuantFactors::new(br)?; | |
| + *self.lf_quant.borrow_mut() = lf_quant.clone(); | |
| + debug!(?lf_quant); | |
| - let block_context_map = if self.header.encoding == Encoding::VarDCT { | |
| - info!("decoding block context map"); | |
| - Some(BlockContextMap::read(br)?) | |
| - } else { | |
| - None | |
| - }; | |
| - debug!(?block_context_map); | |
| + let quant_params = if self.header.encoding == Encoding::VarDCT { | |
| + info!("decoding VarDCT quantizer params"); | |
| + Some(QuantizerParams::read(br)?) | |
| + } else { | |
| + None | |
| + }; | |
| + debug!(?quant_params); | |
| - let color_correlation_params = if self.header.encoding == Encoding::VarDCT { | |
| - info!("decoding color correlation params"); | |
| - Some(ColorCorrelationParams::read(br)?) | |
| - } else { | |
| - None | |
| - }; | |
| - debug!(?color_correlation_params); | |
| - | |
| - let tree = if br.read(1)? == 1 { | |
| - let size_limit = (1024 | |
| - + self.header.width as usize | |
| - * self.header.height as usize | |
| - * (self.color_channels + self.decoder_state.extra_channel_info().len()) | |
| - / 16) | |
| - .min(1 << 22); | |
| - Some(Tree::read(br, size_limit)?) | |
| - } else { | |
| - None | |
| - }; | |
| + let block_context_map = if self.header.encoding == Encoding::VarDCT { | |
| + info!("decoding block context map"); | |
| + Some(BlockContextMap::read(br)?) | |
| + } else { | |
| + None | |
| + }; | |
| + debug!(?block_context_map); | |
| - let modular_global = FullModularImage::read( | |
| - &self.header, | |
| - &self.decoder_state.file_header.image_metadata, | |
| - self.modular_color_channels(), | |
| - &tree, | |
| - br, | |
| - )?; | |
| + let color_correlation_params = if self.header.encoding == Encoding::VarDCT { | |
| + info!("decoding color correlation params"); | |
| + let ccp = ColorCorrelationParams::read(br)?; | |
| + *self.color_correlation_params.borrow_mut() = ccp; | |
| + Some(ccp) | |
| + } else { | |
| + None | |
| + }; | |
| + debug!(?color_correlation_params); | |
| + | |
| + let tree = if br.read(1)? == 1 { | |
| + let size_limit = (1024 | |
| + + self.header.width as usize | |
| + * self.header.height as usize | |
| + * (self.color_channels + self.decoder_state.extra_channel_info().len()) | |
| + / 16) | |
| + .min(1 << 22); | |
| + Some(Tree::read(br, size_limit)?) | |
| + } else { | |
| + None | |
| + }; | |
| - self.lf_global = Some(LfGlobalState { | |
| - patches: patches.map(Arc::new), | |
| - splines, | |
| - noise, | |
| - lf_quant, | |
| - quant_params, | |
| - block_context_map, | |
| - color_correlation_params, | |
| - tree, | |
| - modular_global, | |
| - }); | |
| + let modular_global = FullModularImage::read( | |
| + &self.header, | |
| + &self.decoder_state.file_header.image_metadata, | |
| + self.modular_color_channels(), | |
| + br, | |
| + )?; | |
| + | |
| + // Ensure that, if we call this function again, we resume from just after | |
| + // reading modular global data (excluding section 0 channels). | |
| + let total_bits_read = br.total_bits_read(); | |
| + | |
| + self.lf_global = Some(LfGlobalState { | |
| + lf_quant, | |
| + quant_params, | |
| + block_context_map, | |
| + color_correlation_params, | |
| + tree, | |
| + modular_global, | |
| + total_bits_read, | |
| + }); | |
| + } | |
| + | |
| + let lf_global = self.lf_global.as_mut().unwrap(); | |
| + | |
| + lf_global | |
| + .modular_global | |
| + .read_section0(&self.header, &lf_global.tree, br, allow_partial)?; | |
| Ok(()) | |
| } | |
| @@ -281,6 +440,9 @@ impl Frame { | |
| br, | |
| )?; | |
| } | |
| + | |
| + lf_global.modular_global.mark_group_to_be_read(1, group); | |
| + | |
| lf_global.modular_global.read_stream( | |
| ModularStreamId::ModularLF(group), | |
| &self.header, | |
| @@ -305,188 +467,305 @@ impl Frame { | |
| #[instrument(level = "debug", skip_all)] | |
| pub fn decode_hf_global(&mut self, br: &mut BitReader) -> Result<()> { | |
| debug!(section_size = br.total_bits_available()); | |
| - if self.header.encoding == Encoding::Modular { | |
| - return Ok(()); | |
| - } | |
| - let lf_global = self.lf_global.as_mut().unwrap(); | |
| - let dequant_matrices = DequantMatrices::decode(&self.header, lf_global, br)?; | |
| - let block_context_map = lf_global.block_context_map.as_mut().unwrap(); | |
| - let num_histo_bits = self.header.num_groups().ceil_log2(); | |
| - let num_histograms: u32 = br.read(num_histo_bits)? as u32 + 1; | |
| - info!( | |
| - "Processing HFGlobal section with {} passes and {} histograms", | |
| - self.header.passes.num_passes, num_histograms | |
| - ); | |
| - let mut passes: Vec<PassState> = vec![]; | |
| - #[allow(unused_variables)] | |
| - for i in 0..self.header.passes.num_passes as usize { | |
| - let used_orders = match br.read(2)? { | |
| - 0 => 0x5f, | |
| - 1 => 0x13, | |
| - 2 => 0, | |
| - _ => br.read(coeff_order::NUM_ORDERS)?, | |
| - } as u32; | |
| - debug!(used_orders); | |
| - let coeff_orders = decode_coeff_orders(used_orders, br)?; | |
| - assert_eq!(coeff_orders.len(), 3 * coeff_order::NUM_ORDERS); | |
| - let num_contexts = num_histograms as usize * block_context_map.num_ac_contexts(); | |
| + if self.header.encoding == Encoding::VarDCT { | |
| + let lf_global = self.lf_global.as_mut().unwrap(); | |
| + let dequant_matrices = DequantMatrices::decode(&self.header, lf_global, br)?; | |
| + let block_context_map = lf_global.block_context_map.as_mut().unwrap(); | |
| + let num_histo_bits = self.header.num_groups().ceil_log2(); | |
| + let num_histograms: u32 = br.read(num_histo_bits)? as u32 + 1; | |
| info!( | |
| - "Deconding histograms for pass {} with {} contexts", | |
| - i, num_contexts | |
| + "Processing HFGlobal section with {} passes and {} histograms", | |
| + self.header.passes.num_passes, num_histograms | |
| ); | |
| - let histograms = Histograms::decode(num_contexts, br, true)?; | |
| - debug!("Found {} histograms", histograms.num_histograms()); | |
| - passes.push(PassState { | |
| - coeff_orders, | |
| - histograms, | |
| + let mut passes: Vec<PassState> = vec![]; | |
| + #[allow(unused_variables)] | |
| + for i in 0..self.header.passes.num_passes as usize { | |
| + let used_orders = match br.read(2)? { | |
| + 0 => 0x5f, | |
| + 1 => 0x13, | |
| + 2 => 0, | |
| + _ => br.read(coeff_order::NUM_ORDERS)?, | |
| + } as u32; | |
| + debug!(used_orders); | |
| + let coeff_orders = decode_coeff_orders(used_orders, br)?; | |
| + assert_eq!(coeff_orders.len(), 3 * coeff_order::NUM_ORDERS); | |
| + let num_contexts = num_histograms as usize * block_context_map.num_ac_contexts(); | |
| + info!( | |
| + "Decoding histograms for pass {} with {} contexts", | |
| + i, num_contexts | |
| + ); | |
| + let mut histograms = Histograms::decode(num_contexts, br, true)?; | |
| + // Pad the context map to avoid index out of bounds in decode_vardct_group (group.rs#L514@752e6a4). | |
| + let padding = ZERO_DENSITY_CONTEXT_LIMIT - ZERO_DENSITY_CONTEXT_COUNT; | |
| + histograms.resize(num_contexts + padding); | |
| + debug!("Found {} histograms", histograms.num_histograms()); | |
| + passes.push(PassState { | |
| + coeff_orders, | |
| + histograms, | |
| + }); | |
| + } | |
| + // Note that, if we have extra channels that can be rendered progressively, | |
| + // we might end up re-drawing some VarDCT groups. In that case, we need to | |
| + // keep around the coefficients, so allocate coefficients under those conditions | |
| + // too. | |
| + // TODO(veluca): evaluate whether we can make this check more precise. | |
| + let hf_coefficients = if passes.len() <= 1 | |
| + && !(self | |
| + .lf_global | |
| + .as_mut() | |
| + .unwrap() | |
| + .modular_global | |
| + .can_do_partial_render() | |
| + && self.header.num_extra_channels > 0) | |
| + { | |
| + None | |
| + } else { | |
| + let xs = GROUP_DIM * GROUP_DIM; | |
| + let ys = self.header.num_groups(); | |
| + Some(( | |
| + Image::new((xs, ys))?, | |
| + Image::new((xs, ys))?, | |
| + Image::new((xs, ys))?, | |
| + )) | |
| + }; | |
| + | |
| + self.hf_global = Some(HfGlobalState { | |
| + num_histograms, | |
| + passes, | |
| + dequant_matrices, | |
| + hf_coefficients, | |
| }); | |
| } | |
| - let hf_coefficients = if passes.len() <= 1 { | |
| - None | |
| - } else { | |
| - let xs = GROUP_DIM * GROUP_DIM; | |
| - let ys = self.header.num_groups(); | |
| - Some(( | |
| - Image::new((xs, ys))?, | |
| - Image::new((xs, ys))?, | |
| - Image::new((xs, ys))?, | |
| - )) | |
| - }; | |
| - self.hf_global = Some(HfGlobalState { | |
| - num_histograms, | |
| - passes, | |
| - dequant_matrices, | |
| - hf_coefficients, | |
| - }); | |
| + // Set the EPF sigma values if we are doing EPF. | |
| + if self.header.restoration_filter.epf_iters > 0 { | |
| + *self.epf_sigma.borrow_mut() = SigmaSource::new( | |
| + &self.header, | |
| + self.lf_global.as_ref().unwrap(), | |
| + &self.hf_meta, | |
| + )?; | |
| + } | |
| Ok(()) | |
| } | |
| - #[instrument(level = "debug", skip(self, br, buffer_splitter))] | |
| - pub fn decode_hf_group( | |
| + pub fn render_noise_for_group( | |
| &mut self, | |
| group: usize, | |
| - pass: usize, | |
| - mut br: BitReader, | |
| + complete: bool, | |
| buffer_splitter: &mut BufferSplitter, | |
| ) -> Result<()> { | |
| - debug!(section_size = br.total_bits_available()); | |
| - if self.header.has_noise() { | |
| - // TODO(sboukortt): consider making this a dedicated stage | |
| - let num_channels = self.header.num_extra_channels as usize + 3; | |
| - | |
| - let group_dim = self.header.group_dim() as u32; | |
| - let xsize_groups = self.header.size_groups().0; | |
| - let gx = (group % xsize_groups) as u32; | |
| - let gy = (group / xsize_groups) as u32; | |
| - // TODO(sboukortt): test upsampling+noise | |
| - let upsampling = self.header.upsampling; | |
| - let x0 = gx * upsampling * group_dim; | |
| - let y0 = gy * upsampling * group_dim; | |
| - let x1 = ((x0 + upsampling * group_dim) as usize).min(self.header.size_upsampled().0); | |
| - let y1 = ((y0 + upsampling * group_dim) as usize).min(self.header.size_upsampled().1); | |
| - let xsize = x1 - x0 as usize; | |
| - let ysize = y1 - y0 as usize; | |
| - let mut rng = Xorshift128Plus::new_with_seeds( | |
| - self.decoder_state.visible_frame_index as u32, | |
| - self.decoder_state.nonvisible_frame_index as u32, | |
| - x0, | |
| - y0, | |
| - ); | |
| - let bits_to_float = |bits: u32| f32::from_bits((bits >> 9) | 0x3F800000); | |
| - for i in 0..3 { | |
| - let mut buf = pipeline!(self, p, p.get_buffer(num_channels + i)?); | |
| - const FLOATS_PER_BATCH: usize = | |
| - Xorshift128Plus::N * std::mem::size_of::<u64>() / std::mem::size_of::<f32>(); | |
| - let mut batch = [0u64; Xorshift128Plus::N]; | |
| - | |
| - for y in 0..ysize { | |
| - let row = buf.row_mut(y); | |
| - for batch_index in 0..xsize.div_ceil(FLOATS_PER_BATCH) { | |
| - rng.fill(&mut batch); | |
| - let batch_size = | |
| - (xsize - batch_index * FLOATS_PER_BATCH).min(FLOATS_PER_BATCH); | |
| - for i in 0..batch_size { | |
| - let x = FLOATS_PER_BATCH * batch_index + i; | |
| - let k = i / 2; | |
| - let high_bytes = i % 2 != 0; | |
| - let bits = if high_bytes { | |
| - ((batch[k] & 0xFFFFFFFF00000000) >> 32) as u32 | |
| - } else { | |
| - (batch[k] & 0xFFFFFFFF) as u32 | |
| - }; | |
| - row[x] = bits_to_float(bits); | |
| + // TODO(sboukortt): consider making this a dedicated stage | |
| + // TODO(veluca): SIMD. | |
| + let num_channels = self.header.num_extra_channels as usize + 3; | |
| + | |
| + let group_dim = self.header.group_dim() as u32; | |
| + let xsize_groups = self.header.size_groups().0; | |
| + let gx = (group % xsize_groups) as u32; | |
| + let gy = (group / xsize_groups) as u32; | |
| + let upsampling = self.header.upsampling; | |
| + let upsampled_size = self.header.size_upsampled(); | |
| + | |
| + // Total buffer covers the upsampled region for this group | |
| + let buf_x1 = ((gx + 1) * upsampling * group_dim) as usize; | |
| + let buf_y1 = ((gy + 1) * upsampling * group_dim) as usize; | |
| + let buf_xsize = buf_x1.min(upsampled_size.0) - (gx * upsampling * group_dim) as usize; | |
| + let buf_ysize = buf_y1.min(upsampled_size.1) - (gy * upsampling * group_dim) as usize; | |
| + | |
| + let bits_to_float = |bits: u32| f32::from_bits((bits >> 9) | 0x3F800000); | |
| + | |
| + // Get all 3 noise channel buffers upfront | |
| + let mut bufs = [ | |
| + pipeline!(self, p, p.get_buffer(num_channels)?), | |
| + pipeline!(self, p, p.get_buffer(num_channels + 1)?), | |
| + pipeline!(self, p, p.get_buffer(num_channels + 2)?), | |
| + ]; | |
| + | |
| + const FLOATS_PER_BATCH: usize = | |
| + Xorshift128Plus::N * std::mem::size_of::<u64>() / std::mem::size_of::<f32>(); | |
| + let mut batch = [0u64; Xorshift128Plus::N]; | |
| + | |
| + // libjxl iterates through upsampling subdivisions with separate RNG seeds. | |
| + // For each subregion, a single RNG is shared across all 3 channels. | |
| + for iy in 0..upsampling { | |
| + for ix in 0..upsampling { | |
| + // Seed coordinates for this subregion (matches libjxl) | |
| + let x0 = (gx * upsampling + ix) * group_dim; | |
| + let y0 = (gy * upsampling + iy) * group_dim; | |
| + | |
| + // Create RNG with this subregion's seed - shared across all 3 channels | |
| + let mut rng = Xorshift128Plus::new_with_seeds( | |
| + self.decoder_state.visible_frame_index as u32, | |
| + self.decoder_state.nonvisible_frame_index as u32, | |
| + x0, | |
| + y0, | |
| + ); | |
| + | |
| + // Subregion boundaries within the buffer | |
| + let sub_x0 = (ix * group_dim) as usize; | |
| + let sub_y0 = (iy * group_dim) as usize; | |
| + let sub_x1 = ((ix + 1) * group_dim) as usize; | |
| + let sub_y1 = ((iy + 1) * group_dim) as usize; | |
| + | |
| + // Clamp to actual buffer size | |
| + let sub_xsize = sub_x1.min(buf_xsize).saturating_sub(sub_x0); | |
| + let sub_ysize = sub_y1.min(buf_ysize).saturating_sub(sub_y0); | |
| + | |
| + // Skip if this subregion is entirely outside the buffer | |
| + if sub_xsize == 0 || sub_ysize == 0 { | |
| + continue; | |
| + } | |
| + | |
| + // Fill all 3 channels with this subregion's noise, sharing the RNG | |
| + for buf in &mut bufs { | |
| + for y in 0..sub_ysize { | |
| + let row = buf.row_mut(sub_y0 + y); | |
| + for batch_index in 0..sub_xsize.div_ceil(FLOATS_PER_BATCH) { | |
| + rng.fill(&mut batch); | |
| + let batch_size = | |
| + (sub_xsize - batch_index * FLOATS_PER_BATCH).min(FLOATS_PER_BATCH); | |
| + for i in 0..batch_size { | |
| + let x = sub_x0 + FLOATS_PER_BATCH * batch_index + i; | |
| + let k = i / 2; | |
| + let high_bytes = i % 2 != 0; | |
| + let bits = if high_bytes { | |
| + ((batch[k] & 0xFFFFFFFF00000000) >> 32) as u32 | |
| + } else { | |
| + (batch[k] & 0xFFFFFFFF) as u32 | |
| + }; | |
| + row[x] = bits_to_float(bits); | |
| + } | |
| } | |
| } | |
| } | |
| - pipeline!( | |
| - self, | |
| - p, | |
| - p.set_buffer_for_group(num_channels + i, group, 1, buf, buffer_splitter)? | |
| - ) | |
| } | |
| } | |
| + // Set all buffers after filling | |
| + let [buf0, buf1, buf2] = bufs; | |
| + pipeline!( | |
| + self, | |
| + p, | |
| + p.set_buffer_for_group(num_channels, group, complete, buf0, buffer_splitter)? | |
| + ); | |
| + pipeline!( | |
| + self, | |
| + p, | |
| + p.set_buffer_for_group(num_channels + 1, group, complete, buf1, buffer_splitter)? | |
| + ); | |
| + pipeline!( | |
| + self, | |
| + p, | |
| + p.set_buffer_for_group(num_channels + 2, group, complete, buf2, buffer_splitter)? | |
| + ); | |
| + Ok(()) | |
| + } | |
| + | |
| + // Returns `true` if VarDCT and noise data were effectively rendered. | |
| + #[instrument(level = "debug", skip(self, passes, buffer_splitter))] | |
| + pub fn decode_hf_group( | |
| + &mut self, | |
| + group: usize, | |
| + passes: &mut [(usize, BitReader)], | |
| + buffer_splitter: &mut BufferSplitter, | |
| + force_render: bool, | |
| + ) -> Result<bool> { | |
| + if passes.is_empty() { | |
| + assert!(force_render); | |
| + } | |
| + | |
| + let last_pass_in_file = self.header.passes.num_passes as usize - 1; | |
| + let was_complete = self.last_rendered_pass[group].is_some_and(|p| p >= last_pass_in_file); | |
| + | |
| + if let Some((p, _)) = passes.last() { | |
| + self.last_rendered_pass[group] = Some(*p); | |
| + }; | |
| + let pass_to_render = self.last_rendered_pass[group]; | |
| + let complete = pass_to_render.is_some_and(|p| p >= last_pass_in_file); | |
| + | |
| + if complete && !was_complete { | |
| + self.incomplete_groups = self.incomplete_groups.checked_sub(1).unwrap(); | |
| + } | |
| + | |
| + // Render if we are decoding the last pass, or if an eager render was | |
| + // requested and this frame supports rendering before the last pass. | |
| + let do_render = if complete { | |
| + true | |
| + } else if force_render { | |
| + self.allow_rendering_before_last_pass() | |
| + } else { | |
| + false | |
| + }; | |
| + | |
| + if !do_render && passes.is_empty() { | |
| + return Ok(false); | |
| + } | |
| + | |
| + if self.header.has_noise() && do_render { | |
| + self.render_noise_for_group(group, complete, buffer_splitter)?; | |
| + } | |
| + | |
| let lf_global = self.lf_global.as_mut().unwrap(); | |
| if self.header.encoding == Encoding::VarDCT { | |
| - info!("Decoding VarDCT group {group}, pass {pass}"); | |
| - let hf_global = self.hf_global.as_mut().unwrap(); | |
| - let hf_meta = self.hf_meta.as_mut().unwrap(); | |
| - let mut pixels = [ | |
| - pipeline!(self, p, p.get_buffer(0))?, | |
| - pipeline!(self, p, p.get_buffer(1))?, | |
| - pipeline!(self, p, p.get_buffer(2))?, | |
| - ]; | |
| - let buffers = self.vardct_buffers.get_or_insert_with(VarDctBuffers::new); | |
| - decode_vardct_group( | |
| - group, | |
| - pass, | |
| - &self.header, | |
| - lf_global, | |
| - hf_global, | |
| - hf_meta, | |
| - &self.lf_image, | |
| - &self.quant_lf, | |
| - &self | |
| - .decoder_state | |
| - .file_header | |
| - .transform_data | |
| - .opsin_inverse_matrix | |
| - .quant_biases, | |
| - &mut pixels, | |
| - &mut br, | |
| - buffers, | |
| - )?; | |
| - if self.decoder_state.enable_output | |
| - && pass + 1 == self.header.passes.num_passes as usize | |
| - { | |
| + let mut pixels = if do_render { | |
| + Some([ | |
| + pipeline!(self, p, p.get_buffer(0))?, | |
| + pipeline!(self, p, p.get_buffer(1))?, | |
| + pipeline!(self, p, p.get_buffer(2))?, | |
| + ]) | |
| + } else { | |
| + None | |
| + }; | |
| + if pass_to_render.is_none() && do_render { | |
| + info!("Upsampling LF for group {group}"); | |
| + upsample_lf_group( | |
| + group, | |
| + pixels.as_mut().unwrap(), | |
| + self.lf_image.as_ref().unwrap(), | |
| + &self.header, | |
| + &self.decoder_state.file_header.transform_data, | |
| + )?; | |
| + } else { | |
| + info!("Decoding VarDCT group {group}"); | |
| + let hf_global = self.hf_global.as_mut().unwrap(); | |
| + let hf_meta = self.hf_meta.as_mut().unwrap(); | |
| + let buffers = self.vardct_buffers.get_or_insert_with(VarDctBuffers::new); | |
| + decode_vardct_group( | |
| + group, | |
| + passes, | |
| + &self.header, | |
| + lf_global, | |
| + hf_global, | |
| + hf_meta, | |
| + &self.lf_image, | |
| + &self.quant_lf, | |
| + &self | |
| + .decoder_state | |
| + .file_header | |
| + .transform_data | |
| + .opsin_inverse_matrix | |
| + .quant_biases, | |
| + &mut pixels, | |
| + buffers, | |
| + )?; | |
| + } | |
| + if let Some(pixels) = pixels { | |
| for (c, img) in pixels.into_iter().enumerate() { | |
| pipeline!( | |
| self, | |
| p, | |
| - p.set_buffer_for_group(c, group, 1, img, buffer_splitter)? | |
| + p.set_buffer_for_group(c, group, complete, img, buffer_splitter)? | |
| ); | |
| } | |
| } | |
| } | |
| - lf_global.modular_global.read_stream( | |
| - ModularStreamId::ModularHF { group, pass }, | |
| - &self.header, | |
| - &lf_global.tree, | |
| - &mut br, | |
| - )?; | |
| - lf_global.modular_global.process_output( | |
| - 2 + pass, | |
| - group, | |
| - &self.header, | |
| - &mut |chan, group, num_passes, image| { | |
| - pipeline!( | |
| - self, | |
| - p, | |
| - p.set_buffer_for_group(chan, group, num_passes, image, buffer_splitter)? | |
| - ); | |
| - Ok(()) | |
| - }, | |
| - )?; | |
| - Ok(()) | |
| + | |
| + for (pass, br) in passes.iter_mut() { | |
| + lf_global.modular_global.read_stream( | |
| + ModularStreamId::ModularHF { group, pass: *pass }, | |
| + &self.header, | |
| + &lf_global.tree, | |
| + br, | |
| + )?; | |
| + } | |
| + Ok(do_render) | |
| } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs | |
| index b98d4541894b6..b7d8021b33962 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs | |
| @@ -18,7 +18,7 @@ use crate::{ | |
| }, | |
| headers::frame_header::FrameHeader, | |
| image::{Image, ImageRect, Rect}, | |
| - util::{CeilLog2, ShiftRightCeil, tracing_wrappers::*}, | |
| + util::{CeilLog2, ShiftRightCeil, SmallVec, tracing_wrappers::*}, | |
| }; | |
| use jxl_simd::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, simd_function}; | |
| @@ -305,11 +305,69 @@ simd_function!( | |
| } | |
| ); | |
| +struct PassInfo<'a, 'b> { | |
| + histogram_index: usize, | |
| + reader: Option<SymbolReader>, | |
| + br: &'a mut BitReader<'b>, | |
| + shift: u32, | |
| + pass: usize, | |
| + // TODO(veluca): reuse this allocation. | |
| + num_nzeros: [Image<u32>; 3], | |
| +} | |
| + | |
| +impl<'a, 'b> PassInfo<'a, 'b> { | |
| + fn new( | |
| + hf_global: &HfGlobalState, | |
| + frame_header: &FrameHeader, | |
| + block_group_rect: Rect, | |
| + pass: usize, | |
| + br: &'a mut BitReader<'b>, | |
| + ) -> Result<Self> { | |
| + let num_histo_bits = hf_global.num_histograms.ceil_log2(); | |
| + debug!(?pass); | |
| + let histogram_index = br.read(num_histo_bits as usize)? as usize; | |
| + debug!(?histogram_index); | |
| + let reader = Some(SymbolReader::new( | |
| + &hf_global.passes[pass].histograms, | |
| + br, | |
| + None, | |
| + )?); | |
| + let shift = if pass < frame_header.passes.shift.len() { | |
| + frame_header.passes.shift[pass] | |
| + } else { | |
| + 0 | |
| + }; | |
| + let num_nzeros = [ | |
| + Image::new(( | |
| + block_group_rect.size.0 >> frame_header.hshift(0), | |
| + block_group_rect.size.1 >> frame_header.vshift(0), | |
| + ))?, | |
| + Image::new(( | |
| + block_group_rect.size.0 >> frame_header.hshift(1), | |
| + block_group_rect.size.1 >> frame_header.vshift(1), | |
| + ))?, | |
| + Image::new(( | |
| + block_group_rect.size.0 >> frame_header.hshift(2), | |
| + block_group_rect.size.1 >> frame_header.vshift(2), | |
| + ))?, | |
| + ]; | |
| + | |
| + Ok(Self { | |
| + histogram_index, | |
| + reader, | |
| + br, | |
| + shift, | |
| + pass, | |
| + num_nzeros, | |
| + }) | |
| + } | |
| +} | |
| + | |
| #[allow(clippy::too_many_arguments)] | |
| #[allow(clippy::type_complexity)] | |
| pub fn decode_vardct_group( | |
| group: usize, | |
| - pass: usize, | |
| + passes: &mut [(usize, BitReader)], | |
| frame_header: &FrameHeader, | |
| lf_global: &mut LfGlobalState, | |
| hf_global: &mut HfGlobalState, | |
| @@ -317,19 +375,19 @@ pub fn decode_vardct_group( | |
| lf_image: &Option<[Image<f32>; 3]>, | |
| quant_lf: &Image<u8>, | |
| quant_biases: &[f32; 4], | |
| - pixels: &mut [Image<f32>; 3], | |
| - br: &mut BitReader, | |
| + pixels: &mut Option<[Image<f32>; 3]>, | |
| buffers: &mut VarDctBuffers, | |
| ) -> Result<(), Error> { | |
| let x_dm_multiplier = (1.0 / (1.25)).powf(frame_header.x_qm_scale as f32 - 2.0); | |
| let b_dm_multiplier = (1.0 / (1.25)).powf(frame_header.b_qm_scale as f32 - 2.0); | |
| - let num_histo_bits = hf_global.num_histograms.ceil_log2(); | |
| - let histogram_index: usize = br.read(num_histo_bits as usize)? as usize; | |
| - debug!(?histogram_index); | |
| - let mut reader = SymbolReader::new(&hf_global.passes[pass].histograms, br, None)?; | |
| let block_group_rect = frame_header.block_group_rect(group); | |
| debug!(?block_group_rect); | |
| + let mut pass_info = passes | |
| + .iter_mut() | |
| + .map(|(pass, br)| PassInfo::new(hf_global, frame_header, block_group_rect, *pass, br)) | |
| + .collect::<Result<SmallVec<_, 4>>>()?; | |
| + | |
| // Reset and use pooled buffers | |
| buffers.reset(); | |
| let scratch = &mut buffers.scratch; | |
| @@ -350,23 +408,9 @@ pub fn decode_vardct_group( | |
| let ytob_map = hf_meta.ytob_map.get_rect(cmap_rect); | |
| let transform_map = hf_meta.transform_map.get_rect(block_group_rect); | |
| let raw_quant_map = hf_meta.raw_quant_map.get_rect(block_group_rect); | |
| - let mut num_nzeros: [Image<u32>; 3] = [ | |
| - Image::new(( | |
| - block_group_rect.size.0 >> frame_header.hshift(0), | |
| - block_group_rect.size.1 >> frame_header.vshift(0), | |
| - ))?, | |
| - Image::new(( | |
| - block_group_rect.size.0 >> frame_header.hshift(1), | |
| - block_group_rect.size.1 >> frame_header.vshift(1), | |
| - ))?, | |
| - Image::new(( | |
| - block_group_rect.size.0 >> frame_header.hshift(2), | |
| - block_group_rect.size.1 >> frame_header.vshift(2), | |
| - ))?, | |
| - ]; | |
| let quant_lf_rect = quant_lf.get_rect(block_group_rect); | |
| let block_context_map = lf_global.block_context_map.as_mut().unwrap(); | |
| - let context_offset = histogram_index * block_context_map.num_ac_contexts(); | |
| + // TODO(veluca): improve coefficient storage (smaller allocations, use 16 bits if possible). | |
| let coeffs = match hf_global.hf_coefficients.as_mut() { | |
| Some(hf_coefficients) => [ | |
| hf_coefficients.0.row_mut(group), | |
| @@ -380,11 +424,6 @@ pub fn decode_vardct_group( | |
| [coeffs_x, coeffs_y, coeffs_b] | |
| } | |
| }; | |
| - let shift_for_pass = if pass < frame_header.passes.shift.len() { | |
| - frame_header.passes.shift[pass] | |
| - } else { | |
| - 0 | |
| - }; | |
| let mut coeffs_offset = 0; | |
| let transform_buffer = &mut buffers.transform_buffer; | |
| @@ -474,94 +513,116 @@ pub fn decode_vardct_group( | |
| let num_blocks = cx * cy; | |
| let num_coeffs = num_blocks * BLOCK_SIZE; | |
| let log_num_blocks = num_blocks.ilog2() as usize; | |
| - let pass_info = &hf_global.passes[pass]; | |
| - for c in [1, 0, 2] { | |
| - if (sbx[c] << hshift[c]) != bx || (sby[c] << vshift[c] != by) { | |
| - continue; | |
| - } | |
| - trace!( | |
| - "Decoding block ({},{}) channel {} with {}x{} block transform {} (shape id {})", | |
| - sbx[c], sby[c], c, cx, cy, transform_id, shape_id | |
| - ); | |
| - let predicted_nzeros = predict_num_nonzeros(&num_nzeros[c], sbx[c], sby[c]); | |
| - let block_context = | |
| - block_context_map.block_context(quant_lf, raw_quant, shape_id, c); | |
| - let nonzero_context = block_context_map | |
| - .nonzero_context(predicted_nzeros, block_context) | |
| - + context_offset; | |
| - let mut nonzeros = | |
| - reader.read_unsigned(&pass_info.histograms, br, nonzero_context) as usize; | |
| - trace!( | |
| - "block ({},{},{c}) predicted_nzeros: {predicted_nzeros} \ | |
| + for PassInfo { | |
| + histogram_index, | |
| + reader, | |
| + br, | |
| + shift, | |
| + pass, | |
| + num_nzeros, | |
| + } in pass_info.iter_mut() | |
| + { | |
| + let reader = reader.as_mut().unwrap(); | |
| + let pass_info = &hf_global.passes[*pass]; | |
| + let context_offset = *histogram_index * block_context_map.num_ac_contexts(); | |
| + for c in [1, 0, 2] { | |
| + if (sbx[c] << hshift[c]) != bx || (sby[c] << vshift[c] != by) { | |
| + continue; | |
| + } | |
| + trace!( | |
| + "Decoding block ({},{}) channel {} with {}x{} block transform {} (shape id {})", | |
| + sbx[c], sby[c], c, cx, cy, transform_id, shape_id | |
| + ); | |
| + let predicted_nzeros = predict_num_nonzeros(&num_nzeros[c], sbx[c], sby[c]); | |
| + let block_context = | |
| + block_context_map.block_context(quant_lf, raw_quant, shape_id, c); | |
| + let nonzero_context = block_context_map | |
| + .nonzero_context(predicted_nzeros, block_context) | |
| + + context_offset; | |
| + let mut nonzeros = | |
| + reader.read_unsigned_inline(&pass_info.histograms, br, nonzero_context) | |
| + as usize; | |
| + trace!( | |
| + "block ({},{},{c}) predicted_nzeros: {predicted_nzeros} \ | |
| nzero_ctx: {nonzero_context} (offset: {context_offset}) \ | |
| nzeros: {nonzeros}", | |
| - sbx[c], sby[c] | |
| - ); | |
| - if nonzeros + num_blocks > num_coeffs { | |
| - return Err(Error::InvalidNumNonZeros(nonzeros, num_blocks)); | |
| - } | |
| - for iy in 0..cy { | |
| - let nzrow = num_nzeros[c].row_mut(sby[c] + iy); | |
| - for ix in 0..cx { | |
| - nzrow[sbx[c] + ix] = nonzeros.shrc(log_num_blocks) as u32; | |
| + sbx[c], sby[c] | |
| + ); | |
| + if nonzeros + num_blocks > num_coeffs { | |
| + return Err(Error::InvalidNumNonZeros(nonzeros, num_blocks)); | |
| } | |
| - } | |
| - let histo_offset = | |
| - block_context_map.zero_density_context_offset(block_context) + context_offset; | |
| - let mut prev = if nonzeros > num_coeffs / 16 { 0 } else { 1 }; | |
| - let permutation = &pass_info.coeff_orders[shape_id * 3 + c]; | |
| - let current_coeffs = &mut coeffs[c][coeffs_offset..coeffs_offset + num_coeffs]; | |
| - for k in num_blocks..num_coeffs { | |
| - if nonzeros == 0 { | |
| - break; | |
| + for iy in 0..cy { | |
| + let nzrow = num_nzeros[c].row_mut(sby[c] + iy); | |
| + for ix in 0..cx { | |
| + nzrow[sbx[c] + ix] = nonzeros.shrc(log_num_blocks) as u32; | |
| + } | |
| + } | |
| + let histo_offset = block_context_map.zero_density_context_offset(block_context) | |
| + + context_offset; | |
| + let mut prev = if nonzeros > num_coeffs / 16 { 0 } else { 1 }; | |
| + let permutation = &pass_info.coeff_orders[shape_id * 3 + c]; | |
| + let current_coeffs = &mut coeffs[c][coeffs_offset..coeffs_offset + num_coeffs]; | |
| + for k in num_blocks..num_coeffs { | |
| + if nonzeros == 0 { | |
| + break; | |
| + } | |
| + let ctx = | |
| + histo_offset + zero_density_context(nonzeros, k, log_num_blocks, prev); | |
| + let coeff = | |
| + reader.read_signed_inline(&pass_info.histograms, br, ctx) << *shift; | |
| + prev = if coeff != 0 { 1 } else { 0 }; | |
| + nonzeros -= prev; | |
| + let coeff_index = permutation[k] as usize; | |
| + current_coeffs[coeff_index] += coeff; | |
| + } | |
| + if nonzeros != 0 { | |
| + return Err(Error::EndOfBlockResidualNonZeros(nonzeros)); | |
| } | |
| - let ctx = | |
| - histo_offset + zero_density_context(nonzeros, k, log_num_blocks, prev); | |
| - let coeff = | |
| - reader.read_signed(&pass_info.histograms, br, ctx) << shift_for_pass; | |
| - prev = if coeff != 0 { 1 } else { 0 }; | |
| - nonzeros -= prev; | |
| - let coeff_index = permutation[k] as usize; | |
| - current_coeffs[coeff_index] += coeff; | |
| - } | |
| - if nonzeros != 0 { | |
| - return Err(Error::EndOfBlockResidualNonZeros(nonzeros)); | |
| } | |
| } | |
| - let qblock = [ | |
| - &coeffs[0][coeffs_offset..], | |
| - &coeffs[1][coeffs_offset..], | |
| - &coeffs[2][coeffs_offset..], | |
| - ]; | |
| - let dequant_matrices = &hf_global.dequant_matrices; | |
| - dequant_and_transform_to_pixels_dispatch( | |
| - quant_biases, | |
| - x_dm_multiplier, | |
| - b_dm_multiplier, | |
| - pixels, | |
| - scratch, | |
| - inv_global_scale, | |
| - transform_buffer, | |
| - hshift, | |
| - vshift, | |
| - by, | |
| - sby, | |
| - bx, | |
| - sbx, | |
| - x_cc_mul, | |
| - b_cc_mul, | |
| - raw_quant, | |
| - &lf_rects, | |
| - transform_type, | |
| - block_rect, | |
| - num_blocks, | |
| - num_coeffs, | |
| - &qblock, | |
| - dequant_matrices, | |
| - )?; | |
| + if let Some(pixels) = pixels { | |
| + let qblock = [ | |
| + &coeffs[0][coeffs_offset..], | |
| + &coeffs[1][coeffs_offset..], | |
| + &coeffs[2][coeffs_offset..], | |
| + ]; | |
| + let dequant_matrices = &hf_global.dequant_matrices; | |
| + dequant_and_transform_to_pixels_dispatch( | |
| + quant_biases, | |
| + x_dm_multiplier, | |
| + b_dm_multiplier, | |
| + pixels, | |
| + scratch, | |
| + inv_global_scale, | |
| + transform_buffer, | |
| + hshift, | |
| + vshift, | |
| + by, | |
| + sby, | |
| + bx, | |
| + sbx, | |
| + x_cc_mul, | |
| + b_cc_mul, | |
| + raw_quant, | |
| + &lf_rects, | |
| + transform_type, | |
| + block_rect, | |
| + num_blocks, | |
| + num_coeffs, | |
| + &qblock, | |
| + dequant_matrices, | |
| + )?; | |
| + } | |
| coeffs_offset += num_coeffs; | |
| } | |
| } | |
| - reader.check_final_state(&hf_global.passes[pass].histograms, br)?; | |
| + for PassInfo { | |
| + pass, br, reader, .. | |
| + } in pass_info.iter_mut() | |
| + { | |
| + std::mem::take(reader) | |
| + .unwrap() | |
| + .check_final_state(&hf_global.passes[*pass].histograms, br)?; | |
| + } | |
| Ok(()) | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs | |
| new file mode 100644 | |
| index 0000000000000..43ebeb7f43aa5 | |
| --- /dev/null | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs | |
| @@ -0,0 +1,371 @@ | |
| +// Copyright (c) the JPEG XL Project Authors. All rights reserved. | |
| +// | |
| +// Use of this source code is governed by a BSD-style | |
| +// license that can be found in the LICENSE file. | |
| + | |
| +use crate::{ | |
| + api::{JxlColorProfile, JxlColorType, JxlDataFormat, JxlOutputBuffer, JxlPixelFormat}, | |
| + error::Result, | |
| + frame::Frame, | |
| + headers::{Orientation, frame_header::FrameType}, | |
| + image::{DataTypeTag, Rect}, | |
| + render::{ | |
| + Channels, ChannelsMut, RenderPipelineInOutStage, RenderPipelineInPlaceStage, | |
| + buffer_splitter::{BufferSplitter, SaveStageBufferInfo}, | |
| + low_memory_pipeline::row_buffers::RowBuffer, | |
| + save::SaveStage, | |
| + stages::{ | |
| + ConvertF32ToF16Stage, ConvertF32ToU8Stage, ConvertF32ToU16Stage, FromLinearStage, | |
| + OutputColorInfo, TransferFunction, Upsample8x, XybStage, | |
| + }, | |
| + }, | |
| + util::{f16, mirror}, | |
| +}; | |
| + | |
| +impl Frame { | |
| + #[allow(clippy::too_many_arguments)] | |
| + fn render_lf_frame_rect( | |
| + &mut self, | |
| + color_type: JxlColorType, | |
| + data_format: JxlDataFormat, | |
| + rect: Rect, | |
| + upsampled_rect: Rect, | |
| + orientation: Orientation, | |
| + output_buffers: &mut [Option<JxlOutputBuffer<'_>>], | |
| + full_size: (usize, usize), | |
| + output_color_info: &OutputColorInfo, | |
| + output_tf: &TransferFunction, | |
| + ) -> Result<()> { | |
| + let save_stage = SaveStage::new( | |
| + if color_type.has_alpha() { | |
| + &[0, 1, 2, 3] | |
| + } else { | |
| + &[0, 1, 2] | |
| + }, | |
| + orientation, | |
| + 0, | |
| + color_type, | |
| + data_format, | |
| + color_type.has_alpha(), | |
| + ); | |
| + let len = rect.size.0; | |
| + let ulen = len * 8; | |
| + enum DataFormatConverter { | |
| + U8(ConvertF32ToU8Stage), | |
| + U16(ConvertF32ToU16Stage), | |
| + F16(ConvertF32ToF16Stage), | |
| + None, | |
| + } | |
| + let (converter, constant_alpha) = match data_format { | |
| + JxlDataFormat::U8 { bit_depth } => ( | |
| + DataFormatConverter::U8(ConvertF32ToU8Stage::new(0, bit_depth)), | |
| + RowBuffer::new_filled(DataTypeTag::U8, ulen, &(1u8 << bit_depth).to_ne_bytes())?, | |
| + ), | |
| + JxlDataFormat::U16 { bit_depth, .. } => ( | |
| + DataFormatConverter::U16(ConvertF32ToU16Stage::new(0, bit_depth)), | |
| + RowBuffer::new_filled(DataTypeTag::U16, ulen, &(1u16 << bit_depth).to_ne_bytes())?, | |
| + ), | |
| + JxlDataFormat::F16 { .. } => ( | |
| + DataFormatConverter::F16(ConvertF32ToF16Stage::new(0)), | |
| + RowBuffer::new_filled( | |
| + DataTypeTag::F16, | |
| + ulen, | |
| + &(f16::from_f32(1.0).to_bits().to_ne_bytes()), | |
| + )?, | |
| + ), | |
| + JxlDataFormat::F32 { .. } => ( | |
| + DataFormatConverter::None, | |
| + RowBuffer::new_filled(DataTypeTag::F32, ulen, &1.0f32.to_ne_bytes())?, | |
| + ), | |
| + }; | |
| + | |
| + let upsample_stage = Upsample8x::new(&self.decoder_state.file_header.transform_data, 0); | |
| + let mut upsample_state = upsample_stage.init_local_state(0)?.unwrap(); | |
| + | |
| + let xyb_stage = XybStage::new(0, output_color_info.clone()); | |
| + | |
| + let from_linear_stage = FromLinearStage::new(0, output_tf.clone()); | |
| + | |
| + let mut lf_rows = [ | |
| + RowBuffer::new(DataTypeTag::F32, 2, 0, 0, len)?, | |
| + RowBuffer::new(DataTypeTag::F32, 2, 0, 0, len)?, | |
| + RowBuffer::new(DataTypeTag::F32, 2, 0, 0, len)?, | |
| + ]; | |
| + | |
| + // Converted to RGB in place. | |
| + let mut upsampled_rows = [ | |
| + RowBuffer::new(DataTypeTag::F32, 0, 3, 3, ulen)?, | |
| + RowBuffer::new(DataTypeTag::F32, 0, 3, 3, ulen)?, | |
| + RowBuffer::new(DataTypeTag::F32, 0, 3, 3, ulen)?, | |
| + ]; | |
| + | |
| + let mut output_rows = [ | |
| + RowBuffer::new(data_format.data_type(), 0, 0, 0, ulen)?, | |
| + RowBuffer::new(data_format.data_type(), 0, 0, 0, ulen)?, | |
| + RowBuffer::new(data_format.data_type(), 0, 0, 0, ulen)?, | |
| + ]; | |
| + | |
| + let src = if self.header.frame_type == FrameType::RegularFrame { | |
| + self.decoder_state.lf_frames[0].as_ref().unwrap() | |
| + } else { | |
| + self.lf_frame_data.as_ref().unwrap() | |
| + }; | |
| + | |
| + const LF_ROW_OFFSET: usize = 8; | |
| + | |
| + let x0 = rect.origin.0; | |
| + let x1 = rect.end().0; | |
| + | |
| + let y0 = rect.origin.1 as isize - 2; | |
| + let y1 = rect.end().1 as isize + 2; | |
| + | |
| + let lf_size = src[0].size(); | |
| + | |
| + for yy in y0..y1 { | |
| + let sy = mirror(yy, lf_size.1); | |
| + | |
| + // Fill in input. | |
| + for c in 0..3 { | |
| + let bufy = (yy + LF_ROW_OFFSET as isize) as usize; | |
| + let row = lf_rows[c].get_row_mut::<f32>(bufy); | |
| + let srow = src[c].row(sy); | |
| + let off = RowBuffer::x0_offset::<f32>(); | |
| + row[off..off + len].copy_from_slice(&srow[x0..x1]); | |
| + row[off - 1] = srow[mirror(x0 as isize - 1, lf_size.0)]; | |
| + row[off - 2] = srow[mirror(x0 as isize - 2, lf_size.0)]; | |
| + row[off + len] = srow[mirror(x1 as isize, lf_size.0)]; | |
| + row[off + len + 1] = srow[mirror(x1 as isize + 1, lf_size.0)]; | |
| + } | |
| + | |
| + if yy < y0 + 4 { | |
| + continue; | |
| + } | |
| + | |
| + let y = yy as usize - 2; | |
| + | |
| + // Upsample. | |
| + for c in 0..3 { | |
| + let off = RowBuffer::x0_offset::<f32>() - 2; | |
| + let input_rows_refs = [ | |
| + &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET - 2)[off..], | |
| + &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET - 1)[off..], | |
| + &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET)[off..], | |
| + &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET + 1)[off..], | |
| + &lf_rows[c].get_row::<f32>(y + LF_ROW_OFFSET + 2)[off..], | |
| + ] | |
| + .into_iter() | |
| + .collect(); | |
| + let input_channels = Channels::new(input_rows_refs, 1, 5); | |
| + | |
| + let output_rows_refs = | |
| + upsampled_rows[c].get_rows_mut(y * 8..y * 8 + 8, RowBuffer::x0_offset::<f32>()); | |
| + let mut output_channels = ChannelsMut::new(output_rows_refs, 1, 8); | |
| + | |
| + upsample_stage.process_row_chunk( | |
| + (0, 0), | |
| + len, | |
| + &input_channels, | |
| + &mut output_channels, | |
| + Some(upsample_state.as_mut()), | |
| + ); | |
| + } | |
| + | |
| + // un-XYB, convert and save. | |
| + for uy in y * 8..y * 8 + 8 { | |
| + // XYB | |
| + let [x, y, b] = &mut upsampled_rows; | |
| + let off = RowBuffer::x0_offset::<f32>(); | |
| + let mut rows = [ | |
| + &mut x.get_row_mut(uy)[off..], | |
| + &mut y.get_row_mut(uy)[off..], | |
| + &mut b.get_row_mut(uy)[off..], | |
| + ]; | |
| + xyb_stage.process_row_chunk((0, 0), ulen, &mut rows, None); | |
| + from_linear_stage.process_row_chunk((0, 0), ulen, &mut rows, None); | |
| + | |
| + macro_rules! convert { | |
| + ($s: expr, $t: ty) => { | |
| + for c in 0..3 { | |
| + let input_rows_refs = std::iter::once( | |
| + &upsampled_rows[c].get_row(uy)[RowBuffer::x0_offset::<f32>()..], | |
| + ) | |
| + .collect(); | |
| + let input_channels = Channels::new(input_rows_refs, 1, 1); | |
| + let output_rows_refs = output_rows[c] | |
| + .get_rows_mut(uy..uy + 1, RowBuffer::x0_offset::<$t>()); | |
| + let mut output_channels = ChannelsMut::new(output_rows_refs, 1, 1); | |
| + $s.process_row_chunk( | |
| + (0, 0), | |
| + ulen, | |
| + &input_channels, | |
| + &mut output_channels, | |
| + None, | |
| + ); | |
| + } | |
| + }; | |
| + } | |
| + | |
| + // Convert | |
| + let save_input = match &converter { | |
| + DataFormatConverter::U8(s) => { | |
| + convert!(s, u8); | |
| + &output_rows | |
| + } | |
| + DataFormatConverter::U16(s) => { | |
| + convert!(s, u16); | |
| + &output_rows | |
| + } | |
| + DataFormatConverter::F16(s) => { | |
| + convert!(s, f16); | |
| + &output_rows | |
| + } | |
| + DataFormatConverter::None => &upsampled_rows, | |
| + }; | |
| + | |
| + let input_no_alpha = [&save_input[0], &save_input[1], &save_input[2]]; | |
| + let input_alpha = [ | |
| + &save_input[0], | |
| + &save_input[1], | |
| + &save_input[2], | |
| + &constant_alpha, | |
| + ]; | |
| + | |
| + save_stage.save_lowmem( | |
| + if color_type.has_alpha() { | |
| + &input_alpha | |
| + } else { | |
| + &input_no_alpha | |
| + }, | |
| + output_buffers, | |
| + upsampled_rect.size, | |
| + uy, | |
| + upsampled_rect.origin, | |
| + full_size, | |
| + (0, 0), | |
| + )?; | |
| + } | |
| + } | |
| + | |
| + Ok(()) | |
| + } | |
| + | |
| + pub fn maybe_preview_lf_frame( | |
| + &mut self, | |
| + pixel_format: &JxlPixelFormat, | |
| + output_buffers: &mut [JxlOutputBuffer<'_>], | |
| + changed_regions: Option<&[Rect]>, | |
| + output_profile: &JxlColorProfile, | |
| + ) -> Result<()> { | |
| + if self.header.needs_blending() { | |
| + return Ok(()); | |
| + } | |
| + if !((self.header.has_lf_frame() && self.header.frame_type == FrameType::RegularFrame) | |
| + || (self.header.frame_type == FrameType::LFFrame && self.header.lf_level == 1)) | |
| + { | |
| + return Ok(()); | |
| + } | |
| + | |
| + let output_color_info = OutputColorInfo::from_header(&self.decoder_state.file_header)?; | |
| + | |
| + let Some(output_tf) = output_profile.transfer_function().map(|tf| { | |
| + TransferFunction::from_api_tf( | |
| + tf, | |
| + output_color_info.intensity_target, | |
| + output_color_info.luminances, | |
| + ) | |
| + }) else { | |
| + return Ok(()); | |
| + }; | |
| + | |
| + if output_tf.is_linear() { | |
| + return Ok(()); | |
| + } | |
| + | |
| + let image_metadata = &self.decoder_state.file_header.image_metadata; | |
| + if !image_metadata.xyb_encoded || !image_metadata.extra_channel_info.is_empty() { | |
| + // We only render LF frames for XYB VarDCT images with no extra channels. | |
| + // TODO(veluca): we might want to relax this to "no alpha". | |
| + return Ok(()); | |
| + } | |
| + let color_type = pixel_format.color_type; | |
| + let data_format = pixel_format.color_data_format.unwrap(); | |
| + if pixel_format.color_data_format.is_none() | |
| + || output_buffers.is_empty() | |
| + || !matches!( | |
| + color_type, | |
| + JxlColorType::Rgb | JxlColorType::Rgba | JxlColorType::Bgr | JxlColorType::Bgra, | |
| + ) | |
| + { | |
| + // We only render color data, and only to 3- or 4- channel output buffers. | |
| + return Ok(()); | |
| + } | |
| + // We already have a fully-rendered frame and we are not requesting to re-render | |
| + // specific regions. | |
| + if self.decoder_state.lf_frame_was_rendered && changed_regions.is_none() { | |
| + return Ok(()); | |
| + } | |
| + if changed_regions.is_none() { | |
| + self.decoder_state.lf_frame_was_rendered = true; | |
| + } | |
| + | |
| + let sz = &self.decoder_state.file_header.size; | |
| + let xsize = sz.xsize() as usize; | |
| + let ysize = sz.ysize() as usize; | |
| + | |
| + let mut regions_storage; | |
| + | |
| + let regions = if let Some(regions) = changed_regions { | |
| + regions | |
| + } else { | |
| + regions_storage = vec![]; | |
| + for i in (0..xsize.div_ceil(8)).step_by(256) { | |
| + let x0 = i; | |
| + let x1 = (i + 256).min(xsize.div_ceil(8)); | |
| + regions_storage.push(Rect { | |
| + origin: (x0, 0), | |
| + size: (x1 - x0, ysize.div_ceil(8)), | |
| + }); | |
| + } | |
| + ®ions_storage[..] | |
| + }; | |
| + | |
| + let orientation = image_metadata.orientation; | |
| + let info = SaveStageBufferInfo { | |
| + downsample: (0, 0), | |
| + orientation, | |
| + byte_size: data_format.bytes_per_sample() * color_type.samples_per_pixel(), | |
| + after_extend: false, | |
| + }; | |
| + let info = [Some(info)]; | |
| + let mut bufs = [Some(JxlOutputBuffer::reborrow(&mut output_buffers[0]))]; | |
| + let mut bufs = BufferSplitter::new(&mut bufs); | |
| + for r in regions { | |
| + let upsampled_rect = Rect { | |
| + size: (r.size.0 * 8, r.size.1 * 8), | |
| + origin: (r.origin.0 * 8, r.origin.1 * 8), | |
| + }; | |
| + let upsampled_rect = upsampled_rect.clip((xsize, ysize)); | |
| + let mut bufs = bufs.get_local_buffers( | |
| + &info, | |
| + upsampled_rect, | |
| + false, | |
| + (xsize, ysize), | |
| + (xsize, ysize), | |
| + (0, 0), | |
| + ); | |
| + self.render_lf_frame_rect( | |
| + color_type, | |
| + data_format, | |
| + *r, | |
| + upsampled_rect, | |
| + orientation, | |
| + &mut bufs, | |
| + (xsize, ysize), | |
| + &output_color_info, | |
| + &output_tf, | |
| + )?; | |
| + } | |
| + | |
| + Ok(()) | |
| + } | |
| +} | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs | |
| index 7425d87fe19f3..61d285d66e29e 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs | |
| @@ -3,7 +3,7 @@ | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| -use std::sync::Arc; | |
| +use std::{collections::BTreeSet, sync::Arc}; | |
| use crate::{ | |
| entropy_coding::decode::Histograms, | |
| @@ -12,7 +12,7 @@ use crate::{ | |
| headers::{ | |
| FileHeader, | |
| extra_channels::ExtraChannelInfo, | |
| - frame_header::{Encoding, FrameHeader}, | |
| + frame_header::{Encoding, FrameHeader, FrameType}, | |
| permutation::Permutation, | |
| toc::Toc, | |
| }, | |
| @@ -26,12 +26,16 @@ use modular::{FullModularImage, Tree}; | |
| use quant_weights::DequantMatrices; | |
| use quantizer::{LfQuantFactors, QuantizerParams}; | |
| +use crate::features::epf::SigmaSource; | |
| +use crate::util::AtomicRefCell; | |
| + | |
| mod adaptive_lf_smoothing; | |
| mod block_context_map; | |
| mod coeff_order; | |
| pub mod color_correlation_map; | |
| pub mod decode; | |
| mod group; | |
| +pub mod lf_preview; | |
| pub mod modular; | |
| mod quant_weights; | |
| pub mod quantizer; | |
| @@ -45,16 +49,15 @@ pub enum Section { | |
| Hf { group: usize, pass: usize }, | |
| } | |
| +#[derive(Debug)] | |
| pub struct LfGlobalState { | |
| - patches: Option<Arc<PatchesDictionary>>, | |
| - splines: Option<Splines>, | |
| - noise: Option<Noise>, | |
| lf_quant: LfQuantFactors, | |
| pub quant_params: Option<QuantizerParams>, | |
| block_context_map: Option<BlockContextMap>, | |
| color_correlation_params: Option<ColorCorrelationParams>, | |
| tree: Option<Tree>, | |
| modular_global: FullModularImage, | |
| + total_bits_read: usize, | |
| } | |
| pub struct PassState { | |
| @@ -113,10 +116,7 @@ impl ReferenceFrame { | |
| pub struct DecoderState { | |
| pub(super) file_header: FileHeader, | |
| pub(super) reference_frames: Arc<[Option<ReferenceFrame>; Self::MAX_STORED_FRAMES]>, | |
| - pub(super) lf_frames: [Option<[Image<f32>; 3]>; 4], | |
| - // TODO(veluca): do we really need this? ISTM it could be achieved by passing None for all the | |
| - // buffers, and it's not clear to me what use the decoder can make of it. | |
| - pub enable_output: bool, | |
| + pub(super) lf_frames: [Option<[Image<f32>; 3]>; Self::NUM_LF_FRAMES], | |
| pub render_spotcolors: bool, | |
| #[cfg(test)] | |
| pub use_simple_pipeline: bool, | |
| @@ -124,17 +124,21 @@ pub struct DecoderState { | |
| pub nonvisible_frame_index: usize, | |
| pub high_precision: bool, | |
| pub premultiply_output: bool, | |
| + // Whether the latest level 1 LF frame was fully rendered. | |
| + // If this is set to `true`, early flushing in the main frame | |
| + // (before HF is available) will do nothing. | |
| + pub lf_frame_was_rendered: bool, | |
| } | |
| impl DecoderState { | |
| pub const MAX_STORED_FRAMES: usize = 4; | |
| + pub const NUM_LF_FRAMES: usize = 4; | |
| pub fn new(file_header: FileHeader) -> Self { | |
| Self { | |
| file_header, | |
| reference_frames: Arc::new([None, None, None, None]), | |
| - lf_frames: [None, None, None, None], | |
| - enable_output: true, | |
| + lf_frames: std::array::from_fn(|_| None), | |
| render_spotcolors: true, | |
| #[cfg(test)] | |
| use_simple_pipeline: false, | |
| @@ -142,6 +146,7 @@ impl DecoderState { | |
| nonvisible_frame_index: 0, | |
| high_precision: false, | |
| premultiply_output: false, | |
| + lf_frame_was_rendered: false, | |
| } | |
| } | |
| @@ -169,6 +174,14 @@ pub struct HfMetadata { | |
| used_hf_types: u32, | |
| } | |
| +#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] | |
| +pub enum RenderUnit { | |
| + /// VarDCT data | |
| + VarDCT, | |
| + /// Modular channel with the given index | |
| + Modular(usize), | |
| +} | |
| + | |
| pub struct Frame { | |
| header: FrameHeader, | |
| toc: Toc, | |
| @@ -187,9 +200,21 @@ pub struct Frame { | |
| render_pipeline: Option<Box<crate::render::LowMemoryRenderPipeline>>, | |
| reference_frame_data: Option<Vec<Image<f32>>>, | |
| lf_frame_data: Option<[Image<f32>; 3]>, | |
| - lf_global_was_rendered: bool, | |
| + was_flushed_once: bool, | |
| /// Reusable buffers for VarDCT group decoding. | |
| vardct_buffers: Option<group::VarDctBuffers>, | |
| + // Last pass rendered so far for each HF group. | |
| + last_rendered_pass: Vec<Option<usize>>, | |
| + // Groups that should be rendered on the next call to flush(). | |
| + groups_to_flush: BTreeSet<usize>, | |
| + changed_since_last_flush: BTreeSet<(usize, RenderUnit)>, | |
| + incomplete_groups: usize, | |
| + patches: Arc<AtomicRefCell<PatchesDictionary>>, | |
| + splines: Arc<AtomicRefCell<Splines>>, | |
| + noise: Arc<AtomicRefCell<Noise>>, | |
| + lf_quant: Arc<AtomicRefCell<LfQuantFactors>>, | |
| + color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>, | |
| + epf_sigma: Arc<AtomicRefCell<SigmaSource>>, | |
| } | |
| impl Frame { | |
| @@ -221,6 +246,25 @@ impl Frame { | |
| } | |
| } | |
| + pub fn can_do_early_rendering(&self) -> bool { | |
| + if matches!( | |
| + self.header.frame_type, | |
| + FrameType::ReferenceOnly | FrameType::SkipProgressive | |
| + ) { | |
| + return false; | |
| + } | |
| + if self.header.has_lf_frame() { | |
| + return true; | |
| + } | |
| + if self.header.encoding == Encoding::VarDCT { | |
| + return false; | |
| + } | |
| + self.lf_global | |
| + .as_ref() | |
| + .map(|x| x.modular_global.can_do_early_partial_render()) | |
| + .unwrap_or_default() | |
| + } | |
| + | |
| pub fn finalize_lf(&mut self) -> Result<()> { | |
| if self.header.should_do_adaptive_lf_smoothing() { | |
| let lf_global = self.lf_global.as_mut().unwrap(); | |
| @@ -295,14 +339,14 @@ mod test { | |
| bytes: &[u8], | |
| verify: impl Fn(&Frame, usize) -> Result<()> + 'static, | |
| ) -> Result<usize> { | |
| - crate::api::tests::decode(bytes, usize::MAX, false, Some(Box::new(verify))).map(|x| x.0) | |
| + crate::api::tests::decode(bytes, usize::MAX, false, false, Some(Box::new(verify))) | |
| + .map(|x| x.0) | |
| } | |
| #[test] | |
| fn splines() -> Result<(), Error> { | |
| let verify_frame = move |frame: &Frame, _| { | |
| - let lf_global = frame.lf_global.as_ref().unwrap(); | |
| - let splines = lf_global.splines.as_ref().unwrap(); | |
| + let splines = frame.splines.borrow(); | |
| assert_eq!(splines.quantization_adjustment, 0); | |
| let expected_starting_points = [Point { x: 9.0, y: 54.0 }].to_vec(); | |
| assert_eq!(splines.starting_points, expected_starting_points); | |
| @@ -361,8 +405,7 @@ mod test { | |
| #[test] | |
| fn noise() -> Result<(), Error> { | |
| let verify_frame = |frame: &Frame, _| { | |
| - let lf_global = frame.lf_global.as_ref().unwrap(); | |
| - let noise = lf_global.noise.as_ref().unwrap(); | |
| + let noise = frame.noise.borrow(); | |
| let want_noise = [ | |
| 0.000000, 0.000977, 0.002930, 0.003906, 0.005859, 0.006836, 0.008789, 0.010742, | |
| ]; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs | |
| index 4e3b4569ec8cf..29c93efa42e57 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs | |
| @@ -18,7 +18,6 @@ pub fn with_buffers<T>( | |
| buffers: &[ModularBufferInfo], | |
| indices: &[usize], | |
| grid: usize, | |
| - skip_empty: bool, | |
| f: impl FnOnce(Vec<&mut ModularChannel>) -> Result<T>, | |
| ) -> Result<T> { | |
| let mut bufs = vec![]; | |
| @@ -36,10 +35,12 @@ pub fn with_buffers<T>( | |
| }); | |
| } | |
| - // Skip zero-sized buffers when decoding - they don't contribute to the bitstream. | |
| - // This matches libjxl's behavior in DecodeGroup where zero-sized rects are skipped. | |
| - // The buffer is still allocated above so transforms can access it. | |
| - if skip_empty && (b.size.0 == 0 || b.size.1 == 0) { | |
| + // Skip zero-sized *tiles*. | |
| + // | |
| + // Note that some bitstreams can contain channels with one dimension being 0 (e.g. palette | |
| + // meta-channel with 0 colors has size (0, 3)). Those must still participate in channel | |
| + // numbering (but carry no entropy-coded pixels), so we only skip when both dimensions are 0. | |
| + if b.size.0 == 0 && b.size.1 == 0 { | |
| continue; | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs | |
| index 930603f1f3f58..b2cc596252f6c 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs | |
| @@ -20,6 +20,7 @@ pub fn decode_modular_subbitstream( | |
| header: Option<GroupHeader>, | |
| global_tree: &Option<Tree>, | |
| br: &mut BitReader, | |
| + partial_decoded_buffers: Option<&mut usize>, | |
| ) -> Result<()> { | |
| // Skip decoding if all grids are zero-sized. | |
| let is_empty = buffers | |
| @@ -80,7 +81,21 @@ pub fn decode_modular_subbitstream( | |
| let mut reader = SymbolReader::new(&tree.histograms, br, Some(image_width))?; | |
| for i in 0..buffers.len() { | |
| - decode_modular_channel(&mut buffers, i, stream_id, &header, tree, &mut reader, br)?; | |
| + // Keep channel numbering stable, but skip actually decoding empty channels. | |
| + // This matches libjxl, which continues the loop without renumbering. | |
| + let (w, h) = buffers[i].data.size(); | |
| + if w == 0 || h == 0 { | |
| + continue; | |
| + } | |
| + if let Err(e) = | |
| + decode_modular_channel(&mut buffers, i, stream_id, &header, tree, &mut reader, br) | |
| + { | |
| + if let Some(p) = partial_decoded_buffers { | |
| + buffers[i].data.fill(0); | |
| + *p = i; | |
| + } | |
| + return Err(e); | |
| + } | |
| } | |
| reader.check_final_state(&tree.histograms, br)?; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs | |
| index b9190ce996269..398eb204c491b 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/channel.rs | |
| @@ -189,13 +189,16 @@ pub(super) fn decode_modular_channel( | |
| let special_tree = specialize_tree(tree, chan, stream_id, size.0, header)?; | |
| match special_tree { | |
| + TreeSpecialCase::NoTree(t) => { | |
| + decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms) | |
| + } | |
| TreeSpecialCase::NoWp(t) => { | |
| decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms) | |
| } | |
| - TreeSpecialCase::WpOnly(t) => { | |
| + TreeSpecialCase::WpOnlyConfig420(t) => { | |
| decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms) | |
| } | |
| - TreeSpecialCase::GradientLookup(t) => { | |
| + TreeSpecialCase::GradientLookupConfig420(t) => { | |
| decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms) | |
| } | |
| TreeSpecialCase::SingleGradientOnly(t) => { | |
| @@ -204,5 +207,6 @@ pub(super) fn decode_modular_channel( | |
| TreeSpecialCase::General(t) => { | |
| decode_modular_channel_impl(buffers, chan, t, reader, br, &tree.histograms) | |
| } | |
| - } | |
| + }?; | |
| + br.check_for_error() | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs | |
| index ff7d2263a7a91..8ffebfe8a8720 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/specialized_trees.rs | |
| @@ -86,7 +86,7 @@ impl ModularChannelDecoder for NoWpTree { | |
| &self.references, | |
| &mut self.property_buffer, | |
| ); | |
| - let dec = reader.read_signed(histograms, br, prediction_result.context as usize); | |
| + let dec = reader.read_signed_clustered(histograms, br, prediction_result.context as usize); | |
| make_pixel(dec, prediction_result.multiplier, prediction_result.guess) | |
| } | |
| } | |
| @@ -140,7 +140,7 @@ impl ModularChannelDecoder for GeneralTree { | |
| &self.no_wp_tree.references, | |
| &mut self.no_wp_tree.property_buffer, | |
| ); | |
| - let dec = reader.read_signed(histograms, br, prediction_result.context as usize); | |
| + let dec = reader.read_signed_clustered(histograms, br, prediction_result.context as usize); | |
| let val = make_pixel(dec, prediction_result.multiplier, prediction_result.guess); | |
| self.wp_state.update_errors(val, pos, xsize); | |
| val | |
| @@ -152,12 +152,7 @@ const LUT_MIN_SPLITVAL: i32 = -1024; | |
| const LUT_TABLE_SIZE: usize = (LUT_MAX_SPLITVAL - LUT_MIN_SPLITVAL + 1) as usize; | |
| const _: () = assert!(LUT_TABLE_SIZE.is_power_of_two()); | |
| -pub struct WpOnlyLookup { | |
| - lut: [u8; LUT_TABLE_SIZE], // Lookup (wp value -> *clustered* context id) | |
| - wp_state: WeightedPredictorState, | |
| -} | |
| - | |
| -fn make_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<[u8; LUT_TABLE_SIZE]> { | |
| +fn make_lut(tree: &[TreeNode]) -> Option<[u8; LUT_TABLE_SIZE]> { | |
| struct RangeAndNode { | |
| range: Range<i32>, | |
| node: u32, | |
| @@ -198,8 +193,7 @@ fn make_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<[u8; LUT_TABLE | |
| } | |
| let start = range.start - LUT_MIN_SPLITVAL; | |
| let end = range.end - LUT_MIN_SPLITVAL; | |
| - ans[start as usize..end as usize] | |
| - .fill(histograms.map_context_to_cluster(id as usize) as u8); | |
| + ans[start as usize..end as usize].fill(id as u8); | |
| } | |
| } | |
| } | |
| @@ -207,20 +201,30 @@ fn make_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<[u8; LUT_TABLE | |
| Some(ans) | |
| } | |
| -impl WpOnlyLookup { | |
| +/// Specialized WpOnlyLookup for when all HybridUint configs are 420 | |
| +/// This allows using the fast-path entropy decoder | |
| +pub struct WpOnlyLookupConfig420 { | |
| + lut: [u8; LUT_TABLE_SIZE], | |
| + wp_state: WeightedPredictorState, | |
| +} | |
| + | |
| +impl WpOnlyLookupConfig420 { | |
| fn new( | |
| tree: &[TreeNode], | |
| histograms: &Histograms, | |
| header: &GroupHeader, | |
| xsize: usize, | |
| ) -> Option<Self> { | |
| + if !histograms.can_use_config_420_fast_path() { | |
| + return None; | |
| + } | |
| let wp_state = WeightedPredictorState::new(&header.wp_header, xsize); | |
| - let lut = make_lut(tree, histograms)?; | |
| + let lut = make_lut(tree)?; | |
| Some(Self { lut, wp_state }) | |
| } | |
| } | |
| -impl ModularChannelDecoder for WpOnlyLookup { | |
| +impl ModularChannelDecoder for WpOnlyLookupConfig420 { | |
| const NEEDS_TOP: bool = true; | |
| const NEEDS_TOPTOP: bool = true; | |
| @@ -243,25 +247,30 @@ impl ModularChannelDecoder for WpOnlyLookup { | |
| .predict_and_property(pos, xsize, &prediction_data); | |
| let ctx = self.lut[(property as i64 - LUT_MIN_SPLITVAL as i64) | |
| .clamp(0, LUT_TABLE_SIZE as i64 - 1) as usize]; | |
| - let dec = reader.read_signed_clustered(histograms, br, ctx as usize); | |
| + // Use the specialized 420 fast path | |
| + let dec = reader.read_signed_clustered_config_420(histograms, br, ctx as usize); | |
| let val = dec.wrapping_add(wp_pred as i32); | |
| self.wp_state.update_errors(val, pos, xsize); | |
| val | |
| } | |
| } | |
| -/// Fast path for trees that split only on property 9 (gradient: left + top - topleft) | |
| -/// with Gradient predictor, offset=0, multiplier=1. | |
| -/// Maps property 9 values directly to cluster IDs via a LUT. | |
| -/// This targets libjxl effort 2 encoding. | |
| -pub struct GradientLookup { | |
| - lut: [u8; LUT_TABLE_SIZE], | |
| -} | |
| - | |
| /// Property 9 is the "gradient property": left + top - topleft | |
| const GRADIENT_PROPERTY: u8 = 9; | |
| -fn make_gradient_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<GradientLookup> { | |
| +/// Config 420 specialized version of gradient lookup for trees that split only on property 9. | |
| +/// This uses the specialized entropy decoder for config 420 + no LZ77. | |
| +pub struct GradientLookupConfig420 { | |
| + lut: [u8; LUT_TABLE_SIZE], | |
| +} | |
| + | |
| +fn make_gradient_lut_config_420( | |
| + tree: &[TreeNode], | |
| + histograms: &Histograms, | |
| +) -> Option<GradientLookupConfig420> { | |
| + if !histograms.can_use_config_420_fast_path() { | |
| + return None; | |
| + } | |
| // Verify all splits are on property 9 and all leaves have Gradient predictor | |
| for node in tree { | |
| match node { | |
| @@ -278,12 +287,11 @@ fn make_gradient_lut(tree: &[TreeNode], histograms: &Histograms) -> Option<Gradi | |
| } | |
| } | |
| - // Use existing make_lut which handles offset=0, multiplier=1 checks | |
| - let lut = make_lut(tree, histograms)?; | |
| - Some(GradientLookup { lut }) | |
| + let lut = make_lut(tree)?; | |
| + Some(GradientLookupConfig420 { lut }) | |
| } | |
| -impl ModularChannelDecoder for GradientLookup { | |
| +impl ModularChannelDecoder for GradientLookupConfig420 { | |
| const NEEDS_TOP: bool = true; | |
| const NEEDS_TOPTOP: bool = false; | |
| @@ -314,13 +322,14 @@ impl ModularChannelDecoder for GradientLookup { | |
| prediction_data.topleft as i64, | |
| ); | |
| - let dec = reader.read_signed_clustered(histograms, br, cluster as usize); | |
| + // Use the specialized config 420 fast path | |
| + let dec = reader.read_signed_clustered_config_420(histograms, br, cluster as usize); | |
| dec.wrapping_add(pred as i32) | |
| } | |
| } | |
| pub struct SingleGradientOnly { | |
| - ctx: usize, | |
| + clustered_ctx: usize, | |
| } | |
| impl ModularChannelDecoder for SingleGradientOnly { | |
| @@ -340,16 +349,42 @@ impl ModularChannelDecoder for SingleGradientOnly { | |
| histograms: &Histograms, | |
| ) -> i32 { | |
| let pred = Predictor::Gradient.predict_one(prediction_data, 0); | |
| - let dec = reader.read_signed(histograms, br, self.ctx); | |
| + let dec = reader.read_signed_clustered_inline(histograms, br, self.clustered_ctx); | |
| make_pixel(dec, 1, pred) | |
| } | |
| } | |
| +pub struct NoTree { | |
| + clustered_ctx: usize, | |
| +} | |
| + | |
| +impl ModularChannelDecoder for NoTree { | |
| + const NEEDS_TOP: bool = false; | |
| + const NEEDS_TOPTOP: bool = false; | |
| + | |
| + fn init_row(&mut self, _: &mut [&mut ModularChannel], _: usize, _: usize) {} | |
| + | |
| + #[inline(always)] | |
| + fn decode_one( | |
| + &mut self, | |
| + _: PredictionData, | |
| + _: (usize, usize), | |
| + _: usize, | |
| + reader: &mut SymbolReader, | |
| + br: &mut BitReader, | |
| + histograms: &Histograms, | |
| + ) -> i32 { | |
| + let dec = reader.read_signed_clustered_inline(histograms, br, self.clustered_ctx); | |
| + make_pixel(dec, 1, 0) | |
| + } | |
| +} | |
| + | |
| #[allow(clippy::large_enum_variant)] | |
| pub enum TreeSpecialCase { | |
| + NoTree(NoTree), | |
| NoWp(NoWpTree), | |
| - WpOnly(WpOnlyLookup), | |
| - GradientLookup(GradientLookup), | |
| + WpOnlyConfig420(WpOnlyLookupConfig420), | |
| + GradientLookupConfig420(GradientLookupConfig420), | |
| SingleGradientOnly(SingleGradientOnly), | |
| General(GeneralTree), | |
| } | |
| @@ -372,9 +407,10 @@ pub fn specialize_tree( | |
| let mut uses_non_wp = false; | |
| // Obtain a pruned tree without nodes that are not relevant in the current channel and stream. | |
| - // Proceed in BFS order, so that we know that the children of anode will be adjacent. | |
| + // Proceed in BFS order, so that we know that the children of a node will be adjacent. | |
| + // Also re-maps context IDs to cluster IDs. | |
| while let Some(v) = queue.pop_front() { | |
| - let node = tree.nodes[v as usize]; | |
| + let mut node = tree.nodes[v as usize]; | |
| match node { | |
| TreeNode::Split { | |
| property, | |
| @@ -409,11 +445,29 @@ pub fn specialize_tree( | |
| TreeNode::Leaf { predictor, .. } => { | |
| uses_wp |= predictor == Predictor::Weighted; | |
| uses_non_wp |= predictor != Predictor::Weighted; | |
| + let TreeNode::Leaf { id, .. } = &mut node else { | |
| + unreachable!() | |
| + }; | |
| + *id = tree.histograms.map_context_to_cluster(*id as usize) as u32; | |
| pruned_tree.push(node); | |
| } | |
| } | |
| } | |
| + if let [ | |
| + TreeNode::Leaf { | |
| + predictor: Predictor::Zero, | |
| + multiplier: 1, | |
| + offset: 0, | |
| + id, | |
| + }, | |
| + ] = &*pruned_tree | |
| + { | |
| + return Ok(TreeSpecialCase::NoTree(NoTree { | |
| + clustered_ctx: *id as usize, | |
| + })); | |
| + } | |
| + | |
| if let [ | |
| TreeNode::Leaf { | |
| predictor: Predictor::Gradient, | |
| @@ -424,20 +478,23 @@ pub fn specialize_tree( | |
| ] = &*pruned_tree | |
| { | |
| return Ok(TreeSpecialCase::SingleGradientOnly(SingleGradientOnly { | |
| - ctx: *id as usize, | |
| + clustered_ctx: *id as usize, | |
| })); | |
| } | |
| - if !uses_non_wp | |
| - && let Some(wp) = WpOnlyLookup::new(&pruned_tree, &tree.histograms, header, xsize) | |
| - { | |
| - return Ok(TreeSpecialCase::WpOnly(wp)); | |
| + if !uses_non_wp { | |
| + // Try the specialized 420 config version (fast path for effort 3 encoded images) | |
| + if let Some(wp) = WpOnlyLookupConfig420::new(&pruned_tree, &tree.histograms, header, xsize) | |
| + { | |
| + return Ok(TreeSpecialCase::WpOnlyConfig420(wp)); | |
| + } | |
| } | |
| - // Try gradient LUT for non-WP trees (targets effort 2 encoding) | |
| + // Non-WP trees (includes effort 2 encoding and some groups in effort > 3) | |
| if !uses_wp { | |
| - if let Some(gl) = make_gradient_lut(&pruned_tree, &tree.histograms) { | |
| - return Ok(TreeSpecialCase::GradientLookup(gl)); | |
| + // Try config 420 specialized gradient LUT version (fast path for effort 2 encoded images) | |
| + if let Some(gl) = make_gradient_lut_config_420(&pruned_tree, &tree.histograms) { | |
| + return Ok(TreeSpecialCase::GradientLookupConfig420(gl)); | |
| } | |
| return Ok(TreeSpecialCase::NoWp(NoWpTree::new( | |
| pruned_tree, | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs | |
| index 9537774e4f530..c5d665f328b9e 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/mod.rs | |
| @@ -3,7 +3,13 @@ | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| -use std::{cmp::min, fmt::Debug}; | |
| +use std::{ | |
| + cmp::min, | |
| + collections::{BTreeMap, BTreeSet}, | |
| + fmt::Debug, | |
| + ops::Range, | |
| + sync::atomic::{AtomicUsize, Ordering}, | |
| +}; | |
| use crate::{ | |
| bit_reader::BitReader, | |
| @@ -14,11 +20,13 @@ use crate::{ | |
| quantizer::{self, LfQuantFactors, QuantizerParams}, | |
| }, | |
| headers::{ | |
| - ImageMetadata, JxlHeader, bit_depth::BitDepth, frame_header::FrameHeader, | |
| - modular::GroupHeader, | |
| + ImageMetadata, JxlHeader, | |
| + bit_depth::BitDepth, | |
| + frame_header::FrameHeader, | |
| + modular::{GroupHeader, TransformId}, | |
| }, | |
| image::{Image, Rect}, | |
| - util::{AtomicRefCell, CeilLog2, tracing_wrappers::*}, | |
| + util::{AtomicRefCell, CeilLog2, SmallVec, tracing_wrappers::*}, | |
| }; | |
| use jxl_transforms::transform_map::*; | |
| @@ -41,8 +49,8 @@ const IMAGE_OFFSET: (usize, usize) = (2, 2); | |
| #[derive(Clone, PartialEq, Eq, Copy)] | |
| struct ChannelInfo { | |
| - // The index of the output channel in the render pipeline, or -1 for non-output channels. | |
| - output_channel_idx: isize, | |
| + // The index of the output channel in the render pipeline. | |
| + output_channel_idx: Option<usize>, | |
| // width, height | |
| size: (usize, usize), | |
| shift: Option<(usize, usize)>, // None for meta-channels | |
| @@ -58,8 +66,8 @@ impl Debug for ChannelInfo { | |
| write!(f, "(meta)")?; | |
| } | |
| write!(f, "{:?}", self.bit_depth)?; | |
| - if self.output_channel_idx >= 0 { | |
| - write!(f, "(output channel {})", self.output_channel_idx)?; | |
| + if let Some(oc) = self.output_channel_idx { | |
| + write!(f, "(output channel {})", oc)?; | |
| } | |
| Ok(()) | |
| } | |
| @@ -162,7 +170,7 @@ impl ModularChannel { | |
| fn channel_info(&self) -> ChannelInfo { | |
| ChannelInfo { | |
| - output_channel_idx: -1, | |
| + output_channel_idx: None, | |
| size: self.data.size(), | |
| shift: self.shift, | |
| bit_depth: self.bit_depth, | |
| @@ -170,6 +178,10 @@ impl ModularChannel { | |
| } | |
| } | |
| +const BUFFER_STATUS_NOT_RENDERED: usize = 0; | |
| +const BUFFER_STATUS_PARTIAL_RENDER: usize = 1; | |
| +const BUFFER_STATUS_FINAL_RENDER: usize = 2; | |
| + | |
| // Note: this type uses interior mutability to get mutable references to multiple buffers at once. | |
| // In principle, this is not needed, but the overhead should be minimal so using `unsafe` here is | |
| // probably not worth it. | |
| @@ -177,34 +189,81 @@ impl ModularChannel { | |
| struct ModularBuffer { | |
| data: AtomicRefCell<Option<ModularChannel>>, | |
| // Number of times this buffer will be used, *including* when it is used for output. | |
| - remaining_uses: usize, | |
| - used_by_transforms: Vec<usize>, | |
| + remaining_uses: AtomicUsize, | |
| + // Transform steps that "strongly" or "weakly" use the image data in this buffer. | |
| + // A "strong" usage always triggers a re-render if the image data changes. | |
| + // A "weak" usage only triggers a re-render if the buffer is final, or if the | |
| + // current re-render was not only caused by weak re-renders. | |
| + used_by_transforms_strong: Vec<usize>, | |
| + used_by_transforms_weak: Vec<usize>, | |
| size: (usize, usize), | |
| + status: AtomicUsize, | |
| } | |
| impl ModularBuffer { | |
| + fn get_status(&self) -> usize { | |
| + self.status.load(Ordering::Relaxed) | |
| + } | |
| + | |
| + fn set_status(&self, val: usize) { | |
| + self.status.store(val, Ordering::Relaxed); | |
| + } | |
| + | |
| + // Iterator over (transform_id, is_strong_use) | |
| + fn users(&self, include_weak: bool) -> impl Iterator<Item = (usize, bool)> { | |
| + let strong = self.used_by_transforms_strong.iter().map(|x| (*x, true)); | |
| + let weak = if include_weak { | |
| + &self.used_by_transforms_weak[..] | |
| + } else { | |
| + &[] | |
| + } | |
| + .iter() | |
| + .map(|x| (*x, false)); | |
| + strong.chain(weak) | |
| + } | |
| + | |
| // Gives out a copy of the buffer + auxiliary buffer, marking the buffer as used. | |
| // If this was the last usage of the buffer, does not actually copy the buffer. | |
| - fn get_buffer(&mut self) -> Result<ModularChannel> { | |
| - self.remaining_uses = self.remaining_uses.checked_sub(1).unwrap(); | |
| - if self.remaining_uses == 0 { | |
| - Ok(self.data.borrow_mut().take().unwrap()) | |
| - } else { | |
| - Ok(self | |
| - .data | |
| - .borrow() | |
| - .as_ref() | |
| - .map(ModularChannel::try_clone) | |
| - .transpose()? | |
| - .unwrap()) | |
| + fn get_buffer(&self, can_consume: bool) -> Result<ModularChannel> { | |
| + if !can_consume { | |
| + return ModularChannel::try_clone(self.data.borrow().as_ref().unwrap()); | |
| } | |
| + let mut ret = None; | |
| + let _ = self.remaining_uses.fetch_update( | |
| + Ordering::Release, | |
| + Ordering::Acquire, | |
| + |remaining_pre| { | |
| + let remaining = remaining_pre.checked_sub(1).unwrap(); | |
| + if ret.is_none() { | |
| + if remaining == 0 { | |
| + ret = Some(Ok(self.data.borrow_mut().take().unwrap())) | |
| + } else { | |
| + ret = self.data.borrow().as_ref().map(ModularChannel::try_clone); | |
| + } | |
| + } else if remaining == 0 { | |
| + *self.data.borrow_mut() = None; | |
| + } | |
| + Some(remaining) | |
| + }, | |
| + ); | |
| + Ok(ret.transpose()?.unwrap()) | |
| } | |
| - fn mark_used(&mut self) { | |
| - self.remaining_uses = self.remaining_uses.checked_sub(1).unwrap(); | |
| - if self.remaining_uses == 0 { | |
| - *self.data.borrow_mut() = None; | |
| + fn mark_used(&self, can_consume: bool) { | |
| + if !can_consume { | |
| + return; | |
| } | |
| + let _ = self.remaining_uses.fetch_update( | |
| + Ordering::Release, | |
| + Ordering::Acquire, | |
| + |remaining_pre: usize| { | |
| + let remaining = remaining_pre.checked_sub(1).unwrap(); | |
| + if remaining == 0 { | |
| + *self.data.borrow_mut() = None; | |
| + } | |
| + Some(remaining) | |
| + }, | |
| + ); | |
| } | |
| } | |
| @@ -237,6 +296,7 @@ impl ModularBufferInfo { | |
| }; | |
| self.grid_shape.0 * grid_pos.1 + grid_pos.0 | |
| } | |
| + | |
| fn get_grid_rect( | |
| &self, | |
| frame_header: &FrameHeader, | |
| @@ -296,15 +356,41 @@ pub struct FullModularImage { | |
| // In order, LfGlobal, LfGroup, HfGroup(pass 0), ..., HfGroup(last pass). | |
| section_buffer_indices: Vec<Vec<usize>>, | |
| modular_color_channels: usize, | |
| + can_do_partial_render: bool, | |
| + can_do_early_partial_render: bool, | |
| + decoded_section0_channels: usize, | |
| + needed_section0_channels_for_early_render: usize, | |
| + global_header: Option<GroupHeader>, | |
| + buffers_for_channels: Vec<usize>, | |
| + // Buffers to _start rendering from_ on the next call to process_output. | |
| + // This is initially set to LF global and LF buffers, and populated with HF buffers | |
| + // just before we start decoding them. | |
| + ready_buffers_dry_run: BTreeSet<(usize, usize)>, | |
| + ready_buffers: BTreeSet<(usize, usize)>, | |
| + // Whether each channel is used or not by the render pipeline. | |
| + pipeline_used_channels: Vec<bool>, | |
| } | |
| impl FullModularImage { | |
| + pub fn can_do_partial_render(&self) -> bool { | |
| + self.can_do_partial_render | |
| + } | |
| + | |
| + pub fn can_do_early_partial_render(&self) -> bool { | |
| + self.can_do_early_partial_render | |
| + // Avoid rendering wrong colors ("green martians") before enough channels are decoded | |
| + && self.decoded_section0_channels >= self.needed_section0_channels_for_early_render | |
| + } | |
| + | |
| + pub fn set_pipeline_used_channels(&mut self, used: &[bool]) { | |
| + self.pipeline_used_channels = used.to_vec(); | |
| + } | |
| + | |
| #[instrument(level = "debug", skip_all)] | |
| pub fn read( | |
| frame_header: &FrameHeader, | |
| image_metadata: &ImageMetadata, | |
| modular_color_channels: usize, | |
| - global_tree: &Option<Tree>, | |
| br: &mut BitReader, | |
| ) -> Result<Self> { | |
| let mut channels = vec![]; | |
| @@ -312,7 +398,7 @@ impl FullModularImage { | |
| let shift = (frame_header.hshift(c), frame_header.vshift(c)); | |
| let size = frame_header.size(); | |
| channels.push(ChannelInfo { | |
| - output_channel_idx: c as isize, | |
| + output_channel_idx: Some(c), | |
| size: (size.0.div_ceil(1 << shift.0), size.1.div_ceil(1 << shift.1)), | |
| shift: Some(shift), | |
| bit_depth: image_metadata.bit_depth, | |
| @@ -332,7 +418,7 @@ impl FullModularImage { | |
| size.1.div_ceil(*ecups as usize), | |
| ); | |
| channels.push(ChannelInfo { | |
| - output_channel_idx: 3 + idx as isize, | |
| + output_channel_idx: Some(3 + idx), | |
| size, | |
| shift: Some((shift, shift)), | |
| bit_depth: image_metadata.bit_depth, | |
| @@ -350,12 +436,33 @@ impl FullModularImage { | |
| transform_steps: vec![], | |
| section_buffer_indices: vec![vec![]; 2 + frame_header.passes.num_passes as usize], | |
| modular_color_channels, | |
| + can_do_partial_render: true, | |
| + can_do_early_partial_render: false, | |
| + decoded_section0_channels: 0, | |
| + needed_section0_channels_for_early_render: 0, | |
| + global_header: None, | |
| + buffers_for_channels: vec![], | |
| + ready_buffers_dry_run: BTreeSet::new(), | |
| + ready_buffers: BTreeSet::new(), | |
| + pipeline_used_channels: vec![], | |
| }); | |
| } | |
| trace!("reading modular header"); | |
| let header = GroupHeader::read(br)?; | |
| + // Disallow progressive rendering with multi-channel palette transforms | |
| + // or delta-palette. | |
| + let has_problematic_palette_transform = header.transforms.iter().any(|x| { | |
| + x.id == TransformId::Palette | |
| + && (x.num_channels > 1 || x.predictor_id != Predictor::Zero as u32) | |
| + }); | |
| + | |
| + let has_squeeze_transform = header | |
| + .transforms | |
| + .iter() | |
| + .any(|x| x.id == TransformId::Squeeze); | |
| + | |
| let (mut buffer_info, transform_steps) = | |
| transforms::apply::meta_apply_transforms(&channels, &header)?; | |
| @@ -460,12 +567,13 @@ impl FullModularImage { | |
| ); | |
| for (pos, buf) in bi.buffer_grid.iter().enumerate() { | |
| trace!( | |
| - "Channel {i} grid {pos} ({}, {}) size: {:?}, uses: {}, used_by: {:?}", | |
| + "Channel {i} grid {pos} ({}, {}) size: {:?}, uses: {:?}, used_by: s {:?} w {:?}", | |
| pos % bi.grid_shape.0, | |
| pos / bi.grid_shape.0, | |
| buf.size, | |
| buf.remaining_uses, | |
| - buf.used_by_transforms | |
| + buf.used_by_transforms_strong, | |
| + buf.used_by_transforms_weak, | |
| ); | |
| } | |
| } | |
| @@ -475,24 +583,105 @@ impl FullModularImage { | |
| trace!("Transform {i}: {ts:?}"); | |
| } | |
| - with_buffers(&buffer_info, §ion_buffer_indices[0], 0, true, |bufs| { | |
| - decode_modular_subbitstream( | |
| - bufs, | |
| - ModularStreamId::GlobalData.get_id(frame_header), | |
| - Some(header), | |
| - global_tree, | |
| - br, | |
| - ) | |
| - })?; | |
| + let mut buffers_for_channels = vec![]; | |
| + | |
| + for (i, c) in buffer_info.iter().enumerate() { | |
| + if let Some(c) = c.info.output_channel_idx { | |
| + if buffers_for_channels.len() <= c { | |
| + buffers_for_channels.resize(c + 1, 0); | |
| + } | |
| + buffers_for_channels[c] = i; | |
| + } | |
| + } | |
| + | |
| + let num_meta_channels = buffer_info | |
| + .iter() | |
| + .filter(|b| b.coded_channel_id >= 0 && b.info.is_meta()) | |
| + .count(); | |
| Ok(FullModularImage { | |
| buffer_info, | |
| transform_steps, | |
| section_buffer_indices, | |
| modular_color_channels, | |
| + can_do_partial_render: !has_problematic_palette_transform, | |
| + can_do_early_partial_render: !has_problematic_palette_transform | |
| + && has_squeeze_transform, | |
| + decoded_section0_channels: 0, | |
| + needed_section0_channels_for_early_render: buffers_for_channels.len() | |
| + + num_meta_channels, | |
| + global_header: Some(header), | |
| + buffers_for_channels, | |
| + ready_buffers_dry_run: BTreeSet::new(), | |
| + ready_buffers: BTreeSet::new(), | |
| + pipeline_used_channels: vec![], | |
| }) | |
| } | |
| + pub fn read_section0( | |
| + &mut self, | |
| + frame_header: &FrameHeader, | |
| + global_tree: &Option<Tree>, | |
| + br: &mut BitReader, | |
| + allow_partial: bool, | |
| + ) -> Result<()> { | |
| + let mut decoded_if_partial = 0; | |
| + let ret = with_buffers( | |
| + &self.buffer_info, | |
| + &self.section_buffer_indices[0], | |
| + 0, | |
| + |bufs| { | |
| + decode_modular_subbitstream( | |
| + bufs, | |
| + ModularStreamId::GlobalData.get_id(frame_header), | |
| + self.global_header.clone(), | |
| + global_tree, | |
| + br, | |
| + Some(&mut decoded_if_partial), | |
| + ) | |
| + }, | |
| + ); | |
| + | |
| + match (ret, allow_partial) { | |
| + (Ok(_), _) => { | |
| + // Decoded section completely. | |
| + self.decoded_section0_channels = self.section_buffer_indices[0].len(); | |
| + } | |
| + (Err(_), true) => { | |
| + self.decoded_section0_channels = decoded_if_partial; | |
| + } | |
| + (Err(e), false) => { | |
| + return Err(e); | |
| + } | |
| + } | |
| + | |
| + for b in self.section_buffer_indices[0] | |
| + .iter() | |
| + .take(self.decoded_section0_channels) | |
| + { | |
| + if self.buffer_info[*b].buffer_grid[0].get_status() == BUFFER_STATUS_FINAL_RENDER { | |
| + continue; | |
| + } | |
| + // If we did a partial decode, we cannot be 100% sure whether we correctly | |
| + // decoded all the sections. Thus, mark the sections as partially decoded. | |
| + self.buffer_info[*b].buffer_grid[0].set_status(if allow_partial { | |
| + BUFFER_STATUS_PARTIAL_RENDER | |
| + } else { | |
| + BUFFER_STATUS_FINAL_RENDER | |
| + }); | |
| + self.ready_buffers_dry_run.insert((*b, 0)); | |
| + } | |
| + | |
| + Ok(()) | |
| + } | |
| + | |
| + pub fn mark_group_to_be_read(&mut self, section_id: usize, group: usize) { | |
| + for b in self.section_buffer_indices[section_id].iter() { | |
| + self.buffer_info[*b].buffer_grid[group].set_status(BUFFER_STATUS_FINAL_RENDER); | |
| + self.ready_buffers_dry_run.insert((*b, group)); | |
| + } | |
| + } | |
| + | |
| #[allow(clippy::type_complexity)] | |
| #[instrument(level = "debug", skip(self, frame_header, global_tree, br), ret)] | |
| pub fn read_stream( | |
| @@ -520,7 +709,6 @@ impl FullModularImage { | |
| &self.buffer_info, | |
| &self.section_buffer_indices[section_id], | |
| grid, | |
| - true, | |
| |bufs| { | |
| decode_modular_subbitstream( | |
| bufs, | |
| @@ -528,60 +716,247 @@ impl FullModularImage { | |
| None, | |
| global_tree, | |
| br, | |
| - ) | |
| + None, | |
| + )?; | |
| + Ok(()) | |
| }, | |
| )?; | |
| + | |
| + Ok(()) | |
| + } | |
| + | |
| + fn maybe_output( | |
| + &self, | |
| + buf: usize, | |
| + grid: usize, | |
| + dry_run: bool, | |
| + pass_to_pipeline: &mut dyn FnMut(usize, usize, bool, Option<Image<i32>>) -> Result<()>, | |
| + ) -> Result<()> { | |
| + if let Some(chan) = self.buffer_info[buf].info.output_channel_idx { | |
| + let is_final = | |
| + self.buffer_info[buf].buffer_grid[grid].get_status() == BUFFER_STATUS_FINAL_RENDER; | |
| + let all_final = self.buffers_for_channels.iter().all(|x| { | |
| + self.buffer_info[*x].buffer_grid[grid].get_status() == BUFFER_STATUS_FINAL_RENDER | |
| + }); | |
| + let channels: SmallVec<usize, 3> = if chan == 0 && self.modular_color_channels == 1 { | |
| + (0..3).filter(|x| self.pipeline_used_channels[*x]).collect() | |
| + } else { | |
| + self.pipeline_used_channels[chan] | |
| + .then_some(chan) | |
| + .into_iter() | |
| + .collect() | |
| + }; | |
| + if channels.is_empty() { | |
| + return Ok(()); | |
| + } | |
| + if dry_run { | |
| + for c in channels.iter() { | |
| + pass_to_pipeline(*c, grid, is_final, None)?; | |
| + } | |
| + } else { | |
| + debug!("Rendering channel {chan:?}, grid position {grid}"); | |
| + let buf = self.buffer_info[buf].buffer_grid[grid].get_buffer(all_final)?; | |
| + for c in channels[1..].iter() { | |
| + pass_to_pipeline(*c, grid, is_final, Some(buf.data.try_clone()?))?; | |
| + } | |
| + pass_to_pipeline(channels[0], grid, is_final, Some(buf.data))?; | |
| + } | |
| + } | |
| Ok(()) | |
| } | |
| + // If `dry_run` is true, this call does not modify any state, and the calls to `pass_to_pipeline` | |
| + // will have None as an image. Otherwise, the image will always be `Some(..)`. | |
| + // It is *required* to do a dry run before doing an actual run after any event that might have | |
| + // readied some buffers. | |
| pub fn process_output( | |
| &mut self, | |
| - section_id: usize, | |
| - grid: usize, | |
| frame_header: &FrameHeader, | |
| - pass_to_pipeline: &mut dyn FnMut(usize, usize, usize, Image<i32>) -> Result<()>, | |
| + dry_run: bool, | |
| + pass_to_pipeline: &mut dyn FnMut(usize, usize, bool, Option<Image<i32>>) -> Result<()>, | |
| ) -> Result<()> { | |
| - let mut maybe_output = |bi: &mut ModularBufferInfo, grid: usize| -> Result<()> { | |
| - if bi.info.output_channel_idx >= 0 { | |
| - let chan = bi.info.output_channel_idx as usize; | |
| - debug!("Rendering channel {chan:?}, grid position {grid}"); | |
| - let buf = bi.buffer_grid[grid].get_buffer()?; | |
| - // TODO(veluca): figure out what to do with passes here. | |
| - if chan == 0 && self.modular_color_channels == 1 { | |
| - for i in 0..2 { | |
| - pass_to_pipeline(i, grid, 1, buf.data.try_clone()?)?; | |
| - } | |
| - pass_to_pipeline(2, grid, 1, buf.data)?; | |
| + // TODO(veluca): consider using `used_channel_mask` to avoid running transforms that produce | |
| + // channels that are not used. | |
| + | |
| + // layer -> (transform -> is_strong) | |
| + let mut to_process_by_layer = BTreeMap::<usize, BTreeMap<usize, bool>>::new(); | |
| + let mut buffers_to_output = vec![]; | |
| + | |
| + let ready_buffers = if dry_run { | |
| + std::mem::take(&mut self.ready_buffers_dry_run) | |
| + } else { | |
| + assert!(self.ready_buffers_dry_run.is_empty()); | |
| + std::mem::take(&mut self.ready_buffers) | |
| + }; | |
| + | |
| + for (buf, grid) in ready_buffers { | |
| + if self.buffer_info[buf].info.output_channel_idx.is_some() { | |
| + buffers_to_output.push((buf, grid)); | |
| + } | |
| + for (t, is_strong_dep) in self.buffer_info[buf].buffer_grid[grid].users(true) { | |
| + let layer = self.transform_steps[t].layer; | |
| + let layer = to_process_by_layer.entry(layer).or_default(); | |
| + let is_strong = layer.entry(t).or_default(); | |
| + *is_strong |= is_strong_dep; | |
| + } | |
| + if dry_run { | |
| + self.ready_buffers.insert((buf, grid)); | |
| + } | |
| + } | |
| + | |
| + // When doing a dry run, run the same logic as the real execution, but | |
| + // without modifying the actual buffer status -- instead, we use local | |
| + // overrides. | |
| + // This allows us to know what buffers will be produced before producing any. | |
| + let mut status_overrides = BTreeMap::new(); | |
| + | |
| + let get_status = | |
| + |status_overrides: &mut BTreeMap<(usize, usize), usize>, b: usize, g: usize| { | |
| + if let Some(s) = status_overrides.get(&(b, g)) { | |
| + *s | |
| } else { | |
| - pass_to_pipeline(chan, grid, 1, buf.data)?; | |
| + self.buffer_info[b].buffer_grid[g].get_status() | |
| + } | |
| + }; | |
| + | |
| + let mut new_dirty_transforms = vec![]; | |
| + while let Some((_, transforms)) = to_process_by_layer.pop_first() { | |
| + trace!("{transforms:?}"); | |
| + for (t, is_strong) in transforms { | |
| + let tfm = &self.transform_steps[t]; | |
| + trace!("{:?}", tfm); | |
| + | |
| + let dependency_status = tfm | |
| + .deps | |
| + .iter() | |
| + .map(|(b, g)| get_status(&mut status_overrides, *b, *g)) | |
| + .min() | |
| + .unwrap_or(BUFFER_STATUS_FINAL_RENDER); | |
| + | |
| + if dependency_status == BUFFER_STATUS_NOT_RENDERED { | |
| + continue; | |
| + } | |
| + let is_final = dependency_status == BUFFER_STATUS_FINAL_RENDER; | |
| + | |
| + let mut previous_output_status = None; | |
| + for (b, g) in tfm.outputs(&self.buffer_info) { | |
| + let status = get_status(&mut status_overrides, b, g); | |
| + if previous_output_status.is_none() { | |
| + previous_output_status = Some(status); | |
| + } | |
| + assert_eq!(Some(status), previous_output_status); | |
| + if dry_run { | |
| + status_overrides.insert((b, g), dependency_status); | |
| + } else { | |
| + self.buffer_info[b].buffer_grid[g].set_status(dependency_status); | |
| + } | |
| + } | |
| + let previous_output_status = previous_output_status.unwrap(); | |
| + | |
| + if !dry_run { | |
| + tfm.do_run(frame_header, &self.buffer_info, is_final)?; | |
| + } | |
| + | |
| + // If this was the first _or_ the last render, trigger a re-render across weak edges | |
| + // even if the render was caused by a weak edge. | |
| + // This is necessary to finish drawing those renders correctly. | |
| + let is_strong = is_strong | |
| + || (previous_output_status == BUFFER_STATUS_NOT_RENDERED | |
| + || dependency_status == BUFFER_STATUS_FINAL_RENDER); | |
| + for (buf, grid) in self.transform_steps[t].outputs(&self.buffer_info) { | |
| + if self.buffer_info[buf].info.output_channel_idx.is_some() { | |
| + buffers_to_output.push((buf, grid)); | |
| + } | |
| + for (t, is_strong_dep) in | |
| + self.buffer_info[buf].buffer_grid[grid].users(is_strong) | |
| + { | |
| + new_dirty_transforms.push((t, is_strong_dep)); | |
| + } | |
| + } | |
| + } | |
| + | |
| + for (t, is_strong_dep) in new_dirty_transforms.drain(..) { | |
| + let layer = self.transform_steps[t].layer; | |
| + let layer = to_process_by_layer.entry(layer).or_default(); | |
| + let is_strong = layer.entry(t).or_default(); | |
| + *is_strong |= is_strong_dep; | |
| + } | |
| + } | |
| + | |
| + // Pass all the output buffers to the render pipeline. | |
| + for (buf, grid) in buffers_to_output { | |
| + self.maybe_output(buf, grid, dry_run, pass_to_pipeline)?; | |
| + } | |
| + | |
| + Ok(()) | |
| + } | |
| + | |
| + pub fn channel_range(&self) -> Range<usize> { | |
| + if self.modular_color_channels != 0 { | |
| + 0..self.buffers_for_channels.len() | |
| + } else { | |
| + // VarDCT image. | |
| + 3..self.buffers_for_channels.len() | |
| + } | |
| + } | |
| + | |
| + pub fn flush_output( | |
| + &mut self, | |
| + group: usize, | |
| + chan: usize, | |
| + pass_to_pipeline: &mut dyn FnMut(usize, usize, bool, Image<i32>) -> Result<()>, | |
| + ) -> Result<()> { | |
| + if !self.can_do_partial_render() { | |
| + return Ok(()); | |
| + } | |
| + let buf_idx = self.buffers_for_channels[chan]; | |
| + // Skip channels that don't have a real buffer assignment. | |
| + // buffers_for_channels is zero-filled on resize, so intermediate channels | |
| + // (e.g. G/B when modular_color_channels==1) may alias buffer 0 incorrectly. | |
| + if self.buffer_info[buf_idx].info.output_channel_idx != Some(chan) { | |
| + return Ok(()); | |
| + } | |
| + self.maybe_output(buf_idx, group, false, &mut |chan, grid, complete, img| { | |
| + pass_to_pipeline(chan, grid, complete, img.unwrap()) | |
| + }) | |
| + } | |
| + | |
| + pub fn zero_fill_empty_channels( | |
| + &mut self, | |
| + num_passes: usize, | |
| + num_groups: usize, | |
| + num_lf_groups: usize, | |
| + ) -> Result<()> { | |
| + if !self.can_do_partial_render() { | |
| + return Ok(()); | |
| + } | |
| + if self.buffer_info.is_empty() { | |
| + return Ok(()); | |
| + } | |
| + let mut fill_buffer = |section: usize, grid| -> Result<()> { | |
| + // TODO(veluca): consider filling these buffers with placeholders instead of real images. | |
| + with_buffers( | |
| + &self.buffer_info, | |
| + &self.section_buffer_indices[section], | |
| + grid, | |
| + |_| Ok(()), | |
| + )?; | |
| + for b in self.section_buffer_indices[section].iter() { | |
| + if self.buffer_info[*b].buffer_grid[grid].get_status() == BUFFER_STATUS_NOT_RENDERED | |
| + { | |
| + self.buffer_info[*b].buffer_grid[grid].set_status(BUFFER_STATUS_PARTIAL_RENDER); | |
| + self.ready_buffers.insert((*b, grid)); | |
| } | |
| } | |
| Ok(()) | |
| }; | |
| - | |
| - let mut new_ready_transform_chunks = vec![]; | |
| - for buf in self.section_buffer_indices[section_id].iter().copied() { | |
| - maybe_output(&mut self.buffer_info[buf], grid)?; | |
| - let new_chunks = self.buffer_info[buf].buffer_grid[grid] | |
| - .used_by_transforms | |
| - .to_vec(); | |
| - trace!("Buffer {buf} grid position {grid} used by chunks {new_chunks:?}"); | |
| - new_ready_transform_chunks.extend(new_chunks); | |
| - } | |
| - | |
| - trace!(?new_ready_transform_chunks); | |
| - | |
| - while let Some(tfm) = new_ready_transform_chunks.pop() { | |
| - trace!("tfm = {tfm} chunk = {:?}", self.transform_steps[tfm]); | |
| - for (new_buf, new_grid) in | |
| - self.transform_steps[tfm].dep_ready(frame_header, &mut self.buffer_info)? | |
| - { | |
| - maybe_output(&mut self.buffer_info[new_buf], new_grid)?; | |
| - let new_chunks = self.buffer_info[new_buf].buffer_grid[new_grid] | |
| - .used_by_transforms | |
| - .to_vec(); | |
| - trace!("Buffer {new_buf} grid position {new_grid} used by chunks {new_chunks:?}"); | |
| - new_ready_transform_chunks.extend(new_chunks); | |
| + fill_buffer(0, 0)?; | |
| + for grid in 0..num_lf_groups { | |
| + fill_buffer(1, grid)?; | |
| + } | |
| + for pass in 0..num_passes { | |
| + for grid in 0..num_groups { | |
| + fill_buffer(2 + pass, grid)?; | |
| } | |
| } | |
| @@ -733,6 +1108,7 @@ pub fn decode_vardct_lf( | |
| None, | |
| global_tree, | |
| br, | |
| + None, | |
| )?; | |
| dequant_lf( | |
| r, | |
| @@ -780,6 +1156,7 @@ pub fn decode_hf_metadata( | |
| None, | |
| global_tree, | |
| br, | |
| + None, | |
| )?; | |
| let ytox_image = &buffers[0].data; | |
| let ytob_image = &buffers[1].data; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs | |
| index 5f1b631370267..2c74441aa0f00 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/apply.rs | |
| @@ -56,40 +56,44 @@ pub enum TransformStep { | |
| #[derive(Debug)] | |
| pub struct TransformStepChunk { | |
| pub(super) step: TransformStep, | |
| + | |
| // Grid position this transform should produce. | |
| // Note that this is a lie for Palette with AverageAll or Weighted, as the transform with | |
| // position (0, y) will produce the entire row of blocks (*, y) (and there will be no | |
| // transforms with position (x, y) with x > 0). | |
| pub(super) grid_pos: (usize, usize), | |
| - // Number of inputs that are not yet available. | |
| - pub(super) incomplete_deps: usize, | |
| + | |
| + // List of (buffer, grid) that this transform depends on. | |
| + pub(in super::super) deps: Vec<(usize, usize)>, | |
| + | |
| + // Processing layer that this transform belongs to. Layer 0 are transforms | |
| + // that only depend on coded channels, layer 1 are transforms that only | |
| + // depend on coded channels and layer 0 outputs, etc. Since transforms | |
| + // in the same layer have no inter-dependencies, they can be run at the | |
| + // same time. | |
| + pub(in super::super) layer: usize, | |
| } | |
| impl TransformStepChunk { | |
| - // Marks that one dependency of this transform is ready, and potentially runs the transform, | |
| - // returning the new buffers that are now ready. | |
| - #[instrument(level = "trace", skip_all)] | |
| - pub fn dep_ready( | |
| - &mut self, | |
| - frame_header: &FrameHeader, | |
| - buffers: &mut [ModularBufferInfo], | |
| - ) -> Result<Vec<(usize, usize)>> { | |
| - self.incomplete_deps = self.incomplete_deps.checked_sub(1).unwrap(); | |
| - if self.incomplete_deps > 0 { | |
| - trace!( | |
| - "skipping transform chunk because incomplete_deps = {}", | |
| - self.incomplete_deps | |
| - ); | |
| - return Ok(vec![]); | |
| - } | |
| - let buf_out: &[usize] = match &self.step { | |
| + fn buf_out(&self) -> &[usize] { | |
| + match &self.step { | |
| TransformStep::Rct { buf_out, .. } => buf_out, | |
| TransformStep::Palette { buf_out, .. } => buf_out, | |
| TransformStep::HSqueeze { buf_out, .. } | TransformStep::VSqueeze { buf_out, .. } => { | |
| - &[*buf_out] | |
| + std::slice::from_ref(buf_out) | |
| } | |
| - }; | |
| + } | |
| + } | |
| + // Runs this transform. This function *will* crash if the transform is not ready. | |
| + #[instrument(level = "trace", skip_all)] | |
| + pub fn do_run( | |
| + &self, | |
| + frame_header: &FrameHeader, | |
| + buffers: &[ModularBufferInfo], | |
| + is_final: bool, | |
| + ) -> Result<()> { | |
| + let buf_out = self.buf_out(); | |
| let out_grid_kind = buffers[buf_out[0]].grid_kind; | |
| let out_grid = buffers[buf_out[0]].get_grid_idx(out_grid_kind, self.grid_pos); | |
| let out_size = buffers[buf_out[0]].info.size; | |
| @@ -112,13 +116,12 @@ impl TransformStepChunk { | |
| // If not, creates buffers in the output that are a copy of the input buffers. | |
| // This should be rare. | |
| *buffers[buf_out[i]].buffer_grid[out_grid].data.borrow_mut() = | |
| - Some(buffers[buf_in[i]].buffer_grid[out_grid].get_buffer()?); | |
| + Some(buffers[buf_in[i]].buffer_grid[out_grid].get_buffer(is_final)?); | |
| } | |
| - with_buffers(buffers, buf_out, out_grid, false, |mut bufs| { | |
| + with_buffers(buffers, buf_out, out_grid, |mut bufs| { | |
| super::rct::do_rct_step(&mut bufs, *op, *perm); | |
| Ok(()) | |
| })?; | |
| - Ok(buf_out.iter().map(|x| (*x, out_grid)).collect()) | |
| } | |
| TransformStep::Palette { | |
| buf_in, | |
| @@ -127,10 +130,9 @@ impl TransformStepChunk { | |
| .. | |
| } if buffers[*buf_in].info.size.0 == 0 => { | |
| // Nothing to do, just bookkeeping. | |
| - buffers[*buf_in].buffer_grid[out_grid].mark_used(); | |
| - buffers[*buf_pal].buffer_grid[0].mark_used(); | |
| - with_buffers(buffers, buf_out, out_grid, false, |_| Ok(()))?; | |
| - Ok(buf_out.iter().map(|x| (*x, out_grid)).collect()) | |
| + buffers[*buf_in].buffer_grid[out_grid].mark_used(is_final); | |
| + buffers[*buf_pal].buffer_grid[0].mark_used(is_final); | |
| + with_buffers(buffers, buf_out, out_grid, |_| Ok(()))?; | |
| } | |
| TransformStep::Palette { | |
| buf_in, | |
| @@ -155,7 +157,7 @@ impl TransformStepChunk { | |
| }); | |
| // Ensure that the output buffers are present. | |
| // TODO(szabadka): Extend the callback to support many grid points. | |
| - with_buffers(buffers, buf_out, out_grid, false, |_| Ok(()))?; | |
| + with_buffers(buffers, buf_out, out_grid, |_| Ok(()))?; | |
| let grid_shape = buffers[buf_out[0]].grid_shape; | |
| let grid_x = out_grid % grid_shape.0; | |
| let grid_y = out_grid / grid_shape.0; | |
| @@ -191,9 +193,8 @@ impl TransformStepChunk { | |
| *predictor, | |
| ); | |
| } | |
| - buffers[*buf_in].buffer_grid[out_grid].mark_used(); | |
| - buffers[*buf_pal].buffer_grid[0].mark_used(); | |
| - Ok(buf_out.iter().map(|x| (*x, out_grid)).collect()) | |
| + buffers[*buf_in].buffer_grid[out_grid].mark_used(is_final); | |
| + buffers[*buf_pal].buffer_grid[0].mark_used(is_final); | |
| } | |
| TransformStep::Palette { | |
| buf_in, | |
| @@ -206,7 +207,6 @@ impl TransformStepChunk { | |
| } => { | |
| assert_eq!(out_grid_kind, buffers[*buf_in].grid_kind); | |
| assert_eq!(out_size, buffers[*buf_in].info.size); | |
| - let mut generated_chunks = Vec::<(usize, usize)>::new(); | |
| let grid_shape = buffers[buf_out[0]].grid_shape; | |
| { | |
| assert_eq!(out_grid % grid_shape.0, 0); | |
| @@ -222,7 +222,7 @@ impl TransformStepChunk { | |
| )); | |
| // Ensure that the output buffers are present. | |
| // TODO(szabadka): Extend the callback to support many grid points. | |
| - with_buffers(buffers, buf_out, out_grid + grid_x, false, |_| Ok(()))?; | |
| + with_buffers(buffers, buf_out, out_grid + grid_x, |_| Ok(()))?; | |
| } | |
| let in_buf_refs: Vec<&ModularChannel> = | |
| in_bufs.iter().map(|x| x.deref()).collect(); | |
| @@ -256,14 +256,10 @@ impl TransformStepChunk { | |
| wp_header, | |
| )?; | |
| } | |
| - buffers[*buf_pal].buffer_grid[0].mark_used(); | |
| + buffers[*buf_pal].buffer_grid[0].mark_used(is_final); | |
| for grid_x in 0..grid_shape.0 { | |
| - buffers[*buf_in].buffer_grid[out_grid + grid_x].mark_used(); | |
| - for buf in buf_out { | |
| - generated_chunks.push((*buf, out_grid + grid_x)); | |
| - } | |
| + buffers[*buf_in].buffer_grid[out_grid + grid_x].mark_used(is_final); | |
| } | |
| - Ok(generated_chunks) | |
| } | |
| TransformStep::HSqueeze { buf_in, buf_out } => { | |
| let buf_avg = &buffers[buf_in[0]]; | |
| @@ -309,7 +305,7 @@ impl TransformStepChunk { | |
| )) | |
| }; | |
| - with_buffers(buffers, &[*buf_out], out_grid, false, |mut bufs| { | |
| + with_buffers(buffers, &[*buf_out], out_grid, |mut bufs| { | |
| super::squeeze::do_hsqueeze_step( | |
| &in_avg.data.get_rect(buf_avg.get_grid_rect( | |
| frame_header, | |
| @@ -328,9 +324,8 @@ impl TransformStepChunk { | |
| Ok(()) | |
| })?; | |
| } | |
| - buffers[buf_in[0]].buffer_grid[in_grid].mark_used(); | |
| - buffers[buf_in[1]].buffer_grid[res_grid].mark_used(); | |
| - Ok(vec![(*buf_out, out_grid)]) | |
| + buffers[buf_in[0]].buffer_grid[in_grid].mark_used(is_final); | |
| + buffers[buf_in[1]].buffer_grid[res_grid].mark_used(is_final); | |
| } | |
| TransformStep::VSqueeze { buf_in, buf_out } => { | |
| let buf_avg = &buffers[buf_in[0]]; | |
| @@ -379,7 +374,7 @@ impl TransformStepChunk { | |
| buf_avg.get_grid_rect(frame_header, out_grid_kind, (gx, gy)); | |
| let res_grid_rect = | |
| buf_res.get_grid_rect(frame_header, out_grid_kind, (gx, gy)); | |
| - with_buffers(buffers, &[*buf_out], out_grid, false, |mut bufs| { | |
| + with_buffers(buffers, &[*buf_out], out_grid, |mut bufs| { | |
| super::squeeze::do_vsqueeze_step( | |
| &in_avg.data.get_rect(avg_grid_rect), | |
| &in_res.data.get_rect(res_grid_rect), | |
| @@ -390,11 +385,34 @@ impl TransformStepChunk { | |
| Ok(()) | |
| })?; | |
| } | |
| - buffers[buf_in[0]].buffer_grid[in_grid].mark_used(); | |
| - buffers[buf_in[1]].buffer_grid[res_grid].mark_used(); | |
| - Ok(vec![(*buf_out, out_grid)]) | |
| + buffers[buf_in[0]].buffer_grid[in_grid].mark_used(is_final); | |
| + buffers[buf_in[1]].buffer_grid[res_grid].mark_used(is_final); | |
| } | |
| - } | |
| + }; | |
| + | |
| + Ok(()) | |
| + } | |
| + | |
| + // Iterates over the list of outputs for this transform. | |
| + pub fn outputs(&self, buffers: &[ModularBufferInfo]) -> impl Iterator<Item = (usize, usize)> { | |
| + let buf_out = self.buf_out(); | |
| + let out_grid_kind = buffers[buf_out[0]].grid_kind; | |
| + let out_grid = buffers[buf_out[0]].get_grid_idx(out_grid_kind, self.grid_pos); | |
| + let grid_offset_up = match &self.step { | |
| + TransformStep::Palette { | |
| + buf_in, | |
| + buf_out, | |
| + predictor, | |
| + .. | |
| + } if buffers[*buf_in].info.size.0 != 0 && predictor.requires_full_row() => { | |
| + buffers[buf_out[0]].grid_shape.0 | |
| + } | |
| + _ => 1, | |
| + }; | |
| + | |
| + buf_out | |
| + .iter() | |
| + .flat_map(move |x| (0..grid_offset_up).map(move |y| (*x, out_grid + y))) | |
| } | |
| } | |
| @@ -445,7 +463,7 @@ fn meta_apply_single_transform( | |
| for i in 0..3 { | |
| let c = &mut channels[begin_channel + i]; | |
| let mut info = c.1; | |
| - info.output_channel_idx = -1; | |
| + info.output_channel_idx = None; | |
| c.0 = add_transform_buffer( | |
| info, | |
| format!( | |
| @@ -503,7 +521,7 @@ fn meta_apply_single_transform( | |
| ((w, h.div_ceil(2)), (w, h - h.div_ceil(2))) | |
| }; | |
| let new_0 = ChannelInfo { | |
| - output_channel_idx: -1, | |
| + output_channel_idx: None, | |
| shift: new_shift, | |
| size: new_size_0, | |
| bit_depth: chan.bit_depth, | |
| @@ -513,7 +531,7 @@ fn meta_apply_single_transform( | |
| format!("Squeezed channel, original channel {}", begin_channel + ic), | |
| ); | |
| let new_1 = ChannelInfo { | |
| - output_channel_idx: -1, | |
| + output_channel_idx: None, | |
| shift: new_shift, | |
| size: new_size_1, | |
| bit_depth: chan.bit_depth, | |
| @@ -551,7 +569,7 @@ fn meta_apply_single_transform( | |
| // equal in the line above. | |
| let bit_depth = channels[begin_channel].1.bit_depth; | |
| let pchan_info = ChannelInfo { | |
| - output_channel_idx: -1, | |
| + output_channel_idx: None, | |
| shift: None, | |
| size: (num_colors + num_deltas, num_channels), | |
| bit_depth, | |
| @@ -564,7 +582,7 @@ fn meta_apply_single_transform( | |
| ), | |
| ); | |
| let mut inchan_info = channels[begin_channel].1; | |
| - inchan_info.output_channel_idx = -1; | |
| + inchan_info.output_channel_idx = None; | |
| let inchan = add_transform_buffer( | |
| inchan_info, | |
| format!( | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs | |
| index f1de5be25ec1a..6be51c221ed5c 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/transforms/mod.rs | |
| @@ -3,10 +3,14 @@ | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| +use std::sync::atomic::AtomicUsize; | |
| +use std::sync::atomic::Ordering; | |
| + | |
| use apply::TransformStep; | |
| pub use apply::TransformStepChunk; | |
| use num_derive::FromPrimitive; | |
| +use crate::frame::modular::BUFFER_STATUS_NOT_RENDERED; | |
| use crate::frame::modular::ModularBuffer; | |
| use crate::headers::frame_header::FrameHeader; | |
| use crate::util::AtomicRefCell; | |
| @@ -112,15 +116,17 @@ pub fn make_grids( | |
| // Create grids. | |
| for g in buffer_info.iter_mut() { | |
| - let is_output = g.info.output_channel_idx >= 0; | |
| + let is_output = g.info.output_channel_idx.is_some(); | |
| g.buffer_grid = get_grid_indices(g.grid_shape) | |
| .map(|(x, y)| ModularBuffer { | |
| data: AtomicRefCell::new(None), | |
| - remaining_uses: if is_output { 1 } else { 0 }, | |
| - used_by_transforms: vec![], | |
| + remaining_uses: AtomicUsize::new(if is_output { 1 } else { 0 }), | |
| + used_by_transforms_weak: vec![], | |
| + used_by_transforms_strong: vec![], | |
| size: g | |
| .get_grid_rect(frame_header, g.grid_kind, (x as usize, y as usize)) | |
| .size, | |
| + status: AtomicUsize::new(BUFFER_STATUS_NOT_RENDERED), | |
| }) | |
| .collect(); | |
| } | |
| @@ -135,7 +141,8 @@ pub fn make_grids( | |
| grid_transform_steps.push(TransformStepChunk { | |
| step: transform.clone(), | |
| grid_pos: (grid_pos.0 as usize, grid_pos.1 as usize), | |
| - incomplete_deps: 0, | |
| + deps: vec![], | |
| + layer: 0, | |
| }); | |
| ts | |
| }; | |
| @@ -145,6 +152,7 @@ pub fn make_grids( | |
| output_grid_kind: ModularGridKind, | |
| output_grid_shape: (usize, usize), | |
| output_grid_pos: (isize, isize), | |
| + is_weak: bool, | |
| grid_transform_steps: &mut Vec<TransformStepChunk>, | |
| buffer_info: &mut Vec<ModularBufferInfo>| { | |
| let output_grid_size = (output_grid_shape.0 as isize, output_grid_shape.1 as isize); | |
| @@ -159,15 +167,19 @@ pub fn make_grids( | |
| let output_grid_pos = (output_grid_pos.0 as usize, output_grid_pos.1 as usize); | |
| let input_grid_pos = | |
| buffer_info[input_buffer_idx].get_grid_idx(output_grid_kind, output_grid_pos); | |
| - if !buffer_info[input_buffer_idx].buffer_grid[input_grid_pos] | |
| - .used_by_transforms | |
| - .contains(&ts) | |
| + let grid = &mut buffer_info[input_buffer_idx].buffer_grid[input_grid_pos]; | |
| + if !grid.used_by_transforms_weak.contains(&ts) | |
| + && !grid.used_by_transforms_strong.contains(&ts) | |
| { | |
| - buffer_info[input_buffer_idx].buffer_grid[input_grid_pos].remaining_uses += 1; | |
| - buffer_info[input_buffer_idx].buffer_grid[input_grid_pos] | |
| - .used_by_transforms | |
| - .push(ts); | |
| - grid_transform_steps[ts].incomplete_deps += 1; | |
| + grid.remaining_uses.fetch_add(1, Ordering::Relaxed); | |
| + grid_transform_steps[ts] | |
| + .deps | |
| + .push((input_buffer_idx, input_grid_pos)); | |
| + if is_weak { | |
| + grid.used_by_transforms_weak.push(ts); | |
| + } else { | |
| + grid.used_by_transforms_strong.push(ts); | |
| + } | |
| } | |
| }; | |
| @@ -191,6 +203,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x, y), | |
| + false, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -219,6 +232,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x, y), | |
| + false, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -229,6 +243,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x, y), | |
| + false, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -239,6 +254,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x, y - 1), | |
| + false, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -265,6 +281,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x, y), | |
| + false, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -274,6 +291,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x, y), | |
| + false, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -289,6 +307,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x + dx, y + dy), | |
| + false, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -309,6 +328,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x, y), | |
| + false, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -320,6 +340,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x + 1, y), | |
| + true, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -330,6 +351,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x - 1, y), | |
| + true, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -348,6 +370,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x, y), | |
| + false, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -359,6 +382,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x, y + 1), | |
| + true, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -369,6 +393,7 @@ pub fn make_grids( | |
| out_kind, | |
| out_shape, | |
| (x, y - 1), | |
| + true, | |
| &mut grid_transform_steps, | |
| buffer_info, | |
| ); | |
| @@ -377,6 +402,46 @@ pub fn make_grids( | |
| } | |
| } | |
| + // Compute the layer of each transform step. | |
| + // TODO(veluca): for parallelization purposes, it might make sense to try to ensure that | |
| + // transforms in the same layer are as similar in runtime as possible. | |
| + let mut transforms_needed_by = vec![vec![]; grid_transform_steps.len()]; | |
| + let mut enabled_transforms = vec![vec![]; grid_transform_steps.len()]; | |
| + for (i, s) in grid_transform_steps.iter().enumerate() { | |
| + for (b, g) in s.outputs(buffer_info) { | |
| + for (t, _) in buffer_info[b].buffer_grid[g].users(true) { | |
| + transforms_needed_by[t].push(i); | |
| + enabled_transforms[i].push(t); | |
| + } | |
| + } | |
| + } | |
| + | |
| + let mut missing_prerequisites: Vec<_> = transforms_needed_by.iter().map(|x| x.len()).collect(); | |
| + | |
| + let mut stack = vec![]; | |
| + for (i, m) in missing_prerequisites.iter().enumerate() { | |
| + if *m == 0 { | |
| + stack.push(i); | |
| + } | |
| + } | |
| + | |
| + while let Some(i) = stack.pop() { | |
| + assert_eq!(missing_prerequisites[i], 0); | |
| + for e in enabled_transforms[i].iter() { | |
| + missing_prerequisites[*e] = missing_prerequisites[*e].checked_sub(1).unwrap(); | |
| + if missing_prerequisites[*e] == 0 { | |
| + stack.push(*e); | |
| + } | |
| + } | |
| + | |
| + grid_transform_steps[i].layer = transforms_needed_by[i] | |
| + .iter() | |
| + .map(|x| grid_transform_steps[*x].layer) | |
| + .max() | |
| + .unwrap_or(0) | |
| + + 1; | |
| + } | |
| + | |
| trace!(?grid_transform_steps, ?buffer_info); | |
| grid_transform_steps | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs | |
| index 59730862b7061..b5f0022a95270 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/tree.rs | |
| @@ -63,6 +63,124 @@ pub struct Tree { | |
| pub histograms: Histograms, | |
| } | |
| +fn validate_tree(tree: &[TreeNode], num_properties: usize) -> Result<()> { | |
| + const HEIGHT_LIMIT: usize = 2048; | |
| + | |
| + if tree.is_empty() { | |
| + return Ok(()); | |
| + } | |
| + | |
| + // This mirrors libjxl's ValidateTree(), but avoids allocating | |
| + // `num_properties * tree.len()` entries. | |
| + // | |
| + // We do an explicit DFS and keep the property ranges only for the current root->node path. | |
| + // When descending into a child we update exactly one property's range (the one we split on) | |
| + // and store the previous range in the child frame; when returning from that child we restore | |
| + // it. This makes memory O(num_properties + height) instead of O(num_properties * tree_size). | |
| + | |
| + #[derive(Clone, Copy, Debug)] | |
| + enum Stage { | |
| + Enter, | |
| + AfterLeft, | |
| + AfterRight, | |
| + } | |
| + | |
| + struct Frame { | |
| + node: usize, | |
| + depth: usize, | |
| + stage: Stage, | |
| + restore: Option<(usize, (i32, i32))>, | |
| + } | |
| + | |
| + let mut property_ranges: Vec<(i32, i32)> = vec![(i32::MIN, i32::MAX); num_properties]; | |
| + let mut stack = vec![Frame { | |
| + node: 0, | |
| + depth: 0, | |
| + stage: Stage::Enter, | |
| + restore: None, | |
| + }]; | |
| + | |
| + while let Some(mut frame) = stack.pop() { | |
| + if frame.depth > HEIGHT_LIMIT { | |
| + return Err(Error::TreeTooTall(frame.depth, HEIGHT_LIMIT)); | |
| + } | |
| + | |
| + match (frame.stage, tree[frame.node]) { | |
| + (Stage::Enter, TreeNode::Leaf { .. }) => { | |
| + if let Some((p, old)) = frame.restore { | |
| + property_ranges[p] = old; | |
| + } | |
| + } | |
| + ( | |
| + Stage::Enter, | |
| + TreeNode::Split { | |
| + property, | |
| + val, | |
| + left, | |
| + right: _, | |
| + }, | |
| + ) => { | |
| + let p = property as usize; | |
| + let (l, u) = property_ranges[p]; | |
| + if l > val || u <= val { | |
| + return Err(Error::TreeSplitOnEmptyRange(property, val, l, u)); | |
| + } | |
| + | |
| + frame.stage = Stage::AfterLeft; | |
| + let depth = frame.depth; | |
| + stack.push(frame); | |
| + | |
| + // Descend into left child: range becomes (val+1, u). | |
| + let old = property_ranges[p]; | |
| + property_ranges[p] = (val + 1, u); | |
| + stack.push(Frame { | |
| + node: left as usize, | |
| + depth: depth + 1, | |
| + stage: Stage::Enter, | |
| + restore: Some((p, old)), | |
| + }); | |
| + } | |
| + ( | |
| + Stage::AfterLeft, | |
| + TreeNode::Split { | |
| + property, | |
| + val, | |
| + left: _, | |
| + right, | |
| + }, | |
| + ) => { | |
| + let p = property as usize; | |
| + let (l, u) = property_ranges[p]; | |
| + if l > val || u <= val { | |
| + return Err(Error::TreeSplitOnEmptyRange(property, val, l, u)); | |
| + } | |
| + | |
| + frame.stage = Stage::AfterRight; | |
| + let depth = frame.depth; | |
| + stack.push(frame); | |
| + | |
| + // Descend into right child: range becomes (l, val). | |
| + let old = property_ranges[p]; | |
| + property_ranges[p] = (l, val); | |
| + stack.push(Frame { | |
| + node: right as usize, | |
| + depth: depth + 1, | |
| + stage: Stage::Enter, | |
| + restore: Some((p, old)), | |
| + }); | |
| + } | |
| + (Stage::AfterRight, TreeNode::Split { .. }) => { | |
| + if let Some((p, old)) = frame.restore { | |
| + property_ranges[p] = old; | |
| + } | |
| + } | |
| + _ => unreachable!("invalid tree validation state"), | |
| + } | |
| + } | |
| + | |
| + Ok(()) | |
| +} | |
| + | |
| impl Debug for Tree { | |
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
| write!(f, "Tree[{:?}]", self.nodes) | |
| @@ -358,52 +476,7 @@ impl Tree { | |
| tree_reader.check_final_state(&tree_histograms, br)?; | |
| let num_properties = max_property as usize + 1; | |
| - let mut property_ranges = Vec::new_with_capacity(num_properties * tree.len())?; | |
| - property_ranges.resize(num_properties * tree.len(), (i32::MIN, i32::MAX)); | |
| - let mut height = Vec::new_with_capacity(tree.len())?; | |
| - height.resize(tree.len(), 0); | |
| - for i in 0..tree.len() { | |
| - const HEIGHT_LIMIT: usize = 2048; | |
| - if height[i] > HEIGHT_LIMIT { | |
| - return Err(Error::TreeTooLarge(height[i], HEIGHT_LIMIT)); | |
| - } | |
| - if let TreeNode::Split { | |
| - property, | |
| - val, | |
| - left, | |
| - right, | |
| - } = tree[i] | |
| - { | |
| - height[left as usize] = height[i] + 1; | |
| - height[right as usize] = height[i] + 1; | |
| - for p in 0..num_properties { | |
| - if p == property as usize { | |
| - let (l, u) = property_ranges[i * num_properties + p]; | |
| - if l > val || u <= val { | |
| - return Err(Error::TreeSplitOnEmptyRange(p as u8, val, l, u)); | |
| - } | |
| - trace!( | |
| - "splitting at node {i} on property {p}, range [{l}, {u}] at position {val}" | |
| - ); | |
| - property_ranges[left as usize * num_properties + p] = (val + 1, u); | |
| - property_ranges[right as usize * num_properties + p] = (l, val); | |
| - } else { | |
| - property_ranges[left as usize * num_properties + p] = | |
| - property_ranges[i * num_properties + p]; | |
| - property_ranges[right as usize * num_properties + p] = | |
| - property_ranges[i * num_properties + p]; | |
| - } | |
| - } | |
| - } else { | |
| - #[cfg(feature = "tracing")] | |
| - { | |
| - for p in 0..num_properties { | |
| - let (l, u) = property_ranges[i * num_properties + p]; | |
| - trace!("final range at node {i} property {p}: [{l}, {u}]"); | |
| - } | |
| - } | |
| - } | |
| - } | |
| + validate_tree(&tree, num_properties)?; | |
| let histograms = Histograms::decode(tree.len().div_ceil(2), br, true)?; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs | |
| index 7eb13c4ce2495..0bcbda39a020f 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quant_weights.rs | |
| @@ -254,6 +254,7 @@ impl QuantEncoding { | |
| None, | |
| &lf_global.tree, | |
| br, | |
| + None, | |
| )?; | |
| let mut qtable = Vec::with_capacity(required_size_x * required_size_y * 3); | |
| for channel in image.iter_mut() { | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs | |
| index 57413e811b57a..e1797e3c4fc7e 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/quantizer.rs | |
| @@ -13,12 +13,23 @@ use crate::{ | |
| pub const NUM_QUANT_TABLES: usize = 17; | |
| pub const GLOBAL_SCALE_DENOM: usize = 1 << 16; | |
| -#[derive(Debug)] | |
| +#[derive(Debug, Clone)] | |
| pub struct LfQuantFactors { | |
| pub quant_factors: [f32; 3], | |
| pub inv_quant_factors: [f32; 3], | |
| } | |
| +impl Default for LfQuantFactors { | |
| + fn default() -> Self { | |
| + let quant_factors = quant_weights::LF_QUANT; | |
| + let inv_quant_factors = quant_factors.map(f32::recip); | |
| + Self { | |
| + quant_factors, | |
| + inv_quant_factors, | |
| + } | |
| + } | |
| +} | |
| + | |
| impl LfQuantFactors { | |
| pub fn new(br: &mut BitReader) -> Result<LfQuantFactors> { | |
| let mut quant_factors = [0.0f32; 3]; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs | |
| index c8803bf28e113..f419c9858c55d 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/render.rs | |
| @@ -12,16 +12,27 @@ use crate::api::JxlOutputBuffer; | |
| use crate::bit_reader::BitReader; | |
| use crate::error::{Error, Result}; | |
| use crate::features::epf::SigmaSource; | |
| +use crate::features::noise::Noise; | |
| +use crate::features::patches::PatchesDictionary; | |
| +use crate::features::spline::Splines; | |
| +use crate::frame::RenderUnit; | |
| +use crate::frame::color_correlation_map::ColorCorrelationParams; | |
| +use crate::frame::quantizer::LfQuantFactors; | |
| use crate::headers::frame_header::Encoding; | |
| +use crate::headers::frame_header::FrameType; | |
| use crate::headers::{Orientation, color_encoding::ColorSpace, extra_channels::ExtraChannel}; | |
| +use crate::image::Image; | |
| use crate::image::Rect; | |
| +use crate::util::AtomicRefCell; | |
| +use std::sync::Arc; | |
| + | |
| #[cfg(test)] | |
| use crate::render::SimpleRenderPipeline; | |
| use crate::render::buffer_splitter::BufferSplitter; | |
| use crate::render::{LowMemoryRenderPipeline, RenderPipeline, RenderPipelineBuilder, stages::*}; | |
| use crate::{ | |
| api::JxlPixelFormat, | |
| - frame::{DecoderState, Frame, LfGlobalState}, | |
| + frame::{DecoderState, Frame}, | |
| headers::frame_header::FrameHeader, | |
| }; | |
| @@ -66,7 +77,7 @@ impl Frame { | |
| mut pipeline: RenderPipelineBuilder<P>, | |
| channels: &[usize], | |
| data_format: JxlDataFormat, | |
| - ) -> Result<RenderPipelineBuilder<P>> { | |
| + ) -> RenderPipelineBuilder<P> { | |
| use crate::render::stages::{ | |
| ConvertF32ToF16Stage, ConvertF32ToU8Stage, ConvertF32ToU16Stage, | |
| }; | |
| @@ -75,24 +86,24 @@ impl Frame { | |
| JxlDataFormat::U8 { bit_depth } => { | |
| for &channel in channels { | |
| pipeline = | |
| - pipeline.add_inout_stage(ConvertF32ToU8Stage::new(channel, bit_depth))?; | |
| + pipeline.add_inout_stage(ConvertF32ToU8Stage::new(channel, bit_depth)); | |
| } | |
| } | |
| JxlDataFormat::U16 { bit_depth, .. } => { | |
| for &channel in channels { | |
| pipeline = | |
| - pipeline.add_inout_stage(ConvertF32ToU16Stage::new(channel, bit_depth))?; | |
| + pipeline.add_inout_stage(ConvertF32ToU16Stage::new(channel, bit_depth)); | |
| } | |
| } | |
| JxlDataFormat::F16 { .. } => { | |
| for &channel in channels { | |
| - pipeline = pipeline.add_inout_stage(ConvertF32ToF16Stage::new(channel))?; | |
| + pipeline = pipeline.add_inout_stage(ConvertF32ToF16Stage::new(channel)); | |
| } | |
| } | |
| // F32 doesn't need conversion - the pipeline already uses f32 | |
| JxlDataFormat::F32 { .. } => {} | |
| } | |
| - Ok(pipeline) | |
| + pipeline | |
| } | |
| /// Check if CMS will consume a black channel that the user requested in the output. | |
| @@ -126,8 +137,10 @@ impl Frame { | |
| api_buffers: &mut Option<&mut [JxlOutputBuffer<'_>]>, | |
| pixel_format: &JxlPixelFormat, | |
| groups: Vec<(usize, Vec<(usize, BitReader)>)>, | |
| + do_flush: bool, | |
| + output_profile: &JxlColorProfile, | |
| ) -> Result<()> { | |
| - if self.render_pipeline.is_none() { | |
| + if self.render_pipeline.is_none() || self.lf_global.is_none() { | |
| assert_eq!(groups.iter().map(|x| x.1.len()).sum::<usize>(), 0); | |
| // We don't yet have any output ready (as the pipeline would be initialized otherwise), | |
| // so exit without doing anything. | |
| @@ -194,41 +207,128 @@ impl Frame { | |
| pipeline!(self, p, p.render_outside_frame(&mut buffer_splitter)?); | |
| - // Render data from the lf global section, if we didn't do so already, before rendering HF. | |
| - if !self.lf_global_was_rendered { | |
| - self.lf_global_was_rendered = true; | |
| - let lf_global = self.lf_global.as_mut().unwrap(); | |
| - let mut pass_to_pipeline = |chan, group, num_passes, image| { | |
| + let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global; | |
| + | |
| + modular_global.set_pipeline_used_channels(pipeline!(self, p, p.used_channel_mask())); | |
| + | |
| + // STEP 1: if we are requesting a flush, and did not flush before, mark modular channels | |
| + // as having been decoded as 0. | |
| + if !self.was_flushed_once && do_flush { | |
| + self.was_flushed_once = true; | |
| + self.groups_to_flush.extend(0..self.header.num_groups()); | |
| + modular_global.zero_fill_empty_channels( | |
| + self.header.passes.num_passes as usize, | |
| + self.header.num_groups(), | |
| + self.header.num_lf_groups(), | |
| + )?; | |
| + } | |
| + | |
| + // STEP 2: ensure that groups that will be re-rendered are marked as such. | |
| + // VarDCT data to be rendered. | |
| + for (g, _) in groups.iter() { | |
| + self.groups_to_flush.insert(*g); | |
| + pipeline!(self, p, p.mark_group_to_rerender(*g)); | |
| + } | |
| + // Modular data to be re-rendered. | |
| + { | |
| + let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global; | |
| + for (group, passes) in groups.iter() { | |
| + for (pass, _) in passes.iter() { | |
| + modular_global.mark_group_to_be_read(2 + *pass, *group); | |
| + } | |
| + } | |
| + let mut pass_to_pipeline = |_, group, _, _| { | |
| + self.groups_to_flush.insert(group); | |
| + pipeline!(self, p, p.mark_group_to_rerender(group)); | |
| + Ok(()) | |
| + }; | |
| + modular_global.process_output(&self.header, true, &mut pass_to_pipeline)?; | |
| + } | |
| + | |
| + // STEP 3: decode the groups, eagerly rendering VarDCT channels and noise. | |
| + for (group, mut passes) in groups { | |
| + if self.decode_hf_group(group, &mut passes, &mut buffer_splitter, do_flush)? { | |
| + self.changed_since_last_flush | |
| + .insert((group, RenderUnit::VarDCT)); | |
| + } | |
| + } | |
| + | |
| + // STEP 4: process all modular transforms that can now be processed, | |
| + // flushing buffers that will not be used again, if either we are forcing a render now | |
| + // or we are done with the file. | |
| + if self.incomplete_groups == 0 || do_flush { | |
| + let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global; | |
| + let mut pass_to_pipeline = |chan, group, complete, image: Option<Image<i32>>| { | |
| + self.changed_since_last_flush | |
| + .insert((group, RenderUnit::Modular(chan))); | |
| pipeline!( | |
| self, | |
| p, | |
| - p.set_buffer_for_group(chan, group, num_passes, image, &mut buffer_splitter)? | |
| + p.set_buffer_for_group( | |
| + chan, | |
| + group, | |
| + complete, | |
| + image.unwrap(), | |
| + &mut buffer_splitter | |
| + )? | |
| ); | |
| Ok(()) | |
| }; | |
| - lf_global | |
| - .modular_global | |
| - .process_output(0, 0, &self.header, &mut pass_to_pipeline)?; | |
| - for group in 0..self.header.num_lf_groups() { | |
| - lf_global.modular_global.process_output( | |
| - 1, | |
| - group, | |
| - &self.header, | |
| - &mut pass_to_pipeline, | |
| - )?; | |
| + modular_global.process_output(&self.header, false, &mut pass_to_pipeline)?; | |
| + | |
| + // STEP 5: re-render VarDCT/noise data in rendered groups for which it was | |
| + // not rendered, or re-send to pipeline modular channels that were not | |
| + // updated in those groups. | |
| + for g in std::mem::take(&mut self.groups_to_flush) { | |
| + if self | |
| + .changed_since_last_flush | |
| + .take(&(g, RenderUnit::VarDCT)) | |
| + .is_none() | |
| + { | |
| + self.decode_hf_group(g, &mut [], &mut buffer_splitter, true)?; | |
| + } | |
| + let modular_global = &mut self.lf_global.as_mut().unwrap().modular_global; | |
| + let mut pass_to_pipeline = |chan, group, complete, image| { | |
| + pipeline!( | |
| + self, | |
| + p, | |
| + p.set_buffer_for_group(chan, group, complete, image, &mut buffer_splitter)? | |
| + ); | |
| + Ok(()) | |
| + }; | |
| + for c in modular_global.channel_range() { | |
| + if self | |
| + .changed_since_last_flush | |
| + .take(&(g, RenderUnit::Modular(c))) | |
| + .is_none() | |
| + { | |
| + modular_global.flush_output(g, c, &mut pass_to_pipeline)?; | |
| + } | |
| + } | |
| } | |
| } | |
| - for (group, passes) in groups { | |
| - // TODO(veluca): render all the available passes at once. | |
| - for (pass, br) in passes { | |
| - self.decode_hf_group(group, pass, br, &mut buffer_splitter)?; | |
| - } | |
| - } | |
| + let regions = buffer_splitter.into_changed_regions(); | |
| self.reference_frame_data = reference_frame_data; | |
| self.lf_frame_data = lf_frame_data; | |
| + if self.header.frame_type == FrameType::LFFrame && self.header.lf_level == 1 { | |
| + if do_flush && let Some(buffers) = api_buffers { | |
| + self.maybe_preview_lf_frame( | |
| + pixel_format, | |
| + buffers, | |
| + Some(®ions[..]), | |
| + output_profile, | |
| + )?; | |
| + } else if self.incomplete_groups == 0 { | |
| + // If we are not requesting another flush at the end of the LF frame, we | |
| + // probably have a partial render. Ensure we re-render the LF frame when | |
| + // decoding the actual frame. | |
| + self.decoder_state.lf_frame_was_rendered = false; | |
| + } | |
| + } | |
| + | |
| Ok(()) | |
| } | |
| @@ -236,8 +336,12 @@ impl Frame { | |
| pub(crate) fn build_render_pipeline<T: RenderPipeline>( | |
| decoder_state: &DecoderState, | |
| frame_header: &FrameHeader, | |
| - lf_global: &LfGlobalState, | |
| - epf_sigma: &Option<SigmaSource>, | |
| + patches: Arc<AtomicRefCell<PatchesDictionary>>, | |
| + splines: Arc<AtomicRefCell<Splines>>, | |
| + noise: Arc<AtomicRefCell<Noise>>, | |
| + lf_quant: Arc<AtomicRefCell<LfQuantFactors>>, | |
| + color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>, | |
| + epf_sigma: Arc<AtomicRefCell<SigmaSource>>, | |
| pixel_format: &JxlPixelFormat, | |
| cms: Option<&dyn JxlCms>, | |
| input_profile: &JxlColorProfile, | |
| @@ -251,31 +355,29 @@ impl Frame { | |
| frame_header.size_upsampled(), | |
| frame_header.upsampling.ilog2() as usize, | |
| frame_header.log_group_dim(), | |
| - frame_header.passes.num_passes as usize, | |
| ); | |
| if frame_header.encoding == Encoding::Modular { | |
| if decoder_state.file_header.image_metadata.xyb_encoded { | |
| - pipeline = pipeline | |
| - .add_inout_stage(ConvertModularXYBToF32Stage::new(0, &lf_global.lf_quant))? | |
| + pipeline = pipeline.add_inout_stage(ConvertModularXYBToF32Stage::new(0, lf_quant)) | |
| } else { | |
| for i in 0..3 { | |
| pipeline = pipeline | |
| - .add_inout_stage(ConvertModularToF32Stage::new(i, metadata.bit_depth))?; | |
| + .add_inout_stage(ConvertModularToF32Stage::new(i, metadata.bit_depth)); | |
| } | |
| } | |
| } | |
| for i in 3..num_channels { | |
| let ec_bit_depth = metadata.extra_channel_info[i - 3].bit_depth(); | |
| - pipeline = pipeline.add_inout_stage(ConvertModularToF32Stage::new(i, ec_bit_depth))?; | |
| + pipeline = pipeline.add_inout_stage(ConvertModularToF32Stage::new(i, ec_bit_depth)); | |
| } | |
| for c in 0..3 { | |
| if frame_header.hshift(c) != 0 { | |
| - pipeline = pipeline.add_inout_stage(HorizontalChromaUpsample::new(c))?; | |
| + pipeline = pipeline.add_inout_stage(HorizontalChromaUpsample::new(c)); | |
| } | |
| if frame_header.vshift(c) != 0 { | |
| - pipeline = pipeline.add_inout_stage(VerticalChromaUpsample::new(c))?; | |
| + pipeline = pipeline.add_inout_stage(VerticalChromaUpsample::new(c)); | |
| } | |
| } | |
| @@ -286,17 +388,17 @@ impl Frame { | |
| 0, | |
| filters.gab_x_weight1, | |
| filters.gab_x_weight2, | |
| - ))? | |
| + )) | |
| .add_inout_stage(GaborishStage::new( | |
| 1, | |
| filters.gab_y_weight1, | |
| filters.gab_y_weight2, | |
| - ))? | |
| + )) | |
| .add_inout_stage(GaborishStage::new( | |
| 2, | |
| filters.gab_b_weight1, | |
| filters.gab_b_weight2, | |
| - ))?; | |
| + )); | |
| } | |
| let rf = &frame_header.restoration_filter; | |
| @@ -305,24 +407,24 @@ impl Frame { | |
| rf.epf_pass0_sigma_scale, | |
| rf.epf_border_sad_mul, | |
| rf.epf_channel_scale, | |
| - epf_sigma.clone().unwrap(), | |
| - ))? | |
| + epf_sigma.clone(), | |
| + )) | |
| } | |
| if rf.epf_iters >= 1 { | |
| pipeline = pipeline.add_inout_stage(Epf1Stage::new( | |
| 1.0, | |
| rf.epf_border_sad_mul, | |
| rf.epf_channel_scale, | |
| - epf_sigma.clone().unwrap(), | |
| - ))? | |
| + epf_sigma.clone(), | |
| + )) | |
| } | |
| if rf.epf_iters >= 2 { | |
| pipeline = pipeline.add_inout_stage(Epf2Stage::new( | |
| rf.epf_pass2_sigma_scale, | |
| rf.epf_border_sad_mul, | |
| rf.epf_channel_scale, | |
| - epf_sigma.clone().unwrap(), | |
| - ))? | |
| + epf_sigma.clone(), | |
| + )) | |
| } | |
| let late_ec_upsample = frame_header.upsampling > 1 | |
| @@ -340,26 +442,26 @@ impl Frame { | |
| 4 => pipeline.add_inout_stage(Upsample4x::new(transform_data, 3 + ec)), | |
| 8 => pipeline.add_inout_stage(Upsample8x::new(transform_data, 3 + ec)), | |
| _ => unreachable!(), | |
| - }?; | |
| + }; | |
| } | |
| } | |
| } | |
| if frame_header.has_patches() { | |
| - pipeline = pipeline.add_inplace_stage(PatchesStage { | |
| - patches: lf_global.patches.clone().unwrap(), | |
| - extra_channels: metadata.extra_channel_info.clone(), | |
| - decoder_state: decoder_state.reference_frames.clone(), | |
| - })? | |
| + pipeline = pipeline.add_inplace_stage(PatchesStage::new( | |
| + patches, | |
| + metadata.extra_channel_info.clone(), | |
| + decoder_state.reference_frames.clone(), | |
| + )) | |
| } | |
| if frame_header.has_splines() { | |
| pipeline = pipeline.add_inplace_stage(SplinesStage::new( | |
| - lf_global.splines.clone().unwrap(), | |
| + splines, | |
| frame_header.size(), | |
| - &lf_global.color_correlation_params.unwrap_or_default(), | |
| + color_correlation_params.clone(), | |
| decoder_state.high_precision, | |
| - )?)? | |
| + )) | |
| } | |
| if frame_header.upsampling > 1 { | |
| @@ -375,20 +477,20 @@ impl Frame { | |
| 4 => pipeline.add_inout_stage(Upsample4x::new(transform_data, c)), | |
| 8 => pipeline.add_inout_stage(Upsample8x::new(transform_data, c)), | |
| _ => unreachable!(), | |
| - }?; | |
| + }; | |
| } | |
| } | |
| if frame_header.has_noise() { | |
| pipeline = pipeline | |
| - .add_inout_stage(ConvolveNoiseStage::new(num_channels))? | |
| - .add_inout_stage(ConvolveNoiseStage::new(num_channels + 1))? | |
| - .add_inout_stage(ConvolveNoiseStage::new(num_channels + 2))? | |
| + .add_inout_stage(ConvolveNoiseStage::new(num_channels)) | |
| + .add_inout_stage(ConvolveNoiseStage::new(num_channels + 1)) | |
| + .add_inout_stage(ConvolveNoiseStage::new(num_channels + 2)) | |
| .add_inplace_stage(AddNoiseStage::new( | |
| - *lf_global.noise.as_ref().unwrap(), | |
| - lf_global.color_correlation_params.unwrap_or_default(), | |
| + noise, | |
| + color_correlation_params, | |
| num_channels, | |
| - ))?; | |
| + )); | |
| } | |
| // Calculate the actual number of API-provided buffers based on pixel_format. | |
| @@ -414,7 +516,7 @@ impl Frame { | |
| JxlColorType::Grayscale, | |
| JxlDataFormat::f32(), | |
| false, | |
| - )?; | |
| + ); | |
| } | |
| } | |
| if frame_header.can_be_referenced && frame_header.save_before_ct { | |
| @@ -426,7 +528,7 @@ impl Frame { | |
| JxlColorType::Grayscale, | |
| JxlDataFormat::f32(), | |
| false, | |
| - )?; | |
| + ); | |
| } | |
| } | |
| @@ -461,9 +563,9 @@ impl Frame { | |
| let xyb_encoded = decoder_state.file_header.image_metadata.xyb_encoded; | |
| if frame_header.do_ycbcr { | |
| - pipeline = pipeline.add_inplace_stage(YcbcrToRgbStage::new(0))?; | |
| + pipeline = pipeline.add_inplace_stage(YcbcrToRgbStage::new(0)); | |
| } else if xyb_encoded { | |
| - pipeline = pipeline.add_inplace_stage(XybStage::new(0, output_color_info.clone()))?; | |
| + pipeline = pipeline.add_inplace_stage(XybStage::new(0, output_color_info.clone())); | |
| } | |
| // Insert CMS stage if profiles differ. | |
| @@ -547,7 +649,7 @@ impl Frame { | |
| out_channels, | |
| cms_black_channel, | |
| max_pixels, | |
| - ))?; | |
| + )); | |
| cms_used = true; | |
| } | |
| } | |
| @@ -556,7 +658,7 @@ impl Frame { | |
| // - Only if output is non-linear AND | |
| // - CMS was not used (CMS already handles the full conversion including TF) | |
| if xyb_encoded && !output_tf.is_linear() && !cms_used { | |
| - pipeline = pipeline.add_inplace_stage(FromLinearStage::new(0, output_tf.clone()))?; | |
| + pipeline = pipeline.add_inplace_stage(FromLinearStage::new(0, output_tf.clone())); | |
| } | |
| if frame_header.needs_blending() { | |
| @@ -564,14 +666,14 @@ impl Frame { | |
| frame_header, | |
| &decoder_state.file_header, | |
| decoder_state.reference_frames.clone(), | |
| - )?)?; | |
| + )?); | |
| // TODO(veluca): we might not need to add an extend stage if the image size is | |
| // compatible with the frame size. | |
| pipeline = pipeline.add_extend_stage(ExtendToImageDimensionsStage::new( | |
| frame_header, | |
| &decoder_state.file_header, | |
| decoder_state.reference_frames.clone(), | |
| - )?)?; | |
| + )?); | |
| } | |
| if frame_header.can_be_referenced && !frame_header.save_before_ct { | |
| @@ -583,7 +685,7 @@ impl Frame { | |
| JxlColorType::Grayscale, | |
| JxlDataFormat::f32(), | |
| false, | |
| - )?; | |
| + ); | |
| } | |
| } | |
| @@ -597,7 +699,7 @@ impl Frame { | |
| { | |
| if info.ec_type == ExtraChannel::SpotColor { | |
| pipeline = pipeline | |
| - .add_inplace_stage(SpotColorStage::new(i, info.spot_color.unwrap()))?; | |
| + .add_inplace_stage(SpotColorStage::new(i, info.spot_color.unwrap())); | |
| } | |
| } | |
| } | |
| @@ -659,10 +761,10 @@ impl Frame { | |
| 0, | |
| num_color_channels, | |
| alpha_channel, | |
| - ))?; | |
| + )); | |
| } | |
| // Add conversion stages for non-float output formats | |
| - pipeline = Self::add_conversion_stages(pipeline, color_source_channels, *df)?; | |
| + pipeline = Self::add_conversion_stages(pipeline, color_source_channels, *df); | |
| pipeline = pipeline.add_save_stage( | |
| color_source_channels, | |
| metadata.orientation, | |
| @@ -670,20 +772,26 @@ impl Frame { | |
| pixel_format.color_type, | |
| *df, | |
| fill_opaque_alpha, | |
| - )?; | |
| + ); | |
| } | |
| + let mut save_idx = if pixel_format.color_data_format.is_some() { | |
| + 1 | |
| + } else { | |
| + 0 | |
| + }; | |
| for i in 0..frame_header.num_extra_channels as usize { | |
| if let Some(df) = &pixel_format.extra_channel_format[i] { | |
| // Add conversion stages for non-float output formats | |
| - pipeline = Self::add_conversion_stages(pipeline, &[3 + i], *df)?; | |
| + pipeline = Self::add_conversion_stages(pipeline, &[3 + i], *df); | |
| pipeline = pipeline.add_save_stage( | |
| &[3 + i], | |
| metadata.orientation, | |
| - 1 + i, | |
| + save_idx, | |
| JxlColorType::Grayscale, | |
| *df, | |
| false, | |
| - )?; | |
| + ); | |
| + save_idx += 1; | |
| } | |
| } | |
| } | |
| @@ -697,20 +805,17 @@ impl Frame { | |
| input_profile: &JxlColorProfile, | |
| output_profile: &JxlColorProfile, | |
| ) -> Result<()> { | |
| - let lf_global = self.lf_global.as_mut().unwrap(); | |
| - let epf_sigma = if self.header.restoration_filter.epf_iters > 0 { | |
| - Some(SigmaSource::new(&self.header, lf_global, &self.hf_meta)?) | |
| - } else { | |
| - None | |
| - }; | |
| - | |
| #[cfg(test)] | |
| let render_pipeline = if self.use_simple_pipeline { | |
| Self::build_render_pipeline::<SimpleRenderPipeline>( | |
| &self.decoder_state, | |
| &self.header, | |
| - lf_global, | |
| - &epf_sigma, | |
| + self.patches.clone(), | |
| + self.splines.clone(), | |
| + self.noise.clone(), | |
| + self.lf_quant.clone(), | |
| + self.color_correlation_params.clone(), | |
| + self.epf_sigma.clone(), | |
| pixel_format, | |
| cms, | |
| input_profile, | |
| @@ -720,8 +825,12 @@ impl Frame { | |
| Self::build_render_pipeline::<LowMemoryRenderPipeline>( | |
| &self.decoder_state, | |
| &self.header, | |
| - lf_global, | |
| - &epf_sigma, | |
| + self.patches.clone(), | |
| + self.splines.clone(), | |
| + self.noise.clone(), | |
| + self.lf_quant.clone(), | |
| + self.color_correlation_params.clone(), | |
| + self.epf_sigma.clone(), | |
| pixel_format, | |
| cms, | |
| input_profile, | |
| @@ -732,15 +841,19 @@ impl Frame { | |
| let render_pipeline = Self::build_render_pipeline::<LowMemoryRenderPipeline>( | |
| &self.decoder_state, | |
| &self.header, | |
| - lf_global, | |
| - &epf_sigma, | |
| + self.patches.clone(), | |
| + self.splines.clone(), | |
| + self.noise.clone(), | |
| + self.lf_quant.clone(), | |
| + self.color_correlation_params.clone(), | |
| + self.epf_sigma.clone(), | |
| pixel_format, | |
| cms, | |
| input_profile, | |
| output_profile, | |
| )?; | |
| self.render_pipeline = Some(render_pipeline); | |
| - self.lf_global_was_rendered = false; | |
| + self.was_flushed_once = false; | |
| Ok(()) | |
| } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs | |
| index d4299928e6e7d..5ec6ba1dea95a 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/color_encoding.rs | |
| @@ -192,9 +192,9 @@ pub struct ColorEncoding { | |
| impl ColorEncoding { | |
| pub fn check(&self, _: &Empty) -> Result<(), Error> { | |
| - if !self.want_icc | |
| - && (self.color_space == ColorSpace::Unknown | |
| - || self.tf.transfer_function == TransferFunction::Unknown) | |
| + if self.color_space == ColorSpace::Unknown | |
| + || self.tf.transfer_function == TransferFunction::Unknown | |
| + || self.color_space == ColorSpace::XYB | |
| { | |
| Err(Error::InvalidColorEncoding) | |
| } else { | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs | |
| index b7fc359ec7597..ffa52591b07e4 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/frame_header.rs | |
| @@ -851,4 +851,14 @@ mod test_frame_header { | |
| }, | |
| ) | |
| } | |
| + | |
| + #[test] | |
| + fn test_frame_name() { | |
| + let (_, frame_header, _) = | |
| + read_headers_and_toc(include_bytes!("../../resources/test/named_frame_test.jxl")) | |
| + .unwrap(); | |
| + assert_eq!(frame_header.frame_type, FrameType::RegularFrame); | |
| + assert_eq!(frame_header.name, "TestFrameName"); | |
| + assert_eq!(frame_header.name.len(), 13); | |
| + } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs | |
| index 5ac270930c26e..286f8a4423b28 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/headers/modular.rs | |
| @@ -97,7 +97,7 @@ pub enum TransformId { | |
| Invalid = 3, | |
| } | |
| -#[derive(UnconditionalCoder, Debug, PartialEq)] | |
| +#[derive(UnconditionalCoder, Debug, PartialEq, Clone)] | |
| #[validate] | |
| pub struct Transform { | |
| #[coder(Bits(2))] | |
| @@ -157,7 +157,7 @@ impl Transform { | |
| } | |
| } | |
| -#[derive(UnconditionalCoder, Debug, PartialEq)] | |
| +#[derive(UnconditionalCoder, Debug, PartialEq, Clone)] | |
| pub struct GroupHeader { | |
| pub use_global_tree: bool, | |
| pub wp_header: WeightedHeader, | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs | |
| index 8144607a21d19..37617a15fa9e8 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/mod.rs | |
| @@ -20,7 +20,8 @@ mod stream; | |
| mod tag; | |
| use header::read_header; | |
| -use stream::{IccStream, read_varint_from_reader}; | |
| +use stream::IccStream; | |
| +pub(crate) use stream::read_varint_from_reader; | |
| use tag::{read_single_command, read_tag_list}; | |
| const ICC_CONTEXTS: usize = 41; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs | |
| index 4ca91091311d4..0de64e9422655 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/icc/stream.rs | |
| @@ -25,7 +25,7 @@ fn read_varint(mut read_one: impl FnMut() -> Result<u8>) -> Result<u64> { | |
| Ok(value) | |
| } | |
| -pub(super) fn read_varint_from_reader(stream: &mut impl Read) -> Result<u64> { | |
| +pub(crate) fn read_varint_from_reader(stream: &mut impl Read) -> Result<u64> { | |
| read_varint(|| stream.read_u8().map_err(|_| Error::IccEndOfStream)) | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs | |
| index 3fbbb20562fc6..9c689e788a29a 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/image/raw.rs | |
| @@ -20,6 +20,10 @@ pub struct OwnedRawImage { | |
| } | |
| impl OwnedRawImage { | |
| + pub fn new(byte_size: (usize, usize)) -> Result<Self> { | |
| + Self::new_zeroed_with_padding(byte_size, (0, 0), (0, 0)) | |
| + } | |
| + | |
| pub fn new_zeroed_with_padding( | |
| byte_size: (usize, usize), | |
| offset: (usize, usize), | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs | |
| index d5c1b06c38119..60fab1c83c5d9 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/buffer_splitter.rs | |
| @@ -7,22 +7,28 @@ use crate::{api::JxlOutputBuffer, headers::Orientation, image::Rect, util::Shift | |
| // Information for splitting the output buffers. | |
| #[derive(Debug)] | |
| -pub(super) struct SaveStageBufferInfo { | |
| - pub(super) downsample: (u8, u8), | |
| - pub(super) orientation: Orientation, | |
| - pub(super) byte_size: usize, | |
| - pub(super) after_extend: bool, | |
| +pub struct SaveStageBufferInfo { | |
| + pub downsample: (u8, u8), | |
| + pub orientation: Orientation, | |
| + pub byte_size: usize, | |
| + pub after_extend: bool, | |
| } | |
| /// Data structure responsible for handing out access to portions of the output buffers. | |
| -pub struct BufferSplitter<'a, 'b>(&'a mut [Option<JxlOutputBuffer<'b>>]); | |
| +pub struct BufferSplitter<'a, 'b> { | |
| + buffers: &'a mut [Option<JxlOutputBuffer<'b>>], | |
| + requested_rects: Vec<Rect>, | |
| +} | |
| impl<'a, 'b> BufferSplitter<'a, 'b> { | |
| pub fn new(bufs: &'a mut [Option<JxlOutputBuffer<'b>>]) -> Self { | |
| - Self(bufs) | |
| + Self { | |
| + buffers: bufs, | |
| + requested_rects: vec![], | |
| + } | |
| } | |
| - pub(super) fn get_local_buffers( | |
| + pub(crate) fn get_local_buffers( | |
| &mut self, | |
| save_buffer_info: &[Option<SaveStageBufferInfo>], | |
| rect: Rect, | |
| @@ -31,8 +37,9 @@ impl<'a, 'b> BufferSplitter<'a, 'b> { | |
| full_image_size: (usize, usize), | |
| frame_origin: (isize, isize), | |
| ) -> Vec<Option<JxlOutputBuffer<'_>>> { | |
| + self.requested_rects.push(rect); | |
| let mut local_buffers = vec![]; | |
| - let buffers = &mut *self.0; | |
| + let buffers = &mut *self.buffers; | |
| local_buffers.reserve(buffers.len()); | |
| for _ in 0..buffers.len() { | |
| local_buffers.push(None::<JxlOutputBuffer>); | |
| @@ -97,7 +104,11 @@ impl<'a, 'b> BufferSplitter<'a, 'b> { | |
| local_buffers | |
| } | |
| + pub fn into_changed_regions(self) -> Vec<Rect> { | |
| + self.requested_rects | |
| + } | |
| + | |
| pub fn get_full_buffers(&mut self) -> &mut [Option<JxlOutputBuffer<'b>>] { | |
| - &mut *self.0 | |
| + &mut *self.buffers | |
| } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs | |
| index 0e4957a62401b..9523e1d1607ff 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/builder.rs | |
| @@ -6,8 +6,10 @@ | |
| use crate::api::{JxlColorType, JxlDataFormat}; | |
| use crate::error::{Error, Result}; | |
| use crate::headers::Orientation; | |
| +use crate::render::StageSpecialCase; | |
| use crate::render::internal::ChannelInfo; | |
| use crate::render::save::SaveStage; | |
| +use crate::render::stages::ConvertI32ToU8Stage; | |
| use crate::util::{ShiftRightCeil, tracing_wrappers::*}; | |
| use super::internal::{RenderPipelineShared, Stage}; | |
| @@ -25,7 +27,6 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> { | |
| size: (usize, usize), | |
| downsampling_shift: usize, | |
| mut log_group_size: usize, | |
| - num_passes: usize, | |
| chunk_size: usize, | |
| ) -> Self { | |
| info!("creating render pipeline"); | |
| @@ -47,70 +48,20 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> { | |
| log_group_size, | |
| group_count: (size.0.shrc(log_group_size), size.1.shrc(log_group_size)), | |
| stages: vec![], | |
| - group_chan_ready_passes: vec![ | |
| - vec![0; num_channels]; | |
| - size.0.shrc(log_group_size) | |
| - * size.1.shrc(log_group_size) | |
| + group_chan_complete: vec![ | |
| + vec![false; num_channels]; | |
| + size.0.shrc(log_group_size) * size.1.shrc(log_group_size) | |
| ], | |
| - num_passes, | |
| chunk_size, | |
| extend_stage_index: None, | |
| + channel_is_used: vec![false; num_channels], | |
| }, | |
| } | |
| } | |
| - pub(super) fn add_stage_internal(mut self, stage: Stage<Pipeline::Buffer>) -> Result<Self> { | |
| - let input_type = stage.input_type(); | |
| - let output_type = stage.output_type(); | |
| - let shift = stage.shift(); | |
| - let border = stage.border(); | |
| - let is_extend = matches!(stage, Stage::Extend(_)); | |
| - let current_info = self.shared.channel_info.last().unwrap().clone(); | |
| - debug!( | |
| - last_stage_channel_info = ?current_info, | |
| - extend_stage_index= ?self.shared.extend_stage_index, | |
| - "adding stage '{stage}'", | |
| - ); | |
| - let mut after_info = vec![]; | |
| - for (c, info) in current_info.iter().enumerate() { | |
| - if !stage.uses_channel(c) { | |
| - after_info.push(ChannelInfo { | |
| - ty: info.ty, | |
| - downsample: (0, 0), | |
| - }); | |
| - } else { | |
| - if let Some(ty) = info.ty | |
| - && ty != input_type | |
| - { | |
| - return Err(Error::PipelineChannelTypeMismatch( | |
| - stage.to_string(), | |
| - c, | |
| - input_type, | |
| - ty, | |
| - )); | |
| - } | |
| - after_info.push(ChannelInfo { | |
| - ty: Some(output_type.unwrap_or(input_type)), | |
| - downsample: shift, | |
| - }); | |
| - } | |
| - } | |
| - if self.shared.extend_stage_index.is_some() | |
| - && (shift != (0, 0) || border != (0, 0) || is_extend) | |
| - { | |
| - return Err(Error::PipelineInvalidStageAfterExtend(stage.to_string())); | |
| - } | |
| - if is_extend { | |
| - self.shared.extend_stage_index = Some(self.shared.stages.len()); | |
| - } | |
| - debug!( | |
| - new_channel_info = ?after_info, | |
| - extend_stage_index= ?self.shared.extend_stage_index, | |
| - "added stage '{stage}'", | |
| - ); | |
| - self.shared.channel_info.push(after_info); | |
| + pub(super) fn add_stage_internal(mut self, stage: Stage<Pipeline::Buffer>) -> Self { | |
| self.shared.stages.push(stage); | |
| - Ok(self) | |
| + self | |
| } | |
| pub fn new( | |
| @@ -118,19 +69,16 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> { | |
| size: (usize, usize), | |
| downsampling_shift: usize, | |
| log_group_size: usize, | |
| - num_passes: usize, | |
| ) -> Self { | |
| Self::new_with_chunk_size( | |
| num_channels, | |
| size, | |
| downsampling_shift, | |
| log_group_size, | |
| - num_passes, | |
| 1 << (log_group_size + downsampling_shift), | |
| ) | |
| } | |
| - #[instrument(skip_all, err)] | |
| pub fn add_save_stage( | |
| self, | |
| channels: &[usize], | |
| @@ -139,7 +87,7 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> { | |
| color_type: JxlColorType, | |
| data_format: JxlDataFormat, | |
| fill_opaque_alpha: bool, | |
| - ) -> Result<Self> { | |
| + ) -> Self { | |
| let stage = SaveStage::new( | |
| channels, | |
| orientation, | |
| @@ -151,25 +99,131 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> { | |
| self.add_stage_internal(Stage::Save(stage)) | |
| } | |
| - #[instrument(skip_all, err)] | |
| - pub fn add_extend_stage(self, extend: ExtendToImageDimensionsStage) -> Result<Self> { | |
| + pub fn add_extend_stage(self, extend: ExtendToImageDimensionsStage) -> Self { | |
| self.add_stage_internal(Stage::Extend(extend)) | |
| } | |
| - #[instrument(skip_all, err)] | |
| - pub fn add_inplace_stage<S: RenderPipelineInPlaceStage>(self, stage: S) -> Result<Self> { | |
| + pub fn add_inplace_stage<S: RenderPipelineInPlaceStage>(self, stage: S) -> Self { | |
| self.add_stage_internal(Stage::InPlace(Pipeline::box_inplace_stage(stage))) | |
| } | |
| - #[instrument(skip_all, err)] | |
| - pub fn add_inout_stage<S: RenderPipelineInOutStage>(self, stage: S) -> Result<Self> { | |
| + pub fn add_inout_stage<S: RenderPipelineInOutStage>(self, stage: S) -> Self { | |
| self.add_stage_internal(Stage::InOut(Pipeline::box_inout_stage(stage))) | |
| } | |
| #[instrument(skip_all, err)] | |
| pub fn build(mut self) -> Result<Box<Pipeline>> { | |
| + let mut stage_is_used = vec![false; self.shared.stages.len()]; | |
| + let num_channels = self.shared.num_channels(); | |
| + let mut channel_next_use = vec![None; num_channels]; | |
| + // Prune unused stages. | |
| + for i in (0..self.shared.stages.len()).rev() { | |
| + let stage = &self.shared.stages[i]; | |
| + if matches!(stage, Stage::Save(_)) { | |
| + for (c, next_use) in channel_next_use.iter_mut().enumerate() { | |
| + if stage.uses_channel(c) { | |
| + self.shared.channel_is_used[c] = true; | |
| + *next_use = Some(i); | |
| + } | |
| + } | |
| + } | |
| + for c in 0..num_channels { | |
| + if stage.uses_channel(c) { | |
| + stage_is_used[i] |= self.shared.channel_is_used[c]; | |
| + } | |
| + } | |
| + if stage_is_used[i] { | |
| + match self.shared.stages[i].is_special_case() { | |
| + None => (), | |
| + Some(StageSpecialCase::F32ToU8 { .. }) => (), | |
| + Some(StageSpecialCase::ModularToF32 { channel, bit_depth }) => { | |
| + let n = channel_next_use[channel].unwrap(); | |
| + if let Some(StageSpecialCase::F32ToU8 { | |
| + channel: c, | |
| + bit_depth: b, | |
| + }) = self.shared.stages[n].is_special_case() | |
| + { | |
| + assert_eq!(c, channel); | |
| + if b % bit_depth == 0 { | |
| + let mult = ((1 << b) - 1) / ((1 << bit_depth) - 1); | |
| + // Remove the next stage, and replace the current stage with I32 -> I8 | |
| + // conversion. | |
| + stage_is_used[n] = false; | |
| + self.shared.stages[i] = Stage::InOut(Pipeline::box_inout_stage( | |
| + ConvertI32ToU8Stage::new(c, mult, (1 << b) - 1), | |
| + )); | |
| + } | |
| + } | |
| + } | |
| + } | |
| + for (c, next_use) in channel_next_use.iter_mut().enumerate() { | |
| + if self.shared.stages[i].uses_channel(c) { | |
| + self.shared.channel_is_used[c] = true; | |
| + *next_use = Some(i); | |
| + } | |
| + } | |
| + } | |
| + } | |
| + self.shared.stages = self | |
| + .shared | |
| + .stages | |
| + .into_iter() | |
| + .zip(stage_is_used) | |
| + .filter_map(|(s, used)| used.then_some(s)) | |
| + .collect(); | |
| + for (i, stage) in self.shared.stages.iter().enumerate() { | |
| + let input_type = stage.input_type(); | |
| + let output_type = stage.output_type(); | |
| + let shift = stage.shift(); | |
| + let border = stage.border(); | |
| + let is_extend = matches!(stage, Stage::Extend(_)); | |
| + let current_info = self.shared.channel_info.last().unwrap().clone(); | |
| + debug!( | |
| + last_stage_channel_info = ?current_info, | |
| + extend_stage_index= ?self.shared.extend_stage_index, | |
| + "adding stage '{stage}'", | |
| + ); | |
| + let mut after_info = vec![]; | |
| + for (c, info) in current_info.iter().enumerate() { | |
| + if !stage.uses_channel(c) { | |
| + after_info.push(ChannelInfo { | |
| + ty: info.ty, | |
| + downsample: (0, 0), | |
| + }); | |
| + } else { | |
| + if let Some(ty) = info.ty | |
| + && ty != input_type | |
| + { | |
| + return Err(Error::PipelineChannelTypeMismatch( | |
| + stage.to_string(), | |
| + c, | |
| + input_type, | |
| + ty, | |
| + )); | |
| + } | |
| + after_info.push(ChannelInfo { | |
| + ty: Some(output_type.unwrap_or(input_type)), | |
| + downsample: shift, | |
| + }); | |
| + } | |
| + } | |
| + if self.shared.extend_stage_index.is_some() | |
| + && (shift != (0, 0) || border != (0, 0) || is_extend) | |
| + { | |
| + return Err(Error::PipelineInvalidStageAfterExtend(stage.to_string())); | |
| + } | |
| + if is_extend { | |
| + self.shared.extend_stage_index = Some(i); | |
| + } | |
| + debug!( | |
| + new_channel_info = ?after_info, | |
| + extend_stage_index= ?self.shared.extend_stage_index, | |
| + "added stage '{stage}'", | |
| + ); | |
| + self.shared.channel_info.push(after_info); | |
| + } | |
| + | |
| let channel_info = &mut self.shared.channel_info; | |
| - let num_channels = channel_info[0].len(); | |
| let mut cur_downsamples = vec![(0u8, 0u8); num_channels]; | |
| for (s, stage) in self.shared.stages.iter().enumerate().rev() { | |
| let [current_info, next_info, ..] = &mut channel_info[s..] else { | |
| @@ -232,11 +286,12 @@ impl<Pipeline: RenderPipeline> RenderPipelineBuilder<Pipeline> { | |
| ); | |
| } | |
| - // Ensure all channels have been used, so that we know the types of all buffers at all | |
| - // stages. | |
| for (c, chinfo) in channel_info.iter().flat_map(|x| x.iter().enumerate()) { | |
| if chinfo.ty.is_none() { | |
| - return Err(Error::PipelineChannelUnused(c)); | |
| + assert!(!self.shared.channel_is_used[c]); | |
| + for g in self.shared.group_chan_complete.iter_mut() { | |
| + g[c] = true; | |
| + } | |
| } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs | |
| index 89e2989d03efa..e2a98cd58a36c 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs | |
| @@ -8,6 +8,7 @@ use std::fmt::Display; | |
| use crate::error::Result; | |
| use crate::image::{DataTypeTag, ImageDataType}; | |
| +use crate::render::StageSpecialCase; | |
| use crate::util::ShiftRightCeil; | |
| use super::save::SaveStage; | |
| @@ -74,6 +75,13 @@ impl<Buffer: 'static> Stage<Buffer> { | |
| _ => None, | |
| } | |
| } | |
| + pub(super) fn is_special_case(&self) -> Option<StageSpecialCase> { | |
| + match self { | |
| + Stage::InOut(s) => s.is_special_case(), | |
| + Stage::InPlace(s) => s.is_special_case(), | |
| + _ => None, | |
| + } | |
| + } | |
| } | |
| impl<Buffer> Display for Stage<Buffer> { | |
| @@ -98,11 +106,11 @@ pub struct RenderPipelineShared<Buffer> { | |
| pub input_size: (usize, usize), | |
| pub log_group_size: usize, | |
| pub group_count: (usize, usize), | |
| - pub group_chan_ready_passes: Vec<Vec<usize>>, | |
| - pub num_passes: usize, | |
| + pub group_chan_complete: Vec<Vec<bool>>, | |
| pub chunk_size: usize, | |
| pub stages: Vec<Stage<Buffer>>, | |
| pub extend_stage_index: Option<usize>, | |
| + pub channel_is_used: Vec<bool>, | |
| } | |
| impl<Buffer> RenderPipelineShared<Buffer> { | |
| @@ -158,7 +166,11 @@ impl<Buffer> RenderPipelineShared<Buffer> { | |
| } | |
| pub fn num_channels(&self) -> usize { | |
| - self.channel_info[0].len() | |
| + self.channel_is_used.len() | |
| + } | |
| + | |
| + pub fn num_used_channels(&self) -> usize { | |
| + self.channel_is_used.iter().filter(|x| **x).count() | |
| } | |
| } | |
| @@ -171,6 +183,7 @@ pub trait InPlaceStage: Any + Display { | |
| fn init_local_state(&self, thread_index: usize) -> Result<Option<Box<dyn Any>>>; | |
| fn uses_channel(&self, c: usize) -> bool; | |
| fn ty(&self) -> DataTypeTag; | |
| + fn is_special_case(&self) -> Option<StageSpecialCase>; | |
| } | |
| pub trait RunInPlaceStage<Buffer: PipelineBuffer>: InPlaceStage { | |
| @@ -192,6 +205,9 @@ impl<T: RenderPipelineInPlaceStage> InPlaceStage for T { | |
| fn ty(&self) -> DataTypeTag { | |
| T::Type::DATA_TYPE_ID | |
| } | |
| + fn is_special_case(&self) -> Option<StageSpecialCase> { | |
| + self.is_special_case() | |
| + } | |
| } | |
| pub trait InOutStage: Any + Display { | |
| @@ -201,6 +217,7 @@ pub trait InOutStage: Any + Display { | |
| fn uses_channel(&self, c: usize) -> bool; | |
| fn input_type(&self) -> DataTypeTag; | |
| fn output_type(&self) -> DataTypeTag; | |
| + fn is_special_case(&self) -> Option<StageSpecialCase>; | |
| } | |
| impl<T: RenderPipelineInOutStage> InOutStage for T { | |
| @@ -222,6 +239,9 @@ impl<T: RenderPipelineInOutStage> InOutStage for T { | |
| fn output_type(&self) -> DataTypeTag { | |
| T::OutputT::DATA_TYPE_ID | |
| } | |
| + fn is_special_case(&self) -> Option<StageSpecialCase> { | |
| + self.is_special_case() | |
| + } | |
| } | |
| pub trait RunInOutStage<Buffer: PipelineBuffer>: InOutStage { | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs | |
| new file mode 100644 | |
| index 0000000000000..abc810ef8a42c | |
| --- /dev/null | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs | |
| @@ -0,0 +1,372 @@ | |
| +// Copyright (c) the JPEG XL Project Authors. All rights reserved. | |
| +// | |
| +// Use of this source code is governed by a BSD-style | |
| +// license that can be found in the LICENSE file. | |
| + | |
| +use std::ops::Range; | |
| + | |
| +use crate::error::Result; | |
| +use crate::image::{OwnedRawImage, Rect}; | |
| +use crate::render::LowMemoryRenderPipeline; | |
| +use crate::render::buffer_splitter::BufferSplitter; | |
| +use crate::render::internal::{ChannelInfo, Stage}; | |
| +use crate::util::tracing_wrappers::*; | |
| + | |
| +pub(super) struct InputBuffer { | |
| + // One buffer per channel. | |
| + pub(super) data: Vec<Option<OwnedRawImage>>, | |
| + // Storage for left/right borders. Includes corners. | |
| + pub(super) leftright: Vec<Option<OwnedRawImage>>, | |
| + // Storage for top/bottom borders. Includes corners. | |
| + pub(super) topbottom: Vec<Option<OwnedRawImage>>, | |
| + // Number of ready channels in the current pass. | |
| + ready_channels: usize, | |
| + pub(super) is_ready: bool, | |
| + num_completed_groups_3x3: usize, | |
| +} | |
| + | |
| +impl InputBuffer { | |
| + pub(super) fn set_buffer(&mut self, chan: usize, buf: OwnedRawImage) { | |
| + assert!(self.data[chan].is_none()); | |
| + self.data[chan] = Some(buf); | |
| + self.ready_channels += 1; | |
| + } | |
| + | |
| + pub(super) fn new(num_channels: usize) -> Self { | |
| + let b = || (0..num_channels).map(|_| None).collect(); | |
| + Self { | |
| + data: b(), | |
| + leftright: b(), | |
| + topbottom: b(), | |
| + ready_channels: 0, | |
| + is_ready: false, | |
| + num_completed_groups_3x3: 0, | |
| + } | |
| + } | |
| +} | |
| + | |
| +// Finds a small set of rectangles that cover all the "true" values in `ready_mask`, | |
| +// and calls `f` on each such rectangle. | |
| +fn foreach_ready_rect( | |
| + ready_mask: [bool; 9], | |
| + mut f: impl FnMut(Range<u8>, Range<u8>) -> Result<()>, | |
| +) -> Result<()> { | |
| + // x range in middle row | |
| + let xrange = (1 - ready_mask[3] as u8)..(2 + ready_mask[5] as u8); | |
| + let can_extend_top = xrange.clone().all(|x| ready_mask[x as usize]); | |
| + let can_extend_bottom = xrange.clone().all(|x| ready_mask[6 + x as usize]); | |
| + let yrange = (1 - can_extend_top as u8)..(2 + can_extend_bottom as u8); | |
| + f(xrange.clone(), yrange)?; | |
| + | |
| + if !can_extend_top { | |
| + if ready_mask[1] { | |
| + let xrange = (1 - ready_mask[0] as u8)..(2 + ready_mask[2] as u8); | |
| + f(xrange, 0..1)?; | |
| + } else { | |
| + if ready_mask[0] { | |
| + f(0..1, 0..1)?; | |
| + } | |
| + if ready_mask[2] { | |
| + f(2..3, 0..1)?; | |
| + } | |
| + } | |
| + } else { | |
| + if ready_mask[0] && !xrange.contains(&0) { | |
| + f(0..1, 0..1)?; | |
| + } | |
| + if ready_mask[2] && !xrange.contains(&2) { | |
| + f(2..3, 0..1)?; | |
| + } | |
| + } | |
| + | |
| + if !can_extend_bottom { | |
| + if ready_mask[7] { | |
| + let xrange = (1 - ready_mask[6] as u8)..(2 + ready_mask[8] as u8); | |
| + f(xrange, 2..3)?; | |
| + } else { | |
| + if ready_mask[6] { | |
| + f(0..1, 2..3)?; | |
| + } | |
| + if ready_mask[8] { | |
| + f(2..3, 2..3)?; | |
| + } | |
| + } | |
| + } else { | |
| + if ready_mask[6] && !xrange.contains(&0) { | |
| + f(0..1, 2..3)?; | |
| + } | |
| + if ready_mask[8] && !xrange.contains(&2) { | |
| + f(2..3, 2..3)?; | |
| + } | |
| + } | |
| + | |
| + Ok(()) | |
| +} | |
| + | |
| +impl LowMemoryRenderPipeline { | |
| + pub(super) fn maybe_get_scratch_buffer( | |
| + &mut self, | |
| + channel: usize, | |
| + kind: usize, | |
| + ) -> Option<OwnedRawImage> { | |
| + self.scratch_channel_buffers[channel * 3 + kind].pop() | |
| + } | |
| + | |
| + fn store_scratch_buffer(&mut self, channel: usize, kind: usize, image: OwnedRawImage) { | |
| + self.scratch_channel_buffers[channel * 3 + kind].push(image) | |
| + } | |
| + | |
| + pub(super) fn render_with_new_group( | |
| + &mut self, | |
| + g: usize, | |
| + buffer_splitter: &mut BufferSplitter, | |
| + ) -> Result<()> { | |
| + let buf = &mut self.input_buffers[g]; | |
| + assert!(buf.ready_channels <= self.shared.num_used_channels()); | |
| + if buf.ready_channels != self.shared.num_used_channels() { | |
| + return Ok(()); | |
| + } | |
| + buf.ready_channels = 0; | |
| + let (gx, gy) = self.shared.group_position(g); | |
| + debug!("new data ready for group {gx},{gy}"); | |
| + | |
| + // Prepare output buffers for the group. | |
| + let (origin, size) = if let Some(e) = self.shared.extend_stage_index { | |
| + let Stage::Extend(e) = &self.shared.stages[e] else { | |
| + unreachable!("extend stage is not an extend stage"); | |
| + }; | |
| + (e.frame_origin, e.image_size) | |
| + } else { | |
| + ((0, 0), self.shared.input_size) | |
| + }; | |
| + let gsz = 1 << self.shared.log_group_size; | |
| + let group_rect = Rect { | |
| + size: (gsz, gsz), | |
| + origin: (gsz * gx, gsz * gy), | |
| + } | |
| + .clip(self.shared.input_size); | |
| + | |
| + { | |
| + for c in 0..self.shared.num_channels() { | |
| + if !self.shared.channel_is_used[c] { | |
| + continue; | |
| + } | |
| + let (bx, by) = self.border_size; | |
| + let (sx, sy) = self.input_buffers[g].data[c].as_ref().unwrap().byte_size(); | |
| + let ChannelInfo { | |
| + ty, | |
| + downsample: (dx, dy), | |
| + } = self.shared.channel_info[0][c]; | |
| + let ty = ty.unwrap(); | |
| + let bx = bx >> dx; | |
| + let by = by >> dy; | |
| + let mut topbottom = if let Some(b) = self.input_buffers[g].topbottom[c].take() { | |
| + b | |
| + } else if let Some(b) = self.maybe_get_scratch_buffer(c, 1) { | |
| + b | |
| + } else { | |
| + let height = 4 * by; | |
| + let width = (1 << self.shared.log_group_size) * ty.size(); | |
| + OwnedRawImage::new_zeroed_with_padding((width, height), (0, 0), (0, 0))? | |
| + }; | |
| + let mut leftright = if let Some(b) = self.input_buffers[g].leftright[c].take() { | |
| + b | |
| + } else if let Some(b) = self.maybe_get_scratch_buffer(c, 2) { | |
| + b | |
| + } else { | |
| + let height = 1 << self.shared.log_group_size; | |
| + let width = 4 * bx * ty.size(); | |
| + OwnedRawImage::new_zeroed_with_padding((width, height), (0, 0), (0, 0))? | |
| + }; | |
| + let input = self.input_buffers[g].data[c].as_ref().unwrap(); | |
| + if by != 0 { | |
| + for y in 0..(2 * by).min(sy) { | |
| + topbottom.row_mut(y)[..sx].copy_from_slice(input.row(y)); | |
| + topbottom.row_mut(4 * by - 1 - y)[..sx] | |
| + .copy_from_slice(input.row(sy - y - 1)); | |
| + } | |
| + } | |
| + if bx != 0 { | |
| + let cs = (bx * 2 * ty.size()).min(sx); | |
| + for y in 0..sy { | |
| + let row_out = leftright.row_mut(y); | |
| + let row_in = input.row(y); | |
| + row_out[..cs].copy_from_slice(&row_in[..cs]); | |
| + row_out[4 * bx * ty.size() - cs..].copy_from_slice(&row_in[sx - cs..]); | |
| + } | |
| + } | |
| + self.input_buffers[g].leftright[c] = Some(leftright); | |
| + self.input_buffers[g].topbottom[c] = Some(topbottom); | |
| + } | |
| + self.input_buffers[g].is_ready = true; | |
| + } | |
| + | |
| + let gxm1 = gx.saturating_sub(1); | |
| + let gym1 = gy.saturating_sub(1); | |
| + let gxp1 = (gx + 1).min(self.shared.group_count.0 - 1); | |
| + let gyp1 = (gy + 1).min(self.shared.group_count.1 - 1); | |
| + let gw = self.shared.group_count.0; | |
| + // TODO(veluca): this code probably needs to be adapted for multithreading. | |
| + let mut ready_mask = [ | |
| + self.input_buffers[gym1 * gw + gxm1].is_ready, | |
| + self.input_buffers[gym1 * gw + gx].is_ready, | |
| + self.input_buffers[gym1 * gw + gxp1].is_ready, | |
| + self.input_buffers[gy * gw + gxm1].is_ready, | |
| + self.input_buffers[gy * gw + gx].is_ready, // should be guaranteed to be 1. | |
| + self.input_buffers[gy * gw + gxp1].is_ready, | |
| + self.input_buffers[gyp1 * gw + gxm1].is_ready, | |
| + self.input_buffers[gyp1 * gw + gx].is_ready, | |
| + self.input_buffers[gyp1 * gw + gxp1].is_ready, | |
| + ]; | |
| + // We can only render a corner if we have all the 4 adjacent groups. Thus, mask out corners if | |
| + // the corresponding side buffers are not ready. | |
| + ready_mask[0] &= ready_mask[1]; | |
| + ready_mask[0] &= ready_mask[3]; | |
| + ready_mask[2] &= ready_mask[1]; | |
| + ready_mask[2] &= ready_mask[5]; | |
| + ready_mask[6] &= ready_mask[3]; | |
| + ready_mask[6] &= ready_mask[7]; | |
| + ready_mask[8] &= ready_mask[5]; | |
| + ready_mask[8] &= ready_mask[7]; | |
| + | |
| + foreach_ready_rect(ready_mask, |xrange, yrange| { | |
| + let y0 = match (gy == 0, yrange.start) { | |
| + (true, 0) => group_rect.origin.1, | |
| + (false, 0) => group_rect.origin.1 - self.border_size.1, | |
| + (_, 1) => group_rect.origin.1 + self.border_size.1, | |
| + // (_, 2) | |
| + _ => group_rect.end().1 - self.border_size.1, | |
| + }; | |
| + let x0 = match (gx == 0, xrange.start) { | |
| + (true, 0) => group_rect.origin.0, | |
| + (false, 0) => group_rect.origin.0 - self.border_size.0, | |
| + (_, 1) => group_rect.origin.0 + self.border_size.0, | |
| + // (_, 2) | |
| + _ => group_rect.end().0 - self.border_size.0, | |
| + }; | |
| + | |
| + let y1 = match (gy + 1 == self.shared.group_count.1, yrange.end) { | |
| + (true, 3) => group_rect.end().1, | |
| + (false, 3) => group_rect.end().1 + self.border_size.1, | |
| + (_, 2) => group_rect.end().1 - self.border_size.1, | |
| + // (_, 1) | |
| + _ => group_rect.origin.1 + self.border_size.1, | |
| + }; | |
| + | |
| + let x1 = match (gx + 1 == self.shared.group_count.0, xrange.end) { | |
| + (true, 3) => group_rect.end().0, | |
| + (false, 3) => group_rect.end().0 + self.border_size.0, | |
| + (_, 2) => group_rect.end().0 - self.border_size.0, | |
| + // (_, 1) | |
| + _ => group_rect.origin.0 + self.border_size.0, | |
| + }; | |
| + | |
| + let image_area = Rect { | |
| + origin: (x0, y0), | |
| + size: (x1 - x0, y1 - y0), | |
| + }; | |
| + | |
| + let mut local_buffers = buffer_splitter.get_local_buffers( | |
| + &self.save_buffer_info, | |
| + image_area, | |
| + false, | |
| + self.shared.input_size, | |
| + size, | |
| + origin, | |
| + ); | |
| + | |
| + self.render_group((gx, gy), image_area, &mut local_buffers)?; | |
| + Ok(()) | |
| + })?; | |
| + | |
| + for c in 0..self.input_buffers[g].data.len() { | |
| + if let Some(b) = std::mem::take(&mut self.input_buffers[g].data[c]) { | |
| + self.store_scratch_buffer(c, 0, b); | |
| + } | |
| + } | |
| + | |
| + // Clear border buffers that will not be used again. | |
| + // This is certainly the case if *all* the groups in the 3x3 group area around | |
| + // the current group are complete. | |
| + if self.shared.group_chan_complete[g].iter().all(|x| *x) { | |
| + for g in [ | |
| + gym1 * gw + gxm1, | |
| + gym1 * gw + gx, | |
| + gym1 * gw + gxp1, | |
| + gy * gw + gxm1, | |
| + gy * gw + gx, | |
| + gy * gw + gxp1, | |
| + gyp1 * gw + gxm1, | |
| + gyp1 * gw + gx, | |
| + gyp1 * gw + gxp1, | |
| + ] { | |
| + self.input_buffers[g].num_completed_groups_3x3 += 1; | |
| + if self.input_buffers[g].num_completed_groups_3x3 != 9 { | |
| + continue; | |
| + } | |
| + for c in 0..self.input_buffers[g].data.len() { | |
| + if let Some(b) = std::mem::take(&mut self.input_buffers[g].topbottom[c]) { | |
| + self.store_scratch_buffer(c, 1, b); | |
| + } | |
| + if let Some(b) = std::mem::take(&mut self.input_buffers[g].leftright[c]) { | |
| + self.store_scratch_buffer(c, 2, b); | |
| + } | |
| + } | |
| + } | |
| + } | |
| + | |
| + Ok(()) | |
| + } | |
| +} | |
| + | |
| +#[cfg(test)] | |
| +mod tests { | |
| + use super::*; | |
| + | |
| + #[test] | |
| + fn test_foreach_ready_rect() { | |
| + for i in 0..512 { | |
| + let mut ready_mask = [false; 9]; | |
| + for j in 0..9 { | |
| + if (i >> j) & 1 == 1 { | |
| + ready_mask[j] = true; | |
| + } | |
| + } | |
| + if !ready_mask[4] { | |
| + continue; | |
| + } | |
| + | |
| + let mut covered = [false; 9]; | |
| + foreach_ready_rect(ready_mask, |xr, yr| { | |
| + for y in yr { | |
| + for x in xr.clone() { | |
| + let idx = (y as usize) * 3 + (x as usize); | |
| + assert!( | |
| + ready_mask[idx], | |
| + "Covered not ready index {} in mask {:?} (x={}, y={})", | |
| + idx, ready_mask, x, y | |
| + ); | |
| + assert!( | |
| + !covered[idx], | |
| + "Double coverage of index {} in mask {:?}", | |
| + idx, ready_mask | |
| + ); | |
| + covered[idx] = true; | |
| + } | |
| + } | |
| + Ok(()) | |
| + }) | |
| + .unwrap(); | |
| + | |
| + for j in 0..9 { | |
| + if ready_mask[j] { | |
| + assert!( | |
| + covered[j], | |
| + "Failed to cover index {} in mask {:?}", | |
| + j, ready_mask | |
| + ); | |
| + } | |
| + } | |
| + } | |
| + } | |
| +} | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs | |
| index cf4a65e81049e..7f8214ff35abe 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs | |
| @@ -43,17 +43,3 @@ pub(super) fn get_distinct_indices<'a, T>( | |
| .map(|x| std::mem::take(x).expect("Not all elements were found")) | |
| .collect() | |
| } | |
| - | |
| -/// Mirror-reflects a value v to fit in a [0; s) range. | |
| -pub(super) fn mirror(mut v: isize, s: usize) -> usize { | |
| - // TODO(veluca): consider speeding this up if needed. | |
| - loop { | |
| - if v < 0 { | |
| - v = -v - 1; | |
| - } else if v >= s as isize { | |
| - v = s as isize * 2 - v - 1; | |
| - } else { | |
| - return v as usize; | |
| - } | |
| - } | |
| -} | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs | |
| index b2d33cade1396..761175a12aa09 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs | |
| @@ -11,27 +11,23 @@ use row_buffers::RowBuffer; | |
| use crate::api::JxlOutputBuffer; | |
| use crate::error::Result; | |
| -use crate::image::{Image, ImageDataType, OwnedRawImage, Rect}; | |
| +use crate::image::{DataTypeTag, Image, ImageDataType, OwnedRawImage, Rect}; | |
| use crate::render::MAX_BORDER; | |
| use crate::render::buffer_splitter::{BufferSplitter, SaveStageBufferInfo}; | |
| use crate::render::internal::Stage; | |
| +use crate::render::low_memory_pipeline::group_scheduler::InputBuffer; | |
| use crate::util::{ShiftRightCeil, tracing_wrappers::*}; | |
| use super::RenderPipeline; | |
| use super::internal::{RenderPipelineShared, RunInOutStage, RunInPlaceStage}; | |
| +mod group_scheduler; | |
| mod helpers; | |
| mod render_group; | |
| -pub(super) mod row_buffers; | |
| +pub(crate) mod row_buffers; | |
| mod run_stage; | |
| mod save; | |
| -struct InputBuffer { | |
| - // One buffer per channel. | |
| - data: Vec<Option<OwnedRawImage>>, | |
| - completed_passes: usize, | |
| -} | |
| - | |
| pub struct LowMemoryRenderPipeline { | |
| shared: RenderPipelineShared<RowBuffer>, | |
| input_buffers: Vec<InputBuffer>, | |
| @@ -49,7 +45,8 @@ pub struct LowMemoryRenderPipeline { | |
| // The amount of pixels that we need to read (for every channel) in non-edge groups to run all | |
| // stages correctly. | |
| input_border_pixels: Vec<(usize, usize)>, | |
| - has_nontrivial_border: bool, | |
| + // Size of the border, in image (i.e. non-downsampled) pixels. | |
| + border_size: (usize, usize), | |
| // For every stage, the downsampling level of *any* channel that the stage uses at that point. | |
| // Note that this must be equal across all the used channels. | |
| downsampling_for_stage: Vec<(usize, usize)>, | |
| @@ -60,160 +57,21 @@ pub struct LowMemoryRenderPipeline { | |
| opaque_alpha_buffers: Vec<Option<RowBuffer>>, | |
| // Sorted indices to call get_distinct_indices. | |
| sorted_buffer_indices: Vec<Vec<(usize, usize, usize)>>, | |
| - // For each channel, buffers that could be reused to store group data for that channel. | |
| + // For each channel and the 3 kinds of buffers (center / topbottom / leftright), buffers that | |
| + // could be reused to store group data for that channel. | |
| + // Indexed by [3*channel] = center, [3*channel+1] = topbottom, [3*channel+2] = leftright. | |
| scratch_channel_buffers: Vec<Vec<OwnedRawImage>>, | |
| } | |
| -impl LowMemoryRenderPipeline { | |
| - // TODO(veluca): most of this logic will need to change to ensure better cache utilization and | |
| - // lower memory usage. | |
| - fn render_with_new_group( | |
| - &mut self, | |
| - new_group_id: usize, | |
| - buffer_splitter: &mut BufferSplitter, | |
| - ) -> Result<()> { | |
| - let (gx, gy) = self.shared.group_position(new_group_id); | |
| - | |
| - // We put groups that are 2 afar here, because even if they could not have become | |
| - // renderable, they might have become freeable. | |
| - let mut possible_groups = vec![]; | |
| - for dy in -2..=2 { | |
| - let igy = gy as isize + dy; | |
| - if igy < 0 || igy >= self.shared.group_count.1 as isize { | |
| - continue; | |
| - } | |
| - for dx in -2..=2 { | |
| - let igx = gx as isize + dx; | |
| - if igx < 0 || igx >= self.shared.group_count.0 as isize { | |
| - continue; | |
| - } | |
| - possible_groups.push(igy as usize * self.shared.group_count.0 + igx as usize); | |
| - } | |
| - } | |
| - | |
| - // First, render all groups that have made progress; only check those that *could* have | |
| - // made progress. | |
| - for g in possible_groups.iter().copied() { | |
| - let ready_passes = self.shared.group_chan_ready_passes[g] | |
| - .iter() | |
| - .copied() | |
| - .min() | |
| - .unwrap(); | |
| - if self.input_buffers[g].completed_passes < ready_passes { | |
| - let (gx, gy) = self.shared.group_position(g); | |
| - let mut fully_ready_passes = ready_passes; | |
| - // Here we assume that we never need more than one group worth of border. | |
| - if self.has_nontrivial_border { | |
| - for dy in -1..=1 { | |
| - let igy = gy as isize + dy; | |
| - if igy < 0 || igy >= self.shared.group_count.1 as isize { | |
| - continue; | |
| - } | |
| - for dx in -1..=1 { | |
| - let igx = gx as isize + dx; | |
| - if igx < 0 || igx >= self.shared.group_count.0 as isize { | |
| - continue; | |
| - } | |
| - let ig = (igy as usize) * self.shared.group_count.0 + igx as usize; | |
| - let ready_passes = self.shared.group_chan_ready_passes[ig] | |
| - .iter() | |
| - .copied() | |
| - .min() | |
| - .unwrap(); | |
| - fully_ready_passes = fully_ready_passes.min(ready_passes); | |
| - } | |
| - } | |
| - } | |
| - if self.input_buffers[g].completed_passes >= fully_ready_passes { | |
| - continue; | |
| - } | |
| - debug!( | |
| - "new ready passes for group {gx},{gy} ({} completed, \ | |
| - {ready_passes} ready, {fully_ready_passes} ready including neighbours)", | |
| - self.input_buffers[g].completed_passes | |
| - ); | |
| - | |
| - // Prepare output buffers for the group. | |
| - let (origin, size) = if let Some(e) = self.shared.extend_stage_index { | |
| - let Stage::Extend(e) = &self.shared.stages[e] else { | |
| - unreachable!("extend stage is not an extend stage"); | |
| - }; | |
| - (e.frame_origin, e.image_size) | |
| - } else { | |
| - ((0, 0), self.shared.input_size) | |
| - }; | |
| - let gsz = ( | |
| - 1 << self.shared.log_group_size, | |
| - 1 << self.shared.log_group_size, | |
| - ); | |
| - let rect_to_render = Rect { | |
| - size: gsz, | |
| - origin: (gsz.0 * gx, gsz.1 * gy), | |
| - }; | |
| - let mut local_buffers = buffer_splitter.get_local_buffers( | |
| - &self.save_buffer_info, | |
| - rect_to_render, | |
| - false, | |
| - self.shared.input_size, | |
| - size, | |
| - origin, | |
| - ); | |
| - | |
| - self.render_group((gx, gy), &mut local_buffers)?; | |
| - | |
| - self.input_buffers[g].completed_passes = fully_ready_passes; | |
| - } | |
| - } | |
| - | |
| - // Clear buffers that will not be used again. | |
| - for g in possible_groups.iter().copied() { | |
| - let (gx, gy) = self.shared.group_position(g); | |
| - let mut neigh_complete_passes = self.input_buffers[g].completed_passes; | |
| - if self.has_nontrivial_border { | |
| - for dy in -1..=1 { | |
| - let igy = gy as isize + dy; | |
| - if igy < 0 || igy >= self.shared.group_count.1 as isize { | |
| - continue; | |
| - } | |
| - for dx in -1..=1 { | |
| - let igx = gx as isize + dx; | |
| - if igx < 0 || igx >= self.shared.group_count.0 as isize { | |
| - continue; | |
| - } | |
| - let ig = (igy as usize) * self.shared.group_count.0 + igx as usize; | |
| - neigh_complete_passes = self.input_buffers[ig] | |
| - .completed_passes | |
| - .min(neigh_complete_passes); | |
| - } | |
| - } | |
| - } | |
| - if self.shared.num_passes <= neigh_complete_passes { | |
| - for (c, b) in self.input_buffers[g].data.iter_mut().enumerate() { | |
| - if let Some(b) = std::mem::take(b) { | |
| - self.scratch_channel_buffers[c].push(b); | |
| - } | |
| - } | |
| - } | |
| - } | |
| - Ok(()) | |
| - } | |
| -} | |
| - | |
| impl RenderPipeline for LowMemoryRenderPipeline { | |
| type Buffer = RowBuffer; | |
| fn new_from_shared(shared: RenderPipelineShared<Self::Buffer>) -> Result<Self> { | |
| let mut input_buffers = vec![]; | |
| - for _ in 0..shared.group_chan_ready_passes.len() { | |
| - input_buffers.push(InputBuffer { | |
| - data: vec![], | |
| - completed_passes: 0, | |
| - }); | |
| - for _ in 0..shared.group_chan_ready_passes[0].len() { | |
| - input_buffers.last_mut().unwrap().data.push(None); | |
| - } | |
| + let nc = shared.num_channels(); | |
| + for _ in 0..shared.group_chan_complete.len() { | |
| + input_buffers.push(InputBuffer::new(nc)); | |
| } | |
| - let nc = shared.channel_info[0].len(); | |
| let mut previous_inout: Vec<_> = (0..nc).map(|x| (0usize, x)).collect(); | |
| let mut stage_input_buffer_index = vec![]; | |
| let mut next_border_and_cur_downsample = vec![vec![]]; | |
| @@ -245,9 +103,10 @@ impl RenderPipeline for LowMemoryRenderPipeline { | |
| let mut initial_buffers = vec![]; | |
| for chan in 0..nc { | |
| initial_buffers.push(RowBuffer::new( | |
| - shared.channel_info[0][chan].ty.unwrap(), | |
| + shared.channel_info[0][chan].ty.unwrap_or(DataTypeTag::U8), | |
| next_border_and_cur_downsample[0][chan].0 as usize, | |
| 0, | |
| + 0, | |
| shared.chunk_size >> shared.channel_info[0][chan].downsample.0, | |
| )?); | |
| } | |
| @@ -261,6 +120,7 @@ impl RenderPipeline for LowMemoryRenderPipeline { | |
| stage.output_type().unwrap(), | |
| *next_y_border as usize, | |
| stage.shift().1 as usize, | |
| + stage.shift().0 as usize, | |
| shared.chunk_size >> *dsx, | |
| )?); | |
| } | |
| @@ -385,6 +245,24 @@ impl RenderPipeline for LowMemoryRenderPipeline { | |
| }) | |
| .collect(); | |
| + let mut border_size = (0, 0); | |
| + for c in 0..nc { | |
| + border_size.0 = border_size | |
| + .0 | |
| + .max(border_pixels[c].0 << shared.channel_info[0][c].downsample.0); | |
| + border_size.1 = border_size | |
| + .1 | |
| + .max(border_pixels[c].1 << shared.channel_info[0][c].downsample.1); | |
| + } | |
| + for s in 0..shared.stages.len() { | |
| + border_size.0 = border_size | |
| + .0 | |
| + .max(border_pixels_per_stage[s].0 << downsampling_for_stage[s].0); | |
| + border_size.1 = border_size | |
| + .1 | |
| + .max(border_pixels_per_stage[s].1 << downsampling_for_stage[s].1); | |
| + } | |
| + | |
| Ok(Self { | |
| input_buffers, | |
| stage_input_buffer_index, | |
| @@ -392,7 +270,7 @@ impl RenderPipeline for LowMemoryRenderPipeline { | |
| padding_was_rendered: false, | |
| save_buffer_info, | |
| stage_output_border_pixels: border_pixels_per_stage, | |
| - has_nontrivial_border: border_pixels.iter().any(|x| *x != (0, 0)), | |
| + border_size, | |
| input_border_pixels: border_pixels, | |
| local_states: shared | |
| .stages | |
| @@ -403,13 +281,13 @@ impl RenderPipeline for LowMemoryRenderPipeline { | |
| downsampling_for_stage, | |
| opaque_alpha_buffers, | |
| sorted_buffer_indices, | |
| - scratch_channel_buffers: (0..nc).map(|_| vec![]).collect(), | |
| + scratch_channel_buffers: (0..nc * 3).map(|_| vec![]).collect(), | |
| }) | |
| } | |
| #[instrument(skip_all, err)] | |
| fn get_buffer<T: ImageDataType>(&mut self, channel: usize) -> Result<Image<T>> { | |
| - if let Some(b) = self.scratch_channel_buffers[channel].pop() { | |
| + if let Some(b) = self.maybe_get_scratch_buffer(channel, 0) { | |
| return Ok(Image::from_raw(b)); | |
| } | |
| let sz = self.shared.group_size_for_channel(channel, T::DATA_TYPE_ID); | |
| @@ -420,20 +298,23 @@ impl RenderPipeline for LowMemoryRenderPipeline { | |
| &mut self, | |
| channel: usize, | |
| group_id: usize, | |
| - num_passes: usize, | |
| + complete: bool, | |
| buf: Image<T>, | |
| buffer_splitter: &mut BufferSplitter, | |
| ) -> Result<()> { | |
| - debug!( | |
| - "filling data for group {}, channel {}, using type {:?}", | |
| - group_id, | |
| - channel, | |
| - T::DATA_TYPE_ID, | |
| - ); | |
| - self.input_buffers[group_id].data[channel] = Some(buf.into_raw()); | |
| - self.shared.group_chan_ready_passes[group_id][channel] += num_passes; | |
| + if self.shared.channel_is_used[channel] { | |
| + debug!( | |
| + "filling data for group {}, channel {}, using type {:?}", | |
| + group_id, | |
| + channel, | |
| + T::DATA_TYPE_ID, | |
| + ); | |
| + self.input_buffers[group_id].set_buffer(channel, buf.into_raw()); | |
| + self.shared.group_chan_complete[group_id][channel] = complete; | |
| - self.render_with_new_group(group_id, buffer_splitter) | |
| + self.render_with_new_group(group_id, buffer_splitter)?; | |
| + } | |
| + Ok(()) | |
| } | |
| fn check_buffer_sizes(&self, buffers: &mut [Option<JxlOutputBuffer>]) -> Result<()> { | |
| @@ -535,6 +416,10 @@ impl RenderPipeline for LowMemoryRenderPipeline { | |
| Ok(()) | |
| } | |
| + fn mark_group_to_rerender(&mut self, g: usize) { | |
| + self.input_buffers[g].is_ready = false; | |
| + } | |
| + | |
| fn box_inout_stage<S: super::RenderPipelineInOutStage>( | |
| stage: S, | |
| ) -> Box<dyn RunInOutStage<Self::Buffer>> { | |
| @@ -546,4 +431,8 @@ impl RenderPipeline for LowMemoryRenderPipeline { | |
| ) -> Box<dyn RunInPlaceStage<Self::Buffer>> { | |
| Box::new(stage) | |
| } | |
| + | |
| + fn used_channel_mask(&self) -> &[bool] { | |
| + &self.shared.channel_is_used | |
| + } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs | |
| index 6d4ded09003bd..6f9b65b67e1fe 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs | |
| @@ -8,15 +8,12 @@ use std::ops::Range; | |
| use crate::{ | |
| api::JxlOutputBuffer, | |
| error::Result, | |
| - image::DataTypeTag, | |
| + image::{DataTypeTag, Rect}, | |
| render::{ | |
| - internal::Stage, | |
| - low_memory_pipeline::{ | |
| - helpers::{get_distinct_indices, mirror}, | |
| - run_stage::ExtraInfo, | |
| - }, | |
| + internal::{ChannelInfo, Stage}, | |
| + low_memory_pipeline::{helpers::get_distinct_indices, run_stage::ExtraInfo}, | |
| }, | |
| - util::{ShiftRightCeil, SmallVec, tracing_wrappers::*}, | |
| + util::{ShiftRightCeil, SmallVec, mirror, tracing_wrappers::*}, | |
| }; | |
| use super::{LowMemoryRenderPipeline, row_buffers::RowBuffer}; | |
| @@ -70,79 +67,134 @@ fn apply_x_padding( | |
| } | |
| impl LowMemoryRenderPipeline { | |
| - fn fill_initial_buffers(&mut self, c: usize, y: usize, y0: usize, (gx, gy): (usize, usize)) { | |
| - let ty = self.shared.channel_info[0][c] | |
| - .ty | |
| - .expect("Channel info should be populated at this point"); | |
| - let gys = 1 | |
| - << (self.shared.log_group_size - self.shared.channel_info[0][c].downsample.1 as usize); | |
| + fn fill_initial_buffers( | |
| + &mut self, | |
| + c: usize, | |
| + y: usize, | |
| + (x0, xsize): (usize, usize), | |
| + (gx, gy): (usize, usize), | |
| + ) { | |
| + if !self.shared.channel_is_used[c] { | |
| + return; | |
| + } | |
| + let ChannelInfo { | |
| + ty, | |
| + downsample: (dx, dy), | |
| + } = self.shared.channel_info[0][c]; | |
| + let ty = ty.expect("Channel info should be populated at this point"); | |
| + let group_ysize = 1 << (self.shared.log_group_size - dy as usize); | |
| + let group_xsize = 1 << (self.shared.log_group_size - dx as usize); | |
| + | |
| + let (bx, by) = self.border_size; | |
| - let (input_y, igy) = if y < y0 { | |
| - (y + gys - y0, gy - 1) | |
| - } else if y >= y0 + gys { | |
| - (y - y0 - gys, gy + 1) | |
| + let group_y0 = gy * group_ysize; | |
| + let group_x0 = gx << (self.shared.log_group_size - dx as usize); | |
| + let group_x1 = group_x0 + group_xsize; | |
| + | |
| + let (input_y, igy, is_topbottom) = if y < group_y0 { | |
| + (y + (by >> dy) * 4 - group_y0, gy - 1, true) | |
| + } else if y >= group_y0 + group_ysize { | |
| + (y - group_y0 - group_ysize, gy + 1, true) | |
| } else { | |
| - (y - y0, gy) | |
| + (y - group_y0, gy, false) | |
| }; | |
| let output_row = self.row_buffers[0][c].get_row_mut::<u8>(y); | |
| - // Both are in units of bytes. | |
| - let x0_offset = RowBuffer::x0_byte_offset(); | |
| - let extrax = self.input_border_pixels[c].0 * ty.size(); | |
| + | |
| + let copy_x0 = x0.saturating_sub(self.input_border_pixels[c].0); | |
| + let copy_x1 = | |
| + (x0 + xsize + self.input_border_pixels[c].0).min(self.shared.input_size.0.shrc(dx)); | |
| + | |
| + debug_assert!(copy_x1 >= group_x0); | |
| + | |
| + let mut copy_byte_offset = RowBuffer::x0_byte_offset() - (x0 - copy_x0) * ty.size(); | |
| let base_gid = igy * self.shared.group_count.0 + gx; | |
| - // Previous group horizontally, if any. | |
| - if gx > 0 && extrax != 0 { | |
| - let input_buf = self.input_buffers[base_gid - 1].data[c].as_ref().unwrap(); | |
| + // Previous group horizontally, if needed. | |
| + if copy_x0 < group_x0 { | |
| + let (input_buf, xs) = if is_topbottom { | |
| + ( | |
| + self.input_buffers[base_gid - 1].topbottom[c] | |
| + .as_ref() | |
| + .unwrap(), | |
| + group_xsize, | |
| + ) | |
| + } else { | |
| + ( | |
| + self.input_buffers[base_gid - 1].leftright[c] | |
| + .as_ref() | |
| + .unwrap(), | |
| + 4 * (bx >> dx), | |
| + ) | |
| + }; | |
| let input_row = input_buf.row(input_y); | |
| - output_row[x0_offset - extrax..x0_offset] | |
| - .copy_from_slice(&input_row[input_buf.byte_size().0 - extrax..]); | |
| + | |
| + let to_copy = (group_x0 - copy_x0) * ty.size(); | |
| + let src_byte_offset = xs * ty.size() - to_copy; | |
| + | |
| + output_row[copy_byte_offset..copy_byte_offset + to_copy] | |
| + .copy_from_slice(&input_row[src_byte_offset..src_byte_offset + to_copy]); | |
| + copy_byte_offset += to_copy; | |
| } | |
| - let input_buf = self.input_buffers[base_gid].data[c].as_ref().unwrap(); | |
| + let input_buf = if is_topbottom { | |
| + self.input_buffers[base_gid].topbottom[c].as_ref().unwrap() | |
| + } else { | |
| + self.input_buffers[base_gid].data[c].as_ref().unwrap() | |
| + }; | |
| let input_row = input_buf.row(input_y); | |
| - let gxs = input_buf.byte_size().0; // bytes | |
| - output_row[x0_offset..x0_offset + gxs].copy_from_slice(input_row); | |
| + let copy_start = copy_x0.saturating_sub(group_x0) * ty.size(); | |
| + let copy_end = (copy_x1.min(group_x1) - group_x0) * ty.size(); | |
| + let to_copy = copy_end - copy_start; | |
| + output_row[copy_byte_offset..copy_byte_offset + to_copy] | |
| + .copy_from_slice(&input_row[copy_start..copy_end]); | |
| + copy_byte_offset += to_copy; | |
| // Next group horizontally, if any. | |
| - if gx + 1 < self.shared.group_count.0 && extrax != 0 { | |
| - let input_buf = self.input_buffers[base_gid + 1].data[c].as_ref().unwrap(); | |
| + if copy_x1 > group_x1 { | |
| + let input_buf = if is_topbottom { | |
| + self.input_buffers[base_gid + 1].topbottom[c] | |
| + .as_ref() | |
| + .unwrap() | |
| + } else { | |
| + self.input_buffers[base_gid + 1].leftright[c] | |
| + .as_ref() | |
| + .unwrap() | |
| + }; | |
| let input_row = input_buf.row(input_y); | |
| let dx = self.shared.channel_info[0][c].downsample.0; | |
| let gid = gy * self.shared.group_count.0 + gx; | |
| let next_group_xsize = self.shared.group_size(gid + 1).0.shrc(dx); | |
| - let border_x = extrax.min(next_group_xsize * ty.size()); | |
| - output_row[gxs + x0_offset..gxs + x0_offset + border_x] | |
| - .copy_from_slice(&input_row[..border_x]); | |
| - if border_x < extrax { | |
| - let pad_from = ((gxs + border_x) / ty.size()) as isize; | |
| - let pad_to = ((gxs + extrax) / ty.size()) as isize; | |
| + let border_x = (copy_x1 - group_x1).min(next_group_xsize); | |
| + output_row[copy_byte_offset..copy_byte_offset + border_x * ty.size()] | |
| + .copy_from_slice(&input_row[..border_x * ty.size()]); | |
| + if border_x + group_x1 < copy_x1 { | |
| + let pad_from = (xsize + border_x) as isize; | |
| + let pad_to = (xsize + copy_x1 - group_x1) as isize; | |
| apply_x_padding(ty, output_row, pad_from..pad_to, 0..pad_from); | |
| } | |
| } | |
| } | |
| - // Renders a single group worth of data. | |
| + // Renders *parts* of group's worth of data. | |
| + // In particular, renders the sub-rectangle given in `image_area`, where (1, 1) refers to | |
| + // the center of the group, and 0 and 2 include data from the neighbouring group (if any). | |
| #[instrument(skip(self, buffers))] | |
| pub(super) fn render_group( | |
| &mut self, | |
| (gx, gy): (usize, usize), | |
| + image_area: Rect, | |
| buffers: &mut [Option<JxlOutputBuffer>], | |
| ) -> Result<()> { | |
| - let gid = gy * self.shared.group_count.0 + gx; | |
| - let (xsize, num_rows) = self.shared.group_size(gid); | |
| - let (x0, y0) = self.shared.group_offset(gid); | |
| + let start_of_row = image_area.origin.0 == 0; | |
| + let end_of_row = image_area.end().0 == self.shared.input_size.0; | |
| - let num_channels = self.shared.num_channels(); | |
| - let mut num_extra_rows = 0; | |
| + let Rect { | |
| + origin: (x0, y0), | |
| + size: (xsize, num_rows), | |
| + } = image_area; | |
| - for c in 0..num_channels { | |
| - num_extra_rows = num_extra_rows | |
| - .max(self.input_border_pixels[c].1 << self.shared.channel_info[0][c].downsample.1); | |
| - } | |
| - for s in 0..self.shared.stages.len() { | |
| - num_extra_rows = num_extra_rows | |
| - .max(self.stage_output_border_pixels[s].1 << self.downsampling_for_stage[s].1); | |
| - } | |
| + let num_channels = self.shared.num_channels(); | |
| + let num_extra_rows = self.border_size.1; | |
| // This follows the same implementation strategy as the C++ code in libjxl. | |
| // We pretend that every stage has a vertical shift of 0, i.e. it is as tall | |
| @@ -152,7 +204,7 @@ impl LowMemoryRenderPipeline { | |
| // when vy % (1<<vshift) == 0. | |
| let vy0 = y0.saturating_sub(num_extra_rows); | |
| - let vy1 = y0 + num_rows + num_extra_rows; | |
| + let vy1 = image_area.end().1 + num_extra_rows; | |
| for vy in vy0..vy1 { | |
| let mut current_origin = (0, 0); | |
| @@ -161,7 +213,7 @@ impl LowMemoryRenderPipeline { | |
| // Step 1: read input channels. | |
| for c in 0..num_channels { | |
| // Same logic as below, but adapted to the input stage. | |
| - let dy = self.shared.channel_info[0][c].downsample.1; | |
| + let (dx, dy) = self.shared.channel_info[0][c].downsample; | |
| let scaled_y_border = self.input_border_pixels[c].1 << dy; | |
| let stage_vy = vy as isize - num_extra_rows as isize + scaled_y_border as isize; | |
| if stage_vy % (1 << dy) != 0 { | |
| @@ -176,7 +228,7 @@ impl LowMemoryRenderPipeline { | |
| continue; | |
| } | |
| let y = y as usize; | |
| - self.fill_initial_buffers(c, y, y0 >> dy, (gx, gy)); | |
| + self.fill_initial_buffers(c, y, (x0 >> dx, xsize >> dx), (gx, gy)); | |
| } | |
| // Step 2: go through stages one by one. | |
| for (i, stage) in self.shared.stages.iter().enumerate() { | |
| @@ -215,8 +267,8 @@ impl LowMemoryRenderPipeline { | |
| current_row: y, | |
| group_x0: x0 >> dx, | |
| out_extra_x, | |
| - is_first_xgroup: gx == 0, | |
| - is_last_xgroup: gx + 1 == self.shared.group_count.0, | |
| + start_of_row, | |
| + end_of_row, | |
| image_height: shifted_ysize, | |
| }, | |
| &mut buffers, | |
| @@ -294,8 +346,8 @@ impl LowMemoryRenderPipeline { | |
| current_row: y, | |
| group_x0: x0 >> dx, | |
| out_extra_x, | |
| - is_first_xgroup: gx == 0, | |
| - is_last_xgroup: gx + 1 == self.shared.group_count.0, | |
| + start_of_row, | |
| + end_of_row, | |
| image_height: shifted_ysize, | |
| }, | |
| &input_data, | |
| @@ -351,8 +403,8 @@ impl LowMemoryRenderPipeline { | |
| current_row: y, | |
| group_x0: x0, | |
| out_extra_x: 0, | |
| - is_first_xgroup: false, | |
| - is_last_xgroup: false, | |
| + start_of_row: false, | |
| + end_of_row: false, | |
| image_height: self.shared.input_size.1, | |
| }, | |
| &mut buffers, | |
| @@ -397,8 +449,8 @@ impl LowMemoryRenderPipeline { | |
| current_row: y, | |
| group_x0: x0, | |
| out_extra_x: 0, | |
| - is_first_xgroup: false, | |
| - is_last_xgroup: false, | |
| + start_of_row: false, | |
| + end_of_row: false, | |
| image_height: self.shared.input_size.1, | |
| }, | |
| &input_data, | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs | |
| index 43c4617c41e92..4cf01155da2a4 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs | |
| @@ -33,13 +33,15 @@ impl RowBuffer { | |
| data_type: DataTypeTag, | |
| next_y_border: usize, | |
| y_shift: usize, | |
| + x_shift: usize, | |
| row_len: usize, | |
| ) -> Result<Self> { | |
| let num_rows = (1 << y_shift) + 2 * next_y_border; | |
| let num_rows = num_rows.next_power_of_two(); | |
| // Input offset is at *one* cacheline, and we need up to *two* cachelines on the other | |
| // side as the data might exceed xsize slightly. | |
| - let row_stride = (row_len * data_type.size()).div_ceil(CACHE_LINE_BYTE_SIZE) + 3; | |
| + let row_stride = | |
| + (row_len * data_type.size()).div_ceil(CACHE_LINE_BYTE_SIZE) + (3 << x_shift); | |
| let mut buffer = Vec::<CacheLine>::new(); | |
| buffer.try_reserve_exact(row_stride * num_rows)?; | |
| buffer.resize(row_stride * num_rows, CacheLine::default()); | |
| @@ -54,13 +56,15 @@ impl RowBuffer { | |
| /// Creates a new row buffer with a single row filled with a repeating pattern. | |
| /// Used for constant values like opaque alpha. | |
| pub fn new_filled(data_type: DataTypeTag, row_len: usize, fill_pattern: &[u8]) -> Result<Self> { | |
| - let mut result = Self::new(data_type, 0, 0, row_len)?; | |
| + let mut result = Self::new(data_type, 0, 0, 0, row_len)?; | |
| let row_bytes: &mut [u8] = result.get_row_mut(0); | |
| - let start = Self::x0_offset::<u8>(); | |
| - let end = start + row_len * fill_pattern.len(); | |
| - for (i, byte) in row_bytes[start..end].iter_mut().enumerate() { | |
| + | |
| + // Fill the *entire* allocated row, including the padding on both sides, | |
| + // so cross-group border sampling doesn't read zeros (transparent alpha). | |
| + for (i, byte) in row_bytes.iter_mut().enumerate() { | |
| *byte = fill_pattern[i % fill_pattern.len()]; | |
| } | |
| + | |
| Ok(result) | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs | |
| index 704b5a5bdc079..5acced8b34dda 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/run_stage.rs | |
| @@ -9,9 +9,9 @@ use crate::{ | |
| render::{ | |
| Channels, ChannelsMut, RunInPlaceStage, | |
| internal::{PipelineBuffer, RunInOutStage}, | |
| - low_memory_pipeline::{helpers::mirror, render_group::ChannelVec}, | |
| + low_memory_pipeline::render_group::ChannelVec, | |
| }, | |
| - util::{ShiftRightCeil, SmallVec, tracing_wrappers::*}, | |
| + util::{ShiftRightCeil, SmallVec, mirror, tracing_wrappers::*}, | |
| }; | |
| use super::{ | |
| @@ -26,8 +26,8 @@ pub struct ExtraInfo { | |
| pub(super) out_extra_x: usize, | |
| pub(super) current_row: usize, | |
| pub(super) group_x0: usize, | |
| - pub(super) is_first_xgroup: bool, | |
| - pub(super) is_last_xgroup: bool, | |
| + pub(super) start_of_row: bool, | |
| + pub(super) end_of_row: bool, | |
| pub(super) image_height: usize, | |
| } | |
| @@ -46,16 +46,16 @@ impl<T: RenderPipelineInPlaceStage> RunInPlaceStage<RowBuffer> for T { | |
| group_x0, | |
| out_extra_x, | |
| image_height: _, | |
| - is_first_xgroup, | |
| - is_last_xgroup, | |
| + start_of_row, | |
| + end_of_row, | |
| }: ExtraInfo, | |
| buffers: &mut [&mut RowBuffer], | |
| state: Option<&mut dyn Any>, | |
| ) { | |
| let x0 = RowBuffer::x0_offset::<T::Type>(); | |
| - let xpre = if is_first_xgroup { 0 } else { out_extra_x }; | |
| + let xpre = if start_of_row { 0 } else { out_extra_x }; | |
| let xstart = x0 - xpre; | |
| - let xend = x0 + xsize + if is_last_xgroup { 0 } else { out_extra_x }; | |
| + let xend = x0 + xsize + if end_of_row { 0 } else { out_extra_x }; | |
| let mut rows: ChannelVec<_> = buffers | |
| .iter_mut() | |
| .map(|x| &mut x.get_row_mut::<T::Type>(current_row)[xstart..]) | |
| @@ -80,8 +80,8 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<RowBuffer> for T { | |
| group_x0, | |
| out_extra_x, | |
| image_height, | |
| - is_first_xgroup, | |
| - is_last_xgroup, | |
| + start_of_row, | |
| + end_of_row, | |
| }: ExtraInfo, | |
| input_buffers: &[&RowBuffer], | |
| output_buffers: &mut [RowBuffer], | |
| @@ -89,7 +89,7 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<RowBuffer> for T { | |
| ) { | |
| let ibordery = Self::BORDER.1 as isize; | |
| let x0 = RowBuffer::x0_offset::<T::InputT>(); | |
| - let xpre = if is_first_xgroup { | |
| + let xpre = if start_of_row { | |
| 0 | |
| } else { | |
| out_extra_x.shrc(T::SHIFT.0) | |
| @@ -97,7 +97,7 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<RowBuffer> for T { | |
| let xstart = x0 - xpre; | |
| let xend = x0 | |
| + xsize | |
| - + if is_last_xgroup { | |
| + + if end_of_row { | |
| 0 | |
| } else { | |
| out_extra_x.shrc(T::SHIFT.0) | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs | |
| index b586e9d8dd00d..a5024c175cc7c 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/identity.rs | |
| @@ -8,105 +8,200 @@ | |
| use std::mem::MaybeUninit; | |
| use std::ops::Range; | |
| -use jxl_simd::{F32SimdVec, SimdDescriptor, simd_function}; | |
| +use jxl_simd::{F32SimdVec, SimdDescriptor, U8SimdVec, U16SimdVec, simd_function}; | |
| use crate::{ | |
| api::{Endianness, JxlDataFormat, JxlOutputBuffer}, | |
| render::low_memory_pipeline::row_buffers::RowBuffer, | |
| }; | |
| -#[inline(always)] | |
| -fn run_interleaved_2<D: SimdDescriptor>( | |
| - d: D, | |
| - a: &[f32], | |
| - b: &[f32], | |
| - out: &mut [MaybeUninit<f32>], | |
| -) -> usize { | |
| - let len = D::F32Vec::LEN; | |
| - let mut n = 0; | |
| - | |
| - for ((chunk_a, chunk_b), chunk_out) in a | |
| - .chunks_exact(len) | |
| - .zip(b.chunks_exact(len)) | |
| - .zip(out.chunks_exact_mut(len * 2)) | |
| - { | |
| - let va = D::F32Vec::load(d, chunk_a); | |
| - let vb = D::F32Vec::load(d, chunk_b); | |
| - D::F32Vec::store_interleaved_2_uninit(va, vb, chunk_out); | |
| - n += len; | |
| - } | |
| +macro_rules! define_run_interleaved { | |
| + ($fn_name:ident, $ty:ty, $vec_trait:ident, $store_fn:ident, $cnt:expr, $($arg:ident),+) => { | |
| + #[inline(always)] | |
| + fn $fn_name<D: SimdDescriptor>( | |
| + d: D, | |
| + $($arg: &[$ty]),+, | |
| + out: &mut [MaybeUninit<$ty>], | |
| + ) -> usize { | |
| + let len = D::$vec_trait::LEN; | |
| + let mut n = 0; | |
| + let limit = [$($arg.len()),+][0]; | |
| + | |
| + { | |
| + let out_chunks = out[..limit * $cnt].chunks_exact_mut(len * $cnt); | |
| + $(let mut $arg = $arg.chunks_exact(len);)+ | |
| + for out_chunk in out_chunks { | |
| + $(let $arg = D::$vec_trait::load(d, $arg.next().unwrap());)+ | |
| + D::$vec_trait::$store_fn($($arg),+, out_chunk); | |
| + n += len; | |
| + } | |
| + } | |
| - n | |
| + let d256 = d.maybe_downgrade_256bit(); | |
| + let len256 = <D::Descriptor256 as SimdDescriptor>::$vec_trait::LEN; | |
| + if len256 < len { | |
| + let out_chunks = out[n * $cnt..limit * $cnt].chunks_exact_mut(len256 * $cnt); | |
| + $(let mut $arg = $arg[n..limit].chunks_exact(len256);)+ | |
| + for out_chunk in out_chunks { | |
| + $(let $arg = <D::Descriptor256 as SimdDescriptor>::$vec_trait::load(d256, $arg.next().unwrap());)+ | |
| + <D::Descriptor256 as SimdDescriptor>::$vec_trait::$store_fn($($arg),+, out_chunk); | |
| + n += len256; | |
| + } | |
| + } | |
| + | |
| + let d128 = d.maybe_downgrade_128bit(); | |
| + let len128 = <D::Descriptor128 as SimdDescriptor>::$vec_trait::LEN; | |
| + if len128 < len { | |
| + let out_chunks = out[n * $cnt..limit * $cnt].chunks_exact_mut(len128 * $cnt); | |
| + $(let mut $arg = $arg[n..limit].chunks_exact(len128);)+ | |
| + for out_chunk in out_chunks { | |
| + $(let $arg = <D::Descriptor128 as SimdDescriptor>::$vec_trait::load(d128, $arg.next().unwrap());)+ | |
| + <D::Descriptor128 as SimdDescriptor>::$vec_trait::$store_fn($($arg),+, out_chunk); | |
| + n += len128; | |
| + } | |
| + } | |
| + | |
| + n | |
| + } | |
| + }; | |
| } | |
| -#[inline(always)] | |
| -fn run_interleaved_3<D: SimdDescriptor>( | |
| +define_run_interleaved!( | |
| + run_interleaved_2_f32, | |
| + f32, | |
| + F32Vec, | |
| + store_interleaved_2_uninit, | |
| + 2, | |
| + a, | |
| + b | |
| +); | |
| +define_run_interleaved!( | |
| + run_interleaved_3_f32, | |
| + f32, | |
| + F32Vec, | |
| + store_interleaved_3_uninit, | |
| + 3, | |
| + a, | |
| + b, | |
| + c | |
| +); | |
| +define_run_interleaved!( | |
| + run_interleaved_4_f32, | |
| + f32, | |
| + F32Vec, | |
| + store_interleaved_4_uninit, | |
| + 4, | |
| + a, | |
| + b, | |
| + c, | |
| + e | |
| +); | |
| + | |
| +simd_function!( | |
| + store_interleaved_f32, | |
| d: D, | |
| - a: &[f32], | |
| - b: &[f32], | |
| - c: &[f32], | |
| - out: &mut [MaybeUninit<f32>], | |
| -) -> usize { | |
| - let len = D::F32Vec::LEN; | |
| - let mut n = 0; | |
| - | |
| - for (((chunk_a, chunk_b), chunk_c), chunk_out) in a | |
| - .chunks_exact(len) | |
| - .zip(b.chunks_exact(len)) | |
| - .zip(c.chunks_exact(len)) | |
| - .zip(out.chunks_exact_mut(len * 3)) | |
| - { | |
| - let va = D::F32Vec::load(d, chunk_a); | |
| - let vb = D::F32Vec::load(d, chunk_b); | |
| - let vc = D::F32Vec::load(d, chunk_c); | |
| - D::F32Vec::store_interleaved_3_uninit(va, vb, vc, chunk_out); | |
| - n += len; | |
| + fn store_interleaved_impl_f32( | |
| + inputs: &[&[f32]], | |
| + output: &mut [MaybeUninit<f32>] | |
| + ) -> usize { | |
| + match inputs.len() { | |
| + 2 => run_interleaved_2_f32(d, inputs[0], inputs[1], output), | |
| + 3 => run_interleaved_3_f32(d, inputs[0], inputs[1], inputs[2], output), | |
| + 4 => run_interleaved_4_f32(d, inputs[0], inputs[1], inputs[2], inputs[3], output), | |
| + _ => 0, | |
| + } | |
| } | |
| +); | |
| - n | |
| -} | |
| +define_run_interleaved!( | |
| + run_interleaved_2_u8, | |
| + u8, | |
| + U8Vec, | |
| + store_interleaved_2_uninit, | |
| + 2, | |
| + a, | |
| + b | |
| +); | |
| +define_run_interleaved!( | |
| + run_interleaved_3_u8, | |
| + u8, | |
| + U8Vec, | |
| + store_interleaved_3_uninit, | |
| + 3, | |
| + a, | |
| + b, | |
| + c | |
| +); | |
| +define_run_interleaved!( | |
| + run_interleaved_4_u8, | |
| + u8, | |
| + U8Vec, | |
| + store_interleaved_4_uninit, | |
| + 4, | |
| + a, | |
| + b, | |
| + c, | |
| + e | |
| +); | |
| -#[inline(always)] | |
| -fn run_interleaved_4<D: SimdDescriptor>( | |
| +simd_function!( | |
| + store_interleaved_u8, | |
| d: D, | |
| - a: &[f32], | |
| - b: &[f32], | |
| - c: &[f32], | |
| - e: &[f32], | |
| - out: &mut [MaybeUninit<f32>], | |
| -) -> usize { | |
| - let len = D::F32Vec::LEN; | |
| - let mut n = 0; | |
| - | |
| - for ((((chunk_a, chunk_b), chunk_c), chunk_e), chunk_out) in a | |
| - .chunks_exact(len) | |
| - .zip(b.chunks_exact(len)) | |
| - .zip(c.chunks_exact(len)) | |
| - .zip(e.chunks_exact(len)) | |
| - .zip(out.chunks_exact_mut(len * 4)) | |
| - { | |
| - let va = D::F32Vec::load(d, chunk_a); | |
| - let vb = D::F32Vec::load(d, chunk_b); | |
| - let vc = D::F32Vec::load(d, chunk_c); | |
| - let ve = D::F32Vec::load(d, chunk_e); | |
| - D::F32Vec::store_interleaved_4_uninit(va, vb, vc, ve, chunk_out); | |
| - n += len; | |
| + fn store_interleaved_impl_u8( | |
| + inputs: &[&[u8]], | |
| + output: &mut [MaybeUninit<u8>] | |
| + ) -> usize { | |
| + match inputs.len() { | |
| + 2 => run_interleaved_2_u8(d, inputs[0], inputs[1], output), | |
| + 3 => run_interleaved_3_u8(d, inputs[0], inputs[1], inputs[2], output), | |
| + 4 => run_interleaved_4_u8(d, inputs[0], inputs[1], inputs[2], inputs[3], output), | |
| + _ => 0, | |
| + } | |
| } | |
| +); | |
| - n | |
| -} | |
| +define_run_interleaved!( | |
| + run_interleaved_2_u16, | |
| + u16, | |
| + U16Vec, | |
| + store_interleaved_2_uninit, | |
| + 2, | |
| + a, | |
| + b | |
| +); | |
| +define_run_interleaved!( | |
| + run_interleaved_3_u16, | |
| + u16, | |
| + U16Vec, | |
| + store_interleaved_3_uninit, | |
| + 3, | |
| + a, | |
| + b, | |
| + c | |
| +); | |
| +define_run_interleaved!( | |
| + run_interleaved_4_u16, | |
| + u16, | |
| + U16Vec, | |
| + store_interleaved_4_uninit, | |
| + 4, | |
| + a, | |
| + b, | |
| + c, | |
| + e | |
| +); | |
| simd_function!( | |
| - store_interleaved, | |
| + store_interleaved_u16, | |
| d: D, | |
| - fn store_interleaved_impl( | |
| - inputs: &[&[f32]], | |
| - output: &mut [MaybeUninit<f32>] | |
| + fn store_interleaved_impl_u16( | |
| + inputs: &[&[u16]], | |
| + output: &mut [MaybeUninit<u16>] | |
| ) -> usize { | |
| match inputs.len() { | |
| - 2 => run_interleaved_2(d, inputs[0], inputs[1], output), | |
| - 3 => run_interleaved_3(d, inputs[0], inputs[1], inputs[2], output), | |
| - 4 => run_interleaved_4(d, inputs[0], inputs[1], inputs[2], inputs[3], output), | |
| + 2 => run_interleaved_2_u16(d, inputs[0], inputs[1], output), | |
| + 3 => run_interleaved_3_u16(d, inputs[0], inputs[1], inputs[2], output), | |
| + 4 => run_interleaved_4_u16(d, inputs[0], inputs[1], inputs[2], inputs[3], output), | |
| _ => 0, | |
| } | |
| } | |
| @@ -153,6 +248,43 @@ pub(super) fn store( | |
| } | |
| input_buf.len() / data_format.bytes_per_sample() | |
| } | |
| + (channels, 1, true) if (2..=4).contains(&channels) => { | |
| + let start_u8 = byte_start; | |
| + let end_u8 = byte_end; | |
| + let mut slices = [&[] as &[u8]; 4]; | |
| + for (i, buf) in input_buf.iter().enumerate() { | |
| + slices[i] = &buf.get_row::<u8>(input_y)[start_u8..end_u8]; | |
| + } | |
| + // Note that, by the conditions on the *_uninit methods on U8Vec, this function | |
| + // never writes uninitialized memory. | |
| + store_interleaved_u8(&slices[..channels], output_buf) | |
| + } | |
| + (channels, 2, true) if (2..=4).contains(&channels) => { | |
| + let ptr = output_buf.as_mut_ptr(); | |
| + if ptr.align_offset(std::mem::align_of::<u16>()) == 0 { | |
| + let len_u16 = output_buf.len() / 2; | |
| + // SAFETY: we checked alignment above, and the size is correct by definition | |
| + // (note that it is guaranteed that MaybeUninit<T> has the same size and align | |
| + // of T for any T). | |
| + let output_u16 = unsafe { | |
| + std::slice::from_raw_parts_mut( | |
| + output_buf.as_mut_ptr().cast::<MaybeUninit<u16>>(), | |
| + len_u16, | |
| + ) | |
| + }; | |
| + let start_u16 = byte_start / 2; | |
| + let end_u16 = byte_end / 2; | |
| + let mut slices = [&[] as &[u16]; 4]; | |
| + for (i, buf) in input_buf.iter().enumerate() { | |
| + slices[i] = &buf.get_row::<u16>(input_y)[start_u16..end_u16]; | |
| + } | |
| + // Note that, by the conditions on the *_uninit methods on U16Vec, this function | |
| + // never writes uninitialized memory. | |
| + store_interleaved_u16(&slices[..channels], output_u16) | |
| + } else { | |
| + 0 | |
| + } | |
| + } | |
| (channels, 4, true) if (2..=4).contains(&channels) => { | |
| let ptr = output_buf.as_mut_ptr(); | |
| if ptr.align_offset(std::mem::align_of::<f32>()) == 0 { | |
| @@ -177,7 +309,7 @@ pub(super) fn store( | |
| // Note that, by the conditions on the *_uninit methods on F32Vec, this function | |
| // never writes uninitialized memory. | |
| - store_interleaved(&slices[..channels], output_f32) | |
| + store_interleaved_f32(&slices[..channels], output_f32) | |
| } else { | |
| 0 | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs | |
| index d5d13699ec3f7..98c2975535536 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/save/mod.rs | |
| @@ -18,7 +18,7 @@ mod identity; | |
| impl SaveStage { | |
| // Takes as input only those channels that are *actually* saved. | |
| #[allow(clippy::too_many_arguments)] | |
| - pub(super) fn save_lowmem( | |
| + pub(crate) fn save_lowmem( | |
| &self, | |
| data: &[&RowBuffer], | |
| buffers: &mut [Option<JxlOutputBuffer>], | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs | |
| index 4270f22eb3af7..5748513ba7ad9 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/mod.rs | |
| @@ -17,8 +17,8 @@ pub mod buffer_splitter; | |
| mod builder; | |
| mod channels; | |
| mod internal; | |
| -mod low_memory_pipeline; | |
| -mod save; | |
| +pub mod low_memory_pipeline; | |
| +pub mod save; | |
| mod simd_utils; | |
| #[cfg(test)] | |
| mod simple_pipeline; | |
| @@ -42,6 +42,11 @@ pub(crate) use low_memory_pipeline::LowMemoryRenderPipeline; | |
| #[cfg(test)] | |
| pub(crate) use simple_pipeline::SimpleRenderPipeline; | |
| +pub enum StageSpecialCase { | |
| + F32ToU8 { channel: usize, bit_depth: u8 }, | |
| + ModularToF32 { channel: usize, bit_depth: u8 }, | |
| +} | |
| + | |
| /// Modifies channels in-place. | |
| pub trait RenderPipelineInPlaceStage: Any + std::fmt::Display { | |
| type Type: ImageDataType; | |
| @@ -60,6 +65,10 @@ pub trait RenderPipelineInPlaceStage: Any + std::fmt::Display { | |
| } | |
| fn uses_channel(&self, c: usize) -> bool; | |
| + | |
| + fn is_special_case(&self) -> Option<StageSpecialCase> { | |
| + None | |
| + } | |
| } | |
| /// Modifies data and writes it to a new buffer, of possibly different type. | |
| @@ -97,6 +106,10 @@ pub trait RenderPipelineInOutStage: Any + std::fmt::Display { | |
| } | |
| fn uses_channel(&self, c: usize) -> bool; | |
| + | |
| + fn is_special_case(&self) -> Option<StageSpecialCase> { | |
| + None | |
| + } | |
| } | |
| // TODO(veluca): find a way to reduce the generated code due to having two builders, to integrate | |
| @@ -111,14 +124,13 @@ pub(crate) trait RenderPipeline: Sized { | |
| /// pass, a new buffer, or a re-used buffer from i.e. previously decoded frames. | |
| fn get_buffer<T: ImageDataType>(&mut self, channel: usize) -> Result<Image<T>>; | |
| - /// Gives back the buffer for a channel and group to the render pipeline, marking that | |
| - /// `num_passes` additional passes (wrt. the previous call to this method for the same channel | |
| - /// and group, or 0 if no previous call happend) were rendered into the input buffer. | |
| + /// Gives back the buffer for a channel and group to the render pipeline, marking whether | |
| + /// this will be the last time that this function is called for this group. | |
| fn set_buffer_for_group<T: ImageDataType>( | |
| &mut self, | |
| channel: usize, | |
| group_id: usize, | |
| - num_passes: usize, | |
| + complete: bool, | |
| buf: Image<T>, | |
| buffer_splitter: &mut BufferSplitter, | |
| ) -> Result<()>; | |
| @@ -131,6 +143,9 @@ pub(crate) trait RenderPipeline: Sized { | |
| /// implementation to ensure rendering only happens once. | |
| fn render_outside_frame(&mut self, buffer_splitter: &mut BufferSplitter) -> Result<()>; | |
| + // Marks a group for being re-rendered later. | |
| + fn mark_group_to_rerender(&mut self, g: usize); | |
| + | |
| fn box_inout_stage<S: RenderPipelineInOutStage>( | |
| stage: S, | |
| ) -> Box<dyn RunInOutStage<Self::Buffer>>; | |
| @@ -138,4 +153,6 @@ pub(crate) trait RenderPipeline: Sized { | |
| fn box_inplace_stage<S: RenderPipelineInPlaceStage>( | |
| stage: S, | |
| ) -> Box<dyn RunInPlaceStage<Self::Buffer>>; | |
| + | |
| + fn used_channel_mask(&self) -> &[bool]; | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs | |
| index df09a8fd1c462..227003bdfe4f7 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/mod.rs | |
| @@ -20,37 +20,28 @@ mod extend; | |
| mod run_stage; | |
| mod save; | |
| -/// A RenderPipeline that waits for all input of a pass to be ready before doing any rendering, and | |
| +/// A RenderPipeline that waits for all input to be ready before doing any rendering, and | |
| /// prioritizes simplicity over memory usage and computational efficiency. | |
| /// Eventually meant to be used only for verification purposes. | |
| pub struct SimpleRenderPipeline { | |
| shared: RenderPipelineShared<Image<f64>>, | |
| input_buffers: Vec<Image<f64>>, | |
| - completed_passes: usize, | |
| } | |
| impl SimpleRenderPipeline { | |
| #[instrument(skip_all, err)] | |
| fn do_render(&mut self, buffer_splitter: &mut BufferSplitter) -> Result<()> { | |
| - let ready_passes = self | |
| + let ready = self | |
| .shared | |
| - .group_chan_ready_passes | |
| + .group_chan_complete | |
| .iter() | |
| .flat_map(|x| x.iter()) | |
| - .copied() | |
| - .min() | |
| - .unwrap(); | |
| - if ready_passes <= self.completed_passes { | |
| - debug!( | |
| - "no more ready passes ({} completed, {ready_passes} ready)", | |
| - self.completed_passes | |
| - ); | |
| + .all(|x| *x); | |
| + if !ready { | |
| + debug!("not yet ready"); | |
| return Ok(()); | |
| } | |
| - debug!( | |
| - "new ready passes ({} completed, {ready_passes} ready)", | |
| - self.completed_passes | |
| - ); | |
| + debug!("ready to render"); | |
| let mut current_buffers = clone_images(&self.input_buffers)?; | |
| @@ -129,7 +120,6 @@ impl SimpleRenderPipeline { | |
| current_buffers = output_buffers; | |
| } | |
| - self.completed_passes = ready_passes; | |
| Ok(()) | |
| } | |
| } | |
| @@ -154,7 +144,6 @@ impl RenderPipeline for SimpleRenderPipeline { | |
| Ok(Self { | |
| shared, | |
| input_buffers, | |
| - completed_passes: 0, | |
| }) | |
| } | |
| @@ -168,7 +157,7 @@ impl RenderPipeline for SimpleRenderPipeline { | |
| &mut self, | |
| channel: usize, | |
| group_id: usize, | |
| - num_passes: usize, | |
| + complete: bool, | |
| buf: Image<T>, | |
| buffer_splitter: &mut BufferSplitter, | |
| ) -> Result<()> { | |
| @@ -178,22 +167,24 @@ impl RenderPipeline for SimpleRenderPipeline { | |
| channel, | |
| T::DATA_TYPE_ID, | |
| ); | |
| - let sz = self.shared.group_size_for_channel(channel, T::DATA_TYPE_ID); | |
| - let goffset = self.shared.group_offset(group_id); | |
| - let ChannelInfo { ty, downsample } = self.shared.channel_info[0][channel]; | |
| - let off = (goffset.0 >> downsample.0, goffset.1 >> downsample.1); | |
| - debug!(?sz, input_buffers_sz=?self.input_buffers[channel].size(), offset=?off, ?downsample, ?goffset); | |
| - let ty = ty.unwrap(); | |
| - assert_eq!(ty, T::DATA_TYPE_ID); | |
| - let total_sz = self.input_buffers[channel].size(); | |
| - for y in 0..sz.1.min(total_sz.1 - off.1) { | |
| - let row_in = buf.row(y); | |
| - let row_out = self.input_buffers[channel].row_mut(y + off.1); | |
| - for x in 0..sz.0.min(total_sz.0 - off.0) { | |
| - row_out[x + off.0] = row_in[x].to_f64(); | |
| + if self.shared.channel_is_used[channel] { | |
| + let sz = self.shared.group_size_for_channel(channel, T::DATA_TYPE_ID); | |
| + let goffset = self.shared.group_offset(group_id); | |
| + let ChannelInfo { ty, downsample } = self.shared.channel_info[0][channel]; | |
| + let off = (goffset.0 >> downsample.0, goffset.1 >> downsample.1); | |
| + debug!(?sz, input_buffers_sz=?self.input_buffers[channel].size(), offset=?off, ?downsample, ?goffset); | |
| + let ty = ty.unwrap(); | |
| + assert_eq!(ty, T::DATA_TYPE_ID); | |
| + let total_sz = self.input_buffers[channel].size(); | |
| + for y in 0..sz.1.min(total_sz.1 - off.1) { | |
| + let row_in = buf.row(y); | |
| + let row_out = self.input_buffers[channel].row_mut(y + off.1); | |
| + for x in 0..sz.0.min(total_sz.0 - off.0) { | |
| + row_out[x + off.0] = row_in[x].to_f64(); | |
| + } | |
| } | |
| + self.shared.group_chan_complete[group_id][channel] = complete; | |
| } | |
| - self.shared.group_chan_ready_passes[group_id][channel] += num_passes; | |
| self.do_render(buffer_splitter) | |
| } | |
| @@ -208,6 +199,8 @@ impl RenderPipeline for SimpleRenderPipeline { | |
| Ok(()) | |
| } | |
| + fn mark_group_to_rerender(&mut self, _g: usize) {} | |
| + | |
| fn box_inout_stage<S: RenderPipelineInOutStage>( | |
| stage: S, | |
| ) -> Box<dyn super::RunInOutStage<Self::Buffer>> { | |
| @@ -219,4 +212,8 @@ impl RenderPipeline for SimpleRenderPipeline { | |
| ) -> Box<dyn super::RunInPlaceStage<Self::Buffer>> { | |
| Box::new(stage) | |
| } | |
| + | |
| + fn used_channel_mask(&self) -> &[bool] { | |
| + &self.shared.channel_is_used | |
| + } | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs | |
| index 24a0ee6a59065..bfaea994305a2 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/simple_pipeline/run_stage.rs | |
| @@ -13,7 +13,7 @@ use crate::{ | |
| RenderPipelineInOutStage, RenderPipelineInPlaceStage, RunInOutStage, RunInPlaceStage, | |
| internal::PipelineBuffer, | |
| }, | |
| - util::{SmallVec, round_up_size_to_cache_line, tracing_wrappers::*}, | |
| + util::{SmallVec, mirror, round_up_size_to_cache_line, tracing_wrappers::*}, | |
| }; | |
| impl PipelineBuffer for Image<f64> { | |
| @@ -122,31 +122,20 @@ impl<T: RenderPipelineInOutStage> RunInOutStage<Image<f64>> for T { | |
| numc | |
| ]; | |
| - let mirror = |mut v: i64, size: i64| { | |
| - while v < 0 || v >= size { | |
| - if v < 0 { | |
| - v = -v - 1; | |
| - } | |
| - if v >= size { | |
| - v = size + (size - v) - 1; | |
| - } | |
| - } | |
| - v as usize | |
| - }; | |
| for y in 0..input_size.1 { | |
| for x in (0..input_size.0).step_by(chunk_size) { | |
| - let border_x = Self::BORDER.0 as i64; | |
| - let border_y = Self::BORDER.1 as i64; | |
| + let border_x = Self::BORDER.0 as isize; | |
| + let border_y = Self::BORDER.1 as isize; | |
| let xsize = input_size.0.min(x + chunk_size) - x; | |
| - let xs = xsize as i64; | |
| + let xs = xsize as isize; | |
| debug!("position: {x}x{y} xsize: {xsize}"); | |
| for c in 0..numc { | |
| for iy in -border_y..=border_y { | |
| - let imgy = mirror(y as i64 + iy, input_size.1 as i64); | |
| + let imgy = mirror(y as isize + iy, input_size.1); | |
| let in_row = input_buffers[c].row(imgy); | |
| let buf_in_row = &mut buffer_in[c][(iy + border_y) as usize]; | |
| for ix in (-border_x..0).chain(xs..xs + border_x) { | |
| - let imgx = mirror(x as i64 + ix, input_size.0 as i64); | |
| + let imgx = mirror(x as isize + ix, input_size.0); | |
| buf_in_row[(ix + border_x) as usize] = | |
| T::InputT::from_f64(in_row[imgx]); | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs | |
| index 1ddd46f593538..b9a941106c39b 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/convert.rs | |
| @@ -3,64 +3,29 @@ | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| +use std::sync::Arc; | |
| + | |
| use crate::{ | |
| frame::quantizer::LfQuantFactors, | |
| headers::bit_depth::BitDepth, | |
| - render::{Channels, ChannelsMut, RenderPipelineInOutStage}, | |
| + render::{Channels, ChannelsMut, RenderPipelineInOutStage, StageSpecialCase}, | |
| + util::AtomicRefCell, | |
| }; | |
| -use jxl_simd::{F32SimdVec, I32SimdVec, simd_function}; | |
| - | |
| -pub struct ConvertU8F32Stage { | |
| - channel: usize, | |
| -} | |
| - | |
| -impl ConvertU8F32Stage { | |
| - pub fn new(channel: usize) -> ConvertU8F32Stage { | |
| - ConvertU8F32Stage { channel } | |
| - } | |
| -} | |
| - | |
| -impl std::fmt::Display for ConvertU8F32Stage { | |
| - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
| - write!(f, "convert U8 data to F32 in channel {}", self.channel) | |
| - } | |
| -} | |
| - | |
| -impl RenderPipelineInOutStage for ConvertU8F32Stage { | |
| - type InputT = u8; | |
| - type OutputT = f32; | |
| - const SHIFT: (u8, u8) = (0, 0); | |
| - const BORDER: (u8, u8) = (0, 0); | |
| - | |
| - fn uses_channel(&self, c: usize) -> bool { | |
| - c == self.channel | |
| - } | |
| - | |
| - fn process_row_chunk( | |
| - &self, | |
| - _position: (usize, usize), | |
| - xsize: usize, | |
| - input_rows: &Channels<u8>, | |
| - output_rows: &mut ChannelsMut<f32>, | |
| - _state: Option<&mut dyn std::any::Any>, | |
| - ) { | |
| - let input = &input_rows[0]; | |
| - for i in 0..xsize { | |
| - output_rows[0][0][i] = input[0][i] as f32 * (1.0 / 255.0); | |
| - } | |
| - } | |
| -} | |
| +use jxl_simd::{F32SimdVec, I32SimdVec, SimdMask, simd_function}; | |
| pub struct ConvertModularXYBToF32Stage { | |
| first_channel: usize, | |
| - scale: [f32; 3], | |
| + lf_quant: Arc<AtomicRefCell<LfQuantFactors>>, | |
| } | |
| impl ConvertModularXYBToF32Stage { | |
| - pub fn new(first_channel: usize, lf_quant: &LfQuantFactors) -> ConvertModularXYBToF32Stage { | |
| + pub fn new( | |
| + first_channel: usize, | |
| + lf_quant: Arc<AtomicRefCell<LfQuantFactors>>, | |
| + ) -> ConvertModularXYBToF32Stage { | |
| ConvertModularXYBToF32Stage { | |
| first_channel, | |
| - scale: lf_quant.quant_factors, | |
| + lf_quant, | |
| } | |
| } | |
| } | |
| @@ -69,10 +34,9 @@ impl std::fmt::Display for ConvertModularXYBToF32Stage { | |
| fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
| write!( | |
| f, | |
| - "convert modular xyb data to F32 in channels {}..{} with scales {:?}", | |
| + "convert modular xyb data to F32 in channels {}..{}", | |
| self.first_channel, | |
| self.first_channel + 2, | |
| - self.scale | |
| ) | |
| } | |
| } | |
| @@ -95,7 +59,8 @@ impl RenderPipelineInOutStage for ConvertModularXYBToF32Stage { | |
| output_rows: &mut ChannelsMut<f32>, | |
| _state: Option<&mut dyn std::any::Any>, | |
| ) { | |
| - let [scale_x, scale_y, scale_b] = self.scale; | |
| + let lf_quant = self.lf_quant.borrow(); | |
| + let [scale_x, scale_y, scale_b] = lf_quant.quant_factors; | |
| assert_eq!( | |
| input_rows.len(), | |
| 3, | |
| @@ -257,6 +222,27 @@ fn int_to_float_generic(input: &[i32], output: &mut [f32], bits: u32, exp_bits: | |
| } | |
| } | |
| +// SIMD modular to 32 bit float conversion | |
| +simd_function!( | |
| + modular_to_float_32bit_simd_dispatch, | |
| + d: D, | |
| + fn modular_to_float_32bit_simd(input: &[i32], output: &mut [f32], scale: f32, xsize: usize) { | |
| + let simd_width = D::I32Vec::LEN; | |
| + | |
| + let scale = D::F32Vec::splat(d, scale); | |
| + | |
| + // Process complete SIMD vectors | |
| + for (in_chunk, out_chunk) in input | |
| + .chunks_exact(simd_width) | |
| + .zip(output.chunks_exact_mut(simd_width)) | |
| + .take(xsize.div_ceil(simd_width)) | |
| + { | |
| + let val = D::I32Vec::load(d, in_chunk); | |
| + (val.as_f32() * scale).store(out_chunk); | |
| + } | |
| + } | |
| +); | |
| + | |
| impl RenderPipelineInOutStage for ConvertModularToF32Stage { | |
| type InputT = i32; | |
| type OutputT = f32; | |
| @@ -279,11 +265,19 @@ impl RenderPipelineInOutStage for ConvertModularToF32Stage { | |
| if self.bit_depth.floating_point_sample() { | |
| int_to_float(input[0], output_rows[0][0], &self.bit_depth, xsize); | |
| } else { | |
| - // TODO(veluca): SIMDfy this code. | |
| let scale = 1.0 / ((1u64 << self.bit_depth.bits_per_sample()) - 1) as f32; | |
| - for i in 0..xsize { | |
| - output_rows[0][0][i] = input[0][i] as f32 * scale; | |
| - } | |
| + modular_to_float_32bit_simd_dispatch(input[0], output_rows[0][0], scale, xsize); | |
| + } | |
| + } | |
| + | |
| + fn is_special_case(&self) -> Option<StageSpecialCase> { | |
| + if self.bit_depth.floating_point_sample() { | |
| + None | |
| + } else { | |
| + Some(StageSpecialCase::ModularToF32 { | |
| + channel: self.channel, | |
| + bit_depth: self.bit_depth.bits_per_sample() as u8, | |
| + }) | |
| } | |
| } | |
| } | |
| @@ -358,6 +352,89 @@ impl RenderPipelineInOutStage for ConvertF32ToU8Stage { | |
| let max = ((1u32 << self.bit_depth) - 1) as f32; | |
| f32_to_u8_simd_dispatch(input, output, max, xsize); | |
| } | |
| + | |
| + fn is_special_case(&self) -> Option<StageSpecialCase> { | |
| + Some(StageSpecialCase::F32ToU8 { | |
| + channel: self.channel, | |
| + bit_depth: self.bit_depth, | |
| + }) | |
| + } | |
| +} | |
| + | |
| +/// Stage that converts i32 values to u8 values, applying a multiplier. | |
| +pub struct ConvertI32ToU8Stage { | |
| + channel: usize, | |
| + multiplier: i32, | |
| + max: i32, | |
| +} | |
| + | |
| +impl ConvertI32ToU8Stage { | |
| + pub fn new(channel: usize, multiplier: i32, max: i32) -> ConvertI32ToU8Stage { | |
| + ConvertI32ToU8Stage { | |
| + channel, | |
| + multiplier, | |
| + max, | |
| + } | |
| + } | |
| +} | |
| + | |
| +impl std::fmt::Display for ConvertI32ToU8Stage { | |
| + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { | |
| + write!( | |
| + f, | |
| + "convert I32 to U8 in channel {} with multiplier {}", | |
| + self.channel, self.multiplier | |
| + ) | |
| + } | |
| +} | |
| + | |
| +// SIMD I32 to U8 conversion | |
| +simd_function!( | |
| + i32_to_u8_simd_dispatch, | |
| + d: D, | |
| + fn i32_to_u8_simd(input: &[i32], output: &mut [u8], scale: i32, max: i32, xsize: usize) { | |
| + let simd_width = D::F32Vec::LEN; | |
| + let scale = D::I32Vec::splat(d, scale); | |
| + let max = D::I32Vec::splat(d, max); | |
| + let zero = D::I32Vec::splat(d, 0); | |
| + | |
| + // Process SIMD vectors using div_ceil (buffers are padded) | |
| + for (input_chunk, output_chunk) in input | |
| + .chunks_exact(simd_width) | |
| + .zip(output.chunks_exact_mut(simd_width)) | |
| + .take(xsize.div_ceil(simd_width)) | |
| + { | |
| + let val = D::I32Vec::load(d, input_chunk); | |
| + let scaled = val * scale; | |
| + let zeroclip = scaled.lt_zero().if_then_else_i32(zero, scaled); | |
| + let clip = scaled.gt(max).if_then_else_i32(max, zeroclip); | |
| + clip.store_u8(output_chunk); | |
| + } | |
| + } | |
| +); | |
| + | |
| +impl RenderPipelineInOutStage for ConvertI32ToU8Stage { | |
| + type InputT = i32; | |
| + type OutputT = u8; | |
| + const SHIFT: (u8, u8) = (0, 0); | |
| + const BORDER: (u8, u8) = (0, 0); | |
| + | |
| + fn uses_channel(&self, c: usize) -> bool { | |
| + c == self.channel | |
| + } | |
| + | |
| + fn process_row_chunk( | |
| + &self, | |
| + _position: (usize, usize), | |
| + xsize: usize, | |
| + input_rows: &Channels<i32>, | |
| + output_rows: &mut ChannelsMut<u8>, | |
| + _state: Option<&mut dyn std::any::Any>, | |
| + ) { | |
| + let input = input_rows[0][0]; | |
| + let output = &mut output_rows[0][0]; | |
| + i32_to_u8_simd_dispatch(input, output, self.multiplier, self.max, xsize); | |
| + } | |
| } | |
| /// Stage that converts f32 values in [0, 1] range to u16 values. | |
| @@ -481,11 +558,6 @@ mod test { | |
| use crate::headers::bit_depth::BitDepth; | |
| use test_log::test; | |
| - #[test] | |
| - fn u8_consistency() -> Result<()> { | |
| - crate::render::test::test_stage_consistency(|| ConvertU8F32Stage::new(0), (500, 500), 1) | |
| - } | |
| - | |
| #[test] | |
| fn f32_to_u8_consistency() -> Result<()> { | |
| crate::render::test::test_stage_consistency( | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs | |
| index bbeff0ddab3c9..ca83a7b6623b8 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf0.rs | |
| @@ -3,6 +3,8 @@ | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| +use std::sync::Arc; | |
| + | |
| use crate::{ | |
| BLOCK_DIM, MIN_SIGMA, | |
| features::epf::SigmaSource, | |
| @@ -10,6 +12,7 @@ use crate::{ | |
| Channels, ChannelsMut, RenderPipelineInOutStage, | |
| stages::epf::common::{get_sigma, prepare_sad_mul_storage}, | |
| }, | |
| + util::AtomicRefCell, | |
| }; | |
| use jxl_simd::{F32SimdVec, SimdMask, simd_function}; | |
| @@ -21,7 +24,7 @@ pub struct Epf0Stage { | |
| /// (inverse) multiplier for sigma on borders | |
| border_sad_mul: f32, | |
| channel_scale: [f32; 3], | |
| - sigma: SigmaSource, | |
| + sigma: Arc<AtomicRefCell<SigmaSource>>, | |
| } | |
| impl std::fmt::Display for Epf0Stage { | |
| @@ -39,7 +42,7 @@ impl Epf0Stage { | |
| sigma_scale: f32, | |
| border_sad_mul: f32, | |
| channel_scale: [f32; 3], | |
| - sigma: SigmaSource, | |
| + sigma: Arc<AtomicRefCell<SigmaSource>>, | |
| ) -> Self { | |
| Self { | |
| sigma, | |
| @@ -64,7 +67,8 @@ simd_function!( | |
| assert_eq!(input_rows.len(), 3); | |
| assert_eq!(output_rows.len(), 3); | |
| - let row_sigma = stage.sigma.row(ypos / BLOCK_DIM); | |
| + let sigma = stage.sigma.borrow(); | |
| + let row_sigma = sigma.row(ypos / BLOCK_DIM); | |
| const { assert!(D::F32Vec::LEN <= 16) }; | |
| @@ -76,7 +80,8 @@ simd_function!( | |
| let sigma = get_sigma(d, x + xpos, row_sigma); | |
| let sad_mul = D::F32Vec::load(d, &sad_mul_storage[x % 8..]); | |
| - if D::F32Vec::splat(d, MIN_SIGMA).gt(sigma).all() { | |
| + let sigma_mask = D::F32Vec::splat(d, MIN_SIGMA).gt(sigma); | |
| + if sigma_mask.all() { | |
| for (input_c, output_c) in input_rows.iter().zip(output_rows.iter_mut()) { | |
| D::F32Vec::load(d, &input_c[3][3 + x..]).store(&mut output_c[0][x..]); | |
| } | |
| @@ -204,7 +209,10 @@ simd_function!( | |
| ] { | |
| out = D::F32Vec::load(d, &input_c[row_idx][col_idx..]).mul_add(sads[sad_idx], out); | |
| } | |
| - (out * inv_w).store(&mut output_c[0][x..]); | |
| + out *= inv_w; | |
| + let p33 = D::F32Vec::load(d, &input_c[3][3 + x..]); | |
| + let out = sigma_mask.if_then_else_f32(p33, out); | |
| + out.store(&mut output_c[0][x..]); | |
| } | |
| } | |
| }); | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs | |
| index 53570b34c9b0b..5a1807050f60b 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf1.rs | |
| @@ -3,6 +3,8 @@ | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| +use std::sync::Arc; | |
| + | |
| use crate::{ | |
| BLOCK_DIM, MIN_SIGMA, | |
| features::epf::SigmaSource, | |
| @@ -10,6 +12,7 @@ use crate::{ | |
| Channels, ChannelsMut, RenderPipelineInOutStage, | |
| stages::epf::common::{get_sigma, prepare_sad_mul_storage}, | |
| }, | |
| + util::AtomicRefCell, | |
| }; | |
| use jxl_simd::{F32SimdVec, SimdMask, simd_function}; | |
| @@ -21,7 +24,7 @@ pub struct Epf1Stage { | |
| /// (inverse) multiplier for sigma on borders | |
| border_sad_mul: f32, | |
| channel_scale: [f32; 3], | |
| - sigma: SigmaSource, | |
| + sigma: Arc<AtomicRefCell<SigmaSource>>, | |
| } | |
| impl std::fmt::Display for Epf1Stage { | |
| @@ -39,7 +42,7 @@ impl Epf1Stage { | |
| sigma_scale: f32, | |
| border_sad_mul: f32, | |
| channel_scale: [f32; 3], | |
| - sigma: SigmaSource, | |
| + sigma: Arc<AtomicRefCell<SigmaSource>>, | |
| ) -> Self { | |
| Self { | |
| sigma, | |
| @@ -64,7 +67,8 @@ fn epf1_process_row_chunk( | |
| assert_eq!(input_rows.len(), 3); | |
| assert_eq!(output_rows.len(), 3); | |
| - let row_sigma = stage.sigma.row(ypos / BLOCK_DIM); | |
| + let sigma = stage.sigma.borrow(); | |
| + let row_sigma = sigma.row(ypos / BLOCK_DIM); | |
| let sm = stage.sigma_scale * 1.65; | |
| let bsm = sm * stage.border_sad_mul; | |
| @@ -74,7 +78,8 @@ fn epf1_process_row_chunk( | |
| let sigma = get_sigma(d, x + xpos, row_sigma); | |
| let sad_mul = D::F32Vec::load(d, &sad_mul_storage[x % 8..]); | |
| - if D::F32Vec::splat(d, MIN_SIGMA).gt(sigma).all() { | |
| + let sigma_mask = D::F32Vec::splat(d, MIN_SIGMA).gt(sigma); | |
| + if sigma_mask.all() { | |
| for (input_c, output_c) in input_rows.iter().zip(output_rows.iter_mut()) { | |
| D::F32Vec::load(d, &input_c[2][2 + x..]).store(&mut output_c[0][x..]); | |
| } | |
| @@ -140,7 +145,10 @@ fn epf1_process_row_chunk( | |
| ] { | |
| out = D::F32Vec::load(d, &input_c[row_idx][col_idx..]).mul_add(sads[sad_idx], out); | |
| } | |
| - (out * inv_w).store(&mut output_c[0][x..]); | |
| + out *= inv_w; | |
| + let p22 = D::F32Vec::load(d, &input_c[2][2 + x..]); | |
| + let out = sigma_mask.if_then_else_f32(p22, out); | |
| + out.store(&mut output_c[0][x..]); | |
| } | |
| } | |
| }); | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs | |
| index 867b36b07c9f7..6c3cb920a62f5 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/epf2.rs | |
| @@ -3,6 +3,8 @@ | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| +use std::sync::Arc; | |
| + | |
| use crate::{ | |
| BLOCK_DIM, MIN_SIGMA, | |
| features::epf::SigmaSource, | |
| @@ -10,6 +12,7 @@ use crate::{ | |
| Channels, ChannelsMut, RenderPipelineInOutStage, | |
| stages::epf::common::{get_sigma, prepare_sad_mul_storage}, | |
| }, | |
| + util::AtomicRefCell, | |
| }; | |
| use jxl_simd::{F32SimdVec, SimdMask, simd_function}; | |
| @@ -21,7 +24,7 @@ pub struct Epf2Stage { | |
| /// (inverse) multiplier for sigma on borders | |
| border_sad_mul: f32, | |
| channel_scale: [f32; 3], | |
| - sigma: SigmaSource, | |
| + sigma: Arc<AtomicRefCell<SigmaSource>>, | |
| } | |
| impl std::fmt::Display for Epf2Stage { | |
| @@ -39,7 +42,7 @@ impl Epf2Stage { | |
| sigma_scale: f32, | |
| border_sad_mul: f32, | |
| channel_scale: [f32; 3], | |
| - sigma: SigmaSource, | |
| + sigma: Arc<AtomicRefCell<SigmaSource>>, | |
| ) -> Self { | |
| Self { | |
| sigma, | |
| @@ -65,7 +68,8 @@ fn epf2_process_row_chunk( | |
| let (input_x, input_y, input_b) = (&input_rows[0], &input_rows[1], &input_rows[2]); | |
| let (output_x, output_y, output_b) = output_rows.split_first_3_mut(); | |
| - let row_sigma = stage.sigma.row(ypos / BLOCK_DIM); | |
| + let sigma = stage.sigma.borrow(); | |
| + let row_sigma = sigma.row(ypos / BLOCK_DIM); | |
| const { assert!(D::F32Vec::LEN <= 16) }; | |
| @@ -77,7 +81,8 @@ fn epf2_process_row_chunk( | |
| let sigma = get_sigma(d, x + xpos, row_sigma); | |
| let sad_mul = D::F32Vec::load(d, &sad_mul_storage[x % 8..]); | |
| - if D::F32Vec::splat(d, MIN_SIGMA).gt(sigma).all() { | |
| + let sigma_mask = D::F32Vec::splat(d, MIN_SIGMA).gt(sigma); | |
| + if sigma_mask.all() { | |
| D::F32Vec::load(d, &input_x[1][1 + x..]).store(&mut output_x[0][x..]); | |
| D::F32Vec::load(d, &input_y[1][1 + x..]).store(&mut output_y[0][x..]); | |
| D::F32Vec::load(d, &input_b[1][1 + x..]).store(&mut output_b[0][x..]); | |
| @@ -119,9 +124,15 @@ fn epf2_process_row_chunk( | |
| let inv_w = D::F32Vec::splat(d, 1.0) / w_acc; | |
| - (x_acc * inv_w).store(&mut output_x[0][x..]); | |
| - (y_acc * inv_w).store(&mut output_y[0][x..]); | |
| - (b_acc * inv_w).store(&mut output_b[0][x..]); | |
| + x_acc *= inv_w; | |
| + y_acc *= inv_w; | |
| + b_acc *= inv_w; | |
| + x_acc = sigma_mask.if_then_else_f32(D::F32Vec::load(d, &input_x[1][1+x..]), x_acc); | |
| + y_acc = sigma_mask.if_then_else_f32(D::F32Vec::load(d, &input_y[1][1+x..]), y_acc); | |
| + b_acc = sigma_mask.if_then_else_f32(D::F32Vec::load(d, &input_b[1][1+x..]), b_acc); | |
| + x_acc.store(&mut output_x[0][x..]); | |
| + y_acc.store(&mut output_y[0][x..]); | |
| + b_acc.store(&mut output_b[0][x..]); | |
| } | |
| }); | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs | |
| index ae3ada234812c..90cccf901f064 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/epf/test.rs | |
| @@ -9,12 +9,13 @@ use rand::SeedableRng; | |
| use test_log::test; | |
| use super::*; | |
| -use crate::{error::Result, features::epf::SigmaSource, image::Image}; | |
| +use crate::{error::Result, features::epf::SigmaSource, image::Image, util::AtomicRefCell}; | |
| #[test] | |
| fn epf0_consistency() -> Result<()> { | |
| let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0); | |
| let sigma = SigmaSource::Variable(Arc::new(Image::new_random((128, 128), &mut rng).unwrap())); | |
| + let sigma = Arc::new(AtomicRefCell::new(sigma)); | |
| crate::render::test::test_stage_consistency( | |
| || Epf0Stage::new(0.9, 2.3 / 3.0, [40.0, 5.0, 3.5], sigma.clone()), | |
| (512, 512), | |
| @@ -26,6 +27,7 @@ fn epf0_consistency() -> Result<()> { | |
| fn epf1_consistency() -> Result<()> { | |
| let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0); | |
| let sigma = SigmaSource::Variable(Arc::new(Image::new_random((128, 128), &mut rng).unwrap())); | |
| + let sigma = Arc::new(AtomicRefCell::new(sigma)); | |
| crate::render::test::test_stage_consistency( | |
| || Epf1Stage::new(1.0, 2.3 / 3.0, [40.0, 5.0, 3.5], sigma.clone()), | |
| (512, 512), | |
| @@ -37,6 +39,7 @@ fn epf1_consistency() -> Result<()> { | |
| fn epf2_consistency() -> Result<()> { | |
| let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0); | |
| let sigma = SigmaSource::Variable(Arc::new(Image::new_random((128, 128), &mut rng).unwrap())); | |
| + let sigma = Arc::new(AtomicRefCell::new(sigma)); | |
| crate::render::test::test_stage_consistency( | |
| || Epf2Stage::new(6.5, 2.3 / 3.0, [40.0, 5.0, 3.5], sigma.clone()), | |
| (512, 512), | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs | |
| index 3002776c9e2c9..a3fc4bb043027 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/mod.rs | |
| @@ -17,7 +17,7 @@ mod premultiply_alpha; | |
| mod splines; | |
| mod spot; | |
| mod to_linear; | |
| -mod upsample; | |
| +pub mod upsample; | |
| mod xyb; | |
| mod ycbcr; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs | |
| index 88dc6395f0421..fd717f345b5fe 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/noise.rs | |
| @@ -5,10 +5,13 @@ | |
| #![allow(clippy::needless_range_loop)] | |
| +use std::{any::Any, sync::Arc}; | |
| + | |
| use crate::{ | |
| features::noise::Noise, | |
| frame::color_correlation_map::ColorCorrelationParams, | |
| render::{Channels, ChannelsMut, RenderPipelineInOutStage, RenderPipelineInPlaceStage}, | |
| + util::AtomicRefCell, | |
| }; | |
| use jxl_simd::{F32SimdVec, simd_function}; | |
| @@ -103,16 +106,16 @@ impl RenderPipelineInOutStage for ConvolveNoiseStage { | |
| } | |
| pub struct AddNoiseStage { | |
| - noise: Noise, | |
| + noise: Arc<AtomicRefCell<Noise>>, | |
| first_channel: usize, | |
| - color_correlation: ColorCorrelationParams, | |
| + color_correlation: Arc<AtomicRefCell<ColorCorrelationParams>>, | |
| } | |
| impl AddNoiseStage { | |
| #[allow(dead_code)] | |
| pub fn new( | |
| - noise: Noise, | |
| - color_correlation: ColorCorrelationParams, | |
| + noise: Arc<AtomicRefCell<Noise>>, | |
| + color_correlation: Arc<AtomicRefCell<ColorCorrelationParams>>, | |
| first_channel: usize, | |
| ) -> AddNoiseStage { | |
| assert!(first_channel > 2); | |
| @@ -148,11 +151,16 @@ impl RenderPipelineInPlaceStage for AddNoiseStage { | |
| _position: (usize, usize), | |
| xsize: usize, | |
| row: &mut [&mut [f32]], | |
| - _state: Option<&mut dyn std::any::Any>, | |
| + _state: Option<&mut dyn Any>, | |
| ) { | |
| + let noise = self.noise.borrow(); | |
| + if noise.lut == [0.0; 8] { | |
| + return; | |
| + } | |
| + let color_correlation = self.color_correlation.borrow(); | |
| let norm_const = 0.22; | |
| - let ytox = self.color_correlation.y_to_x_lf(); | |
| - let ytob = self.color_correlation.y_to_b_lf(); | |
| + let ytox = color_correlation.y_to_x_lf(); | |
| + let ytob = color_correlation.y_to_b_lf(); | |
| for x in 0..xsize { | |
| let row_rnd_r = row[3][x]; | |
| let row_rnd_g = row[4][x]; | |
| @@ -161,8 +169,8 @@ impl RenderPipelineInPlaceStage for AddNoiseStage { | |
| let vy = row[1][x]; | |
| let in_g = vy - vx; | |
| let in_r = vy + vx; | |
| - let noise_strength_g = self.noise.strength(in_g * 0.5); | |
| - let noise_strength_r = self.noise.strength(in_r * 0.5); | |
| + let noise_strength_g = noise.strength(in_g * 0.5); | |
| + let noise_strength_r = noise.strength(in_r * 0.5); | |
| let addit_rnd_noise_red = row_rnd_r * norm_const; | |
| let addit_rnd_noise_green = row_rnd_g * norm_const; | |
| let addit_rnd_noise_correlated = row_rnd_c * norm_const; | |
| @@ -182,6 +190,8 @@ impl RenderPipelineInPlaceStage for AddNoiseStage { | |
| #[cfg(test)] | |
| mod test { | |
| + use std::sync::Arc; | |
| + | |
| use crate::{ | |
| error::Result, | |
| features::noise::Noise, | |
| @@ -191,7 +201,7 @@ mod test { | |
| stages::noise::{AddNoiseStage, ConvolveNoiseStage}, | |
| test::make_and_run_simple_pipeline, | |
| }, | |
| - util::test::assert_almost_abs_eq, | |
| + util::{AtomicRefCell, test::assert_almost_abs_eq}, | |
| }; | |
| use test_log::test; | |
| @@ -228,10 +238,10 @@ mod test { | |
| let input_c4: Image<f32> = Image::new_range((xsize, ysize), 0.1, 0.1)?; | |
| let input_c5: Image<f32> = Image::new_range((xsize, ysize), 0.1, 0.1)?; | |
| let stage = AddNoiseStage::new( | |
| - Noise { | |
| + Arc::new(AtomicRefCell::new(Noise { | |
| lut: [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0], | |
| - }, | |
| - ColorCorrelationParams::default(), | |
| + })), | |
| + Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())), | |
| 3, | |
| ); | |
| let output = make_and_run_simple_pipeline( | |
| @@ -325,10 +335,10 @@ mod test { | |
| crate::render::test::test_stage_consistency( | |
| || { | |
| AddNoiseStage::new( | |
| - Noise { | |
| + Arc::new(AtomicRefCell::new(Noise { | |
| lut: [0.0, 2.0, 1.0, 0.0, 1.0, 3.0, 1.1, 2.3], | |
| - }, | |
| - ColorCorrelationParams::default(), | |
| + })), | |
| + Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())), | |
| 3, | |
| ) | |
| }, | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs | |
| index 225302a3955ee..38ac4e3d34085 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/patches.rs | |
| @@ -6,15 +6,31 @@ | |
| use std::{any::Any, sync::Arc}; | |
| use crate::{ | |
| - features::patches::PatchesDictionary, frame::ReferenceFrame, | |
| - headers::extra_channels::ExtraChannelInfo, render::RenderPipelineInPlaceStage, | |
| - util::NewWithCapacity as _, | |
| + features::patches::PatchesDictionary, | |
| + frame::ReferenceFrame, | |
| + headers::extra_channels::ExtraChannelInfo, | |
| + render::RenderPipelineInPlaceStage, | |
| + util::{AtomicRefCell, NewWithCapacity as _}, | |
| }; | |
| pub struct PatchesStage { | |
| - pub patches: Arc<PatchesDictionary>, | |
| - pub extra_channels: Vec<ExtraChannelInfo>, | |
| - pub decoder_state: Arc<[Option<ReferenceFrame>; 4]>, | |
| + patches: Arc<AtomicRefCell<PatchesDictionary>>, | |
| + extra_channels: Vec<ExtraChannelInfo>, | |
| + decoder_state: Arc<[Option<ReferenceFrame>; 4]>, | |
| +} | |
| + | |
| +impl PatchesStage { | |
| + pub fn new( | |
| + patches: Arc<AtomicRefCell<PatchesDictionary>>, | |
| + extra_channels: Vec<ExtraChannelInfo>, | |
| + decoder_state: Arc<[Option<ReferenceFrame>; 4]>, | |
| + ) -> Self { | |
| + Self { | |
| + patches, | |
| + extra_channels, | |
| + decoder_state, | |
| + } | |
| + } | |
| } | |
| impl std::fmt::Display for PatchesStage { | |
| @@ -37,8 +53,15 @@ impl RenderPipelineInPlaceStage for PatchesStage { | |
| row: &mut [&mut [f32]], | |
| state: Option<&mut dyn Any>, | |
| ) { | |
| + let patches = self.patches.borrow(); | |
| + if patches.positions.is_empty() { | |
| + return; | |
| + } | |
| let state: &mut Vec<usize> = state.unwrap().downcast_mut().unwrap(); | |
| - self.patches.add_one_row( | |
| + if state.capacity() < patches.positions.len() { | |
| + state.reserve(patches.positions.len() - state.len()); | |
| + } | |
| + patches.add_one_row( | |
| row, | |
| position, | |
| xsize, | |
| @@ -49,7 +72,10 @@ impl RenderPipelineInPlaceStage for PatchesStage { | |
| } | |
| fn init_local_state(&self, _thread_index: usize) -> crate::error::Result<Option<Box<dyn Any>>> { | |
| - let patches_for_row_result = Vec::<usize>::new_with_capacity(self.patches.positions.len())?; | |
| + // TODO(veluca): I think this is wrong, check that. | |
| + let patches = self.patches.borrow(); | |
| + let len = patches.positions.len(); | |
| + let patches_for_row_result = Vec::<usize>::new_with_capacity(len)?; | |
| Ok(Some(Box::new(patches_for_row_result) as Box<dyn Any>)) | |
| } | |
| } | |
| @@ -70,13 +96,13 @@ mod test { | |
| let (file_header, _, _) = | |
| read_headers_and_toc(include_bytes!("../../../resources/test/basic.jxl")).unwrap(); | |
| let mut rng = rand_xorshift::XorShiftRng::seed_from_u64(0); | |
| - let patch_dict = Arc::new(PatchesDictionary::random( | |
| + let patch_dict = PatchesDictionary::random( | |
| (500, 500), | |
| file_header.image_metadata.extra_channel_info.len(), | |
| 0, | |
| 4, | |
| &mut rng, | |
| - )); | |
| + ); | |
| let reference_frames = Arc::new([ | |
| Some(ReferenceFrame::random(&mut rng, 500, 500, 4, false)?), | |
| Some(ReferenceFrame::random(&mut rng, 500, 500, 4, false)?), | |
| @@ -85,7 +111,7 @@ mod test { | |
| ]); | |
| crate::render::test::test_stage_consistency( | |
| || PatchesStage { | |
| - patches: patch_dict.clone(), | |
| + patches: Arc::new(AtomicRefCell::new(patch_dict.clone())), | |
| extra_channels: file_header.image_metadata.extra_channel_info.clone(), | |
| decoder_state: reference_frames.clone(), | |
| }, | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs | |
| index b2ebd88461e85..295c33c663ad2 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/stages/splines.rs | |
| @@ -3,29 +3,33 @@ | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| +use std::{any::Any, sync::Arc}; | |
| + | |
| use crate::{ | |
| - error::Result, features::spline::Splines, frame::color_correlation_map::ColorCorrelationParams, | |
| - render::RenderPipelineInPlaceStage, | |
| + features::spline::Splines, frame::color_correlation_map::ColorCorrelationParams, | |
| + render::RenderPipelineInPlaceStage, util::AtomicRefCell, | |
| }; | |
| pub struct SplinesStage { | |
| - splines: Splines, | |
| + splines: Arc<AtomicRefCell<Splines>>, | |
| + image_size: (usize, usize), | |
| + color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>, | |
| + high_precision: bool, | |
| } | |
| impl SplinesStage { | |
| pub fn new( | |
| - mut splines: Splines, | |
| - frame_size: (usize, usize), | |
| - color_correlation_params: &ColorCorrelationParams, | |
| + splines: Arc<AtomicRefCell<Splines>>, | |
| + image_size: (usize, usize), | |
| + color_correlation_params: Arc<AtomicRefCell<ColorCorrelationParams>>, | |
| high_precision: bool, | |
| - ) -> Result<Self> { | |
| - splines.initialize_draw_cache( | |
| - frame_size.0 as u64, | |
| - frame_size.1 as u64, | |
| + ) -> Self { | |
| + SplinesStage { | |
| + splines, | |
| + image_size, | |
| color_correlation_params, | |
| high_precision, | |
| - )?; | |
| - Ok(SplinesStage { splines }) | |
| + } | |
| } | |
| } | |
| @@ -47,17 +51,36 @@ impl RenderPipelineInPlaceStage for SplinesStage { | |
| position: (usize, usize), | |
| xsize: usize, | |
| row: &mut [&mut [f32]], | |
| - _state: Option<&mut dyn std::any::Any>, | |
| + _state: Option<&mut dyn Any>, | |
| ) { | |
| - self.splines.draw_segments(row, position, xsize); | |
| + // TODO(veluca): this is wrong!! Race condition in MT. | |
| + let mut splines = self.splines.borrow_mut(); | |
| + if splines.splines.is_empty() { | |
| + return; | |
| + } | |
| + if !splines.is_initialized() { | |
| + let color_correlation_params = self.color_correlation_params.borrow(); | |
| + splines | |
| + .initialize_draw_cache( | |
| + self.image_size.0 as u64, | |
| + self.image_size.1 as u64, | |
| + &color_correlation_params, | |
| + self.high_precision, | |
| + ) | |
| + .unwrap(); | |
| + } | |
| + splines.draw_segments(row, position, xsize); | |
| } | |
| } | |
| #[cfg(test)] | |
| mod test { | |
| + use std::sync::Arc; | |
| + | |
| use crate::features::spline::{Point, QuantizedSpline, Splines}; | |
| use crate::frame::color_correlation_map::ColorCorrelationParams; | |
| use crate::render::test::make_and_run_simple_pipeline; | |
| + use crate::util::AtomicRefCell; | |
| use crate::util::test::{self, assert_all_almost_abs_eq, read_pfm}; | |
| use crate::{error::Result, image::Image, render::stages::splines::SplinesStage}; | |
| use test_log::test; | |
| @@ -104,12 +127,11 @@ mod test { | |
| ); | |
| let output: Vec<Image<f32>> = make_and_run_simple_pipeline( | |
| SplinesStage::new( | |
| - splines.clone(), | |
| + Arc::new(AtomicRefCell::new(splines.clone())), | |
| size, | |
| - &ColorCorrelationParams::default(), | |
| + Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())), | |
| true, | |
| - ) | |
| - .unwrap(), | |
| + ), | |
| &target_images, | |
| size, | |
| 0, | |
| @@ -123,6 +145,7 @@ mod test { | |
| Ok(()) | |
| } | |
| + #[ignore = "spline rendering is not fully consistent due to sqrt precision differences"] | |
| #[test] | |
| fn splines_consistency() -> Result<()> { | |
| let splines = Splines::create( | |
| @@ -160,12 +183,11 @@ mod test { | |
| crate::render::test::test_stage_consistency( | |
| || { | |
| SplinesStage::new( | |
| - splines.clone(), | |
| + Arc::new(AtomicRefCell::new(splines.clone())), | |
| (500, 500), | |
| - &ColorCorrelationParams::default(), | |
| + Arc::new(AtomicRefCell::new(ColorCorrelationParams::default())), | |
| false, | |
| ) | |
| - .unwrap() | |
| }, | |
| (500, 500), | |
| 6, | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs | |
| index 4e1f80a5a3f90..77797bc4d2b23 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/test.rs | |
| @@ -103,10 +103,9 @@ fn make_and_run_simple_pipeline_impl<InputT: ImageDataType, OutputT: ImageDataTy | |
| image_size, | |
| downsampling_shift, | |
| LOG_GROUP_SIZE, | |
| - 1, | |
| chunk_size, | |
| ) | |
| - .add_stage_internal(stage)?; | |
| + .add_stage_internal(stage); | |
| let jxl_data_type = match OutputT::DATA_TYPE_ID { | |
| DataTypeTag::U8 | DataTypeTag::I8 => JxlDataFormat::U8 { bit_depth: 8 }, | |
| @@ -129,7 +128,7 @@ fn make_and_run_simple_pipeline_impl<InputT: ImageDataType, OutputT: ImageDataTy | |
| JxlColorType::Grayscale, | |
| jxl_data_type, | |
| false, | |
| - )?; | |
| + ); | |
| } | |
| let mut pipeline = pipeline.build()?; | |
| @@ -168,7 +167,7 @@ fn make_and_run_simple_pipeline_impl<InputT: ImageDataType, OutputT: ImageDataTy | |
| pipeline.set_buffer_for_group( | |
| c, | |
| g, | |
| - 1, | |
| + true, | |
| extract_group_rect(&input_images[c], g, log_group_size)?, | |
| &mut buffer_splitter, | |
| )?; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs | |
| index 74cb6784bf85c..6fa51988ba393 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/fast_math.rs | |
| @@ -58,6 +58,22 @@ pub fn fast_erff(x: f32) -> f32 { | |
| result.copysign(x) | |
| } | |
| +#[inline(always)] | |
| +pub fn fast_erff_simd<D: SimdDescriptor>(d: D, x: D::F32Vec) -> D::F32Vec { | |
| + let absx = x.abs(); | |
| + let denom1 = absx.mul_add( | |
| + D::F32Vec::splat(d, 7.77394369e-02), | |
| + D::F32Vec::splat(d, 2.05260015e-04), | |
| + ); | |
| + let denom2 = denom1.mul_add(absx, D::F32Vec::splat(d, 2.32120216e-01)); | |
| + let denom3 = denom2.mul_add(absx, D::F32Vec::splat(d, 2.77820801e-01)); | |
| + let denom4 = denom3.mul_add(absx, D::F32Vec::splat(d, 1.0)); | |
| + let denom5 = denom4 * denom4; | |
| + let inv_denom5 = D::F32Vec::splat(d, 1.0) / denom5; | |
| + let result = D::F32Vec::splat(d, 1.0) - inv_denom5 * inv_denom5; | |
| + result.copysign(x) | |
| +} | |
| + | |
| #[inline] | |
| pub fn fast_pow2f(x: f32) -> f32 { | |
| let x_floor = x.floor(); | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs | |
| new file mode 100644 | |
| index 0000000000000..4d7e6c2fcd8e5 | |
| --- /dev/null | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs | |
| @@ -0,0 +1,18 @@ | |
| +// Copyright (c) the JPEG XL Project Authors. All rights reserved. | |
| +// | |
| +// Use of this source code is governed by a BSD-style | |
| +// license that can be found in the LICENSE file. | |
| + | |
| +/// Mirror-reflects a value v to fit in a [0; s) range. | |
| +pub fn mirror(mut v: isize, s: usize) -> usize { | |
| + // TODO(veluca): consider speeding this up if needed. | |
| + loop { | |
| + if v < 0 { | |
| + v = -v - 1; | |
| + } else if v >= s as isize { | |
| + v = s as isize * 2 - v - 1; | |
| + } else { | |
| + return v as usize; | |
| + } | |
| + } | |
| +} | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs | |
| index ac8db74e3a558..514820bcd24ef 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs | |
| @@ -14,6 +14,7 @@ mod fast_math; | |
| mod float16; | |
| mod linalg; | |
| mod log2; | |
| +mod mirror; | |
| pub mod ndarray; | |
| mod rational_poly; | |
| mod shift_right_ceil; | |
| @@ -30,6 +31,7 @@ pub use fast_math::*; | |
| pub use float16::f16; | |
| pub use linalg::*; | |
| pub use log2::*; | |
| +pub use mirror::*; | |
| pub(crate) use ndarray::*; | |
| pub use rational_poly::*; | |
| pub use shift_right_ceil::*; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs | |
| index a9b3ca24d4cd6..8e47c7bb1ce68 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/test.rs | |
| @@ -222,6 +222,39 @@ pub fn check_equal_images<T: ImageDataType>(a: &Image<T>, b: &Image<T>) { | |
| } | |
| } | |
| +/// Encode a u64 value as a LEB128 varint. Useful for building test data for | |
| +/// frame index boxes and other container structures. | |
| +pub fn encode_varint(mut value: u64) -> Vec<u8> { | |
| + let mut result = Vec::new(); | |
| + loop { | |
| + let mut byte = (value & 0x7f) as u8; | |
| + value >>= 7; | |
| + if value > 0 { | |
| + byte |= 0x80; | |
| + } | |
| + result.push(byte); | |
| + if value == 0 { | |
| + break; | |
| + } | |
| + } | |
| + result | |
| +} | |
| + | |
| +/// Build raw jxli frame index box content bytes from tnum, tden, and | |
| +/// delta-coded entries `(OFF_delta, T, F)`. | |
| +pub fn build_frame_index_content(tnum: u32, tden: u32, entries: &[(u64, u64, u64)]) -> Vec<u8> { | |
| + let mut buf = Vec::new(); | |
| + buf.extend(encode_varint(entries.len() as u64)); | |
| + buf.extend(tnum.to_be_bytes()); | |
| + buf.extend(tden.to_be_bytes()); | |
| + for &(off, t, f) in entries { | |
| + buf.extend(encode_varint(off)); | |
| + buf.extend(encode_varint(t)); | |
| + buf.extend(encode_varint(f)); | |
| + } | |
| + buf | |
| +} | |
| + | |
| pub fn read_headers_and_toc(image: &[u8]) -> Result<(FileHeader, FrameHeader, Toc), JXLError> { | |
| let codestream = ContainerParser::collect_codestream(image).unwrap(); | |
| let mut br = BitReader::new(&codestream); | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs | |
| index c0d6499398b2c..5dbd975587f40 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/aarch64/neon.rs | |
| @@ -14,7 +14,7 @@ use std::{ | |
| use crate::U32SimdVec; | |
| -use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask}; | |
| +use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec}; | |
| // Safety invariant: this type is only ever constructed if neon is available. | |
| #[derive(Clone, Copy, Debug)] | |
| @@ -41,6 +41,10 @@ impl SimdDescriptor for NeonDescriptor { | |
| type U32Vec = U32VecNeon; | |
| + type U16Vec = U16VecNeon; | |
| + | |
| + type U8Vec = U8VecNeon; | |
| + | |
| type Mask = MaskNeon; | |
| type Bf16Table8 = Bf16Table8Neon; | |
| @@ -122,7 +126,7 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| fn load(d: Self::Descriptor, mem: &[f32]) -> Self { | |
| assert!(mem.len() >= Self::LEN); | |
| // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available | |
| - // from the safety invariant on `d`. | |
| + // from the safety invariant on `d`. vld1q_f32 supports unaligned loads. | |
| Self(unsafe { vld1q_f32(mem.as_ptr()) }, d) | |
| } | |
| @@ -130,7 +134,7 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| fn store(&self, mem: &mut [f32]) { | |
| assert!(mem.len() >= Self::LEN); | |
| // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available | |
| - // from the safety invariant on `d`. | |
| + // from the safety invariant on `d`. vst1q_f32 supports unaligned stores. | |
| unsafe { vst1q_f32(mem.as_mut_ptr(), self.0) } | |
| } | |
| @@ -138,9 +142,9 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<f32>]) { | |
| assert!(dest.len() >= 2 * Self::LEN); | |
| // SAFETY: we just checked that `dest` has enough space, and neon is available | |
| - // from the safety invariant on the descriptor stored in `a`. | |
| + // from the safety invariant on the descriptor stored in `a`. vst2q_f32 supports unaligned stores. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| vst2q_f32(dest_ptr, float32x4x2_t(a.0, b.0)); | |
| } | |
| } | |
| @@ -148,9 +152,9 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| #[inline(always)] | |
| fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<f32>]) { | |
| assert!(dest.len() >= 3 * Self::LEN); | |
| - // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. vst3q_f32 supports unaligned stores. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| vst3q_f32(dest_ptr, float32x4x3_t(a.0, b.0, c.0)); | |
| } | |
| } | |
| @@ -165,9 +169,9 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| ) { | |
| assert!(dest.len() >= 4 * Self::LEN); | |
| // SAFETY: we just checked that `dest` has enough space, and neon is available | |
| - // from the safety invariant on the descriptor stored in `a`. | |
| + // from the safety invariant on the descriptor stored in `a`. vst4q_f32 supports unaligned stores. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| vst4q_f32(dest_ptr, float32x4x4_t(a.0, b.0, c.0, d.0)); | |
| } | |
| } | |
| @@ -277,7 +281,7 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| fn load_deinterleaved_2(d: Self::Descriptor, src: &[f32]) -> (Self, Self) { | |
| assert!(src.len() >= 2 * Self::LEN); | |
| // SAFETY: we just checked that `src` has enough space, and neon is available | |
| - // from the safety invariant on `d`. | |
| + // from the safety invariant on `d`. vld2q_f32 supports unaligned loads. | |
| let float32x4x2_t(a, b) = unsafe { vld2q_f32(src.as_ptr()) }; | |
| (Self(a, d), Self(b, d)) | |
| } | |
| @@ -286,7 +290,7 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| fn load_deinterleaved_3(d: Self::Descriptor, src: &[f32]) -> (Self, Self, Self) { | |
| assert!(src.len() >= 3 * Self::LEN); | |
| // SAFETY: we just checked that `src` has enough space, and neon is available | |
| - // from the safety invariant on `d`. | |
| + // from the safety invariant on `d`. vld3q_f32 supports unaligned loads. | |
| let float32x4x3_t(a, b, c) = unsafe { vld3q_f32(src.as_ptr()) }; | |
| (Self(a, d), Self(b, d), Self(c, d)) | |
| } | |
| @@ -295,7 +299,7 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| fn load_deinterleaved_4(d: Self::Descriptor, src: &[f32]) -> (Self, Self, Self, Self) { | |
| assert!(src.len() >= 4 * Self::LEN); | |
| // SAFETY: we just checked that `src` has enough space, and neon is available | |
| - // from the safety invariant on `d`. | |
| + // from the safety invariant on `d`. vld4q_f32 supports unaligned loads. | |
| let float32x4x4_t(a, b, c, e) = unsafe { vld4q_f32(src.as_ptr()) }; | |
| (Self(a, d), Self(b, d), Self(c, d), Self(e, d)) | |
| } | |
| @@ -337,7 +341,7 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| assert!(data.len() > 3); | |
| // Transposed load | |
| - // SAFETY: input is verified to be large enough for this pointer. | |
| + // SAFETY: input is verified to be large enough for this pointer. vld4q_f32 supports unaligned loads. | |
| let float32x4x4_t(p0, p1, p2, p3) = unsafe { vld4q_f32(data.as_ptr().cast()) }; | |
| F32VecNeon(p0, d).store_array(&mut data[0]); | |
| @@ -422,9 +426,9 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| let u16s = vqmovun_s32(i32s); | |
| let u8s = vqmovn_u16(vcombine_u16(u16s, u16s)); | |
| // Store lower 4 bytes | |
| - // SAFETY: we checked dest has enough space | |
| + // SAFETY: we checked dest has enough space. vst1_lane_u32 supports unaligned stores. | |
| unsafe { | |
| - vst1_lane_u32::<0>(dest.as_mut_ptr() as *mut u32, vreinterpret_u32_u8(u8s)); | |
| + vst1_lane_u32::<0>(dest.as_mut_ptr().cast(), vreinterpret_u32_u8(u8s)); | |
| } | |
| } | |
| @@ -436,7 +440,7 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| let i32s = vcvtq_s32_f32(rounded); | |
| let u16s = vqmovun_s32(i32s); | |
| // Store 4 u16s (8 bytes) | |
| - // SAFETY: we checked dest has enough space | |
| + // SAFETY: we checked dest has enough space. vst1_u16 supports unaligned stores. | |
| unsafe { | |
| vst1_u16(dest.as_mut_ptr(), u16s); | |
| } | |
| @@ -447,7 +451,8 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| // Use inline asm because Rust stdarch incorrectly requires fp16 target feature | |
| // for vcvt_f16_f32 (fixed in https://github.com/rust-lang/stdarch/pull/1978) | |
| let f16_bits: uint16x4_t; | |
| - // SAFETY: NEON is available (guaranteed by descriptor), dest has enough space | |
| + // SAFETY: NEON is available (guaranteed by descriptor), dest has enough space, | |
| + // vst1_u16 supports unaligned stores. | |
| unsafe { | |
| std::arch::asm!( | |
| "fcvtn {out:v}.4h, {inp:v}.4s", | |
| @@ -466,7 +471,8 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| // Use inline asm because Rust stdarch incorrectly requires fp16 target feature | |
| // for vcvt_f32_f16 (fixed in https://github.com/rust-lang/stdarch/pull/1978) | |
| let result: float32x4_t; | |
| - // SAFETY: NEON is available (guaranteed by descriptor), mem has enough space | |
| + // SAFETY: NEON is available (guaranteed by descriptor), mem has enough space. | |
| + // vld1_u16 supports unaligned loads. | |
| unsafe { | |
| let f16_bits = vld1_u16(mem.as_ptr()); | |
| std::arch::asm!( | |
| @@ -487,7 +493,7 @@ unsafe impl F32SimdVec for F32VecNeon { | |
| // Convert f32 table to BF16 packed in 128 bits (16 bytes for 8 entries) | |
| // BF16 is the high 16 bits of f32 | |
| // SAFETY: neon is available from target_feature, and `table` is large | |
| - // enough for the loads. | |
| + // enough for the loads. vld1q_f32 supports unaligned loads. | |
| let (table_lo, table_hi) = | |
| unsafe { (vld1q_f32(table.as_ptr()), vld1q_f32(table.as_ptr().add(4))) }; | |
| @@ -695,13 +701,28 @@ impl I32SimdVec for I32VecNeon { | |
| fn store_u16(self, dest: &mut [u16]) { | |
| assert!(dest.len() >= Self::LEN); | |
| // SAFETY: We know neon is available from the safety invariant on `self.1`, | |
| - // and we just checked that `dest` has enough space. | |
| + // and we just checked that `dest` has enough space. vst1_u16 supports unaligned | |
| + // stores. | |
| unsafe { | |
| // vmovn narrows i32 to i16 by taking the lower 16 bits | |
| let narrowed = vmovn_s32(self.0); | |
| vst1_u16(dest.as_mut_ptr(), vreinterpret_u16_s16(narrowed)); | |
| } | |
| } | |
| + | |
| + #[inline(always)] | |
| + fn store_u8(self, dest: &mut [u8]) { | |
| + assert!(dest.len() >= Self::LEN); | |
| + // SAFETY: We know neon is available from the safety invariant on `self.1`, | |
| + // and we just checked that `dest` has enough space. vst1_lane_u32 supports unaligned stores. | |
| + unsafe { | |
| + // vmovn narrows i32 -> i16 -> i8 | |
| + let narrowed_i16 = vmovn_s32(self.0); | |
| + let combined_i16 = vcombine_s16(narrowed_i16, narrowed_i16); | |
| + let narrowed_i8 = vmovn_s16(combined_i16); | |
| + vst1_lane_u32::<0>(dest.as_mut_ptr().cast(), vreinterpret_u32_s8(narrowed_i8)); | |
| + } | |
| + } | |
| } | |
| impl Add<I32VecNeon> for I32VecNeon { | |
| @@ -837,6 +858,150 @@ impl U32SimdVec for U32VecNeon { | |
| } | |
| } | |
| +#[derive(Clone, Copy, Debug)] | |
| +#[repr(transparent)] | |
| +pub struct U8VecNeon(uint8x16_t, NeonDescriptor); | |
| + | |
| +// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*) | |
| +// ensure that they write valid data to the output slice without reading uninitialized memory. | |
| +unsafe impl U8SimdVec for U8VecNeon { | |
| + type Descriptor = NeonDescriptor; | |
| + const LEN: usize = 16; | |
| + | |
| + #[inline(always)] | |
| + fn load(d: Self::Descriptor, mem: &[u8]) -> Self { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available | |
| + // from the safety invariant on `d`. vld1q_u8 supports unaligned loads. | |
| + Self(unsafe { vld1q_u8(mem.as_ptr()) }, d) | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn splat(d: Self::Descriptor, v: u8) -> Self { | |
| + // SAFETY: We know neon is available from the safety invariant on `d`. | |
| + Self(unsafe { vdupq_n_u8(v) }, d) | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store(&self, mem: &mut [u8]) { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available | |
| + // from the safety invariant on `d`. vst1q_u8 supports unaligned stores. | |
| + unsafe { vst1q_u8(mem.as_mut_ptr(), self.0) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) { | |
| + assert!(dest.len() >= 2 * Self::LEN); | |
| + // SAFETY: we just checked that `dest` has enough space, and neon is available | |
| + // from the safety invariant on the descriptor stored in `a`. vst2q_u8 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<u8>(); | |
| + vst2q_u8(dest_ptr, uint8x16x2_t(a.0, b.0)); | |
| + } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) { | |
| + assert!(dest.len() >= 3 * Self::LEN); | |
| + // SAFETY: we just checked that `dest` has enough space, and neon is available | |
| + // from the safety invariant on the descriptor stored in `a`. vst3q_u8 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<u8>(); | |
| + vst3q_u8(dest_ptr, uint8x16x3_t(a.0, b.0, c.0)); | |
| + } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_4_uninit( | |
| + a: Self, | |
| + b: Self, | |
| + c: Self, | |
| + d: Self, | |
| + dest: &mut [MaybeUninit<u8>], | |
| + ) { | |
| + assert!(dest.len() >= 4 * Self::LEN); | |
| + // SAFETY: we just checked that `dest` has enough space, and neon is available | |
| + // from the safety invariant on the descriptor stored in `a`. vst4q_u8 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<u8>(); | |
| + vst4q_u8(dest_ptr, uint8x16x4_t(a.0, b.0, c.0, d.0)); | |
| + } | |
| + } | |
| +} | |
| + | |
| +#[derive(Clone, Copy, Debug)] | |
| +#[repr(transparent)] | |
| +pub struct U16VecNeon(uint16x8_t, NeonDescriptor); | |
| + | |
| +// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*) | |
| +// ensure that they write valid data to the output slice without reading uninitialized memory. | |
| +unsafe impl U16SimdVec for U16VecNeon { | |
| + type Descriptor = NeonDescriptor; | |
| + const LEN: usize = 8; | |
| + | |
| + #[inline(always)] | |
| + fn load(d: Self::Descriptor, mem: &[u16]) -> Self { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available | |
| + // from the safety invariant on `d`. vld1q_u16 supports unaligned loads. | |
| + Self(unsafe { vld1q_u16(mem.as_ptr().cast()) }, d) | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn splat(d: Self::Descriptor, v: u16) -> Self { | |
| + // SAFETY: We know neon is available from the safety invariant on `d`. | |
| + Self(unsafe { vdupq_n_u16(v) }, d) | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store(&self, mem: &mut [u16]) { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know neon is available | |
| + // from the safety invariant on `d`. vst1q_u16 supports unaligned stores. | |
| + unsafe { vst1q_u16(mem.as_mut_ptr().cast(), self.0) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) { | |
| + assert!(dest.len() >= 2 * Self::LEN); | |
| + // SAFETY: we just checked that `dest` has enough space, and neon is available | |
| + // from the safety invariant on the descriptor stored in `a`. vst2q_u16 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<u16>(); | |
| + vst2q_u16(dest_ptr, uint16x8x2_t(a.0, b.0)); | |
| + } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) { | |
| + assert!(dest.len() >= 3 * Self::LEN); | |
| + // SAFETY: we just checked that `dest` has enough space, and neon is available | |
| + // from the safety invariant on the descriptor stored in `a`. vst3q_u16 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<u16>(); | |
| + vst3q_u16(dest_ptr, uint16x8x3_t(a.0, b.0, c.0)); | |
| + } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_4_uninit( | |
| + a: Self, | |
| + b: Self, | |
| + c: Self, | |
| + d: Self, | |
| + dest: &mut [MaybeUninit<u16>], | |
| + ) { | |
| + assert!(dest.len() >= 4 * Self::LEN); | |
| + // SAFETY: we just checked that `dest` has enough space, and neon is available | |
| + // from the safety invariant on the descriptor stored in `a`. vst4q_u16 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<u16>(); | |
| + vst4q_u16(dest_ptr, uint16x8x4_t(a.0, b.0, c.0, d.0)); | |
| + } | |
| + } | |
| +} | |
| + | |
| #[derive(Clone, Copy, Debug)] | |
| #[repr(transparent)] | |
| pub struct MaskNeon(uint32x4_t, NeonDescriptor); | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs | |
| index 4f06dbddcd7b8..0129db2a1fa96 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/lib.rs | |
| @@ -44,6 +44,10 @@ pub trait SimdDescriptor: Sized + Copy + Debug + Send + Sync { | |
| type U32Vec: U32SimdVec<Descriptor = Self>; | |
| + type U16Vec: U16SimdVec<Descriptor = Self>; | |
| + | |
| + type U8Vec: U8SimdVec<Descriptor = Self>; | |
| + | |
| type Mask: SimdMask<Descriptor = Self>; | |
| /// Prepared 8-entry BF16 lookup table for fast approximate lookups. | |
| @@ -124,7 +128,7 @@ pub unsafe trait F32SimdVec: | |
| // SAFETY: f32 and MaybeUninit<f32> have the same layout. | |
| // We are writing to initialized memory, so treating it as uninit for writing is fine. | |
| let dest = unsafe { | |
| - std::slice::from_raw_parts_mut(dest.as_mut_ptr() as *mut MaybeUninit<f32>, dest.len()) | |
| + std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<f32>>(), dest.len()) | |
| }; | |
| Self::store_interleaved_2_uninit(a, b, dest); | |
| } | |
| @@ -136,7 +140,7 @@ pub unsafe trait F32SimdVec: | |
| // SAFETY: f32 and MaybeUninit<f32> have the same layout. | |
| // We are writing to initialized memory, so treating it as uninit for writing is fine. | |
| let dest = unsafe { | |
| - std::slice::from_raw_parts_mut(dest.as_mut_ptr() as *mut MaybeUninit<f32>, dest.len()) | |
| + std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<f32>>(), dest.len()) | |
| }; | |
| Self::store_interleaved_3_uninit(a, b, c, dest); | |
| } | |
| @@ -148,7 +152,7 @@ pub unsafe trait F32SimdVec: | |
| // SAFETY: f32 and MaybeUninit<f32> have the same layout. | |
| // We are writing to initialized memory, so treating it as uninit for writing is fine. | |
| let dest = unsafe { | |
| - std::slice::from_raw_parts_mut(dest.as_mut_ptr() as *mut MaybeUninit<f32>, dest.len()) | |
| + std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<f32>>(), dest.len()) | |
| }; | |
| Self::store_interleaved_4_uninit(a, b, c, d, dest); | |
| } | |
| @@ -344,6 +348,10 @@ pub trait I32SimdVec: | |
| /// Stores the lower 16 bits of each i32 lane as u16 values. | |
| /// Requires `dest.len() >= Self::LEN` or it will panic. | |
| fn store_u16(self, dest: &mut [u16]); | |
| + | |
| + /// Stores the lower 8 bits of each i32 lane as u8 values. | |
| + /// Requires `dest.len() >= Self::LEN` or it will panic. | |
| + fn store_u8(self, dest: &mut [u8]); | |
| } | |
| pub trait U32SimdVec: Sized + Copy + Debug + Send + Sync { | |
| @@ -357,6 +365,150 @@ pub trait U32SimdVec: Sized + Copy + Debug + Send + Sync { | |
| fn shr<const AMOUNT_U: u32, const AMOUNT_I: i32>(self) -> Self; | |
| } | |
| +/// # Safety | |
| +/// | |
| +/// Implementors are required to respect the safety promises of the methods in this trait. | |
| +/// Specifically, this applies to the store_*_uninit methods. | |
| +pub unsafe trait U8SimdVec: Sized + Copy + Debug + Send + Sync { | |
| + type Descriptor: SimdDescriptor; | |
| + | |
| + const LEN: usize; | |
| + | |
| + fn load(d: Self::Descriptor, mem: &[u8]) -> Self; | |
| + fn splat(d: Self::Descriptor, v: u8) -> Self; | |
| + fn store(&self, mem: &mut [u8]); | |
| + | |
| + /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...]. | |
| + /// Requires `dest.len() >= 2 * Self::LEN` or it will panic. | |
| + #[inline(always)] | |
| + fn store_interleaved_2(a: Self, b: Self, dest: &mut [u8]) { | |
| + // SAFETY: u8 and MaybeUninit<u8> have the same layout. | |
| + // We are writing to initialized memory, so treating it as uninit for writing is fine. | |
| + let dest = unsafe { | |
| + std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u8>>(), dest.len()) | |
| + }; | |
| + Self::store_interleaved_2_uninit(a, b, dest); | |
| + } | |
| + | |
| + /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...]. | |
| + /// Requires `dest.len() >= 3 * Self::LEN` or it will panic. | |
| + #[inline(always)] | |
| + fn store_interleaved_3(a: Self, b: Self, c: Self, dest: &mut [u8]) { | |
| + // SAFETY: u8 and MaybeUninit<u8> have the same layout. | |
| + // We are writing to initialized memory, so treating it as uninit for writing is fine. | |
| + let dest = unsafe { | |
| + std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u8>>(), dest.len()) | |
| + }; | |
| + Self::store_interleaved_3_uninit(a, b, c, dest); | |
| + } | |
| + | |
| + /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...]. | |
| + /// Requires `dest.len() >= 4 * Self::LEN` or it will panic. | |
| + #[inline(always)] | |
| + fn store_interleaved_4(a: Self, b: Self, c: Self, d: Self, dest: &mut [u8]) { | |
| + // SAFETY: u8 and MaybeUninit<u8> have the same layout. | |
| + // We are writing to initialized memory, so treating it as uninit for writing is fine. | |
| + let dest = unsafe { | |
| + std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u8>>(), dest.len()) | |
| + }; | |
| + Self::store_interleaved_4_uninit(a, b, c, d, dest); | |
| + } | |
| + | |
| + /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...]. | |
| + /// Requires `dest.len() >= 2 * Self::LEN` or it will panic. | |
| + /// | |
| + /// Safety note: | |
| + /// Does not write uninitialized data into `dest`. | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]); | |
| + | |
| + /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...]. | |
| + /// Requires `dest.len() >= 3 * Self::LEN` or it will panic. | |
| + /// Safety note: | |
| + /// Does not write uninitialized data into `dest`. | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]); | |
| + | |
| + /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...]. | |
| + /// Requires `dest.len() >= 4 * Self::LEN` or it will panic. | |
| + /// Safety note: | |
| + /// Does not write uninitialized data into `dest`. | |
| + fn store_interleaved_4_uninit(a: Self, b: Self, c: Self, d: Self, dest: &mut [MaybeUninit<u8>]); | |
| +} | |
| + | |
| +/// # Safety | |
| +/// | |
| +/// Implementors are required to respect the safety promises of the methods in this trait. | |
| +/// Specifically, this applies to the store_*_uninit methods. | |
| +pub unsafe trait U16SimdVec: Sized + Copy + Debug + Send + Sync { | |
| + type Descriptor: SimdDescriptor; | |
| + | |
| + const LEN: usize; | |
| + | |
| + fn load(d: Self::Descriptor, mem: &[u16]) -> Self; | |
| + fn splat(d: Self::Descriptor, v: u16) -> Self; | |
| + fn store(&self, mem: &mut [u16]); | |
| + | |
| + /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...]. | |
| + /// Requires `dest.len() >= 2 * Self::LEN` or it will panic. | |
| + #[inline(always)] | |
| + fn store_interleaved_2(a: Self, b: Self, dest: &mut [u16]) { | |
| + // SAFETY: u16 and MaybeUninit<u16> have the same layout. | |
| + // We are writing to initialized memory, so treating it as uninit for writing is fine. | |
| + let dest = unsafe { | |
| + std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u16>>(), dest.len()) | |
| + }; | |
| + Self::store_interleaved_2_uninit(a, b, dest); | |
| + } | |
| + | |
| + /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...]. | |
| + /// Requires `dest.len() >= 3 * Self::LEN` or it will panic. | |
| + #[inline(always)] | |
| + fn store_interleaved_3(a: Self, b: Self, c: Self, dest: &mut [u16]) { | |
| + // SAFETY: u16 and MaybeUninit<u16> have the same layout. | |
| + // We are writing to initialized memory, so treating it as uninit for writing is fine. | |
| + let dest = unsafe { | |
| + std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u16>>(), dest.len()) | |
| + }; | |
| + Self::store_interleaved_3_uninit(a, b, c, dest); | |
| + } | |
| + | |
| + /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...]. | |
| + /// Requires `dest.len() >= 4 * Self::LEN` or it will panic. | |
| + #[inline(always)] | |
| + fn store_interleaved_4(a: Self, b: Self, c: Self, d: Self, dest: &mut [u16]) { | |
| + // SAFETY: u16 and MaybeUninit<u16> have the same layout. | |
| + // We are writing to initialized memory, so treating it as uninit for writing is fine. | |
| + let dest = unsafe { | |
| + std::slice::from_raw_parts_mut(dest.as_mut_ptr().cast::<MaybeUninit<u16>>(), dest.len()) | |
| + }; | |
| + Self::store_interleaved_4_uninit(a, b, c, d, dest); | |
| + } | |
| + | |
| + /// Stores two vectors interleaved: [a0, b0, a1, b1, a2, b2, ...]. | |
| + /// Requires `dest.len() >= 2 * Self::LEN` or it will panic. | |
| + /// | |
| + /// Safety note: | |
| + /// Does not write uninitialized data into `dest`. | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]); | |
| + | |
| + /// Stores three vectors interleaved: [a0, b0, c0, a1, b1, c1, ...]. | |
| + /// Requires `dest.len() >= 3 * Self::LEN` or it will panic. | |
| + /// Safety note: | |
| + /// Does not write uninitialized data into `dest`. | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]); | |
| + | |
| + /// Stores four vectors interleaved: [a0, b0, c0, d0, a1, b1, c1, d1, ...]. | |
| + /// Requires `dest.len() >= 4 * Self::LEN` or it will panic. | |
| + /// Safety note: | |
| + /// Does not write uninitialized data into `dest`. | |
| + fn store_interleaved_4_uninit( | |
| + a: Self, | |
| + b: Self, | |
| + c: Self, | |
| + d: Self, | |
| + dest: &mut [MaybeUninit<u16>], | |
| + ); | |
| +} | |
| + | |
| #[macro_export] | |
| macro_rules! shl { | |
| ($val: expr, $amount: literal) => { | |
| @@ -436,7 +588,8 @@ mod test { | |
| use arbtest::arbitrary::Unstructured; | |
| use crate::{ | |
| - F32SimdVec, I32SimdVec, ScalarDescriptor, SimdDescriptor, test_all_instruction_sets, | |
| + F32SimdVec, I32SimdVec, ScalarDescriptor, SimdDescriptor, U8SimdVec, U16SimdVec, | |
| + test_all_instruction_sets, | |
| }; | |
| enum Distribution { | |
| @@ -1215,4 +1368,160 @@ mod test { | |
| } | |
| } | |
| test_all_instruction_sets!(test_store_u16); | |
| + | |
| + fn test_store_interleaved_2_u8<D: SimdDescriptor>(d: D) { | |
| + let len = D::U8Vec::LEN; | |
| + let a: Vec<u8> = (0..len).map(|i| i as u8).collect(); | |
| + let b: Vec<u8> = (0..len).map(|i| (i + 100) as u8).collect(); | |
| + let mut output = vec![0u8; 2 * len]; | |
| + | |
| + let a_vec = D::U8Vec::load(d, &a); | |
| + let b_vec = D::U8Vec::load(d, &b); | |
| + D::U8Vec::store_interleaved_2(a_vec, b_vec, &mut output); | |
| + | |
| + for i in 0..len { | |
| + assert_eq!(output[2 * i], a[i]); | |
| + assert_eq!(output[2 * i + 1], b[i]); | |
| + } | |
| + } | |
| + test_all_instruction_sets!(test_store_interleaved_2_u8); | |
| + | |
| + fn test_store_interleaved_3_u8<D: SimdDescriptor>(d: D) { | |
| + let len = D::U8Vec::LEN; | |
| + let a: Vec<u8> = (0..len).map(|i| i as u8).collect(); | |
| + let b: Vec<u8> = (0..len).map(|i| (i + 100) as u8).collect(); | |
| + let c: Vec<u8> = (0..len).map(|i| (i + 50) as u8).collect(); | |
| + let mut output = vec![0u8; 3 * len]; | |
| + | |
| + let a_vec = D::U8Vec::load(d, &a); | |
| + let b_vec = D::U8Vec::load(d, &b); | |
| + let c_vec = D::U8Vec::load(d, &c); | |
| + D::U8Vec::store_interleaved_3(a_vec, b_vec, c_vec, &mut output); | |
| + | |
| + for i in 0..len { | |
| + assert_eq!(output[3 * i], a[i]); | |
| + assert_eq!(output[3 * i + 1], b[i]); | |
| + assert_eq!(output[3 * i + 2], c[i]); | |
| + } | |
| + } | |
| + test_all_instruction_sets!(test_store_interleaved_3_u8); | |
| + | |
| + fn test_store_interleaved_4_u8<D: SimdDescriptor>(d: D) { | |
| + let len = D::U8Vec::LEN; | |
| + let a: Vec<u8> = (0..len).map(|i| i as u8).collect(); | |
| + let b: Vec<u8> = (0..len).map(|i| (i + 100) as u8).collect(); | |
| + let c: Vec<u8> = (0..len).map(|i| (i + 50) as u8).collect(); | |
| + let e: Vec<u8> = (0..len).map(|i| (i + 200) as u8).collect(); | |
| + let mut output = vec![0u8; 4 * len]; | |
| + | |
| + let a_vec = D::U8Vec::load(d, &a); | |
| + let b_vec = D::U8Vec::load(d, &b); | |
| + let c_vec = D::U8Vec::load(d, &c); | |
| + let d_vec = D::U8Vec::load(d, &e); | |
| + D::U8Vec::store_interleaved_4(a_vec, b_vec, c_vec, d_vec, &mut output); | |
| + | |
| + for i in 0..len { | |
| + assert_eq!(output[4 * i], a[i]); | |
| + assert_eq!(output[4 * i + 1], b[i]); | |
| + assert_eq!(output[4 * i + 2], c[i]); | |
| + assert_eq!(output[4 * i + 3], e[i]); | |
| + } | |
| + } | |
| + test_all_instruction_sets!(test_store_interleaved_4_u8); | |
| + | |
| + fn test_store_interleaved_2_u16<D: SimdDescriptor>(d: D) { | |
| + let len = D::U16Vec::LEN; | |
| + let a: Vec<u16> = (0..len).map(|i| i as u16).collect(); | |
| + let b: Vec<u16> = (0..len).map(|i| (i + 1000) as u16).collect(); | |
| + let mut output = vec![0u16; 2 * len]; | |
| + | |
| + let a_vec = D::U16Vec::load(d, &a); | |
| + let b_vec = D::U16Vec::load(d, &b); | |
| + D::U16Vec::store_interleaved_2(a_vec, b_vec, &mut output); | |
| + | |
| + for i in 0..len { | |
| + assert_eq!(output[2 * i], a[i]); | |
| + assert_eq!(output[2 * i + 1], b[i]); | |
| + } | |
| + } | |
| + test_all_instruction_sets!(test_store_interleaved_2_u16); | |
| + | |
| + fn test_store_interleaved_3_u16<D: SimdDescriptor>(d: D) { | |
| + let len = D::U16Vec::LEN; | |
| + let a: Vec<u16> = (0..len).map(|i| i as u16).collect(); | |
| + let b: Vec<u16> = (0..len).map(|i| (i + 1000) as u16).collect(); | |
| + let c: Vec<u16> = (0..len).map(|i| (i + 2000) as u16).collect(); | |
| + let mut output = vec![0u16; 3 * len]; | |
| + | |
| + let a_vec = D::U16Vec::load(d, &a); | |
| + let b_vec = D::U16Vec::load(d, &b); | |
| + let c_vec = D::U16Vec::load(d, &c); | |
| + D::U16Vec::store_interleaved_3(a_vec, b_vec, c_vec, &mut output); | |
| + | |
| + for i in 0..len { | |
| + assert_eq!(output[3 * i], a[i]); | |
| + assert_eq!(output[3 * i + 1], b[i]); | |
| + assert_eq!(output[3 * i + 2], c[i]); | |
| + } | |
| + } | |
| + test_all_instruction_sets!(test_store_interleaved_3_u16); | |
| + | |
| + fn test_store_interleaved_4_u16<D: SimdDescriptor>(d: D) { | |
| + let len = D::U16Vec::LEN; | |
| + let a: Vec<u16> = (0..len).map(|i| i as u16).collect(); | |
| + let b: Vec<u16> = (0..len).map(|i| (i + 1000) as u16).collect(); | |
| + let c: Vec<u16> = (0..len).map(|i| (i + 2000) as u16).collect(); | |
| + let e: Vec<u16> = (0..len).map(|i| (i + 3000) as u16).collect(); | |
| + let mut output = vec![0u16; 4 * len]; | |
| + | |
| + let a_vec = D::U16Vec::load(d, &a); | |
| + let b_vec = D::U16Vec::load(d, &b); | |
| + let c_vec = D::U16Vec::load(d, &c); | |
| + let d_vec = D::U16Vec::load(d, &e); | |
| + D::U16Vec::store_interleaved_4(a_vec, b_vec, c_vec, d_vec, &mut output); | |
| + | |
| + for i in 0..len { | |
| + assert_eq!(output[4 * i], a[i]); | |
| + assert_eq!(output[4 * i + 1], b[i]); | |
| + assert_eq!(output[4 * i + 2], c[i]); | |
| + assert_eq!(output[4 * i + 3], e[i]); | |
| + } | |
| + } | |
| + test_all_instruction_sets!(test_store_interleaved_4_u16); | |
| + | |
| + fn test_store_u8<D: SimdDescriptor>(d: D) { | |
| + let data = [ | |
| + 0xba_i32, | |
| + 0x12345678_i32, | |
| + 0xdeadbabeu32 as i32, | |
| + 0x76543210_i32, | |
| + 0x11111111_i32, | |
| + 0x00000000_i32, | |
| + 0xffffffffu32 as i32, | |
| + 0x12345678_i32, | |
| + 0x87654321u32 as i32, | |
| + 0xabcdef01u32 as i32, | |
| + 0x10203040_i32, | |
| + 0x50607080_i32, | |
| + 0x01020304_i32, | |
| + 0x05060708_i32, | |
| + 0x090a0b0c_i32, | |
| + 0x0d0e0f00_i32, | |
| + ]; | |
| + let mut output = [0u8; 16]; | |
| + for i in (0..16).step_by(D::I32Vec::LEN) { | |
| + let vec = D::I32Vec::load(d, &data[i..]); | |
| + vec.store_u8(&mut output[i..]); | |
| + } | |
| + | |
| + for i in 0..16 { | |
| + let expected = (data[i] & 0xff) as u8; | |
| + assert_eq!( | |
| + output[i], expected, | |
| + "store_u8 failed at index {}: expected 0x{:02x}, got 0x{:02x}", | |
| + i, expected, output[i] | |
| + ); | |
| + } | |
| + } | |
| + test_all_instruction_sets!(test_store_u8); | |
| } | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs | |
| index f0444c34bf4f8..a423db2f1d56b 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/scalar.rs | |
| @@ -8,7 +8,7 @@ use std::num::Wrapping; | |
| use crate::{U32SimdVec, f16, impl_f32_array_interface}; | |
| -use super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask}; | |
| +use super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec}; | |
| #[derive(Clone, Copy, Debug)] | |
| pub struct ScalarDescriptor; | |
| @@ -17,6 +17,8 @@ impl SimdDescriptor for ScalarDescriptor { | |
| type F32Vec = f32; | |
| type I32Vec = Wrapping<i32>; | |
| type U32Vec = Wrapping<u32>; | |
| + type U8Vec = u8; | |
| + type U16Vec = u16; | |
| type Mask = bool; | |
| type Bf16Table8 = [f32; 8]; | |
| @@ -310,6 +312,11 @@ impl I32SimdVec for Wrapping<i32> { | |
| fn store_u16(self, dest: &mut [u16]) { | |
| dest[0] = self.0 as u16; | |
| } | |
| + | |
| + #[inline(always)] | |
| + fn store_u8(self, dest: &mut [u8]) { | |
| + dest[0] = self.0 as u8; | |
| + } | |
| } | |
| impl U32SimdVec for Wrapping<u32> { | |
| @@ -328,6 +335,104 @@ impl U32SimdVec for Wrapping<u32> { | |
| } | |
| } | |
| +// SAFETY: This implementation only writes initialized data into the | |
| +// `&mut [MaybeUninit<u8>]` arguments of the *_uninit methods. | |
| +unsafe impl U8SimdVec for u8 { | |
| + type Descriptor = ScalarDescriptor; | |
| + const LEN: usize = 1; | |
| + | |
| + #[inline(always)] | |
| + fn load(_d: Self::Descriptor, mem: &[u8]) -> Self { | |
| + mem[0] | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn splat(_d: Self::Descriptor, v: u8) -> Self { | |
| + v | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store(&self, mem: &mut [u8]) { | |
| + mem[0] = *self; | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) { | |
| + dest[0].write(a); | |
| + dest[1].write(b); | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) { | |
| + dest[0].write(a); | |
| + dest[1].write(b); | |
| + dest[2].write(c); | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_4_uninit( | |
| + a: Self, | |
| + b: Self, | |
| + c: Self, | |
| + d: Self, | |
| + dest: &mut [MaybeUninit<u8>], | |
| + ) { | |
| + dest[0].write(a); | |
| + dest[1].write(b); | |
| + dest[2].write(c); | |
| + dest[3].write(d); | |
| + } | |
| +} | |
| + | |
| +// SAFETY: This implementation only writes initialized data into the | |
| +// `&mut [MaybeUninit<u16>]` arguments of the *_uninit methods. | |
| +unsafe impl U16SimdVec for u16 { | |
| + type Descriptor = ScalarDescriptor; | |
| + const LEN: usize = 1; | |
| + | |
| + #[inline(always)] | |
| + fn load(_d: Self::Descriptor, mem: &[u16]) -> Self { | |
| + mem[0] | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn splat(_d: Self::Descriptor, v: u16) -> Self { | |
| + v | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store(&self, mem: &mut [u16]) { | |
| + mem[0] = *self; | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) { | |
| + dest[0].write(a); | |
| + dest[1].write(b); | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) { | |
| + dest[0].write(a); | |
| + dest[1].write(b); | |
| + dest[2].write(c); | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_4_uninit( | |
| + a: Self, | |
| + b: Self, | |
| + c: Self, | |
| + d: Self, | |
| + dest: &mut [MaybeUninit<u16>], | |
| + ) { | |
| + dest[0].write(a); | |
| + dest[1].write(b); | |
| + dest[2].write(c); | |
| + dest[3].write(d); | |
| + } | |
| +} | |
| + | |
| impl SimdMask for bool { | |
| type Descriptor = ScalarDescriptor; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs | |
| index 0da8ec9f0da4d..0ab752a656478 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx.rs | |
| @@ -5,7 +5,7 @@ | |
| use crate::{U32SimdVec, impl_f32_array_interface, x86_64::sse42::Sse42Descriptor}; | |
| -use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask}; | |
| +use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec}; | |
| use std::{ | |
| arch::x86_64::*, | |
| mem::MaybeUninit, | |
| @@ -124,6 +124,8 @@ impl SimdDescriptor for AvxDescriptor { | |
| type F32Vec = F32VecAvx; | |
| type I32Vec = I32VecAvx; | |
| type U32Vec = U32VecAvx; | |
| + type U8Vec = U8VecAvx; | |
| + type U16Vec = U16VecAvx; | |
| type Mask = MaskAvx; | |
| type Bf16Table8 = Bf16Table8Avx; | |
| @@ -198,16 +200,16 @@ unsafe impl F32SimdVec for F32VecAvx { | |
| fn load(d: Self::Descriptor, mem: &[f32]) -> Self { | |
| assert!(mem.len() >= Self::LEN); | |
| // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx is available | |
| - // from the safety invariant on `d`. | |
| - Self(unsafe { _mm256_loadu_ps(mem.as_ptr()) }, d) | |
| + // from the safety invariant on `d`. _mm256_loadu_ps supports unaligned loads. | |
| + Self(unsafe { _mm256_loadu_ps(mem.as_ptr().cast()) }, d) | |
| } | |
| #[inline(always)] | |
| fn store(&self, mem: &mut [f32]) { | |
| assert!(mem.len() >= Self::LEN); | |
| // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx is available | |
| - // from the safety invariant on `self.1`. | |
| - unsafe { _mm256_storeu_ps(mem.as_mut_ptr(), self.0) } | |
| + // from the safety invariant on `self.1`. _mm256_storeu_ps supports unaligned stores. | |
| + unsafe { _mm256_storeu_ps(mem.as_mut_ptr().cast(), self.0) } | |
| } | |
| #[inline(always)] | |
| @@ -223,9 +225,9 @@ unsafe impl F32SimdVec for F32VecAvx { | |
| // Need to permute to get correct order | |
| let out0 = _mm256_permute2f128_ps::<0x20>(lo, hi); // lower halves: [a0,b0,a1,b1, a2,b2,a3,b3] | |
| let out1 = _mm256_permute2f128_ps::<0x31>(lo, hi); // upper halves: [a4,b4,a5,b5, a6,b6,a7,b7] | |
| - // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm256_storeu_ps supports unaligned stores. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| _mm256_storeu_ps(dest_ptr, out0); | |
| _mm256_storeu_ps(dest_ptr.add(8), out1); | |
| } | |
| @@ -274,9 +276,9 @@ unsafe impl F32SimdVec for F32VecAvx { | |
| let out2 = _mm256_blend_ps::<0b01001001>(a2, b2); | |
| let out2 = _mm256_blend_ps::<0b10010010>(out2, c2); | |
| - // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm256_storeu_ps supports unaligned stores. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| _mm256_storeu_ps(dest_ptr, out0); | |
| _mm256_storeu_ps(dest_ptr.add(8), out1); | |
| _mm256_storeu_ps(dest_ptr.add(16), out2); | |
| @@ -335,9 +337,9 @@ unsafe impl F32SimdVec for F32VecAvx { | |
| let out2 = _mm256_permute2f128_ps::<0x31>(abcd_0, abcd_1); | |
| let out3 = _mm256_permute2f128_ps::<0x31>(abcd_2, abcd_3); | |
| - // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm256_storeu_ps supports unaligned stores. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| _mm256_storeu_ps(dest_ptr, out0); | |
| _mm256_storeu_ps(dest_ptr.add(8), out1); | |
| _mm256_storeu_ps(dest_ptr.add(16), out2); | |
| @@ -636,9 +638,15 @@ unsafe impl F32SimdVec for F32VecAvx { | |
| // Pack 8 u16s to 8 u8s (use same vector twice, take lower half) | |
| let u8s = _mm_packus_epi16(u16s, u16s); | |
| // Store lower 8 bytes | |
| - // SAFETY: we checked dest has enough space | |
| + let val = _mm_cvtsi128_si64(u8s); | |
| + let bytes = val.to_ne_bytes(); | |
| + // SAFETY: | |
| + // 1. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8]. | |
| + // 2. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 8. | |
| + // 3. `src` and `dst` are properly aligned for u8 (alignment 1). | |
| + // 4. `src` and `dst` do not overlap as `src` is a local stack array. | |
| unsafe { | |
| - _mm_storel_epi64(dest.as_mut_ptr() as *mut __m128i, u8s); | |
| + std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8); | |
| } | |
| } | |
| // SAFETY: avx2 is available from the safety invariant on the descriptor. | |
| @@ -661,9 +669,9 @@ unsafe impl F32SimdVec for F32VecAvx { | |
| // Pack 4+4 i32s to 8 u16s | |
| let u16s = _mm_packus_epi32(lo, hi); | |
| // Store 8 u16s (16 bytes) | |
| - // SAFETY: we checked dest has enough space | |
| + // SAFETY: we checked dest has enough space. _mm_storeu_si128 supports unaligned stores. | |
| unsafe { | |
| - _mm_storeu_si128(dest.as_mut_ptr() as *mut __m128i, u16s); | |
| + _mm_storeu_si128(dest.as_mut_ptr().cast(), u16s); | |
| } | |
| } | |
| // SAFETY: avx2 is available from the safety invariant on the descriptor. | |
| @@ -678,8 +686,8 @@ unsafe impl F32SimdVec for F32VecAvx { | |
| #[inline] | |
| fn load_f16_impl(d: AvxDescriptor, mem: &[u16]) -> F32VecAvx { | |
| assert!(mem.len() >= F32VecAvx::LEN); | |
| - // SAFETY: mem.len() >= 8 is checked above | |
| - let bits = unsafe { _mm_loadu_si128(mem.as_ptr() as *const __m128i) }; | |
| + // SAFETY: mem.len() >= 8 is checked above. _mm_loadu_si128 supports unaligned loads. | |
| + let bits = unsafe { _mm_loadu_si128(mem.as_ptr().cast()) }; | |
| F32VecAvx(_mm256_cvtph_ps(bits), d) | |
| } | |
| // SAFETY: avx2 and f16c are available from the safety invariant on the descriptor | |
| @@ -693,8 +701,8 @@ unsafe impl F32SimdVec for F32VecAvx { | |
| fn store_f16_bits_impl(v: __m256, dest: &mut [u16]) { | |
| assert!(dest.len() >= F32VecAvx::LEN); | |
| let bits = _mm256_cvtps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(v); | |
| - // SAFETY: dest.len() >= 8 is checked above | |
| - unsafe { _mm_storeu_si128(dest.as_mut_ptr() as *mut __m128i, bits) }; | |
| + // SAFETY: dest.len() >= 8 is checked above. _mm_storeu_si128 supports unaligned stores. | |
| + unsafe { _mm_storeu_si128(dest.as_mut_ptr().cast(), bits) }; | |
| } | |
| // SAFETY: avx2 and f16c are available from the safety invariant on the descriptor | |
| unsafe { store_f16_bits_impl(self.0, dest) } | |
| @@ -800,8 +808,8 @@ impl I32SimdVec for I32VecAvx { | |
| fn load(d: Self::Descriptor, mem: &[i32]) -> Self { | |
| assert!(mem.len() >= Self::LEN); | |
| // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx is available | |
| - // from the safety invariant on `d`. | |
| - Self(unsafe { _mm256_loadu_si256(mem.as_ptr() as *const _) }, d) | |
| + // from the safety invariant on `d`. _mm256_loadu_si256 supports unaligned loads. | |
| + Self(unsafe { _mm256_loadu_si256(mem.as_ptr().cast()) }, d) | |
| } | |
| #[inline(always)] | |
| @@ -893,7 +901,7 @@ impl I32SimdVec for I32VecAvx { | |
| ), | |
| ); | |
| let tmp = _mm256_permute4x64_epi64(tmp, 0xD8); | |
| - // SAFETY: we just checked that `dest` has enough space. | |
| + // SAFETY: we just checked that `dest` has enough space. _mm_storeu_si128 supports unaligned stores. | |
| unsafe { | |
| _mm_storeu_si128(dest.as_mut_ptr().cast(), _mm256_extracti128_si256::<0>(tmp)) | |
| }; | |
| @@ -901,6 +909,38 @@ impl I32SimdVec for I32VecAvx { | |
| // SAFETY: avx2 is available from the safety invariant on the descriptor. | |
| unsafe { store_u16_impl(self.0, dest) } | |
| } | |
| + | |
| + #[inline(always)] | |
| + fn store_u8(self, dest: &mut [u8]) { | |
| + #[target_feature(enable = "avx2")] | |
| + #[inline] | |
| + fn store_u8_impl(v: __m256i, dest: &mut [u8]) { | |
| + assert!(dest.len() >= I32VecAvx::LEN); | |
| + let tmp = _mm256_shuffle_epi8( | |
| + v, | |
| + _mm256_setr_epi8( | |
| + 0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // | |
| + 0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, | |
| + ), | |
| + ); | |
| + let lo = _mm256_castsi256_si128(tmp); | |
| + let hi = _mm256_extracti128_si256::<1>(tmp); | |
| + let packed = _mm_unpacklo_epi32(lo, hi); | |
| + let val = _mm_cvtsi128_si64(packed); | |
| + let bytes = val.to_ne_bytes(); | |
| + // SAFETY: | |
| + // 1. we just checked that `dest` has enough space (dest.len() >= 8). | |
| + // 2. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8]. | |
| + // 3. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 8. | |
| + // 4. `src` and `dst` are properly aligned for u8 (alignment 1). | |
| + // 5. `src` and `dst` do not overlap as `src` is a local stack array. | |
| + unsafe { | |
| + std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8); | |
| + } | |
| + } | |
| + // SAFETY: avx2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_u8_impl(self.0, dest) } | |
| + } | |
| } | |
| impl Add<I32VecAvx> for I32VecAvx { | |
| @@ -1035,6 +1075,414 @@ impl U32SimdVec for U32VecAvx { | |
| } | |
| } | |
| +#[derive(Clone, Copy, Debug)] | |
| +#[repr(transparent)] | |
| +pub struct U8VecAvx(__m256i, AvxDescriptor); | |
| + | |
| +// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*) | |
| +// ensure that they write valid data to the output slice without reading uninitialized memory. | |
| +unsafe impl U8SimdVec for U8VecAvx { | |
| + type Descriptor = AvxDescriptor; | |
| + const LEN: usize = 32; | |
| + | |
| + #[inline(always)] | |
| + fn load(d: Self::Descriptor, mem: &[u8]) -> Self { | |
| + assert!(mem.len() >= U8VecAvx::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available | |
| + // from the safety invariant on `d`. _mm256_loadu_si256 supports unaligned loads. | |
| + unsafe { Self(_mm256_loadu_si256(mem.as_ptr().cast()), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn splat(d: Self::Descriptor, v: u8) -> Self { | |
| + // SAFETY: We know avx2 is available from the safety invariant on `d`. | |
| + unsafe { Self(_mm256_set1_epi8(v as i8), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store(&self, mem: &mut [u8]) { | |
| + assert!(mem.len() >= U8VecAvx::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available | |
| + // from the safety invariant on `self.1`. _mm256_storeu_si256 supports unaligned stores. | |
| + unsafe { _mm256_storeu_si256(mem.as_mut_ptr().cast(), self.0) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) { | |
| + #[target_feature(enable = "avx2")] | |
| + #[inline] | |
| + fn store_interleaved_2_impl(a: __m256i, b: __m256i, dest: &mut [MaybeUninit<u8>]) { | |
| + assert!(dest.len() >= 2 * U8VecAvx::LEN); | |
| + // a = [A0..A15 | A16..A31] | |
| + // b = [B0..B15 | B16..B31] | |
| + let lo = _mm256_unpacklo_epi8(a, b); // [A0 B0..A7 B7 | A16 B16..A23 B23] | |
| + let hi = _mm256_unpackhi_epi8(a, b); // [A8 B8..A15 B15 | A24 B24..A31 B31] | |
| + | |
| + // R0 = [A0 B0..A7 B7 | A8 B8..A15 B15] | |
| + let out0 = _mm256_permute2x128_si256::<0x20>(lo, hi); | |
| + // R1 = [A16 B16..A23 B23 | A24 B24..A31 B31] | |
| + let out1 = _mm256_permute2x128_si256::<0x31>(lo, hi); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<__m256i>(); | |
| + _mm256_storeu_si256(dest_ptr, out0); | |
| + _mm256_storeu_si256(dest_ptr.add(1), out1); | |
| + } | |
| + } | |
| + // SAFETY: avx2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_2_impl(a.0, b.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) { | |
| + #[target_feature(enable = "avx2")] | |
| + #[inline] | |
| + fn store_interleaved_3_impl( | |
| + a: __m256i, | |
| + b: __m256i, | |
| + c: __m256i, | |
| + dest: &mut [MaybeUninit<u8>], | |
| + ) { | |
| + assert!(dest.len() >= 3 * U8VecAvx::LEN); | |
| + | |
| + // U8 Masks | |
| + let mask_a0 = _mm256_setr_epi8( | |
| + 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7, -1, | |
| + -1, 8, -1, -1, 9, -1, -1, 10, -1, | |
| + ); | |
| + let mask_a1 = _mm256_setr_epi8( | |
| + -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1, -1, | |
| + -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, | |
| + ); | |
| + let mask_a2 = _mm256_setr_epi8( | |
| + -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12, -1, | |
| + -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, | |
| + ); | |
| + let mask_b0 = _mm256_setr_epi8( | |
| + -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, -1, 7, | |
| + -1, -1, 8, -1, -1, 9, -1, -1, 10, | |
| + ); | |
| + let mask_b1 = _mm256_setr_epi8( | |
| + -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, 1, | |
| + -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, | |
| + ); | |
| + let mask_b2 = _mm256_setr_epi8( | |
| + 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, 12, | |
| + -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, | |
| + ); | |
| + let mask_c0 = _mm256_setr_epi8( | |
| + -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, -1, -1, 6, -1, | |
| + -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, | |
| + ); | |
| + let mask_c1 = _mm256_setr_epi8( | |
| + 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, 0, -1, -1, | |
| + 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, | |
| + ); | |
| + let mask_c2 = _mm256_setr_epi8( | |
| + -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, -1, 11, -1, -1, | |
| + 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, | |
| + ); | |
| + | |
| + // Create duplicated vectors for lane swizzling | |
| + let a_dup_lo = _mm256_permute2x128_si256::<0x00>(a, a); | |
| + let b_dup_lo = _mm256_permute2x128_si256::<0x00>(b, b); | |
| + let c_dup_lo = _mm256_permute2x128_si256::<0x00>(c, c); | |
| + | |
| + let a_dup_hi = _mm256_permute2x128_si256::<0x11>(a, a); | |
| + let b_dup_hi = _mm256_permute2x128_si256::<0x11>(b, b); | |
| + let c_dup_hi = _mm256_permute2x128_si256::<0x11>(c, c); | |
| + | |
| + let out0 = _mm256_or_si256( | |
| + _mm256_or_si256( | |
| + _mm256_shuffle_epi8(a_dup_lo, mask_a0), | |
| + _mm256_shuffle_epi8(b_dup_lo, mask_b0), | |
| + ), | |
| + _mm256_shuffle_epi8(c_dup_lo, mask_c0), | |
| + ); | |
| + | |
| + let out1 = _mm256_or_si256( | |
| + _mm256_or_si256( | |
| + _mm256_shuffle_epi8(a, mask_a1), | |
| + _mm256_shuffle_epi8(b, mask_b1), | |
| + ), | |
| + _mm256_shuffle_epi8(c, mask_c1), | |
| + ); | |
| + | |
| + let out2 = _mm256_or_si256( | |
| + _mm256_or_si256( | |
| + _mm256_shuffle_epi8(a_dup_hi, mask_a2), | |
| + _mm256_shuffle_epi8(b_dup_hi, mask_b2), | |
| + ), | |
| + _mm256_shuffle_epi8(c_dup_hi, mask_c2), | |
| + ); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<__m256i>(); | |
| + _mm256_storeu_si256(dest_ptr, out0); | |
| + _mm256_storeu_si256(dest_ptr.add(1), out1); | |
| + _mm256_storeu_si256(dest_ptr.add(2), out2); | |
| + } | |
| + } | |
| + // SAFETY: avx2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_4_uninit( | |
| + a: Self, | |
| + b: Self, | |
| + c: Self, | |
| + d: Self, | |
| + dest: &mut [MaybeUninit<u8>], | |
| + ) { | |
| + #[target_feature(enable = "avx2")] | |
| + #[inline] | |
| + fn store_interleaved_4_impl( | |
| + a: __m256i, | |
| + b: __m256i, | |
| + c: __m256i, | |
| + d: __m256i, | |
| + dest: &mut [MaybeUninit<u8>], | |
| + ) { | |
| + assert!(dest.len() >= 4 * U8VecAvx::LEN); | |
| + // First interleave pairs: ab and cd | |
| + let ab_lo = _mm256_unpacklo_epi8(a, b); | |
| + let ab_hi = _mm256_unpackhi_epi8(a, b); | |
| + let cd_lo = _mm256_unpacklo_epi8(c, d); | |
| + let cd_hi = _mm256_unpackhi_epi8(c, d); | |
| + | |
| + // Then interleave the pairs to get 4-byte chunks | |
| + let out0_p = _mm256_unpacklo_epi16(ab_lo, cd_lo); | |
| + let out1_p = _mm256_unpackhi_epi16(ab_lo, cd_lo); | |
| + let out2_p = _mm256_unpacklo_epi16(ab_hi, cd_hi); | |
| + let out3_p = _mm256_unpackhi_epi16(ab_hi, cd_hi); | |
| + | |
| + // Reorder lanes | |
| + let out0 = _mm256_permute2x128_si256::<0x20>(out0_p, out1_p); | |
| + let out1 = _mm256_permute2x128_si256::<0x20>(out2_p, out3_p); | |
| + let out2 = _mm256_permute2x128_si256::<0x31>(out0_p, out1_p); | |
| + let out3 = _mm256_permute2x128_si256::<0x31>(out2_p, out3_p); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<__m256i>(); | |
| + _mm256_storeu_si256(dest_ptr, out0); | |
| + _mm256_storeu_si256(dest_ptr.add(1), out1); | |
| + _mm256_storeu_si256(dest_ptr.add(2), out2); | |
| + _mm256_storeu_si256(dest_ptr.add(3), out3); | |
| + } | |
| + } | |
| + // SAFETY: avx2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) } | |
| + } | |
| +} | |
| + | |
| +#[derive(Clone, Copy, Debug)] | |
| +#[repr(transparent)] | |
| +pub struct U16VecAvx(__m256i, AvxDescriptor); | |
| + | |
| +// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*) | |
| +// ensure that they write valid data to the output slice without reading uninitialized memory. | |
| +unsafe impl U16SimdVec for U16VecAvx { | |
| + type Descriptor = AvxDescriptor; | |
| + const LEN: usize = 16; | |
| + | |
| + #[inline(always)] | |
| + fn load(d: Self::Descriptor, mem: &[u16]) -> Self { | |
| + assert!(mem.len() >= U16VecAvx::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available | |
| + // from the safety invariant on `d`. _mm256_loadu_si256 supports unaligned loads. | |
| + unsafe { Self(_mm256_loadu_si256(mem.as_ptr().cast()), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn splat(d: Self::Descriptor, v: u16) -> Self { | |
| + // SAFETY: avx2 is available from the safety invariant on the descriptor. | |
| + unsafe { Self(_mm256_set1_epi16(v as i16), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store(&self, mem: &mut [u16]) { | |
| + assert!(mem.len() >= U16VecAvx::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx2 is available | |
| + // from the safety invariant on `d`. _mm256_storeu_si256 supports unaligned stores. | |
| + unsafe { _mm256_storeu_si256(mem.as_mut_ptr().cast(), self.0) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) { | |
| + #[target_feature(enable = "avx2")] | |
| + #[inline] | |
| + fn store_interleaved_2_impl(a: __m256i, b: __m256i, dest: &mut [MaybeUninit<u16>]) { | |
| + assert!(dest.len() >= 2 * U16VecAvx::LEN); | |
| + // a = [A0..A7 | A8..A15] | |
| + // b = [B0..B7 | B8..B15] | |
| + let lo = _mm256_unpacklo_epi16(a, b); // [A0 B0..A3 B3 | A8 B8..A11 B11] | |
| + let hi = _mm256_unpackhi_epi16(a, b); // [A4 B4..A7 B7 | A12 B12..A15 B15] | |
| + | |
| + // R0 = [A0 B0..A7 B7] | |
| + let out0 = _mm256_permute2x128_si256::<0x20>(lo, hi); | |
| + // R1 = [A8 B8..A15 B15] | |
| + let out1 = _mm256_permute2x128_si256::<0x31>(lo, hi); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<__m256i>(); | |
| + _mm256_storeu_si256(dest_ptr, out0); | |
| + _mm256_storeu_si256(dest_ptr.add(1), out1); | |
| + } | |
| + } | |
| + // SAFETY: avx2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_2_impl(a.0, b.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) { | |
| + #[target_feature(enable = "avx2")] | |
| + #[inline] | |
| + fn store_interleaved_3_impl( | |
| + a: __m256i, | |
| + b: __m256i, | |
| + c: __m256i, | |
| + dest: &mut [MaybeUninit<u16>], | |
| + ) { | |
| + assert!(dest.len() >= 3 * U16VecAvx::LEN); | |
| + | |
| + // U16 Masks | |
| + let mask_a0 = _mm256_setr_epi8( | |
| + 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1, | |
| + -1, 8, 9, -1, -1, -1, -1, 10, 11, | |
| + ); | |
| + let mask_a1 = _mm256_setr_epi8( | |
| + -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, 0, 1, -1, -1, -1, | |
| + -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1, | |
| + ); | |
| + let mask_a2 = _mm256_setr_epi8( | |
| + -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11, -1, -1, -1, -1, 12, 13, | |
| + -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, | |
| + ); | |
| + let mask_b0 = _mm256_setr_epi8( | |
| + -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1, -1, -1, 6, 7, -1, | |
| + -1, -1, -1, 8, 9, -1, -1, -1, -1, | |
| + ); | |
| + let mask_b1 = _mm256_setr_epi8( | |
| + 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, 0, 1, -1, | |
| + -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, | |
| + ); | |
| + let mask_b2 = _mm256_setr_epi8( | |
| + -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11, -1, -1, -1, -1, | |
| + 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, | |
| + ); | |
| + let mask_c0 = _mm256_setr_epi8( | |
| + -1, -1, -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1, -1, -1, | |
| + 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, | |
| + ); | |
| + let mask_c1 = _mm256_setr_epi8( | |
| + -1, -1, 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, 0, | |
| + 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, | |
| + ); | |
| + let mask_c2 = _mm256_setr_epi8( | |
| + 4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11, -1, -1, | |
| + -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, | |
| + ); | |
| + | |
| + // Create duplicated vectors for lane swizzling | |
| + let a_dup_lo = _mm256_permute2x128_si256::<0x00>(a, a); | |
| + let b_dup_lo = _mm256_permute2x128_si256::<0x00>(b, b); | |
| + let c_dup_lo = _mm256_permute2x128_si256::<0x00>(c, c); | |
| + | |
| + let a_dup_hi = _mm256_permute2x128_si256::<0x11>(a, a); | |
| + let b_dup_hi = _mm256_permute2x128_si256::<0x11>(b, b); | |
| + let c_dup_hi = _mm256_permute2x128_si256::<0x11>(c, c); | |
| + | |
| + let out0 = _mm256_or_si256( | |
| + _mm256_or_si256( | |
| + _mm256_shuffle_epi8(a_dup_lo, mask_a0), | |
| + _mm256_shuffle_epi8(b_dup_lo, mask_b0), | |
| + ), | |
| + _mm256_shuffle_epi8(c_dup_lo, mask_c0), | |
| + ); | |
| + | |
| + let out1 = _mm256_or_si256( | |
| + _mm256_or_si256( | |
| + _mm256_shuffle_epi8(a, mask_a1), | |
| + _mm256_shuffle_epi8(b, mask_b1), | |
| + ), | |
| + _mm256_shuffle_epi8(c, mask_c1), | |
| + ); | |
| + | |
| + let out2 = _mm256_or_si256( | |
| + _mm256_or_si256( | |
| + _mm256_shuffle_epi8(a_dup_hi, mask_a2), | |
| + _mm256_shuffle_epi8(b_dup_hi, mask_b2), | |
| + ), | |
| + _mm256_shuffle_epi8(c_dup_hi, mask_c2), | |
| + ); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<__m256i>(); | |
| + _mm256_storeu_si256(dest_ptr, out0); | |
| + _mm256_storeu_si256(dest_ptr.add(1), out1); | |
| + _mm256_storeu_si256(dest_ptr.add(2), out2); | |
| + } | |
| + } | |
| + // SAFETY: avx2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_4_uninit( | |
| + a: Self, | |
| + b: Self, | |
| + c: Self, | |
| + d: Self, | |
| + dest: &mut [MaybeUninit<u16>], | |
| + ) { | |
| + #[target_feature(enable = "avx2")] | |
| + #[inline] | |
| + fn store_interleaved_4_impl( | |
| + a: __m256i, | |
| + b: __m256i, | |
| + c: __m256i, | |
| + d: __m256i, | |
| + dest: &mut [MaybeUninit<u16>], | |
| + ) { | |
| + assert!(dest.len() >= 4 * U16VecAvx::LEN); | |
| + // First interleave pairs: ab and cd | |
| + let ab_lo = _mm256_unpacklo_epi16(a, b); | |
| + let ab_hi = _mm256_unpackhi_epi16(a, b); | |
| + let cd_lo = _mm256_unpacklo_epi16(c, d); | |
| + let cd_hi = _mm256_unpackhi_epi16(c, d); | |
| + | |
| + // Then interleave the pairs to get 4-u16 chunks (8 bytes) | |
| + let out0_p = _mm256_unpacklo_epi32(ab_lo, cd_lo); | |
| + let out1_p = _mm256_unpackhi_epi32(ab_lo, cd_lo); | |
| + let out2_p = _mm256_unpacklo_epi32(ab_hi, cd_hi); | |
| + let out3_p = _mm256_unpackhi_epi32(ab_hi, cd_hi); | |
| + | |
| + // Reorder lanes | |
| + let out0 = _mm256_permute2x128_si256::<0x20>(out0_p, out1_p); | |
| + let out1 = _mm256_permute2x128_si256::<0x20>(out2_p, out3_p); | |
| + let out2 = _mm256_permute2x128_si256::<0x31>(out0_p, out1_p); | |
| + let out3 = _mm256_permute2x128_si256::<0x31>(out2_p, out3_p); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m256i` is valid. _mm256_storeu_si256 supports unaligned stores. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<__m256i>(); | |
| + _mm256_storeu_si256(dest_ptr, out0); | |
| + _mm256_storeu_si256(dest_ptr.add(1), out1); | |
| + _mm256_storeu_si256(dest_ptr.add(2), out2); | |
| + _mm256_storeu_si256(dest_ptr.add(3), out3); | |
| + } | |
| + } | |
| + // SAFETY: avx2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) } | |
| + } | |
| +} | |
| + | |
| impl SimdMask for MaskAvx { | |
| type Descriptor = AvxDescriptor; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs | |
| index 89086c50c9715..48bc32a61032b 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/avx512.rs | |
| @@ -3,7 +3,9 @@ | |
| // Use of this source code is governed by a BSD-style | |
| // license that can be found in the LICENSE file. | |
| -use super::super::{AvxDescriptor, F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask}; | |
| +use super::super::{ | |
| + AvxDescriptor, F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec, | |
| +}; | |
| use crate::{Sse42Descriptor, U32SimdVec, impl_f32_array_interface}; | |
| use std::{ | |
| arch::x86_64::*, | |
| @@ -43,6 +45,8 @@ impl SimdDescriptor for Avx512Descriptor { | |
| type F32Vec = F32VecAvx512; | |
| type I32Vec = I32VecAvx512; | |
| type U32Vec = U32VecAvx512; | |
| + type U8Vec = U8VecAvx512; | |
| + type U16Vec = U16VecAvx512; | |
| type Mask = MaskAvx512; | |
| type Bf16Table8 = Bf16Table8Avx512; | |
| @@ -149,9 +153,9 @@ unsafe impl F32SimdVec for F32VecAvx512 { | |
| let out0 = _mm512_permutex2var_ps(lo, idx_lo, hi); | |
| let out1 = _mm512_permutex2var_ps(lo, idx_hi, hi); | |
| - // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm512_storeu_ps supports unaligned stores. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| _mm512_storeu_ps(dest_ptr, out0); | |
| _mm512_storeu_ps(dest_ptr.add(16), out1); | |
| } | |
| @@ -192,9 +196,9 @@ unsafe impl F32SimdVec for F32VecAvx512 { | |
| let out2 = _mm512_permutex2var_ps(a, idx_ab2, b); | |
| let out2 = _mm512_mask_permutexvar_ps(out2, 0b1001001001001001, idx_c2, c); | |
| - // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm512_storeu_ps supports unaligned stores. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| _mm512_storeu_ps(dest_ptr, out0); | |
| _mm512_storeu_ps(dest_ptr.add(16), out1); | |
| _mm512_storeu_ps(dest_ptr.add(32), out2); | |
| @@ -291,9 +295,9 @@ unsafe impl F32SimdVec for F32VecAvx512 { | |
| let out1 = _mm512_permutex2var_ps(pair01_13, idx_0, pair23_13); | |
| let out3 = _mm512_permutex2var_ps(pair01_13, idx_1, pair23_13); | |
| - // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. _mm512_storeu_ps supports unaligned stores. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| _mm512_storeu_ps(dest_ptr, out0); | |
| _mm512_storeu_ps(dest_ptr.add(16), out1); | |
| _mm512_storeu_ps(dest_ptr.add(32), out2); | |
| @@ -428,9 +432,9 @@ unsafe impl F32SimdVec for F32VecAvx512 { | |
| let out6 = _mm512_permutex2var_ps(full_0_13, idx_hi, full_1_13); | |
| let out7 = _mm512_permutex2var_ps(full_2_13, idx_hi, full_3_13); | |
| - // SAFETY: we just checked that dest has enough space. | |
| + // SAFETY: we just checked that dest has enough space. _mm512_storeu_ps supports unaligned stores. | |
| unsafe { | |
| - let ptr = dest.as_mut_ptr(); | |
| + let ptr = dest.as_mut_ptr().cast::<f32>(); | |
| _mm512_storeu_ps(ptr, out0); | |
| _mm512_storeu_ps(ptr.add(16), out1); | |
| _mm512_storeu_ps(ptr.add(32), out2); | |
| @@ -454,7 +458,7 @@ unsafe impl F32SimdVec for F32VecAvx512 { | |
| assert!(src.len() >= 2 * F32VecAvx512::LEN); | |
| // Input: [a0,b0,a1,b1,...,a15,b15] | |
| // Output: a = [a0..a15], b = [b0..b15] | |
| - // SAFETY: we just checked that src has enough space. | |
| + // SAFETY: we just checked that src has enough space. _mm512_loadu_ps supports unaligned loads. | |
| let (in0, in1) = unsafe { | |
| ( | |
| _mm512_loadu_ps(src.as_ptr()), | |
| @@ -491,7 +495,7 @@ unsafe impl F32SimdVec for F32VecAvx512 { | |
| // in2: [c10,a11,b11,c11,a12,b12,c12,a13,b13,c13,a14,b14,c14,a15,b15,c15] | |
| // Output: a = [a0..a15], b = [b0..b15], c = [c0..c15] | |
| - // SAFETY: we just checked that src has enough space. | |
| + // SAFETY: we just checked that src has enough space. _mm512_loadu_ps supports unaligned loads. | |
| let (in0, in1, in2) = unsafe { | |
| ( | |
| _mm512_loadu_ps(src.as_ptr()), | |
| @@ -544,7 +548,7 @@ unsafe impl F32SimdVec for F32VecAvx512 { | |
| assert!(src.len() >= 4 * F32VecAvx512::LEN); | |
| // Input: [a0,b0,c0,d0,a1,b1,c1,d1,...] (64 floats) | |
| // Output: a = [a0..a15], b = [b0..b15], c = [c0..c15], d = [d0..d15] | |
| - // SAFETY: we just checked that src has enough space. | |
| + // SAFETY: we just checked that src has enough space. _mm512_loadu_ps supports unaligned loads. | |
| let (in0, in1, in2, in3) = unsafe { | |
| ( | |
| _mm512_loadu_ps(src.as_ptr()), | |
| @@ -700,7 +704,7 @@ unsafe impl F32SimdVec for F32VecAvx512 { | |
| // Store 16 bytes | |
| // SAFETY: we checked dest has enough space | |
| unsafe { | |
| - _mm_storeu_si128(dest.as_mut_ptr() as *mut __m128i, u8s); | |
| + _mm_storeu_si128(dest.as_mut_ptr().cast(), u8s); | |
| } | |
| } | |
| // SAFETY: avx512f and avx512bw are available from the safety invariant on the descriptor. | |
| @@ -722,7 +726,7 @@ unsafe impl F32SimdVec for F32VecAvx512 { | |
| // Store 16 u16s (32 bytes) | |
| // SAFETY: we checked dest has enough space | |
| unsafe { | |
| - _mm256_storeu_si256(dest.as_mut_ptr() as *mut __m256i, u16s); | |
| + _mm256_storeu_si256(dest.as_mut_ptr().cast(), u16s); | |
| } | |
| } | |
| // SAFETY: avx512f and avx512bw are available from the safety invariant on the descriptor. | |
| @@ -738,8 +742,8 @@ unsafe impl F32SimdVec for F32VecAvx512 { | |
| #[inline] | |
| fn load_f16_impl(d: Avx512Descriptor, mem: &[u16]) -> F32VecAvx512 { | |
| assert!(mem.len() >= F32VecAvx512::LEN); | |
| - // SAFETY: mem.len() >= 16 is checked above | |
| - let bits = unsafe { _mm256_loadu_si256(mem.as_ptr() as *const __m256i) }; | |
| + // SAFETY: mem.len() >= 16 is checked above. | |
| + let bits = unsafe { _mm256_loadu_si256(mem.as_ptr().cast()) }; | |
| F32VecAvx512(_mm512_cvtph_ps(bits), d) | |
| } | |
| // SAFETY: avx512f is available from the safety invariant on the descriptor | |
| @@ -754,8 +758,8 @@ unsafe impl F32SimdVec for F32VecAvx512 { | |
| fn store_f16_bits_impl(v: __m512, dest: &mut [u16]) { | |
| assert!(dest.len() >= F32VecAvx512::LEN); | |
| let bits = _mm512_cvtps_ph::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(v); | |
| - // SAFETY: dest.len() >= 16 is checked above | |
| - unsafe { _mm256_storeu_si256(dest.as_mut_ptr() as *mut __m256i, bits) }; | |
| + // SAFETY: dest.len() >= 16 is checked above. | |
| + unsafe { _mm256_storeu_si256(dest.as_mut_ptr().cast(), bits) }; | |
| } | |
| // SAFETY: avx512f is available from the safety invariant on the descriptor | |
| unsafe { store_f16_bits_impl(self.0, dest) } | |
| @@ -1070,6 +1074,22 @@ impl I32SimdVec for I32VecAvx512 { | |
| // SAFETY: avx512f is available from the safety invariant on the descriptor. | |
| unsafe { store_u16_impl(self.0, dest) } | |
| } | |
| + | |
| + #[inline(always)] | |
| + fn store_u8(self, dest: &mut [u8]) { | |
| + #[target_feature(enable = "avx512f")] | |
| + #[inline] | |
| + fn store_u8_impl(v: __m512i, dest: &mut [u8]) { | |
| + assert!(dest.len() >= I32VecAvx512::LEN); | |
| + let tmp_vec = _mm512_cvtepi32_epi8(v); | |
| + // SAFETY: We just checked `dst` has enough space. | |
| + unsafe { | |
| + _mm_storeu_si128(dest.as_mut_ptr().cast(), tmp_vec); | |
| + } | |
| + } | |
| + // SAFETY: avx512f is available from the safety invariant on the descriptor. | |
| + unsafe { store_u8_impl(self.0, dest) } | |
| + } | |
| } | |
| impl Add<I32VecAvx512> for I32VecAvx512 { | |
| @@ -1204,6 +1224,398 @@ impl U32SimdVec for U32VecAvx512 { | |
| } | |
| } | |
| +#[derive(Clone, Copy, Debug)] | |
| +#[repr(transparent)] | |
| +pub struct U8VecAvx512(__m512i, Avx512Descriptor); | |
| + | |
| +// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*) | |
| +// ensure that they write valid data to the output slice without reading uninitialized memory. | |
| +unsafe impl U8SimdVec for U8VecAvx512 { | |
| + type Descriptor = Avx512Descriptor; | |
| + const LEN: usize = 64; | |
| + | |
| + #[inline(always)] | |
| + fn load(d: Self::Descriptor, mem: &[u8]) -> Self { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available | |
| + // from the safety invariant on `d`. _mm512_loadu_si512 supports unaligned loads. | |
| + unsafe { Self(_mm512_loadu_si512(mem.as_ptr().cast()), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn splat(d: Self::Descriptor, v: u8) -> Self { | |
| + // SAFETY: We know avx512f is available from the safety invariant on `d`. | |
| + unsafe { Self(_mm512_set1_epi8(v as i8), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store(&self, mem: &mut [u8]) { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available | |
| + // from the safety invariant on `d`. _mm512_storeu_si512 supports unaligned stores. | |
| + unsafe { _mm512_storeu_si512(mem.as_mut_ptr().cast(), self.0) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) { | |
| + #[target_feature(enable = "avx512f,avx512bw")] | |
| + #[inline] | |
| + fn impl_u8_2(a: __m512i, b: __m512i, dest: &mut [MaybeUninit<u8>]) { | |
| + assert!(dest.len() >= 2 * U8VecAvx512::LEN); | |
| + let lo = _mm512_unpacklo_epi8(a, b); | |
| + let hi = _mm512_unpackhi_epi8(a, b); | |
| + let idx0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11); | |
| + let idx1 = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15); | |
| + let out0 = _mm512_permutex2var_epi64(lo, idx0, hi); | |
| + let out1 = _mm512_permutex2var_epi64(lo, idx1, hi); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores. | |
| + unsafe { | |
| + let ptr = dest.as_mut_ptr().cast::<__m512i>(); | |
| + _mm512_storeu_si512(ptr, out0); | |
| + _mm512_storeu_si512(ptr.add(1), out1); | |
| + } | |
| + } | |
| + // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`. | |
| + unsafe { impl_u8_2(a.0, b.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) { | |
| + #[target_feature(enable = "avx512f,avx512bw")] | |
| + #[inline] | |
| + fn impl_u8_3(a: __m512i, b: __m512i, c: __m512i, dest: &mut [MaybeUninit<u8>]) { | |
| + assert!(dest.len() >= 3 * U8VecAvx512::LEN); | |
| + | |
| + let mask_a0 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5, | |
| + )); | |
| + let mask_b0 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, | |
| + )); | |
| + let mask_c0 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, | |
| + )); | |
| + | |
| + let mask_a1 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1, | |
| + )); | |
| + let mask_b1 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, | |
| + )); | |
| + let mask_c1 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, | |
| + )); | |
| + | |
| + let mask_a2 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, | |
| + )); | |
| + let mask_b2 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, | |
| + )); | |
| + let mask_c2 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, | |
| + )); | |
| + | |
| + let res0 = _mm512_or_si512( | |
| + _mm512_or_si512( | |
| + _mm512_shuffle_epi8(a, mask_a0), | |
| + _mm512_shuffle_epi8(b, mask_b0), | |
| + ), | |
| + _mm512_shuffle_epi8(c, mask_c0), | |
| + ); | |
| + let res1 = _mm512_or_si512( | |
| + _mm512_or_si512( | |
| + _mm512_shuffle_epi8(a, mask_a1), | |
| + _mm512_shuffle_epi8(b, mask_b1), | |
| + ), | |
| + _mm512_shuffle_epi8(c, mask_c1), | |
| + ); | |
| + let res2 = _mm512_or_si512( | |
| + _mm512_or_si512( | |
| + _mm512_shuffle_epi8(a, mask_a2), | |
| + _mm512_shuffle_epi8(b, mask_b2), | |
| + ), | |
| + _mm512_shuffle_epi8(c, mask_c2), | |
| + ); | |
| + let idx_a0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 0, 1); | |
| + let part_a0 = _mm512_permutex2var_epi64(res0, idx_a0, res1); | |
| + let idx_f0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 4, 5); | |
| + let final0 = _mm512_permutex2var_epi64(part_a0, idx_f0, res2); | |
| + let idx_a1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 0, 1); | |
| + let part_a1 = _mm512_permutex2var_epi64(res1, idx_a1, res2); | |
| + let idx_f1 = _mm512_setr_epi64(0, 1, 2, 3, 12, 13, 4, 5); | |
| + let final1 = _mm512_permutex2var_epi64(part_a1, idx_f1, res0); | |
| + let idx_a2 = _mm512_setr_epi64(4, 5, 14, 15, 6, 7, 0, 1); | |
| + let part_a2 = _mm512_permutex2var_epi64(res2, idx_a2, res0); | |
| + let idx_f2 = _mm512_setr_epi64(0, 1, 2, 3, 14, 15, 4, 5); | |
| + let final2 = _mm512_permutex2var_epi64(part_a2, idx_f2, res1); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores. | |
| + unsafe { | |
| + let ptr = dest.as_mut_ptr().cast::<__m512i>(); | |
| + _mm512_storeu_si512(ptr, final0); | |
| + _mm512_storeu_si512(ptr.add(1), final1); | |
| + _mm512_storeu_si512(ptr.add(2), final2); | |
| + } | |
| + } | |
| + // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`. | |
| + unsafe { impl_u8_3(a.0, b.0, c.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_4_uninit( | |
| + a: Self, | |
| + b: Self, | |
| + c: Self, | |
| + d: Self, | |
| + dest: &mut [MaybeUninit<u8>], | |
| + ) { | |
| + #[target_feature(enable = "avx512f,avx512bw")] | |
| + #[inline] | |
| + fn impl_u8_4(a: __m512i, b: __m512i, c: __m512i, d: __m512i, dest: &mut [MaybeUninit<u8>]) { | |
| + assert!(dest.len() >= 4 * U8VecAvx512::LEN); | |
| + let ab_lo = _mm512_unpacklo_epi8(a, b); | |
| + let ab_hi = _mm512_unpackhi_epi8(a, b); | |
| + let cd_lo = _mm512_unpacklo_epi8(c, d); | |
| + let cd_hi = _mm512_unpackhi_epi8(c, d); | |
| + | |
| + let abcd_0 = _mm512_unpacklo_epi16(ab_lo, cd_lo); | |
| + let abcd_1 = _mm512_unpackhi_epi16(ab_lo, cd_lo); | |
| + let abcd_2 = _mm512_unpacklo_epi16(ab_hi, cd_hi); | |
| + let abcd_3 = _mm512_unpackhi_epi16(ab_hi, cd_hi); | |
| + | |
| + let idx_even = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11); | |
| + let idx_odd = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15); | |
| + | |
| + let pair01_02 = _mm512_permutex2var_epi64(abcd_0, idx_even, abcd_1); | |
| + let pair01_13 = _mm512_permutex2var_epi64(abcd_0, idx_odd, abcd_1); | |
| + let pair23_02 = _mm512_permutex2var_epi64(abcd_2, idx_even, abcd_3); | |
| + let pair23_13 = _mm512_permutex2var_epi64(abcd_2, idx_odd, abcd_3); | |
| + | |
| + let idx_0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 10, 11); | |
| + let idx_1 = _mm512_setr_epi64(4, 5, 6, 7, 12, 13, 14, 15); | |
| + | |
| + let out0 = _mm512_permutex2var_epi64(pair01_02, idx_0, pair23_02); | |
| + let out1 = _mm512_permutex2var_epi64(pair01_02, idx_1, pair23_02); | |
| + let out2 = _mm512_permutex2var_epi64(pair01_13, idx_0, pair23_13); | |
| + let out3 = _mm512_permutex2var_epi64(pair01_13, idx_1, pair23_13); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores. | |
| + unsafe { | |
| + let ptr = dest.as_mut_ptr().cast::<__m512i>(); | |
| + _mm512_storeu_si512(ptr, out0); | |
| + _mm512_storeu_si512(ptr.add(1), out1); | |
| + _mm512_storeu_si512(ptr.add(2), out2); | |
| + _mm512_storeu_si512(ptr.add(3), out3); | |
| + } | |
| + } | |
| + // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`. | |
| + unsafe { impl_u8_4(a.0, b.0, c.0, d.0, dest) } | |
| + } | |
| +} | |
| + | |
| +#[derive(Clone, Copy, Debug)] | |
| +#[repr(transparent)] | |
| +pub struct U16VecAvx512(__m512i, Avx512Descriptor); | |
| + | |
| +// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*) | |
| +// ensure that they write valid data to the output slice without reading uninitialized memory. | |
| +unsafe impl U16SimdVec for U16VecAvx512 { | |
| + type Descriptor = Avx512Descriptor; | |
| + const LEN: usize = 32; | |
| + | |
| + #[inline(always)] | |
| + fn load(d: Self::Descriptor, mem: &[u16]) -> Self { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available | |
| + // from the safety invariant on `d`. _mm512_loadu_si512 supports unaligned loads. | |
| + unsafe { Self(_mm512_loadu_si512(mem.as_ptr().cast()), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn splat(d: Self::Descriptor, v: u16) -> Self { | |
| + // SAFETY: avx512 available. | |
| + unsafe { Self(_mm512_set1_epi16(v as i16), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store(&self, mem: &mut [u16]) { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know avx512f is available | |
| + // from the safety invariant on `d`. _mm512_storeu_si512 supports unaligned stores. | |
| + unsafe { _mm512_storeu_si512(mem.as_mut_ptr().cast(), self.0) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) { | |
| + #[target_feature(enable = "avx512f,avx512bw")] | |
| + #[inline] | |
| + fn impl_u16_2(a: __m512i, b: __m512i, dest: &mut [MaybeUninit<u16>]) { | |
| + assert!(dest.len() >= 2 * U16VecAvx512::LEN); | |
| + let lo = _mm512_unpacklo_epi16(a, b); | |
| + let hi = _mm512_unpackhi_epi16(a, b); | |
| + let idx0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11); | |
| + let idx1 = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15); | |
| + let out0 = _mm512_permutex2var_epi64(lo, idx0, hi); | |
| + let out1 = _mm512_permutex2var_epi64(lo, idx1, hi); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores. | |
| + unsafe { | |
| + let ptr = dest.as_mut_ptr().cast::<__m512i>(); | |
| + _mm512_storeu_si512(ptr, out0); | |
| + _mm512_storeu_si512(ptr.add(1), out1); | |
| + } | |
| + } | |
| + // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`. | |
| + unsafe { impl_u16_2(a.0, b.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) { | |
| + #[target_feature(enable = "avx512f,avx512bw")] | |
| + #[inline] | |
| + fn impl_u16_3(a: __m512i, b: __m512i, c: __m512i, dest: &mut [MaybeUninit<u16>]) { | |
| + assert!(dest.len() >= 3 * U16VecAvx512::LEN); | |
| + | |
| + let mask_a0 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1, | |
| + )); | |
| + let mask_b0 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, | |
| + )); | |
| + let mask_c0 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, -1, -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, | |
| + )); | |
| + | |
| + let mask_a1 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11, | |
| + )); | |
| + let mask_b1 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, | |
| + )); | |
| + let mask_c1 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + 4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, | |
| + )); | |
| + | |
| + let mask_a2 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, | |
| + )); | |
| + let mask_b2 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, | |
| + )); | |
| + let mask_c2 = _mm512_broadcast_i32x4(_mm_setr_epi8( | |
| + -1, -1, 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, | |
| + )); | |
| + | |
| + let res0 = _mm512_or_si512( | |
| + _mm512_or_si512( | |
| + _mm512_shuffle_epi8(a, mask_a0), | |
| + _mm512_shuffle_epi8(b, mask_b0), | |
| + ), | |
| + _mm512_shuffle_epi8(c, mask_c0), | |
| + ); | |
| + let res1 = _mm512_or_si512( | |
| + _mm512_or_si512( | |
| + _mm512_shuffle_epi8(a, mask_a1), | |
| + _mm512_shuffle_epi8(b, mask_b1), | |
| + ), | |
| + _mm512_shuffle_epi8(c, mask_c1), | |
| + ); | |
| + let res2 = _mm512_or_si512( | |
| + _mm512_or_si512( | |
| + _mm512_shuffle_epi8(a, mask_a2), | |
| + _mm512_shuffle_epi8(b, mask_b2), | |
| + ), | |
| + _mm512_shuffle_epi8(c, mask_c2), | |
| + ); | |
| + | |
| + let idx_a0 = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 0, 1); | |
| + let part_a0 = _mm512_permutex2var_epi64(res0, idx_a0, res1); | |
| + let idx_f0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 4, 5); | |
| + let final0 = _mm512_permutex2var_epi64(part_a0, idx_f0, res2); | |
| + | |
| + let idx_a1 = _mm512_setr_epi64(2, 3, 10, 11, 4, 5, 0, 1); | |
| + let part_a1 = _mm512_permutex2var_epi64(res1, idx_a1, res2); | |
| + let idx_f1 = _mm512_setr_epi64(0, 1, 2, 3, 12, 13, 4, 5); | |
| + let final1 = _mm512_permutex2var_epi64(part_a1, idx_f1, res0); | |
| + | |
| + let idx_a2 = _mm512_setr_epi64(4, 5, 14, 15, 6, 7, 0, 1); | |
| + let part_a2 = _mm512_permutex2var_epi64(res2, idx_a2, res0); | |
| + let idx_f2 = _mm512_setr_epi64(0, 1, 2, 3, 14, 15, 4, 5); | |
| + let final2 = _mm512_permutex2var_epi64(part_a2, idx_f2, res1); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores. | |
| + unsafe { | |
| + let ptr = dest.as_mut_ptr().cast::<__m512i>(); | |
| + _mm512_storeu_si512(ptr, final0); | |
| + _mm512_storeu_si512(ptr.add(1), final1); | |
| + _mm512_storeu_si512(ptr.add(2), final2); | |
| + } | |
| + } | |
| + // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`. | |
| + unsafe { impl_u16_3(a.0, b.0, c.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_4_uninit( | |
| + a: Self, | |
| + b: Self, | |
| + c: Self, | |
| + d: Self, | |
| + dest: &mut [MaybeUninit<u16>], | |
| + ) { | |
| + #[target_feature(enable = "avx512f,avx512bw")] | |
| + #[inline] | |
| + fn impl_u16_4( | |
| + a: __m512i, | |
| + b: __m512i, | |
| + c: __m512i, | |
| + d: __m512i, | |
| + dest: &mut [MaybeUninit<u16>], | |
| + ) { | |
| + assert!(dest.len() >= 4 * U16VecAvx512::LEN); | |
| + let ab_lo = _mm512_unpacklo_epi16(a, b); | |
| + let ab_hi = _mm512_unpackhi_epi16(a, b); | |
| + let cd_lo = _mm512_unpacklo_epi16(c, d); | |
| + let cd_hi = _mm512_unpackhi_epi16(c, d); | |
| + | |
| + let abcd_0 = _mm512_unpacklo_epi32(ab_lo, cd_lo); | |
| + let abcd_1 = _mm512_unpackhi_epi32(ab_lo, cd_lo); | |
| + let abcd_2 = _mm512_unpacklo_epi32(ab_hi, cd_hi); | |
| + let abcd_3 = _mm512_unpackhi_epi32(ab_hi, cd_hi); | |
| + | |
| + // Transpose 4x4 of 128-bit lanes (same as u8) | |
| + let idx_even = _mm512_setr_epi64(0, 1, 8, 9, 2, 3, 10, 11); | |
| + let idx_odd = _mm512_setr_epi64(4, 5, 12, 13, 6, 7, 14, 15); | |
| + | |
| + let pair01_02 = _mm512_permutex2var_epi64(abcd_0, idx_even, abcd_1); | |
| + let pair01_13 = _mm512_permutex2var_epi64(abcd_0, idx_odd, abcd_1); | |
| + let pair23_02 = _mm512_permutex2var_epi64(abcd_2, idx_even, abcd_3); | |
| + let pair23_13 = _mm512_permutex2var_epi64(abcd_2, idx_odd, abcd_3); | |
| + | |
| + let idx_0 = _mm512_setr_epi64(0, 1, 2, 3, 8, 9, 10, 11); | |
| + let idx_1 = _mm512_setr_epi64(4, 5, 6, 7, 12, 13, 14, 15); | |
| + | |
| + let out0 = _mm512_permutex2var_epi64(pair01_02, idx_0, pair23_02); | |
| + let out1 = _mm512_permutex2var_epi64(pair01_02, idx_1, pair23_02); | |
| + let out2 = _mm512_permutex2var_epi64(pair01_13, idx_0, pair23_13); | |
| + let out3 = _mm512_permutex2var_epi64(pair01_13, idx_1, pair23_13); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m512i` is valid. _mm512_storeu_si512 supports unaligned stores. | |
| + unsafe { | |
| + let ptr = dest.as_mut_ptr().cast::<__m512i>(); | |
| + _mm512_storeu_si512(ptr, out0); | |
| + _mm512_storeu_si512(ptr.add(1), out1); | |
| + _mm512_storeu_si512(ptr.add(2), out2); | |
| + _mm512_storeu_si512(ptr.add(3), out3); | |
| + } | |
| + } | |
| + // SAFETY: We know avx512f and avx512bw are available from the safety invariant on `d`. | |
| + unsafe { impl_u16_4(a.0, b.0, c.0, d.0, dest) } | |
| + } | |
| +} | |
| + | |
| impl SimdMask for MaskAvx512 { | |
| type Descriptor = Avx512Descriptor; | |
| diff --git a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs | |
| index b4021570c3f6d..5a4f52f4c30f9 100644 | |
| --- a/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs | |
| +++ b/third_party/rust/chromium_crates_io/vendor/jxl_simd-v0_3/src/x86_64/sse42.rs | |
| @@ -5,7 +5,7 @@ | |
| use crate::{U32SimdVec, impl_f32_array_interface}; | |
| -use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask}; | |
| +use super::super::{F32SimdVec, I32SimdVec, SimdDescriptor, SimdMask, U8SimdVec, U16SimdVec}; | |
| use std::{ | |
| arch::x86_64::*, | |
| mem::MaybeUninit, | |
| @@ -31,6 +31,8 @@ impl SimdDescriptor for Sse42Descriptor { | |
| type F32Vec = F32VecSse42; | |
| type I32Vec = I32VecSse42; | |
| type U32Vec = U32VecSse42; | |
| + type U16Vec = U16VecSse42; | |
| + type U8Vec = U8VecSse42; | |
| type Mask = MaskSse42; | |
| type Bf16Table8 = Bf16Table8Sse42; | |
| @@ -131,7 +133,7 @@ unsafe impl F32SimdVec for F32VecSse42 { | |
| let hi = _mm_unpackhi_ps(a, b); | |
| // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| _mm_storeu_ps(dest_ptr, lo); | |
| _mm_storeu_ps(dest_ptr.add(4), hi); | |
| } | |
| @@ -184,7 +186,7 @@ unsafe impl F32SimdVec for F32VecSse42 { | |
| // Store the results | |
| // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| _mm_storeu_ps(dest_ptr, out0); | |
| _mm_storeu_ps(dest_ptr.add(4), out1); | |
| _mm_storeu_ps(dest_ptr.add(8), out2); | |
| @@ -227,7 +229,7 @@ unsafe impl F32SimdVec for F32VecSse42 { | |
| // SAFETY: `dest` has enough space and writing to `MaybeUninit<f32>` through `*mut f32` is valid. | |
| unsafe { | |
| - let dest_ptr = dest.as_mut_ptr() as *mut f32; | |
| + let dest_ptr = dest.as_mut_ptr().cast::<f32>(); | |
| _mm_storeu_ps(dest_ptr, out0); | |
| _mm_storeu_ps(dest_ptr.add(4), out1); | |
| _mm_storeu_ps(dest_ptr.add(8), out2); | |
| @@ -575,10 +577,15 @@ unsafe impl F32SimdVec for F32VecSse42 { | |
| let u16s = _mm_packus_epi32(i32s, i32s); | |
| let u8s = _mm_packus_epi16(u16s, u16s); | |
| // Store lower 4 bytes | |
| - // SAFETY: we checked dest has enough space | |
| + let val = _mm_cvtsi128_si32(u8s); | |
| + let bytes = val.to_ne_bytes(); | |
| + // SAFETY: | |
| + // 1. `src` (bytes.as_ptr()) is valid for 4 bytes as it is a local [u8; 4]. | |
| + // 2. `dst` (dest.as_mut_ptr()) is valid for 4 bytes because dest.len() >= 4. | |
| + // 3. `src` and `dst` are properly aligned for u8 (alignment 1). | |
| + // 4. `src` and `dst` do not overlap as `src` is a local stack array. | |
| unsafe { | |
| - let ptr = dest.as_mut_ptr() as *mut i32; | |
| - *ptr = _mm_cvtsi128_si32(u8s); | |
| + std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 4); | |
| } | |
| } | |
| // SAFETY: sse4.2 is available from the safety invariant on the descriptor. | |
| @@ -598,9 +605,15 @@ unsafe impl F32SimdVec for F32VecSse42 { | |
| // Pack i32 -> u16 (use same vector twice, take lower half) | |
| let u16s = _mm_packus_epi32(i32s, i32s); | |
| // Store lower 8 bytes (4 u16s) | |
| - // SAFETY: we checked dest has enough space | |
| + let val = _mm_cvtsi128_si64(u16s); | |
| + let bytes = val.to_ne_bytes(); | |
| + // SAFETY: | |
| + // 1. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8]. | |
| + // 2. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 4 and each element is 2 bytes. | |
| + // 3. `src` and `dst` are properly aligned for u8 (alignment 1). | |
| + // 4. `src` and `dst` do not overlap as `src` is a local stack array. | |
| unsafe { | |
| - _mm_storel_epi64(dest.as_mut_ptr() as *mut __m128i, u16s); | |
| + std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8); | |
| } | |
| } | |
| // SAFETY: sse4.2 is available from the safety invariant on the descriptor. | |
| @@ -732,7 +745,7 @@ impl I32SimdVec for I32VecSse42 { | |
| assert!(mem.len() >= Self::LEN); | |
| // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available | |
| // from the safety invariant on `d`. | |
| - Self(unsafe { _mm_loadu_si128(mem.as_ptr() as *const _) }, d) | |
| + Self(unsafe { _mm_loadu_si128(mem.as_ptr().cast()) }, d) | |
| } | |
| #[inline(always)] | |
| @@ -820,17 +833,50 @@ impl I32SimdVec for I32VecSse42 { | |
| #[inline] | |
| fn store_u16_impl(v: __m128i, dest: &mut [u16]) { | |
| assert!(dest.len() >= I32VecSse42::LEN); | |
| - // Use scalar loop since _mm_packs_epi32 would saturate incorrectly for unsigned values | |
| - let mut tmp = [0i32; 4]; | |
| - // SAFETY: tmp has 4 elements, matching LEN | |
| - unsafe { _mm_storeu_si128(tmp.as_mut_ptr() as *mut __m128i, v) }; | |
| - for i in 0..4 { | |
| - dest[i] = tmp[i] as u16; | |
| + // Truncate i32 -> u16 using shuffle | |
| + let shuffle_mask = | |
| + _mm_setr_epi8(0, 1, 4, 5, 8, 9, 12, 13, -1, -1, -1, -1, -1, -1, -1, -1); | |
| + let u16s = _mm_shuffle_epi8(v, shuffle_mask); | |
| + let val = _mm_cvtsi128_si64(u16s); | |
| + let bytes = val.to_ne_bytes(); | |
| + // SAFETY: | |
| + // 1. `src` (bytes.as_ptr()) is valid for 8 bytes as it is a local [u8; 8]. | |
| + // 2. `dst` (dest.as_mut_ptr()) is valid for 8 bytes because dest.len() >= 4 and each element is 2 bytes. | |
| + // 3. `src` and `dst` are properly aligned for u8 (alignment 1). | |
| + // 4. `src` and `dst` do not overlap as `src` is a local stack array. | |
| + unsafe { | |
| + std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 8); | |
| } | |
| } | |
| // SAFETY: sse4.2 is available from the safety invariant on the descriptor. | |
| unsafe { store_u16_impl(self.0, dest) } | |
| } | |
| + | |
| + #[inline(always)] | |
| + fn store_u8(self, dest: &mut [u8]) { | |
| + #[target_feature(enable = "sse4.2")] | |
| + #[inline] | |
| + fn store_u8_impl(v: __m128i, dest: &mut [u8]) { | |
| + assert!(dest.len() >= I32VecSse42::LEN); | |
| + // Truncate i32 -> u8 using shuffle | |
| + let shuffle_mask = | |
| + _mm_setr_epi8(0, 4, 8, 12, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); | |
| + let u8s = _mm_shuffle_epi8(v, shuffle_mask); | |
| + // Store lower 4 bytes | |
| + let val = _mm_cvtsi128_si32(u8s); | |
| + let bytes = val.to_ne_bytes(); | |
| + // SAFETY: | |
| + // 1. `src` (bytes.as_ptr()) is valid for 4 bytes as it is a local [u8; 4]. | |
| + // 2. `dst` (dest.as_mut_ptr()) is valid for 4 bytes because dest.len() >= 4. | |
| + // 3. `src` and `dst` are properly aligned for u8 (alignment 1). | |
| + // 4. `src` and `dst` do not overlap as `src` is a local stack array. | |
| + unsafe { | |
| + std::ptr::copy_nonoverlapping(bytes.as_ptr(), dest.as_mut_ptr().cast::<u8>(), 4); | |
| + } | |
| + } | |
| + // SAFETY: sse4.2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_u8_impl(self.0, dest) } | |
| + } | |
| } | |
| impl Add<I32VecSse42> for I32VecSse42 { | |
| @@ -939,6 +985,312 @@ impl U32SimdVec for U32VecSse42 { | |
| } | |
| } | |
| +#[derive(Clone, Copy, Debug)] | |
| +#[repr(transparent)] | |
| +pub struct U8VecSse42(__m128i, Sse42Descriptor); | |
| + | |
| +// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*) | |
| +// ensure that they write valid data to the output slice without reading uninitialized memory. | |
| +unsafe impl U8SimdVec for U8VecSse42 { | |
| + type Descriptor = Sse42Descriptor; | |
| + const LEN: usize = 16; | |
| + | |
| + #[inline(always)] | |
| + fn load(d: Self::Descriptor, mem: &[u8]) -> Self { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available | |
| + // from the safety invariant on `d`. | |
| + unsafe { Self(_mm_loadu_si128(mem.as_ptr().cast()), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn splat(d: Self::Descriptor, v: u8) -> Self { | |
| + // SAFETY: We know sse4.2 is available from the safety invariant on `d`. | |
| + unsafe { Self(_mm_set1_epi8(v as i8), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store(&self, mem: &mut [u8]) { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available | |
| + // from the safety invariant on `self.1`. | |
| + unsafe { _mm_storeu_si128(mem.as_mut_ptr().cast(), self.0) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u8>]) { | |
| + #[target_feature(enable = "sse4.2")] | |
| + #[inline] | |
| + fn store_interleaved_2_impl(a: __m128i, b: __m128i, dest: &mut [MaybeUninit<u8>]) { | |
| + assert!(dest.len() >= 2 * U8VecSse42::LEN); | |
| + let lo = _mm_unpacklo_epi8(a, b); | |
| + let hi = _mm_unpackhi_epi8(a, b); | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m128i` is valid. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<__m128i>(); | |
| + _mm_storeu_si128(dest_ptr, lo); | |
| + _mm_storeu_si128(dest_ptr.add(1), hi); | |
| + } | |
| + } | |
| + // SAFETY: sse4.2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_2_impl(a.0, b.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u8>]) { | |
| + #[target_feature(enable = "sse4.2")] | |
| + #[inline] | |
| + fn store_interleaved_3_impl( | |
| + a: __m128i, | |
| + b: __m128i, | |
| + c: __m128i, | |
| + dest: &mut [MaybeUninit<u8>], | |
| + ) { | |
| + assert!(dest.len() >= 3 * U8VecSse42::LEN); | |
| + | |
| + // Masks for out0 | |
| + let mask_a0 = _mm_setr_epi8(0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1, 5); | |
| + let mask_b0 = _mm_setr_epi8(-1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1, -1); | |
| + let mask_c0 = _mm_setr_epi8(-1, -1, 0, -1, -1, 1, -1, -1, 2, -1, -1, 3, -1, -1, 4, -1); | |
| + | |
| + // Masks for out1 | |
| + let mask_a1 = _mm_setr_epi8(-1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10, -1); | |
| + let mask_b1 = _mm_setr_epi8(5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1, 10); | |
| + let mask_c1 = _mm_setr_epi8(-1, 5, -1, -1, 6, -1, -1, 7, -1, -1, 8, -1, -1, 9, -1, -1); | |
| + | |
| + // Masks for out2 | |
| + let mask_a2 = _mm_setr_epi8( | |
| + -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, -1, | |
| + ); | |
| + let mask_b2 = _mm_setr_epi8( | |
| + -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, -1, | |
| + ); | |
| + let mask_c2 = _mm_setr_epi8( | |
| + 10, -1, -1, 11, -1, -1, 12, -1, -1, 13, -1, -1, 14, -1, -1, 15, | |
| + ); | |
| + | |
| + let out0 = _mm_or_si128( | |
| + _mm_or_si128(_mm_shuffle_epi8(a, mask_a0), _mm_shuffle_epi8(b, mask_b0)), | |
| + _mm_shuffle_epi8(c, mask_c0), | |
| + ); | |
| + let out1 = _mm_or_si128( | |
| + _mm_or_si128(_mm_shuffle_epi8(a, mask_a1), _mm_shuffle_epi8(b, mask_b1)), | |
| + _mm_shuffle_epi8(c, mask_c1), | |
| + ); | |
| + let out2 = _mm_or_si128( | |
| + _mm_or_si128(_mm_shuffle_epi8(a, mask_a2), _mm_shuffle_epi8(b, mask_b2)), | |
| + _mm_shuffle_epi8(c, mask_c2), | |
| + ); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m128i` is valid. | |
| + unsafe { | |
| + let ptr = dest.as_mut_ptr().cast::<__m128i>(); | |
| + _mm_storeu_si128(ptr, out0); | |
| + _mm_storeu_si128(ptr.add(1), out1); | |
| + _mm_storeu_si128(ptr.add(2), out2); | |
| + } | |
| + } | |
| + // SAFETY: sse4.2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_4_uninit( | |
| + a: Self, | |
| + b: Self, | |
| + c: Self, | |
| + d: Self, | |
| + dest: &mut [MaybeUninit<u8>], | |
| + ) { | |
| + #[target_feature(enable = "sse4.2")] | |
| + #[inline] | |
| + fn store_interleaved_4_impl( | |
| + a: __m128i, | |
| + b: __m128i, | |
| + c: __m128i, | |
| + d: __m128i, | |
| + dest: &mut [MaybeUninit<u8>], | |
| + ) { | |
| + assert!(dest.len() >= 4 * U8VecSse42::LEN); | |
| + // First interleave pairs: ab and cd | |
| + let ab_lo = _mm_unpacklo_epi8(a, b); | |
| + let ab_hi = _mm_unpackhi_epi8(a, b); | |
| + let cd_lo = _mm_unpacklo_epi8(c, d); | |
| + let cd_hi = _mm_unpackhi_epi8(c, d); | |
| + | |
| + // Then interleave the pairs to get final layout | |
| + let out0 = _mm_unpacklo_epi16(ab_lo, cd_lo); | |
| + let out1 = _mm_unpackhi_epi16(ab_lo, cd_lo); | |
| + let out2 = _mm_unpacklo_epi16(ab_hi, cd_hi); | |
| + let out3 = _mm_unpackhi_epi16(ab_hi, cd_hi); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u8>` through `*mut __m128i` is valid. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<__m128i>(); | |
| + _mm_storeu_si128(dest_ptr, out0); | |
| + _mm_storeu_si128(dest_ptr.add(1), out1); | |
| + _mm_storeu_si128(dest_ptr.add(2), out2); | |
| + _mm_storeu_si128(dest_ptr.add(3), out3); | |
| + } | |
| + } | |
| + // SAFETY: sse4.2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) } | |
| + } | |
| +} | |
| + | |
| +#[derive(Clone, Copy, Debug)] | |
| +#[repr(transparent)] | |
| +pub struct U16VecSse42(__m128i, Sse42Descriptor); | |
| + | |
| +// SAFETY: The methods in this implementation that write to `MaybeUninit` (store_interleaved_*) | |
| +// ensure that they write valid data to the output slice without reading uninitialized memory. | |
| +unsafe impl U16SimdVec for U16VecSse42 { | |
| + type Descriptor = Sse42Descriptor; | |
| + const LEN: usize = 8; | |
| + | |
| + #[inline(always)] | |
| + fn load(d: Self::Descriptor, mem: &[u16]) -> Self { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available | |
| + // from the safety invariant on `d`. | |
| + unsafe { Self(_mm_loadu_si128(mem.as_ptr().cast()), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn splat(d: Self::Descriptor, v: u16) -> Self { | |
| + // SAFETY: We know sse4.2 is available from the safety invariant on `d`. | |
| + unsafe { Self(_mm_set1_epi16(v as i16), d) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store(&self, mem: &mut [u16]) { | |
| + assert!(mem.len() >= Self::LEN); | |
| + // SAFETY: we just checked that `mem` has enough space. Moreover, we know sse4.2 is available | |
| + // from the safety invariant on `self.1`. | |
| + unsafe { _mm_storeu_si128(mem.as_mut_ptr().cast(), self.0) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_2_uninit(a: Self, b: Self, dest: &mut [MaybeUninit<u16>]) { | |
| + #[target_feature(enable = "sse4.2")] | |
| + #[inline] | |
| + fn store_interleaved_2_impl(a: __m128i, b: __m128i, dest: &mut [MaybeUninit<u16>]) { | |
| + assert!(dest.len() >= 2 * U16VecSse42::LEN); | |
| + let lo = _mm_unpacklo_epi16(a, b); | |
| + let hi = _mm_unpackhi_epi16(a, b); | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m128i` is valid. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<__m128i>(); | |
| + _mm_storeu_si128(dest_ptr, lo); | |
| + _mm_storeu_si128(dest_ptr.add(1), hi); | |
| + } | |
| + } | |
| + // SAFETY: sse4.2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_2_impl(a.0, b.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_3_uninit(a: Self, b: Self, c: Self, dest: &mut [MaybeUninit<u16>]) { | |
| + #[target_feature(enable = "sse4.2")] | |
| + #[inline] | |
| + fn store_interleaved_3_impl( | |
| + a: __m128i, | |
| + b: __m128i, | |
| + c: __m128i, | |
| + dest: &mut [MaybeUninit<u16>], | |
| + ) { | |
| + assert!(dest.len() >= 3 * U16VecSse42::LEN); | |
| + | |
| + // Masks for out0 | |
| + let mask_a0 = _mm_setr_epi8(0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5, -1, -1); | |
| + let mask_b0 = _mm_setr_epi8(-1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1, 4, 5); | |
| + let mask_c0 = _mm_setr_epi8(-1, -1, -1, -1, 0, 1, -1, -1, -1, -1, 2, 3, -1, -1, -1, -1); | |
| + | |
| + // Masks for out1 | |
| + let mask_a1 = _mm_setr_epi8(-1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1, 10, 11); | |
| + let mask_b1 = _mm_setr_epi8(-1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1, -1, -1); | |
| + let mask_c1 = _mm_setr_epi8(4, 5, -1, -1, -1, -1, 6, 7, -1, -1, -1, -1, 8, 9, -1, -1); | |
| + | |
| + // Masks for out2 | |
| + let mask_a2 = _mm_setr_epi8( | |
| + -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, -1, -1, | |
| + ); | |
| + let mask_b2 = _mm_setr_epi8( | |
| + 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, -1, -1, | |
| + ); | |
| + let mask_c2 = _mm_setr_epi8( | |
| + -1, -1, 10, 11, -1, -1, -1, -1, 12, 13, -1, -1, -1, -1, 14, 15, | |
| + ); | |
| + | |
| + let out0 = _mm_or_si128( | |
| + _mm_or_si128(_mm_shuffle_epi8(a, mask_a0), _mm_shuffle_epi8(b, mask_b0)), | |
| + _mm_shuffle_epi8(c, mask_c0), | |
| + ); | |
| + let out1 = _mm_or_si128( | |
| + _mm_or_si128(_mm_shuffle_epi8(a, mask_a1), _mm_shuffle_epi8(b, mask_b1)), | |
| + _mm_shuffle_epi8(c, mask_c1), | |
| + ); | |
| + let out2 = _mm_or_si128( | |
| + _mm_or_si128(_mm_shuffle_epi8(a, mask_a2), _mm_shuffle_epi8(b, mask_b2)), | |
| + _mm_shuffle_epi8(c, mask_c2), | |
| + ); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m128i` is valid. | |
| + unsafe { | |
| + let ptr = dest.as_mut_ptr().cast::<__m128i>(); | |
| + _mm_storeu_si128(ptr, out0); | |
| + _mm_storeu_si128(ptr.add(1), out1); | |
| + _mm_storeu_si128(ptr.add(2), out2); | |
| + } | |
| + } | |
| + // SAFETY: sse4.2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_3_impl(a.0, b.0, c.0, dest) } | |
| + } | |
| + | |
| + #[inline(always)] | |
| + fn store_interleaved_4_uninit( | |
| + a: Self, | |
| + b: Self, | |
| + c: Self, | |
| + d: Self, | |
| + dest: &mut [MaybeUninit<u16>], | |
| + ) { | |
| + #[target_feature(enable = "sse4.2")] | |
| + #[inline] | |
| + fn store_interleaved_4_impl( | |
| + a: __m128i, | |
| + b: __m128i, | |
| + c: __m128i, | |
| + d: __m128i, | |
| + dest: &mut [MaybeUninit<u16>], | |
| + ) { | |
| + assert!(dest.len() >= 4 * U16VecSse42::LEN); | |
| + // First interleave pairs: ab and cd | |
| + let ab_lo = _mm_unpacklo_epi16(a, b); | |
| + let ab_hi = _mm_unpackhi_epi16(a, b); | |
| + let cd_lo = _mm_unpacklo_epi16(c, d); | |
| + let cd_hi = _mm_unpackhi_epi16(c, d); | |
| + | |
| + // Then interleave the pairs to get final layout | |
| + let out0 = _mm_unpacklo_epi32(ab_lo, cd_lo); | |
| + let out1 = _mm_unpackhi_epi32(ab_lo, cd_lo); | |
| + let out2 = _mm_unpacklo_epi32(ab_hi, cd_hi); | |
| + let out3 = _mm_unpackhi_epi32(ab_hi, cd_hi); | |
| + | |
| + // SAFETY: `dest` has enough space and writing to `MaybeUninit<u16>` through `*mut __m128i` is valid. | |
| + unsafe { | |
| + let dest_ptr = dest.as_mut_ptr().cast::<__m128i>(); | |
| + _mm_storeu_si128(dest_ptr, out0); | |
| + _mm_storeu_si128(dest_ptr.add(1), out1); | |
| + _mm_storeu_si128(dest_ptr.add(2), out2); | |
| + _mm_storeu_si128(dest_ptr.add(3), out3); | |
| + } | |
| + } | |
| + // SAFETY: sse4.2 is available from the safety invariant on the descriptor. | |
| + unsafe { store_interleaved_4_impl(a.0, b.0, c.0, d.0, dest) } | |
| + } | |
| +} | |
| + | |
| impl SimdMask for MaskSse42 { | |
| type Descriptor = Sse42Descriptor; | |
| diff --git a/third_party/rust/jxl/v0_3/BUILD.gn b/third_party/rust/jxl/v0_3/BUILD.gn | |
| index 4407346d077db..87612e3a76cfa 100644 | |
| --- a/third_party/rust/jxl/v0_3/BUILD.gn | |
| +++ b/third_party/rust/jxl/v0_3/BUILD.gn | |
| @@ -28,11 +28,13 @@ cargo_crate("lib") { | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/mod.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/options.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/signature.rs", | |
| + "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/api/xyb_constants.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/bit_reader.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/color/mod.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/color/tf.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/box_header.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/mod.rs", | |
| + "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/frame_index.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/container/parse.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/ans.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/entropy_coding/context_map.rs", | |
| @@ -53,6 +55,7 @@ cargo_crate("lib") { | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/color_correlation_map.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/decode.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/group.rs", | |
| + "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/lf_preview.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/mod.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/borrowed_buffers.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/frame/modular/decode/bitstream.rs", | |
| @@ -101,6 +104,7 @@ cargo_crate("lib") { | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/channels.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/internal.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/helpers.rs", | |
| + "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/group_scheduler.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/mod.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/render_group.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/render/low_memory_pipeline/row_buffers.rs", | |
| @@ -148,6 +152,7 @@ cargo_crate("lib") { | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/float16.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/linalg.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/log2.rs", | |
| + "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mirror.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/mod.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/ndarray.rs", | |
| "//third_party/rust/chromium_crates_io/vendor/jxl-v0_3/src/util/rational_poly.rs", | |
| diff --git a/third_party/rust/jxl/v0_3/wrapper/lib.rs b/third_party/rust/jxl/v0_3/wrapper/lib.rs | |
| index 0e7e83dc6f1d5..66696c1235d80 100644 | |
| --- a/third_party/rust/jxl/v0_3/wrapper/lib.rs | |
| +++ b/third_party/rust/jxl/v0_3/wrapper/lib.rs | |
| @@ -4,8 +4,12 @@ | |
| //! Minimal C++ wrapper for jxl-rs decoder. | |
| //! | |
| -//! This thin wrapper provides C++-compatible types for the jxl-rs decoder. | |
| -//! State tracking is handled by the C++ caller (JXLImageDecoder). | |
| +//! Two decoder types are exposed: | |
| +//! | |
| +//! - `JxlRsFrameScanner`: lightweight frame-header-only scanner that discovers | |
| +//! frame count, durations, and seek offsets without decoding any pixels. | |
| +//! - `JxlRsDecoder`: full pixel decoder with the original state-machine API, | |
| +//! plus new seeking and progressive flush support. | |
| use jxl::api::{ | |
| check_signature, Endianness, JxlBasicInfo, JxlColorEncoding, JxlColorProfile, JxlColorType, | |
| @@ -62,15 +66,60 @@ mod ffi { | |
| bytes_consumed: usize, | |
| } | |
| + /// Information about a single visible frame discovered by the scanner. | |
| + #[derive(Debug, Clone)] | |
| + struct JxlRsVisibleFrameInfo { | |
| + /// Duration in milliseconds. | |
| + duration_ms: f64, | |
| + /// Whether this frame can be decoded independently (no dependencies). | |
| + is_keyframe: bool, | |
| + /// Whether this is the last frame in the codestream. | |
| + is_last: bool, | |
| + /// File byte offset to start feeding input from when seeking. | |
| + decode_start_file_offset: usize, | |
| + /// Box parser state at seek point (for container-wrapped files). | |
| + remaining_in_box: u64, | |
| + /// Number of visible frames to skip after seeking before decoding | |
| + /// the target. | |
| + visible_frames_to_skip: usize, | |
| + } | |
| + | |
| extern "Rust" { | |
| + // ---- Frame scanner (lightweight, no pixel decoding) ---- | |
| + type JxlRsFrameScanner; | |
| + | |
| + fn jxl_rs_frame_scanner_create(pixel_limit: u64) -> Box<JxlRsFrameScanner>; | |
| + | |
| + /// Feed data to the scanner. Returns Success when all frames have been | |
| + /// scanned (is_last seen), NeedMoreInput if more data is needed, or | |
| + /// Error on failure. | |
| + fn feed( | |
| + self: &mut JxlRsFrameScanner, | |
| + data: &[u8], | |
| + all_input: bool, | |
| + ) -> JxlRsProcessResult; | |
| + | |
| + /// Get basic info (valid after first successful feed). | |
| + fn get_basic_info(self: &JxlRsFrameScanner) -> JxlRsBasicInfo; | |
| + | |
| + /// Get ICC profile data. | |
| + fn get_icc_profile(self: &JxlRsFrameScanner) -> &[u8]; | |
| + | |
| + /// Number of visible frames discovered so far. | |
| + fn frame_count(self: &JxlRsFrameScanner) -> usize; | |
| + | |
| + /// Get info for a specific frame index. | |
| + fn get_frame_info(self: &JxlRsFrameScanner, index: usize) -> JxlRsVisibleFrameInfo; | |
| + | |
| + /// Whether basic info has been parsed. | |
| + fn has_basic_info(self: &JxlRsFrameScanner) -> bool; | |
| + | |
| + // ---- Full pixel decoder ---- | |
| type JxlRsDecoder; | |
| fn jxl_rs_decoder_create(pixel_limit: u64, premultiply_alpha: bool) -> Box<JxlRsDecoder>; | |
| fn jxl_rs_signature_check(data: &[u8]) -> bool; | |
| - /// Rewind decoder for animation loop replay. | |
| - fn rewind(self: &mut JxlRsDecoder); | |
| - | |
| /// Set the output pixel format. Must be called after getting basic info. | |
| fn set_pixel_format( | |
| self: &mut JxlRsDecoder, | |
| @@ -85,53 +134,198 @@ mod ffi { | |
| all_input: bool, | |
| ) -> JxlRsProcessResult; | |
| - /// Parse until next frame header is available. Returns Success if no more frames. | |
| + /// Parse until next frame header is available. | |
| fn parse_frame_header( | |
| self: &mut JxlRsDecoder, | |
| data: &[u8], | |
| all_input: bool, | |
| ) -> JxlRsProcessResult; | |
| - /// Decode frame pixels into the provided buffer. | |
| - fn decode_frame( | |
| + /// Decode frame pixels with custom stride (for direct frame buffer | |
| + /// decoding). | |
| + fn decode_frame_with_stride( | |
| self: &mut JxlRsDecoder, | |
| data: &[u8], | |
| all_input: bool, | |
| buffer: &mut [u8], | |
| width: u32, | |
| height: u32, | |
| + row_stride: usize, | |
| ) -> JxlRsProcessResult; | |
| - /// Decode frame pixels with custom stride (for direct frame buffer decoding). | |
| - fn decode_frame_with_stride( | |
| + /// Flush whatever pixels have been decoded so far into the buffer. | |
| + /// Use for progressive rendering. | |
| + fn flush_pixels( | |
| self: &mut JxlRsDecoder, | |
| - data: &[u8], | |
| - all_input: bool, | |
| buffer: &mut [u8], | |
| width: u32, | |
| height: u32, | |
| row_stride: usize, | |
| ) -> JxlRsProcessResult; | |
| - /// Get basic info (valid after parse_basic_info succeeds, or a decode | |
| - /// call that yields BasicInfo). | |
| + /// Get basic info (valid after parse_basic_info succeeds). | |
| fn get_basic_info(self: &JxlRsDecoder) -> JxlRsBasicInfo; | |
| /// Get frame header (valid after parse_frame_header succeeds). | |
| fn get_frame_header(self: &JxlRsDecoder) -> JxlRsFrameHeader; | |
| /// Get ICC profile data (valid after parse_basic_info succeeds). | |
| - /// Returns an empty slice if no embedded ICC profile exists. | |
| fn get_icc_profile(self: &JxlRsDecoder) -> &[u8]; | |
| /// Check if more frames are available. | |
| fn has_more_frames(self: &JxlRsDecoder) -> bool; | |
| + | |
| + /// Seek the decoder to a specific frame using offsets from the scanner. | |
| + /// After calling this, provide input starting from | |
| + /// decode_start_file_offset. The decoder must have basic info parsed. | |
| + fn seek_to_frame( | |
| + self: &mut JxlRsDecoder, | |
| + remaining_in_box: u64, | |
| + ); | |
| + | |
| + /// Skip N visible frames without decoding pixels. | |
| + /// Use after seek_to_frame when visible_frames_to_skip > 0. | |
| + /// Returns Success when one frame has been skipped, NeedMoreInput or | |
| + /// Error otherwise. | |
| + fn skip_visible_frame( | |
| + self: &mut JxlRsDecoder, | |
| + data: &[u8], | |
| + all_input: bool, | |
| + ) -> JxlRsProcessResult; | |
| } | |
| } | |
| use ffi::*; | |
| -/// Thin wrapper around JxlDecoderInner. | |
| +// --------------------------------------------------------------------------- | |
| +// Frame Scanner | |
| +// --------------------------------------------------------------------------- | |
| + | |
| +/// Lightweight scanner that discovers frame info without decoding pixels. | |
| +pub struct JxlRsFrameScanner { | |
| + decoder: JxlDecoderInner, | |
| + icc_profile: Vec<u8>, | |
| + has_basic_info: bool, | |
| +} | |
| + | |
| +fn jxl_rs_frame_scanner_create(pixel_limit: u64) -> Box<JxlRsFrameScanner> { | |
| + let mut opts = JxlDecoderOptions::default(); | |
| + opts.scan_frames_only = true; | |
| + if pixel_limit > 0 { | |
| + opts.pixel_limit = Some(pixel_limit as usize); | |
| + } | |
| + | |
| + Box::new(JxlRsFrameScanner { | |
| + decoder: JxlDecoderInner::new(opts), | |
| + icc_profile: Vec::new(), | |
| + has_basic_info: false, | |
| + }) | |
| +} | |
| + | |
| +impl JxlRsFrameScanner { | |
| + fn feed(&mut self, data: &[u8], all_input: bool) -> JxlRsProcessResult { | |
| + let mut input = data; | |
| + let len_before = input.len(); | |
| + | |
| + loop { | |
| + match self.decoder.process(&mut input, None) { | |
| + Ok(ProcessingResult::Complete { .. }) => { | |
| + if !self.has_basic_info && self.decoder.basic_info().is_some() { | |
| + self.has_basic_info = true; | |
| + if let Some(profile) = self.decoder.output_color_profile() { | |
| + if let Some(icc) = profile.try_as_icc() { | |
| + if !icc.is_empty() { | |
| + self.icc_profile = icc.into_owned(); | |
| + } | |
| + } | |
| + } | |
| + } | |
| + | |
| + if !self.decoder.has_more_frames() { | |
| + return JxlRsProcessResult { | |
| + status: JxlRsStatus::Success, | |
| + bytes_consumed: len_before - input.len(), | |
| + }; | |
| + } | |
| + } | |
| + Ok(ProcessingResult::NeedsMoreInput { .. }) => { | |
| + return JxlRsProcessResult { | |
| + status: if all_input { | |
| + JxlRsStatus::Error | |
| + } else { | |
| + JxlRsStatus::NeedMoreInput | |
| + }, | |
| + bytes_consumed: len_before - input.len(), | |
| + }; | |
| + } | |
| + Err(_) => { | |
| + return JxlRsProcessResult { | |
| + status: JxlRsStatus::Error, | |
| + bytes_consumed: 0, | |
| + }; | |
| + } | |
| + } | |
| + } | |
| + } | |
| + | |
| + fn get_basic_info(&self) -> JxlRsBasicInfo { | |
| + let mut info = self | |
| + .decoder | |
| + .basic_info() | |
| + .map(JxlRsBasicInfo::from) | |
| + .unwrap_or_default(); | |
| + | |
| + if let Some(profile) = self.decoder.embedded_color_profile() { | |
| + info.is_grayscale = matches!( | |
| + profile, | |
| + JxlColorProfile::Simple(JxlColorEncoding::GrayscaleColorSpace { .. }) | |
| + ); | |
| + } | |
| + | |
| + info | |
| + } | |
| + | |
| + fn get_icc_profile(&self) -> &[u8] { | |
| + &self.icc_profile | |
| + } | |
| + | |
| + fn frame_count(&self) -> usize { | |
| + self.decoder.scanned_frames().len() | |
| + } | |
| + | |
| + fn get_frame_info(&self, index: usize) -> JxlRsVisibleFrameInfo { | |
| + let frames = self.decoder.scanned_frames(); | |
| + if index >= frames.len() { | |
| + return JxlRsVisibleFrameInfo { | |
| + duration_ms: 0.0, | |
| + is_keyframe: false, | |
| + is_last: false, | |
| + decode_start_file_offset: 0, | |
| + remaining_in_box: 0, | |
| + visible_frames_to_skip: 0, | |
| + }; | |
| + } | |
| + let f = &frames[index]; | |
| + JxlRsVisibleFrameInfo { | |
| + duration_ms: f.duration_ms, | |
| + is_keyframe: f.is_keyframe, | |
| + is_last: f.is_last, | |
| + decode_start_file_offset: f.seek_target.decode_start_file_offset, | |
| + remaining_in_box: f.seek_target.remaining_in_box, | |
| + visible_frames_to_skip: f.seek_target.visible_frames_to_skip, | |
| + } | |
| + } | |
| + | |
| + fn has_basic_info(&self) -> bool { | |
| + self.has_basic_info | |
| + } | |
| +} | |
| + | |
| +// --------------------------------------------------------------------------- | |
| +// Full Pixel Decoder | |
| +// --------------------------------------------------------------------------- | |
| + | |
| +/// Full pixel decoder with seeking and progressive flush support. | |
| pub struct JxlRsDecoder { | |
| decoder: JxlDecoderInner, | |
| pixel_format: Option<JxlPixelFormat>, | |
| @@ -140,7 +334,7 @@ pub struct JxlRsDecoder { | |
| fn jxl_rs_decoder_create(pixel_limit: u64, premultiply_alpha: bool) -> Box<JxlRsDecoder> { | |
| let mut opts = JxlDecoderOptions::default(); | |
| - opts.progressive_mode = JxlProgressiveMode::FullFrame; | |
| + opts.progressive_mode = JxlProgressiveMode::Pass; | |
| opts.premultiply_output = premultiply_alpha; | |
| if pixel_limit > 0 { | |
| opts.pixel_limit = Some(pixel_limit as usize); | |
| @@ -162,10 +356,6 @@ fn jxl_rs_signature_check(data: &[u8]) -> bool { | |
| } | |
| impl JxlRsDecoder { | |
| - fn rewind(&mut self) { | |
| - let _ = self.decoder.rewind(); | |
| - } | |
| - | |
| fn set_pixel_format(&mut self, format: JxlRsPixelFormat, num_extra_channels: u32) { | |
| let pixel_format = match format { | |
| JxlRsPixelFormat::Rgba8 => JxlPixelFormat { | |
| @@ -216,9 +406,6 @@ impl JxlRsDecoder { | |
| match self.decoder.process(&mut input, None) { | |
| Ok(ProcessingResult::Complete { .. }) => { | |
| - // Extract ICC profile on first successful parse. | |
| - // Use try_as_icc() which returns None on error instead of | |
| - // as_icc() which panics on malformed color profiles. | |
| if self.icc_profile.is_empty() { | |
| if let Some(profile) = self.decoder.output_color_profile() { | |
| if let Some(icc) = profile.try_as_icc() { | |
| @@ -282,22 +469,17 @@ impl JxlRsDecoder { | |
| } | |
| } | |
| - fn extract_frame_header(&self) -> Option<JxlRsFrameHeader> { | |
| - let fh = self.decoder.frame_header()?; | |
| - Some(JxlRsFrameHeader { | |
| - duration_ms: fh.duration.unwrap_or(0.0), | |
| - name_length: fh.name.len() as u32, | |
| - }) | |
| - } | |
| - | |
| - fn decode_frame( | |
| + fn decode_frame_with_stride( | |
| &mut self, | |
| data: &[u8], | |
| all_input: bool, | |
| buffer: &mut [u8], | |
| width: u32, | |
| height: u32, | |
| + row_stride: usize, | |
| ) -> JxlRsProcessResult { | |
| + use std::mem::MaybeUninit; | |
| + | |
| let mut input = data; | |
| let len_before = input.len(); | |
| @@ -308,8 +490,8 @@ impl JxlRsDecoder { | |
| .map(|d| d.bytes_per_sample() * 4) | |
| .unwrap_or(4); | |
| let bytes_per_row = width as usize * bytes_per_pixel; | |
| - let expected_size = bytes_per_row * height as usize; | |
| + let expected_size = row_stride * (height as usize - 1) + bytes_per_row; | |
| if buffer.len() < expected_size { | |
| return JxlRsProcessResult { | |
| status: JxlRsStatus::Error, | |
| @@ -317,7 +499,16 @@ impl JxlRsDecoder { | |
| }; | |
| } | |
| - let output = JxlOutputBuffer::new(buffer, height as usize, bytes_per_row); | |
| + // SAFETY: The buffer is valid for writes, and we've verified it has | |
| + // enough space. | |
| + let output = unsafe { | |
| + JxlOutputBuffer::new_from_ptr( | |
| + buffer.as_mut_ptr() as *mut MaybeUninit<u8>, | |
| + height as usize, | |
| + bytes_per_row, | |
| + row_stride, | |
| + ) | |
| + }; | |
| match self.decoder.process(&mut input, Some(&mut [output])) { | |
| Ok(ProcessingResult::Complete { .. }) => JxlRsProcessResult { | |
| @@ -344,10 +535,8 @@ impl JxlRsDecoder { | |
| } | |
| } | |
| - fn decode_frame_with_stride( | |
| + fn flush_pixels( | |
| &mut self, | |
| - data: &[u8], | |
| - all_input: bool, | |
| buffer: &mut [u8], | |
| width: u32, | |
| height: u32, | |
| @@ -355,9 +544,6 @@ impl JxlRsDecoder { | |
| ) -> JxlRsProcessResult { | |
| use std::mem::MaybeUninit; | |
| - let mut input = data; | |
| - let len_before = input.len(); | |
| - | |
| let bytes_per_pixel = self | |
| .pixel_format | |
| .as_ref() | |
| @@ -366,7 +552,6 @@ impl JxlRsDecoder { | |
| .unwrap_or(4); | |
| let bytes_per_row = width as usize * bytes_per_pixel; | |
| - // Validate buffer size with custom stride | |
| let expected_size = row_stride * (height as usize - 1) + bytes_per_row; | |
| if buffer.len() < expected_size { | |
| return JxlRsProcessResult { | |
| @@ -375,8 +560,6 @@ impl JxlRsDecoder { | |
| }; | |
| } | |
| - // SAFETY: The buffer is valid for writes, and we've verified it has enough space. | |
| - // new_from_ptr allows custom stride (bytes_between_rows). | |
| let output = unsafe { | |
| JxlOutputBuffer::new_from_ptr( | |
| buffer.as_mut_ptr() as *mut MaybeUninit<u8>, | |
| @@ -386,24 +569,11 @@ impl JxlRsDecoder { | |
| ) | |
| }; | |
| - match self.decoder.process(&mut input, Some(&mut [output])) { | |
| - Ok(ProcessingResult::Complete { .. }) => JxlRsProcessResult { | |
| + match self.decoder.flush_pixels(&mut [output]) { | |
| + Ok(()) => JxlRsProcessResult { | |
| status: JxlRsStatus::Success, | |
| - bytes_consumed: len_before - input.len(), | |
| + bytes_consumed: 0, | |
| }, | |
| - Ok(ProcessingResult::NeedsMoreInput { .. }) => { | |
| - if all_input { | |
| - JxlRsProcessResult { | |
| - status: JxlRsStatus::Error, | |
| - bytes_consumed: 0, | |
| - } | |
| - } else { | |
| - JxlRsProcessResult { | |
| - status: JxlRsStatus::NeedMoreInput, | |
| - bytes_consumed: len_before - input.len(), | |
| - } | |
| - } | |
| - } | |
| Err(_) => JxlRsProcessResult { | |
| status: JxlRsStatus::Error, | |
| bytes_consumed: 0, | |
| @@ -418,7 +588,6 @@ impl JxlRsDecoder { | |
| .map(JxlRsBasicInfo::from) | |
| .unwrap_or_default(); | |
| - // Check if the image is grayscale based on the embedded color profile. | |
| if let Some(profile) = self.decoder.embedded_color_profile() { | |
| info.is_grayscale = matches!( | |
| profile, | |
| @@ -429,6 +598,14 @@ impl JxlRsDecoder { | |
| info | |
| } | |
| + fn extract_frame_header(&self) -> Option<JxlRsFrameHeader> { | |
| + let fh = self.decoder.frame_header()?; | |
| + Some(JxlRsFrameHeader { | |
| + duration_ms: fh.duration.unwrap_or(0.0), | |
| + name_length: fh.name.len() as u32, | |
| + }) | |
| + } | |
| + | |
| fn get_frame_header(&self) -> JxlRsFrameHeader { | |
| self.extract_frame_header().unwrap_or_default() | |
| } | |
| @@ -440,6 +617,60 @@ impl JxlRsDecoder { | |
| fn has_more_frames(&self) -> bool { | |
| self.decoder.has_more_frames() | |
| } | |
| + | |
| + fn seek_to_frame(&mut self, remaining_in_box: u64) { | |
| + self.decoder.start_new_frame(remaining_in_box); | |
| + } | |
| + | |
| + fn skip_visible_frame( | |
| + &mut self, | |
| + data: &[u8], | |
| + all_input: bool, | |
| + ) -> JxlRsProcessResult { | |
| + let mut input = data; | |
| + let len_before = input.len(); | |
| + | |
| + // Phase 1: process to get frame header (WithImageInfo -> WithFrameInfo) | |
| + match self.decoder.process(&mut input, None) { | |
| + Ok(ProcessingResult::Complete { .. }) => {} | |
| + Ok(ProcessingResult::NeedsMoreInput { .. }) => { | |
| + return JxlRsProcessResult { | |
| + status: if all_input { | |
| + JxlRsStatus::Error | |
| + } else { | |
| + JxlRsStatus::NeedMoreInput | |
| + }, | |
| + bytes_consumed: len_before - input.len(), | |
| + }; | |
| + } | |
| + Err(_) => { | |
| + return JxlRsProcessResult { | |
| + status: JxlRsStatus::Error, | |
| + bytes_consumed: 0, | |
| + }; | |
| + } | |
| + } | |
| + | |
| + // Phase 2: skip frame (WithFrameInfo -> WithImageInfo) | |
| + match self.decoder.process(&mut input, None) { | |
| + Ok(ProcessingResult::Complete { .. }) => JxlRsProcessResult { | |
| + status: JxlRsStatus::Success, | |
| + bytes_consumed: len_before - input.len(), | |
| + }, | |
| + Ok(ProcessingResult::NeedsMoreInput { .. }) => JxlRsProcessResult { | |
| + status: if all_input { | |
| + JxlRsStatus::Error | |
| + } else { | |
| + JxlRsStatus::NeedMoreInput | |
| + }, | |
| + bytes_consumed: len_before - input.len(), | |
| + }, | |
| + Err(_) => JxlRsProcessResult { | |
| + status: JxlRsStatus::Error, | |
| + bytes_consumed: 0, | |
| + }, | |
| + } | |
| + } | |
| } | |
| impl Default for JxlRsBasicInfo { | |
| @@ -486,8 +717,6 @@ impl From<&JxlBasicInfo> for JxlRsBasicInfo { | |
| animation_tps_denominator: tps_den, | |
| uses_original_profile: info.uses_original_profile, | |
| orientation: info.orientation as u32, | |
| - // Note: is_grayscale is set by get_basic_info() after checking the | |
| - // color profile, since JxlBasicInfo doesn't contain color info. | |
| is_grayscale: false, | |
| } | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment