aosp12/external/webrtc/video/frame_encode_metadata_write...

/*
 *  Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "video/frame_encode_metadata_writer.h"

#include <cstddef>
#include <vector>

#include "api/video/i420_buffer.h"
#include "api/video/video_frame.h"
#include "api/video/video_timing.h"
#include "common_video/h264/h264_common.h"
#include "common_video/test/utilities.h"
#include "modules/video_coding/include/video_coding_defines.h"
#include "rtc_base/time_utils.h"
#include "test/gmock.h"
#include "test/gtest.h"

namespace webrtc {
namespace test {
namespace {

// 4x4 I420 buffer shared by the VideoFrames built in the tests below; the
// tests only need a valid buffer to attach to each frame.
const rtc::scoped_refptr<I420Buffer> kFrameBuffer = I420Buffer::Create(4, 4);

// Returns a deterministic size for frame |i| of stream |s|:
// |min_frame_size| plus ((s + 1) * i) reduced modulo the width of the
// [min_frame_size, max_frame_size) range, so the result never reaches
// |max_frame_size|.
// If the range is empty (|max_frame_size| == |min_frame_size|) every frame is
// |min_frame_size| bytes; this avoids a modulo by zero.
// |s| and |i| are expected to be non-negative.
inline size_t FrameSize(const size_t& min_frame_size,
                        const size_t& max_frame_size,
                        const int& s,
                        const int& i) {
  const size_t range = max_frame_size - min_frame_size;
  if (range == 0) {
    return min_frame_size;
  }
  // '*' and '%' associate left to right: the product is reduced, not |i|.
  return min_frame_size + (s + 1) * i % range;
}

// Minimal EncodedImageCallback handed to the writer under test as its sink.
// It accepts every encoded image and only keeps a count of the
// OnDroppedFrame() notifications it receives.
class FakeEncodedImageCallback : public EncodedImageCallback {
 public:
  FakeEncodedImageCallback() = default;

  // Ignores all arguments and always reports success.
  Result OnEncodedImage(const EncodedImage& encoded_image,
                        const CodecSpecificInfo* codec_specific_info,
                        const RTPFragmentationHeader* fragmentation) override {
    return Result(Result::OK);
  }

  // Records one more dropped frame; |reason| is not inspected.
  void OnDroppedFrame(DropReason reason) override {
    num_frames_dropped_ += 1;
  }

  // Number of OnDroppedFrame() calls seen so far.
  size_t GetNumFramesDropped() { return num_frames_dropped_; }

 private:
  size_t num_frames_dropped_ = 0;
};

// Per-stream, per-frame classification recorded by GetTimingFrames().
enum class FrameType {
  // Went through FillTimingInfo() and IsTimingFrame() returned false.
  kNormal,
  // Went through FillTimingInfo() and IsTimingFrame() returned true.
  kTiming,
  // Deliberately skipped by GetTimingFrames(); FillTimingInfo() was not called.
  kDropped,
};

// Returns true when the timing flags of |image| are neither
// VideoSendTiming::kInvalid nor VideoSendTiming::kNotTriggered.
bool IsTimingFrame(const EncodedImage& image) {
  const auto& flags = image.timing_.flags;
  if (flags == VideoSendTiming::kInvalid) {
    return false;
  }
  return flags != VideoSendTiming::kNotTriggered;
}

// Emulates |num_frames| on |num_streams| frames with capture timestamps
// increased by 1 from 0. Size of each frame is between
// |min_frame_size| and |max_frame_size|, outliers are counted relatevely to
// |average_frame_sizes[]| for each stream.
std::vector<std::vector<FrameType>> GetTimingFrames(
    const int64_t delay_ms,
    const size_t min_frame_size,
    const size_t max_frame_size,
    std::vector<size_t> average_frame_sizes,
    const int num_streams,
    const int num_frames) {
  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_timer(&sink);
  VideoCodec codec_settings;
  codec_settings.numberOfSimulcastStreams = num_streams;
  codec_settings.timing_frame_thresholds = {delay_ms,
                                            kDefaultOutlierFrameSizePercent};
  encode_timer.OnEncoderInit(codec_settings, false);
  const size_t kFramerate = 30;
  VideoBitrateAllocation bitrate_allocation;
  for (int si = 0; si < num_streams; ++si) {
    bitrate_allocation.SetBitrate(si, 0,
                                  average_frame_sizes[si] * 8 * kFramerate);
  }
  encode_timer.OnSetRates(bitrate_allocation, kFramerate);

  std::vector<std::vector<FrameType>> result(num_streams);
  int64_t current_timestamp = 0;
  for (int i = 0; i < num_frames; ++i) {
    current_timestamp += 1;
    VideoFrame frame = VideoFrame::Builder()
                           .set_timestamp_rtp(current_timestamp * 90)
                           .set_timestamp_ms(current_timestamp)
                           .set_video_frame_buffer(kFrameBuffer)
                           .build();
    encode_timer.OnEncodeStarted(frame);
    for (int si = 0; si < num_streams; ++si) {
      // every (5+s)-th frame is dropped on s-th stream by design.
      bool dropped = i % (5 + si) == 0;

      EncodedImage image;
      image.SetEncodedData(EncodedImageBuffer::Create(max_frame_size));
      image.set_size(FrameSize(min_frame_size, max_frame_size, si, i));
      image.capture_time_ms_ = current_timestamp;
      image.SetTimestamp(static_cast<uint32_t>(current_timestamp * 90));
      image.SetSpatialIndex(si);

      if (dropped) {
        result[si].push_back(FrameType::kDropped);
        continue;
      }

      encode_timer.FillTimingInfo(si, &image);

      if (IsTimingFrame(image)) {
        result[si].push_back(FrameType::kTiming);
      } else {
        result[si].push_back(FrameType::kNormal);
      }
    }
  }
  return result;
}
}  // namespace

// With no outliers possible, timing frames must be triggered purely by the
// delay threshold and therefore be marked on all streams at the same frame.
TEST(FrameEncodeMetadataWriterTest, MarksTimingFramesPeriodicallyTogether) {
  const int64_t kDelayMs = 29;
  const size_t kMinFrameSize = 10;
  const size_t kMaxFrameSize = 20;
  const int kNumFrames = 1000;
  const int kNumStreams = 3;
  // No outliers as 1000 is larger than anything from range [10,20].
  const std::vector<size_t> kAverageSize = {1000, 1000, 1000};
  const auto frames = GetTimingFrames(kDelayMs, kMinFrameSize, kMaxFrameSize,
                                      kAverageSize, kNumStreams, kNumFrames);

  // Index of the most recent frame that was a timing frame on any stream;
  // -1 until the first one is seen.
  int last_timing_frame = -1;
  for (int i = 0; i < kNumFrames; ++i) {
    // Tally how the streams classified frame |i|.
    int num_normal = 0;
    int num_timing = 0;
    int num_dropped = 0;
    for (int s = 0; s < kNumStreams; ++s) {
      switch (frames[s][i]) {
        case FrameType::kTiming:
          ++num_timing;
          break;
        case FrameType::kNormal:
          ++num_normal;
          break;
        case FrameType::kDropped:
          ++num_dropped;
          break;
      }
    }

    // Normal and timing frames must never share the same timestamp.
    EXPECT_FALSE(num_timing > 0 && num_normal > 0);

    const bool any_frame_sent = num_dropped < kNumStreams;
    if (any_frame_sent) {
      const bool timing_frame_due =
          last_timing_frame == -1 || i >= last_timing_frame + kDelayMs;
      if (timing_frame_due) {
        // No timing frame for a whole period (or none yet): whatever is sent
        // now has to be a timing frame, so no normal frames are allowed.
        EXPECT_EQ(num_normal, 0);
      } else {
        // Still within the period: no unneeded timing frames may be sent.
        EXPECT_EQ(num_timing, 0);
      }
    }

    if (num_timing > 0) {
      last_timing_frame = i;
    }
  }
}

// Every frame whose size reaches the outlier threshold of its stream must not
// be reported as a normal frame.
TEST(FrameEncodeMetadataWriterTest, MarksOutliers) {
  const int64_t kDelayMs = 29;
  const size_t kMinFrameSize = 2495;
  const size_t kMaxFrameSize = 2505;
  const int kNumFrames = 1000;
  const int kNumStreams = 3;
  // Per-stream averages around 1000; whether a generated frame size counts as
  // an outlier depends on kDefaultOutlierFrameSizePercent (see the threshold
  // computed below).
  const std::vector<size_t> kAverageSize = {998, 1000, 1004};
  const auto frames = GetTimingFrames(kDelayMs, kMinFrameSize, kMaxFrameSize,
                                      kAverageSize, kNumStreams, kNumFrames);

  // All outliers should be marked.
  for (int i = 0; i < kNumFrames; ++i) {
    for (int s = 0; s < kNumStreams; ++s) {
      const size_t frame_size = FrameSize(kMinFrameSize, kMaxFrameSize, s, i);
      const auto outlier_threshold =
          kAverageSize[s] * kDefaultOutlierFrameSizePercent / 100;
      if (frame_size < outlier_threshold) {
        continue;  // Not an outlier; nothing to check for this frame.
      }
      // Too big a frame. It may be dropped or timing, but not normal.
      EXPECT_NE(frames[s][i], FrameType::kNormal);
    }
  }
}

// A frame for which OnEncodeStarted() was never called must not be reported
// as a timing frame, even when the thresholds would otherwise flag it.
TEST(FrameEncodeMetadataWriterTest, NoTimingFrameIfNoEncodeStartTime) {
  constexpr size_t kFrameSize = 500;
  int64_t capture_ms = 1;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_timer(&sink);
  VideoCodec codec_settings;
  // Make all frames timing frames.
  codec_settings.timing_frame_thresholds.delay_ms = 1;
  encode_timer.OnEncoderInit(codec_settings, false);
  VideoBitrateAllocation bitrate_allocation;
  bitrate_allocation.SetBitrate(0, 0, 500000);
  encode_timer.OnSetRates(bitrate_allocation, 30);

  EncodedImage image;
  image.SetEncodedData(EncodedImageBuffer::Create(kFrameSize));
  image.capture_time_ms_ = capture_ms;
  image.SetTimestamp(static_cast<uint32_t>(capture_ms * 90));

  // Sanity check: with the encode start reported, a single frame is a timing
  // frame.
  VideoFrame frame = VideoFrame::Builder()
                         .set_timestamp_ms(capture_ms)
                         .set_timestamp_rtp(capture_ms * 90)
                         .set_video_frame_buffer(kFrameBuffer)
                         .build();
  encode_timer.OnEncodeStarted(frame);
  encode_timer.FillTimingInfo(0, &image);
  EXPECT_TRUE(IsTimingFrame(image));

  // Next frame: OnEncodeStarted() is skipped on purpose, so the result must
  // not be a timing frame.
  capture_ms += 1;
  image.capture_time_ms_ = capture_ms;
  image.SetTimestamp(static_cast<uint32_t>(capture_ms * 90));
  image.timing_ = EncodedImage::Timing();
  encode_timer.FillTimingInfo(0, &image);
  EXPECT_FALSE(IsTimingFrame(image));
}

// With the writer initialised with |true| as the second OnEncoderInit()
// argument (internal-source encoder, per the test name), OnEncodeStarted() is
// never called here: timing frames rely on the encode timestamps carried in
// the image, and the capture time is adjusted from them.
TEST(FrameEncodeMetadataWriterTest,
     AdjustsCaptureTimeForInternalSourceEncoder) {
  constexpr size_t kFrameSize = 500;
  const int64_t kEncodeStartDelayMs = 2;
  const int64_t kEncodeFinishDelayMs = 10;
  int64_t capture_ms = 1;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_timer(&sink);

  VideoCodec codec_settings;
  // Make all frames timing frames.
  codec_settings.timing_frame_thresholds.delay_ms = 1;
  encode_timer.OnEncoderInit(codec_settings, true);

  VideoBitrateAllocation bitrate_allocation;
  bitrate_allocation.SetBitrate(0, 0, 500000);
  encode_timer.OnSetRates(bitrate_allocation, 30);

  EncodedImage image;
  image.SetEncodedData(EncodedImageBuffer::Create(kFrameSize));
  image.capture_time_ms_ = capture_ms;
  image.SetTimestamp(static_cast<uint32_t>(capture_ms * 90));

  // A frame that carries no encode timestamps isn't a timing frame.
  encode_timer.FillTimingInfo(0, &image);
  EXPECT_FALSE(IsTimingFrame(image));

  // Next frame, this time with encode start/finish set in timing_; it should
  // be a timing frame.
  capture_ms += 1;
  image.capture_time_ms_ = capture_ms;
  image.SetTimestamp(static_cast<uint32_t>(capture_ms * 90));
  image.timing_ = EncodedImage::Timing();
  image.timing_.encode_start_ms = capture_ms + kEncodeStartDelayMs;
  image.timing_.encode_finish_ms = capture_ms + kEncodeFinishDelayMs;

  encode_timer.FillTimingInfo(0, &image);
  EXPECT_TRUE(IsTimingFrame(image));

  // The frame is captured kEncodeFinishDelayMs before it is encoded, so the
  // restored capture timestamp should lie kEncodeFinishDelayMs in the past
  // (within 1 ms).
  EXPECT_NEAR(image.capture_time_ms_, rtc::TimeMillis() - kEncodeFinishDelayMs,
              1);
}

// A frame that was started but never passed to FillTimingInfo() must be
// reported to the sink as dropped once a later frame is filled in.
// Note: multiplied by 90, the timestamps below cross 2^32 between
// kTimestampMs2 and kTimestampMs3, so the 32-bit timestamp wraps mid-test.
TEST(FrameEncodeMetadataWriterTest, NotifiesAboutDroppedFrames) {
  const int64_t kTimestampMs1 = 47721840;
  const int64_t kTimestampMs2 = 47721850;
  const int64_t kTimestampMs3 = 47721860;
  const int64_t kTimestampMs4 = 47721870;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_timer(&sink);
  encode_timer.OnEncoderInit(VideoCodec(), false);
  // Any non-zero bitrate needed to be set before the first frame.
  VideoBitrateAllocation bitrate_allocation;
  bitrate_allocation.SetBitrate(0, 0, 500000);
  encode_timer.OnSetRates(bitrate_allocation, 30);

  EncodedImage image;
  VideoFrame frame = VideoFrame::Builder()
                         .set_timestamp_rtp(kTimestampMs1 * 90)
                         .set_timestamp_ms(kTimestampMs1)
                         .set_video_frame_buffer(kFrameBuffer)
                         .build();

  // Stamps |image| and |frame| with |timestamp_ms| and reports the encode
  // start of |frame| to the writer.
  const auto start_encode = [&](int64_t timestamp_ms) {
    image.capture_time_ms_ = timestamp_ms;
    image.SetTimestamp(static_cast<uint32_t>(timestamp_ms * 90));
    frame.set_timestamp(timestamp_ms * 90);
    frame.set_timestamp_us(timestamp_ms * 1000);
    encode_timer.OnEncodeStarted(frame);
  };

  // Frame 1: started and filled in; nothing dropped.
  start_encode(kTimestampMs1);
  EXPECT_EQ(0u, sink.GetNumFramesDropped());
  encode_timer.FillTimingInfo(0, &image);

  // Frame 2: started only, FillTimingInfo() is never called for it. At this
  // moment it is not yet known that the frame was dropped.
  image.timing_ = EncodedImage::Timing();
  start_encode(kTimestampMs2);
  EXPECT_EQ(0u, sink.GetNumFramesDropped());

  // Frame 3: filling it in reveals that frame 2 was dropped.
  image.timing_ = EncodedImage::Timing();
  start_encode(kTimestampMs3);
  encode_timer.FillTimingInfo(0, &image);
  EXPECT_EQ(1u, sink.GetNumFramesDropped());

  // Frame 4: no further drops are reported.
  image.timing_ = EncodedImage::Timing();
  start_encode(kTimestampMs4);
  encode_timer.FillTimingInfo(0, &image);
  EXPECT_EQ(1u, sink.GetNumFramesDropped());
}

// FillTimingInfo() must overwrite a wrong capture time in the encoded image
// with the one of the frame passed to OnEncodeStarted().
TEST(FrameEncodeMetadataWriterTest, RestoresCaptureTimestamps) {
  const int64_t kTimestampMs = 123456;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_timer(&sink);
  encode_timer.OnEncoderInit(VideoCodec(), false);
  // Any non-zero bitrate needed to be set before the first frame.
  VideoBitrateAllocation bitrate_allocation;
  bitrate_allocation.SetBitrate(0, 0, 500000);
  encode_timer.OnSetRates(bitrate_allocation, 30);

  // The frame carries the correct capture time.
  VideoFrame frame = VideoFrame::Builder()
                         .set_timestamp_ms(kTimestampMs)
                         .set_timestamp_rtp(kTimestampMs * 90)
                         .set_video_frame_buffer(kFrameBuffer)
                         .build();
  encode_timer.OnEncodeStarted(frame);

  // The image has the matching 90 kHz timestamp but an incorrect capture time.
  EncodedImage image;
  image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
  image.capture_time_ms_ = 0;

  encode_timer.FillTimingInfo(0, &image);
  EXPECT_EQ(kTimestampMs, image.capture_time_ms_);
}

// The rotation of the frame passed to OnEncodeStarted() must end up in the
// encoded image after FillTimingInfo().
TEST(FrameEncodeMetadataWriterTest, CopiesRotation) {
  const int64_t kTimestampMs = 123456;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_timer(&sink);
  encode_timer.OnEncoderInit(VideoCodec(), false);
  // Any non-zero bitrate needed to be set before the first frame.
  VideoBitrateAllocation bitrate_allocation;
  bitrate_allocation.SetBitrate(0, 0, 500000);
  encode_timer.OnSetRates(bitrate_allocation, 30);

  VideoFrame rotated_frame = VideoFrame::Builder()
                                 .set_timestamp_ms(kTimestampMs)
                                 .set_timestamp_rtp(kTimestampMs * 90)
                                 .set_rotation(kVideoRotation_180)
                                 .set_video_frame_buffer(kFrameBuffer)
                                 .build();
  encode_timer.OnEncodeStarted(rotated_frame);

  // The image is matched to the frame through its 90 kHz timestamp.
  EncodedImage image;
  image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
  encode_timer.FillTimingInfo(0, &image);

  EXPECT_EQ(kVideoRotation_180, image.rotation_);
}

// When the codec is configured for screensharing, FillTimingInfo() must mark
// the encoded image with the SCREENSHARE content type.
TEST(FrameEncodeMetadataWriterTest, SetsContentType) {
  const int64_t kTimestampMs = 123456;

  VideoCodec screenshare_codec;
  screenshare_codec.mode = VideoCodecMode::kScreensharing;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_timer(&sink);
  encode_timer.OnEncoderInit(screenshare_codec, false);
  // Any non-zero bitrate needed to be set before the first frame.
  VideoBitrateAllocation bitrate_allocation;
  bitrate_allocation.SetBitrate(0, 0, 500000);
  encode_timer.OnSetRates(bitrate_allocation, 30);

  VideoFrame frame = VideoFrame::Builder()
                         .set_timestamp_ms(kTimestampMs)
                         .set_timestamp_rtp(kTimestampMs * 90)
                         .set_rotation(kVideoRotation_180)
                         .set_video_frame_buffer(kFrameBuffer)
                         .build();
  encode_timer.OnEncodeStarted(frame);

  // The image is matched to the frame through its 90 kHz timestamp.
  EncodedImage image;
  image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
  encode_timer.FillTimingInfo(0, &image);

  EXPECT_EQ(VideoContentType::SCREENSHARE, image.content_type_);
}

// The color space of the frame passed to OnEncodeStarted() must be present
// and equal in the encoded image after FillTimingInfo().
TEST(FrameEncodeMetadataWriterTest, CopiesColorSpace) {
  const int64_t kTimestampMs = 123456;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_timer(&sink);
  encode_timer.OnEncoderInit(VideoCodec(), false);
  // Any non-zero bitrate needed to be set before the first frame.
  VideoBitrateAllocation bitrate_allocation;
  bitrate_allocation.SetBitrate(0, 0, 500000);
  encode_timer.OnSetRates(bitrate_allocation, 30);

  webrtc::ColorSpace color_space =
      CreateTestColorSpace(/*with_hdr_metadata=*/true);
  VideoFrame frame = VideoFrame::Builder()
                         .set_timestamp_ms(kTimestampMs)
                         .set_timestamp_rtp(kTimestampMs * 90)
                         .set_color_space(color_space)
                         .set_video_frame_buffer(kFrameBuffer)
                         .build();
  encode_timer.OnEncodeStarted(frame);

  // The image is matched to the frame through its 90 kHz timestamp.
  EncodedImage image;
  image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
  encode_timer.FillTimingInfo(0, &image);

  // Stop here if no color space was copied; the dereference below needs one.
  ASSERT_NE(image.ColorSpace(), nullptr);
  EXPECT_EQ(color_space, *image.ColorSpace());
}

// The packet infos attached to the frame passed to OnEncodeStarted() must be
// carried over to the encoded image by FillTimingInfo().
TEST(FrameEncodeMetadataWriterTest, CopiesPacketInfos) {
  const int64_t kTimestampMs = 123456;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_timer(&sink);
  encode_timer.OnEncoderInit(VideoCodec(), false);
  // Any non-zero bitrate needed to be set before the first frame.
  VideoBitrateAllocation bitrate_allocation;
  bitrate_allocation.SetBitrate(0, 0, 500000);
  encode_timer.OnSetRates(bitrate_allocation, 30);

  RtpPacketInfos packet_infos = CreatePacketInfos(3);
  VideoFrame frame = VideoFrame::Builder()
                         .set_timestamp_ms(kTimestampMs)
                         .set_timestamp_rtp(kTimestampMs * 90)
                         .set_packet_infos(packet_infos)
                         .set_video_frame_buffer(kFrameBuffer)
                         .build();
  encode_timer.OnEncodeStarted(frame);

  // The image is matched to the frame through its 90 kHz timestamp.
  EncodedImage image;
  image.SetTimestamp(static_cast<uint32_t>(kTimestampMs * 90));
  encode_timer.FillTimingInfo(0, &image);

  // Only the number of copied entries is checked.
  EXPECT_EQ(image.PacketInfos().size(), 3U);
}

// Without codec-specific info UpdateBitstream() must return null and leave
// the image pointing at the original, unmodified buffer.
TEST(FrameEncodeMetadataWriterTest, DoesNotRewriteBitstreamWithoutCodecInfo) {
  uint8_t buffer[] = {1, 2, 3};
  EncodedImage image(buffer, sizeof(buffer), sizeof(buffer));
  const RTPFragmentationHeader fragmentation;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_metadata_writer(&sink);
  const auto updated_fragmentation = encode_metadata_writer.UpdateBitstream(
      /*codec_specific_info=*/nullptr, &fragmentation, &image);

  EXPECT_EQ(updated_fragmentation, nullptr);
  // Same storage, same length: nothing was rewritten.
  EXPECT_EQ(image.data(), buffer);
  EXPECT_EQ(image.size(), sizeof(buffer));
}

// For a VP8 image UpdateBitstream() must return null and leave the image
// pointing at the original, unmodified buffer.
TEST(FrameEncodeMetadataWriterTest, DoesNotRewriteVp8Bitstream) {
  uint8_t buffer[] = {1, 2, 3};
  EncodedImage image(buffer, sizeof(buffer), sizeof(buffer));
  const RTPFragmentationHeader fragmentation;

  CodecSpecificInfo vp8_info;
  vp8_info.codecType = kVideoCodecVP8;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_metadata_writer(&sink);
  const auto updated_fragmentation =
      encode_metadata_writer.UpdateBitstream(&vp8_info, &fragmentation, &image);

  EXPECT_EQ(updated_fragmentation, nullptr);
  // Same storage, same length: nothing was rewritten.
  EXPECT_EQ(image.data(), buffer);
  EXPECT_EQ(image.size(), sizeof(buffer));
}

// For an H264 image with no fragmentation header UpdateBitstream() must
// return null and leave the image pointing at the original buffer.
TEST(FrameEncodeMetadataWriterTest,
     DoesNotRewriteH264BitstreamWithoutFragmentation) {
  uint8_t buffer[] = {1, 2, 3};
  EncodedImage image(buffer, sizeof(buffer), sizeof(buffer));

  CodecSpecificInfo h264_info;
  h264_info.codecType = kVideoCodecH264;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_metadata_writer(&sink);
  const auto updated_fragmentation = encode_metadata_writer.UpdateBitstream(
      &h264_info, /*fragmentation=*/nullptr, &image);

  EXPECT_EQ(updated_fragmentation, nullptr);
  // Same storage, same length: nothing was rewritten.
  EXPECT_EQ(image.data(), buffer);
  EXPECT_EQ(image.size(), sizeof(buffer));
}

// An H264 key frame whose single fragment is the SPS below must be rewritten
// by UpdateBitstream(): the image data has to match kRewrittenSps and the
// returned fragmentation header has to describe the new, longer payload.
TEST(FrameEncodeMetadataWriterTest, RewritesH264BitstreamWithNonOptimalSps) {
  // Number of leading bytes ({0, 0, 0, 1}) that precede the NAL unit type
  // byte in both arrays below; the fragment starts right after them.
  constexpr size_t kPrefixSize = 4;

  uint8_t original_sps[] = {0,    0,    0,    1,    H264::NaluType::kSps,
                            0x00, 0x00, 0x03, 0x03, 0xF4,
                            0x05, 0x03, 0xC7, 0xC0};
  const uint8_t kRewrittenSps[] = {0,    0,    0,    1,    H264::NaluType::kSps,
                                   0x00, 0x00, 0x03, 0x03, 0xF4,
                                   0x05, 0x03, 0xC7, 0xE0, 0x1B,
                                   0x41, 0x10, 0x8D, 0x00};

  CodecSpecificInfo codec_specific_info;
  codec_specific_info.codecType = kVideoCodecH264;

  EncodedImage image(original_sps, sizeof(original_sps), sizeof(original_sps));
  image._frameType = VideoFrameType::kVideoFrameKey;

  // One fragment covering everything after the prefix.
  RTPFragmentationHeader fragmentation;
  fragmentation.VerifyAndAllocateFragmentationHeader(1);
  fragmentation.fragmentationOffset[0] = kPrefixSize;
  fragmentation.fragmentationLength[0] = sizeof(original_sps) - kPrefixSize;

  FakeEncodedImageCallback sink;
  FrameEncodeMetadataWriter encode_metadata_writer(&sink);
  std::unique_ptr<RTPFragmentationHeader> modified_fragmentation =
      encode_metadata_writer.UpdateBitstream(&codec_specific_info,
                                             &fragmentation, &image);

  // A rewrite must have happened; the checks below dereference the result.
  ASSERT_NE(modified_fragmentation, nullptr);

  const std::vector<uint8_t> rewritten_payload(image.data(),
                                               image.data() + image.size());
  EXPECT_THAT(rewritten_payload, testing::ElementsAreArray(kRewrittenSps));

  // Still a single fragment at the same offset, now with the new length.
  ASSERT_THAT(modified_fragmentation->fragmentationVectorSize, 1U);
  EXPECT_EQ(modified_fragmentation->fragmentationOffset[0], kPrefixSize);
  EXPECT_EQ(modified_fragmentation->fragmentationLength[0],
            sizeof(kRewrittenSps) - kPrefixSize);
}

}  // namespace test
}  // namespace webrtc