$OpenBSD: patch-src_demuxers_demux_real_c,v 1.1 2013/11/30 22:13:37 brad Exp $

- Fix realvideo reordered pts.
- Better a/v sync.

--- src/demuxers/demux_real.c.orig	Wed Nov 20 19:25:44 2013
+++ src/demuxers/demux_real.c	Wed Nov 20 21:08:18 2013
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2000-2012 the xine project
+ * Copyright (C) 2000-2013 the xine project
  *
  * This file is part of xine, a free video player.
  *
@@ -122,6 +122,7 @@ typedef struct {
   uint8_t             *frame_buffer;
   uint32_t             frame_num_bytes;
   uint32_t             sub_packet_cnt;
+  uint32_t             audio_time;
 } real_stream_t;
 
 typedef struct {
@@ -158,13 +159,7 @@ typedef struct {
 
   int64_t              last_pts[2];
   int                  send_newpts;
-  int                  buf_flag_seek;
 
-  uint32_t             last_ts;
-  uint32_t             next_ts;
-  int                  last_seq;
-  int                  next_seq;
-
   int                  fragment_size; /* video sub-demux */
   int                  fragment_count;
   uint32_t            *fragment_tab;
@@ -976,96 +971,72 @@ static int demux_real_parse_references( demux_real_t *
 #define WRAP_THRESHOLD           220000
 #define PTS_AUDIO                0
 #define PTS_VIDEO                1
+#define PTS_BOTH                 2
 
 static void check_newpts (demux_real_t *this, int64_t pts, int video, int preview) {
   const int64_t diff = pts - this->last_pts[video];
-  lprintf ("check_newpts %"PRId64"\n", pts);
 
-  if (!preview && pts &&
-      (this->send_newpts || (this->last_pts[video] && abs(diff)>WRAP_THRESHOLD) ) ) {
+  if (preview)
+    return;
 
-    lprintf ("diff=%"PRId64"\n", diff);
-
-    if (this->buf_flag_seek) {
-      _x_demux_control_newpts(this->stream, pts, BUF_FLAG_SEEK);
-      this->buf_flag_seek = 0;
-    } else {
-      _x_demux_control_newpts(this->stream, pts, 0);
-    }
-    this->send_newpts = 0;
-    this->last_pts[1-video] = 0;
+  /* Metronom does not strictly follow audio pts. They usually are too coarse
+     for seamless playback. Instead, it takes the latest discontinuity as a
+     starting point. This can lead to terrible lags for our very long audio frames.
+     So let's make sure audio has the last word here. */
+  if (this->send_newpts > video) {
+    _x_demux_control_newpts (this->stream, pts, BUF_FLAG_SEEK);
+    this->send_newpts         = video;
+    this->last_pts[video]     = pts;
+    this->last_pts[1 - video] = 0;
+  } else if (pts && (this->last_pts[video]) && (abs (diff) > WRAP_THRESHOLD)) {
+    _x_demux_control_newpts (this->stream, pts, 0);
+    this->send_newpts         = 0;
+    this->last_pts[1 - video] = 0;
   }
 
-  if (!preview && pts )
+  if (pts)
     this->last_pts[video] = pts;
 }
 
-static uint32_t real_fix_timestamp (demux_real_t *this, uint8_t *hdr, uint32_t ts_in) {
-  int      pict_type;
-  int      seq;
-  uint32_t ts_out;
-
-  switch(this->video_stream->buf_type) {
+static uint32_t real_get_reordered_pts (demux_real_t *this, uint8_t *hdr, uint32_t dts) {
+  int      pict_type; /* I1/I2/P/B-frame */
+  uint32_t t, pts;
+  /* lower 13 bits of pts are stored within the frame */
+  pict_type = hdr[0];
+  t = ((((uint32_t)hdr[1] << 8) | hdr[2]) << 8) | hdr[3];
+  switch (this->video_stream->buf_type) {
     case BUF_VIDEO_RV20:
-      pict_type = (hdr[0] & 0xC0) >> 6;
-      seq       = ((hdr[1] & 0x7F) << 6) + ((hdr[2] & 0xFC) >> 2);
-      break;
+      pict_type >>= 6;
+      t         >>= 10;
+    break;
     case BUF_VIDEO_RV30:
-      pict_type = (hdr[0] & 0x18) >> 3;
-      seq       = ((hdr[1] & 0x0F) << 9) + (hdr[2] << 1) + ((hdr[3] & 0x80) >> 7);
-      break;
+      pict_type >>= 3;
+      t         >>= 7;
+    break;
     case BUF_VIDEO_RV40:
-      pict_type = (hdr[0] & 0x60) >> 5;
-      seq       = ((hdr[1] & 0x07) << 10) + (hdr[2] << 2) + ((hdr[3] & 0xC0) >> 6);
-      break;
+      pict_type >>= 5;
+      t         >>= 6;
+    break;
     default:
       xprintf(this->stream->xine, XINE_VERBOSITY_DEBUG,
               "demux_real: can't fix timestamp for buf type 0x%08x\n",
               this->video_stream->buf_type);
-      return ts_in;
-      break;
+      return (dts);
+    break;
   }
 
-  switch (pict_type) {
-    case 0:
-    case 1:
-      /* I frame */
-      ts_out = this->next_ts;
-
-      this->last_ts = this->next_ts;
-      this->next_ts = ts_in;
-
-      this->last_seq = this->next_seq;
-      this->next_seq = seq;
-      break;
-    case 2:
-      /* P frame */
-      ts_out = this->next_ts;
-
-      this->last_ts  = this->next_ts;
-      if (seq < this->next_seq)
-        this->next_ts += seq + 8192 - this->next_seq;
-      else
-        this->next_ts += seq - this->next_seq;
-
-      this->last_seq = this->next_seq;
-      this->next_seq = seq;
-      break;
-    case 3:
-      /* B frame */
-      if (seq < this->last_seq)
-        ts_out = ((seq + 8192 - this->last_seq) + this->last_ts);
-      else
-        ts_out = ((seq - this->last_seq) + this->last_ts);
-      break;
-    default:
-      xprintf(this->stream->xine, XINE_VERBOSITY_DEBUG,
-              "demux_real: unknown pict_type: %d\n", pict_type);
-      ts_out = 0;
-      break;
+  pict_type &= 3;
+  t &= 0x1fff;
+  pts = (dts & (~0x1fff)) | t;
+  /* snap to dts +/- 4.095 seconds */
+  if (dts + 0x1000 < pts) pts -= 0x2000;
+  else if (dts > pts + 0x1000) pts += 0x2000;
+  if (this->stream->xine->verbosity == XINE_VERBOSITY_DEBUG + 1) {
+    xprintf (this->stream->xine, XINE_VERBOSITY_DEBUG + 1,
+      "demux_real: video pts: %d.%03d:%04d -> %d.%03d (%d)\n",
+      dts / 1000, dts % 1000, t, pts / 1000, pts % 1000, pict_type);
   }
-
-  return ts_out;
+  return (pts);
 }
 
 static int stream_read_char (demux_real_t *this) {
@@ -1121,7 +1092,7 @@ static int demux_real_send_chunk(demux_plugin_t *this_
   const uint16_t stream   = _X_BE_16(&header[4]);
   const off_t offset __attr_unused = this->input->get_current_pos(this->input);
   uint16_t size     = _X_BE_16(&header[2]) - DATA_PACKET_HEADER_SIZE;
-  const uint32_t timestamp= _X_BE_32(&header[6]);
+  uint32_t timestamp= _X_BE_32(&header[6]);
   int64_t pts      = (int64_t) timestamp * 90;
 
   /* Data packet header with version 1 contains 1 extra byte */
@@ -1330,14 +1301,14 @@ static int demux_real_send_chunk(demux_plugin_t *this_
         /* if the video stream has b-frames fix the timestamps */
         if((this->video_stream->format >= 0x20200002) &&
            (buf->decoder_flags & BUF_FLAG_FRAME_START))
-          pts = (int64_t) real_fix_timestamp(this, buf->content, timestamp) * 90;
+          pts = (int64_t)real_get_reordered_pts (this, buf->content, timestamp) * 90;
 
         /* this test was moved from ffmpeg video decoder.
          * fixme: is pts only valid on frame start? */
-        if( buf->decoder_flags & BUF_FLAG_FRAME_START )
+        if (buf->decoder_flags & BUF_FLAG_FRAME_START) {
           buf->pts = pts;
-        else
-          buf->pts = 0;
+          check_newpts (this, pts, PTS_VIDEO, 0);
+        } else buf->pts = 0;
         pts = 0;
 
         buf->extra_info->input_normpos = normpos;
@@ -1371,6 +1342,35 @@ static int demux_real_send_chunk(demux_plugin_t *this_
     else
       this->audio_need_keyframe = 0;
 
+    /* speed up when not debugging */
+    if (this->stream->xine->verbosity == XINE_VERBOSITY_DEBUG + 1) {
+      xprintf (this->stream->xine, XINE_VERBOSITY_DEBUG + 1,
+        "demux_real: audio pts: %d.%03d %s%s\n",
+        timestamp / 1000, timestamp % 1000,
+        keyframe ? "*" : " ",
+        this->audio_stream->sub_packet_cnt ? " " : "s");
+    }
+
+    /* cook audio frames are fairly long (almost 2 seconds). For obfuscation
+       purposes, they are sent as multiple fragments in intentionally wrong order.
+       The first sent fragment has the timestamp for the whole frame.
+
+       Sometimes, the remaining fragments all carry the same time, and appear
+       immediately thereafter. This is easy.
+
+       Sometimes, the remaining fragments carry fake timestamps interpolated across
+       the frame duration. Consequently, they will be muxed between the next few
+       video frames. We get the complete frame ~2 seconds late. This is ugly.
+       Let's be careful not to trap metronom into a big lag. */
+    if (!this->audio_stream->sub_packet_cnt)
+      this->audio_stream->audio_time = timestamp;
+    else
+      timestamp = this->audio_stream->audio_time;
+    /* nasty kludge, maybe this is somewhere in mdpr? */
+    if (this->audio_stream->buf_type == BUF_AUDIO_COOK)
+      timestamp += 120;
+    pts = (int64_t) timestamp * 90;
+
     /* if we have a seekable stream then use the timestamp for the data
      * packet for more accurate seeking - if not then estimate time using
      * average bitrate */
@@ -1549,6 +1549,7 @@ static void demux_real_send_headers(demux_plugin_t *th
 
   this->last_pts[0]   = 0;
   this->last_pts[1]   = 0;
+  this->send_newpts   = PTS_BOTH;
 
   this->avg_bitrate   = 1;
 
@@ -1624,7 +1625,6 @@ static int demux_real_seek (demux_plugin_t *this_gen,
       if(this->audio_stream)
         this->audio_stream->sub_packet_cnt = 0;
 
-      this->buf_flag_seek = 1;
       _x_demux_flush_engine(this->stream);
     }
   }
@@ -1636,12 +1636,9 @@ static int demux_real_seek (demux_plugin_t *this_gen,
     this->input->seek_time(this->input, start_time, SEEK_SET);
   }
 
-  this->send_newpts     = 1;
+  this->send_newpts     = PTS_BOTH;
   this->old_seqnum      = -1;
   this->fragment_size   = 0;
-
-  this->next_ts         = 0;
-  this->next_seq        = 0;
 
   this->status          = DEMUX_OK;
 
