FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
subtitles.c
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012-2013 Clément Bœsch <u pkh me>
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #include "avformat.h"
22 #include "subtitles.h"
23 #include "avio_internal.h"
24 #include "libavutil/avassert.h"
25 #include "libavutil/avstring.h"
26 
28 {
29  int i;
30  r->pb = pb;
31  r->buf_pos = r->buf_len = 0;
32  r->type = FF_UTF_8;
33  for (i = 0; i < 2; i++)
34  r->buf[r->buf_len++] = avio_r8(r->pb);
35  if (strncmp("\xFF\xFE", r->buf, 2) == 0) {
36  r->type = FF_UTF16LE;
37  r->buf_pos += 2;
38  } else if (strncmp("\xFE\xFF", r->buf, 2) == 0) {
39  r->type = FF_UTF16BE;
40  r->buf_pos += 2;
41  } else {
42  r->buf[r->buf_len++] = avio_r8(r->pb);
43  if (strncmp("\xEF\xBB\xBF", r->buf, 3) == 0) {
44  // UTF8
45  r->buf_pos += 3;
46  }
47  }
48 }
49 
50 void ff_text_init_buf(FFTextReader *r, void *buf, size_t size)
51 {
52  memset(&r->buf_pb, 0, sizeof(r->buf_pb));
53  ffio_init_context(&r->buf_pb, buf, size, 0, NULL, NULL, NULL, NULL);
54  ff_text_init_avio(r, &r->buf_pb);
55 }
56 
58 {
59  return avio_tell(r->pb) - r->buf_len + r->buf_pos;
60 }
61 
63 {
64  uint32_t val;
65  uint8_t tmp;
66  if (r->buf_pos < r->buf_len)
67  return r->buf[r->buf_pos++];
68  if (r->type == FF_UTF16LE) {
69  GET_UTF16(val, avio_rl16(r->pb), return 0;)
70  } else if (r->type == FF_UTF16BE) {
71  GET_UTF16(val, avio_rb16(r->pb), return 0;)
72  } else {
73  return avio_r8(r->pb);
74  }
75  if (!val)
76  return 0;
77  r->buf_pos = 0;
78  r->buf_len = 0;
79  PUT_UTF8(val, tmp, r->buf[r->buf_len++] = tmp;)
80  return r->buf[r->buf_pos++]; // buf_len is at least 1
81 }
82 
83 void ff_text_read(FFTextReader *r, char *buf, size_t size)
84 {
85  for ( ; size > 0; size--)
86  *buf++ = ff_text_r8(r);
87 }
88 
90 {
91  return r->buf_pos >= r->buf_len && avio_feof(r->pb);
92 }
93 
95 {
96  int c;
97  if (r->buf_pos < r->buf_len)
98  return r->buf[r->buf_pos];
99  c = ff_text_r8(r);
100  if (!avio_feof(r->pb)) {
101  r->buf_pos = 0;
102  r->buf_len = 1;
103  r->buf[0] = c;
104  }
105  return c;
106 }
107 
109  const uint8_t *event, int len, int merge)
110 {
111  AVPacket *subs, *sub;
112 
113  if (merge && q->nb_subs > 0) {
114  /* merge with previous event */
115 
116  int old_len;
117  sub = &q->subs[q->nb_subs - 1];
118  old_len = sub->size;
119  if (av_grow_packet(sub, len) < 0)
120  return NULL;
121  memcpy(sub->data + old_len, event, len);
122  } else {
123  /* new event */
124 
125  if (q->nb_subs >= INT_MAX/sizeof(*q->subs) - 1)
126  return NULL;
127  subs = av_fast_realloc(q->subs, &q->allocated_size,
128  (q->nb_subs + 1) * sizeof(*q->subs));
129  if (!subs)
130  return NULL;
131  q->subs = subs;
132  sub = &subs[q->nb_subs++];
133  if (av_new_packet(sub, len) < 0)
134  return NULL;
135  sub->flags |= AV_PKT_FLAG_KEY;
136  sub->pts = sub->dts = 0;
137  memcpy(sub->data, event, len);
138  }
139  return sub;
140 }
141 
142 static int cmp_pkt_sub_ts_pos(const void *a, const void *b)
143 {
144  const AVPacket *s1 = a;
145  const AVPacket *s2 = b;
146  if (s1->pts == s2->pts) {
147  if (s1->pos == s2->pos)
148  return 0;
149  return s1->pos > s2->pos ? 1 : -1;
150  }
151  return s1->pts > s2->pts ? 1 : -1;
152 }
153 
154 static int cmp_pkt_sub_pos_ts(const void *a, const void *b)
155 {
156  const AVPacket *s1 = a;
157  const AVPacket *s2 = b;
158  if (s1->pos == s2->pos) {
159  if (s1->pts == s2->pts)
160  return 0;
161  return s1->pts > s2->pts ? 1 : -1;
162  }
163  return s1->pos > s2->pos ? 1 : -1;
164 }
165 
167 {
168  int i;
169 
170  qsort(q->subs, q->nb_subs, sizeof(*q->subs),
173  for (i = 0; i < q->nb_subs; i++)
174  if (q->subs[i].duration == -1 && i < q->nb_subs - 1)
175  q->subs[i].duration = q->subs[i + 1].pts - q->subs[i].pts;
176 }
177 
179 {
180  AVPacket *sub = q->subs + q->current_sub_idx;
181 
182  if (q->current_sub_idx == q->nb_subs)
183  return AVERROR_EOF;
184  if (av_copy_packet(pkt, sub) < 0) {
185  return AVERROR(ENOMEM);
186  }
187 
188  pkt->dts = pkt->pts;
189  q->current_sub_idx++;
190  return 0;
191 }
192 
193 static int search_sub_ts(const FFDemuxSubtitlesQueue *q, int64_t ts)
194 {
195  int s1 = 0, s2 = q->nb_subs - 1;
196 
197  if (s2 < s1)
198  return AVERROR(ERANGE);
199 
200  for (;;) {
201  int mid;
202 
203  if (s1 == s2)
204  return s1;
205  if (s1 == s2 - 1)
206  return q->subs[s1].pts <= q->subs[s2].pts ? s1 : s2;
207  mid = (s1 + s2) / 2;
208  if (q->subs[mid].pts <= ts)
209  s1 = mid;
210  else
211  s2 = mid;
212  }
213 }
214 
216  int64_t min_ts, int64_t ts, int64_t max_ts, int flags)
217 {
218  if (flags & AVSEEK_FLAG_BYTE) {
219  return AVERROR(ENOSYS);
220  } else if (flags & AVSEEK_FLAG_FRAME) {
221  if (ts < 0 || ts >= q->nb_subs)
222  return AVERROR(ERANGE);
223  q->current_sub_idx = ts;
224  } else {
225  int i, idx = search_sub_ts(q, ts);
226  int64_t ts_selected;
227 
228  if (idx < 0)
229  return idx;
230  for (i = idx; i < q->nb_subs && q->subs[i].pts < min_ts; i++)
231  if (stream_index == -1 || q->subs[i].stream_index == stream_index)
232  idx = i;
233  for (i = idx; i > 0 && q->subs[i].pts > max_ts; i--)
234  if (stream_index == -1 || q->subs[i].stream_index == stream_index)
235  idx = i;
236 
237  ts_selected = q->subs[idx].pts;
238  if (ts_selected < min_ts || ts_selected > max_ts)
239  return AVERROR(ERANGE);
240 
241  /* look back in the latest subtitles for overlapping subtitles */
242  for (i = idx - 1; i >= 0; i--) {
243  int64_t pts = q->subs[i].pts;
244  if (q->subs[i].duration <= 0 ||
245  (stream_index != -1 && q->subs[i].stream_index != stream_index))
246  continue;
247  if (pts >= min_ts && pts > ts_selected - q->subs[i].duration)
248  idx = i;
249  else
250  break;
251  }
252 
253  /* If the queue is used to store multiple subtitles streams (like with
254  * VobSub) and the stream index is not specified, we need to make sure
255  * to focus on the smallest file position offset for a same timestamp;
256  * queue is ordered by pts and then filepos, so we can take the first
257  * entry for a given timestamp. */
258  if (stream_index == -1)
259  while (idx > 0 && q->subs[idx - 1].pts == q->subs[idx].pts)
260  idx--;
261 
262  q->current_sub_idx = idx;
263  }
264  return 0;
265 }
266 
268 {
269  int i;
270 
271  for (i = 0; i < q->nb_subs; i++)
272  av_free_packet(&q->subs[i]);
273  av_freep(&q->subs);
274  q->nb_subs = q->allocated_size = q->current_sub_idx = 0;
275 }
276 
278 {
279  int i = 0;
280  char end_chr;
281 
282  if (!*c) // cached char?
283  *c = ff_text_r8(tr);
284  if (!*c)
285  return 0;
286 
287  end_chr = *c == '<' ? '>' : '<';
288  do {
289  av_bprint_chars(buf, *c, 1);
290  *c = ff_text_r8(tr);
291  i++;
292  } while (*c != end_chr && *c);
293  if (end_chr == '>') {
294  av_bprint_chars(buf, '>', 1);
295  *c = 0;
296  }
297  return i;
298 }
299 
/**
 * Locate the value of an attribute inside a tag string.
 *
 * The string is walked one whitespace-separated token at a time; white
 * space inside double quotes does not end a token. The first token is
 * always skipped. At the start of each following token, attr is compared
 * case-insensitively and must be immediately followed by '='.
 *
 * @param s    NUL-terminated string to scan
 * @param attr NUL-terminated attribute name to look for
 * @return pointer to the first character after '=' (and after the opening
 *         double quote, if there is one), or NULL if attr was not found
 */
const char *ff_smil_get_attr_ptr(const char *s, const char *attr)
{
    int in_quotes = 0;
    /* Keep the length as size_t: strlen() returns size_t and narrowing it
     * to int could truncate or turn negative for very long names. */
    const size_t len = strlen(attr);

    while (*s) {
        /* Skip the current token, honoring double quotes. */
        while (*s) {
            if (!in_quotes && av_isspace(*s))
                break;
            in_quotes ^= *s == '"'; // XXX: support escaping?
            s++;
        }
        /* Skip the white space separating it from the next token. */
        while (av_isspace(*s))
            s++;
        /* s[len] is only read after len characters matched, so it is at
         * worst the terminating NUL; s[len + 1] is only read after '='. */
        if (!av_strncasecmp(s, attr, len) && s[len] == '=')
            return s + len + 1 + (s[len + 1] == '"');
    }
    return NULL;
}
319 
/**
 * Tell whether c is an end-of-line character.
 *
 * @return 1 for carriage return or line feed, 0 for anything else
 */
static inline int is_eol(char c)
{
    switch (c) {
    case '\r':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
324 
326 {
327  char eol_buf[5], last_was_cr = 0;
328  int n = 0, i = 0, nb_eol = 0;
329 
330  av_bprint_clear(buf);
331 
332  for (;;) {
333  char c = ff_text_r8(tr);
334 
335  if (!c)
336  break;
337 
338  /* ignore all initial line breaks */
339  if (n == 0 && is_eol(c))
340  continue;
341 
342  /* line break buffering: we don't want to add the trailing \r\n */
343  if (is_eol(c)) {
344  nb_eol += c == '\n' || last_was_cr;
345  if (nb_eol == 2)
346  break;
347  eol_buf[i++] = c;
348  if (i == sizeof(eol_buf) - 1)
349  break;
350  last_was_cr = c == '\r';
351  continue;
352  }
353 
354  /* only one line break followed by data: we flush the line breaks
355  * buffer */
356  if (i) {
357  eol_buf[i] = 0;
358  av_bprintf(buf, "%s", eol_buf);
359  i = nb_eol = 0;
360  }
361 
362  av_bprint_chars(buf, c, 1);
363  n++;
364  }
365 }
366 
368 {
369  FFTextReader tr;
370  tr.buf_pos = tr.buf_len = 0;
371  tr.type = 0;
372  tr.pb = pb;
374 }
375 
376 ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size)
377 {
378  size_t cur = 0;
379  if (!size)
380  return 0;
381  while (cur + 1 < size) {
382  unsigned char c = ff_text_r8(tr);
383  if (!c)
384  return ff_text_eof(tr) ? cur : AVERROR_INVALIDDATA;
385  if (c == '\r' || c == '\n')
386  break;
387  buf[cur++] = c;
388  buf[cur] = '\0';
389  }
390  if (ff_text_peek_r8(tr) == '\r')
391  ff_text_r8(tr);
392  if (ff_text_peek_r8(tr) == '\n')
393  ff_text_r8(tr);
394  return cur;
395 }