FFmpeg
 All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups Pages
subtitles.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2012 Clément Bœsch
3  *
4  * This file is part of FFmpeg.
5  *
6  * FFmpeg is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Lesser General Public
8  * License as published by the Free Software Foundation; either
9  * version 2.1 of the License, or (at your option) any later version.
10  *
11  * FFmpeg is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Lesser General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with FFmpeg; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19  */
20 
21 #ifndef AVFORMAT_SUBTITLES_H
22 #define AVFORMAT_SUBTITLES_H
23 
24 #include <stdint.h>
25 #include <stddef.h>
26 #include "avformat.h"
27 #include "libavutil/bprint.h"
28 
/**
 * Ordering criteria for queued subtitle packets; the queue's sort field
 * selects which one is used when the subtitles are finalized.
 */
29 enum sub_sort {
30  SUB_SORT_TS_POS = 0, ///< sort by timestamps, then position
31  SUB_SORT_POS_TS, ///< sort by position, then timestamps
32 };
33 
35  FF_UTF_8, // or other 8 bit encodings
38 };
39 
/**
 * Reader state shared by the ff_text_*() functions, which expose the input
 * as UTF-8 (converting transparently when the stream had a UTF-16 BOM).
 *
 * NOTE(review): the embedded listing numbers jump from 41 to 43 and from 44
 * to 46, so this view most likely omits members - confirm against the real
 * header before relying on this layout.
 */
40 typedef struct {
41  int type; ///< presumably the detected text encoding (cf. FF_UTF_8) - TODO confirm
43  unsigned char buf[8]; ///< small byte buffer; presumably holds bytes not yet handed out - TODO confirm
44  int buf_pos, buf_len; ///< judging by the names: read offset into buf and count of valid bytes - TODO confirm
46 } FFTextReader;
47 
48 /**
49  * Initialize the FFTextReader from the given AVIOContext. This function will
50  * read some bytes from pb, and test for UTF-8 or UTF-16 BOMs. Further accesses
51  * to FFTextReader will read more data from pb.
52  *
53  * The purpose of FFTextReader is to transparently convert read data to UTF-8
54  * if the stream had a UTF-16 BOM.
55  *
56  * @param r object which will be initialized
57  * @param pb stream to read from (referenced as long as FFTextReader is in use)
58  */
60 
61 /**
62  * Similar to ff_text_init_avio(), but sets it up to read from a bounded buffer.
63  *
64  * @param r object which will be initialized
65  * @param buf buffer to read from (referenced as long as FFTextReader is in use)
66  * @param size size of buf
67  */
68 void ff_text_init_buf(FFTextReader *r, void *buf, size_t size);
69 
70 /**
71  * Return the byte position of the next byte returned by ff_text_r8(). For
72  * UTF-16 source streams, this will return the original position, but it will
73  * be incorrect if a codepoint was only partially read with ff_text_r8().
74  */
75 int64_t ff_text_pos(FFTextReader *r);
76 
77 /**
78  * Return the next byte. The return value is always 0 - 255. Returns 0 on EOF.
79  * If the source stream is UTF-16, this reads from the stream converted to
80  * UTF-8. On invalid UTF-16, 0 is returned.
81  */
83 
84 /**
85  * Return non-zero if EOF was reached.
86  */
88 
89 /**
90  * Like ff_text_r8(), but don't remove the byte from the buffer.
91  */
93 
94 /**
95  * Read the given number of bytes (in UTF-8). On error or EOF, \0 bytes are
96  * written.
97  */
98 void ff_text_read(FFTextReader *r, char *buf, size_t size);
99 
100 typedef struct {
101  AVPacket *subs; ///< array of subtitles packets
102  int nb_subs; ///< number of subtitles packets
103  int allocated_size; ///< allocated size for subs
104  int current_sub_idx; ///< current position for the read packet callback
105  enum sub_sort sort; ///< sort method to use when finalizing subtitles
107 
108 /**
109  * Insert a new subtitle event.
110  *
111  * @param event the subtitle line, may not be zero terminated
112  * @param len the length of the event (in strlen() sense, so without '\0')
113  * @param merge set to 1 if the current event should be concatenated with the
114  * previous one instead of adding a new entry, 0 otherwise
115  */
117  const uint8_t *event, int len, int merge);
118 
119 /**
120  * Set missing durations and sort subtitles by PTS, and then byte position.
121  */
123 
124 /**
125  * Generic read_packet() callback for subtitles demuxers using this queue
126  * system.
127  */
129 
130 /**
131  * Update current_sub_idx to emulate a seek. Except for the first parameter,
132  * it matches the AVInputFormat->read_seek2 prototype.
133  */
135  int64_t min_ts, int64_t ts, int64_t max_ts, int flags);
136 
137 /**
138  * Remove and destroy all the subtitles packets.
139  */
141 
142 /**
143  * SMIL helper to load next chunk ("<...>" or untagged content) in buf.
144  *
145  * @param c cached character, to avoid a backward seek
146  */
148 
149 /**
150  * SMIL helper to point to the value of an attribute in the given tag.
151  *
152  * @param s SMIL tag ("<...>")
153  * @param attr the attribute to look for
154  */
155 const char *ff_smil_get_attr_ptr(const char *s, const char *attr);
156 
157 /**
158  * @brief Same as ff_subtitles_read_text_chunk(), but read from an AVIOContext.
159  */
161 
162 /**
163  * @brief Read a subtitles chunk from FFTextReader.
164  *
165  * A chunk is defined by a multiline "event", ending with a second line break.
166  * The trailing line breaks are trimmed. CRLF are supported.
167  * Example: "foo\r\nbar\r\n\r\nnext" will write "foo\r\nbar" into buf, and tr
168  * will be positioned on the 'n' of the "next" string.
169  *
170  * @param tr I/O context
171  * @param buf an initialized buf where the chunk is written
172  *
173  * @note buf is cleared before writing into it.
174  */
176 
177 /**
178  * Get the number of characters to increment to jump to the next line, or to
179  * the end of the string.
180  * The function handles the following line breaks schemes:
181  * LF, CRLF (MS), or standalone CR (old MacOS).
182  */
183 static av_always_inline int ff_subtitles_next_line(const char *ptr)
184 {
185  int n = strcspn(ptr, "\r\n");
186  ptr += n;
187  if (*ptr == '\r') {
188  ptr++;
189  n++;
190  }
191  if (*ptr == '\n')
192  n++;
193  return n;
194 }
195 
196 /**
197  * Read a line of text. Discards line ending characters.
198  * The function handles the following line breaks schemes:
199  * LF, CRLF (MS), or standalone CR (old MacOS).
200  *
201  * Returns the number of bytes written to buf. Always writes a terminating 0,
202  * just as snprintf() does.
203  *
204  * @note returns a negative error code if a \0 byte is found
205  */
206 ptrdiff_t ff_subtitles_read_line(FFTextReader *tr, char *buf, size_t size);
207 
208 #endif /* AVFORMAT_SUBTITLES_H */