[FFmpeg-devel] [PATCH] lavc/movtextenc: fix incorrect offset calculation for UTF-8 characters

Erik Bråthen Solem erikbsolem at hotmail.com
Wed Mar 8 03:36:42 EET 2017


The 3GPP Timed Text (TTXT / tx3g / mov_text) specification counts each multibyte UTF-8 character as a single character, whereas ffmpeg currently counts bytes. As a result, ffmpeg produces files in which style boxes have incorrect offsets. This patch introduces:
1. a separate variable that keeps track of the byte count
2. a for loop that excludes continuation bytes from the character counting

Fixes trac #6021 (encoding part).

---
 libavcodec/movtextenc.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/libavcodec/movtextenc.c b/libavcodec/movtextenc.c
index 20e01e2..8d09ff4 100644
--- a/libavcodec/movtextenc.c
+++ b/libavcodec/movtextenc.c
@@ -70,6 +70,7 @@ typedef struct {
     uint8_t style_fontsize;
     uint32_t style_color;
     uint16_t text_pos;
+    uint16_t byte_size;
 } MovTextContext;
 
 typedef struct {
@@ -302,7 +303,10 @@ static void mov_text_text_cb(void *priv, const char *text, int len)
 {
     MovTextContext *s = priv;
     av_bprint_append_data(&s->buffer, text, len);
-    s->text_pos += len;
+    for (int i = 0; i < len; i++)
+        if ((text[i] & 0xC0) != 0x80)
+            s->text_pos++; /* increase character count */
+    s->byte_size += len; /* increase byte count */
 }
 
 static void mov_text_new_line_cb(void *priv, int forced)
@@ -310,6 +314,7 @@ static void mov_text_new_line_cb(void *priv, int forced)
     MovTextContext *s = priv;
     av_bprint_append_data(&s->buffer, "\n", 1);
     s->text_pos += 1;
+    s->byte_size += 1;
 }
 
 static const ASSCodesCallbacks mov_text_callbacks = {
@@ -328,6 +333,7 @@ static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf,
     size_t j;
 
     s->text_pos = 0;
+    s->byte_size = 0;
     s->count = 0;
     s->box_flags = 0;
     s->style_entries = 0;
@@ -362,7 +368,7 @@ static int mov_text_encode_frame(AVCodecContext *avctx, unsigned char *buf,
         }
     }
 
-    AV_WB16(buf, s->text_pos);
+    AV_WB16(buf, s->byte_size);
     buf += 2;
 
     if (!av_bprint_is_complete(&s->buffer)) {
-- 
1.9.5 (Apple Git-50.3)



More information about the ffmpeg-devel mailing list