FFmpeg
vf_deshake_opencl.c
/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
 * Copyright (C) 2009, Willow Garage Inc., all rights reserved.
 * Copyright (C) 2013, OpenCV Foundation, all rights reserved.
 * Third party copyrights are property of their respective owners.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * * Redistribution's of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * * Redistribution's in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * * The name of the copyright holders may not be used to endorse or promote products
 * derived from this software without specific prior written permission.
 *
 * This software is provided by the copyright holders and contributors "as is" and
 * any express or implied warranties, including, but not limited to, the implied
 * warranties of merchantability and fitness for a particular purpose are disclaimed.
 * In no event shall the Intel Corporation or contributors be liable for any direct,
 * indirect, incidental, special, exemplary, or consequential damages
 * (including, but not limited to, procurement of substitute goods or services;
 * loss of use, data, or profits; or business interruption) however caused
 * and on any theory of liability, whether in contract, strict liability,
 * or tort (including negligence or otherwise) arising in any way out of
 * the use of this software, even if advised of the possibility of such damage.
 */

#include <stdbool.h>
#include <float.h>
#include <libavutil/lfg.h>
#include "libavutil/opt.h"
#include "libavutil/imgutils.h"
#include "libavutil/mem.h"
#include "libavutil/fifo.h"
#include "libavutil/common.h"
#include "libavutil/avassert.h"
#include "libavutil/pixfmt.h"
#include "avfilter.h"
#include "framequeue.h"
#include "filters.h"
#include "transform.h"
#include "formats.h"
#include "internal.h"
#include "opencl.h"
#include "opencl_source.h"
#include "video.h"

/*
This filter matches feature points between frames (dealing with outliers) and then
uses the matches to estimate an affine transform between frames. This transform is
decomposed into various values (translation, scale, rotation) and the values are
summed relative to the start of the video to obtain an absolute camera position
for each frame. This "camera path" is then smoothed via a gaussian filter, resulting
in a new path that is turned back into an affine transform and applied to each
frame to render it.

High-level overview:

All of the work to extract motion data from frames occurs in queue_frame. Motion data
is buffered in a smoothing window, so queue_frame simply computes the absolute camera
positions and places them in ringbuffers.

filter_frame is responsible for looking at the absolute camera positions currently
in the ringbuffers, applying the gaussian filter, and then transforming the frames.
*/
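
// For orientation, the per-frame motion pipeline runs these kernels in order
// (see queue_frame below): grayscale (RGB input only) -> harris_response ->
// refine_features -> brief_descriptors -> match_descriptors. The matches are
// then read back to the host, where RANSAC fits the affine model; filter_frame
// later applies kernel_transform and, optionally, kernel_crop_upscale on the GPU.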

// Number of bits for BRIEF descriptors
#define BREIFN 512
// Size of the patch from which a BRIEF descriptor is extracted
// This is the size used in OpenCV
#define BRIEF_PATCH_SIZE 31
#define BRIEF_PATCH_SIZE_HALF (BRIEF_PATCH_SIZE / 2)

#define MATCHES_CONTIG_SIZE 2000

#define ROUNDED_UP_DIV(a, b) ((a + (b - 1)) / b)
typedef struct PointPair {
    // Previous frame
    cl_float2 p1;
    // Current frame
    cl_float2 p2;
} PointPair;

typedef struct MotionVector {
    PointPair p;
    // Used to mark vectors as potential outliers
    cl_int should_consider;
} MotionVector;

// Denotes the indices for the different types of motion in the ringbuffers array
enum RingbufferIndices {
    RingbufX,
    RingbufY,
    RingbufRot,
    RingbufScaleX,
    RingbufScaleY,

    // Should always be last
    RingbufCount
};

// Struct that holds data for drawing point match debug data
typedef struct DebugMatches {
    MotionVector *matches;
    // The points used to calculate the affine transform for a frame
    MotionVector model_matches[3];

    int num_matches;
    // For cases where we couldn't calculate a model
    int num_model_matches;
} DebugMatches;

// Groups together the ringbuffers that store absolute distortion / position values
// for each frame
typedef struct AbsoluteFrameMotion {
    // Array with the various ringbuffers, indexed via the RingbufferIndices enum
    AVFifoBuffer *ringbuffers[RingbufCount];

    // Offset to get to the current frame being processed
    // (not in bytes)
    int curr_frame_offset;
    // Keeps track of where the start and end of contiguous motion data is (to
    // deal with cases where no motion data is found between two frames)
    int data_start_offset;
    int data_end_offset;

    AVFifoBuffer *debug_matches;
} AbsoluteFrameMotion;

// Takes care of freeing the arrays within the DebugMatches inside of the
// debug_matches ringbuffer and then freeing the buffer itself.
static void free_debug_matches(AbsoluteFrameMotion *afm) {
    DebugMatches dm;

    if (!afm->debug_matches) {
        return;
    }

    while (av_fifo_size(afm->debug_matches) > 0) {
        av_fifo_generic_read(
            afm->debug_matches,
            &dm,
            sizeof(DebugMatches),
            NULL
        );

        av_freep(&dm.matches);
    }

    av_fifo_freep(&afm->debug_matches);
}

// Stores the translation, scale, rotation, and skew deltas between two frames
typedef struct FrameDelta {
    cl_float2 translation;
    float rotation;
    cl_float2 scale;
    cl_float2 skew;
} FrameDelta;

typedef struct SimilarityMatrix {
    // The 2x3 similarity matrix
    double matrix[6];
} SimilarityMatrix;

typedef struct CropInfo {
    // The top left corner of the bounding box for the crop
    cl_float2 top_left;
    // The bottom right corner of the bounding box for the crop
    cl_float2 bottom_right;
} CropInfo;

// Returned from function that determines start and end values for iteration
// around the current frame in a ringbuffer
typedef struct IterIndices {
    int start;
    int end;
} IterIndices;

typedef struct DeshakeOpenCLContext {
    OpenCLFilterContext ocf;
    // Whether or not the above `OpenCLFilterContext` has been initialized
    int initialized;

    // These variables are used in the activate callback
    int64_t duration;
    bool eof;

    // State for random number generation
    AVLFG alfg;

    // FIFO frame queue used to buffer future frames for processing
    FFFrameQueue fq;
    // Ringbuffers for frame positions
    AbsoluteFrameMotion abs_motion;

    // The number of frames' motion to consider before and after the frame we are
    // smoothing
    int smooth_window;
    // The number of the frame we are currently processing
    int curr_frame;

    // Stores a 1d array of normalised gaussian kernel values for convolution
    float *gauss_kernel;

    // Buffer for error values used in RANSAC code
    float *ransac_err;

    // Information regarding how to crop the smoothed luminance (or RGB) planes
    CropInfo crop_y;
    // Information regarding how to crop the smoothed chroma planes
    CropInfo crop_uv;

    // Whether or not we are processing YUV input (as opposed to RGB)
    bool is_yuv;
    // The underlying format of the hardware surfaces
    enum AVPixelFormat sw_format;

    // Buffer to copy `matches` into for the CPU to work with
    MotionVector *matches_host;
    MotionVector *matches_contig_host;

    MotionVector *inliers;

    cl_command_queue command_queue;
    cl_kernel kernel_grayscale;
    cl_kernel kernel_harris_response;
    cl_kernel kernel_refine_features;
    cl_kernel kernel_brief_descriptors;
    cl_kernel kernel_match_descriptors;
    cl_kernel kernel_transform;
    cl_kernel kernel_crop_upscale;

    // Stores a frame converted to grayscale
    cl_mem grayscale;
    // Stores the harris response for a frame (measure of "cornerness" for each pixel)
    cl_mem harris_buf;

    // Detected features after non-maximum suppression and sub-pixel refinement
    cl_mem refined_features;
    // Saved from the previous frame
    cl_mem prev_refined_features;

    // BRIEF sampling pattern that is randomly initialized
    cl_mem brief_pattern;
    // Feature point descriptors for the current frame
    cl_mem descriptors;
    // Feature point descriptors for the previous frame
    cl_mem prev_descriptors;
    // Vectors between points in current and previous frame
    cl_mem matches;
    cl_mem matches_contig;
    // Holds the matrix to transform luminance (or RGB) with
    cl_mem transform_y;
    // Holds the matrix to transform chroma with
    cl_mem transform_uv;

    // Configurable options

    int tripod_mode;
    int debug_on;
    int should_crop;

    // Whether or not feature points should be refined at a sub-pixel level
    cl_int refine_features;
    // If the user sets a value other than the default, 0, this percentage is
    // translated into a sigma value ranging from 0.5 to 40.0
    float smooth_percent;
    // This number is multiplied by the video frame rate to determine the size
    // of the smooth window
    float smooth_window_multiplier;

    // Debug stuff

    cl_kernel kernel_draw_debug_info;
    cl_mem debug_matches;
    cl_mem debug_model_matches;

    // These store the total time spent executing the different kernels in nanoseconds
    unsigned long long grayscale_time;
    unsigned long long harris_response_time;
    unsigned long long refine_features_time;
    unsigned long long brief_descriptors_time;
    unsigned long long match_descriptors_time;
    unsigned long long transform_time;
    unsigned long long crop_upscale_time;

    // Time spent copying matched features from the device to the host
    unsigned long long read_buf_time;
} DeshakeOpenCLContext;

// Returns a random uniformly-distributed number in [low, high)
// (the modulo excludes the upper bound)
static int rand_in(int low, int high, AVLFG *alfg) {
    return (av_lfg_get(alfg) % (high - low)) + low;
}

// Returns the average execution time for an event given the total time and the
// number of frames processed.
static double averaged_event_time_ms(unsigned long long total_time, int num_frames) {
    return (double)total_time / (double)num_frames / 1000000.0;
}

// The following code is loosely ported from OpenCV

// Estimates affine transform from 3 point pairs
// model is a 2x3 matrix:
// a b c
// d e f
static void run_estimate_kernel(const MotionVector *point_pairs, double *model)
{
    // src points
    double x1 = point_pairs[0].p.p1.s[0];
    double y1 = point_pairs[0].p.p1.s[1];
    double x2 = point_pairs[1].p.p1.s[0];
    double y2 = point_pairs[1].p.p1.s[1];
    double x3 = point_pairs[2].p.p1.s[0];
    double y3 = point_pairs[2].p.p1.s[1];

    // dest points
    double X1 = point_pairs[0].p.p2.s[0];
    double Y1 = point_pairs[0].p.p2.s[1];
    double X2 = point_pairs[1].p.p2.s[0];
    double Y2 = point_pairs[1].p.p2.s[1];
    double X3 = point_pairs[2].p.p2.s[0];
    double Y3 = point_pairs[2].p.p2.s[1];

    double d = 1.0 / ( x1*(y2-y3) + x2*(y3-y1) + x3*(y1-y2) );

    model[0] = d * ( X1*(y2-y3) + X2*(y3-y1) + X3*(y1-y2) );
    model[1] = d * ( X1*(x3-x2) + X2*(x1-x3) + X3*(x2-x1) );
    model[2] = d * ( X1*(x2*y3 - x3*y2) + X2*(x3*y1 - x1*y3) + X3*(x1*y2 - x2*y1) );

    model[3] = d * ( Y1*(y2-y3) + Y2*(y3-y1) + Y3*(y1-y2) );
    model[4] = d * ( Y1*(x3-x2) + Y2*(x1-x3) + Y3*(x2-x1) );
    model[5] = d * ( Y1*(x2*y3 - x3*y2) + Y2*(x3*y1 - x1*y3) + Y3*(x1*y2 - x2*y1) );
}
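
// Note on the solve above: it is Cramer's rule applied to the three
// correspondences, and the shared denominator inside `d` is twice the signed
// area of the source triangle. It is exactly zero for collinear source points,
// which is why the RANSAC path rejects such subsets via check_subset() before
// calling this function.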

// Checks that the 3 points in the given array are not collinear
static bool points_not_collinear(const cl_float2 **points)
{
    int j, k, i = 2;

    for (j = 0; j < i; j++) {
        double dx1 = points[j]->s[0] - points[i]->s[0];
        double dy1 = points[j]->s[1] - points[i]->s[1];

        for (k = 0; k < j; k++) {
            double dx2 = points[k]->s[0] - points[i]->s[0];
            double dy2 = points[k]->s[1] - points[i]->s[1];

            // Assuming a 3840 x 2160 video with a point at (0, 0) and one at
            // (3839, 2159), this prevents a third point from being within roughly
            // 0.5 of a pixel of the line connecting the two on both axes
            if (fabs(dx2*dy1 - dy2*dx1) <= 1.0) {
                return false;
            }
        }
    }

    return true;
}

// Checks a subset of 3 point pairs to make sure that the points are not collinear
// and not too close to each other
static bool check_subset(const MotionVector *pairs_subset)
{
    const cl_float2 *prev_points[] = {
        &pairs_subset[0].p.p1,
        &pairs_subset[1].p.p1,
        &pairs_subset[2].p.p1
    };

    const cl_float2 *curr_points[] = {
        &pairs_subset[0].p.p2,
        &pairs_subset[1].p.p2,
        &pairs_subset[2].p.p2
    };

    return points_not_collinear(prev_points) && points_not_collinear(curr_points);
}
402 
403 // Selects a random subset of 3 points from point_pairs and places them in pairs_subset
404 static bool get_subset(
405  AVLFG *alfg,
406  const MotionVector *point_pairs,
407  const int num_point_pairs,
408  MotionVector *pairs_subset,
409  int max_attempts
410 ) {
411  int idx[3];
412  int i = 0, j, iters = 0;
413 
414  for (; iters < max_attempts; iters++) {
415  for (i = 0; i < 3 && iters < max_attempts;) {
416  int idx_i = 0;
417 
418  for (;;) {
419  idx_i = idx[i] = rand_in(0, num_point_pairs, alfg);
420 
421  for (j = 0; j < i; j++) {
422  if (idx_i == idx[j]) {
423  break;
424  }
425  }
426 
427  if (j == i) {
428  break;
429  }
430  }
431 
432  pairs_subset[i] = point_pairs[idx[i]];
433  i++;
434  }
435 
436  if (i == 3 && !check_subset(pairs_subset)) {
437  continue;
438  }
439  break;
440  }
441 
442  return i == 3 && iters < max_attempts;
443 }

// Computes the error for each of the given points based on the given model.
static void compute_error(
    const MotionVector *point_pairs,
    const int num_point_pairs,
    const double *model,
    float *err
) {
    double F0 = model[0], F1 = model[1], F2 = model[2];
    double F3 = model[3], F4 = model[4], F5 = model[5];

    for (int i = 0; i < num_point_pairs; i++) {
        const cl_float2 *f = &point_pairs[i].p.p1;
        const cl_float2 *t = &point_pairs[i].p.p2;

        double a = F0*f->s[0] + F1*f->s[1] + F2 - t->s[0];
        double b = F3*f->s[0] + F4*f->s[1] + F5 - t->s[1];

        err[i] = a*a + b*b;
    }
}

// Determines which of the given point matches are inliers for the given model
// based on the specified threshold.
//
// err must be an array of num_point_pairs length
static int find_inliers(
    MotionVector *point_pairs,
    const int num_point_pairs,
    const double *model,
    float *err,
    double thresh
) {
    float t = (float)(thresh * thresh);
    int i, n = num_point_pairs, num_inliers = 0;

    compute_error(point_pairs, num_point_pairs, model, err);

    for (i = 0; i < n; i++) {
        if (err[i] <= t) {
            // This is an inlier
            point_pairs[i].should_consider = true;
            num_inliers += 1;
        } else {
            point_pairs[i].should_consider = false;
        }
    }

    return num_inliers;
}
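
// Since compute_error() returns squared distances, find_inliers() squares the
// threshold once up front rather than taking a square root per point.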

// Determines the number of iterations required to achieve the desired confidence level.
//
// The equation used to determine the number of iterations to do is:
// 1 - confidence = (1 - inlier_probability^num_points)^num_iters
//
// Solving for num_iters:
//
// num_iters = log(1 - confidence) / log(1 - inlier_probability^num_points)
//
// A more in-depth explanation can be found at https://en.wikipedia.org/wiki/Random_sample_consensus
// under the 'Parameters' heading
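//
// For example, with confidence = 0.99 and an outlier ratio of 0.5, the chance
// that a random 3-point subset is all inliers is 0.5^3 = 0.125, so
// num_iters = log(0.01) / log(1 - 0.125) ~= -4.61 / -0.134 ~= 35 iterations.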
static int ransac_update_num_iters(double confidence, double num_outliers, int max_iters)
{
    double num, denom;

    confidence = av_clipd(confidence, 0.0, 1.0);
    num_outliers = av_clipd(num_outliers, 0.0, 1.0);

    // avoid inf's & nan's
    num = FFMAX(1.0 - confidence, DBL_MIN);
    denom = 1.0 - pow(1.0 - num_outliers, 3);
    if (denom < DBL_MIN) {
        return 0;
    }

    num = log(num);
    denom = log(denom);

    return denom >= 0 || -num >= max_iters * (-denom) ? max_iters : (int)round(num / denom);
}

// Estimates an affine transform between the given pairs of points using RANdom
// SAmple Consensus
static bool estimate_affine_2d(
    DeshakeOpenCLContext *deshake_ctx,
    MotionVector *point_pairs,
    DebugMatches *debug_matches,
    const int num_point_pairs,
    double *model_out,
    const double threshold,
    const int max_iters,
    const double confidence
) {
    bool result = false;
    double best_model[6], model[6];
    MotionVector pairs_subset[3], best_pairs[3];

    int iter, niters = FFMAX(max_iters, 1);
    int good_count, max_good_count = 0;

    // We need at least 3 points to build a model from
    if (num_point_pairs < 3) {
        return false;
    } else if (num_point_pairs == 3) {
        // There are only 3 points, so RANSAC doesn't apply here
        run_estimate_kernel(point_pairs, model_out);

        for (int i = 0; i < 3; ++i) {
            point_pairs[i].should_consider = true;
        }

        return true;
    }

    for (iter = 0; iter < niters; ++iter) {
        bool found = get_subset(&deshake_ctx->alfg, point_pairs, num_point_pairs, pairs_subset, 10000);

        if (!found) {
            if (iter == 0) {
                return false;
            }

            break;
        }

        run_estimate_kernel(pairs_subset, model);
        good_count = find_inliers(point_pairs, num_point_pairs, model, deshake_ctx->ransac_err, threshold);

        if (good_count > FFMAX(max_good_count, 2)) {
            for (int mi = 0; mi < 6; ++mi) {
                best_model[mi] = model[mi];
            }

            for (int pi = 0; pi < 3; pi++) {
                best_pairs[pi] = pairs_subset[pi];
            }

            max_good_count = good_count;
            niters = ransac_update_num_iters(
                confidence,
                (double)(num_point_pairs - good_count) / num_point_pairs,
                niters
            );
        }
    }

    if (max_good_count > 0) {
        for (int mi = 0; mi < 6; ++mi) {
            model_out[mi] = best_model[mi];
        }

        for (int pi = 0; pi < 3; ++pi) {
            debug_matches->model_matches[pi] = best_pairs[pi];
        }
        debug_matches->num_model_matches = 3;

        // Find the inliers again for the best model for debugging
        find_inliers(point_pairs, num_point_pairs, best_model, deshake_ctx->ransac_err, threshold);
        result = true;
    }

    return result;
}

// "Wiggles" the first point in best_pairs around a tiny bit in order to decrease the
// total error
static void optimize_model(
    DeshakeOpenCLContext *deshake_ctx,
    MotionVector *best_pairs,
    MotionVector *inliers,
    const int num_inliers,
    float best_err,
    double *model_out
) {
    float move_x_val = 0.01;
    float move_y_val = 0.01;
    bool move_x = true;
    float old_move_x_val = 0;
    double model[6];
    int last_changed = 0;

    for (int iters = 0; iters < 200; iters++) {
        float total_err = 0;

        if (move_x) {
            best_pairs[0].p.p2.s[0] += move_x_val;
        } else {
            best_pairs[0].p.p2.s[1] += move_y_val;
        }

        run_estimate_kernel(best_pairs, model);
        compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);

        for (int j = 0; j < num_inliers; j++) {
            total_err += deshake_ctx->ransac_err[j];
        }

        if (total_err < best_err) {
            for (int mi = 0; mi < 6; ++mi) {
                model_out[mi] = model[mi];
            }

            best_err = total_err;
            last_changed = iters;
        } else {
            // Undo the change
            if (move_x) {
                best_pairs[0].p.p2.s[0] -= move_x_val;
            } else {
                best_pairs[0].p.p2.s[1] -= move_y_val;
            }

            if (iters - last_changed > 4) {
                // We've already improved the model as much as we can
                break;
            }

            old_move_x_val = move_x_val;

            if (move_x) {
                move_x_val *= -1;
            } else {
                move_y_val *= -1;
            }

            if (old_move_x_val < 0) {
                move_x = false;
            } else {
                move_x = true;
            }
        }
    }
}
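
// In effect optimize_model() is a tiny hill-climbing pass: it nudges one
// matched point's destination coordinate by +/- 0.01 (alternating between the
// x and y axes, flipping direction whenever the error gets worse), re-fits the
// model, keeps any change that lowers the total inlier error, and stops after
// 4 consecutive iterations without improvement.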

// Uses a process similar to that of RANSAC to find a transform that minimizes
// the total error for a set of point matches determined to be inliers
//
// (Pick random subsets, compute model, find total error, iterate until error
// is minimized.)
static bool minimize_error(
    DeshakeOpenCLContext *deshake_ctx,
    MotionVector *inliers,
    DebugMatches *debug_matches,
    const int num_inliers,
    double *model_out,
    const int max_iters
) {
    bool result = false;
    float best_err = FLT_MAX;
    double best_model[6], model[6];
    MotionVector pairs_subset[3], best_pairs[3];

    for (int i = 0; i < max_iters; i++) {
        float total_err = 0;
        bool found = get_subset(&deshake_ctx->alfg, inliers, num_inliers, pairs_subset, 10000);

        if (!found) {
            if (i == 0) {
                return false;
            }

            break;
        }

        run_estimate_kernel(pairs_subset, model);
        compute_error(inliers, num_inliers, model, deshake_ctx->ransac_err);

        for (int j = 0; j < num_inliers; j++) {
            total_err += deshake_ctx->ransac_err[j];
        }

        if (total_err < best_err) {
            for (int mi = 0; mi < 6; ++mi) {
                best_model[mi] = model[mi];
            }

            for (int pi = 0; pi < 3; pi++) {
                best_pairs[pi] = pairs_subset[pi];
            }

            best_err = total_err;
        }
    }

    for (int mi = 0; mi < 6; ++mi) {
        model_out[mi] = best_model[mi];
    }

    for (int pi = 0; pi < 3; ++pi) {
        debug_matches->model_matches[pi] = best_pairs[pi];
    }
    debug_matches->num_model_matches = 3;
    result = true;

    optimize_model(deshake_ctx, best_pairs, inliers, num_inliers, best_err, model_out);
    return result;
}

// End code from OpenCV

// Decomposes a similarity matrix into translation, rotation, scale, and skew
//
// See http://frederic-wang.fr/decomposition-of-2d-transform-matrices.html
static FrameDelta decompose_transform(double *model)
{
    FrameDelta ret;

    double a = model[0];
    double c = model[1];
    double e = model[2];
    double b = model[3];
    double d = model[4];
    double f = model[5];
    double delta = a * d - b * c;

    memset(&ret, 0, sizeof(ret));

    ret.translation.s[0] = e;
    ret.translation.s[1] = f;

    // This is the QR method
    if (a != 0 || b != 0) {
        double r = hypot(a, b);

        ret.rotation = FFSIGN(b) * acos(a / r);
        ret.scale.s[0] = r;
        ret.scale.s[1] = delta / r;
        ret.skew.s[0] = atan((a * c + b * d) / (r * r));
        ret.skew.s[1] = 0;
    } else if (c != 0 || d != 0) {
        double s = sqrt(c * c + d * d);

        ret.rotation = M_PI / 2 - FFSIGN(d) * acos(-c / s);
        ret.scale.s[0] = delta / s;
        ret.scale.s[1] = s;
        ret.skew.s[0] = 0;
        ret.skew.s[1] = atan((a * c + b * d) / (s * s));
    } // otherwise there is only translation

    return ret;
}
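
// As an illustrative sanity check (not from the original source): feeding in a
// pure rotation model {cos(t), -sin(t), 0, sin(t), cos(t), 0} gives
// r = hypot(cos(t), sin(t)) = 1, so the result is rotation = t,
// scale = (1, 1), skew = (0, 0), and translation = (0, 0), as expected.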

// Move valid vectors from the 2d buffer into a 1d buffer where they are contiguous
static int make_vectors_contig(
    DeshakeOpenCLContext *deshake_ctx,
    int size_y,
    int size_x
) {
    int num_vectors = 0;

    for (int i = 0; i < size_y; ++i) {
        for (int j = 0; j < size_x; ++j) {
            MotionVector v = deshake_ctx->matches_host[j + i * size_x];

            if (v.should_consider) {
                deshake_ctx->matches_contig_host[num_vectors] = v;
                ++num_vectors;
            }

            // Make sure we do not exceed the amount of space we allocated for these vectors
            if (num_vectors == MATCHES_CONTIG_SIZE - 1) {
                return num_vectors;
            }
        }
    }
    return num_vectors;
}

// Returns the gaussian kernel value for the given x coordinate and sigma value
static float gaussian_for(int x, float sigma) {
    return 1.0f / expf(((float)x * (float)x) / (2.0f * sigma * sigma));
}
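
// Note that the 1 / (sigma * sqrt(2 * pi)) factor of a true gaussian PDF is
// omitted above; make_gauss_kernel() divides every tap by the kernel's sum,
// so any constant factor would cancel out during normalization anyway.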

// Makes a normalized gaussian kernel of the given length for the given sigma
// and places it in gauss_kernel
static void make_gauss_kernel(float *gauss_kernel, float length, float sigma)
{
    float gauss_sum = 0;
    int window_half = length / 2;

    for (int i = 0; i < length; ++i) {
        float val = gaussian_for(i - window_half, sigma);

        gauss_sum += val;
        gauss_kernel[i] = val;
    }

    // Normalize the gaussian values
    for (int i = 0; i < length; ++i) {
        gauss_kernel[i] /= gauss_sum;
    }
}

// Returns indices to start and end iteration at in order to iterate over a window
// of length size centered at the current frame in a ringbuffer
//
// Always returns numbers that result in a window of length size, even if that
// means specifying negative indices or indices past the end of the values in the
// ringbuffers. Make sure you clip indices appropriately within your loop.
static IterIndices start_end_for(DeshakeOpenCLContext *deshake_ctx, int length) {
    IterIndices indices;

    indices.start = deshake_ctx->abs_motion.curr_frame_offset - (length / 2);
    indices.end = deshake_ctx->abs_motion.curr_frame_offset + (length / 2) + (length % 2);

    return indices;
}

// Sets val to the value in the given ringbuffer at the given offset, taking care of
// clipping the offset into the appropriate range
static void ringbuf_float_at(
    DeshakeOpenCLContext *deshake_ctx,
    AVFifoBuffer *values,
    float *val,
    int offset
) {
    int clip_start, clip_end, offset_clipped;
    if (deshake_ctx->abs_motion.data_end_offset != -1) {
        clip_end = deshake_ctx->abs_motion.data_end_offset;
    } else {
        // This expression represents the last valid index in the buffer,
        // which we use repeatedly at the end of the video.
        clip_end = deshake_ctx->smooth_window - (av_fifo_space(values) / sizeof(float)) - 1;
    }

    if (deshake_ctx->abs_motion.data_start_offset != -1) {
        clip_start = deshake_ctx->abs_motion.data_start_offset;
    } else {
        // Negative indices will occur at the start of the video, and we want
        // them to be clipped to 0 in order to repeatedly use the position of
        // the first frame.
        clip_start = 0;
    }

    offset_clipped = av_clip(
        offset,
        clip_start,
        clip_end
    );

    av_fifo_generic_peek_at(
        values,
        val,
        offset_clipped * sizeof(float),
        sizeof(float),
        NULL
    );
}

// Returns smoothed current frame value of the given buffer of floats based on the
// given Gaussian kernel and its length (also the window length, centered around the
// current frame) and the "maximum value" of the motion.
//
// This "maximum value" should be the width / height of the image in the case of
// translation and an empirically chosen constant for rotation / scale.
//
// The sigma used to generate the final gaussian kernel with which the camera
// path is smoothed is either fixed (set by the user via deshake_ctx->smooth_percent)
// or adaptively chosen.
static float smooth(
    DeshakeOpenCLContext *deshake_ctx,
    float *gauss_kernel,
    int length,
    float max_val,
    AVFifoBuffer *values
) {
    float new_large_s = 0, new_small_s = 0, new_best = 0, old, diff_between,
          percent_of_max, inverted_percent;
    IterIndices indices = start_end_for(deshake_ctx, length);
    float large_sigma = 40.0f;
    float small_sigma = 2.0f;
    float best_sigma;

    if (deshake_ctx->smooth_percent) {
        best_sigma = (large_sigma - 0.5f) * deshake_ctx->smooth_percent + 0.5f;
    } else {
        // Strategy to adaptively smooth trajectory:
        //
        // 1. Smooth path with large and small sigma values
        // 2. Take the absolute value of the difference between them
        // 3. Get a percentage by putting the difference over the "max value"
        // 4. Invert the percentage
        // 5. Calculate a new sigma value weighted towards the larger sigma value
        // 6. Determine final smoothed trajectory value using that sigma

        make_gauss_kernel(gauss_kernel, length, large_sigma);
        for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
            ringbuf_float_at(deshake_ctx, values, &old, i);
            new_large_s += old * gauss_kernel[j];
        }

        make_gauss_kernel(gauss_kernel, length, small_sigma);
        for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
            ringbuf_float_at(deshake_ctx, values, &old, i);
            new_small_s += old * gauss_kernel[j];
        }

        diff_between = fabsf(new_large_s - new_small_s);
        percent_of_max = diff_between / max_val;
        inverted_percent = 1 - percent_of_max;
        best_sigma = large_sigma * powf(inverted_percent, 40);
    }

    make_gauss_kernel(gauss_kernel, length, best_sigma);
    for (int i = indices.start, j = 0; i < indices.end; ++i, ++j) {
        ringbuf_float_at(deshake_ctx, values, &old, i);
        new_best += old * gauss_kernel[j];
    }

    return new_best;
}
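
// For intuition (illustrative numbers, not from the original source): if the
// large- and small-sigma trajectories agree exactly, inverted_percent is 1 and
// best_sigma stays at 40.0, i.e. maximum smoothing; if they differ by 10% of
// max_val, best_sigma = 40.0 * 0.9^40 ~= 0.59, leaving the path nearly
// untouched where the motion is genuinely changing.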

// Returns the position of the given point after the transform is applied
static cl_float2 transformed_point(float x, float y, float *transform) {
    cl_float2 ret;

    ret.s[0] = x * transform[0] + y * transform[1] + transform[2];
    ret.s[1] = x * transform[3] + y * transform[4] + transform[5];

    return ret;
}

// Creates an affine transform that scales from the center of a frame
static void transform_center_scale(
    float x_shift,
    float y_shift,
    float angle,
    float scale_x,
    float scale_y,
    float center_w,
    float center_h,
    float *matrix
) {
    cl_float2 center_s;
    float center_s_w, center_s_h;

    ff_get_matrix(
        0,
        0,
        0,
        scale_x,
        scale_y,
        matrix
    );

    center_s = transformed_point(center_w, center_h, matrix);
    center_s_w = center_w - center_s.s[0];
    center_s_h = center_h - center_s.s[1];

    ff_get_matrix(
        x_shift + center_s_w,
        y_shift + center_s_h,
        angle,
        scale_x,
        scale_y,
        matrix
    );
}
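
// The first matrix computed above is only used to measure where the frame
// center would land under the scale; the second computation then folds the
// compensating shift (center_s_w, center_s_h) into the final matrix so that
// the scaling appears to happen about the frame center rather than the origin.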

// Determines the crop necessary to eliminate black borders from a smoothed frame
// and updates target crop accordingly
static void update_needed_crop(
    CropInfo* crop,
    float *transform,
    float frame_width,
    float frame_height
) {
    float new_width, new_height, adjusted_width, adjusted_height, adjusted_x, adjusted_y;

    cl_float2 top_left = transformed_point(0, 0, transform);
    cl_float2 top_right = transformed_point(frame_width, 0, transform);
    cl_float2 bottom_left = transformed_point(0, frame_height, transform);
    cl_float2 bottom_right = transformed_point(frame_width, frame_height, transform);
    float ar_h = frame_height / frame_width;
    float ar_w = frame_width / frame_height;

    if (crop->bottom_right.s[0] == 0) {
        // The crop hasn't been set to the original size of the plane
        crop->bottom_right.s[0] = frame_width;
        crop->bottom_right.s[1] = frame_height;
    }

    crop->top_left.s[0] = FFMAX3(
        crop->top_left.s[0],
        top_left.s[0],
        bottom_left.s[0]
    );

    crop->top_left.s[1] = FFMAX3(
        crop->top_left.s[1],
        top_left.s[1],
        top_right.s[1]
    );

    crop->bottom_right.s[0] = FFMIN3(
        crop->bottom_right.s[0],
        bottom_right.s[0],
        top_right.s[0]
    );

    crop->bottom_right.s[1] = FFMIN3(
        crop->bottom_right.s[1],
        bottom_right.s[1],
        bottom_left.s[1]
    );

    // Make sure our potentially new bounding box has the same aspect ratio
    new_height = crop->bottom_right.s[1] - crop->top_left.s[1];
    new_width = crop->bottom_right.s[0] - crop->top_left.s[0];

    adjusted_width = new_height * ar_w;
    adjusted_x = crop->bottom_right.s[0] - adjusted_width;

    if (adjusted_x >= crop->top_left.s[0]) {
        crop->top_left.s[0] = adjusted_x;
    } else {
        adjusted_height = new_width * ar_h;
        adjusted_y = crop->bottom_right.s[1] - adjusted_height;
        crop->top_left.s[1] = adjusted_y;
    }
}
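
// The aspect-ratio fixup at the end of update_needed_crop() keeps the running
// crop at the source frame's aspect ratio: it first tries to narrow the box to
// new_height * ar_w, and only if that would push past the current left edge
// does it shorten the box to new_width * ar_h instead, so the later
// crop_upscale pass can scale uniformly without distorting the image.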

static av_cold void deshake_opencl_uninit(AVFilterContext *avctx)
{
    DeshakeOpenCLContext *ctx = avctx->priv;
    cl_int cle;

    for (int i = 0; i < RingbufCount; i++)
        av_fifo_freep(&ctx->abs_motion.ringbuffers[i]);

    if (ctx->debug_on)
        free_debug_matches(&ctx->abs_motion);

    if (ctx->gauss_kernel)
        av_freep(&ctx->gauss_kernel);

    if (ctx->ransac_err)
        av_freep(&ctx->ransac_err);

    if (ctx->matches_host)
        av_freep(&ctx->matches_host);

    if (ctx->matches_contig_host)
        av_freep(&ctx->matches_contig_host);

    if (ctx->inliers)
        av_freep(&ctx->inliers);

    ff_framequeue_free(&ctx->fq);

    CL_RELEASE_KERNEL(ctx->kernel_grayscale);
    CL_RELEASE_KERNEL(ctx->kernel_harris_response);
    CL_RELEASE_KERNEL(ctx->kernel_refine_features);
    CL_RELEASE_KERNEL(ctx->kernel_brief_descriptors);
    CL_RELEASE_KERNEL(ctx->kernel_match_descriptors);
    CL_RELEASE_KERNEL(ctx->kernel_transform);
    CL_RELEASE_KERNEL(ctx->kernel_crop_upscale);
    if (ctx->debug_on)
        CL_RELEASE_KERNEL(ctx->kernel_draw_debug_info);

    CL_RELEASE_QUEUE(ctx->command_queue);

    if (!ctx->is_yuv)
        CL_RELEASE_MEMORY(ctx->grayscale);
    CL_RELEASE_MEMORY(ctx->harris_buf);
    CL_RELEASE_MEMORY(ctx->refined_features);
    CL_RELEASE_MEMORY(ctx->prev_refined_features);
    CL_RELEASE_MEMORY(ctx->brief_pattern);
    CL_RELEASE_MEMORY(ctx->descriptors);
    CL_RELEASE_MEMORY(ctx->prev_descriptors);
    CL_RELEASE_MEMORY(ctx->matches);
    CL_RELEASE_MEMORY(ctx->matches_contig);
    CL_RELEASE_MEMORY(ctx->transform_y);
    CL_RELEASE_MEMORY(ctx->transform_uv);
    if (ctx->debug_on) {
        CL_RELEASE_MEMORY(ctx->debug_matches);
        CL_RELEASE_MEMORY(ctx->debug_model_matches);
    }

    ff_opencl_filter_uninit(avctx);
}

static int deshake_opencl_init(AVFilterContext *avctx)
{
    DeshakeOpenCLContext *ctx = avctx->priv;
    AVFilterLink *outlink = avctx->outputs[0];
    AVFilterLink *inlink = avctx->inputs[0];
    // Pointer to the host-side pattern buffer to be initialized and then copied
    // to the GPU
    PointPair *pattern_host = NULL;
    cl_int cle;
    int err;
    cl_ulong8 zeroed_ulong8;
    FFFrameQueueGlobal fqg;
    cl_image_format grayscale_format;
    cl_image_desc grayscale_desc;
    cl_command_queue_properties queue_props;

    const enum AVPixelFormat disallowed_formats[14] = {
        AV_PIX_FMT_GBRP,
        AV_PIX_FMT_GBRP9BE,
        AV_PIX_FMT_GBRP9LE,
        AV_PIX_FMT_GBRP10BE,
        AV_PIX_FMT_GBRP10LE,
        AV_PIX_FMT_GBRP16BE,
        AV_PIX_FMT_GBRP16LE,
        AV_PIX_FMT_GBRAP,
        AV_PIX_FMT_GBRAP16BE,
        AV_PIX_FMT_GBRAP16LE,
        AV_PIX_FMT_GBRAP12BE,
        AV_PIX_FMT_GBRAP12LE,
        AV_PIX_FMT_GBRAP10BE,
        AV_PIX_FMT_GBRAP10LE,
    };

    // Number of elements for an array
    const int image_grid_32 = ROUNDED_UP_DIV(outlink->h, 32) * ROUNDED_UP_DIV(outlink->w, 32);

    const int descriptor_buf_size = image_grid_32 * (BREIFN / 8);
    const int features_buf_size = image_grid_32 * sizeof(cl_float2);

    const AVHWFramesContext *hw_frames_ctx = (AVHWFramesContext*)inlink->hw_frames_ctx->data;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hw_frames_ctx->sw_format);

    av_assert0(hw_frames_ctx);
    av_assert0(desc);

    ff_framequeue_global_init(&fqg);
    ff_framequeue_init(&ctx->fq, &fqg);
    ctx->eof = false;
    ctx->smooth_window = (int)(av_q2d(avctx->inputs[0]->frame_rate) * ctx->smooth_window_multiplier);
    ctx->curr_frame = 0;

    memset(&zeroed_ulong8, 0, sizeof(cl_ulong8));

    ctx->gauss_kernel = av_malloc_array(ctx->smooth_window, sizeof(float));
    if (!ctx->gauss_kernel) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    ctx->ransac_err = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(float));
    if (!ctx->ransac_err) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    for (int i = 0; i < RingbufCount; i++) {
        ctx->abs_motion.ringbuffers[i] = av_fifo_alloc_array(
            ctx->smooth_window,
            sizeof(float)
        );

        if (!ctx->abs_motion.ringbuffers[i]) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
    }

    if (ctx->debug_on) {
        ctx->abs_motion.debug_matches = av_fifo_alloc_array(
            ctx->smooth_window / 2,
            sizeof(DebugMatches)
        );

        if (!ctx->abs_motion.debug_matches) {
            err = AVERROR(ENOMEM);
            goto fail;
        }
    }

    ctx->abs_motion.curr_frame_offset = 0;
    ctx->abs_motion.data_start_offset = -1;
    ctx->abs_motion.data_end_offset = -1;

    pattern_host = av_malloc_array(BREIFN, sizeof(PointPair));
    if (!pattern_host) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    ctx->matches_host = av_malloc_array(image_grid_32, sizeof(MotionVector));
    if (!ctx->matches_host) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    ctx->matches_contig_host = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
    if (!ctx->matches_contig_host) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    ctx->inliers = av_malloc_array(MATCHES_CONTIG_SIZE, sizeof(MotionVector));
    if (!ctx->inliers) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    // Initializing the patch pattern for building BRIEF descriptors with
    av_lfg_init(&ctx->alfg, 234342424);
    for (int i = 0; i < BREIFN; ++i) {
        PointPair pair;

        for (int j = 0; j < 2; ++j) {
            pair.p1.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
            pair.p2.s[j] = rand_in(-BRIEF_PATCH_SIZE_HALF, BRIEF_PATCH_SIZE_HALF + 1, &ctx->alfg);
        }

        pattern_host[i] = pair;
    }

    // Set sw_format before checking it against the disallowed formats
    ctx->sw_format = hw_frames_ctx->sw_format;

    for (int i = 0; i < 14; i++) {
        if (ctx->sw_format == disallowed_formats[i]) {
            av_log(avctx, AV_LOG_ERROR, "unsupported format in deshake_opencl.\n");
            err = AVERROR(ENOSYS);
            goto fail;
        }
    }

    if (desc->flags & AV_PIX_FMT_FLAG_RGB) {
        ctx->is_yuv = false;
    } else {
        ctx->is_yuv = true;
    }

    err = ff_opencl_filter_load_program(avctx, &ff_opencl_source_deshake, 1);
    if (err < 0)
        goto fail;

    if (ctx->debug_on) {
        queue_props = CL_QUEUE_PROFILING_ENABLE;
    } else {
        queue_props = 0;
    }
    ctx->command_queue = clCreateCommandQueue(
        ctx->ocf.hwctx->context,
        ctx->ocf.hwctx->device_id,
        queue_props,
        &cle
    );
    CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create OpenCL command queue %d.\n", cle);

    CL_CREATE_KERNEL(ctx, grayscale);
    CL_CREATE_KERNEL(ctx, harris_response);
    CL_CREATE_KERNEL(ctx, refine_features);
    CL_CREATE_KERNEL(ctx, brief_descriptors);
    CL_CREATE_KERNEL(ctx, match_descriptors);
    CL_CREATE_KERNEL(ctx, transform);
    CL_CREATE_KERNEL(ctx, crop_upscale);
    if (ctx->debug_on)
        CL_CREATE_KERNEL(ctx, draw_debug_info);

    if (!ctx->is_yuv) {
        grayscale_format.image_channel_order = CL_R;
        grayscale_format.image_channel_data_type = CL_FLOAT;

        grayscale_desc = (cl_image_desc) {
            .image_type = CL_MEM_OBJECT_IMAGE2D,
            .image_width = outlink->w,
            .image_height = outlink->h,
            .image_depth = 0,
            .image_array_size = 0,
            .image_row_pitch = 0,
            .image_slice_pitch = 0,
            .num_mip_levels = 0,
            .num_samples = 0,
            .buffer = NULL,
        };

        ctx->grayscale = clCreateImage(
            ctx->ocf.hwctx->context,
            0,
            &grayscale_format,
            &grayscale_desc,
            NULL,
            &cle
        );
        CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to create grayscale image: %d.\n", cle);
    }

    CL_CREATE_BUFFER(ctx, harris_buf, outlink->h * outlink->w * sizeof(float));
    CL_CREATE_BUFFER(ctx, refined_features, features_buf_size);
    CL_CREATE_BUFFER(ctx, prev_refined_features, features_buf_size);
    CL_CREATE_BUFFER_FLAGS(
        ctx,
        brief_pattern,
        CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR,
        BREIFN * sizeof(PointPair),
        pattern_host
    );
    CL_CREATE_BUFFER(ctx, descriptors, descriptor_buf_size);
    CL_CREATE_BUFFER(ctx, prev_descriptors, descriptor_buf_size);
    CL_CREATE_BUFFER(ctx, matches, image_grid_32 * sizeof(MotionVector));
    CL_CREATE_BUFFER(ctx, matches_contig, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
    CL_CREATE_BUFFER(ctx, transform_y, 9 * sizeof(float));
    CL_CREATE_BUFFER(ctx, transform_uv, 9 * sizeof(float));
    if (ctx->debug_on) {
        CL_CREATE_BUFFER(ctx, debug_matches, MATCHES_CONTIG_SIZE * sizeof(MotionVector));
        CL_CREATE_BUFFER(ctx, debug_model_matches, 3 * sizeof(MotionVector));
    }

    ctx->initialized = 1;
    av_freep(&pattern_host);

    return 0;

fail:
    av_freep(&pattern_host);
    return err;
}

// Logs debug information about the transform data
static void transform_debug(AVFilterContext *avctx, float *new_vals, float *old_vals, int curr_frame) {
    av_log(avctx, AV_LOG_VERBOSE,
        "Frame %d:\n"
        "\tframe moved from: %f x, %f y\n"
        "\t              to: %f x, %f y\n"
        "\t    rotated from: %f degrees\n"
        "\t              to: %f degrees\n"
        "\t     scaled from: %f x, %f y\n"
        "\t              to: %f x, %f y\n"
        "\n"
        "\tframe moved by: %f x, %f y\n"
        "\t    rotated by: %f degrees\n"
        "\t     scaled by: %f x, %f y\n",
        curr_frame,
        old_vals[RingbufX], old_vals[RingbufY],
        new_vals[RingbufX], new_vals[RingbufY],
        old_vals[RingbufRot] * (180.0 / M_PI),
        new_vals[RingbufRot] * (180.0 / M_PI),
        old_vals[RingbufScaleX], old_vals[RingbufScaleY],
        new_vals[RingbufScaleX], new_vals[RingbufScaleY],
        old_vals[RingbufX] - new_vals[RingbufX], old_vals[RingbufY] - new_vals[RingbufY],
        old_vals[RingbufRot] * (180.0 / M_PI) - new_vals[RingbufRot] * (180.0 / M_PI),
        new_vals[RingbufScaleX] / old_vals[RingbufScaleX], new_vals[RingbufScaleY] / old_vals[RingbufScaleY]
    );
}

// Uses the buffered motion information to determine a transform that smooths the
// given frame and applies it
static int filter_frame(AVFilterLink *link, AVFrame *input_frame)
{
    AVFilterContext *avctx = link->dst;
    AVFilterLink *outlink = avctx->outputs[0];
    DeshakeOpenCLContext *deshake_ctx = avctx->priv;
    AVFrame *cropped_frame = NULL, *transformed_frame = NULL;
    int err;
    cl_int cle;
    float new_vals[RingbufCount];
    float old_vals[RingbufCount];
    // Luma (in the case of YUV) transform, or just the transform in the case of RGB
    float transform_y[9];
    // Chroma transform
    float transform_uv[9];
    // Luma crop transform (or RGB)
    float transform_crop_y[9];
    // Chroma crop transform
    float transform_crop_uv[9];
    float transform_debug_rgb[9];
    size_t global_work[2];
    int64_t duration;
    cl_mem src, transformed, dst;
    cl_mem transforms[3];
    CropInfo crops[3];
    cl_event transform_event, crop_upscale_event;
    DebugMatches debug_matches;
    cl_int num_model_matches;

    const float center_w = (float)input_frame->width / 2;
    const float center_h = (float)input_frame->height / 2;

    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(deshake_ctx->sw_format);
    const int chroma_width = AV_CEIL_RSHIFT(input_frame->width, desc->log2_chroma_w);
    const int chroma_height = AV_CEIL_RSHIFT(input_frame->height, desc->log2_chroma_h);

    const float center_w_chroma = (float)chroma_width / 2;
    const float center_h_chroma = (float)chroma_height / 2;

    const float luma_w_over_chroma_w = ((float)input_frame->width / (float)chroma_width);
    const float luma_h_over_chroma_h = ((float)input_frame->height / (float)chroma_height);

    if (deshake_ctx->debug_on) {
        av_fifo_generic_read(
            deshake_ctx->abs_motion.debug_matches,
            &debug_matches,
            sizeof(DebugMatches),
            NULL
        );
    }

    if (input_frame->pkt_duration) {
        duration = input_frame->pkt_duration;
    } else {
        duration = av_rescale_q(1, av_inv_q(outlink->frame_rate), outlink->time_base);
    }
    deshake_ctx->duration = input_frame->pts + duration;
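
    // (When the container does not provide pkt_duration, the fallback above
    // derives one frame's duration from the output frame rate, i.e. it
    // rescales a single 1 / frame_rate period into outlink->time_base units.)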

    // Get the absolute transform data for this frame
    for (int i = 0; i < RingbufCount; i++) {
        av_fifo_generic_peek_at(
            deshake_ctx->abs_motion.ringbuffers[i],
            &old_vals[i],
            deshake_ctx->abs_motion.curr_frame_offset * sizeof(float),
            sizeof(float),
            NULL
        );
    }

    if (deshake_ctx->tripod_mode) {
        // If tripod mode is turned on we simply undo all motion relative to the
        // first frame

        new_vals[RingbufX] = 0.0f;
        new_vals[RingbufY] = 0.0f;
        new_vals[RingbufRot] = 0.0f;
        new_vals[RingbufScaleX] = 1.0f;
        new_vals[RingbufScaleY] = 1.0f;
    } else {
        // Tripod mode is off and we need to smooth a moving camera

        new_vals[RingbufX] = smooth(
            deshake_ctx,
            deshake_ctx->gauss_kernel,
            deshake_ctx->smooth_window,
            input_frame->width,
            deshake_ctx->abs_motion.ringbuffers[RingbufX]
        );
        new_vals[RingbufY] = smooth(
            deshake_ctx,
            deshake_ctx->gauss_kernel,
            deshake_ctx->smooth_window,
            input_frame->height,
            deshake_ctx->abs_motion.ringbuffers[RingbufY]
        );
        new_vals[RingbufRot] = smooth(
            deshake_ctx,
            deshake_ctx->gauss_kernel,
            deshake_ctx->smooth_window,
            M_PI / 4,
            deshake_ctx->abs_motion.ringbuffers[RingbufRot]
        );
        new_vals[RingbufScaleX] = smooth(
            deshake_ctx,
            deshake_ctx->gauss_kernel,
            deshake_ctx->smooth_window,
            2.0f,
            deshake_ctx->abs_motion.ringbuffers[RingbufScaleX]
        );
        new_vals[RingbufScaleY] = smooth(
            deshake_ctx,
            deshake_ctx->gauss_kernel,
            deshake_ctx->smooth_window,
            2.0f,
            deshake_ctx->abs_motion.ringbuffers[RingbufScaleY]
        );
    }

    transform_center_scale(
        old_vals[RingbufX] - new_vals[RingbufX],
        old_vals[RingbufY] - new_vals[RingbufY],
        old_vals[RingbufRot] - new_vals[RingbufRot],
        new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
        new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
        center_w,
        center_h,
        transform_y
    );

    transform_center_scale(
        (old_vals[RingbufX] - new_vals[RingbufX]) / luma_w_over_chroma_w,
        (old_vals[RingbufY] - new_vals[RingbufY]) / luma_h_over_chroma_h,
        old_vals[RingbufRot] - new_vals[RingbufRot],
        new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
        new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
        center_w_chroma,
        center_h_chroma,
        transform_uv
    );

    CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_y, NULL);
    CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_uv, 9 * sizeof(float), transform_uv, NULL);

    if (deshake_ctx->debug_on)
        transform_debug(avctx, new_vals, old_vals, deshake_ctx->curr_frame);

    cropped_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!cropped_frame) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    transformed_frame = ff_get_video_buffer(outlink, outlink->w, outlink->h);
    if (!transformed_frame) {
        err = AVERROR(ENOMEM);
        goto fail;
    }

    transforms[0] = deshake_ctx->transform_y;
    transforms[1] = transforms[2] = deshake_ctx->transform_uv;

    for (int p = 0; p < FF_ARRAY_ELEMS(transformed_frame->data); p++) {
        // Transform all of the planes appropriately
        src = (cl_mem)input_frame->data[p];
        transformed = (cl_mem)transformed_frame->data[p];

        if (!transformed)
            break;

        err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
        if (err < 0)
            goto fail;

        CL_RUN_KERNEL_WITH_ARGS(
            deshake_ctx->command_queue,
            deshake_ctx->kernel_transform,
            global_work,
            NULL,
            &transform_event,
            { sizeof(cl_mem), &src },
            { sizeof(cl_mem), &transformed },
            { sizeof(cl_mem), &transforms[p] },
        );
    }

    if (deshake_ctx->debug_on && !deshake_ctx->is_yuv && debug_matches.num_matches > 0) {
        CL_BLOCKING_WRITE_BUFFER(
            deshake_ctx->command_queue,
            deshake_ctx->debug_matches,
            debug_matches.num_matches * sizeof(MotionVector),
            debug_matches.matches,
            NULL
        );

        CL_BLOCKING_WRITE_BUFFER(
            deshake_ctx->command_queue,
            deshake_ctx->debug_model_matches,
            debug_matches.num_model_matches * sizeof(MotionVector),
            debug_matches.model_matches,
            NULL
        );

        num_model_matches = debug_matches.num_model_matches;

        // Invert the transform
        transform_center_scale(
            new_vals[RingbufX] - old_vals[RingbufX],
            new_vals[RingbufY] - old_vals[RingbufY],
            new_vals[RingbufRot] - old_vals[RingbufRot],
            old_vals[RingbufScaleX] / new_vals[RingbufScaleX],
            old_vals[RingbufScaleY] / new_vals[RingbufScaleY],
            center_w,
            center_h,
            transform_debug_rgb
        );

        CL_BLOCKING_WRITE_BUFFER(deshake_ctx->command_queue, deshake_ctx->transform_y, 9 * sizeof(float), transform_debug_rgb, NULL);

        transformed = (cl_mem)transformed_frame->data[0];
        CL_RUN_KERNEL_WITH_ARGS(
            deshake_ctx->command_queue,
            deshake_ctx->kernel_draw_debug_info,
            (size_t[]){ debug_matches.num_matches },
            NULL,
            NULL,
            { sizeof(cl_mem), &transformed },
            { sizeof(cl_mem), &deshake_ctx->debug_matches },
            { sizeof(cl_mem), &deshake_ctx->debug_model_matches },
            { sizeof(cl_int), &num_model_matches },
            { sizeof(cl_mem), &deshake_ctx->transform_y }
        );
    }

    if (deshake_ctx->should_crop) {
        // Generate transforms for cropping
        transform_center_scale(
            (old_vals[RingbufX] - new_vals[RingbufX]) / 5,
            (old_vals[RingbufY] - new_vals[RingbufY]) / 5,
            (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
            new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
            new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
            center_w,
            center_h,
            transform_crop_y
        );
        update_needed_crop(&deshake_ctx->crop_y, transform_crop_y, input_frame->width, input_frame->height);

        transform_center_scale(
            (old_vals[RingbufX] - new_vals[RingbufX]) / (5 * luma_w_over_chroma_w),
            (old_vals[RingbufY] - new_vals[RingbufY]) / (5 * luma_h_over_chroma_h),
            (old_vals[RingbufRot] - new_vals[RingbufRot]) / 5,
            new_vals[RingbufScaleX] / old_vals[RingbufScaleX],
            new_vals[RingbufScaleY] / old_vals[RingbufScaleY],
            center_w_chroma,
            center_h_chroma,
            transform_crop_uv
        );
        update_needed_crop(&deshake_ctx->crop_uv, transform_crop_uv, chroma_width, chroma_height);

        crops[0] = deshake_ctx->crop_y;
        crops[1] = crops[2] = deshake_ctx->crop_uv;

        for (int p = 0; p < FF_ARRAY_ELEMS(cropped_frame->data); p++) {
            // Crop all of the planes appropriately
            dst = (cl_mem)cropped_frame->data[p];
            transformed = (cl_mem)transformed_frame->data[p];

            if (!dst)
                break;

            err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, p, 0);
            if (err < 0)
                goto fail;

            CL_RUN_KERNEL_WITH_ARGS(
                deshake_ctx->command_queue,
                deshake_ctx->kernel_crop_upscale,
                global_work,
                NULL,
                &crop_upscale_event,
                { sizeof(cl_mem), &transformed },
                { sizeof(cl_mem), &dst },
                { sizeof(cl_float2), &crops[p].top_left },
                { sizeof(cl_float2), &crops[p].bottom_right },
            );
        }
    }

    if (deshake_ctx->curr_frame < deshake_ctx->smooth_window / 2) {
        // This means we are somewhere at the start of the video. We need to
        // increment the current frame offset until it reaches the center of
        // the ringbuffers (as the current frame will be located there for
        // the rest of the video).
        //
        // The end of the video is taken care of by draining motion data
        // one-by-one out of the buffer, causing the (at that point fixed)
        // offset to move towards later frames' data.
        ++deshake_ctx->abs_motion.curr_frame_offset;
    }

    if (deshake_ctx->abs_motion.data_end_offset != -1) {
        // Keep the end offset in sync with the frame it's supposed to be
        // positioned at
        --deshake_ctx->abs_motion.data_end_offset;

        if (deshake_ctx->abs_motion.data_end_offset == deshake_ctx->abs_motion.curr_frame_offset - 1) {
            // The end offset would be the start of the new video sequence; flip to
            // start offset
            deshake_ctx->abs_motion.data_end_offset = -1;
            deshake_ctx->abs_motion.data_start_offset = deshake_ctx->abs_motion.curr_frame_offset;
        }
    } else if (deshake_ctx->abs_motion.data_start_offset != -1) {
        // Keep the start offset in sync with the frame it's supposed to be
        // positioned at
        --deshake_ctx->abs_motion.data_start_offset;
    }

    if (deshake_ctx->debug_on) {
        deshake_ctx->transform_time += ff_opencl_get_event_time(transform_event);
        if (deshake_ctx->should_crop) {
            deshake_ctx->crop_upscale_time += ff_opencl_get_event_time(crop_upscale_event);
        }
    }

    ++deshake_ctx->curr_frame;

    if (deshake_ctx->debug_on)
        av_freep(&debug_matches.matches);

    if (deshake_ctx->should_crop) {
        err = av_frame_copy_props(cropped_frame, input_frame);
        if (err < 0)
            goto fail;

        av_frame_free(&transformed_frame);
        av_frame_free(&input_frame);
        return ff_filter_frame(outlink, cropped_frame);

    } else {
        err = av_frame_copy_props(transformed_frame, input_frame);
        if (err < 0)
            goto fail;

        av_frame_free(&cropped_frame);
        av_frame_free(&input_frame);
        return ff_filter_frame(outlink, transformed_frame);
    }

fail:
    clFinish(deshake_ctx->command_queue);

    if (deshake_ctx->debug_on)
        if (debug_matches.matches)
            av_freep(&debug_matches.matches);

    av_frame_free(&input_frame);
    av_frame_free(&transformed_frame);
    av_frame_free(&cropped_frame);
    return err;
}

// Add the given frame to the frame queue to eventually be processed.
//
// Also determines the motion from the previous frame and updates the stored
// motion information accordingly.
static int queue_frame(AVFilterLink *link, AVFrame *input_frame)
{
    AVFilterContext *avctx = link->dst;
    DeshakeOpenCLContext *deshake_ctx = avctx->priv;
    int err;
    int num_vectors;
    int num_inliers = 0;
    cl_int cle;
    FrameDelta relative;
    SimilarityMatrix model;
    size_t global_work[2];
    size_t harris_global_work[2];
    size_t grid_32_global_work[2];
    int grid_32_h, grid_32_w;
    size_t local_work[2];
    cl_mem src, temp;
    float prev_vals[5];
    float new_vals[5];
    cl_event grayscale_event, harris_response_event, refine_features_event,
             brief_event, match_descriptors_event, read_buf_event;
    DebugMatches debug_matches;

    num_vectors = 0;

    local_work[0] = 8;
    local_work[1] = 8;

    err = ff_opencl_filter_work_size_from_image(avctx, global_work, input_frame, 0, 0);
    if (err < 0)
        goto fail;

    err = ff_opencl_filter_work_size_from_image(avctx, harris_global_work, input_frame, 0, 8);
    if (err < 0)
        goto fail;

    err = ff_opencl_filter_work_size_from_image(avctx, grid_32_global_work, input_frame, 0, 32);
    if (err < 0)
        goto fail;

    // We want a single work-item for each 32x32 block of pixels in the input frame
    grid_32_global_work[0] /= 32;
    grid_32_global_work[1] /= 32;

    grid_32_h = ROUNDED_UP_DIV(input_frame->height, 32);
    grid_32_w = ROUNDED_UP_DIV(input_frame->width, 32);

    if (deshake_ctx->is_yuv) {
        deshake_ctx->grayscale = (cl_mem)input_frame->data[0];
    } else {
        src = (cl_mem)input_frame->data[0];

        CL_RUN_KERNEL_WITH_ARGS(
            deshake_ctx->command_queue,
            deshake_ctx->kernel_grayscale,
            global_work,
            NULL,
            &grayscale_event,
            { sizeof(cl_mem), &src },
            { sizeof(cl_mem), &deshake_ctx->grayscale }
        );
    }

    CL_RUN_KERNEL_WITH_ARGS(
        deshake_ctx->command_queue,
        deshake_ctx->kernel_harris_response,
        harris_global_work,
        local_work,
        &harris_response_event,
        { sizeof(cl_mem), &deshake_ctx->grayscale },
        { sizeof(cl_mem), &deshake_ctx->harris_buf }
    );

    CL_RUN_KERNEL_WITH_ARGS(
        deshake_ctx->command_queue,
        deshake_ctx->kernel_refine_features,
        grid_32_global_work,
        NULL,
        &refine_features_event,
        { sizeof(cl_mem), &deshake_ctx->grayscale },
        { sizeof(cl_mem), &deshake_ctx->harris_buf },
        { sizeof(cl_mem), &deshake_ctx->refined_features },
        { sizeof(cl_int), &deshake_ctx->refine_features }
    );

    CL_RUN_KERNEL_WITH_ARGS(
        deshake_ctx->command_queue,
        deshake_ctx->kernel_brief_descriptors,
        grid_32_global_work,
        NULL,
        &brief_event,
        { sizeof(cl_mem), &deshake_ctx->grayscale },
        { sizeof(cl_mem), &deshake_ctx->refined_features },
        { sizeof(cl_mem), &deshake_ctx->descriptors },
        { sizeof(cl_mem), &deshake_ctx->brief_pattern}
    );

    if (av_fifo_size(deshake_ctx->abs_motion.ringbuffers[RingbufX]) == 0) {
        // This is the first frame we've been given to queue, meaning there is
        // no previous frame to match descriptors to

        goto no_motion_data;
    }

    CL_RUN_KERNEL_WITH_ARGS(
        deshake_ctx->command_queue,
        deshake_ctx->kernel_match_descriptors,
        grid_32_global_work,
        NULL,
        &match_descriptors_event,
        { sizeof(cl_mem), &deshake_ctx->prev_refined_features },
        { sizeof(cl_mem), &deshake_ctx->refined_features },
        { sizeof(cl_mem), &deshake_ctx->descriptors },
        { sizeof(cl_mem), &deshake_ctx->prev_descriptors },
        { sizeof(cl_mem), &deshake_ctx->matches }
    );
1865 
1866  cle = clEnqueueReadBuffer(
1867  deshake_ctx->command_queue,
1868  deshake_ctx->matches,
1869  CL_TRUE,
1870  0,
1871  grid_32_h * grid_32_w * sizeof(MotionVector),
1872  deshake_ctx->matches_host,
1873  0,
1874  NULL,
1875  &read_buf_event
1876  );
1877  CL_FAIL_ON_ERROR(AVERROR(EIO), "Failed to read matches to host: %d.\n", cle);
1878 
1879  num_vectors = make_vectors_contig(deshake_ctx, grid_32_h, grid_32_w);
1880 
1881  if (num_vectors < 10) {
1882  // Not enough matches to get reliable motion data for this frame
1883  //
1884  // From this point on all data is relative to this frame rather than the
1885  // original frame. We have to make sure that we don't mix values that were
1886  // relative to the original frame with the new values relative to this
1887  frame when doing the Gaussian smoothing. We keep track of where the old
1888  values end using the data_end_offset field in order to accomplish
1889  that goal.
1890  //
1891  // If no motion data is present for multiple frames in a short window of
1892  // time, we leave the end where it was to avoid mixing 0s in with the
1893  // old data (and just treat them all as part of the new values)
1894  if (deshake_ctx->abs_motion.data_end_offset == -1) {
1895  deshake_ctx->abs_motion.data_end_offset =
1896  av_fifo_size(deshake_ctx->abs_motion.ringbuffers[RingbufX]) / sizeof(float) - 1;
1897  }
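// Sketch with assumed numbers: if the ringbuffer held 12 floats when tracking
// was lost, data_end_offset becomes 11, the index of the last value that is
// still relative to the original frame; smoothing treats entries after it as
// relative to this frame.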
1898 
1899  goto no_motion_data;
1900  }
1901 
1902  if (!estimate_affine_2d(
1903  deshake_ctx,
1904  deshake_ctx->matches_contig_host,
1905  &debug_matches,
1906  num_vectors,
1907  model.matrix,
1908  10.0, // threshold: max error for a vector to be counted as an inlier
1909  3000, // max_iters: maximum number of RANSAC iterations
1910  0.999999999999 // confidence: target probability of a correct model
1911  )) {
1912  goto no_motion_data;
1913  }
1914 
1915  for (int i = 0; i < num_vectors; i++) {
1916  if (deshake_ctx->matches_contig_host[i].should_consider) {
1917  deshake_ctx->inliers[num_inliers] = deshake_ctx->matches_contig_host[i];
1918  num_inliers++;
1919  }
1920  }
1921 
1922  if (!minimize_error(
1923  deshake_ctx,
1924  deshake_ctx->inliers,
1925  &debug_matches,
1926  num_inliers,
1927  model.matrix,
1928  400 // max_iters for the error-minimization refinement
1929  )) {
1930  goto no_motion_data;
1931  }
1932 
1933 
1934  relative = decompose_transform(model.matrix);
1935 
1936  // Get the absolute transform data for the previous frame
1937  for (int i = 0; i < RingbufCount; i++) {
1938  av_fifo_generic_peek_at(
1939  deshake_ctx->abs_motion.ringbuffers[i],
1940  &prev_vals[i],
1941  av_fifo_size(deshake_ctx->abs_motion.ringbuffers[i]) - sizeof(float),
1942  sizeof(float),
1943  NULL
1944  );
1945  }
1946 
1947  new_vals[RingbufX] = prev_vals[RingbufX] + relative.translation.s[0];
1948  new_vals[RingbufY] = prev_vals[RingbufY] + relative.translation.s[1];
1949  new_vals[RingbufRot] = prev_vals[RingbufRot] + relative.rotation;
1950  new_vals[RingbufScaleX] = prev_vals[RingbufScaleX] / relative.scale.s[0];
1951  new_vals[RingbufScaleY] = prev_vals[RingbufScaleY] / relative.scale.s[1];
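// Worked example with assumed numbers: if the previous absolute x-translation
// was 10.0 and this frame moved a further 2.5 pixels, the new absolute value
// is 12.5. Scale accumulates by division, so a relative scale of 1.02 turns
// an absolute scale of 1.0 into roughly 0.98.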
1952 
1953  if (deshake_ctx->debug_on) {
1954  if (!deshake_ctx->is_yuv) {
1955  deshake_ctx->grayscale_time += ff_opencl_get_event_time(grayscale_event);
1956  }
1957  deshake_ctx->harris_response_time += ff_opencl_get_event_time(harris_response_event);
1958  deshake_ctx->refine_features_time += ff_opencl_get_event_time(refine_features_event);
1959  deshake_ctx->brief_descriptors_time += ff_opencl_get_event_time(brief_event);
1960  deshake_ctx->match_descriptors_time += ff_opencl_get_event_time(match_descriptors_event);
1961  deshake_ctx->read_buf_time += ff_opencl_get_event_time(read_buf_event);
1962  }
1963 
1964  goto end;
1965 
1966 no_motion_data:
1967  new_vals[RingbufX] = 0.0f;
1968  new_vals[RingbufY] = 0.0f;
1969  new_vals[RingbufRot] = 0.0f;
1970  new_vals[RingbufScaleX] = 1.0f;
1971  new_vals[RingbufScaleY] = 1.0f;
1972 
1973  for (int i = 0; i < num_vectors; i++) {
1974  deshake_ctx->matches_contig_host[i].should_consider = false;
1975  }
1976  debug_matches.num_model_matches = 0;
1977 
1978  if (deshake_ctx->debug_on) {
1979  av_log(avctx, AV_LOG_VERBOSE,
1980  "\n[ALERT] No motion data found in queue_frame, motion reset to 0\n\n"
1981  );
1982  }
1983 
1984  goto end;
1985 
1986 end:
1987  // Swap the descriptor buffers (we don't need the previous frame's descriptors
1988  // again so we will use that space for the next frame's descriptors)
1989  temp = deshake_ctx->prev_descriptors;
1990  deshake_ctx->prev_descriptors = deshake_ctx->descriptors;
1991  deshake_ctx->descriptors = temp;
1992 
1993  // Same for the refined features
1994  temp = deshake_ctx->prev_refined_features;
1995  deshake_ctx->prev_refined_features = deshake_ctx->refined_features;
1996  deshake_ctx->refined_features = temp;
1997 
1998  if (deshake_ctx->debug_on) {
1999  if (num_vectors == 0) {
2000  debug_matches.matches = NULL;
2001  } else {
2002  debug_matches.matches = av_malloc_array(num_vectors, sizeof(MotionVector));
2003 
2004  if (!debug_matches.matches) {
2005  err = AVERROR(ENOMEM);
2006  goto fail;
2007  }
2008  }
2009 
2010  for (int i = 0; i < num_vectors; i++) {
2011  debug_matches.matches[i] = deshake_ctx->matches_contig_host[i];
2012  }
2013  debug_matches.num_matches = num_vectors;
2014 
2015  av_fifo_generic_write(
2016  deshake_ctx->abs_motion.debug_matches,
2017  &debug_matches,
2018  sizeof(DebugMatches),
2019  NULL
2020  );
2021  }
2022 
2023  for (int i = 0; i < RingbufCount; i++) {
2024  av_fifo_generic_write(
2025  deshake_ctx->abs_motion.ringbuffers[i],
2026  &new_vals[i],
2027  sizeof(float),
2028  NULL
2029  );
2030  }
2031 
2032  return ff_framequeue_add(&deshake_ctx->fq, input_frame);
2033 
2034 fail:
2035  clFinish(deshake_ctx->command_queue);
2036  av_frame_free(&input_frame);
2037  return err;
2038 }
2039 
2040 static int activate(AVFilterContext *ctx)
2041 {
2042  AVFilterLink *inlink = ctx->inputs[0];
2043  AVFilterLink *outlink = ctx->outputs[0];
2044  DeshakeOpenCLContext *deshake_ctx = ctx->priv;
2045  AVFrame *frame = NULL;
2046  int ret, status;
2047  int64_t pts;
2048 
2049  FF_FILTER_FORWARD_STATUS_BACK(outlink, inlink);
2050 
2051  if (!deshake_ctx->eof) {
2052  ret = ff_inlink_consume_frame(inlink, &frame);
2053  if (ret < 0)
2054  return ret;
2055  if (ret > 0) {
2056  if (!frame->hw_frames_ctx)
2057  return AVERROR(EINVAL);
2058 
2059  if (!deshake_ctx->initialized) {
2060  ret = deshake_opencl_init(ctx);
2061  if (ret < 0)
2062  return ret;
2063  }
2064 
2065  // If there is no more space in the ringbuffers, remove the oldest
2066  // values to make room for the new ones
2067  if (av_fifo_space(deshake_ctx->abs_motion.ringbuffers[RingbufX]) == 0) {
2068  for (int i = 0; i < RingbufCount; i++) {
2069  av_fifo_drain(deshake_ctx->abs_motion.ringbuffers[i], sizeof(float));
2070  }
2071  }
2072  ret = queue_frame(inlink, frame);
2073  if (ret < 0)
2074  return ret;
2075  if (ret >= 0) {
2076  // See if we have enough buffered frames to process one
2077  //
2078  // "enough" means half the smooth window's worth of frames queued into the future
2079  if (ff_framequeue_queued_frames(&deshake_ctx->fq) >= deshake_ctx->smooth_window / 2) {
2080  return filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2081  }
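// Example with assumed numbers: with a smooth_window of 30 frames, the oldest
// queued frame is only filtered once 15 frames are buffered, giving the
// Gaussian smoothing motion data on both sides of each frame it processes.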
2082  }
2083  }
2084  }
2085 
2086  if (!deshake_ctx->eof && ff_inlink_acknowledge_status(inlink, &status, &pts)) {
2087  if (status == AVERROR_EOF) {
2088  deshake_ctx->eof = true;
2089  }
2090  }
2091 
2092  if (deshake_ctx->eof) {
2093  // Finish processing the rest of the frames in the queue.
2094  while (ff_framequeue_queued_frames(&deshake_ctx->fq) != 0) {
2095  for (int i = 0; i < RingbufCount; i++) {
2096  av_fifo_drain(deshake_ctx->abs_motion.ringbuffers[i], sizeof(float));
2097  }
2098 
2099  ret = filter_frame(inlink, ff_framequeue_take(&deshake_ctx->fq));
2100  if (ret < 0) {
2101  return ret;
2102  }
2103  }
2104 
2105  if (deshake_ctx->debug_on) {
2106  av_log(ctx, AV_LOG_VERBOSE,
2107  "Average kernel execution times:\n"
2108  "\t grayscale: %0.3f ms\n"
2109  "\t harris_response: %0.3f ms\n"
2110  "\t refine_features: %0.3f ms\n"
2111  "\tbrief_descriptors: %0.3f ms\n"
2112  "\tmatch_descriptors: %0.3f ms\n"
2113  "\t transform: %0.3f ms\n"
2114  "\t crop_upscale: %0.3f ms\n"
2115  "Average buffer read times:\n"
2116  "\t features buf: %0.3f ms\n",
2117  averaged_event_time_ms(deshake_ctx->grayscale_time, deshake_ctx->curr_frame),
2118  averaged_event_time_ms(deshake_ctx->harris_response_time, deshake_ctx->curr_frame),
2119  averaged_event_time_ms(deshake_ctx->refine_features_time, deshake_ctx->curr_frame),
2120  averaged_event_time_ms(deshake_ctx->brief_descriptors_time, deshake_ctx->curr_frame),
2121  averaged_event_time_ms(deshake_ctx->match_descriptors_time, deshake_ctx->curr_frame),
2122  averaged_event_time_ms(deshake_ctx->transform_time, deshake_ctx->curr_frame),
2123  averaged_event_time_ms(deshake_ctx->crop_upscale_time, deshake_ctx->curr_frame),
2124  averaged_event_time_ms(deshake_ctx->read_buf_time, deshake_ctx->curr_frame)
2125  );
2126  }
2127 
2128  ff_outlink_set_status(outlink, AVERROR_EOF, deshake_ctx->duration);
2129  return 0;
2130  }
2131 
2132  if (!deshake_ctx->eof) {
2133  FF_FILTER_FORWARD_WANTED(outlink, inlink);
2134  }
2135 
2136  return FFERROR_NOT_READY;
2137 }
2138 
2139 static const AVFilterPad deshake_opencl_inputs[] = {
2140  {
2141  .name = "default",
2142  .type = AVMEDIA_TYPE_VIDEO,
2143  .config_props = &ff_opencl_filter_config_input,
2144  },
2145  { NULL }
2146 };
2147 
2148 static const AVFilterPad deshake_opencl_outputs[] = {
2149  {
2150  .name = "default",
2151  .type = AVMEDIA_TYPE_VIDEO,
2152  .config_props = &ff_opencl_filter_config_output,
2153  },
2154  { NULL }
2155 };
2156 
2157 #define OFFSET(x) offsetof(DeshakeOpenCLContext, x)
2158 #define FLAGS AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_VIDEO_PARAM
2159 
2160 static const AVOption deshake_opencl_options[] = {
2161  {
2162  "tripod", "simulates a tripod by preventing any camera movement whatsoever "
2163  "from the original frame",
2164  OFFSET(tripod_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2165  },
2166  {
2167  "debug", "turn on additional debugging information",
2168  OFFSET(debug_on), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, FLAGS
2169  },
2170  {
2171  "adaptive_crop", "attempt to subtly crop borders to reduce mirrored content",
2172  OFFSET(should_crop), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2173  },
2174  {
2175  "refine_features", "refine feature point locations at a sub-pixel level",
2176  OFFSET(refine_features), AV_OPT_TYPE_BOOL, {.i64 = 1}, 0, 1, FLAGS
2177  },
2178  {
2179  "smooth_strength", "smoothing strength (0 attempts to adaptively determine optimal strength)",
2180  OFFSET(smooth_percent), AV_OPT_TYPE_FLOAT, {.dbl = 0.0f}, 0.0f, 1.0f, FLAGS
2181  },
2182  {
2183  "smooth_window_multiplier", "multiplier for number of frames to buffer for motion data",
2184  OFFSET(smooth_window_multiplier), AV_OPT_TYPE_FLOAT, {.dbl = 2.0}, 0.1, 10.0, FLAGS
2185  },
2186  { NULL }
2187 };
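/*
 * Illustrative usage (assumed command line; the device selection, upload
 * chain, and pixel format depend on the local setup):
 *
 *     ffmpeg -init_hw_device opencl=gpu -filter_hw_device gpu -i in.mp4 \
 *         -vf 'format=nv12,hwupload,deshake_opencl=smooth_strength=0.5:debug=1,hwdownload,format=nv12' \
 *         out.mp4
 *
 * smooth_strength and debug are the AVOptions declared above; debug=1 makes
 * the filter log the averaged kernel timings printed at end of stream.
 */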
2188 
2189 AVFILTER_DEFINE_CLASS(deshake_opencl);
2190 
2191 AVFilter ff_vf_deshake_opencl = {
2192  .name = "deshake_opencl",
2193  .description = NULL_IF_CONFIG_SMALL("Feature-point based video stabilization filter"),
2194  .priv_size = sizeof(DeshakeOpenCLContext),
2195  .priv_class = &deshake_opencl_class,
2196  .init = &ff_opencl_filter_init,
2197  .uninit = &deshake_opencl_uninit,
2198  .query_formats = &ff_opencl_filter_query_formats,
2199  .activate = activate,
2200  .inputs = deshake_opencl_inputs,
2201  .outputs = deshake_opencl_outputs,
2202  .flags_internal = FF_FILTER_FLAG_HWFRAME_AWARE
2203 };