• R/O
  • HTTP
  • SSH
  • HTTPS

Commit

Tags
Aucun tag

Frequently used words (click to add to your profile)

javac++androidlinuxc#windowsobjective-ccocoa誰得qtpythonphprubygameguibathyscaphec計画中(planning stage)翻訳omegatframeworktwitterdomtestvb.netdirectxゲームエンジンbtronarduinopreviewer

hardware/intel/intel-driver


Commit MetaInfo

Révision8d1651b4d130213af0350cab3b9ce1b1179d1c04 (tree)
l'heure2016-05-09 17:52:22
AuteurXiang, Haihao <haihao.xiang@inte...>
CommiterXiang, Haihao

Message de Log

Implement low power mode on SKL

VDEnc is one of the fixed function pipelines in VDBox which is a dedicated
engine for low power mode

Signed-off-by: Xiang, Haihao <haihao.xiang@intel.com>
Reviewed-By: Sean V Kelley <sean.v.kelley@intel.com>

Change Summary

Modification

--- a/src/Makefile.am
+++ b/src/Makefile.am
@@ -66,6 +66,7 @@ source_c = \
6666 gen9_mfc.c \
6767 gen9_mfc_hevc.c \
6868 gen9_mfd.c \
69+ gen9_vdenc.c \
6970 gen75_picture_process.c \
7071 gen75_vme.c \
7172 gen75_vpp_gpe.c \
@@ -110,6 +111,7 @@ source_h = \
110111 gen8_post_processing.h \
111112 gen9_mfd.h \
112113 gen9_mfc.h \
114+ gen9_vdenc.h \
113115 i965_avc_bsd.h \
114116 i965_avc_hw_scoreboard.h\
115117 i965_avc_ildb.h \
--- /dev/null
+++ b/src/gen9_vdenc.c
@@ -0,0 +1,3771 @@
1+/*
2+ * Copyright © 2015 Intel Corporation
3+ *
4+ * Permission is hereby granted, free of charge, to any person obtaining a
5+ * copy of this software and associated documentation files (the
6+ * "Software"), to deal in the Software without restriction, including
7+ * without limitation the rights to use, copy, modify, merge, publish,
8+ * distribute, sub license, and/or sell copies of the Software, and to
9+ * permit persons to whom the Software is furnished to do so, subject to
10+ * the following conditions:
11+ *
12+ * The above copyright notice and this permission notice (including the
13+ * next paragraph) shall be included in all copies or substantial portions
14+ * of the Software.
15+ *
16+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+ *
24+ * Authors:
25+ * Xiang Haihao <haihao.xiang@intel.com>
26+ *
27+ */
28+
29+#include <stdio.h>
30+#include <stdlib.h>
31+#include <string.h>
32+#include <math.h>
33+#include <assert.h>
34+
35+#include "intel_batchbuffer.h"
36+#include "i965_defines.h"
37+#include "i965_structs.h"
38+#include "i965_drv_video.h"
39+#include "i965_encoder.h"
40+#include "i965_encoder_utils.h"
41+#include "intel_media.h"
42+#include "gen9_vdenc.h"
43+
/*
 * Buffer-rate adjustment table (I frames, low-delay variant), laid out as
 * 9 rows of 8 entries.  The entries are signed adjustments, so the element
 * type must be int8_t (as for the *_vbr tables); with uint8_t the negative
 * initializers were silently converted to large positive values.
 */
static const int8_t buf_rate_adj_tab_i_lowdelay[72] = {
    0,   0,  -8, -12, -16, -20, -28, -36,
    0,   0,  -4,  -8, -12, -16, -24, -32,
    4,   2,   0,  -1,  -3,  -8, -16, -24,
    8,   4,   2,   0,  -1,  -4,  -8, -16,
    20, 16,   4,   0,  -1,  -4,  -8, -16,
    24, 20,  16,   8,   4,   0,  -4,  -8,
    28, 24,  20,  16,   8,   4,   0,  -8,
    32, 24,  20,  16,   8,   4,   0,  -4,
    64, 48,  28,  20,  16,  12,   8,   4,
};
55+
/*
 * Buffer-rate adjustment table (P frames, low-delay variant), laid out as
 * 9 rows of 8 entries.  The entries are signed adjustments, so the element
 * type must be int8_t (as for the *_vbr tables); with uint8_t the negative
 * initializers were silently converted to large positive values.
 */
static const int8_t buf_rate_adj_tab_p_lowdelay[72] = {
    -8, -24, -32, -40, -44, -48, -52, -80,
    -8, -16, -32, -40, -40, -44, -44, -56,
    0,    0, -12, -20, -24, -28, -32, -36,
    8,    4,   0,   0,  -8, -16, -24, -32,
    32,  16,   8,   4,  -4,  -8, -16, -20,
    36,  24,  16,   8,   4,  -2,  -4,  -8,
    40,  36,  24,  20,  16,   8,   0,  -8,
    48,  40,  28,  24,  20,  12,   0,  -4,
    64,  48,  28,  20,  16,  12,   8,   4,
};
67+
/*
 * Buffer-rate adjustment table (B frames, low-delay variant), laid out as
 * 9 rows of 8 entries.  The entries are signed adjustments, so the element
 * type must be int8_t (as for the *_vbr tables); with uint8_t the negative
 * initializers were silently converted to large positive values.
 */
static const int8_t buf_rate_adj_tab_b_lowdelay[72] = {
    0,  -4,  -8, -16, -24, -32, -40, -48,
    1,   0,  -4,  -8, -16, -24, -32, -40,
    4,   2,   0,  -1,  -3,  -8, -16, -24,
    8,   4,   2,   0,  -1,  -4,  -8, -16,
    20, 16,   4,   0,  -1,  -4,  -8, -16,
    24, 20,  16,   8,   4,   0,  -4,  -8,
    28, 24,  20,  16,   8,   4,   0,  -8,
    32, 24,  20,  16,   8,   4,   0,  -4,
    64, 48,  28,  20,  16,  12,   8,   4,
};
79+
80+static const int8_t dist_qp_adj_tab_i_vbr[81] = {
81+ +0, 0, 0, 0, 0, 3, 4, 6, 8,
82+ +0, 0, 0, 0, 0, 2, 3, 5, 7,
83+ -1, 0, 0, 0, 0, 2, 2, 4, 5,
84+ -1, -1, 0, 0, 0, 1, 2, 2, 4,
85+ -2, -2, -1, 0, 0, 0, 1, 2, 4,
86+ -2, -2, -1, 0, 0, 0, 1, 2, 4,
87+ -3, -2, -1, -1, 0, 0, 1, 2, 5,
88+ -3, -2, -1, -1, 0, 0, 2, 4, 7,
89+ -4, -3, -2, -1, 0, 1, 3, 5, 8,
90+};
91+
92+static const int8_t dist_qp_adj_tab_p_vbr[81] = {
93+ -1, 0, 0, 0, 0, 1, 1, 2, 3,
94+ -1, -1, 0, 0, 0, 1, 1, 2, 3,
95+ -2, -1, -1, 0, 0, 1, 1, 2, 3,
96+ -3, -2, -2, -1, 0, 0, 1, 2, 3,
97+ -3, -2, -1, -1, 0, 0, 1, 2, 3,
98+ -3, -2, -1, -1, 0, 0, 1, 2, 3,
99+ -3, -2, -1, -1, 0, 0, 1, 2, 3,
100+ -3, -2, -1, -1, 0, 0, 1, 2, 3,
101+ -3, -2, -1, -1, 0, 0, 1, 2, 3,
102+};
103+
104+static const int8_t dist_qp_adj_tab_b_vbr[81] = {
105+ +0, 0, 0, 0, 0, 2, 3, 3, 4,
106+ +0, 0, 0, 0, 0, 2, 3, 3, 4,
107+ -1, 0, 0, 0, 0, 2, 2, 3, 3,
108+ -1, -1, 0, 0, 0, 1, 2, 2, 2,
109+ -1, -1, -1, 0, 0, 0, 1, 2, 2,
110+ -2, -1, -1, 0, 0, 0, 0, 1, 2,
111+ -2, -1, -1, -1, 0, 0, 0, 1, 3,
112+ -2, -2, -1, -1, 0, 0, 1, 1, 3,
113+ -2, -2, -1, -1, 0, 1, 1, 2, 4,
114+};
115+
116+static const int8_t buf_rate_adj_tab_i_vbr[72] = {
117+ -4, -20, -28, -36, -40, -44, -48, -80,
118+ +0, -8, -12, -20, -24, -28, -32, -36,
119+ +0, 0, -8, -16, -20, -24, -28, -32,
120+ +8, 4, 0, 0, -8, -16, -24, -28,
121+ 32, 24, 16, 2, -4, -8, -16, -20,
122+ 36, 32, 28, 16, 8, 0, -4, -8,
123+ 40, 36, 24, 20, 16, 8, 0, -8,
124+ 48, 40, 28, 24, 20, 12, 0, -4,
125+ 64, 48, 28, 20, 16, 12, 8, 4,
126+};
127+
128+static const int8_t buf_rate_adj_tab_p_vbr[72] = {
129+ -8, -24, -32, -44, -48, -56, -64, -80,
130+ -8, -16, -32, -40, -44, -52, -56, -64,
131+ +0, 0, -16, -28, -36, -40, -44, -48,
132+ +8, 4, 0, 0, -8, -16, -24, -36,
133+ 20, 12, 4, 0, -8, -8, -8, -16,
134+ 24, 16, 8, 8, 8, 0, -4, -8,
135+ 40, 36, 24, 20, 16, 8, 0, -8,
136+ 48, 40, 28, 24, 20, 12, 0, -4,
137+ 64, 48, 28, 20, 16, 12, 8, 4,
138+};
139+
140+static const int8_t buf_rate_adj_tab_b_vbr[72] = {
141+ 0, -4, -8, -16, -24, -32, -40, -48,
142+ 1, 0, -4, -8, -16, -24, -32, -40,
143+ 4, 2, 0, -1, -3, -8, -16, -24,
144+ 8, 4, 2, 0, -1, -4, -8, -16,
145+ 20, 16, 4, 0, -1, -4, -8, -16,
146+ 24, 20, 16, 8, 4, 0, -4, -8,
147+ 28, 24, 20, 16, 8, 4, 0, -8,
148+ 32, 24, 20, 16, 8, 4, 0, -4,
149+ 64, 48, 28, 20, 16, 12, 8, 4,
150+};
151+
152+static struct huc_brc_update_constant_data
153+gen9_brc_update_constant_data = {
154+ .global_rate_qp_adj_tab_i = {
155+ 48, 40, 32, 24, 16, 8, 0, -8,
156+ 40, 32, 24, 16, 8, 0, -8, -16,
157+ 32, 24, 16, 8, 0, -8, -16, -24,
158+ 24, 16, 8, 0, -8, -16, -24, -32,
159+ 16, 8, 0, -8, -16, -24, -32, -40,
160+ 8, 0, -8, -16, -24, -32, -40, -48,
161+ 0, -8, -16, -24, -32, -40, -48, -56,
162+ 48, 40, 32, 24, 16, 8, 0, -8,
163+ },
164+
165+ .global_rate_qp_adj_tab_p = {
166+ 48, 40, 32, 24, 16, 8, 0, -8,
167+ 40, 32, 24, 16, 8, 0, -8, -16,
168+ 16, 8, 8, 4, -8, -16, -16, -24,
169+ 8, 0, 0, -8, -16, -16, -16, -24,
170+ 8, 0, 0, -24, -32, -32, -32, -48,
171+ 0, -16, -16, -24, -32, -48, -56, -64,
172+ -8, -16, -32, -32, -48, -48, -56, -64,
173+ -16,-32, -48, -48, -48, -56, -64, -80,
174+ },
175+
176+ .global_rate_qp_adj_tab_b = {
177+ 48, 40, 32, 24, 16, 8, 0, -8,
178+ 40, 32, 24, 16, 8, 0, -8, -16,
179+ 32, 24, 16, 8, 0, -8, -16, -24,
180+ 24, 16, 8, 0, -8, -8, -16, -24,
181+ 16, 8, 0, 0, -8, -16, -24, -32,
182+ 16, 8, 0, 0, -8, -16, -24, -32,
183+ 0, -8, -8, -16, -32, -48, -56, -64,
184+ 0, -8, -8, -16, -32, -48, -56, -64
185+ },
186+
187+ .dist_threshld_i = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
188+ .dist_threshld_p = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
189+ .dist_threshld_b = { 2, 4, 8, 12, 19, 32, 64, 128, 0, 0 },
190+
191+ .dist_qp_adj_tab_i = {
192+ 0, 0, 0, 0, 0, 3, 4, 6, 8,
193+ 0, 0, 0, 0, 0, 2, 3, 5, 7,
194+ -1, 0, 0, 0, 0, 2, 2, 4, 5,
195+ -1, -1, 0, 0, 0, 1, 2, 2, 4,
196+ -2, -2, -1, 0, 0, 0, 1, 2, 4,
197+ -2, -2, -1, 0, 0, 0, 1, 2, 4,
198+ -3, -2, -1, -1, 0, 0, 1, 2, 5,
199+ -3, -2, -1, -1, 0, 0, 2, 4, 7,
200+ -4, -3, -2, -1, 0, 1, 3, 5, 8,
201+ },
202+
203+ .dist_qp_adj_tab_p = {
204+ -1, 0, 0, 0, 0, 1, 1, 2, 3,
205+ -1, -1, 0, 0, 0, 1, 1, 2, 3,
206+ -2, -1, -1, 0, 0, 1, 1, 2, 3,
207+ -3, -2, -2, -1, 0, 0, 1, 2, 3,
208+ -3, -2, -1, -1, 0, 0, 1, 2, 3,
209+ -3, -2, -1, -1, 0, 0, 1, 2, 3,
210+ -3, -2, -1, -1, 0, 0, 1, 2, 3,
211+ -3, -2, -1, -1, 0, 0, 1, 2, 3,
212+ -3, -2, -1, -1, 0, 0, 1, 2, 3,
213+ },
214+
215+ .dist_qp_adj_tab_b = {
216+ 0, 0, 0, 0, 0, 2, 3, 3, 4,
217+ 0, 0, 0, 0, 0, 2, 3, 3, 4,
218+ -1, 0, 0, 0, 0, 2, 2, 3, 3,
219+ -1, -1, 0, 0, 0, 1, 2, 2, 2,
220+ -1, -1, -1, 0, 0, 0, 1, 2, 2,
221+ -2, -1, -1, 0, 0, 0, 0, 1, 2,
222+ -2, -1, -1, -1, 0, 0, 0, 1, 3,
223+ -2, -2, -1, -1, 0, 0, 1, 1, 3,
224+ -2, -2, -1, -1, 0, 1, 1, 2, 4,
225+ },
226+
227+ /* default table for non lowdelay */
228+ .buf_rate_adj_tab_i = {
229+ -4, -20, -28, -36, -40, -44, -48, -80,
230+ 0, -8, -12, -20, -24, -28, -32, -36,
231+ 0, 0, -8, -16, -20, -24, -28, -32,
232+ 8, 4, 0, 0, -8, -16, -24, -28,
233+ 32, 24, 16, 2, -4, -8, -16, -20,
234+ 36, 32, 28, 16, 8, 0, -4, -8,
235+ 40, 36, 24, 20, 16, 8, 0, -8,
236+ 48, 40, 28, 24, 20, 12, 0, -4,
237+ 64, 48, 28, 20, 16, 12, 8, 4,
238+ },
239+
240+ /* default table for non lowdelay */
241+ .buf_rate_adj_tab_p = {
242+ -8, -24, -32, -44, -48, -56, -64, -80,
243+ -8, -16, -32, -40, -44, -52, -56, -64,
244+ 0, 0, -16, -28, -36, -40, -44, -48,
245+ 8, 4, 0, 0, -8, -16, -24, -36,
246+ 20, 12, 4, 0, -8, -8, -8, -16,
247+ 24, 16, 8, 8, 8, 0, -4, -8,
248+ 40, 36, 24, 20, 16, 8, 0, -8,
249+ 48, 40, 28, 24, 20, 12, 0, -4,
250+ 64, 48, 28, 20, 16, 12, 8, 4,
251+ },
252+
253+ /* default table for non lowdelay */
254+ .buf_rate_adj_tab_b = {
255+ 0, -4, -8, -16, -24, -32, -40, -48,
256+ 1, 0, -4, -8, -16, -24, -32, -40,
257+ 4, 2, 0, -1, -3, -8, -16, -24,
258+ 8, 4, 2, 0, -1, -4, -8, -16,
259+ 20, 16, 4, 0, -1, -4, -8, -16,
260+ 24, 20, 16, 8, 4, 0, -4, -8,
261+ 28, 24, 20, 16, 8, 4, 0, -8,
262+ 32, 24, 20, 16, 8, 4, 0, -4,
263+ 64, 48, 28, 20, 16, 12, 8, 4,
264+ },
265+
266+ .frame_size_min_tab_p = { 1, 2, 4, 6, 8, 10, 16, 16, 16 },
267+ .frame_size_min_tab_i = { 1, 2, 4, 8, 16, 20, 24, 32, 36 },
268+
269+ .frame_size_max_tab_p = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
270+ .frame_size_max_tab_i = { 48, 64, 80, 96, 112, 128, 144, 160, 160 },
271+
272+ .frame_size_scg_tab_p = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
273+ .frame_size_scg_tab_i = { 4, 8, 12, 16, 20, 24, 24, 0, 0 },
274+
275+ .i_intra_non_pred = {
276+ 0x0e, 0x0e, 0x0e, 0x18, 0x19, 0x1b, 0x1c, 0x0d, 0x0f, 0x18, 0x19, 0x0d, 0x0f, 0x0f,
277+ 0x0c, 0x0e, 0x0c, 0x0c, 0x0a, 0x0a, 0x0b, 0x0a, 0x0a, 0x0a, 0x09, 0x09, 0x08, 0x08,
278+ 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x07, 0x07, 0x07, 0x07, 0x07,
279+ },
280+
281+ .i_intra_16x16 = {
282+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
283+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
284+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
285+ },
286+
287+ .i_intra_8x8 = {
288+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
289+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x04, 0x04, 0x04, 0x04, 0x06, 0x06, 0x06,
290+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07,
291+ },
292+
293+ .i_intra_4x4 = {
294+ 0x2e, 0x2e, 0x2e, 0x38, 0x39, 0x3a, 0x3b, 0x2c, 0x2e, 0x38, 0x39, 0x2d, 0x2f, 0x38,
295+ 0x2e, 0x38, 0x2e, 0x38, 0x2f, 0x2e, 0x38, 0x38, 0x38, 0x38, 0x2f, 0x2f, 0x2f, 0x2e,
296+ 0x2d, 0x2c, 0x2b, 0x2a, 0x29, 0x28, 0x1e, 0x1c, 0x1b, 0x1a, 0x19, 0x18, 0x0e, 0x0d,
297+ },
298+
299+ .i_intra_chroma = {
300+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
301+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
302+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
303+ },
304+
305+ .p_intra_non_pred = {
306+ 0x06, 0x06, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x05, 0x06, 0x07, 0x08, 0x06, 0x07, 0x07,
307+ 0x07, 0x07, 0x06, 0x07, 0x07, 0x06, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
308+ 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07, 0x07,
309+ },
310+
311+ .p_intra_16x16 = {
312+ 0x1b, 0x1b, 0x1b, 0x1c, 0x1e, 0x28, 0x29, 0x1a, 0x1b, 0x1c, 0x1e, 0x1a, 0x1c, 0x1d,
313+ 0x1b, 0x1c, 0x1c, 0x1c, 0x1c, 0x1b, 0x1c, 0x1c, 0x1d, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c,
314+ 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
315+ },
316+
317+ .p_intra_8x8 = {
318+ 0x1d, 0x1d, 0x1d, 0x1e, 0x28, 0x29, 0x2a, 0x1b, 0x1d, 0x1e, 0x28, 0x1c, 0x1d, 0x1f,
319+ 0x1d, 0x1e, 0x1d, 0x1e, 0x1d, 0x1d, 0x1f, 0x1e, 0x1e, 0x1e, 0x1d, 0x1e, 0x1e, 0x1d,
320+ 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e, 0x1e,
321+ },
322+
323+ .p_intra_4x4 = {
324+ 0x38, 0x38, 0x38, 0x39, 0x3a, 0x3b, 0x3d, 0x2e, 0x38, 0x39, 0x3a, 0x2f, 0x39, 0x3a,
325+ 0x38, 0x39, 0x38, 0x39, 0x39, 0x38, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
326+ 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39, 0x39,
327+ },
328+
329+ .p_intra_chroma = {
330+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
331+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
332+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
333+ },
334+
335+ .p_inter_16x8 = {
336+ 0x07, 0x07, 0x07, 0x08, 0x09, 0x0b, 0x0c, 0x06, 0x07, 0x09, 0x0a, 0x07, 0x08, 0x09,
337+ 0x08, 0x09, 0x08, 0x09, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x08, 0x08, 0x08, 0x08,
338+ 0x08, 0x08, 0x08, 0x08, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09, 0x09,
339+ },
340+
341+ .p_inter_8x8 = {
342+ 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x02, 0x02, 0x02, 0x03, 0x02, 0x02, 0x02,
343+ 0x02, 0x03, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
344+ 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03, 0x03,
345+ },
346+
347+ .p_inter_16x16 = {
348+ 0x05, 0x05, 0x05, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
349+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
350+ 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06, 0x06,
351+ },
352+
353+ .p_ref_id = {
354+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
355+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04,
356+ 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04, 0x04
357+ },
358+
359+ .hme_mv_cost = {
360+ /* mv = 0 */
361+ {
362+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
363+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
364+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
365+ },
366+
367+ /* mv <= 16 */
368+ {
369+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
370+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
371+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
372+ },
373+
374+ /* mv <= 32 */
375+ {
376+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
377+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
378+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01,
379+ },
380+
381+ /* mv <= 64 */
382+ {
383+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
384+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
385+ 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05, 0x05,
386+ },
387+
388+ /* mv <= 128 */
389+ {
390+ 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
391+ 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
392+ 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
393+ },
394+
395+ /* mv <= 256 */
396+ {
397+ 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
398+ 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x0a,
399+ 0x0a, 0x0a, 0x0a, 0x0a, 0x0a, 0x1a, 0x1f, 0x2a, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d, 0x2d
400+ },
401+
402+ /* mv <= 512 */
403+ {
404+ 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
405+ 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
406+ 0x1a, 0x1a, 0x1a, 0x1a, 0x2a, 0x2f, 0x3a, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d, 0x3d,
407+ },
408+
409+ /* mv <= 1024 */
410+ {
411+ 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
412+ 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a,
413+ 0x1a, 0x1a, 0x1a, 0x1f, 0x2d, 0x3d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d, 0x4d,
414+ },
415+ },
416+};
417+
418+/* 11 DWs */
419+static uint8_t vdenc_const_qp_lambda[44] = {
420+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
421+ 0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
422+ 0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
423+ 0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
424+ 0x4a, 0x53, 0x00, 0x00
425+};
426+
/*
 * All-zero threshold tables.  They are written with an explicit `{ 0 }`
 * initializer because empty braces are not valid ISO C before C23; the
 * resulting contents (every element zero) are unchanged.
 * NOTE(review): these look like the non-P-frame counterparts of the
 * `*_p` tables defined further down in this file -- confirm against the
 * code that selects between them.
 */

/* 14 DWs (28 x 16-bit) */
static uint16_t vdenc_const_skip_threshold[28] = {
    0
};

/* 14 DWs (28 x 16-bit) */
static uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0[28] = {
    0
};

/* 7 DWs (28 x 8-bit) */
static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1[28] = {
    0
};

/* 7 DWs (28 x 8-bit) */
static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2[28] = {
    0
};

/* 7 DWs (28 x 8-bit) */
static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3[28] = {
    0
};
451+
452+/* P frame */
453+/* 11 DWs */
454+static uint8_t vdenc_const_qp_lambda_p[44] = {
455+ 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02,
456+ 0x02, 0x03, 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x07,
457+ 0x07, 0x08, 0x09, 0x0a, 0x0c, 0x0d, 0x0f, 0x11, 0x13, 0x15,
458+ 0x17, 0x1a, 0x1e, 0x21, 0x25, 0x2a, 0x2f, 0x35, 0x3b, 0x42,
459+ 0x4a, 0x53, 0x00, 0x00
460+};
461+
462+/* 14 DWs */
463+static uint16_t vdenc_const_skip_threshold_p[28] = {
464+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0002, 0x0004, 0x0007, 0x000b,
465+ 0x0011, 0x0019, 0x0023, 0x0032, 0x0044, 0x005b, 0x0077, 0x0099,
466+ 0x00c2, 0x00f1, 0x0128, 0x0168, 0x01b0, 0x0201, 0x025c, 0x02c2,
467+ 0x0333, 0x03b0, 0x0000, 0x0000
468+};
469+
470+/* 14 DWs */
471+static uint16_t vdenc_const_sic_forward_transform_coeff_threshold_0_p[28] = {
472+ 0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
473+ 0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x3f,
474+ 0x4e, 0x51, 0x5b, 0x63, 0x6f, 0x7f, 0x00, 0x00
475+};
476+
477+/* 7 DWs */
478+static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_1_p[28] = {
479+ 0x03, 0x04, 0x05, 0x05, 0x07, 0x09, 0x0b, 0x0e, 0x12, 0x17,
480+ 0x1c, 0x21, 0x27, 0x2c, 0x33, 0x3b, 0x41, 0x51, 0x5c, 0x1a,
481+ 0x1e, 0x21, 0x22, 0x26, 0x2c, 0x30, 0x00, 0x00
482+};
483+
484+/* 7 DWs */
485+static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_2_p[28] = {
486+ 0x02, 0x02, 0x03, 0x04, 0x04, 0x05, 0x07, 0x09, 0x0b, 0x0e,
487+ 0x12, 0x14, 0x18, 0x1d, 0x20, 0x25, 0x2a, 0x34, 0x39, 0x0f,
488+ 0x13, 0x14, 0x16, 0x18, 0x1b, 0x1f, 0x00, 0x00
489+};
490+
491+/* 7 DWs */
492+static uint8_t vdenc_const_sic_forward_transform_coeff_threshold_3_p[28] = {
493+ 0x04, 0x05, 0x06, 0x09, 0x0b, 0x0d, 0x12, 0x16, 0x1b, 0x23,
494+ 0x2c, 0x33, 0x3d, 0x45, 0x4f, 0x5b, 0x66, 0x7f, 0x8e, 0x2a,
495+ 0x2f, 0x32, 0x37, 0x3c, 0x45, 0x4c, 0x00, 0x00
496+};
497+
498+static const double
499+vdenc_brc_dev_threshi0_fp_neg[4] = { 0.80, 0.60, 0.34, 0.2 };
500+
501+static const double
502+vdenc_brc_dev_threshi0_fp_pos[4] = { 0.2, 0.4, 0.66, 0.9 };
503+
504+static const double
505+vdenc_brc_dev_threshpb0_fp_neg[4] = { 0.90, 0.66, 0.46, 0.3 };
506+
507+static const double
508+vdenc_brc_dev_threshpb0_fp_pos[4] = { 0.3, 0.46, 0.70, 0.90 };
509+
510+static const double
511+vdenc_brc_dev_threshvbr0_neg[4] = { 0.90, 0.70, 0.50, 0.3 };
512+
513+static const double
514+vdenc_brc_dev_threshvbr0_pos[4] = { 0.4, 0.5, 0.75, 0.90 };
515+
516+static const unsigned char
517+vdenc_brc_estrate_thresh_p0[7] = { 4, 8, 12, 16, 20, 24, 28 };
518+
519+static const unsigned char
520+vdenc_brc_estrate_thresh_i0[7] = { 4, 8, 12, 16, 20, 24, 28 };
521+
522+static const uint16_t
523+vdenc_brc_start_global_adjust_frame[4] = { 10, 50, 100, 150 };
524+
525+static const uint8_t
526+vdenc_brc_global_rate_ratio_threshold[7] = { 80, 90, 95, 101, 105, 115, 130};
527+
528+static const uint8_t
529+vdenc_brc_start_global_adjust_mult[5] = { 1, 1, 3, 2, 1 };
530+
531+static const uint8_t
532+vdenc_brc_start_global_adjust_div[5] = { 40, 5, 5, 3, 1 };
533+
534+static const int8_t
535+vdenc_brc_global_rate_ratio_threshold_qp[8] = { -3, -2, -1, 0, 1, 1, 2, 3 };
536+
537+const int vdenc_mode_const[2][12][52] = {
538+ //INTRASLICE
539+ {
540+ //LUTMODE_INTRA_NONPRED
541+ {
542+ 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, //QP=[0 ~12]
543+ 16, 18, 22, 24, 13, 15, 16, 18, 13, 15, 15, 12, 14, //QP=[13~25]
544+ 12, 12, 10, 10, 11, 10, 10, 10, 9, 9, 8, 8, 8, //QP=[26~38]
545+ 8, 8, 8, 8, 8, 8, 8, 8, 7, 7, 7, 7, 7, //QP=[39~51]
546+ },
547+
548+ //LUTMODE_INTRA_16x16, LUTMODE_INTRA
549+ {
550+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
551+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
552+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
553+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
554+ },
555+
556+ //LUTMODE_INTRA_8x8
557+ {
558+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
559+ 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, //QP=[13~25]
560+ 1, 1, 1, 1, 1, 4, 4, 4, 4, 6, 6, 6, 6, //QP=[26~38]
561+ 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, //QP=[39~51]
562+ },
563+
564+ //LUTMODE_INTRA_4x4
565+ {
566+ 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, 56, //QP=[0 ~12]
567+ 64, 72, 80, 88, 48, 56, 64, 72, 53, 59, 64, 56, 64, //QP=[13~25]
568+ 57, 64, 58, 55, 64, 64, 64, 64, 59, 59, 60, 57, 50, //QP=[26~38]
569+ 46, 42, 38, 34, 31, 27, 23, 22, 19, 18, 16, 14, 13, //QP=[39~51]
570+ },
571+
572+ //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
573+ { 0, },
574+
575+ //LUTMODE_INTER_8X8Q
576+ { 0, },
577+
578+ //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16x8_FIELD
579+ { 0, },
580+
581+ //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8X8_FIELD
582+ { 0, },
583+
584+ //LUTMODE_INTER_16x16, LUTMODE_INTER
585+ { 0, },
586+
587+ //LUTMODE_INTER_BWD
588+ { 0, },
589+
590+ //LUTMODE_REF_ID
591+ { 0, },
592+
593+ //LUTMODE_INTRA_CHROMA
594+ { 0, },
595+ },
596+
597+ //PREDSLICE
598+ {
599+ //LUTMODE_INTRA_NONPRED
600+ {
601+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[0 ~12]
602+ 7, 8, 9, 10, 5, 6, 7, 8, 6, 7, 7, 7, 7, //QP=[13~25]
603+ 6, 7, 7, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[26~38]
604+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[39~51]
605+ },
606+
607+ //LUTMODE_INTRA_16x16, LUTMODE_INTRA
608+ {
609+ 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21,
610+ 24, 28, 31, 35, 19, 21, 24, 28, 20, 24, 25, 21, 24,
611+ 24, 24, 24, 21, 24, 24, 26, 24, 24, 24, 24, 24, 24,
612+ 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
613+
614+ },
615+
616+ //LUTMODE_INTRA_8x8
617+ {
618+ 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, //QP=[0 ~12]
619+ 28, 32, 36, 40, 22, 26, 28, 32, 24, 26, 30, 26, 28, //QP=[13~25]
620+ 26, 28, 26, 26, 30, 28, 28, 28, 26, 28, 28, 26, 28, //QP=[26~38]
621+ 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, 28, //QP=[39~51]
622+ },
623+
624+ //LUTMODE_INTRA_4x4
625+ {
626+ 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, //QP=[0 ~12]
627+ 72, 80, 88, 104, 56, 64, 72, 80, 58, 68, 76, 64, 68, //QP=[13~25]
628+ 64, 68, 68, 64, 70, 70, 70, 70, 68, 68, 68, 68, 68, //QP=[26~38]
629+ 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, 68, //QP=[39~51]
630+ },
631+
632+ //LUTMODE_INTER_16x8, LUTMODE_INTER_8x16
633+ {
634+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[0 ~12]
635+ 8, 9, 11, 12, 6, 7, 9, 10, 7, 8, 9, 8, 9, //QP=[13~25]
636+ 8, 9, 8, 8, 9, 9, 9, 9, 8, 8, 8, 8, 8, //QP=[26~38]
637+ 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, //QP=[39~51]
638+ },
639+
640+ //LUTMODE_INTER_8X8Q
641+ {
642+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, //QP=[0 ~12]
643+ 2, 3, 3, 3, 2, 2, 2, 3, 2, 2, 2, 2, 3, //QP=[13~25]
644+ 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, //QP=[26~38]
645+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, //QP=[39~51]
646+ },
647+
648+ //LUTMODE_INTER_8X4Q, LUTMODE_INTER_4X8Q, LUTMODE_INTER_16X8_FIELD
649+ {
650+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
651+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[13~25]
652+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[26~38]
653+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[39~51]
654+ },
655+
656+ //LUTMODE_INTER_4X4Q, LUTMODE_INTER_8x8_FIELD
657+ {
658+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[0 ~12]
659+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[13~25]
660+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[26~38]
661+ 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, //QP=[39~51]
662+ },
663+
664+ //LUTMODE_INTER_16x16, LUTMODE_INTER
665+ {
666+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
667+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[13~25]
668+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[26~38]
669+ 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, //QP=[39~51]
670+ },
671+
672+ //LUTMODE_INTER_BWD
673+ {
674+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
675+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
676+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
677+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
678+ },
679+
680+ //LUTMODE_REF_ID
681+ {
682+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[0 ~12]
683+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[13~25]
684+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[26~38]
685+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, //QP=[39~51]
686+ },
687+
688+ //LUTMODE_INTRA_CHROMA
689+ {
690+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
691+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13~25]
692+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26~38]
693+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39~51]
694+ },
695+ },
696+};
697+
698+const int vdenc_mv_cost_skipbias_qpel[8] = {
699+ //PREDSLICE
700+ 0, 6, 6, 9, 10, 13, 14, 16
701+};
702+
703+const int vdenc_hme_cost[8][52] = {
704+ //mv=0
705+ {
706+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
707+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13 ~25]
708+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26 ~38]
709+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39 ~51]
710+ },
711+ //mv<=16
712+ {
713+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[0 ~12]
714+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[13 ~25]
715+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[26 ~38]
716+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, //QP=[39 ~51]
717+ },
718+ //mv<=32
719+ {
720+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[0 ~12]
721+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[13 ~25]
722+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[26 ~38]
723+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, //QP=[39 ~51]
724+ },
725+ //mv<=64
726+ {
727+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[0 ~12]
728+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[13 ~25]
729+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[26 ~38]
730+ 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, //QP=[39 ~51]
731+ },
732+ //mv<=128
733+ {
734+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[0 ~12]
735+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[13 ~25]
736+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[26 ~38]
737+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[39 ~51]
738+ },
739+ //mv<=256
740+ {
741+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[0 ~12]
742+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[13 ~25]
743+ 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, //QP=[26 ~38]
744+ 10, 10, 10, 10, 20, 30, 40, 50, 50, 50, 50, 50, 50, //QP=[39 ~51]
745+ },
746+ //mv<=512
747+ {
748+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[0 ~12]
749+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[13 ~25]
750+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[26 ~38]
751+ 20, 20, 20, 40, 60, 80, 100, 100, 100, 100, 100, 100, 100, //QP=[39 ~51]
752+ },
753+
754+ //mv<=1024
755+ {
756+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[0 ~12]
757+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[13 ~25]
758+ 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, //QP=[26 ~38]
759+ 20, 20, 30, 50, 100, 200, 200, 200, 200, 200, 200, 200, 200, //QP=[39 ~51]
760+ },
761+};
762+
/*
 * Emit a two-dword buffer address into the BCS batch.
 *
 * When `bo` is non-null a 64-bit relocation is emitted through
 * OUT_BCS_RELOC64 with the render domain as read domain and, if
 * `is_target` is true, also as write domain; otherwise two zero dwords
 * are emitted.  `is_target` is parenthesized so that an argument which is
 * itself a conditional expression is not re-associated with the `?:`
 * below.
 */
#define OUT_BUFFER_2DW(batch, bo, is_target, delta) do {                \
        if (bo) {                                                       \
            OUT_BCS_RELOC64(batch,                                      \
                            bo,                                         \
                            I915_GEM_DOMAIN_RENDER,                     \
                            (is_target) ? I915_GEM_DOMAIN_RENDER : 0,   \
                            delta);                                     \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
    } while (0)

/*
 * Emit a buffer address (two dwords, see OUT_BUFFER_2DW) followed by one
 * attribute dword `attr`.
 */
#define OUT_BUFFER_3DW(batch, bo, is_target, delta, attr) do {  \
        OUT_BUFFER_2DW(batch, bo, is_target, delta);            \
        OUT_BCS_BATCH(batch, attr);                             \
    } while (0)
780+
/*
 * Describe `buffer` as a linear (untiled) buffer resource of `bfsize`
 * bytes -- width = pitch = size = bfsize, height = 1 -- and allocate it
 * through i965_allocate_gpe_resource() with description `des`.
 *
 * Expects a variable named `i965` to be in scope at the expansion site.
 * Wrapped in a proper do { } while (0) so it behaves as a single
 * statement; the previous form lacked the `do`, which left a stray
 * `while (0);` after the block and broke use inside if/else.
 */
#define ALLOC_VDENC_BUFFER_RESOURCE(buffer, bfsize, des) do {   \
        (buffer).type = I965_GPE_RESOURCE_BUFFER;               \
        (buffer).width = (bfsize);                              \
        (buffer).height = 1;                                    \
        (buffer).pitch = (buffer).width;                        \
        (buffer).size = (buffer).pitch;                         \
        (buffer).tiling = I915_TILING_NONE;                     \
        i965_allocate_gpe_resource(i965->intel.bufmgr,          \
                                   &(buffer),                   \
                                   (des));                      \
    } while (0)
792+
/*
 * Map a level number to the maximum vertical motion-vector range.
 *
 *   level == 10        -> 256
 *   other level <= 20  -> 512   (this includes values below 10)
 *   level 21..30       -> 1024
 *   level > 30         -> 2048
 */
static int
gen9_vdenc_get_max_vmv_range(int level)
{
    if (level == 10)
        return 256;

    if (level <= 20)
        return 512;

    if (level <= 30)
        return 1024;

    return 2048;
}
809+
/*
 * Encode a cost value `v` into an 8-bit "4.4" LUT byte: the high nibble
 * is a shift count and the low nibble a base, i.e. the encoded value is
 * (byte & 15) << (byte >> 4).
 *
 * v   - cost to encode; 0 encodes as 0.
 * max - largest allowed encoding (same 4.4 format); returned as-is when
 *       v is greater than or equal to the value `max` decodes to.
 *
 * The shift is floor(log2(v)) - 3 clamped at 0, and v is rounded to the
 * nearest multiple of 2^shift.  The logarithm is computed with integer
 * shifts rather than log()/log(2.0), so the result does not depend on
 * floating-point rounding and needs no libm.
 */
static unsigned char
map_44_lut_value(unsigned int v, unsigned char max)
{
    unsigned int maxcost;
    unsigned int rest;
    int d;
    unsigned char ret;

    if (v == 0) {
        return 0;
    }

    maxcost = ((max & 15) << (max >> 4));

    if (v >= maxcost) {
        return max;
    }

    /* d = floor(log2(v)) - 3, computed exactly in integer arithmetic */
    d = -3;
    for (rest = v; rest > 1; rest >>= 1) {
        d++;
    }

    if (d < 0) {
        d = 0;
    }

    /* add half of 2^d before shifting so the base is rounded to nearest */
    ret = (unsigned char)((d << 4) + (int)((v + (d == 0 ? 0 : (1 << (d - 1)))) >> d));

    /*
     * Rounding can carry the base up to 16, which leaves a zero low
     * nibble with the shift already bumped by one; base 8 at that shift
     * encodes the same value.
     */
    ret = (ret & 0xf) == 0 ? (ret | 8) : ret;

    return ret;
}
838+
839+static void
840+gen9_vdenc_update_rate_control_parameters(VADriverContextP ctx,
841+ struct intel_encoder_context *encoder_context,
842+ VAEncMiscParameterRateControl *misc)
843+{
844+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
845+
846+ vdenc_context->max_bit_rate = ALIGN(misc->bits_per_second, 1000) / 1000;
847+ vdenc_context->mb_brc_enabled = 0;
848+
849+ if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
850+ vdenc_context->min_bit_rate = vdenc_context->max_bit_rate;
851+ vdenc_context->mb_brc_enabled = (misc->rc_flags.bits.mb_rate_control < 2);
852+
853+ if (vdenc_context->target_bit_rate != vdenc_context->max_bit_rate) {
854+ vdenc_context->target_bit_rate = vdenc_context->max_bit_rate;
855+ vdenc_context->brc_need_reset = 1;
856+ }
857+ } else if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
858+ vdenc_context->min_bit_rate = vdenc_context->max_bit_rate * (2 * misc->target_percentage - 100) / 100;
859+ vdenc_context->mb_brc_enabled = (misc->rc_flags.bits.mb_rate_control < 2);
860+
861+ if (vdenc_context->target_bit_rate != vdenc_context->max_bit_rate * misc->target_percentage / 100) {
862+ vdenc_context->target_bit_rate = vdenc_context->max_bit_rate * misc->target_percentage / 100;
863+ vdenc_context->brc_need_reset = 1;
864+ }
865+ }
866+}
867+
868+static void
869+gen9_vdenc_update_hrd_parameters(VADriverContextP ctx,
870+ struct intel_encoder_context *encoder_context,
871+ VAEncMiscParameterHRD *misc)
872+{
873+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
874+
875+ if (vdenc_context->internal_rate_mode == I965_BRC_CQP)
876+ return;
877+
878+ vdenc_context->vbv_buffer_size_in_bit = misc->buffer_size;
879+ vdenc_context->init_vbv_buffer_fullness_in_bit = misc->initial_buffer_fullness;
880+}
881+
882+static void
883+gen9_vdenc_update_framerate_parameters(VADriverContextP ctx,
884+ struct intel_encoder_context *encoder_context,
885+ VAEncMiscParameterFrameRate *misc)
886+{
887+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
888+
889+ vdenc_context->frames_per_100s = misc->framerate; /* misc->framerate is multiple of 100 */
890+}
891+
892+static void
893+gen9_vdenc_update_roi_parameters(VADriverContextP ctx,
894+ struct intel_encoder_context *encoder_context,
895+ VAEncMiscParameterBufferROI *misc)
896+{
897+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
898+ int i;
899+
900+ if (!misc || !misc->roi) {
901+ vdenc_context->num_roi = 0;
902+ return;
903+ }
904+
905+ vdenc_context->num_roi = MIN(misc->num_roi, 3);
906+ vdenc_context->max_delta_qp = misc->max_delta_qp;
907+ vdenc_context->min_delta_qp = misc->min_delta_qp;
908+ vdenc_context->vdenc_streamin_enable = (vdenc_context->num_roi == 0);
909+
910+ for (i = 0; i < vdenc_context->num_roi; i++) {
911+ vdenc_context->roi[i].left = misc->roi->roi_rectangle.x;
912+ vdenc_context->roi[i].right = vdenc_context->roi[i].left + misc->roi->roi_rectangle.width;
913+ vdenc_context->roi[i].top = misc->roi->roi_rectangle.y;
914+ vdenc_context->roi[i].bottom = vdenc_context->roi[i].top + misc->roi->roi_rectangle.height;
915+ vdenc_context->roi[i].value = misc->roi->roi_value;
916+
917+ vdenc_context->roi[i].left /= 16;
918+ vdenc_context->roi[i].right /= 16;
919+ vdenc_context->roi[i].top /= 16;
920+ vdenc_context->roi[i].bottom /= 16;
921+ }
922+}
923+
924+static void
925+gen9_vdenc_update_misc_parameters(VADriverContextP ctx,
926+ struct encode_state *encode_state,
927+ struct intel_encoder_context *encoder_context)
928+{
929+ int i;
930+ VAEncMiscParameterBuffer *misc_param;
931+
932+ for (i = 0; i < ARRAY_ELEMS(encode_state->misc_param); i++) {
933+ if (!encode_state->misc_param[i] || !encode_state->misc_param[i]->buffer)
934+ continue;
935+
936+ misc_param = (VAEncMiscParameterBuffer *)encode_state->misc_param[i]->buffer;
937+
938+ switch (misc_param->type) {
939+ case VAEncMiscParameterTypeFrameRate:
940+ gen9_vdenc_update_framerate_parameters(ctx,
941+ encoder_context,
942+ (VAEncMiscParameterFrameRate *)misc_param->data);
943+ break;
944+
945+ case VAEncMiscParameterTypeRateControl:
946+ gen9_vdenc_update_rate_control_parameters(ctx,
947+ encoder_context,
948+ (VAEncMiscParameterRateControl *)misc_param->data);
949+ break;
950+
951+ case VAEncMiscParameterTypeHRD:
952+ gen9_vdenc_update_hrd_parameters(ctx,
953+ encoder_context,
954+ (VAEncMiscParameterHRD *)misc_param->data);
955+ break;
956+
957+ case VAEncMiscParameterTypeROI:
958+ gen9_vdenc_update_roi_parameters(ctx,
959+ encoder_context,
960+ (VAEncMiscParameterBufferROI *)misc_param->data);
961+ break;
962+
963+ default:
964+ break;
965+ }
966+ }
967+}
968+
969+static void
970+gen9_vdenc_update_parameters(VADriverContextP ctx,
971+ VAProfile profile,
972+ struct encode_state *encode_state,
973+ struct intel_encoder_context *encoder_context)
974+{
975+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
976+ VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
977+ VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
978+
979+ if (profile == VAProfileH264High)
980+ vdenc_context->transform_8x8_mode_enable = !!pic_param->pic_fields.bits.transform_8x8_mode_flag;
981+ else
982+ vdenc_context->transform_8x8_mode_enable = 0;
983+
984+ vdenc_context->frame_width_in_mbs = seq_param->picture_width_in_mbs;
985+ vdenc_context->frame_height_in_mbs = seq_param->picture_height_in_mbs;
986+
987+ vdenc_context->frame_width = vdenc_context->frame_width_in_mbs * 16;
988+ vdenc_context->frame_height = vdenc_context->frame_height_in_mbs * 16;
989+
990+ vdenc_context->down_scaled_width_in_mb4x = WIDTH_IN_MACROBLOCKS(vdenc_context->frame_width / SCALE_FACTOR_4X);
991+ vdenc_context->down_scaled_height_in_mb4x = HEIGHT_IN_MACROBLOCKS(vdenc_context->frame_height / SCALE_FACTOR_4X);
992+ vdenc_context->down_scaled_width_4x = vdenc_context->down_scaled_width_in_mb4x * 16;
993+ vdenc_context->down_scaled_height_4x = ((vdenc_context->down_scaled_height_in_mb4x + 1) >> 1) * 16;
994+ vdenc_context->down_scaled_height_4x = ALIGN(vdenc_context->down_scaled_height_4x, 32) << 1;
995+
996+ if (vdenc_context->internal_rate_mode == I965_BRC_CBR) {
997+ vdenc_context->target_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
998+ vdenc_context->max_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
999+ vdenc_context->min_bit_rate = ALIGN(seq_param->bits_per_second, 1000) / 1000;
1000+ }
1001+
1002+ vdenc_context->init_vbv_buffer_fullness_in_bit = seq_param->bits_per_second;
1003+ vdenc_context->vbv_buffer_size_in_bit = (uint64_t)seq_param->bits_per_second << 1;
1004+ vdenc_context->frames_per_100s = 3000; /* 30fps */
1005+ vdenc_context->gop_size = seq_param->intra_period;
1006+ vdenc_context->ref_dist = seq_param->ip_period;
1007+ vdenc_context->vdenc_streamin_enable = 0;
1008+
1009+ gen9_vdenc_update_misc_parameters(ctx, encode_state, encoder_context);
1010+
1011+ vdenc_context->current_pass = 0;
1012+ vdenc_context->num_passes = 1;
1013+
1014+ if (vdenc_context->internal_rate_mode == I965_BRC_CBR ||
1015+ vdenc_context->internal_rate_mode == I965_BRC_VBR)
1016+ vdenc_context->brc_enabled = 1;
1017+ else
1018+ vdenc_context->brc_enabled = 0;
1019+
1020+ if (vdenc_context->brc_enabled &&
1021+ (!vdenc_context->init_vbv_buffer_fullness_in_bit ||
1022+ !vdenc_context->vbv_buffer_size_in_bit ||
1023+ !vdenc_context->max_bit_rate ||
1024+ !vdenc_context->target_bit_rate ||
1025+ !vdenc_context->frames_per_100s))
1026+ vdenc_context->brc_enabled = 0;
1027+
1028+ if (!vdenc_context->brc_enabled) {
1029+ vdenc_context->target_bit_rate = 0;
1030+ vdenc_context->max_bit_rate = 0;
1031+ vdenc_context->min_bit_rate = 0;
1032+ vdenc_context->init_vbv_buffer_fullness_in_bit = 0;
1033+ vdenc_context->vbv_buffer_size_in_bit = 0;
1034+ } else {
1035+ vdenc_context->num_passes = NUM_OF_BRC_PAK_PASSES;
1036+ }
1037+}
1038+
1039+static void
1040+gen9_vdenc_avc_calculate_mode_cost(VADriverContextP ctx,
1041+ struct encode_state *encode_state,
1042+ struct intel_encoder_context *encoder_context,
1043+ int qp)
1044+{
1045+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1046+ unsigned int frame_type = vdenc_context->frame_type;
1047+
1048+ memset(vdenc_context->mode_cost, 0, sizeof(vdenc_context->mode_cost));
1049+ memset(vdenc_context->mv_cost, 0, sizeof(vdenc_context->mv_cost));
1050+ memset(vdenc_context->hme_mv_cost, 0, sizeof(vdenc_context->hme_mv_cost));
1051+
1052+ vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_NONPRED] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_NONPRED][qp]), 0x6f);
1053+ vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_16x16][qp]), 0x8f);
1054+ vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_8x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_8x8][qp]), 0x8f);
1055+ vdenc_context->mode_cost[VDENC_LUTMODE_INTRA_4x4] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTRA_4x4][qp]), 0x8f);
1056+
1057+ if (frame_type == VDENC_FRAME_P) {
1058+ vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x16] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x16][qp]), 0x8f);
1059+ vdenc_context->mode_cost[VDENC_LUTMODE_INTER_16x8] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_16x8][qp]), 0x8f);
1060+ vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X8Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X8Q][qp]), 0x6f);
1061+ vdenc_context->mode_cost[VDENC_LUTMODE_INTER_8X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_8X4Q][qp]), 0x6f);
1062+ vdenc_context->mode_cost[VDENC_LUTMODE_INTER_4X4Q] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_INTER_4X4Q][qp]), 0x6f);
1063+ vdenc_context->mode_cost[VDENC_LUTMODE_REF_ID] = map_44_lut_value((uint32_t)(vdenc_mode_const[frame_type][VDENC_LUTMODE_REF_ID][qp]), 0x6f);
1064+
1065+ vdenc_context->mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[0]), 0x6f);
1066+ vdenc_context->mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[1]), 0x6f);
1067+ vdenc_context->mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[2]), 0x6f);
1068+ vdenc_context->mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[3]), 0x6f);
1069+ vdenc_context->mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[4]), 0x6f);
1070+ vdenc_context->mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[5]), 0x6f);
1071+ vdenc_context->mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[6]), 0x6f);
1072+ vdenc_context->mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_mv_cost_skipbias_qpel[7]), 0x6f);
1073+
1074+ vdenc_context->hme_mv_cost[0] = map_44_lut_value((uint32_t)(vdenc_hme_cost[0][qp]), 0x6f);
1075+ vdenc_context->hme_mv_cost[1] = map_44_lut_value((uint32_t)(vdenc_hme_cost[1][qp]), 0x6f);
1076+ vdenc_context->hme_mv_cost[2] = map_44_lut_value((uint32_t)(vdenc_hme_cost[2][qp]), 0x6f);
1077+ vdenc_context->hme_mv_cost[3] = map_44_lut_value((uint32_t)(vdenc_hme_cost[3][qp]), 0x6f);
1078+ vdenc_context->hme_mv_cost[4] = map_44_lut_value((uint32_t)(vdenc_hme_cost[4][qp]), 0x6f);
1079+ vdenc_context->hme_mv_cost[5] = map_44_lut_value((uint32_t)(vdenc_hme_cost[5][qp]), 0x6f);
1080+ vdenc_context->hme_mv_cost[6] = map_44_lut_value((uint32_t)(vdenc_hme_cost[6][qp]), 0x6f);
1081+ vdenc_context->hme_mv_cost[7] = map_44_lut_value((uint32_t)(vdenc_hme_cost[7][qp]), 0x6f);
1082+ }
1083+}
1084+
1085+static void
1086+gen9_vdenc_update_roi_in_streamin_state(VADriverContextP ctx,
1087+ struct intel_encoder_context *encoder_context)
1088+{
1089+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1090+ struct gen9_vdenc_streamin_state *streamin_state;
1091+ int row, col, i;
1092+
1093+ if (!vdenc_context->num_roi)
1094+ return;
1095+
1096+ streamin_state = (struct gen9_vdenc_streamin_state *)i965_map_gpe_resource(&vdenc_context->vdenc_streamin_res);
1097+
1098+ if (!streamin_state)
1099+ return;
1100+
1101+ for (col = 0; col < vdenc_context->frame_width_in_mbs; col++) {
1102+ for (row = 0; row < vdenc_context->frame_height_in_mbs; row++) {
1103+ streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = 0; /* non-ROI region */
1104+
1105+ /* The last one has higher priority */
1106+ for (i = vdenc_context->num_roi - 1; i >= 0; i--) {
1107+ if ((col >= vdenc_context->roi[i].left && col <= vdenc_context->roi[i].right) &&
1108+ (row >= vdenc_context->roi[i].top && row <= vdenc_context->roi[i].bottom)) {
1109+ streamin_state[row * vdenc_context->frame_width_in_mbs + col].dw0.roi_selection = i + 1;
1110+
1111+ break;
1112+ }
1113+ }
1114+ }
1115+ }
1116+
1117+ i965_unmap_gpe_resource(&vdenc_context->vdenc_streamin_res);
1118+}
1119+
1120+static VAStatus
1121+gen9_vdenc_avc_prepare(VADriverContextP ctx,
1122+ VAProfile profile,
1123+ struct encode_state *encode_state,
1124+ struct intel_encoder_context *encoder_context)
1125+{
1126+ struct i965_driver_data *i965 = i965_driver_data(ctx);
1127+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1128+ struct i965_coded_buffer_segment *coded_buffer_segment;
1129+ struct object_surface *obj_surface;
1130+ struct object_buffer *obj_buffer;
1131+ VAEncPictureParameterBufferH264 *pic_param;
1132+ VAEncSliceParameterBufferH264 *slice_param;
1133+ VDEncAvcSurface *vdenc_avc_surface;
1134+ dri_bo *bo;
1135+ int i, j, enable_avc_ildb = 0;
1136+ int qp;
1137+ char *pbuffer;
1138+
1139+ gen9_vdenc_update_parameters(ctx, profile, encode_state, encoder_context);
1140+
1141+ for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
1142+ assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
1143+ slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
1144+
1145+ for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
1146+ assert((slice_param->slice_type == SLICE_TYPE_I) ||
1147+ (slice_param->slice_type == SLICE_TYPE_SI) ||
1148+ (slice_param->slice_type == SLICE_TYPE_P) ||
1149+ (slice_param->slice_type == SLICE_TYPE_SP) ||
1150+ (slice_param->slice_type == SLICE_TYPE_B));
1151+
1152+ if (slice_param->disable_deblocking_filter_idc != 1) {
1153+ enable_avc_ildb = 1;
1154+ break;
1155+ }
1156+
1157+ slice_param++;
1158+ }
1159+ }
1160+
1161+ /* Setup current frame */
1162+ obj_surface = encode_state->reconstructed_object;
1163+ i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
1164+
1165+ if (obj_surface->private_data == NULL) {
1166+ vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1167+ assert(vdenc_avc_surface);
1168+
1169+ vdenc_avc_surface->ctx = ctx;
1170+ i965_CreateSurfaces(ctx,
1171+ vdenc_context->down_scaled_width_4x,
1172+ vdenc_context->down_scaled_height_4x,
1173+ VA_RT_FORMAT_YUV420,
1174+ 1,
1175+ &vdenc_avc_surface->scaled_4x_surface_id);
1176+ vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1177+ assert(vdenc_avc_surface->scaled_4x_surface_obj);
1178+ i965_check_alloc_surface_bo(ctx,
1179+ vdenc_avc_surface->scaled_4x_surface_obj,
1180+ 1,
1181+ VA_FOURCC_NV12,
1182+ SUBSAMPLE_YUV420);
1183+
1184+ obj_surface->private_data = (void *)vdenc_avc_surface;
1185+ obj_surface->free_private_data = (void *)vdenc_free_avc_surface;
1186+ }
1187+
1188+ vdenc_avc_surface = (VDEncAvcSurface *)obj_surface->private_data;
1189+ assert(vdenc_avc_surface->scaled_4x_surface_obj);
1190+
1191+ /* Reconstructed surfaces */
1192+ i965_free_gpe_resource(&vdenc_context->recon_surface_res);
1193+ i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
1194+ i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
1195+ i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
1196+
1197+ i965_object_surface_to_2d_gpe_resource(&vdenc_context->recon_surface_res, obj_surface);
1198+ i965_object_surface_to_2d_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res, vdenc_avc_surface->scaled_4x_surface_obj);
1199+
1200+ if (enable_avc_ildb) {
1201+ i965_object_surface_to_2d_gpe_resource(&vdenc_context->post_deblocking_output_res, obj_surface);
1202+ } else {
1203+ i965_object_surface_to_2d_gpe_resource(&vdenc_context->pre_deblocking_output_res, obj_surface);
1204+ }
1205+
1206+
1207+ /* Reference surfaces */
1208+ for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
1209+ assert(ARRAY_ELEMS(vdenc_context->list_reference_res) ==
1210+ ARRAY_ELEMS(vdenc_context->list_scaled_4x_reference_res));
1211+ i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
1212+ i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
1213+ obj_surface = encode_state->reference_objects[i];
1214+
1215+ if (obj_surface && obj_surface->bo) {
1216+ i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_reference_res[i], obj_surface);
1217+
1218+ if (obj_surface->private_data == NULL) {
1219+ vdenc_avc_surface = calloc(sizeof(VDEncAvcSurface), 1);
1220+ assert(vdenc_avc_surface);
1221+
1222+ vdenc_avc_surface->ctx = ctx;
1223+ i965_CreateSurfaces(ctx,
1224+ vdenc_context->down_scaled_width_4x,
1225+ vdenc_context->down_scaled_height_4x,
1226+ VA_RT_FORMAT_YUV420,
1227+ 1,
1228+ &vdenc_avc_surface->scaled_4x_surface_id);
1229+ vdenc_avc_surface->scaled_4x_surface_obj = SURFACE(vdenc_avc_surface->scaled_4x_surface_id);
1230+ assert(vdenc_avc_surface->scaled_4x_surface_obj);
1231+ i965_check_alloc_surface_bo(ctx,
1232+ vdenc_avc_surface->scaled_4x_surface_obj,
1233+ 1,
1234+ VA_FOURCC_NV12,
1235+ SUBSAMPLE_YUV420);
1236+
1237+ obj_surface->private_data = vdenc_avc_surface;
1238+ obj_surface->free_private_data = gen_free_avc_surface;
1239+ }
1240+
1241+ vdenc_avc_surface = obj_surface->private_data;
1242+ i965_object_surface_to_2d_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i], vdenc_avc_surface->scaled_4x_surface_obj);
1243+ }
1244+ }
1245+
1246+ /* Input YUV surface */
1247+ i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
1248+ i965_object_surface_to_2d_gpe_resource(&vdenc_context->uncompressed_input_surface_res, encode_state->input_yuv_object);
1249+
1250+ /* Encoded bitstream */
1251+ obj_buffer = encode_state->coded_buf_object;
1252+ bo = obj_buffer->buffer_store->bo;
1253+ i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
1254+ i965_dri_object_to_buffer_gpe_resource(&vdenc_context->compressed_bitstream.res, bo);
1255+ vdenc_context->compressed_bitstream.start_offset = I965_CODEDBUFFER_HEADER_SIZE;
1256+ vdenc_context->compressed_bitstream.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
1257+
1258+ /* Status buffer */
1259+ i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
1260+ i965_dri_object_to_buffer_gpe_resource(&vdenc_context->status_bffuer.res, bo);
1261+ vdenc_context->status_bffuer.base_offset = offsetof(struct i965_coded_buffer_segment, codec_private_data);
1262+ vdenc_context->status_bffuer.size = ALIGN(sizeof(struct gen9_vdenc_status), 64);
1263+ vdenc_context->status_bffuer.bytes_per_frame_offset = offsetof(struct gen9_vdenc_status, bytes_per_frame);
1264+ assert(vdenc_context->status_bffuer.base_offset + vdenc_context->status_bffuer.size <
1265+ vdenc_context->compressed_bitstream.start_offset);
1266+
1267+ dri_bo_map(bo, 1);
1268+
1269+ coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
1270+ coded_buffer_segment->mapped = 0;
1271+ coded_buffer_segment->codec = encoder_context->codec;
1272+ coded_buffer_segment->status_support = 1;
1273+
1274+ pbuffer = bo->virtual;
1275+ pbuffer += vdenc_context->status_bffuer.base_offset;
1276+ memset(pbuffer, 0, vdenc_context->status_bffuer.size);
1277+
1278+ dri_bo_unmap(bo);
1279+
1280+ i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
1281+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_intra_row_store_scratch_res,
1282+ vdenc_context->frame_width_in_mbs * 64,
1283+ "Intra row store scratch buffer");
1284+
1285+ i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
1286+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_deblocking_filter_row_store_scratch_res,
1287+ vdenc_context->frame_width_in_mbs * 256,
1288+ "Deblocking filter row store scratch buffer");
1289+
1290+ i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
1291+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->mfx_bsd_mpc_row_store_scratch_res,
1292+ vdenc_context->frame_width_in_mbs * 128,
1293+ "BSD/MPC row store scratch buffer");
1294+
1295+ i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
1296+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_row_store_scratch_res,
1297+ vdenc_context->frame_width_in_mbs * 64,
1298+ "VDENC row store scratch buffer");
1299+
1300+ assert(sizeof(struct gen9_vdenc_streamin_state) == 64);
1301+ i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
1302+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_streamin_res,
1303+ vdenc_context->frame_width_in_mbs *
1304+ vdenc_context->frame_height_in_mbs *
1305+ sizeof(struct gen9_vdenc_streamin_state),
1306+ "VDENC StreamIn buffer");
1307+
1308+ /*
1309+ * Calculate the index for each reference surface in list0 for the first slice
1310+ * TODO: other slices
1311+ */
1312+ pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1313+ slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1314+
1315+ vdenc_context->num_refs[0] = pic_param->num_ref_idx_l0_active_minus1 + 1;
1316+
1317+ if (slice_param->num_ref_idx_active_override_flag)
1318+ vdenc_context->num_refs[0] = slice_param->num_ref_idx_l0_active_minus1 + 1;
1319+
1320+ if (vdenc_context->num_refs[0] > ARRAY_ELEMS(vdenc_context->list_ref_idx[0]))
1321+ return VA_STATUS_ERROR_INVALID_VALUE;
1322+
1323+ for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_ref_idx[0]); i++) {
1324+ VAPictureH264 *va_pic;
1325+
1326+ assert(ARRAY_ELEMS(slice_param->RefPicList0) == ARRAY_ELEMS(vdenc_context->list_ref_idx[0]));
1327+ vdenc_context->list_ref_idx[0][i] = 0;
1328+
1329+ if (i >= vdenc_context->num_refs[0])
1330+ continue;
1331+
1332+ va_pic = &slice_param->RefPicList0[i];
1333+
1334+ for (j = 0; j < ARRAY_ELEMS(encode_state->reference_objects); j++) {
1335+ obj_surface = encode_state->reference_objects[j];
1336+
1337+ if (obj_surface &&
1338+ obj_surface->bo &&
1339+ obj_surface->base.id == va_pic->picture_id) {
1340+
1341+ assert(obj_surface->base.id != VA_INVALID_SURFACE);
1342+ vdenc_context->list_ref_idx[0][i] = j;
1343+
1344+ break;
1345+ }
1346+ }
1347+ }
1348+
1349+ if (slice_param->slice_type == SLICE_TYPE_I ||
1350+ slice_param->slice_type == SLICE_TYPE_SI)
1351+ vdenc_context->frame_type = VDENC_FRAME_I;
1352+ else
1353+ vdenc_context->frame_type = VDENC_FRAME_P;
1354+
1355+ qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
1356+
1357+ gen9_vdenc_avc_calculate_mode_cost(ctx, encode_state, encoder_context, qp);
1358+ gen9_vdenc_update_roi_in_streamin_state(ctx, encoder_context);
1359+
1360+ return VA_STATUS_SUCCESS;
1361+}
1362+
1363+static void
1364+gen9_vdenc_huc_pipe_mode_select(VADriverContextP ctx,
1365+ struct intel_encoder_context *encoder_context,
1366+ struct huc_pipe_mode_select_parameter *params)
1367+{
1368+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1369+
1370+ BEGIN_BCS_BATCH(batch, 3);
1371+
1372+ OUT_BCS_BATCH(batch, HUC_PIPE_MODE_SELECT | (3 - 2));
1373+ OUT_BCS_BATCH(batch,
1374+ (params->huc_stream_object_enable << 10) |
1375+ (params->indirect_stream_out_enable << 4));
1376+ OUT_BCS_BATCH(batch,
1377+ params->media_soft_reset_counter);
1378+
1379+ ADVANCE_BCS_BATCH(batch);
1380+}
1381+
1382+static void
1383+gen9_vdenc_huc_imem_state(VADriverContextP ctx,
1384+ struct intel_encoder_context *encoder_context,
1385+ struct huc_imem_state_parameter *params)
1386+{
1387+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1388+
1389+ BEGIN_BCS_BATCH(batch, 5);
1390+
1391+ OUT_BCS_BATCH(batch, HUC_IMEM_STATE | (5 - 2));
1392+ OUT_BCS_BATCH(batch, 0);
1393+ OUT_BCS_BATCH(batch, 0);
1394+ OUT_BCS_BATCH(batch, 0);
1395+ OUT_BCS_BATCH(batch, params->huc_firmware_descriptor);
1396+
1397+ ADVANCE_BCS_BATCH(batch);
1398+}
1399+
1400+static void
1401+gen9_vdenc_huc_dmem_state(VADriverContextP ctx,
1402+ struct intel_encoder_context *encoder_context,
1403+ struct huc_dmem_state_parameter *params)
1404+{
1405+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1406+
1407+ BEGIN_BCS_BATCH(batch, 6);
1408+
1409+ OUT_BCS_BATCH(batch, HUC_DMEM_STATE | (6 - 2));
1410+ OUT_BUFFER_3DW(batch, params->huc_data_source_res->bo, 0, 0, 0);
1411+ OUT_BCS_BATCH(batch, params->huc_data_destination_base_address);
1412+ OUT_BCS_BATCH(batch, params->huc_data_length);
1413+
1414+ ADVANCE_BCS_BATCH(batch);
1415+}
1416+
1417+/*
1418+static void
1419+gen9_vdenc_huc_cfg_state(VADriverContextP ctx,
1420+ struct intel_encoder_context *encoder_context,
1421+ struct huc_cfg_state_parameter *params)
1422+{
1423+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1424+
1425+ BEGIN_BCS_BATCH(batch, 2);
1426+
1427+ OUT_BCS_BATCH(batch, HUC_CFG_STATE | (2 - 2));
1428+ OUT_BCS_BATCH(batch, !!params->force_reset);
1429+
1430+ ADVANCE_BCS_BATCH(batch);
1431+}
1432+*/
1433+static void
1434+gen9_vdenc_huc_virtual_addr_state(VADriverContextP ctx,
1435+ struct intel_encoder_context *encoder_context,
1436+ struct huc_virtual_addr_parameter *params)
1437+{
1438+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1439+ int i;
1440+
1441+ BEGIN_BCS_BATCH(batch, 49);
1442+
1443+ OUT_BCS_BATCH(batch, HUC_VIRTUAL_ADDR_STATE | (49 - 2));
1444+
1445+ for (i = 0; i < 16; i++) {
1446+ if (params->regions[i].huc_surface_res && params->regions[i].huc_surface_res->bo)
1447+ OUT_BUFFER_3DW(batch,
1448+ params->regions[i].huc_surface_res->bo,
1449+ !!params->regions[i].is_target, 0, 0);
1450+ else
1451+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1452+ }
1453+
1454+ ADVANCE_BCS_BATCH(batch);
1455+}
1456+
1457+static void
1458+gen9_vdenc_huc_ind_obj_base_addr_state(VADriverContextP ctx,
1459+ struct intel_encoder_context *encoder_context,
1460+ struct huc_ind_obj_base_addr_parameter *params)
1461+{
1462+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1463+
1464+ BEGIN_BCS_BATCH(batch, 11);
1465+
1466+ OUT_BCS_BATCH(batch, HUC_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
1467+
1468+ if (params->huc_indirect_stream_in_object_res)
1469+ OUT_BUFFER_3DW(batch,
1470+ params->huc_indirect_stream_in_object_res->bo,
1471+ 0, 0, 0);
1472+ else
1473+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1474+
1475+ OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1476+
1477+ if (params->huc_indirect_stream_out_object_res)
1478+ OUT_BUFFER_3DW(batch,
1479+ params->huc_indirect_stream_out_object_res->bo,
1480+ 1, 0, 0);
1481+ else
1482+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
1483+
1484+ OUT_BUFFER_2DW(batch, NULL, 0, 0); /* ignore access upper bound */
1485+
1486+ ADVANCE_BCS_BATCH(batch);
1487+}
1488+
1489+static void
1490+gen9_vdenc_huc_store_huc_status2(VADriverContextP ctx,
1491+ struct intel_encoder_context *encoder_context)
1492+{
1493+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1494+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1495+ struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
1496+ struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
1497+
1498+ /* Write HUC_STATUS2 mask (1 << 6) */
1499+ memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
1500+ mi_store_data_imm_params.bo = vdenc_context->huc_status2_res.bo;
1501+ mi_store_data_imm_params.offset = 0;
1502+ mi_store_data_imm_params.dw0 = (1 << 6);
1503+ gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
1504+
1505+ /* Store HUC_STATUS2 */
1506+ memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
1507+ mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS2;
1508+ mi_store_register_mem_params.bo = vdenc_context->huc_status2_res.bo;
1509+ mi_store_register_mem_params.offset = 4;
1510+ gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
1511+}
1512+
1513+static void
1514+gen9_vdenc_huc_stream_object(VADriverContextP ctx,
1515+ struct intel_encoder_context *encoder_context,
1516+ struct huc_stream_object_parameter *params)
1517+{
1518+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1519+
1520+ BEGIN_BCS_BATCH(batch, 5);
1521+
1522+ OUT_BCS_BATCH(batch, HUC_STREAM_OBJECT | (5 - 2));
1523+ OUT_BCS_BATCH(batch, params->indirect_stream_in_data_length);
1524+ OUT_BCS_BATCH(batch,
1525+ (1 << 31) | /* Must be 1 */
1526+ params->indirect_stream_in_start_address);
1527+ OUT_BCS_BATCH(batch, params->indirect_stream_out_start_address);
1528+ OUT_BCS_BATCH(batch,
1529+ (!!params->huc_bitstream_enable << 29) |
1530+ (params->length_mode << 27) |
1531+ (!!params->stream_out << 26) |
1532+ (!!params->emulation_prevention_byte_removal << 25) |
1533+ (!!params->start_code_search_engine << 24) |
1534+ (params->start_code_byte2 << 16) |
1535+ (params->start_code_byte1 << 8) |
1536+ params->start_code_byte0);
1537+
1538+ ADVANCE_BCS_BATCH(batch);
1539+}
1540+
1541+static void
1542+gen9_vdenc_huc_start(VADriverContextP ctx,
1543+ struct intel_encoder_context *encoder_context,
1544+ struct huc_start_parameter *params)
1545+{
1546+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1547+
1548+ BEGIN_BCS_BATCH(batch, 2);
1549+
1550+ OUT_BCS_BATCH(batch, HUC_START | (2 - 2));
1551+ OUT_BCS_BATCH(batch, !!params->last_stream_object);
1552+
1553+ ADVANCE_BCS_BATCH(batch);
1554+}
1555+
1556+static void
1557+gen9_vdenc_vd_pipeline_flush(VADriverContextP ctx,
1558+ struct intel_encoder_context *encoder_context,
1559+ struct vd_pipeline_flush_parameter *params)
1560+{
1561+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1562+
1563+ BEGIN_BCS_BATCH(batch, 2);
1564+
1565+ OUT_BCS_BATCH(batch, VD_PIPELINE_FLUSH | (2 - 2));
1566+ OUT_BCS_BATCH(batch,
1567+ params->mfx_pipeline_command_flush << 19 |
1568+ params->mfl_pipeline_command_flush << 18 |
1569+ params->vdenc_pipeline_command_flush << 17 |
1570+ params->hevc_pipeline_command_flush << 16 |
1571+ params->vd_command_message_parser_done << 4 |
1572+ params->mfx_pipeline_done << 3 |
1573+ params->mfl_pipeline_done << 2 |
1574+ params->vdenc_pipeline_done << 1 |
1575+ params->hevc_pipeline_done);
1576+
1577+ ADVANCE_BCS_BATCH(batch);
1578+}
1579+
/*
 * Return the maximum macroblock processing rate (macroblocks per second)
 * for an H.264 level_idc.
 *
 * Levels 20 through 52 are looked up in the table below; any other value
 * falls back to 11880, the same rate as level 20.
 */
static int
gen9_vdenc_get_max_mbps(int level_idc)
{
    static const struct {
        int level_idc;
        int max_mbps;
    } level_limits[] = {
        { 20, 11880 },
        { 21, 19800 },
        { 22, 20250 },
        { 30, 40500 },
        { 31, 108000 },
        { 32, 216000 },
        { 40, 245760 },
        { 41, 245760 },
        { 42, 522240 },
        { 50, 589824 },
        { 51, 983040 },
        { 52, 2073600 },
    };
    unsigned int i;

    for (i = 0; i < sizeof(level_limits) / sizeof(level_limits[0]); i++) {
        if (level_limits[i].level_idc == level_idc)
            return level_limits[i].max_mbps;
    }

    /* Unknown or lower level */
    return 11880;
}
1637+
1638+static unsigned int
1639+gen9_vdenc_get_profile_level_max_frame(VADriverContextP ctx,
1640+ struct intel_encoder_context *encoder_context,
1641+ int level_idc)
1642+{
1643+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1644+ double bits_per_mb, tmpf;
1645+ int max_mbps, num_mb_per_frame;
1646+ uint64_t max_byte_per_frame0, max_byte_per_frame1;
1647+ unsigned int ret;
1648+
1649+ if (level_idc >= 31 && level_idc <= 40)
1650+ bits_per_mb = 96.0;
1651+ else
1652+ bits_per_mb = 192.0;
1653+
1654+ max_mbps = gen9_vdenc_get_max_mbps(level_idc);
1655+ num_mb_per_frame = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs;
1656+
1657+ tmpf = (double)num_mb_per_frame;
1658+
1659+ if (tmpf < max_mbps / 172.0)
1660+ tmpf = max_mbps / 172.0;
1661+
1662+ max_byte_per_frame0 = (uint64_t)(tmpf * bits_per_mb);
1663+ max_byte_per_frame1 = (uint64_t)(((double)max_mbps * 100) / vdenc_context->frames_per_100s *bits_per_mb);
1664+
1665+ /* TODO: check VAEncMiscParameterTypeMaxFrameSize */
1666+ ret = (unsigned int)MIN(max_byte_per_frame0, max_byte_per_frame1);
1667+ ret = (unsigned int)MIN(ret, vdenc_context->frame_height * vdenc_context->frame_height);
1668+
1669+ return ret;
1670+}
1671+
1672+static int
1673+gen9_vdenc_calculate_initial_qp(VADriverContextP ctx,
1674+ struct encode_state *encode_state,
1675+ struct intel_encoder_context *encoder_context)
1676+{
1677+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1678+ float x0 = 0, y0 = 1.19f, x1 = 1.75f, y1 = 1.75f;
1679+ unsigned frame_size;
1680+ int qp, delat_qp;
1681+
1682+ frame_size = (vdenc_context->frame_width * vdenc_context->frame_height * 3 / 2);
1683+ qp = (int)(1.0 / 1.2 * pow(10.0,
1684+ (log10(frame_size * 2.0 / 3.0 * ((float)vdenc_context->frames_per_100s) /
1685+ ((float)(vdenc_context->target_bit_rate * 1000) * 100)) - x0) *
1686+ (y1 - y0) / (x1 - x0) + y0) + 0.5);
1687+ qp += 2;
1688+ delat_qp = (int)(9 - (vdenc_context->vbv_buffer_size_in_bit * ((float)vdenc_context->frames_per_100s) /
1689+ ((float)(vdenc_context->target_bit_rate * 1000) * 100)));
1690+ if (delat_qp > 0)
1691+ qp += delat_qp;
1692+
1693+ qp = CLAMP(1, 51, qp);
1694+ qp--;
1695+
1696+ if (qp < 0)
1697+ qp = 1;
1698+
1699+ return qp;
1700+}
1701+
1702+static void
1703+gen9_vdenc_update_huc_brc_init_dmem(VADriverContextP ctx,
1704+ struct encode_state *encode_state,
1705+ struct intel_encoder_context *encoder_context)
1706+{
1707+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1708+ struct huc_brc_init_dmem *dmem;
1709+ VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1710+ double input_bits_per_frame, bps_ratio;
1711+ int i;
1712+
1713+ vdenc_context->brc_init_reset_input_bits_per_frame = ((double)(vdenc_context->max_bit_rate * 1000) * 100) / vdenc_context->frames_per_100s;
1714+ vdenc_context->brc_init_current_target_buf_full_in_bits = vdenc_context->brc_init_reset_input_bits_per_frame;
1715+ vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1716+
1717+ dmem = (struct huc_brc_init_dmem *)i965_map_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1718+
1719+ if (!dmem)
1720+ return;
1721+
1722+ memset(dmem, 0, sizeof(*dmem));
1723+
1724+ dmem->brc_func = vdenc_context->brc_initted ? 2 : 0;
1725+
1726+ dmem->frame_width = vdenc_context->frame_width;
1727+ dmem->frame_height = vdenc_context->frame_height;
1728+
1729+ dmem->target_bitrate = vdenc_context->target_bit_rate * 1000;
1730+ dmem->min_rate = vdenc_context->min_bit_rate * 1000;
1731+ dmem->max_rate = vdenc_context->max_bit_rate * 1000;
1732+ dmem->buffer_size = vdenc_context->vbv_buffer_size_in_bit;
1733+ dmem->init_buffer_fullness = vdenc_context->init_vbv_buffer_fullness_in_bit;
1734+
1735+ if (dmem->init_buffer_fullness > vdenc_context->init_vbv_buffer_fullness_in_bit)
1736+ dmem->init_buffer_fullness = vdenc_context->vbv_buffer_size_in_bit;
1737+
1738+ if (vdenc_context->internal_rate_mode == I965_BRC_CBR)
1739+ dmem->brc_flag |= 0x10;
1740+ else if (vdenc_context->internal_rate_mode == I965_BRC_VBR)
1741+ dmem->brc_flag |= 0x20;
1742+
1743+ dmem->frame_rate_m = vdenc_context->frames_per_100s;
1744+ dmem->frame_rate_d = 100;
1745+
1746+ dmem->profile_level_max_frame = gen9_vdenc_get_profile_level_max_frame(ctx, encoder_context, seq_param->level_idc);
1747+
1748+ if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1749+ dmem->num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1750+
1751+ dmem->min_qp = 10;
1752+ dmem->max_qp = 51;
1753+
1754+ input_bits_per_frame = ((double)vdenc_context->max_bit_rate * 1000 * 100) / vdenc_context->frames_per_100s;
1755+ bps_ratio = input_bits_per_frame / ((double)vdenc_context->vbv_buffer_size_in_bit * 100 / vdenc_context->frames_per_100s);
1756+
1757+ if (bps_ratio < 0.1)
1758+ bps_ratio = 0.1;
1759+
1760+ if (bps_ratio > 3.5)
1761+ bps_ratio = 3.5;
1762+
1763+ for (i = 0; i < 4; i++) {
1764+ dmem->dev_thresh_pb0[i] = (char)(-50 * pow(vdenc_brc_dev_threshpb0_fp_neg[i], bps_ratio));
1765+ dmem->dev_thresh_pb0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshpb0_fp_pos[i], bps_ratio));
1766+
1767+ dmem->dev_thresh_i0[i] = (char)(-50 * pow(vdenc_brc_dev_threshi0_fp_neg[i], bps_ratio));
1768+ dmem->dev_thresh_i0[i + 4] = (char)(50 * pow(vdenc_brc_dev_threshi0_fp_pos[i], bps_ratio));
1769+
1770+ dmem->dev_thresh_vbr0[i] = (char)(-50 * pow(vdenc_brc_dev_threshvbr0_neg[i], bps_ratio));
1771+ dmem->dev_thresh_vbr0[i + 4] = (char)(100 * pow(vdenc_brc_dev_threshvbr0_pos[i], bps_ratio));
1772+ }
1773+
1774+ dmem->init_qp_ip = gen9_vdenc_calculate_initial_qp(ctx, encode_state, encoder_context);
1775+
1776+ if (vdenc_context->mb_brc_enabled) {
1777+ dmem->mb_qp_ctrl = 1;
1778+ dmem->dist_qp_delta[0] = -5;
1779+ dmem->dist_qp_delta[1] = -2;
1780+ dmem->dist_qp_delta[2] = 2;
1781+ dmem->dist_qp_delta[3] = 5;
1782+ }
1783+
1784+ dmem->slice_size_ctrl_en = 0; /* TODO: add support for slice size control */
1785+
1786+ dmem->oscillation_qp_delta = 0; /* TODO: add support */
1787+ dmem->first_iframe_no_hrd_check = 0;/* TODO: add support */
1788+
1789+ // 2nd re-encode pass if possible
1790+ if (vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs >= (3840 * 2160 / 256)) {
1791+ dmem->top_qp_delta_thr_for_2nd_pass = 5;
1792+ dmem->bottom_qp_delta_thr_for_2nd_pass = 5;
1793+ dmem->top_frame_size_threshold_for_2nd_pass = 80;
1794+ dmem->bottom_frame_size_threshold_for_2nd_pass = 80;
1795+ } else {
1796+ dmem->top_qp_delta_thr_for_2nd_pass = 2;
1797+ dmem->bottom_qp_delta_thr_for_2nd_pass = 1;
1798+ dmem->top_frame_size_threshold_for_2nd_pass = 32;
1799+ dmem->bottom_frame_size_threshold_for_2nd_pass = 24;
1800+ }
1801+
1802+ dmem->qp_select_for_first_pass = 1;
1803+ dmem->mb_header_compensation = 1;
1804+ dmem->delta_qp_adaptation = 1;
1805+ dmem->max_crf_quality_factor = 52;
1806+
1807+ dmem->crf_quality_factor = 0; /* TODO: add support for CRF */
1808+ dmem->scenario_info = 0;
1809+
1810+ memcpy(&dmem->estrate_thresh_i0, vdenc_brc_estrate_thresh_i0, sizeof(dmem->estrate_thresh_i0));
1811+ memcpy(&dmem->estrate_thresh_p0, vdenc_brc_estrate_thresh_p0, sizeof(dmem->estrate_thresh_p0));
1812+
1813+ i965_unmap_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
1814+}
1815+
1816+static void
1817+gen9_vdenc_huc_brc_init_reset(VADriverContextP ctx,
1818+ struct encode_state *encode_state,
1819+ struct intel_encoder_context *encoder_context)
1820+{
1821+ struct intel_batchbuffer *batch = encoder_context->base.batch;
1822+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1823+ struct huc_pipe_mode_select_parameter pipe_mode_select_params;
1824+ struct huc_imem_state_parameter imem_state_params;
1825+ struct huc_dmem_state_parameter dmem_state_params;
1826+ struct huc_virtual_addr_parameter virtual_addr_params;
1827+ struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
1828+ struct huc_stream_object_parameter stream_object_params;
1829+ struct huc_start_parameter start_params;
1830+ struct vd_pipeline_flush_parameter pipeline_flush_params;
1831+ struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
1832+
1833+ vdenc_context->brc_target_size = vdenc_context->init_vbv_buffer_fullness_in_bit;
1834+
1835+ memset(&imem_state_params, 0, sizeof(imem_state_params));
1836+ imem_state_params.huc_firmware_descriptor = HUC_BRC_INIT_RESET;
1837+ gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
1838+
1839+ memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
1840+ gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
1841+
1842+ gen9_vdenc_update_huc_brc_init_dmem(ctx, encode_state, encoder_context);
1843+ memset(&dmem_state_params, 0, sizeof(dmem_state_params));
1844+ dmem_state_params.huc_data_source_res = &vdenc_context->brc_init_reset_dmem_res;
1845+ dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
1846+ dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_init_dmem), 64);
1847+ gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
1848+
1849+ memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
1850+ virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
1851+ virtual_addr_params.regions[0].is_target = 1;
1852+ gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
1853+
1854+ memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
1855+ ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
1856+ ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
1857+ gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
1858+
1859+ memset(&stream_object_params, 0, sizeof(stream_object_params));
1860+ stream_object_params.indirect_stream_in_data_length = 1;
1861+ stream_object_params.indirect_stream_in_start_address = 0;
1862+ gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
1863+
1864+ gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
1865+
1866+ memset(&start_params, 0, sizeof(start_params));
1867+ start_params.last_stream_object = 1;
1868+ gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
1869+
1870+ memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
1871+ pipeline_flush_params.hevc_pipeline_done = 1;
1872+ pipeline_flush_params.hevc_pipeline_command_flush = 1;
1873+ gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
1874+
1875+ memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
1876+ mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
1877+ gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
1878+}
1879+
1880+static void
1881+gen9_vdenc_update_huc_update_dmem(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
1882+{
1883+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1884+ struct huc_brc_update_dmem *dmem;
1885+ int i, num_p_in_gop = 0;
1886+
1887+ dmem = (struct huc_brc_update_dmem *)i965_map_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1888+
1889+ if (!dmem)
1890+ return;
1891+
1892+ dmem->brc_func = 1;
1893+
1894+ if (vdenc_context->brc_initted && (vdenc_context->current_pass == 0)) {
1895+ vdenc_context->brc_init_previous_target_buf_full_in_bits =
1896+ (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits);
1897+ vdenc_context->brc_init_current_target_buf_full_in_bits += vdenc_context->brc_init_reset_input_bits_per_frame;
1898+ vdenc_context->brc_target_size += vdenc_context->brc_init_reset_input_bits_per_frame;
1899+ }
1900+
1901+ if (vdenc_context->brc_target_size > vdenc_context->vbv_buffer_size_in_bit)
1902+ vdenc_context->brc_target_size -= vdenc_context->vbv_buffer_size_in_bit;
1903+
1904+ dmem->target_size = vdenc_context->brc_target_size;
1905+
1906+ dmem->peak_tx_bits_per_frame = (uint32_t)(vdenc_context->brc_init_current_target_buf_full_in_bits - vdenc_context->brc_init_previous_target_buf_full_in_bits);
1907+
1908+ dmem->target_slice_size = 0; // TODO: add support for slice size control
1909+
1910+ memcpy(dmem->start_global_adjust_frame, vdenc_brc_start_global_adjust_frame, sizeof(dmem->start_global_adjust_frame));
1911+ memcpy(dmem->global_rate_ratio_threshold, vdenc_brc_global_rate_ratio_threshold, sizeof(dmem->global_rate_ratio_threshold));
1912+
1913+ dmem->current_frame_type = (vdenc_context->frame_type + 2) % 3; // I frame:2, P frame:0, B frame:1
1914+
1915+ memcpy(dmem->start_global_adjust_mult, vdenc_brc_start_global_adjust_mult, sizeof(dmem->start_global_adjust_mult));
1916+ memcpy(dmem->start_global_adjust_div, vdenc_brc_start_global_adjust_div, sizeof(dmem->start_global_adjust_div));
1917+ memcpy(dmem->global_rate_ratio_threshold_qp, vdenc_brc_global_rate_ratio_threshold_qp, sizeof(dmem->global_rate_ratio_threshold_qp));
1918+
1919+ dmem->current_pak_pass = vdenc_context->current_pass;
1920+ dmem->max_num_passes = 2;
1921+
1922+ dmem->scene_change_detect_enable = 1;
1923+ dmem->scene_change_prev_intra_percent_threshold = 96;
1924+ dmem->scene_change_cur_intra_perent_threshold = 192;
1925+
1926+ if (vdenc_context->ref_dist && vdenc_context->gop_size > 0)
1927+ num_p_in_gop = (vdenc_context->gop_size - 1) / vdenc_context->ref_dist;
1928+
1929+ for (i = 0; i < 2; i++)
1930+ dmem->scene_change_width[i] = MIN((num_p_in_gop + 1) / 5, 6);
1931+
1932+ if (vdenc_context->is_low_delay)
1933+ dmem->ip_average_coeff = 0;
1934+ else
1935+ dmem->ip_average_coeff = 128;
1936+
1937+ dmem->skip_frame_size = 0;
1938+ dmem->num_of_frames_skipped = 0;
1939+
1940+ dmem->roi_source = 0; // TODO: add support for dirty ROI
1941+ dmem->hme_detection_enable = 0; // TODO: support HME kernel
1942+ dmem->hme_cost_enable = 1;
1943+
1944+ dmem->second_level_batchbuffer_size = 228;
1945+
1946+ i965_unmap_gpe_resource(&vdenc_context->brc_update_dmem_res[vdenc_context->current_pass]);
1947+}
1948+
1949+static void
1950+gen9_vdenc_init_mfx_avc_img_state(VADriverContextP ctx,
1951+ struct encode_state *encode_state,
1952+ struct intel_encoder_context *encoder_context,
1953+ struct gen9_mfx_avc_img_state *pstate)
1954+{
1955+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
1956+ VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
1957+ VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1958+
1959+ memset(pstate, 0, sizeof(*pstate));
1960+
1961+ pstate->dw0.value = (MFX_AVC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
1962+
1963+ pstate->dw1.frame_size_in_mbs_minus1 = vdenc_context->frame_width_in_mbs * vdenc_context->frame_height_in_mbs - 1;
1964+
1965+ pstate->dw2.frame_width_in_mbs_minus1 = vdenc_context->frame_width_in_mbs - 1;
1966+ pstate->dw2.frame_height_in_mbs_minus1 = vdenc_context->frame_height_in_mbs - 1;
1967+
1968+ pstate->dw3.image_structure = 0;
1969+ pstate->dw3.weighted_bipred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
1970+ pstate->dw3.weighted_pred_flag = pic_param->pic_fields.bits.weighted_pred_flag;
1971+ pstate->dw3.brc_domain_rate_control_enable = 1;
1972+ pstate->dw3.chroma_qp_offset = pic_param->chroma_qp_index_offset;
1973+ pstate->dw3.second_chroma_qp_offset = pic_param->second_chroma_qp_index_offset;
1974+
1975+ pstate->dw4.field_picture_flag = 0;
1976+ pstate->dw4.mbaff_mode_active = seq_param->seq_fields.bits.mb_adaptive_frame_field_flag;
1977+ pstate->dw4.frame_mb_only_flag = seq_param->seq_fields.bits.frame_mbs_only_flag;
1978+ pstate->dw4.transform_8x8_idct_mode_flag = vdenc_context->transform_8x8_mode_enable;
1979+ pstate->dw4.direct_8x8_interface_flag = seq_param->seq_fields.bits.direct_8x8_inference_flag;
1980+ pstate->dw4.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
1981+ pstate->dw4.entropy_coding_flag = pic_param->pic_fields.bits.entropy_coding_mode_flag;
1982+ pstate->dw4.mb_mv_format_flag = 1;
1983+ pstate->dw4.chroma_format_idc = seq_param->seq_fields.bits.chroma_format_idc;
1984+ pstate->dw4.mv_unpacked_flag = 1;
1985+ pstate->dw4.insert_test_flag = 0;
1986+ pstate->dw4.load_slice_pointer_flag = 0;
1987+ pstate->dw4.macroblock_stat_enable = 0; /* Always 0 in VDEnc mode */
1988+ pstate->dw4.minimum_frame_size = 0;
1989+
1990+ pstate->dw5.intra_mb_max_bit_flag = 1;
1991+ pstate->dw5.inter_mb_max_bit_flag = 1;
1992+ pstate->dw5.frame_size_over_flag = 1;
1993+ pstate->dw5.frame_size_under_flag = 1;
1994+ pstate->dw5.intra_mb_ipcm_flag = 1;
1995+ pstate->dw5.mb_rate_ctrl_flag = 0; /* Always 0 in VDEnc mode */
1996+ pstate->dw5.non_first_pass_flag = 0;
1997+ pstate->dw5.aq_enable = pstate->dw5.aq_rounding = 0;
1998+ pstate->dw5.aq_chroma_disable = 1;
1999+
2000+ pstate->dw6.intra_mb_max_size = 2700;
2001+ pstate->dw6.inter_mb_max_size = 4095;
2002+
2003+ pstate->dw8.slice_delta_qp_max0 = 0;
2004+ pstate->dw8.slice_delta_qp_max1 = 0;
2005+ pstate->dw8.slice_delta_qp_max2 = 0;
2006+ pstate->dw8.slice_delta_qp_max3 = 0;
2007+
2008+ pstate->dw9.slice_delta_qp_min0 = 0;
2009+ pstate->dw9.slice_delta_qp_min1 = 0;
2010+ pstate->dw9.slice_delta_qp_min2 = 0;
2011+ pstate->dw9.slice_delta_qp_min3 = 0;
2012+
2013+ pstate->dw10.frame_bitrate_min = 0;
2014+ pstate->dw10.frame_bitrate_min_unit = 1;
2015+ pstate->dw10.frame_bitrate_min_unit_mode = 1;
2016+ pstate->dw10.frame_bitrate_max = (1 << 14) - 1;
2017+ pstate->dw10.frame_bitrate_max_unit = 1;
2018+ pstate->dw10.frame_bitrate_max_unit_mode = 1;
2019+
2020+ pstate->dw11.frame_bitrate_min_delta = 0;
2021+ pstate->dw11.frame_bitrate_max_delta = 0;
2022+
2023+ pstate->dw12.vad_error_logic = 1;
2024+ /* TODO: set paramters DW19/DW20 for slices */
2025+}
2026+
2027+static void
2028+gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
2029+ struct encode_state *encode_state,
2030+ struct intel_encoder_context *encoder_context,
2031+ struct gen9_vdenc_img_state *pstate,
2032+ int update_cost)
2033+{
2034+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2035+ VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2036+ VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2037+ VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
2038+
2039+ memset(pstate, 0, sizeof(*pstate));
2040+
2041+ pstate->dw0.value = (VDENC_IMG_STATE | (sizeof(*pstate) / 4 - 2));
2042+
2043+ if (vdenc_context->frame_type == VDENC_FRAME_I) {
2044+ pstate->dw4.intra_sad_measure_adjustment = 2;
2045+ pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
2046+
2047+ pstate->dw5.cre_prefetch_enable = 1;
2048+
2049+ pstate->dw9.mode0_cost = 10;
2050+ pstate->dw9.mode1_cost = 0;
2051+ pstate->dw9.mode2_cost = 3;
2052+ pstate->dw9.mode3_cost = 30;
2053+
2054+ pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
2055+ pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
2056+ pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
2057+
2058+ pstate->dw22.small_mb_size_in_word = 0xff;
2059+ pstate->dw22.large_mb_size_in_word = 0xff;
2060+
2061+ pstate->dw27.max_hmv_r = 0x2000;
2062+ pstate->dw27.max_vmv_r = 0x200;
2063+
2064+ pstate->dw33.qp_range_check_upper_bound = 0x33;
2065+ pstate->dw33.qp_range_check_lower_bound = 0x0a;
2066+ pstate->dw33.qp_range_check_value = 0x0f;
2067+ } else {
2068+ pstate->dw2.bidirectional_weight = 0x20;
2069+
2070+ pstate->dw4.subpel_mode = 3;
2071+ pstate->dw4.bme_disable_for_fbr_message = 1;
2072+ pstate->dw4.inter_sad_measure_adjustment = 2;
2073+ pstate->dw4.intra_sad_measure_adjustment = 2;
2074+ pstate->dw4.sub_macroblock_sub_partition_mask = 0x70;
2075+
2076+ pstate->dw5.cre_prefetch_enable = 1;
2077+
2078+ pstate->dw8.non_skip_zero_mv_const_added = 1;
2079+ pstate->dw8.non_skip_mb_mode_const_added = 1;
2080+ pstate->dw8.ref_id_cost_mode_select = 1;
2081+
2082+ pstate->dw9.mode0_cost = 7;
2083+ pstate->dw9.mode1_cost = 26;
2084+ pstate->dw9.mode2_cost = 30;
2085+ pstate->dw9.mode3_cost = 57;
2086+
2087+ pstate->dw10.mode4_cost = 8;
2088+ pstate->dw10.mode5_cost = 2;
2089+ pstate->dw10.mode6_cost = 4;
2090+ pstate->dw10.mode7_cost = 6;
2091+
2092+ pstate->dw11.mode8_cost = 5;
2093+ pstate->dw11.mode9_cost = 0;
2094+ pstate->dw11.ref_id_cost = 4;
2095+ pstate->dw11.chroma_intra_mode_cost = 0;
2096+
2097+ pstate->dw12_13.mv_cost.dw0.mv0_cost = 0;
2098+ pstate->dw12_13.mv_cost.dw0.mv1_cost = 6;
2099+ pstate->dw12_13.mv_cost.dw0.mv2_cost = 6;
2100+ pstate->dw12_13.mv_cost.dw0.mv3_cost = 9;
2101+ pstate->dw12_13.mv_cost.dw1.mv4_cost = 10;
2102+ pstate->dw12_13.mv_cost.dw1.mv5_cost = 13;
2103+ pstate->dw12_13.mv_cost.dw1.mv6_cost = 14;
2104+ pstate->dw12_13.mv_cost.dw1.mv7_cost = 24;
2105+
2106+ pstate->dw20.penalty_for_intra_16x16_non_dc_prediction = 36;
2107+ pstate->dw20.penalty_for_intra_8x8_non_dc_prediction = 12;
2108+ pstate->dw20.penalty_for_intra_4x4_non_dc_prediction = 4;
2109+
2110+ pstate->dw22.small_mb_size_in_word = 0xff;
2111+ pstate->dw22.large_mb_size_in_word = 0xff;
2112+
2113+ pstate->dw27.max_hmv_r = 0x2000;
2114+ pstate->dw27.max_vmv_r = 0x200;
2115+
2116+ pstate->dw31.offset0_for_zone0_neg_zone1_boundary = 800;
2117+
2118+ pstate->dw32.offset1_for_zone1_neg_zone2_boundary = 1600;
2119+ pstate->dw32.offset2_for_zone2_neg_zone3_boundary = 2400;
2120+
2121+ pstate->dw33.qp_range_check_upper_bound = 0x33;
2122+ pstate->dw33.qp_range_check_lower_bound = 0x0a;
2123+ pstate->dw33.qp_range_check_value = 0x0f;
2124+
2125+ pstate->dw34.midpoint_distortion = 0x640;
2126+ }
2127+
2128+ /* ROI will be updated in HuC kernel for CBR/VBR */
2129+ if (!vdenc_context->brc_enabled && vdenc_context->num_roi) {
2130+ pstate->dw34.roi_enable = 1;
2131+
2132+ pstate->dw30.roi_qp_adjustment_for_zone1 = CLAMP(-8, 7, vdenc_context->roi[0].value);
2133+
2134+ if (vdenc_context->num_roi > 1)
2135+ pstate->dw30.roi_qp_adjustment_for_zone2 = CLAMP(-8, 7, vdenc_context->roi[1].value);
2136+
2137+ if (vdenc_context->num_roi > 2)
2138+ pstate->dw30.roi_qp_adjustment_for_zone3 = CLAMP(-8, 7, vdenc_context->roi[2].value);
2139+ }
2140+
2141+ pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
2142+
2143+ pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;
2144+
2145+ pstate->dw4.forward_transform_skip_check_enable = 1; /* TODO: double-check it */
2146+
2147+ pstate->dw5.picture_height_minus1 = vdenc_context->frame_height_in_mbs - 1;
2148+ pstate->dw5.picture_type = vdenc_context->frame_type;
2149+ pstate->dw5.constrained_intra_prediction_flag = pic_param->pic_fields.bits.constrained_intra_pred_flag;
2150+
2151+ if (vdenc_context->frame_type == VDENC_FRAME_P) {
2152+ pstate->dw5.hme_ref1_disable = vdenc_context->num_refs[0] == 1 ? 1 : 0;
2153+ }
2154+
2155+ pstate->dw5.mb_slice_threshold_value = 0;
2156+
2157+ pstate->dw6.slice_macroblock_height_minus1 = vdenc_context->frame_height_in_mbs - 1; /* single slice onlye */
2158+
2159+ if (pstate->dw1.transform_8x8_flag)
2160+ pstate->dw8.luma_intra_partition_mask = 0;
2161+ else
2162+ pstate->dw8.luma_intra_partition_mask = (1 << 1); /* disable transform_8x8 */
2163+
2164+ pstate->dw14.qp_prime_y = pic_param->pic_init_qp + slice_param->slice_qp_delta; /* TODO: check whether it is OK to use the first slice only */
2165+
2166+ if (update_cost) {
2167+ pstate->dw9.mode0_cost = vdenc_context->mode_cost[0];
2168+ pstate->dw9.mode1_cost = vdenc_context->mode_cost[1];
2169+ pstate->dw9.mode2_cost = vdenc_context->mode_cost[2];
2170+ pstate->dw9.mode3_cost = vdenc_context->mode_cost[3];
2171+
2172+ pstate->dw10.mode4_cost = vdenc_context->mode_cost[4];
2173+ pstate->dw10.mode5_cost = vdenc_context->mode_cost[5];
2174+ pstate->dw10.mode6_cost = vdenc_context->mode_cost[6];
2175+ pstate->dw10.mode7_cost = vdenc_context->mode_cost[7];
2176+
2177+ pstate->dw11.mode8_cost = vdenc_context->mode_cost[8];
2178+ pstate->dw11.mode9_cost = vdenc_context->mode_cost[9];
2179+ pstate->dw11.ref_id_cost = vdenc_context->mode_cost[10];
2180+ pstate->dw11.chroma_intra_mode_cost = vdenc_context->mode_cost[11];
2181+
2182+ pstate->dw12_13.mv_cost.dw0.mv0_cost = vdenc_context->mv_cost[0];
2183+ pstate->dw12_13.mv_cost.dw0.mv1_cost = vdenc_context->mv_cost[1];
2184+ pstate->dw12_13.mv_cost.dw0.mv2_cost = vdenc_context->mv_cost[2];
2185+ pstate->dw12_13.mv_cost.dw0.mv3_cost = vdenc_context->mv_cost[3];
2186+ pstate->dw12_13.mv_cost.dw1.mv4_cost = vdenc_context->mv_cost[4];
2187+ pstate->dw12_13.mv_cost.dw1.mv5_cost = vdenc_context->mv_cost[5];
2188+ pstate->dw12_13.mv_cost.dw1.mv6_cost = vdenc_context->mv_cost[6];
2189+ pstate->dw12_13.mv_cost.dw1.mv7_cost = vdenc_context->mv_cost[7];
2190+
2191+ pstate->dw28_29.hme_mv_cost.dw0.mv0_cost = vdenc_context->hme_mv_cost[0];
2192+ pstate->dw28_29.hme_mv_cost.dw0.mv1_cost = vdenc_context->hme_mv_cost[1];
2193+ pstate->dw28_29.hme_mv_cost.dw0.mv2_cost = vdenc_context->hme_mv_cost[2];
2194+ pstate->dw28_29.hme_mv_cost.dw0.mv3_cost = vdenc_context->hme_mv_cost[3];
2195+ pstate->dw28_29.hme_mv_cost.dw1.mv4_cost = vdenc_context->hme_mv_cost[4];
2196+ pstate->dw28_29.hme_mv_cost.dw1.mv5_cost = vdenc_context->hme_mv_cost[5];
2197+ pstate->dw28_29.hme_mv_cost.dw1.mv6_cost = vdenc_context->hme_mv_cost[6];
2198+ pstate->dw28_29.hme_mv_cost.dw1.mv7_cost = vdenc_context->hme_mv_cost[7];
2199+ }
2200+
2201+ pstate->dw27.max_vmv_r = gen9_vdenc_get_max_vmv_range(seq_param->level_idc);
2202+
2203+ pstate->dw34.image_state_qp_override = (vdenc_context->internal_rate_mode == I965_BRC_CQP) ? 1 : 0;
2204+
2205+ /* TODO: check rolling I */
2206+
2207+ /* TODO: handle ROI */
2208+
2209+ /* TODO: check stream in support */
2210+}
2211+
2212+static void
2213+gen9_vdenc_init_img_states(VADriverContextP ctx,
2214+ struct encode_state *encode_state,
2215+ struct intel_encoder_context *encoder_context)
2216+{
2217+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2218+ struct gen9_mfx_avc_img_state *mfx_img_cmd;
2219+ struct gen9_vdenc_img_state *vdenc_img_cmd;
2220+ char *pbuffer;
2221+
2222+ pbuffer = i965_map_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2223+
2224+ mfx_img_cmd = (struct gen9_mfx_avc_img_state *)pbuffer;
2225+ gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, mfx_img_cmd);
2226+ pbuffer += sizeof(*mfx_img_cmd);
2227+
2228+ vdenc_img_cmd = (struct gen9_vdenc_img_state *)pbuffer;
2229+ gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, vdenc_img_cmd, 0);
2230+ pbuffer += sizeof(*vdenc_img_cmd);
2231+
2232+ /* Add batch buffer end command */
2233+ *((unsigned int *)pbuffer) = MI_BATCH_BUFFER_END;
2234+
2235+ i965_unmap_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
2236+}
2237+
2238+static void
2239+gen9_vdenc_huc_brc_update_constant_data(VADriverContextP ctx,
2240+ struct encode_state *encode_state,
2241+ struct intel_encoder_context *encoder_context)
2242+{
2243+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2244+ char *pbuffer;
2245+
2246+ pbuffer = i965_map_gpe_resource(&vdenc_context->brc_constant_data_res);
2247+
2248+ if (vdenc_context->internal_rate_mode == I965_BRC_VBR) {
2249+ memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_i, dist_qp_adj_tab_i_vbr, sizeof(dist_qp_adj_tab_i_vbr));
2250+ memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_p, dist_qp_adj_tab_p_vbr, sizeof(dist_qp_adj_tab_p_vbr));
2251+ memcpy(gen9_brc_update_constant_data.dist_qp_adj_tab_b, dist_qp_adj_tab_b_vbr, sizeof(dist_qp_adj_tab_b_vbr));
2252+ memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_i, buf_rate_adj_tab_i_vbr, sizeof(buf_rate_adj_tab_i_vbr));
2253+ memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_p, buf_rate_adj_tab_p_vbr, sizeof(buf_rate_adj_tab_p_vbr));
2254+ memcpy(gen9_brc_update_constant_data.buf_rate_adj_tab_b, buf_rate_adj_tab_b_vbr, sizeof(buf_rate_adj_tab_b_vbr));
2255+ }
2256+
2257+ memcpy(pbuffer, &gen9_brc_update_constant_data, sizeof(gen9_brc_update_constant_data));
2258+
2259+ i965_unmap_gpe_resource(&vdenc_context->brc_constant_data_res);
2260+}
2261+
2262+static void
2263+gen9_vdenc_huc_brc_update(VADriverContextP ctx,
2264+ struct encode_state *encode_state,
2265+ struct intel_encoder_context *encoder_context)
2266+{
2267+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2268+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2269+ struct huc_pipe_mode_select_parameter pipe_mode_select_params;
2270+ struct huc_imem_state_parameter imem_state_params;
2271+ struct huc_dmem_state_parameter dmem_state_params;
2272+ struct huc_virtual_addr_parameter virtual_addr_params;
2273+ struct huc_ind_obj_base_addr_parameter ind_obj_base_addr_params;
2274+ struct huc_stream_object_parameter stream_object_params;
2275+ struct huc_start_parameter start_params;
2276+ struct vd_pipeline_flush_parameter pipeline_flush_params;
2277+ struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
2278+ struct gpe_mi_store_data_imm_parameter mi_store_data_imm_params;
2279+ struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
2280+
2281+ memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2282+ mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2283+ gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2284+
2285+ if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset) {
2286+ struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
2287+
2288+ memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
2289+ mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
2290+ gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
2291+ }
2292+
2293+ gen9_vdenc_init_img_states(ctx, encode_state, encoder_context);
2294+
2295+ memset(&imem_state_params, 0, sizeof(imem_state_params));
2296+ imem_state_params.huc_firmware_descriptor = HUC_BRC_UPDATE;
2297+ gen9_vdenc_huc_imem_state(ctx, encoder_context, &imem_state_params);
2298+
2299+ memset(&pipe_mode_select_params, 0, sizeof(pipe_mode_select_params));
2300+ gen9_vdenc_huc_pipe_mode_select(ctx, encoder_context, &pipe_mode_select_params);
2301+
2302+ gen9_vdenc_update_huc_update_dmem(ctx, encoder_context);
2303+ memset(&dmem_state_params, 0, sizeof(dmem_state_params));
2304+ dmem_state_params.huc_data_source_res = &vdenc_context->brc_update_dmem_res[vdenc_context->current_pass];
2305+ dmem_state_params.huc_data_destination_base_address = HUC_DMEM_DATA_OFFSET;
2306+ dmem_state_params.huc_data_length = ALIGN(sizeof(struct huc_brc_update_dmem), 64);
2307+ gen9_vdenc_huc_dmem_state(ctx, encoder_context, &dmem_state_params);
2308+
2309+ gen9_vdenc_huc_brc_update_constant_data(ctx, encode_state, encoder_context);
2310+ memset(&virtual_addr_params, 0, sizeof(virtual_addr_params));
2311+ virtual_addr_params.regions[0].huc_surface_res = &vdenc_context->brc_history_buffer_res;
2312+ virtual_addr_params.regions[0].is_target = 1;
2313+ virtual_addr_params.regions[1].huc_surface_res = &vdenc_context->vdenc_statistics_res;
2314+ virtual_addr_params.regions[2].huc_surface_res = &vdenc_context->pak_statistics_res;
2315+ virtual_addr_params.regions[3].huc_surface_res = &vdenc_context->vdenc_avc_image_state_res;
2316+ virtual_addr_params.regions[4].huc_surface_res = &vdenc_context->hme_detection_summary_buffer_res;
2317+ virtual_addr_params.regions[4].is_target = 1;
2318+ virtual_addr_params.regions[5].huc_surface_res = &vdenc_context->brc_constant_data_res;
2319+ virtual_addr_params.regions[6].huc_surface_res = &vdenc_context->second_level_batch_res;
2320+ virtual_addr_params.regions[6].is_target = 1;
2321+ gen9_vdenc_huc_virtual_addr_state(ctx, encoder_context, &virtual_addr_params);
2322+
2323+ memset(&ind_obj_base_addr_params, 0, sizeof(ind_obj_base_addr_params));
2324+ ind_obj_base_addr_params.huc_indirect_stream_in_object_res = &vdenc_context->huc_dummy_res;
2325+ ind_obj_base_addr_params.huc_indirect_stream_out_object_res = NULL;
2326+ gen9_vdenc_huc_ind_obj_base_addr_state(ctx, encoder_context, &ind_obj_base_addr_params);
2327+
2328+ memset(&stream_object_params, 0, sizeof(stream_object_params));
2329+ stream_object_params.indirect_stream_in_data_length = 1;
2330+ stream_object_params.indirect_stream_in_start_address = 0;
2331+ gen9_vdenc_huc_stream_object(ctx, encoder_context, &stream_object_params);
2332+
2333+ gen9_vdenc_huc_store_huc_status2(ctx, encoder_context);
2334+
2335+ memset(&start_params, 0, sizeof(start_params));
2336+ start_params.last_stream_object = 1;
2337+ gen9_vdenc_huc_start(ctx, encoder_context, &start_params);
2338+
2339+ memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
2340+ pipeline_flush_params.hevc_pipeline_done = 1;
2341+ pipeline_flush_params.hevc_pipeline_command_flush = 1;
2342+ gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
2343+
2344+ memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
2345+ mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
2346+ gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
2347+
2348+ /* Store HUC_STATUS */
2349+ memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
2350+ mi_store_register_mem_params.mmio_offset = VCS0_HUC_STATUS;
2351+ mi_store_register_mem_params.bo = vdenc_context->huc_status_res.bo;
2352+ gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
2353+
2354+ /* Write HUC_STATUS mask (1 << 31) */
2355+ memset(&mi_store_data_imm_params, 0, sizeof(mi_store_data_imm_params));
2356+ mi_store_data_imm_params.bo = vdenc_context->huc_status_res.bo;
2357+ mi_store_data_imm_params.offset = 4;
2358+ mi_store_data_imm_params.dw0 = (1 << 31);
2359+ gen9_gpe_mi_store_data_imm(ctx, batch, &mi_store_data_imm_params);
2360+}
2361+
2362+static void
2363+gen9_vdenc_mfx_pipe_mode_select(VADriverContextP ctx,
2364+ struct encode_state *encode_state,
2365+ struct intel_encoder_context *encoder_context)
2366+{
2367+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2368+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2369+
2370+ BEGIN_BCS_BATCH(batch, 5);
2371+
2372+ OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
2373+ OUT_BCS_BATCH(batch,
2374+ (1 << 29) |
2375+ (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
2376+ (MFD_MODE_VLD << 15) |
2377+ (1 << 13) | /* VDEnc mode */
2378+ ((!!vdenc_context->post_deblocking_output_res.bo) << 9) | /* Post Deblocking Output */
2379+ ((!!vdenc_context->pre_deblocking_output_res.bo) << 8) | /* Pre Deblocking Output */
2380+ (1 << 7) | /* Scaled surface enable */
2381+ (1 << 6) | /* Frame statistics stream out enable, always '1' in VDEnc mode */
2382+ (1 << 4) | /* encoding mode */
2383+ (MFX_FORMAT_AVC << 0));
2384+ OUT_BCS_BATCH(batch, 0);
2385+ OUT_BCS_BATCH(batch, 0);
2386+ OUT_BCS_BATCH(batch, 0);
2387+
2388+ ADVANCE_BCS_BATCH(batch);
2389+}
2390+
2391+static void
2392+gen9_vdenc_mfx_surface_state(VADriverContextP ctx,
2393+ struct intel_encoder_context *encoder_context,
2394+ struct i965_gpe_resource *gpe_resource,
2395+ int id)
2396+{
2397+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2398+
2399+ BEGIN_BCS_BATCH(batch, 6);
2400+
2401+ OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
2402+ OUT_BCS_BATCH(batch, id);
2403+ OUT_BCS_BATCH(batch,
2404+ ((gpe_resource->height - 1) << 18) |
2405+ ((gpe_resource->width - 1) << 4));
2406+ OUT_BCS_BATCH(batch,
2407+ (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
2408+ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
2409+ ((gpe_resource->pitch - 1) << 3) | /* pitch */
2410+ (0 << 2) | /* must be 0 for interleave U/V */
2411+ (1 << 1) | /* must be tiled */
2412+ (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
2413+ OUT_BCS_BATCH(batch,
2414+ (0 << 16) | /* must be 0 for interleave U/V */
2415+ (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
2416+ OUT_BCS_BATCH(batch,
2417+ (0 << 16) | /* must be 0 for interleave U/V */
2418+ (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
2419+
2420+ ADVANCE_BCS_BATCH(batch);
2421+}
2422+
2423+static void
2424+gen9_vdenc_mfx_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2425+{
2426+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2427+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2428+ int i;
2429+
2430+ BEGIN_BCS_BATCH(batch, 65);
2431+
2432+ OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (65 - 2));
2433+
2434+ /* the DW1-3 is for pre_deblocking */
2435+ OUT_BUFFER_3DW(batch, vdenc_context->pre_deblocking_output_res.bo, 1, 0, 0);
2436+
2437+ /* the DW4-6 is for the post_deblocking */
2438+ OUT_BUFFER_3DW(batch, vdenc_context->post_deblocking_output_res.bo, 1, 0, 0);
2439+
2440+ /* the DW7-9 is for the uncompressed_picture */
2441+ OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2442+
2443+ /* the DW10-12 is for PAK information (write) */
2444+ OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 1, 0, 0);
2445+
2446+ /* the DW13-15 is for the intra_row_store_scratch */
2447+ OUT_BUFFER_3DW(batch, vdenc_context->mfx_intra_row_store_scratch_res.bo, 1, 0, 0);
2448+
2449+ /* the DW16-18 is for the deblocking filter */
2450+ OUT_BUFFER_3DW(batch, vdenc_context->mfx_deblocking_filter_row_store_scratch_res.bo, 1, 0, 0);
2451+
2452+ /* the DW 19-50 is for Reference pictures*/
2453+ for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
2454+ OUT_BUFFER_2DW(batch, vdenc_context->list_reference_res[i].bo, 0, 0);
2455+ }
2456+
2457+ /* DW 51, reference picture attributes */
2458+ OUT_BCS_BATCH(batch, 0);
2459+
2460+ /* The DW 52-54 is for PAK information (read) */
2461+ OUT_BUFFER_3DW(batch, vdenc_context->pak_statistics_res.bo, 0, 0, 0);
2462+
2463+ /* the DW 55-57 is the ILDB buffer */
2464+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2465+
2466+ /* the DW 58-60 is the second ILDB buffer */
2467+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2468+
2469+ /* DW 61, memory compress enable & mode */
2470+ OUT_BCS_BATCH(batch, 0);
2471+
2472+ /* the DW 62-64 is the 4x Down Scaling surface */
2473+ OUT_BUFFER_3DW(batch, vdenc_context->scaled_4x_recon_surface_res.bo, 0, 0, 0);
2474+
2475+ ADVANCE_BCS_BATCH(batch);
2476+}
2477+
2478+static void
2479+gen9_vdenc_mfx_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2480+{
2481+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2482+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2483+
2484+ BEGIN_BCS_BATCH(batch, 26);
2485+
2486+ OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
2487+ /* The DW1-5 is for the MFX indirect bistream offset, ignore for VDEnc mode */
2488+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2489+ OUT_BUFFER_2DW(batch, NULL, 0, 0);
2490+
2491+ /* the DW6-10 is for MFX Indirect MV Object Base Address, ignore for VDEnc mode */
2492+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2493+ OUT_BUFFER_2DW(batch, NULL, 0, 0);
2494+
2495+ /* The DW11-15 is for MFX IT-COFF. Not used on encoder */
2496+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2497+ OUT_BUFFER_2DW(batch, NULL, 0, 0);
2498+
2499+ /* The DW16-20 is for MFX indirect DBLK. Not used on encoder */
2500+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2501+ OUT_BUFFER_2DW(batch, NULL, 0, 0);
2502+
2503+ /* The DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder
2504+ * Note: an offset is specified in MFX_AVC_SLICE_STATE
2505+ */
2506+ OUT_BUFFER_3DW(batch,
2507+ vdenc_context->compressed_bitstream.res.bo,
2508+ 1,
2509+ 0,
2510+ 0);
2511+ OUT_BUFFER_2DW(batch,
2512+ vdenc_context->compressed_bitstream.res.bo,
2513+ 1,
2514+ vdenc_context->compressed_bitstream.end_offset);
2515+
2516+ ADVANCE_BCS_BATCH(batch);
2517+}
2518+
2519+static void
2520+gen9_vdenc_mfx_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2521+{
2522+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2523+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2524+
2525+ BEGIN_BCS_BATCH(batch, 10);
2526+
2527+ OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
2528+
2529+ /* The DW1-3 is for bsd/mpc row store scratch buffer */
2530+ OUT_BUFFER_3DW(batch, vdenc_context->mfx_bsd_mpc_row_store_scratch_res.bo, 1, 0, 0);
2531+
2532+ /* The DW4-6 is for MPR Row Store Scratch Buffer Base Address, ignore for encoder */
2533+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2534+
2535+ /* The DW7-9 is for Bitplane Read Buffer Base Address, ignore for encoder */
2536+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2537+
2538+ ADVANCE_BCS_BATCH(batch);
2539+}
2540+
2541+static void
2542+gen9_vdenc_mfx_qm_state(VADriverContextP ctx,
2543+ int qm_type,
2544+ unsigned int *qm,
2545+ int qm_length,
2546+ struct intel_encoder_context *encoder_context)
2547+{
2548+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2549+ unsigned int qm_buffer[16];
2550+
2551+ assert(qm_length <= 16);
2552+ assert(sizeof(*qm) == 4);
2553+ memcpy(qm_buffer, qm, qm_length * 4);
2554+
2555+ BEGIN_BCS_BATCH(batch, 18);
2556+ OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
2557+ OUT_BCS_BATCH(batch, qm_type << 0);
2558+ intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
2559+ ADVANCE_BCS_BATCH(batch);
2560+}
2561+
2562+static void
2563+gen9_vdenc_mfx_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2564+{
2565+ /* TODO: add support for non flat matrix */
2566+ unsigned int qm[16] = {
2567+ 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2568+ 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2569+ 0x10101010, 0x10101010, 0x10101010, 0x10101010,
2570+ 0x10101010, 0x10101010, 0x10101010, 0x10101010
2571+ };
2572+
2573+ gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
2574+ gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
2575+ gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
2576+ gen9_vdenc_mfx_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
2577+}
2578+
2579+static void
2580+gen9_vdenc_mfx_fqm_state(VADriverContextP ctx,
2581+ int fqm_type,
2582+ unsigned int *fqm,
2583+ int fqm_length,
2584+ struct intel_encoder_context *encoder_context)
2585+{
2586+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2587+ unsigned int fqm_buffer[32];
2588+
2589+ assert(fqm_length <= 32);
2590+ assert(sizeof(*fqm) == 4);
2591+ memcpy(fqm_buffer, fqm, fqm_length * 4);
2592+
2593+ BEGIN_BCS_BATCH(batch, 34);
2594+ OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
2595+ OUT_BCS_BATCH(batch, fqm_type << 0);
2596+ intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
2597+ ADVANCE_BCS_BATCH(batch);
2598+}
2599+
2600+static void
2601+gen9_vdenc_mfx_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
2602+{
2603+ /* TODO: add support for non flat matrix */
2604+ unsigned int qm[32] = {
2605+ 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2606+ 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2607+ 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2608+ 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2609+ 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2610+ 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2611+ 0x10001000, 0x10001000, 0x10001000, 0x10001000,
2612+ 0x10001000, 0x10001000, 0x10001000, 0x10001000
2613+ };
2614+
2615+ gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
2616+ gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
2617+ gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
2618+ gen9_vdenc_mfx_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
2619+}
2620+
2621+static void
2622+gen9_vdenc_mfx_avc_img_state(VADriverContextP ctx,
2623+ struct encode_state *encode_state,
2624+ struct intel_encoder_context *encoder_context)
2625+{
2626+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2627+ struct gen9_mfx_avc_img_state mfx_img_cmd;
2628+
2629+ gen9_vdenc_init_mfx_avc_img_state(ctx, encode_state, encoder_context, &mfx_img_cmd);
2630+
2631+ BEGIN_BCS_BATCH(batch, (sizeof(mfx_img_cmd) >> 2));
2632+ intel_batchbuffer_data(batch, &mfx_img_cmd, sizeof(mfx_img_cmd));
2633+ ADVANCE_BCS_BATCH(batch);
2634+}
2635+
2636+static void
2637+gen9_vdenc_vdenc_pipe_mode_select(VADriverContextP ctx,
2638+ struct encode_state *encode_state,
2639+ struct intel_encoder_context *encoder_context)
2640+{
2641+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2642+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2643+
2644+ BEGIN_BCS_BATCH(batch, 2);
2645+
2646+ OUT_BCS_BATCH(batch, VDENC_PIPE_MODE_SELECT | (2 - 2));
2647+ OUT_BCS_BATCH(batch,
2648+ (vdenc_context->vdenc_streamin_enable << 9) |
2649+ (vdenc_context->vdenc_pak_threshold_check_enable << 8) |
2650+ (1 << 7) | /* Tlb prefetch enable */
2651+ (1 << 5) | /* Frame Statistics Stream-Out Enable */
2652+ (VDENC_CODEC_AVC << 0));
2653+
2654+ ADVANCE_BCS_BATCH(batch);
2655+}
2656+
2657+static void
2658+gen9_vdenc_vdenc_surface_state(VADriverContextP ctx,
2659+ struct intel_encoder_context *encoder_context,
2660+ struct i965_gpe_resource *gpe_resource,
2661+ int vdenc_surface_cmd)
2662+{
2663+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2664+
2665+ BEGIN_BCS_BATCH(batch, 6);
2666+
2667+ OUT_BCS_BATCH(batch, vdenc_surface_cmd | (6 - 2));
2668+ OUT_BCS_BATCH(batch, 0);
2669+ OUT_BCS_BATCH(batch,
2670+ ((gpe_resource->height - 1) << 18) |
2671+ ((gpe_resource->width - 1) << 4));
2672+ OUT_BCS_BATCH(batch,
2673+ (VDENC_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface only on SKL */
2674+ (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
2675+ ((gpe_resource->pitch - 1) << 3) | /* pitch */
2676+ (0 << 2) | /* must be 0 for interleave U/V */
2677+ (1 << 1) | /* must be tiled */
2678+ (I965_TILEWALK_YMAJOR << 0)); /* tile walk, TILEWALK_YMAJOR */
2679+ OUT_BCS_BATCH(batch,
2680+ (0 << 16) | /* must be 0 for interleave U/V */
2681+ (gpe_resource->y_cb_offset)); /* y offset for U(cb) */
2682+ OUT_BCS_BATCH(batch,
2683+ (0 << 16) | /* must be 0 for interleave U/V */
2684+ (gpe_resource->y_cb_offset)); /* y offset for v(cr) */
2685+
2686+ ADVANCE_BCS_BATCH(batch);
2687+}
2688+
2689+static void
2690+gen9_vdenc_vdenc_src_surface_state(VADriverContextP ctx,
2691+ struct intel_encoder_context *encoder_context,
2692+ struct i965_gpe_resource *gpe_resource)
2693+{
2694+ gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_SRC_SURFACE_STATE);
2695+}
2696+
2697+static void
2698+gen9_vdenc_vdenc_ref_surface_state(VADriverContextP ctx,
2699+ struct intel_encoder_context *encoder_context,
2700+ struct i965_gpe_resource *gpe_resource)
2701+{
2702+ gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_REF_SURFACE_STATE);
2703+}
2704+
2705+static void
2706+gen9_vdenc_vdenc_ds_ref_surface_state(VADriverContextP ctx,
2707+ struct intel_encoder_context *encoder_context,
2708+ struct i965_gpe_resource *gpe_resource)
2709+{
2710+ gen9_vdenc_vdenc_surface_state(ctx, encoder_context, gpe_resource, VDENC_DS_REF_SURFACE_STATE);
2711+}
2712+
2713+static void
2714+gen9_vdenc_vdenc_pipe_buf_addr_state(VADriverContextP ctx,
2715+ struct encode_state *encode_state,
2716+ struct intel_encoder_context *encoder_context)
2717+{
2718+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2719+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2720+
2721+ BEGIN_BCS_BATCH(batch, 37);
2722+
2723+ OUT_BCS_BATCH(batch, VDENC_PIPE_BUF_ADDR_STATE | (37 - 2));
2724+
2725+ /* DW1-6 for DS FWD REF0/REF1 */
2726+ OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2727+ OUT_BUFFER_3DW(batch, vdenc_context->list_scaled_4x_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2728+
2729+ /* DW7-9 for DS BWD REF0, ignored on SKL */
2730+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2731+
2732+ /* DW10-12 for uncompressed input data */
2733+ OUT_BUFFER_3DW(batch, vdenc_context->uncompressed_input_surface_res.bo, 0, 0, 0);
2734+
2735+ /* DW13-DW15 for streamin data */
2736+ if (vdenc_context->vdenc_streamin_enable)
2737+ OUT_BUFFER_3DW(batch, vdenc_context->vdenc_streamin_res.bo, 0, 0, 0);
2738+ else
2739+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2740+
2741+ /* DW16-DW18 for row scratch buffer */
2742+ OUT_BUFFER_3DW(batch, vdenc_context->vdenc_row_store_scratch_res.bo, 1, 0, 0);
2743+
2744+ /* DW19-DW21, ignored on SKL */
2745+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2746+
2747+ /* DW22-DW27 for FWD REF0/REF1 */
2748+ OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][0]].bo, 0, 0, 0);
2749+ OUT_BUFFER_3DW(batch, vdenc_context->list_reference_res[vdenc_context->list_ref_idx[0][1]].bo, 0, 0, 0);
2750+
2751+ /* DW28-DW30 for FWD REF2, ignored on SKL */
2752+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2753+
2754+ /* DW31-DW33 for BDW REF0, ignored on SKL */
2755+ OUT_BUFFER_3DW(batch, NULL, 0, 0, 0);
2756+
2757+ /* DW34-DW36 for VDEnc statistics streamout */
2758+ OUT_BUFFER_3DW(batch, vdenc_context->vdenc_statistics_res.bo, 1, 0, 0);
2759+
2760+ ADVANCE_BCS_BATCH(batch);
2761+}
2762+
2763+static void
2764+gen9_vdenc_vdenc_const_qpt_state(VADriverContextP ctx,
2765+ struct encode_state *encode_state,
2766+ struct intel_encoder_context *encoder_context)
2767+{
2768+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2769+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2770+
2771+ BEGIN_BCS_BATCH(batch, 61);
2772+
2773+ OUT_BCS_BATCH(batch, VDENC_CONST_QPT_STATE | (61 - 2));
2774+
2775+ if (vdenc_context->frame_type == VDENC_FRAME_I) {
2776+ /* DW1-DW11 */
2777+ intel_batchbuffer_data(batch, vdenc_const_qp_lambda, sizeof(vdenc_const_qp_lambda));
2778+
2779+ /* DW12-DW25 */
2780+ intel_batchbuffer_data(batch, vdenc_const_skip_threshold, sizeof(vdenc_const_skip_threshold));
2781+
2782+ /* DW26-DW39 */
2783+ intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_0, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0));
2784+
2785+ /* DW40-DW46 */
2786+ intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_1, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1));
2787+
2788+ /* DW47-DW53 */
2789+ intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_2, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2));
2790+
2791+ /* DW54-DW60 */
2792+ intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_3, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3));
2793+ } else {
2794+ int i;
2795+
2796+ for (i = 0; i < 28; i++) {
2797+ vdenc_const_skip_threshold_p[i] *= 3;
2798+ }
2799+
2800+ /* DW1-DW11 */
2801+ intel_batchbuffer_data(batch, vdenc_const_qp_lambda_p, sizeof(vdenc_const_qp_lambda_p));
2802+
2803+ /* DW12-DW25 */
2804+ intel_batchbuffer_data(batch, vdenc_const_skip_threshold_p, sizeof(vdenc_const_skip_threshold_p));
2805+
2806+ /* DW26-DW39 */
2807+ intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_0_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_0_p));
2808+
2809+ /* DW40-DW46 */
2810+ intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_1_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_1_p));
2811+
2812+ /* DW47-DW53 */
2813+ intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_2_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_2_p));
2814+
2815+ /* DW54-DW60 */
2816+ intel_batchbuffer_data(batch, vdenc_const_sic_forward_transform_coeff_threshold_3_p, sizeof(vdenc_const_sic_forward_transform_coeff_threshold_3_p));
2817+ }
2818+
2819+ ADVANCE_BCS_BATCH(batch);
2820+}
2821+
2822+static void
2823+gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
2824+ struct encode_state *encode_state,
2825+ struct intel_encoder_context *encoder_context)
2826+{
2827+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2828+
2829+ BEGIN_BCS_BATCH(batch, 2);
2830+
2831+ OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (2 - 2));
2832+ OUT_BCS_BATCH(batch, 0); /* All fields are set to 0 */
2833+
2834+ ADVANCE_BCS_BATCH(batch);
2835+}
2836+
2837+static void
2838+gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
2839+ struct encode_state *encode_state,
2840+ struct intel_encoder_context *encoder_context)
2841+{
2842+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2843+ struct gen9_vdenc_img_state vdenc_img_cmd;
2844+
2845+ gen9_vdenc_init_vdenc_img_state(ctx, encode_state, encoder_context, &vdenc_img_cmd, 1);
2846+
2847+ BEGIN_BCS_BATCH(batch, (sizeof(vdenc_img_cmd) >> 2));
2848+ intel_batchbuffer_data(batch, &vdenc_img_cmd, sizeof(vdenc_img_cmd));
2849+ ADVANCE_BCS_BATCH(batch);
2850+}
2851+
2852+extern int
2853+intel_avc_enc_slice_type_fixup(int slice_type);
2854+
2855+static void
2856+gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
2857+ struct intel_encoder_context *encoder_context,
2858+ unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
2859+ int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
2860+ int slice_header_indicator)
2861+{
2862+ struct intel_batchbuffer *batch = encoder_context->base.batch;
2863+
2864+ if (data_bits_in_last_dw == 0)
2865+ data_bits_in_last_dw = 32;
2866+
2867+ BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
2868+
2869+ OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws));
2870+ OUT_BCS_BATCH(batch,
2871+ (0 << 16) | /* always start at offset 0 */
2872+ (slice_header_indicator << 14) |
2873+ (data_bits_in_last_dw << 8) |
2874+ (skip_emul_byte_count << 4) |
2875+ (!!emulation_flag << 3) |
2876+ ((!!is_last_header) << 2) |
2877+ ((!!is_end_of_slice) << 1) |
2878+ (0 << 0)); /* TODO: check this flag */
2879+ intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
2880+
2881+ ADVANCE_BCS_BATCH(batch);
2882+}
2883+
2884+static void
2885+gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
2886+ struct encode_state *encode_state,
2887+ struct intel_encoder_context *encoder_context,
2888+ int slice_index)
2889+{
2890+ VAEncPackedHeaderParameterBuffer *param = NULL;
2891+ unsigned int length_in_bits;
2892+ unsigned int *header_data = NULL;
2893+ int count, i, start_index;
2894+ int slice_header_index;
2895+
2896+ if (encode_state->slice_header_index[slice_index] == 0)
2897+ slice_header_index = -1;
2898+ else
2899+ slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2900+
2901+ count = encode_state->slice_rawdata_count[slice_index];
2902+ start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
2903+
2904+ for (i = 0; i < count; i++) {
2905+ unsigned int skip_emul_byte_cnt;
2906+
2907+ header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
2908+
2909+ param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[start_index + i]->buffer);
2910+
2911+ /* skip the slice header packed data type as it is lastly inserted */
2912+ if (param->type == VAEncPackedHeaderSlice)
2913+ continue;
2914+
2915+ length_in_bits = param->bit_length;
2916+
2917+ skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2918+
2919+ /* as the slice header is still required, the last header flag is set to
2920+ * zero.
2921+ */
2922+ gen9_vdenc_mfx_avc_insert_object(ctx,
2923+ encoder_context,
2924+ header_data,
2925+ ALIGN(length_in_bits, 32) >> 5,
2926+ length_in_bits & 0x1f,
2927+ skip_emul_byte_cnt,
2928+ 0,
2929+ 0,
2930+ !param->has_emulation_bytes,
2931+ 0);
2932+ }
2933+
2934+ if (slice_header_index == -1) {
2935+ VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
2936+ VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
2937+ VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
2938+ unsigned char *slice_header = NULL;
2939+ int slice_header_length_in_bits = 0;
2940+
2941+ /* No slice header data is passed. And the driver needs to generate it */
2942+ /* For the Normal H264 */
2943+ slice_header_length_in_bits = build_avc_slice_header(seq_param,
2944+ pic_param,
2945+ slice_params,
2946+ &slice_header);
2947+ gen9_vdenc_mfx_avc_insert_object(ctx,
2948+ encoder_context,
2949+ (unsigned int *)slice_header,
2950+ ALIGN(slice_header_length_in_bits, 32) >> 5,
2951+ slice_header_length_in_bits & 0x1f,
2952+ 5, /* first 5 bytes are start code + nal unit type */
2953+ 1, 0, 1,
2954+ 1);
2955+
2956+ free(slice_header);
2957+ } else {
2958+ unsigned int skip_emul_byte_cnt;
2959+
2960+ header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
2961+
2962+ param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
2963+ length_in_bits = param->bit_length;
2964+
2965+ /* as the slice header is the last header data for one slice,
2966+ * the last header flag is set to one.
2967+ */
2968+ skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
2969+
2970+ gen9_vdenc_mfx_avc_insert_object(ctx,
2971+ encoder_context,
2972+ header_data,
2973+ ALIGN(length_in_bits, 32) >> 5,
2974+ length_in_bits & 0x1f,
2975+ skip_emul_byte_cnt,
2976+ 1,
2977+ 0,
2978+ !param->has_emulation_bytes,
2979+ 1);
2980+ }
2981+
2982+ return;
2983+}
2984+
2985+static void
2986+gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
2987+ struct encode_state *encode_state,
2988+ struct intel_encoder_context *encoder_context,
2989+ VAEncSliceParameterBufferH264 *slice_param,
2990+ int slice_index)
2991+{
2992+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
2993+ int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
2994+ unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
2995+ unsigned int skip_emul_byte_cnt;
2996+
2997+ if (slice_index == 0) {
2998+ if (encode_state->packed_header_data[idx]) {
2999+ VAEncPackedHeaderParameterBuffer *param = NULL;
3000+ unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3001+ unsigned int length_in_bits;
3002+
3003+ assert(encode_state->packed_header_param[idx]);
3004+ param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3005+ length_in_bits = param->bit_length;
3006+
3007+ skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3008+ gen9_vdenc_mfx_avc_insert_object(ctx,
3009+ encoder_context,
3010+ header_data,
3011+ ALIGN(length_in_bits, 32) >> 5,
3012+ length_in_bits & 0x1f,
3013+ skip_emul_byte_cnt,
3014+ 0,
3015+ 0,
3016+ !param->has_emulation_bytes,
3017+ 0);
3018+ }
3019+
3020+ idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
3021+
3022+ if (encode_state->packed_header_data[idx]) {
3023+ VAEncPackedHeaderParameterBuffer *param = NULL;
3024+ unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3025+ unsigned int length_in_bits;
3026+
3027+ assert(encode_state->packed_header_param[idx]);
3028+ param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3029+ length_in_bits = param->bit_length;
3030+
3031+ skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3032+
3033+ gen9_vdenc_mfx_avc_insert_object(ctx,
3034+ encoder_context,
3035+ header_data,
3036+ ALIGN(length_in_bits, 32) >> 5,
3037+ length_in_bits & 0x1f,
3038+ skip_emul_byte_cnt,
3039+ 0,
3040+ 0,
3041+ !param->has_emulation_bytes,
3042+ 0);
3043+ }
3044+
3045+ idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
3046+
3047+ if (encode_state->packed_header_data[idx]) {
3048+ VAEncPackedHeaderParameterBuffer *param = NULL;
3049+ unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
3050+ unsigned int length_in_bits;
3051+
3052+ assert(encode_state->packed_header_param[idx]);
3053+ param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
3054+ length_in_bits = param->bit_length;
3055+
3056+ skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
3057+ gen9_vdenc_mfx_avc_insert_object(ctx,
3058+ encoder_context,
3059+ header_data,
3060+ ALIGN(length_in_bits, 32) >> 5,
3061+ length_in_bits & 0x1f,
3062+ skip_emul_byte_cnt,
3063+ 0,
3064+ 0,
3065+ !param->has_emulation_bytes,
3066+ 0);
3067+ } else if (internal_rate_mode == I965_BRC_CBR) {
3068+ /* TODO: insert others */
3069+ }
3070+ }
3071+
3072+ gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
3073+ encode_state,
3074+ encoder_context,
3075+ slice_index);
3076+}
3077+
3078+static void
3079+gen9_vdenc_mfx_avc_slice_state(VADriverContextP ctx,
3080+ struct encode_state *encode_state,
3081+ struct intel_encoder_context *encoder_context,
3082+ VAEncPictureParameterBufferH264 *pic_param,
3083+ VAEncSliceParameterBufferH264 *slice_param,
3084+ VAEncSliceParameterBufferH264 *next_slice_param)
3085+{
3086+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3087+ struct intel_batchbuffer *batch = encoder_context->base.batch;
3088+ unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
3089+ unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
3090+ unsigned char correct[6], grow, shrink;
3091+ int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
3092+ int max_qp_n, max_qp_p;
3093+ int i;
3094+ int weighted_pred_idc = 0;
3095+ int num_ref_l0 = 0, num_ref_l1 = 0;
3096+ int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3097+ int slice_qp = pic_param->pic_init_qp + slice_param->slice_qp_delta; // TODO: fix for CBR&VBR */
3098+
3099+ slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3100+ slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_height_in_mbs;
3101+
3102+ if (next_slice_param) {
3103+ next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
3104+ next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_height_in_mbs;
3105+ } else {
3106+ next_slice_hor_pos = 0;
3107+ next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
3108+ }
3109+
3110+ if (slice_type == SLICE_TYPE_I) {
3111+ luma_log2_weight_denom = 0;
3112+ chroma_log2_weight_denom = 0;
3113+ } else if (slice_type == SLICE_TYPE_P) {
3114+ weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
3115+ num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3116+
3117+ if (slice_param->num_ref_idx_active_override_flag)
3118+ num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3119+ } else if (slice_type == SLICE_TYPE_B) {
3120+ weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
3121+ num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
3122+ num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
3123+
3124+ if (slice_param->num_ref_idx_active_override_flag) {
3125+ num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
3126+ num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
3127+ }
3128+
3129+ if (weighted_pred_idc == 2) {
3130+ /* 8.4.3 - Derivation process for prediction weights (8-279) */
3131+ luma_log2_weight_denom = 5;
3132+ chroma_log2_weight_denom = 5;
3133+ }
3134+ }
3135+
3136+ max_qp_n = 0; /* TODO: update it */
3137+ max_qp_p = 0; /* TODO: update it */
3138+ grow = 0; /* TODO: update it */
3139+ shrink = 0; /* TODO: update it */
3140+
3141+ for (i = 0; i < 6; i++)
3142+ correct[i] = 0; /* TODO: update it */
3143+
3144+ BEGIN_BCS_BATCH(batch, 11);
3145+
3146+ OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
3147+ OUT_BCS_BATCH(batch, slice_type);
3148+ OUT_BCS_BATCH(batch,
3149+ (num_ref_l0 << 16) |
3150+ (num_ref_l1 << 24) |
3151+ (chroma_log2_weight_denom << 8) |
3152+ (luma_log2_weight_denom << 0));
3153+ OUT_BCS_BATCH(batch,
3154+ (weighted_pred_idc << 30) |
3155+ (slice_param->direct_spatial_mv_pred_flag << 29) |
3156+ (slice_param->disable_deblocking_filter_idc << 27) |
3157+ (slice_param->cabac_init_idc << 24) |
3158+ (slice_qp << 16) |
3159+ ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
3160+ ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
3161+
3162+ OUT_BCS_BATCH(batch,
3163+ slice_ver_pos << 24 |
3164+ slice_hor_pos << 16 |
3165+ slice_param->macroblock_address);
3166+ OUT_BCS_BATCH(batch,
3167+ next_slice_ver_pos << 16 |
3168+ next_slice_hor_pos);
3169+
3170+ OUT_BCS_BATCH(batch,
3171+ (0 << 31) | /* TODO: ignore it for VDENC ??? */
3172+ (!slice_param->macroblock_address << 30) | /* ResetRateControlCounter */
3173+ (2 << 28) | /* Loose Rate Control */
3174+ (0 << 24) | /* RC Stable Tolerance */
3175+ (0 << 23) | /* RC Panic Enable */
3176+ (1 << 22) | /* CBP mode */
3177+ (0 << 21) | /* MB Type Direct Conversion, 0: Enable, 1: Disable */
3178+ (0 << 20) | /* MB Type Skip Conversion, 0: Enable, 1: Disable */
3179+ (!next_slice_param << 19) | /* Is Last Slice */
3180+ (0 << 18) | /* BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable */
3181+ (1 << 17) | /* HeaderPresentFlag */
3182+ (1 << 16) | /* SliceData PresentFlag */
3183+ (0 << 15) | /* TailPresentFlag, TODO: check it on VDEnc */
3184+ (1 << 13) | /* RBSP NAL TYPE */
3185+ (1 << 12)); /* CabacZeroWordInsertionEnable */
3186+
3187+ OUT_BCS_BATCH(batch, vdenc_context->compressed_bitstream.start_offset);
3188+
3189+ OUT_BCS_BATCH(batch,
3190+ (max_qp_n << 24) | /*Target QP - 24 is lowest QP*/
3191+ (max_qp_p << 16) | /*Target QP + 20 is highest QP*/
3192+ (shrink << 8) |
3193+ (grow << 0));
3194+ OUT_BCS_BATCH(batch,
3195+ (1 << 31) |
3196+ (3 << 28) |
3197+ (1 << 27) |
3198+ (5 << 24) |
3199+ (correct[5] << 20) |
3200+ (correct[4] << 16) |
3201+ (correct[3] << 12) |
3202+ (correct[2] << 8) |
3203+ (correct[1] << 4) |
3204+ (correct[0] << 0));
3205+ OUT_BCS_BATCH(batch, 0);
3206+
3207+ ADVANCE_BCS_BATCH(batch);
3208+}
3209+
3210+static uint8_t
3211+gen9_vdenc_mfx_get_ref_idx_state(VAPictureH264 *va_pic, unsigned int frame_store_id)
3212+{
3213+ unsigned int is_long_term =
3214+ !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
3215+ unsigned int is_top_field =
3216+ !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
3217+ unsigned int is_bottom_field =
3218+ !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
3219+
3220+ return ((is_long_term << 6) |
3221+ ((is_top_field ^ is_bottom_field ^ 1) << 5) |
3222+ (frame_store_id << 1) |
3223+ ((is_top_field ^ 1) & is_bottom_field));
3224+}
3225+
3226+static void
3227+gen9_vdenc_mfx_avc_ref_idx_state(VADriverContextP ctx,
3228+ struct encode_state *encode_state,
3229+ struct intel_encoder_context *encoder_context,
3230+ VAEncSliceParameterBufferH264 *slice_param)
3231+{
3232+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3233+ struct intel_batchbuffer *batch = encoder_context->base.batch;
3234+ VAPictureH264 *ref_pic;
3235+ int i, slice_type, ref_idx_shift;
3236+ unsigned int fwd_ref_entry;
3237+
3238+ fwd_ref_entry = 0x80808080;
3239+ slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3240+
3241+ for (i = 0; i < MAX(vdenc_context->num_refs[0], 2); i++) {
3242+ ref_pic = &slice_param->RefPicList0[i];
3243+ ref_idx_shift = vdenc_context->list_ref_idx[0][i] * 8;
3244+
3245+ fwd_ref_entry &= ~(0xFF << ref_idx_shift);
3246+ fwd_ref_entry += (gen9_vdenc_mfx_get_ref_idx_state(ref_pic, vdenc_context->list_ref_idx[0][i]) << ref_idx_shift);
3247+ }
3248+
3249+ if (slice_type == SLICE_TYPE_P) {
3250+ BEGIN_BCS_BATCH(batch, 10);
3251+ OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
3252+ OUT_BCS_BATCH(batch, 0); // L0
3253+ OUT_BCS_BATCH(batch, fwd_ref_entry);
3254+
3255+ for (i = 0; i < 7; i++) {
3256+ OUT_BCS_BATCH(batch, 0x80808080);
3257+ }
3258+
3259+ ADVANCE_BCS_BATCH(batch);
3260+ }
3261+
3262+ if (slice_type == SLICE_TYPE_B) {
3263+ /* VDEnc on SKL doesn't support BDW */
3264+ assert(0);
3265+ }
3266+}
3267+
3268+static void
3269+gen9_vdenc_mfx_avc_weightoffset_state(VADriverContextP ctx,
3270+ struct encode_state *encode_state,
3271+ struct intel_encoder_context *encoder_context,
3272+ VAEncPictureParameterBufferH264 *pic_param,
3273+ VAEncSliceParameterBufferH264 *slice_param)
3274+{
3275+ struct intel_batchbuffer *batch = encoder_context->base.batch;
3276+ int i, slice_type;
3277+ short weightoffsets[32 * 6];
3278+
3279+ slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
3280+
3281+ if (slice_type == SLICE_TYPE_P &&
3282+ pic_param->pic_fields.bits.weighted_pred_flag == 1) {
3283+
3284+ for (i = 0; i < 32; i++) {
3285+ weightoffsets[i * 6 + 0] = slice_param->luma_weight_l0[i];
3286+ weightoffsets[i * 6 + 1] = slice_param->luma_offset_l0[i];
3287+ weightoffsets[i * 6 + 2] = slice_param->chroma_weight_l0[i][0];
3288+ weightoffsets[i * 6 + 3] = slice_param->chroma_offset_l0[i][0];
3289+ weightoffsets[i * 6 + 4] = slice_param->chroma_weight_l0[i][1];
3290+ weightoffsets[i * 6 + 5] = slice_param->chroma_offset_l0[i][1];
3291+ }
3292+
3293+ BEGIN_BCS_BATCH(batch, 98);
3294+ OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
3295+ OUT_BCS_BATCH(batch, 0);
3296+ intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
3297+
3298+ ADVANCE_BCS_BATCH(batch);
3299+ }
3300+
3301+ if (slice_type == SLICE_TYPE_B) {
3302+ /* VDEnc on SKL doesn't support BWD */
3303+ assert(0);
3304+ }
3305+}
3306+
3307+static void
3308+gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
3309+ struct encode_state *encode_state,
3310+ struct intel_encoder_context *encoder_context,
3311+ VAEncSliceParameterBufferH264 *slice_param,
3312+ VAEncSliceParameterBufferH264 *next_slice_param,
3313+ int slice_index)
3314+{
3315+ VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
3316+
3317+ gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
3318+ gen9_vdenc_mfx_avc_weightoffset_state(ctx,
3319+ encode_state,
3320+ encoder_context,
3321+ pic_param,
3322+ slice_param);
3323+ gen9_vdenc_mfx_avc_slice_state(ctx,
3324+ encode_state,
3325+ encoder_context,
3326+ pic_param,
3327+ slice_param,
3328+ next_slice_param);
3329+ gen9_vdenc_mfx_avc_inset_headers(ctx,
3330+ encode_state,
3331+ encoder_context,
3332+ slice_param,
3333+ slice_index);
3334+}
3335+
3336+static void
3337+gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
3338+ struct encode_state *encode_state,
3339+ struct intel_encoder_context *encoder_context)
3340+{
3341+ struct intel_batchbuffer *batch = encoder_context->base.batch;
3342+ struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3343+ VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
3344+ int i, j;
3345+ int slice_index = 0;
3346+ int is_frame_level_vdenc = 1; /* TODO: check it for SKL */
3347+ int has_tail = 0; /* TODO: check it later */
3348+
3349+ for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3350+ slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3351+
3352+ if (j == encode_state->num_slice_params_ext - 1)
3353+ next_slice_group_param = NULL;
3354+ else
3355+ next_slice_group_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j + 1]->buffer;
3356+
3357+ for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3358+ if (i < encode_state->slice_params_ext[j]->num_elements - 1)
3359+ next_slice_param = slice_param + 1;
3360+ else
3361+ next_slice_param = next_slice_group_param;
3362+
3363+ gen9_vdenc_mfx_avc_single_slice(ctx,
3364+ encode_state,
3365+ encoder_context,
3366+ slice_param,
3367+ next_slice_param,
3368+ slice_index);
3369+ slice_param++;
3370+ slice_index++;
3371+
3372+ if (is_frame_level_vdenc)
3373+ break;
3374+ else {
3375+ /* TODO: remove assert(0) and add other commands here */
3376+ assert(0);
3377+ }
3378+ }
3379+
3380+ if (is_frame_level_vdenc)
3381+ break;
3382+ }
3383+
3384+ if (is_frame_level_vdenc) {
3385+ struct vd_pipeline_flush_parameter pipeline_flush_params;
3386+
3387+ gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
3388+
3389+ memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
3390+ pipeline_flush_params.mfx_pipeline_done = !has_tail;
3391+ pipeline_flush_params.vdenc_pipeline_done = 1;
3392+ pipeline_flush_params.vdenc_pipeline_command_flush = 1;
3393+ pipeline_flush_params.vd_command_message_parser_done = 1;
3394+ gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
3395+ }
3396+
3397+ if (has_tail) {
3398+ /* TODO: insert a tail if required */
3399+ }
3400+
3401+ memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3402+ mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
3403+ gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3404+}
3405+
3406+static void
3407+gen9_vdenc_mfx_vdenc_pipeline(VADriverContextP ctx,
3408+ struct encode_state *encode_state,
3409+ struct intel_encoder_context *encoder_context)
3410+{
3411+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3412+ struct intel_batchbuffer *batch = encoder_context->base.batch;
3413+ struct gpe_mi_batch_buffer_start_parameter mi_batch_buffer_start_params;
3414+
3415+ if (vdenc_context->brc_enabled) {
3416+ struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3417+
3418+ memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3419+ mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status2_res.bo;
3420+ gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3421+ }
3422+
3423+ if (vdenc_context->current_pass) {
3424+ struct gpe_mi_conditional_batch_buffer_end_parameter mi_conditional_batch_buffer_end_params;
3425+
3426+ memset(&mi_conditional_batch_buffer_end_params, 0, sizeof(mi_conditional_batch_buffer_end_params));
3427+ mi_conditional_batch_buffer_end_params.bo = vdenc_context->huc_status_res.bo;
3428+ gen9_gpe_mi_conditional_batch_buffer_end(ctx, batch, &mi_conditional_batch_buffer_end_params);
3429+ }
3430+
3431+ gen9_vdenc_mfx_pipe_mode_select(ctx, encode_state, encoder_context);
3432+
3433+ gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res, 0);
3434+ gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res, 4);
3435+ gen9_vdenc_mfx_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res, 5);
3436+
3437+ gen9_vdenc_mfx_pipe_buf_addr_state(ctx, encoder_context);
3438+ gen9_vdenc_mfx_ind_obj_base_addr_state(ctx, encoder_context);
3439+ gen9_vdenc_mfx_bsp_buf_base_addr_state(ctx, encoder_context);
3440+
3441+ gen9_vdenc_vdenc_pipe_mode_select(ctx, encode_state, encoder_context);
3442+ gen9_vdenc_vdenc_src_surface_state(ctx, encoder_context, &vdenc_context->uncompressed_input_surface_res);
3443+ gen9_vdenc_vdenc_ref_surface_state(ctx, encoder_context, &vdenc_context->recon_surface_res);
3444+ gen9_vdenc_vdenc_ds_ref_surface_state(ctx, encoder_context, &vdenc_context->scaled_4x_recon_surface_res);
3445+ gen9_vdenc_vdenc_pipe_buf_addr_state(ctx, encode_state, encoder_context);
3446+ gen9_vdenc_vdenc_const_qpt_state(ctx, encode_state, encoder_context);
3447+
3448+ if (!vdenc_context->brc_enabled) {
3449+ gen9_vdenc_mfx_avc_img_state(ctx, encode_state, encoder_context);
3450+ gen9_vdenc_vdenc_img_state(ctx, encode_state, encoder_context);
3451+ } else {
3452+ memset(&mi_batch_buffer_start_params, 0, sizeof(mi_batch_buffer_start_params));
3453+ mi_batch_buffer_start_params.is_second_level = 1; /* Must be the second level batch buffer */
3454+ mi_batch_buffer_start_params.bo = vdenc_context->second_level_batch_res.bo;
3455+ gen9_gpe_mi_batch_buffer_start(ctx, batch, &mi_batch_buffer_start_params);
3456+ }
3457+
3458+ gen9_vdenc_mfx_avc_qm_state(ctx, encoder_context);
3459+ gen9_vdenc_mfx_avc_fqm_state(ctx, encoder_context);
3460+
3461+ gen9_vdenc_mfx_vdenc_avc_slices(ctx, encode_state, encoder_context);
3462+}
3463+
3464+static void
3465+gen9_vdenc_context_brc_prepare(struct encode_state *encode_state,
3466+ struct intel_encoder_context *encoder_context)
3467+{
3468+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3469+ unsigned int rate_control_mode = encoder_context->rate_control_mode;
3470+
3471+ switch (rate_control_mode & 0x7f) {
3472+ case VA_RC_CBR:
3473+ vdenc_context->internal_rate_mode = I965_BRC_CBR;
3474+ break;
3475+
3476+ case VA_RC_VBR:
3477+ vdenc_context->internal_rate_mode = I965_BRC_VBR;
3478+ break;
3479+
3480+ case VA_RC_CQP:
3481+ default:
3482+ vdenc_context->internal_rate_mode = I965_BRC_CQP;
3483+ break;
3484+ }
3485+}
3486+
3487+static void
3488+gen9_vdenc_read_status(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3489+{
3490+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3491+ struct intel_batchbuffer *batch = encoder_context->base.batch;
3492+ struct gpe_mi_store_register_mem_parameter mi_store_register_mem_params;
3493+ struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
3494+ unsigned int base_offset = vdenc_context->status_bffuer.base_offset;
3495+ int i;
3496+
3497+ memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
3498+ gen9_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
3499+
3500+ memset(&mi_store_register_mem_params, 0, sizeof(mi_store_register_mem_params));
3501+ mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3502+ mi_store_register_mem_params.bo = vdenc_context->status_bffuer.res.bo;
3503+ mi_store_register_mem_params.offset = base_offset + vdenc_context->status_bffuer.bytes_per_frame_offset;
3504+ gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3505+
3506+ /* Update DMEM buffer for BRC Update */
3507+ for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3508+ mi_store_register_mem_params.mmio_offset = MFC_BITSTREAM_BYTECOUNT_FRAME_REG; /* TODO: fix it if VDBOX2 is used */
3509+ mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3510+ mi_store_register_mem_params.offset = 5 * sizeof(uint32_t);
3511+ gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3512+
3513+ mi_store_register_mem_params.mmio_offset = MFC_IMAGE_STATUS_CTRL_REG; /* TODO: fix it if VDBOX2 is used */
3514+ mi_store_register_mem_params.bo = vdenc_context->brc_update_dmem_res[i].bo;
3515+ mi_store_register_mem_params.offset = 7 * sizeof(uint32_t);
3516+ gen9_gpe_mi_store_register_mem(ctx, batch, &mi_store_register_mem_params);
3517+ }
3518+}
3519+
3520+static VAStatus
3521+gen9_vdenc_avc_check_capability(VADriverContextP ctx,
3522+ struct encode_state *encode_state,
3523+ struct intel_encoder_context *encoder_context)
3524+{
3525+ VAEncSliceParameterBufferH264 *slice_param;
3526+ int i, j;
3527+
3528+ for (j = 0; j < encode_state->num_slice_params_ext; j++) {
3529+ slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
3530+
3531+ for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
3532+ if (slice_param->slice_type == SLICE_TYPE_B)
3533+ return VA_STATUS_ERROR_UNKNOWN;
3534+
3535+ slice_param++;
3536+ }
3537+ }
3538+
3539+ return VA_STATUS_SUCCESS;
3540+}
3541+
3542+static VAStatus
3543+gen9_vdenc_avc_encode_picture(VADriverContextP ctx,
3544+ VAProfile profile,
3545+ struct encode_state *encode_state,
3546+ struct intel_encoder_context *encoder_context)
3547+{
3548+ VAStatus va_status;
3549+ struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
3550+ struct intel_batchbuffer *batch = encoder_context->base.batch;
3551+
3552+ va_status = gen9_vdenc_avc_check_capability(ctx, encode_state, encoder_context);
3553+
3554+ if (va_status != VA_STATUS_SUCCESS)
3555+ return va_status;
3556+
3557+ gen9_vdenc_avc_prepare(ctx, profile, encode_state, encoder_context);
3558+
3559+ for (vdenc_context->current_pass = 0; vdenc_context->current_pass < vdenc_context->num_passes; vdenc_context->current_pass++) {
3560+ vdenc_context->is_first_pass = (vdenc_context->current_pass == 0);
3561+ vdenc_context->is_last_pass = (vdenc_context->current_pass == (vdenc_context->num_passes - 1));
3562+
3563+ intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
3564+ intel_batchbuffer_emit_mi_flush(batch);
3565+
3566+ if (vdenc_context->brc_enabled) {
3567+ if (!vdenc_context->brc_initted || vdenc_context->brc_need_reset)
3568+ gen9_vdenc_huc_brc_init_reset(ctx, encode_state, encoder_context);
3569+
3570+ gen9_vdenc_huc_brc_update(ctx, encode_state, encoder_context);
3571+ intel_batchbuffer_emit_mi_flush(batch);
3572+ }
3573+
3574+ gen9_vdenc_mfx_vdenc_pipeline(ctx, encode_state, encoder_context);
3575+ gen9_vdenc_read_status(ctx, encoder_context);
3576+
3577+ intel_batchbuffer_end_atomic(batch);
3578+ intel_batchbuffer_flush(batch);
3579+
3580+ vdenc_context->brc_initted = 1;
3581+ vdenc_context->brc_need_reset = 0;
3582+ }
3583+
3584+ return VA_STATUS_SUCCESS;
3585+}
3586+
3587+static VAStatus
3588+gen9_vdenc_pipeline(VADriverContextP ctx,
3589+ VAProfile profile,
3590+ struct encode_state *encode_state,
3591+ struct intel_encoder_context *encoder_context)
3592+{
3593+ VAStatus vaStatus;
3594+
3595+ switch (profile) {
3596+ case VAProfileH264ConstrainedBaseline:
3597+ case VAProfileH264Main:
3598+ case VAProfileH264High:
3599+ vaStatus = gen9_vdenc_avc_encode_picture(ctx, profile, encode_state, encoder_context);
3600+ break;
3601+
3602+ default:
3603+ vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
3604+ break;
3605+ }
3606+
3607+ return vaStatus;
3608+}
3609+
3610+static void
3611+gen9_vdenc_free_resources(struct gen9_vdenc_context *vdenc_context)
3612+{
3613+ int i;
3614+
3615+ i965_free_gpe_resource(&vdenc_context->brc_init_reset_dmem_res);
3616+ i965_free_gpe_resource(&vdenc_context->brc_history_buffer_res);
3617+ i965_free_gpe_resource(&vdenc_context->brc_stream_in_res);
3618+ i965_free_gpe_resource(&vdenc_context->brc_stream_out_res);
3619+ i965_free_gpe_resource(&vdenc_context->huc_dummy_res);
3620+
3621+ for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++)
3622+ i965_free_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3623+
3624+ i965_free_gpe_resource(&vdenc_context->vdenc_statistics_res);
3625+ i965_free_gpe_resource(&vdenc_context->pak_statistics_res);
3626+ i965_free_gpe_resource(&vdenc_context->vdenc_avc_image_state_res);
3627+ i965_free_gpe_resource(&vdenc_context->hme_detection_summary_buffer_res);
3628+ i965_free_gpe_resource(&vdenc_context->brc_constant_data_res);
3629+ i965_free_gpe_resource(&vdenc_context->second_level_batch_res);
3630+
3631+ i965_free_gpe_resource(&vdenc_context->huc_status_res);
3632+ i965_free_gpe_resource(&vdenc_context->huc_status2_res);
3633+
3634+ i965_free_gpe_resource(&vdenc_context->recon_surface_res);
3635+ i965_free_gpe_resource(&vdenc_context->scaled_4x_recon_surface_res);
3636+ i965_free_gpe_resource(&vdenc_context->post_deblocking_output_res);
3637+ i965_free_gpe_resource(&vdenc_context->pre_deblocking_output_res);
3638+
3639+ for (i = 0; i < ARRAY_ELEMS(vdenc_context->list_reference_res); i++) {
3640+ i965_free_gpe_resource(&vdenc_context->list_reference_res[i]);
3641+ i965_free_gpe_resource(&vdenc_context->list_scaled_4x_reference_res[i]);
3642+ }
3643+
3644+ i965_free_gpe_resource(&vdenc_context->uncompressed_input_surface_res);
3645+ i965_free_gpe_resource(&vdenc_context->compressed_bitstream.res);
3646+ i965_free_gpe_resource(&vdenc_context->status_bffuer.res);
3647+
3648+ i965_free_gpe_resource(&vdenc_context->mfx_intra_row_store_scratch_res);
3649+ i965_free_gpe_resource(&vdenc_context->mfx_deblocking_filter_row_store_scratch_res);
3650+ i965_free_gpe_resource(&vdenc_context->mfx_bsd_mpc_row_store_scratch_res);
3651+ i965_free_gpe_resource(&vdenc_context->vdenc_row_store_scratch_res);
3652+
3653+ i965_free_gpe_resource(&vdenc_context->vdenc_streamin_res);
3654+}
3655+
/*
 * Destructor installed as encoder_context->mfc_context_destroy: releases
 * the GPE resources of the VDEnc context, then the context itself.
 */
static void
gen9_vdenc_context_destroy(void *context)
{
    struct gen9_vdenc_context *vdenc_context = (struct gen9_vdenc_context *)context;

    gen9_vdenc_free_resources(vdenc_context);
    free(vdenc_context);
}
3665+
3666+static void
3667+gen9_vdenc_allocate_resources(VADriverContextP ctx,
3668+ struct intel_encoder_context *encoder_context,
3669+ struct gen9_vdenc_context *vdenc_context)
3670+{
3671+ struct i965_driver_data *i965 = i965_driver_data(ctx);
3672+ int i;
3673+
3674+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_init_reset_dmem_res,
3675+ ALIGN(sizeof(struct huc_brc_init_dmem), 64),
3676+ "HuC Init&Reset DMEM buffer");
3677+
3678+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_history_buffer_res,
3679+ ALIGN(HUC_BRC_HISTORY_BUFFER_SIZE, 0x1000),
3680+ "HuC History buffer");
3681+
3682+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_in_res,
3683+ ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3684+ "HuC Stream In buffer");
3685+
3686+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_stream_out_res,
3687+ ALIGN(HUC_BRC_STREAM_INOUT_BUFFER_SIZE, 0x1000),
3688+ "HuC Stream Out buffer");
3689+
3690+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_dummy_res,
3691+ 0x1000,
3692+ "HuC dummy buffer");
3693+
3694+ for (i = 0; i < NUM_OF_BRC_PAK_PASSES; i++) {
3695+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_update_dmem_res[i],
3696+ ALIGN(sizeof(struct huc_brc_update_dmem), 64),
3697+ "HuC BRC Update buffer");
3698+ i965_zero_gpe_resource(&vdenc_context->brc_update_dmem_res[i]);
3699+ }
3700+
3701+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_statistics_res,
3702+ ALIGN(VDENC_STATISTICS_SIZE, 0x1000),
3703+ "VDENC statistics buffer");
3704+
3705+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->pak_statistics_res,
3706+ ALIGN(PAK_STATISTICS_SIZE, 0x1000),
3707+ "PAK statistics buffer");
3708+
3709+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->vdenc_avc_image_state_res,
3710+ ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3711+ "VDENC/AVC image state buffer");
3712+
3713+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->hme_detection_summary_buffer_res,
3714+ ALIGN(HME_DETECTION_SUMMARY_BUFFER_SIZE, 0x1000),
3715+ "HME summary buffer");
3716+
3717+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->brc_constant_data_res,
3718+ ALIGN(BRC_CONSTANT_DATA_SIZE, 0x1000),
3719+ "BRC constant buffer");
3720+
3721+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->second_level_batch_res,
3722+ ALIGN(VDENC_AVC_IMAGE_STATE_SIZE, 0x1000),
3723+ "Second level batch buffer");
3724+
3725+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status_res,
3726+ 0x1000,
3727+ "HuC Status buffer");
3728+
3729+ ALLOC_VDENC_BUFFER_RESOURCE(vdenc_context->huc_status2_res,
3730+ 0x1000,
3731+ "HuC Status buffer");
3732+}
3733+
3734+static VAStatus
3735+gen9_vdenc_context_get_status(VADriverContextP ctx,
3736+ struct intel_encoder_context *encoder_context,
3737+ struct i965_coded_buffer_segment *coded_buffer_segment)
3738+{
3739+ struct gen9_vdenc_status *vdenc_status = (struct gen9_vdenc_status *)coded_buffer_segment->codec_private_data;
3740+
3741+ coded_buffer_segment->base.size = vdenc_status->bytes_per_frame;
3742+
3743+ return VA_STATUS_SUCCESS;
3744+}
3745+
3746+Bool
3747+gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
3748+{
3749+ struct gen9_vdenc_context *vdenc_context = calloc(1, sizeof(struct gen9_vdenc_context));
3750+
3751+ if (!vdenc_context)
3752+ return False;
3753+
3754+ vdenc_context->brc_initted = 0;
3755+ vdenc_context->brc_need_reset = 0;
3756+ vdenc_context->is_low_delay = 0;
3757+ vdenc_context->current_pass = 0;
3758+ vdenc_context->num_passes = 1;
3759+ vdenc_context->vdenc_streamin_enable = 0;
3760+ vdenc_context->vdenc_pak_threshold_check_enable = 0;
3761+
3762+ gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
3763+
3764+ encoder_context->mfc_context = vdenc_context;
3765+ encoder_context->mfc_context_destroy = gen9_vdenc_context_destroy;
3766+ encoder_context->mfc_pipeline = gen9_vdenc_pipeline;
3767+ encoder_context->mfc_brc_prepare = gen9_vdenc_context_brc_prepare;
3768+ encoder_context->get_status = gen9_vdenc_context_get_status;
3769+
3770+ return True;
3771+}
--- /dev/null
+++ b/src/gen9_vdenc.h
@@ -0,0 +1,908 @@
1+/*
2+ * Copyright © 2015 Intel Corporation
3+ *
4+ * Permission is hereby granted, free of charge, to any person obtaining a
5+ * copy of this software and associated documentation files (the
6+ * "Software"), to deal in the Software without restriction, including
7+ * without limitation the rights to use, copy, modify, merge, publish,
8+ * distribute, sub license, and/or sell copies of the Software, and to
9+ * permit persons to whom the Software is furnished to do so, subject to
10+ * the following conditions:
11+ *
12+ * The above copyright notice and this permission notice (including the
13+ * next paragraph) shall be included in all copies or substantial portions
14+ * of the Software.
15+ *
16+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19+ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+ *
24+ * Authors:
25+ * Xiang Haihao <haihao.xiang@intel.com>
26+ *
27+ */
28+
29+#ifndef GEN9_VDENC_H
30+#define GEN9_VDENC_H
31+
32+#include <drm.h>
33+#include <i915_drm.h>
34+#include <intel_bufmgr.h>
35+
36+#include "i965_gpe_utils.h"
37+#include "i965_encoder.h"
38+
struct encode_state;

/* Values named after the two HuC BRC operations */
#define HUC_BRC_INIT_RESET                      4
#define HUC_BRC_UPDATE                          5

#define HUC_DMEM_DATA_OFFSET                    0x2000

/* Number of BRC update DMEM buffers kept by the VDEnc context */
#define NUM_OF_BRC_PAK_PASSES                   2

/* Buffer sizes in bytes, before any alignment applied by the allocator */
#define HUC_BRC_HISTORY_BUFFER_SIZE             832
#define HUC_BRC_STREAM_INOUT_BUFFER_SIZE        4096
#define VDENC_STATISTICS_SIZE                   128
#define PAK_STATISTICS_SIZE                     256
/* Both image state commands plus two extra dwords */
#define VDENC_AVC_IMAGE_STATE_SIZE              (sizeof(struct gen9_mfx_avc_img_state) + sizeof(struct gen9_vdenc_img_state) + 2 * sizeof(int))
#define HME_DETECTION_SUMMARY_BUFFER_SIZE       256
#define BRC_CONSTANT_DATA_SIZE                  4096
#define BRC_DEBUG_OUTPUT_SIZE                   4096

#define HUC_STATUS_MMIO_OFFSET                  0x0D000

#define SCALE_FACTOR_4X                         4

#define VDENC_FRAME_I                           0
#define VDENC_FRAME_P                           1

/* LUT mode indices; several names intentionally share one value */
#define VDENC_LUTMODE_INTRA_NONPRED             0x00
#define VDENC_LUTMODE_INTRA                     0x01
#define VDENC_LUTMODE_INTRA_16x16               0x01
#define VDENC_LUTMODE_INTRA_8x8                 0x02
#define VDENC_LUTMODE_INTRA_4x4                 0x03
#define VDENC_LUTMODE_INTER_16x8                0x04
#define VDENC_LUTMODE_INTER_8x16                0x04
#define VDENC_LUTMODE_INTER_8X8Q                0x05
#define VDENC_LUTMODE_INTER_8X4Q                0x06
#define VDENC_LUTMODE_INTER_4X8Q                0x06
#define VDENC_LUTMODE_INTER_16x8_FIELD          0x06
#define VDENC_LUTMODE_INTER_4X4Q                0x07
#define VDENC_LUTMODE_INTER_8x8_FIELD           0x07
#define VDENC_LUTMODE_INTER                     0x08
#define VDENC_LUTMODE_INTER_16x16               0x08
#define VDENC_LUTMODE_INTER_BWD                 0x09
#define VDENC_LUTMODE_REF_ID                    0x0A
#define VDENC_LUTMODE_INTRA_CHROMA              0x0B

/* Picture dimension in 16-pixel macroblocks, rounded up */
#define WIDTH_IN_MACROBLOCKS(width)             (ALIGN(width, 16) >> 4)
#define HEIGHT_IN_MACROBLOCKS(height)           (ALIGN(height, 16) >> 4)
85+
/*
 * Bit-field image of the MFX AVC image state command: 21 dwords,
 * dw0 .. dw20. `pad` members are unnamed-purpose filler bits that keep
 * the following fields at their bit positions.
 */
struct gen9_mfx_avc_img_state
{
    /* DW0: command header, also accessible as one raw dword */
    union {
        struct {
            uint32_t dword_length:16;
            uint32_t sub_opcode_b:5;
            uint32_t sub_opcode_a:3;
            uint32_t command_opcode:3;
            uint32_t pipeline:2;
            uint32_t command_type:3;
        };

        uint32_t value;
    } dw0;

    /* DW1: frame size in macroblocks */
    struct {
        uint32_t frame_size_in_mbs_minus1:16;
        uint32_t pad0:16;
    } dw1;

    /* DW2: frame width and height in macroblocks */
    struct {
        uint32_t frame_width_in_mbs_minus1:8;
        uint32_t pad0:8;
        uint32_t frame_height_in_mbs_minus1:8;
        uint32_t pad1:8;
    } dw2;

    /* DW3: picture structure, weighted prediction, chroma QP offsets */
    struct {
        uint32_t pad0:8;
        uint32_t image_structure:2;
        uint32_t weighted_bipred_idc:2;
        uint32_t weighted_pred_flag:1;
        uint32_t brc_domain_rate_control_enable:1;
        uint32_t pad1:2;
        uint32_t chroma_qp_offset:5;
        uint32_t pad2:3;
        uint32_t second_chroma_qp_offset:5;
        uint32_t pad3:3;
    } dw3;

    /* DW4: coding tool flags and minimum frame size */
    struct {
        uint32_t field_picture_flag:1;
        uint32_t mbaff_mode_active:1;
        uint32_t frame_mb_only_flag:1;
        uint32_t transform_8x8_idct_mode_flag:1;
        uint32_t direct_8x8_interface_flag:1;
        uint32_t constrained_intra_prediction_flag:1;
        uint32_t current_img_dispoable_flag:1;
        uint32_t entropy_coding_flag:1;
        uint32_t mb_mv_format_flag:1;
        uint32_t pad0:1;
        uint32_t chroma_format_idc:2;
        uint32_t mv_unpacked_flag:1;
        uint32_t insert_test_flag:1;
        uint32_t load_slice_pointer_flag:1;
        uint32_t macroblock_stat_enable:1;
        uint32_t minimum_frame_size:16;
    } dw4;

    /* DW5: size/rate control flags and AQ controls */
    struct {
        uint32_t intra_mb_max_bit_flag:1;
        uint32_t inter_mb_max_bit_flag:1;
        uint32_t frame_size_over_flag:1;
        uint32_t frame_size_under_flag:1;
        uint32_t pad0:3;
        uint32_t intra_mb_ipcm_flag:1;
        uint32_t pad1:1;
        uint32_t mb_rate_ctrl_flag:1;
        uint32_t min_frame_size_units:2;
        uint32_t inter_mb_zero_cbp_flag:1;
        uint32_t pad2:3;
        uint32_t non_first_pass_flag:1;
        uint32_t pad3:10;
        uint32_t aq_chroma_disable:1;
        uint32_t aq_rounding:3;
        uint32_t aq_enable:1;
    } dw5;

    /* DW6: maximum intra / inter macroblock sizes */
    struct {
        uint32_t intra_mb_max_size:12;
        uint32_t pad0:4;
        uint32_t inter_mb_max_size:12;
        uint32_t pad1:4;
    } dw6;

    /* DW7: no named fields */
    struct {
        uint32_t pad0;
    } dw7;

    /* DW8: four slice delta QP maxima */
    struct {
        uint32_t slice_delta_qp_max0:8;
        uint32_t slice_delta_qp_max1:8;
        uint32_t slice_delta_qp_max2:8;
        uint32_t slice_delta_qp_max3:8;
    } dw8;

    /* DW9: four slice delta QP minima */
    struct {
        uint32_t slice_delta_qp_min0:8;
        uint32_t slice_delta_qp_min1:8;
        uint32_t slice_delta_qp_min2:8;
        uint32_t slice_delta_qp_min3:8;
    } dw9;

    /* DW10: frame bitrate minimum / maximum with their unit selectors */
    struct {
        uint32_t frame_bitrate_min:14;
        uint32_t frame_bitrate_min_unit_mode:1;
        uint32_t frame_bitrate_min_unit:1;
        uint32_t frame_bitrate_max:14;
        uint32_t frame_bitrate_max_unit_mode:1;
        uint32_t frame_bitrate_max_unit:1;
    } dw10;

    /* DW11: frame bitrate minimum / maximum deltas */
    struct {
        uint32_t frame_bitrate_min_delta:15;
        uint32_t pad0:1;
        uint32_t frame_bitrate_max_delta:15;
        uint32_t pad1:1;
    } dw11;

    /* DW12 */
    struct {
        uint32_t pad0:18;
        uint32_t vad_error_logic:1;
        uint32_t pad1:13;
    } dw12;

    /* DW13: only 30 bits are declared; the top two bits are unnamed */
    struct {
        uint32_t pic_qp_init_minus26:8;
        uint32_t pic_num_ref_idx_l0_active_minus1:6;
        uint32_t pad0:2;
        uint32_t pic_num_ref_idx_l1_active_minus1:6;
        uint32_t pad1:2;
        uint32_t num_ref_frames:5;
        uint32_t is_curr_pic_has_mmco5:1;
    } dw13;

    /* DW14: picture order count and slice group syntax elements */
    struct {
        uint32_t pic_order_present_flag:1;
        uint32_t delta_pic_order_always_zero_flag:1;
        uint32_t pic_order_cnt_type:2;
        uint32_t pad0:4;
        uint32_t slice_group_map_type:3;
        uint32_t redundant_pic_cnt_present_flag:1;
        uint32_t num_slice_groups_minus1:3;
        uint32_t deblock_filter_ctrl_present_flag:1;
        uint32_t log2_max_frame_num_minus4:8;
        uint32_t log2_max_pic_order_cnt_lsb_minus4:8;
    } dw14;

    /* DW15 */
    struct {
        uint32_t slice_group_change_rate:16;
        uint32_t curr_pic_frame_num:16;
    } dw15;

    /* DW16: view id fields */
    struct {
        uint32_t current_frame_view_id:10;
        uint32_t pad0:2;
        uint32_t max_view_idx_l0:4;
        uint32_t pad1:2;
        uint32_t max_view_idx_l1:4;
        uint32_t pad2:9;
        uint32_t inter_view_order_disable:1;
    } dw16;

    /* DW17 */
    struct {
        uint32_t fqp:3;                         // Must be zero for SKL
        uint32_t fqp_offset:3;                  // Must be zero for SKL
        uint32_t pad0:2;
        uint32_t ext_brc_dm_stat_en:1;          // Must be zero for SKL
        uint32_t pad1:7;
        uint32_t brc_dm_avg_mb_qp:6;            // Must be zero for SKL
        uint32_t pad2:10;
    } dw17;

    /* DW18 */
    struct {
        uint32_t brc_domain_target_frame_size;
    } dw18;

    /* DW19 */
    struct {
        uint32_t threshold_size_in_bytes;
    } dw19;

    /* DW20 */
    struct {
        uint32_t target_slice_size_in_bytes;
    } dw20;
};
271+
/*
 * Eight 8-bit motion vector cost values, mv0 .. mv7, packed four per
 * dword.
 */
struct gen9_image_state_cost
{
    /* DW0: mv0 .. mv3 */
    struct {
        uint32_t mv0_cost:8;
        uint32_t mv1_cost:8;
        uint32_t mv2_cost:8;
        uint32_t mv3_cost:8;
    } dw0;

    /* DW1: mv4 .. mv7 */
    struct {
        uint32_t mv4_cost:8;
        uint32_t mv5_cost:8;
        uint32_t mv6_cost:8;
        uint32_t mv7_cost:8;
    } dw1;
};
288+
289+struct gen9_vdenc_img_state
290+{
291+ union {
292+ struct {
293+ uint32_t dword_length:12;
294+ uint32_t pad0:4;
295+ uint32_t sub_opcode_b:5;
296+ uint32_t sub_opcode_a:2;
297+ uint32_t command_opcode:4;
298+ uint32_t pipeline:2;
299+ uint32_t command_type:3;
300+ };
301+
302+ uint32_t value;
303+ } dw0;
304+
305+ struct {
306+ uint32_t pad0:2;
307+ uint32_t bidirectional_mix_disable:1;
308+ uint32_t pad1:1;
309+ uint32_t time_budget_overflow_check:1;
310+ uint32_t pad2:2;
311+ uint32_t transform_8x8_flag:1;
312+ uint32_t vdenc_l1_cache_priority:2;
313+ uint32_t pad3:22;
314+ } dw1;
315+
316+ struct {
317+ uint32_t pad0:16;
318+ uint32_t bidirectional_weight:6;
319+ uint32_t pad1:6;
320+ uint32_t unidirection_mix_disable:1;
321+ uint32_t pad2:3;
322+ } dw2;
323+
324+ struct {
325+ uint32_t pad0:16;
326+ uint32_t picture_width:16;
327+ } dw3;
328+
329+ struct {
330+ uint32_t pad0:12;
331+ uint32_t subpel_mode:2;
332+ uint32_t pad1:3;
333+ uint32_t forward_transform_skip_check_enable:1;
334+ uint32_t bme_disable_for_fbr_message:1;
335+ uint32_t block_based_skip_enabled:1;
336+ uint32_t inter_sad_measure_adjustment:2;
337+ uint32_t intra_sad_measure_adjustment:2;
338+ uint32_t sub_macroblock_sub_partition_mask:7;
339+ uint32_t block_based_skip_type:1;
340+ } dw4;
341+
342+ struct {
343+ uint32_t picture_height_minus1:16;
344+ uint32_t cre_prefetch_enable:1;
345+ uint32_t hme_ref1_disable:1;
346+ uint32_t mb_slice_threshold_value:4;
347+ uint32_t pad0:4;
348+ uint32_t constrained_intra_prediction_flag:1;
349+ uint32_t pad1:2;
350+ uint32_t picture_type:2;
351+ uint32_t pad2:1;
352+ } dw5;
353+
354+ struct {
355+ uint32_t slice_macroblock_height_minus1:16;
356+ uint32_t pad0:16;
357+ } dw6;
358+
359+ struct {
360+ uint32_t pad0;
361+ } dw7;
362+
363+ struct {
364+ uint32_t luma_intra_partition_mask:5;
365+ uint32_t non_skip_zero_mv_const_added:1;
366+ uint32_t non_skip_mb_mode_const_added:1;
367+ uint32_t pad0:9;
368+ uint32_t mv_cost_scaling_factor:2;
369+ uint32_t bilinear_filter_enable:1;
370+ uint32_t pad1:3;
371+ uint32_t ref_id_cost_mode_select:1;
372+ uint32_t pad2:9;
373+ } dw8;
374+
375+ struct {
376+ uint32_t mode0_cost:8;
377+ uint32_t mode1_cost:8;
378+ uint32_t mode2_cost:8;
379+ uint32_t mode3_cost:8;
380+ } dw9;
381+
382+ struct {
383+ uint32_t mode4_cost:8;
384+ uint32_t mode5_cost:8;
385+ uint32_t mode6_cost:8;
386+ uint32_t mode7_cost:8;
387+ } dw10;
388+
389+ struct {
390+ uint32_t mode8_cost:8;
391+ uint32_t mode9_cost:8;
392+ uint32_t ref_id_cost:8;
393+ uint32_t chroma_intra_mode_cost:8;
394+ } dw11;
395+
396+ struct {
397+ struct gen9_image_state_cost mv_cost;
398+ } dw12_13;
399+
400+ struct {
401+ uint32_t qp_prime_y:8;
402+ uint32_t pad0:16;
403+ uint32_t target_size_in_word:8;
404+ } dw14;
405+
406+ struct {
407+ uint32_t pad0;
408+ } dw15;
409+
410+ struct {
411+ uint32_t pad0;
412+ } dw16;
413+
414+ struct {
415+ uint32_t avc_intra_4x4_mode_mask:9;
416+ uint32_t pad0:7;
417+ uint32_t avc_intra_8x8_mode_mask:9;
418+ uint32_t pad1:7;
419+ } dw17;
420+
421+ struct {
422+ uint32_t avc_intra_16x16_mode_mask:4;
423+ uint32_t avc_intra_chroma_mode_mask:4;
424+ uint32_t intra_compute_type_intra_compute_type:2;
425+ uint32_t pad0:22;
426+ } dw18;
427+
428+ struct {
429+ uint32_t pad0;
430+ } dw19;
431+
432+ struct {
433+ uint32_t penalty_for_intra_16x16_non_dc_prediction:8;
434+ uint32_t penalty_for_intra_8x8_non_dc_prediction:8;
435+ uint32_t penalty_for_intra_4x4_non_dc_prediction:8;
436+ uint32_t pad0:8;
437+ } dw20;
438+
439+ struct {
440+ uint32_t pad0;
441+ } dw21;
442+
443+ struct {
444+ uint32_t panic_mode_mb_threadhold:16;
445+ uint32_t small_mb_size_in_word:8;
446+ uint32_t large_mb_size_in_word:8;
447+ } dw22;
448+
449+ struct {
450+ uint32_t l0_number_of_reference_minus1:8;
451+ uint32_t pad0:8;
452+ uint32_t l1_number_of_reference_minus1:8;
453+ uint32_t pad1:8;
454+ } dw23;
455+
456+ struct {
457+ uint32_t pad0;
458+ } dw24;
459+
460+ struct {
461+ uint32_t pad0;
462+ } dw25;
463+
464+ struct {
465+ uint32_t pad0:8;
466+ uint32_t hme_ref_windows_combining_threshold:8;
467+ uint32_t pad1:16;
468+ } dw26;
469+
470+ struct {
471+ uint32_t max_hmv_r:16;
472+ uint32_t max_vmv_r:16;
473+ } dw27;
474+
475+ struct {
476+ struct gen9_image_state_cost hme_mv_cost;
477+ } dw28_29;
478+
479+ struct {
480+ uint32_t roi_qp_adjustment_for_zone0:4;
481+ uint32_t roi_qp_adjustment_for_zone1:4;
482+ uint32_t roi_qp_adjustment_for_zone2:4;
483+ uint32_t roi_qp_adjustment_for_zone3:4;
484+ uint32_t qp_adjustment_for_shape_best_intra_4x4_winner:4;
485+ uint32_t qp_adjustment_for_shape_best_intra_8x8_winner:4;
486+ uint32_t qp_adjustment_for_shape_best_intra_16x16_winner:4;
487+ uint32_t pad0:4;
488+ } dw30;
489+
490+ struct {
491+ uint32_t best_distortion_qp_adjustment_for_zone0:4;
492+ uint32_t best_distortion_qp_adjustment_for_zone1:4;
493+ uint32_t best_distortion_qp_adjustment_for_zone2:4;
494+ uint32_t best_distortion_qp_adjustment_for_zone3:4;
495+ uint32_t offset0_for_zone0_neg_zone1_boundary:16;
496+ } dw31;
497+
498+ struct {
499+ uint32_t offset1_for_zone1_neg_zone2_boundary:16;
500+ uint32_t offset2_for_zone2_neg_zone3_boundary:16;
501+ } dw32;
502+
503+ struct {
504+ uint32_t qp_range_check_upper_bound:8;
505+ uint32_t qp_range_check_lower_bound:8;
506+ uint32_t pad0:8;
507+ uint32_t qp_range_check_value:4;
508+ uint32_t pad1:4;
509+ } dw33;
510+
511+ struct {
512+ uint32_t roi_enable:1;
513+ uint32_t fwd_predictor0_mv_enable:1;
514+ uint32_t bdw_predictor1_mv_enable:1;
515+ uint32_t mb_level_qp_enable:1;
516+ uint32_t target_size_in_words_mb_max_size_in_words_mb_enable:1;
517+ uint32_t pad0:3;
518+ uint32_t ppmv_disable:1;
519+ uint32_t coefficient_clamp_enable:1;
520+ uint32_t long_term_reference_frame_bwd_ref0_indicator:1;
521+ uint32_t long_term_reference_frame_fwd_ref2_indicator:1;
522+ uint32_t long_term_reference_frame_fwd_ref1_indicator:1;
523+ uint32_t long_term_reference_frame_fwd_ref0_indicator:1;
524+ uint32_t image_state_qp_override:1;
525+ uint32_t pad1:1;
526+ uint32_t midpoint_distortion:16;
527+ } dw34;
528+};
529+
530+struct gen9_vdenc_streamin_state
531+{
532+ struct {
533+ uint32_t roi_selection:8;
534+ uint32_t force_intra:1;
535+ uint32_t force_skip:1;
536+ uint32_t pad0:22;
537+ } dw0;
538+
539+ struct {
540+ uint32_t qp_prime_y:8;
541+ uint32_t target_size_in_word:8;
542+ uint32_t max_size_in_word:8;
543+ uint32_t pad0:8;
544+ } dw1;
545+
546+ struct {
547+ uint32_t fwd_predictor_x:16;
548+ uint32_t fwd_predictor_y:16;
549+ } dw2;
550+
551+ struct {
552+ uint32_t bwd_predictore_x:16;
553+ uint32_t bwd_predictore_y:16;
554+ } dw3;
555+
556+ struct {
557+ uint32_t fwd_ref_id0:4;
558+ uint32_t bdw_ref_id0:4;
559+ uint32_t pad0:24;
560+ } dw4;
561+
562+ struct {
563+ uint32_t pad0[11];
564+ } dw5_15;
565+};
566+
567+struct huc_brc_update_constant_data
568+{
569+ uint8_t global_rate_qp_adj_tab_i[64];
570+ uint8_t global_rate_qp_adj_tab_p[64];
571+ uint8_t global_rate_qp_adj_tab_b[64];
572+ uint8_t dist_threshld_i[10];
573+ uint8_t dist_threshld_p[10];
574+ uint8_t dist_threshld_b[10];
575+ uint8_t dist_qp_adj_tab_i[81];
576+ uint8_t dist_qp_adj_tab_p[81];
577+ uint8_t dist_qp_adj_tab_b[81];
578+ int8_t buf_rate_adj_tab_i[72];
579+ int8_t buf_rate_adj_tab_p[72];
580+ int8_t buf_rate_adj_tab_b[72];
581+ uint8_t frame_size_min_tab_p[9];
582+ uint8_t frame_size_min_tab_b[9];
583+ uint8_t frame_size_min_tab_i[9];
584+ uint8_t frame_size_max_tab_p[9];
585+ uint8_t frame_size_max_tab_b[9];
586+ uint8_t frame_size_max_tab_i[9];
587+ uint8_t frame_size_scg_tab_p[9];
588+ uint8_t frame_size_scg_tab_b[9];
589+ uint8_t frame_size_scg_tab_i[9];
590+ /* cost table 14*42 = 588 bytes */
591+ uint8_t i_intra_non_pred[42];
592+ uint8_t i_intra_16x16[42];
593+ uint8_t i_intra_8x8[42];
594+ uint8_t i_intra_4x4[42];
595+ uint8_t i_intra_chroma[42];
596+ uint8_t p_intra_non_pred[42];
597+ uint8_t p_intra_16x16[42];
598+ uint8_t p_intra_8x8[42];
599+ uint8_t p_intra_4x4[42];
600+ uint8_t p_intra_chroma[42];
601+ uint8_t p_inter_16x8[42];
602+ uint8_t p_inter_8x8[42];
603+ uint8_t p_inter_16x16[42];
604+ uint8_t p_ref_id[42];
605+ uint8_t hme_mv_cost[8][42];
606+ uint8_t pad0[42];
607+};
608+
609+struct huc_brc_init_dmem
610+{
611+ uint8_t brc_func; // 0: Init; 2: Reset
612+ uint8_t os_enabled; // Always 1
613+ uint8_t pad0[2];
614+ uint16_t brc_flag; // ICQ or CQP with slice size control: 0x00; CBR: 0x10; VBR: 0x20; VCM: 0x40; LOWDELAY: 0x80.
615+ uint16_t pad1;
616+ uint16_t frame_width; // Luma width in bytes
617+ uint16_t frame_height; // Luma height in bytes
618+ uint32_t target_bitrate; // target bitrate, set by application
619+ uint32_t min_rate; // 0
620+ uint32_t max_rate; // Maximum bit rate in bits per second (bps).
621+ uint32_t buffer_size; // buffer size in bits
622+ uint32_t init_buffer_fullness; // initial buffer fullness in bits
623+ uint32_t profile_level_max_frame; // user defined. refer to AVC BRC HLD for conformance check and correction
624+ uint32_t frame_rate_m; // FrameRateM is the number of frames in FrameRateD
625+ uint32_t frame_rate_d; // If driver gets this FrameRateD from VUI, it is the num_units_in_tick field (32 bits UINT).
626+ uint16_t num_p_in_gop; // number of P frames in a GOP
627+ uint16_t num_b_in_gop; // number of B frames in a GOP
628+ uint16_t min_qp; // 10
629+ uint16_t max_qp; // 51
630+ int8_t dev_thresh_pb0[8]; // lowdelay ? (-45, -33, -23, -15, -8, 0, 15, 25) : (-46, -38, -30, -23, 23, 30, 40, 46)
631+ int8_t dev_thresh_vbr0[8]; // lowdelay ? (-45, -35, -25, -15, -8, 0, 20, 40) : (-46, -40, -32, -23, 56, 64, 83, 93)
632+ int8_t dev_thresh_i0[8]; // lowdelay ? (-40, -30, -17, -10, -5, 0, 10, 20) : (-43, -36, -25, -18, 18, 28, 38, 46)
633+ uint8_t init_qp_ip; // Initial QP for I and P
634+
635+ uint8_t pad2; // Reserved
636+ uint8_t init_qp_b; // Initial QP for B
637+ uint8_t mb_qp_ctrl; // Enable MB level QP control (global)
638+ uint8_t slice_size_ctrl_en; // Enable slice size control
639+ int8_t intra_qp_delta[3]; // set to zero for all by default
640+ int8_t skip_qp_delta; // Reserved
641+ int8_t dist_qp_delta[4]; // lowdelay ? (-5, -2, 2, 5) : (0, 0, 0, 0)
642+ uint8_t oscillation_qp_delta; // BRCFLAG_ISVCM ? 16 : 0
643+ uint8_t first_iframe_no_hrd_check; // BRCFLAG_ISVCM ? 1 : 0
644+ uint8_t skip_frame_enable_flag;
645+ uint8_t top_qp_delta_thr_for_2nd_pass; // =1. QP Delta threshold for second pass.
646+ uint8_t top_frame_size_threshold_for_2nd_pass; // lowdelay ? 10 : 50. Top frame size threshold for second pass
647+ uint8_t bottom_frame_size_threshold_for_2nd_pass; // lowdelay ? 10 : 200. Bottom frame size threshold for second pass
648+ uint8_t qp_select_for_first_pass; // lowdelay ? 0 : 1. =0 to use previous frame final QP; or =1 to use (targetQP + previousQP) / 2.
649+ uint8_t mb_header_compensation; // Reserved
650+ uint8_t over_shoot_carry_flag; // set to zero by default
651+ uint8_t over_shoot_skip_frame_pct; // set to zero by default
652+ uint8_t estrate_thresh_p0[7]; // 4, 8, 12, 16, 20, 24, 28
653+ uint8_t estrate_thresh_b0[7]; // 4, 8, 12, 16, 20, 24, 28
654+ uint8_t estrate_thresh_i0[7]; // 4, 8, 12, 16, 20, 24, 28
655+ uint8_t fqp_enable; // ExtendedBrcDomainEn
656+ uint8_t scenario_info; // 0: UNKNOWN, 1: DISPLAYREMOTING, 2: VIDEOCONFERENCE, 3: ARCHIVE, 4: LIVESTREAMING.
657+ uint8_t static_Region_streamin; // should be programmed from par file
658+ uint8_t delta_qp_adaptation; // =1
659+ uint8_t max_crf_quality_factor; // =52
660+ uint8_t crf_quality_factor; // =25
661+ uint8_t bottom_qp_delta_thr_for_2nd_pass;// =1. QP Delta threshold for second pass.
662+ uint8_t sliding_window_size; // =30, the window size (in frames) used to compute bit rate
663+ uint8_t sliding_widow_rc_enable; // =0, sliding window based rate control (SWRC) disabled, 1: enabled
664+ uint8_t sliding_window_max_rate_ratio; // =120, ratio between the max rate within the window and average target bitrate
665+ uint8_t low_delay_golden_frame_boost; // only for lowdelay mode, 0 (default): no boost for I and scene change frames, 1: boost
666+ uint8_t pad3[61]; // Must be zero
667+};
668+
669+struct huc_brc_update_dmem
670+{
671+ uint8_t brc_func; // =1 for Update, other values are reserved for future use
672+ uint8_t pad0[3];
673+ uint32_t target_size; // refer to AVC BRC HLD for calculation
674+ uint32_t frame_number; // frame number
675+ uint32_t peak_tx_bits_per_frame; // current global target bits - previous global target bits (global target bits += input bits per frame)
676+ uint32_t frame_budget; // target time counter
677+ uint32_t frame_byte_count; // PAK output via MMIO
678+ uint32_t timing_budget_overflow; // PAK output via MMIO
679+ uint32_t slice_size_violation; // PAK output via MMIO
680+ uint32_t ipcm_non_conformant; // PAK output via MMIO
681+
682+ uint16_t start_global_adjust_frame[4]; // 10, 50, 100, 150
683+ uint16_t mb_budget[52]; // MB budget for QP 0 - 51.
684+ uint16_t target_slice_size; // target slice size
685+ uint16_t slcsz_thr_deltai[42]; // slice size threshold delta for I frame
686+ uint16_t slcsz_thr_deltap[42]; // slice size threshold delta for P frame
687+ uint16_t num_of_frames_skipped; // Recording how many frames have been skipped.
688+ uint16_t skip_frame_size; // Recording the skip frame size for one frame. =NumMBs * 1, assuming one bit per mb for skip frame.
689+ uint16_t static_region_pct; // One entry, recording the percentage of static region
690+ uint8_t global_rate_ratio_threshold[7]; // 80,95,99,101,105,125,160
691+ uint8_t current_frame_type; // I frame: 2; P frame: 0; B frame: 1.
692+ uint8_t start_global_adjust_mult[5]; // 1, 1, 3, 2, 1
693+ uint8_t start_global_adjust_div[5]; // 40, 5, 5, 3, 1
694+ uint8_t global_rate_ratio_threshold_qp[8]; // 253,254,255,0,1,1,2,3
695+ uint8_t current_pak_pass; // current pak pass number
696+ uint8_t max_num_passes; // 2
697+ uint8_t scene_change_width[2]; // set both to MIN((NumP + 1) / 5, 6)
698+ uint8_t scene_change_detect_enable; // Enable scene change detection
699+ uint8_t scene_change_prev_intra_percent_threshold; // =96. scene change previous intra percentage threshold
700+ uint8_t scene_change_cur_intra_perent_threshold; // =192. scene change current intra percentage threshold
701+ uint8_t ip_average_coeff; // lowdelay ? 0 : 128
702+ uint8_t min_qp_adjustment; // Minimum QP increase step
703+ uint8_t timing_budget_check; // Flag indicating if kernel will check timing budget.
704+ int8_t roi_qp_delta_i8[4]; // Application specified ROI QP Adjustment for Zone0, Zone1, Zone2 and Zone3.
705+ uint8_t cqp_qp_value; // Application specified target QP in BRC_ICQ mode
706+ uint8_t cqp_fqp; // Application specified fine position in BRC_ICQ mode
707+ uint8_t hme_detection_enable; // 0: default, 1: HuC BRC kernel requires information from HME detection kernel output
708+ uint8_t hme_cost_enable; // 0: default, 1: driver provides HME cost table
709+ uint8_t disable_pframe_8x8_transform;
710+ uint8_t skl_cabac_wa_enable;
711+ uint8_t roi_source; // =0: disable, 1: ROIMap from HME Static Region or from App dirty rectangle, 2: ROIMap from App
712+ uint8_t slice_size_consertative_threshold; // =0, 0: do not set conservative threshold (suggested for video conference), 1: set conservative threshold for non-video conference
713+ uint16_t max_target_slice_size; // default: 1498, max target slice size from app DDI
714+ uint16_t max_num_slice_allowed; // computed by driver based on level idc
715+ uint16_t second_level_batchbuffer_size; // second level batch buffer (SLBB) size in bytes, the input buffer will contain two SLBBs A and B, A followed by B, A and B have the same structure.
716+ uint16_t second_level_batchbuffer_b_offset; // offset in bytes from the beginning of the input buffer, it points to the start of SLBB B, set by driver for skip frame support
717+ uint16_t avc_img_state_offset; // offset in bytes from the beginning of SLBB A
718+
719+ /* HME distortion based QP adjustment */
720+ uint16_t ave_hme_dist;
721+ uint8_t hme_dist_available; // 0: disabled, 1: enabled
722+
723+ uint8_t pad1[63];
724+};
725+
726+struct gen9_vdenc_status
727+{
728+ uint32_t bytes_per_frame;
729+};
730+
731+struct gen9_vdenc_context
732+{
733+ uint32_t frame_width_in_mbs;
734+ uint32_t frame_height_in_mbs;
735+ uint32_t frame_width; // frame_width_in_mbs * 16
736+ uint32_t frame_height; // frame_height_in_mbs * 16
737+ uint32_t down_scaled_width_in_mb4x;
738+ uint32_t down_scaled_height_in_mb4x;
739+ uint32_t down_scaled_width_4x; // down_scaled_width_in_mb4x * 16
740+ uint32_t down_scaled_height_4x; // down_scaled_height_in_mb4x * 16
741+
742+ uint32_t target_bit_rate; /* in kbps */
743+ uint32_t max_bit_rate; /* in kbps */
744+ uint32_t min_bit_rate; /* in kbps */
745+ uint64_t init_vbv_buffer_fullness_in_bit;
746+ uint64_t vbv_buffer_size_in_bit;
747+ uint32_t frames_per_100s;
748+ uint32_t gop_size;
749+ uint32_t ref_dist;
750+ uint32_t brc_target_size;
751+ double brc_init_current_target_buf_full_in_bits;
752+ double brc_init_reset_input_bits_per_frame;
753+ uint32_t brc_init_reset_buf_size_in_bits;
754+ uint32_t brc_init_previous_target_buf_full_in_bits;
755+
756+ uint8_t mode_cost[12];
757+ uint8_t mv_cost[8];
758+ uint8_t hme_mv_cost[8];
759+
760+ uint32_t num_roi;
761+ uint32_t max_delta_qp;
762+ uint32_t min_delta_qp;
763+ struct intel_roi roi[3];
764+
765+ uint32_t brc_initted:1;
766+ uint32_t brc_need_reset:1;
767+ uint32_t is_low_delay:1;
768+ uint32_t brc_enabled:1;
769+ uint32_t internal_rate_mode:4;
770+ uint32_t current_pass:4;
771+ uint32_t num_passes:4;
772+ uint32_t is_first_pass:1;
773+ uint32_t is_last_pass:1;
774+
775+ uint32_t vdenc_streamin_enable:1;
776+ uint32_t vdenc_pak_threshold_check_enable:1;
777+ uint32_t pad1:1;
778+ uint32_t transform_8x8_mode_enable:1;
779+ uint32_t frame_type:2;
780+
781+ uint32_t mb_brc_enabled:1;
782+ uint32_t pad0:31;
783+
784+ struct i965_gpe_resource brc_init_reset_dmem_res;
785+ struct i965_gpe_resource brc_history_buffer_res;
786+ struct i965_gpe_resource brc_stream_in_res;
787+ struct i965_gpe_resource brc_stream_out_res;
788+ struct i965_gpe_resource huc_dummy_res;
789+
790+ struct i965_gpe_resource brc_update_dmem_res[NUM_OF_BRC_PAK_PASSES];
791+ struct i965_gpe_resource vdenc_statistics_res;
792+ struct i965_gpe_resource pak_statistics_res;
793+ struct i965_gpe_resource vdenc_avc_image_state_res;
794+ struct i965_gpe_resource hme_detection_summary_buffer_res;
795+ struct i965_gpe_resource brc_constant_data_res;
796+ struct i965_gpe_resource second_level_batch_res;
797+
798+ struct i965_gpe_resource huc_status_res;
799+ struct i965_gpe_resource huc_status2_res;
800+
801+ struct i965_gpe_resource recon_surface_res;
802+ struct i965_gpe_resource scaled_4x_recon_surface_res;
803+ struct i965_gpe_resource post_deblocking_output_res;
804+ struct i965_gpe_resource pre_deblocking_output_res;
805+ struct i965_gpe_resource list_reference_res[16];
806+ struct i965_gpe_resource list_scaled_4x_reference_res[16];
807+ struct i965_gpe_resource uncompressed_input_surface_res; // Input
808+
809+ struct {
810+ struct i965_gpe_resource res; // Output
811+ uint32_t start_offset;
812+ uint32_t end_offset;
813+ } compressed_bitstream;
814+
815+ struct i965_gpe_resource mfx_intra_row_store_scratch_res; // MFX internal buffer
816+ struct i965_gpe_resource mfx_deblocking_filter_row_store_scratch_res; // MFX internal buffer
817+ struct i965_gpe_resource mfx_bsd_mpc_row_store_scratch_res; // MFX internal buffer
818+ struct i965_gpe_resource vdenc_row_store_scratch_res; // VDENC internal buffer
819+
820+ struct i965_gpe_resource vdenc_streamin_res;
821+
822+ uint32_t num_refs[2];
823+ uint32_t list_ref_idx[2][32];
824+
825+ struct {
826+ struct i965_gpe_resource res;
827+ uint32_t base_offset;
828+ uint32_t size;
829+ uint32_t bytes_per_frame_offset;
830+ } status_bffuer;
831+};
832+
833+struct huc_pipe_mode_select_parameter
834+{
835+ uint32_t huc_stream_object_enable;
836+ uint32_t indirect_stream_out_enable;
837+ uint32_t media_soft_reset_counter;
838+};
839+
840+struct huc_imem_state_parameter
841+{
842+ uint32_t huc_firmware_descriptor;
843+};
844+
845+struct huc_dmem_state_parameter
846+{
847+ struct i965_gpe_resource *huc_data_source_res;
848+ uint32_t huc_data_destination_base_address;
849+ uint32_t huc_data_length;
850+};
851+
852+struct huc_cfg_state_parameter
853+{
854+ uint32_t force_reset;
855+};
856+
857+
858+struct huc_virtual_addr_parameter
859+{
860+ struct {
861+ struct i965_gpe_resource *huc_surface_res;
862+ uint32_t is_target;
863+ } regions[16];
864+};
865+
866+struct huc_ind_obj_base_addr_parameter
867+{
868+ struct i965_gpe_resource *huc_indirect_stream_in_object_res;
869+ struct i965_gpe_resource *huc_indirect_stream_out_object_res;
870+};
871+
872+struct huc_stream_object_parameter
873+{
874+ uint32_t indirect_stream_in_data_length;
875+ uint32_t indirect_stream_in_start_address;
876+ uint32_t indirect_stream_out_start_address;
877+ uint32_t huc_bitstream_enable;
878+ uint32_t length_mode;
879+ uint32_t stream_out;
880+ uint32_t emulation_prevention_byte_removal;
881+ uint32_t start_code_search_engine;
882+ uint8_t start_code_byte2;
883+ uint8_t start_code_byte1;
884+ uint8_t start_code_byte0;
885+};
886+
887+struct huc_start_parameter
888+{
889+ uint32_t last_stream_object;
890+};
891+
892+struct vd_pipeline_flush_parameter
893+{
894+ uint32_t hevc_pipeline_done;
895+ uint32_t vdenc_pipeline_done;
896+ uint32_t mfl_pipeline_done;
897+ uint32_t mfx_pipeline_done;
898+ uint32_t vd_command_message_parser_done;
899+ uint32_t hevc_pipeline_command_flush;
900+ uint32_t vdenc_pipeline_command_flush;
901+ uint32_t mfl_pipeline_command_flush;
902+ uint32_t mfx_pipeline_command_flush;
903+};
904+
905+extern Bool
906+gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
907+
908+#endif /* GEN9_VDENC_H */
--- a/src/i965_defines.h
+++ b/src/i965_defines.h
@@ -918,4 +918,62 @@
918918 #define HCP_CODEC_SELECT_DECODE 0
919919 #define HCP_CODEC_SELECT_ENCODE 1
920920
921+/* VDEnc/HuC */
922+/* HuC commands */
923+#define HUC(command) \
924+ (3 << 29 | \
925+ 2 << 27 | \
926+ 11 << 23 | \
927+ (command << 16))
928+
929+#define HUC_PIPE_MODE_SELECT HUC(0)
930+#define HUC_IMEM_STATE HUC(1)
931+#define HUC_DMEM_STATE HUC(2)
932+#define HUC_CFG_STATE HUC(3)
933+#define HUC_VIRTUAL_ADDR_STATE HUC(4)
934+#define HUC_IND_OBJ_BASE_ADDR_STATE HUC(5)
935+#define HUC_STREAM_OBJECT HUC(32)
936+#define HUC_START HUC(33)
937+
938+/* HuC registers */
939+#define VCS0_HUC_STATUS 0xD000
940+#define VCS0_HUC_STATUS2 0xD3B0
941+
942+/* VDEnc commands */
943+#define VDENC(opcode, sub_opcode_a, sub_opcode_b) \
944+ (3 << 29 | \
945+ 2 << 27 | \
946+ opcode << 23 | \
947+ sub_opcode_a << 21 | \
948+ sub_opcode_b << 16)
949+
950+#define VD_PIPELINE_FLUSH VDENC(15, 0, 0)
951+
952+#define VDENC_PIPE_MODE_SELECT VDENC(1, 0, 0)
953+#define VDENC_SRC_SURFACE_STATE VDENC(1, 0, 1)
954+#define VDENC_REF_SURFACE_STATE VDENC(1, 0, 2)
955+#define VDENC_DS_REF_SURFACE_STATE VDENC(1, 0, 3)
956+#define VDENC_PIPE_BUF_ADDR_STATE VDENC(1, 0, 4)
957+#define VDENC_IMG_STATE VDENC(1, 0, 5)
958+#define VDENC_CONST_QPT_STATE VDENC(1, 0, 6)
959+#define VDENC_WALKER_STATE VDENC(1, 0, 7)
960+
961+#define VDENC_CODEC_AVC 2
962+
963+#define VDENC_SURFACE_YUV422 0
964+#define VDENC_SURFACE_RGBA444 1
965+#define VDENC_SURFACE_YUV444 2
966+#define VDENC_SURFACE_Y8_UNORM 3
967+#define VDENC_SURFACE_PLANAR_420_8 4
968+#define VDENC_SURFACE_YCBCR_SWAP_Y 5
969+#define VDENC_SURFACE_YCBCR_SWAP_UV 6
970+#define VDENC_SURFACE_YCBCR_SWAP_UVY 7
971+#define VDENC_SURFACE_P010 8
972+#define VDENC_SURFACE_RGBA10 9
973+#define VDENC_SURFACE_Y410 10
974+#define VDENC_SURFACE_NV21 11
975+
976+#define MFC_BITSTREAM_BYTECOUNT_FRAME_REG 0x128A0
977+#define MFC_IMAGE_STATUS_CTRL_REG 0x128B8
978+
921979 #endif /* _I965_DEFINES_H_ */
--- a/src/i965_encoder.c
+++ b/src/i965_encoder.c
@@ -40,6 +40,7 @@
4040 #include "gen6_vme.h"
4141 #include "gen6_mfc.h"
4242 #include "gen9_mfc.h"
43+#include "gen9_vdenc.h"
4344
4445 extern Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
4546 extern Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
@@ -801,10 +802,14 @@ gen8_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
801802 struct hw_context *
802803 gen9_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
803804 {
804- if (obj_config->profile == VAProfileHEVCMain) {
805- return intel_enc_hw_context_init(ctx, obj_config, gen9_vme_context_init, gen9_hcpe_context_init);
806- } else if (obj_config->profile == VAProfileJPEGBaseline)
807- return intel_enc_hw_context_init(ctx, obj_config, gen8_vme_context_init, gen8_mfc_context_init);
808- else
809- return intel_enc_hw_context_init(ctx, obj_config, gen9_vme_context_init, gen9_mfc_context_init);
805+ if (obj_config->entrypoint == VAEntrypointEncSliceLP) {
806+ return intel_enc_hw_context_init(ctx, obj_config, NULL, gen9_vdenc_context_init);
807+ } else {
808+ if (obj_config->profile == VAProfileHEVCMain) {
809+ return intel_enc_hw_context_init(ctx, obj_config, gen9_vme_context_init, gen9_hcpe_context_init);
810+ } else if (obj_config->profile == VAProfileJPEGBaseline)
811+ return intel_enc_hw_context_init(ctx, obj_config, gen8_vme_context_init, gen8_mfc_context_init);
812+ else
813+ return intel_enc_hw_context_init(ctx, obj_config, gen9_vme_context_init, gen9_mfc_context_init);
814+ }
810815 }
--- a/src/i965_encoder.h
+++ b/src/i965_encoder.h
@@ -36,6 +36,21 @@
3636 #include "i965_structs.h"
3737 #include "i965_drv_video.h"
3838
39+#define I965_BRC_NONE 0
40+#define I965_BRC_CBR 1
41+#define I965_BRC_VBR 2
42+#define I965_BRC_CQP 3
43+
44+struct intel_roi
45+{
46+ short left;
47+ short right;
48+ short top;
49+ short bottom;
50+
51+ char value;
52+};
53+
3954 struct intel_encoder_context
4055 {
4156 struct hw_context base;
--- a/src/intel_media.h
+++ b/src/intel_media.h
@@ -68,8 +68,18 @@ struct gen_vp9_surface
6868 dri_bo *motion_vector_temporal_bo;
6969 };
7070
71+typedef struct vdenc_avc_surface VDEncAvcSurface;
72+struct vdenc_avc_surface
73+{
74+ VADriverContextP ctx;
75+ VASurfaceID scaled_4x_surface_id;
76+ struct object_surface *scaled_4x_surface_obj;
77+};
78+
7179 extern void gen_free_hevc_surface(void **data);
7280
7381 extern void gen_free_vp9_surface(void **data);
7482
83+extern void vdenc_free_avc_surface(void **data);
84+
7585 #endif /* INTEL_MEDIA_H */
--- a/src/intel_media_common.c
+++ b/src/intel_media_common.c
@@ -128,3 +128,36 @@ void gen_free_vp9_surface(void **data)
128128
129129 pthread_mutex_unlock(&free_vp9_surface_lock);
130130 }
131+
132+extern VAStatus
133+i965_DestroySurfaces(VADriverContextP ctx,
134+ VASurfaceID *surface_list,
135+ int num_surfaces);
136+
137+static pthread_mutex_t free_vdenc_avc_surface_lock = PTHREAD_MUTEX_INITIALIZER;
138+
139+void
140+vdenc_free_avc_surface(void **data)
141+{
142+ VDEncAvcSurface *avc_surface;
143+
144+ pthread_mutex_lock(&free_vdenc_avc_surface_lock);
145+
146+ avc_surface = *data;
147+
148+ if (!avc_surface) {
149+ pthread_mutex_unlock(&free_vdenc_avc_surface_lock);
150+ return;
151+ }
152+
153+ if (avc_surface->scaled_4x_surface_obj) {
154+ i965_DestroySurfaces(avc_surface->ctx, &avc_surface->scaled_4x_surface_id, 1);
155+ avc_surface->scaled_4x_surface_id = VA_INVALID_SURFACE;
156+ avc_surface->scaled_4x_surface_obj = NULL;
157+ }
158+
159+ free(avc_surface);
160+ *data = NULL;
161+
162+ pthread_mutex_unlock(&free_vdenc_avc_surface_lock);
163+}