@@ -119,10 +119,14 @@ func (t *scaleableClientTrack) isKeyframe(vp9 *codecs.VP9Packet) bool {
119
119
if len (vp9 .Payload ) < 1 {
120
120
return false
121
121
}
122
+ // Per the VP9 RTP payload format, the B bit is set to 1 when the packet is the beginning of a new VP9 frame
123
+ // For keyframes, P bit should be 0 (no inter-picture prediction)
122
124
if ! vp9 .B {
123
125
return false
124
126
}
125
127
128
+ // Check for keyframe by examining the VP9 payload
129
+ // The top two bits of the first payload byte must be 0b10 (presumably the VP9 frame marker); anything else is not treated as a keyframe
126
130
if (vp9 .Payload [0 ] & 0xc0 ) != 0x80 {
127
131
return false
128
132
}
@@ -172,31 +176,47 @@ func (t *scaleableClientTrack) push(p *rtp.Packet, _ QualityLevel) {
172
176
currentTID := t .tid
173
177
currentSID := t .sid
174
178
175
- // check if possible to scale up/down temporal layer
179
+ // Temporal layer switching according to VP9 RFC
180
+ // U bit indicates a switching up point where we can safely switch to a higher frame rate
176
181
if t .tid < targetTID {
177
182
if vp9Packet .U && vp9Packet .B && currentTID < vp9Packet .TID && vp9Packet .TID <= targetTID {
178
- // scale temporal up
183
+ // Scale temporal up - U bit confirms it's safe to switch up
179
184
t .tid = vp9Packet .TID
180
185
currentTID = t .tid
181
186
}
182
187
} else if t .tid > targetTID {
183
188
if vp9Packet .E {
184
- // scale temporal down
189
+ // Scale temporal down - safe at the end of a frame
185
190
t .tid = vp9Packet .TID
186
191
}
187
192
}
188
193
189
- // check if possible to scale up spatial layer
190
-
194
+ // Spatial layer switching according to VP9 RFC
195
+ // D bit indicates inter-layer dependency
191
196
if currentSID < targetSID {
192
- if ! vp9Packet .P && vp9Packet .B && currentSID < vp9Packet .SID && vp9Packet .SID <= targetSID {
193
- // scale spatial up
194
- t .sid = vp9Packet .SID
195
- currentSID = t .sid
197
+ // Switching up to higher spatial layer
198
+ // For scaling up, we should ensure this is the start of a frame (B=1)
199
+ // and the layer doesn't depend on layers we might have skipped
200
+ if vp9Packet .B && currentSID < vp9Packet .SID && vp9Packet .SID <= targetSID {
201
+ // For non-base layers, check the D bit to understand dependencies
202
+ if vp9Packet .SID > 0 && vp9Packet .D {
203
+ // This layer depends on the previous spatial layer
204
+ // Only switch if we have the previous layer
205
+ if vp9Packet .SID == currentSID + 1 {
206
+ t .sid = vp9Packet .SID
207
+ currentSID = t .sid
208
+ }
209
+ } else {
210
+ // This layer doesn't depend on previous spatial layer (D=0) or is base layer
211
+ // Safe to switch to this layer
212
+ t .sid = vp9Packet .SID
213
+ currentSID = t .sid
214
+ }
196
215
}
197
216
} else if currentSID > targetSID {
217
+ // Switching down to lower spatial layer
218
+ // Safe to switch down at the end of a frame
198
219
if vp9Packet .E {
199
- // scale spatsial down
200
220
t .sid = vp9Packet .SID
201
221
}
202
222
}
@@ -205,16 +225,36 @@ func (t *scaleableClientTrack) push(p *rtp.Packet, _ QualityLevel) {
205
225
t .setLastQuality (quality )
206
226
}
207
227
228
+ // Determine if packet should be dropped
229
+ shouldDrop := false
230
+
231
+ // Drop packets above the temporal layer currently being forwarded (currentTID)
232
+ if vp9Packet .TID > currentTID {
233
+ shouldDrop = true
234
+ }
235
+
236
+ // Drop packets above the spatial layer currently being forwarded (currentSID)
237
+ if vp9Packet .SID > currentSID {
238
+ shouldDrop = true
239
+ }
240
+
241
+ // Drop packets from non-reference frames for higher spatial layers that we're not using
242
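+ // The low bit of the first payload byte is read as the Z bit of the VP9 payload descriptor (not a reference frame for upper spatial layers)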
208
243
sidNonReference := (p .Payload [0 ] & 0x01 ) != 0
209
- if currentTID < vp9Packet .TID || currentSID < vp9Packet .SID || (vp9Packet .SID > currentSID && sidNonReference ) {
210
- // t.client.log.Infof("scalabletrack: packet ", p.SequenceNumber, " is dropped because of currentTID ", currentTID, " < vp9Packet.TID", vp9Packet.TID)
244
+ if vp9Packet .SID > currentSID && sidNonReference {
245
+ shouldDrop = true
246
+ }
247
+
248
+ if shouldDrop {
211
249
ok := t .packetmap .Drop (p .SequenceNumber , vp9Packet .PictureID )
212
250
if ok {
213
251
return
214
252
}
215
253
}
216
254
217
- // mark packet as a last spatial layer packet
255
+ // Mark packet as a last spatial layer packet
256
+ // According to RFC: Marker bit (M) MUST be set to 1 for the final packet of the
257
+ // highest spatial layer frame (the final packet of the picture)
218
258
if vp9Packet .E && currentSID == vp9Packet .SID && targetSID <= currentSID {
219
259
p .Marker = true
220
260
}
@@ -236,9 +276,9 @@ func (t *scaleableClientTrack) push(p *rtp.Packet, _ QualityLevel) {
236
276
237
277
p .SequenceNumber = newseqno
238
278
239
- // if quality is none we need to send blank frame
240
- // make sure the player is paused when the quality is none.
241
- // quality none only possible when the video is not displayed
279
+ // If quality is none we need to send blank frame
280
+ // Make sure the player is paused when the quality is none.
281
+ // Quality none only possible when the video is not displayed
242
282
if quality == QualityNone {
243
283
if ok := t .packetmap .Drop (p .SequenceNumber , vp9Packet .PictureID ); ok {
244
284
return
0 commit comments