1
0
Fork 0

avcodec/wavpack: reduce extra dereferencing inside loops

This commit is contained in:
Paul B Mahol 2023-08-15 22:13:07 +02:00
parent 8653dcaf7d
commit 8f7850a22e
1 changed file with 66 additions and 60 deletions

View File

@ -794,71 +794,73 @@ static inline int wv_unpack_stereo(WavpackFrameContext *s, GetBitContext *gb,
if (last)
break;
for (i = 0; i < s->terms; i++) {
t = s->decorr[i].value;
Decorr *decorr = &s->decorr[i];
t = decorr->value;
if (t > 0) {
if (t > 8) {
if (t & 1) {
A = 2U * s->decorr[i].samplesA[0] - s->decorr[i].samplesA[1];
B = 2U * s->decorr[i].samplesB[0] - s->decorr[i].samplesB[1];
A = 2U * decorr->samplesA[0] - decorr->samplesA[1];
B = 2U * decorr->samplesB[0] - decorr->samplesB[1];
} else {
A = (int)(3U * s->decorr[i].samplesA[0] - s->decorr[i].samplesA[1]) >> 1;
B = (int)(3U * s->decorr[i].samplesB[0] - s->decorr[i].samplesB[1]) >> 1;
A = (int)(3U * decorr->samplesA[0] - decorr->samplesA[1]) >> 1;
B = (int)(3U * decorr->samplesB[0] - decorr->samplesB[1]) >> 1;
}
s->decorr[i].samplesA[1] = s->decorr[i].samplesA[0];
s->decorr[i].samplesB[1] = s->decorr[i].samplesB[0];
j = 0;
decorr->samplesA[1] = decorr->samplesA[0];
decorr->samplesB[1] = decorr->samplesB[0];
j = 0;
} else {
A = s->decorr[i].samplesA[pos];
B = s->decorr[i].samplesB[pos];
A = decorr->samplesA[pos];
B = decorr->samplesB[pos];
j = (pos + t) & 7;
}
if (type != AV_SAMPLE_FMT_S16P) {
L2 = L + ((s->decorr[i].weightA * (int64_t)A + 512) >> 10);
R2 = R + ((s->decorr[i].weightB * (int64_t)B + 512) >> 10);
L2 = L + ((decorr->weightA * (int64_t)A + 512) >> 10);
R2 = R + ((decorr->weightB * (int64_t)B + 512) >> 10);
} else {
L2 = L + (unsigned)((int)(s->decorr[i].weightA * (unsigned)A + 512) >> 10);
R2 = R + (unsigned)((int)(s->decorr[i].weightB * (unsigned)B + 512) >> 10);
L2 = L + (unsigned)((int)(decorr->weightA * (unsigned)A + 512) >> 10);
R2 = R + (unsigned)((int)(decorr->weightB * (unsigned)B + 512) >> 10);
}
if (A && L)
s->decorr[i].weightA -= ((((L ^ A) >> 30) & 2) - 1) * s->decorr[i].delta;
decorr->weightA -= ((((L ^ A) >> 30) & 2) - 1) * decorr->delta;
if (B && R)
s->decorr[i].weightB -= ((((R ^ B) >> 30) & 2) - 1) * s->decorr[i].delta;
s->decorr[i].samplesA[j] = L = L2;
s->decorr[i].samplesB[j] = R = R2;
decorr->weightB -= ((((R ^ B) >> 30) & 2) - 1) * decorr->delta;
decorr->samplesA[j] = L = L2;
decorr->samplesB[j] = R = R2;
} else if (t == -1) {
if (type != AV_SAMPLE_FMT_S16P)
L2 = L + ((s->decorr[i].weightA * (int64_t)s->decorr[i].samplesA[0] + 512) >> 10);
L2 = L + ((decorr->weightA * (int64_t)decorr->samplesA[0] + 512) >> 10);
else
L2 = L + (unsigned)((int)(s->decorr[i].weightA * (unsigned)s->decorr[i].samplesA[0] + 512) >> 10);
UPDATE_WEIGHT_CLIP(s->decorr[i].weightA, s->decorr[i].delta, s->decorr[i].samplesA[0], L);
L2 = L + (unsigned)((int)(decorr->weightA * (unsigned)decorr->samplesA[0] + 512) >> 10);
UPDATE_WEIGHT_CLIP(decorr->weightA, decorr->delta, decorr->samplesA[0], L);
L = L2;
if (type != AV_SAMPLE_FMT_S16P)
R2 = R + ((s->decorr[i].weightB * (int64_t)L2 + 512) >> 10);
R2 = R + ((decorr->weightB * (int64_t)L2 + 512) >> 10);
else
R2 = R + (unsigned)((int)(s->decorr[i].weightB * (unsigned)L2 + 512) >> 10);
UPDATE_WEIGHT_CLIP(s->decorr[i].weightB, s->decorr[i].delta, L2, R);
R = R2;
s->decorr[i].samplesA[0] = R;
R2 = R + (unsigned)((int)(decorr->weightB * (unsigned)L2 + 512) >> 10);
UPDATE_WEIGHT_CLIP(decorr->weightB, decorr->delta, L2, R);
R = R2;
decorr->samplesA[0] = R;
} else {
if (type != AV_SAMPLE_FMT_S16P)
R2 = R + ((s->decorr[i].weightB * (int64_t)s->decorr[i].samplesB[0] + 512) >> 10);
R2 = R + ((decorr->weightB * (int64_t)decorr->samplesB[0] + 512) >> 10);
else
R2 = R + (unsigned)((int)(s->decorr[i].weightB * (unsigned)s->decorr[i].samplesB[0] + 512) >> 10);
UPDATE_WEIGHT_CLIP(s->decorr[i].weightB, s->decorr[i].delta, s->decorr[i].samplesB[0], R);
R2 = R + (unsigned)((int)(decorr->weightB * (unsigned)decorr->samplesB[0] + 512) >> 10);
UPDATE_WEIGHT_CLIP(decorr->weightB, decorr->delta, decorr->samplesB[0], R);
R = R2;
if (t == -3) {
R2 = s->decorr[i].samplesA[0];
s->decorr[i].samplesA[0] = R;
R2 = decorr->samplesA[0];
decorr->samplesA[0] = R;
}
if (type != AV_SAMPLE_FMT_S16P)
L2 = L + ((s->decorr[i].weightA * (int64_t)R2 + 512) >> 10);
L2 = L + ((decorr->weightA * (int64_t)R2 + 512) >> 10);
else
L2 = L + (unsigned)((int)(s->decorr[i].weightA * (unsigned)R2 + 512) >> 10);
UPDATE_WEIGHT_CLIP(s->decorr[i].weightA, s->decorr[i].delta, R2, L);
L = L2;
s->decorr[i].samplesB[0] = L;
L2 = L + (unsigned)((int)(decorr->weightA * (unsigned)R2 + 512) >> 10);
UPDATE_WEIGHT_CLIP(decorr->weightA, decorr->delta, R2, L);
L = L2;
decorr->samplesB[0] = L;
}
}
@ -920,25 +922,27 @@ static inline int wv_unpack_mono(WavpackFrameContext *s, GetBitContext *gb,
if (last)
break;
for (i = 0; i < s->terms; i++) {
t = s->decorr[i].value;
Decorr *decorr = &s->decorr[i];
t = decorr->value;
if (t > 8) {
if (t & 1)
A = 2U * s->decorr[i].samplesA[0] - s->decorr[i].samplesA[1];
A = 2U * decorr->samplesA[0] - decorr->samplesA[1];
else
A = (int)(3U * s->decorr[i].samplesA[0] - s->decorr[i].samplesA[1]) >> 1;
s->decorr[i].samplesA[1] = s->decorr[i].samplesA[0];
j = 0;
A = (int)(3U * decorr->samplesA[0] - decorr->samplesA[1]) >> 1;
decorr->samplesA[1] = decorr->samplesA[0];
j = 0;
} else {
A = s->decorr[i].samplesA[pos];
A = decorr->samplesA[pos];
j = (pos + t) & 7;
}
if (type != AV_SAMPLE_FMT_S16P)
S = T + ((s->decorr[i].weightA * (int64_t)A + 512) >> 10);
S = T + ((decorr->weightA * (int64_t)A + 512) >> 10);
else
S = T + (unsigned)((int)(s->decorr[i].weightA * (unsigned)A + 512) >> 10);
S = T + (unsigned)((int)(decorr->weightA * (unsigned)A + 512) >> 10);
if (A && T)
s->decorr[i].weightA -= ((((T ^ A) >> 30) & 2) - 1) * s->decorr[i].delta;
s->decorr[i].samplesA[j] = T = S;
decorr->weightA -= ((((T ^ A) >> 30) & 2) - 1) * decorr->delta;
decorr->samplesA[j] = T = S;
}
pos = (pos + 1) & 7;
crc = crc * 3 + S;
@ -1219,36 +1223,38 @@ static int wavpack_decode_block(AVCodecContext *avctx, int block_no,
}
t = 0;
for (i = s->terms - 1; (i >= 0) && (t < size); i--) {
if (s->decorr[i].value > 8) {
s->decorr[i].samplesA[0] =
Decorr *decorr = &s->decorr[i];
if (decorr->value > 8) {
decorr->samplesA[0] =
wp_exp2(bytestream2_get_le16(&gb));
s->decorr[i].samplesA[1] =
decorr->samplesA[1] =
wp_exp2(bytestream2_get_le16(&gb));
if (s->stereo_in) {
s->decorr[i].samplesB[0] =
decorr->samplesB[0] =
wp_exp2(bytestream2_get_le16(&gb));
s->decorr[i].samplesB[1] =
decorr->samplesB[1] =
wp_exp2(bytestream2_get_le16(&gb));
t += 4;
t += 4;
}
t += 4;
} else if (s->decorr[i].value < 0) {
s->decorr[i].samplesA[0] =
} else if (decorr->value < 0) {
decorr->samplesA[0] =
wp_exp2(bytestream2_get_le16(&gb));
s->decorr[i].samplesB[0] =
decorr->samplesB[0] =
wp_exp2(bytestream2_get_le16(&gb));
t += 4;
t += 4;
} else {
for (j = 0; j < s->decorr[i].value; j++) {
s->decorr[i].samplesA[j] =
for (j = 0; j < decorr->value; j++) {
decorr->samplesA[j] =
wp_exp2(bytestream2_get_le16(&gb));
if (s->stereo_in) {
s->decorr[i].samplesB[j] =
decorr->samplesB[j] =
wp_exp2(bytestream2_get_le16(&gb));
}
}
t += s->decorr[i].value * 2 * (s->stereo_in + 1);
t += decorr->value * 2 * (s->stereo_in + 1);
}
}
got_samples = 1;