Another transmute tweak. (#610)
* Another transmute tweak.
* Changelog tweak.
This commit is contained in:
parent
06b37ea7ad
commit
fdf15f0e05
|
@ -3,6 +3,8 @@ This documents the main changes to the `candle` crate.
|
||||||
|
|
||||||
## Unreleased
|
## Unreleased
|
||||||
### Added
|
### Added
|
||||||
|
- Support more quantized types, e.g. Q2K, Q4K, Q5K...
|
||||||
|
[586](https://github.com/huggingface/candle/pull/586).
|
||||||
- Add pose estimation to the yolo example
|
- Add pose estimation to the yolo example
|
||||||
[589](https://github.com/huggingface/candle/pull/589).
|
[589](https://github.com/huggingface/candle/pull/589).
|
||||||
- Api to write GGUF files
|
- Api to write GGUF files
|
||||||
|
|
|
@ -651,7 +651,6 @@ impl GgmlType for BlockQ3K {
|
||||||
let mut aux32: [i32; 8] = [0; 8];
|
let mut aux32: [i32; 8] = [0; 8];
|
||||||
|
|
||||||
let mut auxs: [u32; 4] = [0; 4];
|
let mut auxs: [u32; 4] = [0; 4];
|
||||||
let mut scales: &[i8; 16];
|
|
||||||
|
|
||||||
for (x, y) in xs.iter().zip(ys.iter()) {
|
for (x, y) in xs.iter().zip(ys.iter()) {
|
||||||
let mut q3: &[u8] = &x.qs;
|
let mut q3: &[u8] = &x.qs;
|
||||||
|
@ -728,14 +727,14 @@ impl GgmlType for BlockQ3K {
|
||||||
auxs[0] = (auxs[0] & KMASK2) | (((tmp) & KMASK1) << 4);
|
auxs[0] = (auxs[0] & KMASK2) | (((tmp) & KMASK1) << 4);
|
||||||
auxs[1] = (auxs[1] & KMASK2) | (((tmp >> 2) & KMASK1) << 4);
|
auxs[1] = (auxs[1] & KMASK2) | (((tmp >> 2) & KMASK1) << 4);
|
||||||
|
|
||||||
scales = unsafe { std::mem::transmute::<&mut [u32; 4], &mut [i8; 16]>(&mut auxs) };
|
for aux in auxs {
|
||||||
|
for scale in aux.to_le_bytes() {
|
||||||
for scale in scales {
|
let scale = i8::from_be_bytes([scale]);
|
||||||
for l in 0..8 {
|
for l in 0..8 {
|
||||||
aux16[l] = q8[l] as i16 * a[l] as i16;
|
aux16[l] = q8[l] as i16 * a[l] as i16;
|
||||||
}
|
}
|
||||||
for l in 0..8 {
|
for l in 0..8 {
|
||||||
aux32[l] += (*scale as i32 - 32) * aux16[l] as i32;
|
aux32[l] += (scale as i32 - 32) * aux16[l] as i32;
|
||||||
}
|
}
|
||||||
q8 = &q8[8..];
|
q8 = &q8[8..];
|
||||||
a = &mut a[8..];
|
a = &mut a[8..];
|
||||||
|
@ -744,12 +743,12 @@ impl GgmlType for BlockQ3K {
|
||||||
aux16[l] = q8[l] as i16 * a[l] as i16;
|
aux16[l] = q8[l] as i16 * a[l] as i16;
|
||||||
}
|
}
|
||||||
for l in 0..8 {
|
for l in 0..8 {
|
||||||
aux32[l] += (*scale as i32 - 32) * aux16[l] as i32;
|
aux32[l] += (scale as i32 - 32) * aux16[l] as i32;
|
||||||
}
|
}
|
||||||
q8 = &q8[8..];
|
q8 = &q8[8..];
|
||||||
a = &mut a[8..];
|
a = &mut a[8..];
|
||||||
}
|
}
|
||||||
|
}
|
||||||
let d = x.d.to_f32() * y.d;
|
let d = x.d.to_f32() * y.d;
|
||||||
for l in 0..8 {
|
for l in 0..8 {
|
||||||
sums[l] += d * aux32[l] as f32;
|
sums[l] += d * aux32[l] as f32;
|
||||||
|
|
Loading…
Reference in New Issue