mirror of https://github.com/tracel-ai/burn.git
continuous to contiguous (#511)
This commit is contained in:
parent
62ab554df8
commit
4b60c0e7a0
|
@ -58,7 +58,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_sum_mid_dim_3d_non_continuous_1() {
|
||||
fn test_should_sum_mid_dim_3d_non_contiguous_1() {
|
||||
let tensor = TestTensor::from_data([
|
||||
[[2.0, 4.0, 1.0], [7.0, -5.0, 3.0]],
|
||||
[[3.0, 1.0, 2.0], [4.0, 2.0, 3.0]],
|
||||
|
@ -73,7 +73,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn test_should_sum_mid_dim_3d_non_continuous_2() {
|
||||
fn test_should_sum_mid_dim_3d_non_contiguous_2() {
|
||||
let tensor = TestTensor::from_data([
|
||||
[[2.0, 4.0, 1.0], [7.0, -5.0, 3.0]],
|
||||
[[3.0, 1.0, 2.0], [4.0, 2.0, 3.0]],
|
||||
|
|
|
@ -61,7 +61,7 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn should_support_partial_sliceing_3d_non_continuous() {
|
||||
fn should_support_partial_sliceing_3d_non_contiguous() {
|
||||
let tensor = TestTensor::from_floats([
|
||||
[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]],
|
||||
[[6.0, 7.0, 8.0], [9.0, 10.0, 11.0]],
|
||||
|
|
|
@ -2,7 +2,7 @@ use burn_tensor::{backend::Backend, Distribution, Shape, Tensor};
|
|||
use burn_wgpu::{
|
||||
benchmark::Benchmark,
|
||||
kernel::matmul::{
|
||||
continuous, continuous_vectorized, matmul_mem_coalescing_default, matmul_naive_default,
|
||||
contiguous, contiguous_vectorized, matmul_mem_coalescing_default, matmul_naive_default,
|
||||
tile, tile_vectorized,
|
||||
},
|
||||
run_benchmark, GraphicsApi, WgpuBackend, WgpuDevice,
|
||||
|
@ -75,8 +75,8 @@ macro_rules! benchmark {
|
|||
benchmark!(NaiveMatmul, matmul_naive_default);
|
||||
benchmark!(MemCoalescingMatmul, matmul_mem_coalescing_default);
|
||||
benchmark!(
|
||||
Tiling2DMatmulContinuous,
|
||||
continuous::matmul_tiling_2d_default
|
||||
Tiling2DMatmulContiguous,
|
||||
contiguous::matmul_tiling_2d_default
|
||||
);
|
||||
benchmark!(Tiling2DMatmulTile, tile::matmul_tiling_2d_default);
|
||||
benchmark!(
|
||||
|
@ -84,8 +84,8 @@ benchmark!(
|
|||
tile_vectorized::matmul_tiling_2d_default
|
||||
);
|
||||
benchmark!(
|
||||
Tiling2DMatmulContinuousVectorized,
|
||||
continuous_vectorized::matmul_tiling_2d_default
|
||||
Tiling2DMatmulContiguousVectorized,
|
||||
contiguous_vectorized::matmul_tiling_2d_default
|
||||
);
|
||||
|
||||
fn main() {
|
||||
|
@ -98,13 +98,13 @@ fn main() {
|
|||
num_repeats,
|
||||
matmul: PhantomData
|
||||
});
|
||||
run_benchmark!(MatmulBenchmark::<Tiling2DMatmulContinuous, 3> {
|
||||
run_benchmark!(MatmulBenchmark::<Tiling2DMatmulContiguous, 3> {
|
||||
shape_lhs: [batch_size, matrix_size, matrix_size].into(),
|
||||
shape_rhs: [batch_size, matrix_size, matrix_size].into(),
|
||||
num_repeats,
|
||||
matmul: PhantomData
|
||||
});
|
||||
run_benchmark!(MatmulBenchmark::<Tiling2DMatmulContinuousVectorized, 3> {
|
||||
run_benchmark!(MatmulBenchmark::<Tiling2DMatmulContiguousVectorized, 3> {
|
||||
shape_lhs: [batch_size, matrix_size, matrix_size].into(),
|
||||
shape_rhs: [batch_size, matrix_size, matrix_size].into(),
|
||||
num_repeats,
|
||||
|
|
|
@ -35,12 +35,12 @@ macro_rules! kernel_wgsl {
|
|||
};
|
||||
}
|
||||
|
||||
kernel_wgsl!(ContinuousRaw, "../template/continuous.wgsl");
|
||||
kernel_wgsl!(ContiguousRaw, "../template/contiguous.wgsl");
|
||||
|
||||
pub(crate) fn into_continuous<E: WgpuElement, const D: usize>(
|
||||
pub(crate) fn into_contiguous<E: WgpuElement, const D: usize>(
|
||||
tensor: WgpuTensor<E, D>,
|
||||
) -> WgpuTensor<E, D> {
|
||||
if tensor.is_continuous() {
|
||||
if tensor.is_contiguous() {
|
||||
return tensor;
|
||||
}
|
||||
|
||||
|
@ -58,7 +58,7 @@ pub(crate) fn into_continuous<E: WgpuElement, const D: usize>(
|
|||
|
||||
let kernel = tensor
|
||||
.context
|
||||
.compile_static::<KernelSettings<ContinuousRaw, E, i32, WORKGROUP, WORKGROUP, 1>>();
|
||||
.compile_static::<KernelSettings<ContiguousRaw, E, i32, WORKGROUP, WORKGROUP, 1>>();
|
||||
|
||||
tensor.context.execute(
|
||||
elemwise_workgroup(num_elems, WORKGROUP),
|
||||
|
|
|
@ -16,7 +16,7 @@ pub(crate) fn gather<E: WgpuElement, I: WgpuElement, const D: usize>(
|
|||
|
||||
let shape_output = indices.shape.clone();
|
||||
let num_elems = shape_output.num_elements();
|
||||
let indices = kernel::into_continuous(indices);
|
||||
let indices = kernel::into_contiguous(indices);
|
||||
|
||||
let buffer = tensor
|
||||
.context
|
||||
|
|
|
@ -15,9 +15,9 @@ pub(crate) fn scatter<E: WgpuElement, I: WgpuElement, const D: usize>(
|
|||
) -> WgpuTensor<E, D> {
|
||||
const WORKGROUP: usize = 32;
|
||||
|
||||
let indices = kernel::into_continuous(indices);
|
||||
let tensor = kernel::into_continuous(tensor);
|
||||
let value = kernel::into_continuous(value);
|
||||
let indices = kernel::into_contiguous(indices);
|
||||
let tensor = kernel::into_contiguous(tensor);
|
||||
let value = kernel::into_contiguous(value);
|
||||
|
||||
let tensor = match tensor.can_mut() {
|
||||
true => tensor,
|
||||
|
|
|
@ -7,6 +7,6 @@ use crate::{
|
|||
};
|
||||
|
||||
matmul_tile_2d!(
|
||||
MatmulTiling2DContinuous,
|
||||
"../../../template/matmul/blocktiling_2d/continuous.wgsl"
|
||||
MatmulTiling2DContiguous,
|
||||
"../../../template/matmul/blocktiling_2d/contiguous.wgsl"
|
||||
);
|
|
@ -7,6 +7,6 @@ use crate::{
|
|||
};
|
||||
|
||||
matmul_tile_2d!(
|
||||
MatmulTiling2DContinuousVectorized,
|
||||
"../../../template/matmul/blocktiling_2d/continuous_vectorized.wgsl"
|
||||
MatmulTiling2DContiguousVectorized,
|
||||
"../../../template/matmul/blocktiling_2d/contiguous_vectorized.wgsl"
|
||||
);
|
|
@ -1,13 +1,13 @@
|
|||
mod base;
|
||||
mod padding;
|
||||
|
||||
/// Loading is done in a continuous manner
|
||||
pub mod continuous;
|
||||
/// Loading is done in a continuous manner. lhs is transposed
|
||||
pub mod continuous_vectorized;
|
||||
/// Loading to shared memory is done in a contiguous manner
|
||||
pub mod contiguous;
|
||||
/// Loading is done in a contiguous manner, with left hand tensor being transposed.
|
||||
pub mod contiguous_vectorized;
|
||||
/// Loading is done in a tile manner
|
||||
pub mod tile;
|
||||
/// Loading is done in a tile manner. lhs is transposed
|
||||
/// Loading is done in a tile manner, with left hand tensor being transposed.
|
||||
pub mod tile_vectorized;
|
||||
|
||||
pub use tile_vectorized::*;
|
||||
|
|
|
@ -46,7 +46,7 @@ pub(crate) fn avg_pool2d_backward<E: WgpuElement>(
|
|||
) -> WgpuTensor<E, 4> {
|
||||
const WORKGROUP: usize = 32;
|
||||
|
||||
let grad = kernel::into_continuous(grad);
|
||||
let grad = kernel::into_contiguous(grad);
|
||||
|
||||
let num_elems = x.shape.num_elements();
|
||||
let buffer = x
|
||||
|
|
|
@ -78,8 +78,8 @@ pub(crate) fn max_pool2d_with_indices_backward<E: WgpuElement, I: WgpuElement>(
|
|||
) -> WgpuTensor<E, 4> {
|
||||
const WORKGROUP: usize = 32;
|
||||
|
||||
let grad = kernel::into_continuous(grad);
|
||||
let indices = kernel::into_continuous(indices);
|
||||
let grad = kernel::into_contiguous(grad);
|
||||
let indices = kernel::into_contiguous(indices);
|
||||
|
||||
let num_elems = x.shape.num_elements();
|
||||
let buffer = x
|
||||
|
|
|
@ -23,7 +23,7 @@ pub fn from_data<G: GraphicsApi, E: WgpuElement, const D: usize>(
|
|||
}
|
||||
|
||||
pub fn into_data<E: WgpuElement, const D: usize>(tensor: WgpuTensor<E, D>) -> Data<E, D> {
|
||||
let tensor = kernel::into_continuous(tensor);
|
||||
let tensor = kernel::into_contiguous(tensor);
|
||||
let bytes = tensor.context.read_buffer(tensor.buffer);
|
||||
let values = E::from_bytes(&bytes);
|
||||
|
||||
|
@ -68,7 +68,7 @@ pub fn reshape<E: WgpuElement, const D1: usize, const D2: usize>(
|
|||
shape: Shape<D2>,
|
||||
) -> WgpuTensor<E, D2> {
|
||||
// TODO: Not force standard layout all the time (improve performance).
|
||||
let tensor = kernel::into_continuous(tensor);
|
||||
let tensor = kernel::into_contiguous(tensor);
|
||||
|
||||
WgpuTensor::new(tensor.context, shape, tensor.buffer)
|
||||
}
|
||||
|
|
|
@ -136,8 +136,8 @@ where
|
|||
lhs: FloatTensor<Self, D>,
|
||||
rhs: FloatTensor<Self, D>,
|
||||
) -> FloatTensor<Self, D> {
|
||||
let lhs = kernel::into_continuous(lhs);
|
||||
let rhs = kernel::into_continuous(rhs);
|
||||
let lhs = kernel::into_contiguous(lhs);
|
||||
let rhs = kernel::into_contiguous(rhs);
|
||||
|
||||
kernel::matmul::tile_vectorized::matmul_tiling_2d_default(lhs, rhs)
|
||||
}
|
||||
|
|
|
@ -93,7 +93,7 @@ impl<E: WgpuElement, const D: usize> WgpuTensor<E, D> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn is_continuous(&self) -> bool {
|
||||
pub fn is_contiguous(&self) -> bool {
|
||||
let mut current_stride = 0;
|
||||
for d in 0..D {
|
||||
let stride = self.strides[D - 1 - d];
|
||||
|
|
Loading…
Reference in New Issue