Rename "continuous" to "contiguous" (#511)

This commit is contained in:
Louis Fortier-Dubois 2023-07-20 11:28:35 -04:00 committed by GitHub
parent 62ab554df8
commit 4b60c0e7a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 35 additions and 35 deletions

View File

@ -58,7 +58,7 @@ mod tests {
}
#[test]
fn test_should_sum_mid_dim_3d_non_continuous_1() {
fn test_should_sum_mid_dim_3d_non_contiguous_1() {
let tensor = TestTensor::from_data([
[[2.0, 4.0, 1.0], [7.0, -5.0, 3.0]],
[[3.0, 1.0, 2.0], [4.0, 2.0, 3.0]],
@ -73,7 +73,7 @@ mod tests {
}
#[test]
fn test_should_sum_mid_dim_3d_non_continuous_2() {
fn test_should_sum_mid_dim_3d_non_contiguous_2() {
let tensor = TestTensor::from_data([
[[2.0, 4.0, 1.0], [7.0, -5.0, 3.0]],
[[3.0, 1.0, 2.0], [4.0, 2.0, 3.0]],

View File

@ -61,7 +61,7 @@ mod tests {
}
#[test]
fn should_support_partial_sliceing_3d_non_continuous() {
fn should_support_partial_sliceing_3d_non_contiguous() {
let tensor = TestTensor::from_floats([
[[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]],
[[6.0, 7.0, 8.0], [9.0, 10.0, 11.0]],

View File

@ -2,7 +2,7 @@ use burn_tensor::{backend::Backend, Distribution, Shape, Tensor};
use burn_wgpu::{
benchmark::Benchmark,
kernel::matmul::{
continuous, continuous_vectorized, matmul_mem_coalescing_default, matmul_naive_default,
contiguous, contiguous_vectorized, matmul_mem_coalescing_default, matmul_naive_default,
tile, tile_vectorized,
},
run_benchmark, GraphicsApi, WgpuBackend, WgpuDevice,
@ -75,8 +75,8 @@ macro_rules! benchmark {
benchmark!(NaiveMatmul, matmul_naive_default);
benchmark!(MemCoalescingMatmul, matmul_mem_coalescing_default);
benchmark!(
Tiling2DMatmulContinuous,
continuous::matmul_tiling_2d_default
Tiling2DMatmulContiguous,
contiguous::matmul_tiling_2d_default
);
benchmark!(Tiling2DMatmulTile, tile::matmul_tiling_2d_default);
benchmark!(
@ -84,8 +84,8 @@ benchmark!(
tile_vectorized::matmul_tiling_2d_default
);
benchmark!(
Tiling2DMatmulContinuousVectorized,
continuous_vectorized::matmul_tiling_2d_default
Tiling2DMatmulContiguousVectorized,
contiguous_vectorized::matmul_tiling_2d_default
);
fn main() {
@ -98,13 +98,13 @@ fn main() {
num_repeats,
matmul: PhantomData
});
run_benchmark!(MatmulBenchmark::<Tiling2DMatmulContinuous, 3> {
run_benchmark!(MatmulBenchmark::<Tiling2DMatmulContiguous, 3> {
shape_lhs: [batch_size, matrix_size, matrix_size].into(),
shape_rhs: [batch_size, matrix_size, matrix_size].into(),
num_repeats,
matmul: PhantomData
});
run_benchmark!(MatmulBenchmark::<Tiling2DMatmulContinuousVectorized, 3> {
run_benchmark!(MatmulBenchmark::<Tiling2DMatmulContiguousVectorized, 3> {
shape_lhs: [batch_size, matrix_size, matrix_size].into(),
shape_rhs: [batch_size, matrix_size, matrix_size].into(),
num_repeats,

View File

@ -35,12 +35,12 @@ macro_rules! kernel_wgsl {
};
}
kernel_wgsl!(ContinuousRaw, "../template/continuous.wgsl");
kernel_wgsl!(ContiguousRaw, "../template/contiguous.wgsl");
pub(crate) fn into_continuous<E: WgpuElement, const D: usize>(
pub(crate) fn into_contiguous<E: WgpuElement, const D: usize>(
tensor: WgpuTensor<E, D>,
) -> WgpuTensor<E, D> {
if tensor.is_continuous() {
if tensor.is_contiguous() {
return tensor;
}
@ -58,7 +58,7 @@ pub(crate) fn into_continuous<E: WgpuElement, const D: usize>(
let kernel = tensor
.context
.compile_static::<KernelSettings<ContinuousRaw, E, i32, WORKGROUP, WORKGROUP, 1>>();
.compile_static::<KernelSettings<ContiguousRaw, E, i32, WORKGROUP, WORKGROUP, 1>>();
tensor.context.execute(
elemwise_workgroup(num_elems, WORKGROUP),

View File

@ -16,7 +16,7 @@ pub(crate) fn gather<E: WgpuElement, I: WgpuElement, const D: usize>(
let shape_output = indices.shape.clone();
let num_elems = shape_output.num_elements();
let indices = kernel::into_continuous(indices);
let indices = kernel::into_contiguous(indices);
let buffer = tensor
.context

View File

@ -15,9 +15,9 @@ pub(crate) fn scatter<E: WgpuElement, I: WgpuElement, const D: usize>(
) -> WgpuTensor<E, D> {
const WORKGROUP: usize = 32;
let indices = kernel::into_continuous(indices);
let tensor = kernel::into_continuous(tensor);
let value = kernel::into_continuous(value);
let indices = kernel::into_contiguous(indices);
let tensor = kernel::into_contiguous(tensor);
let value = kernel::into_contiguous(value);
let tensor = match tensor.can_mut() {
true => tensor,

View File

@ -7,6 +7,6 @@ use crate::{
};
matmul_tile_2d!(
MatmulTiling2DContinuous,
"../../../template/matmul/blocktiling_2d/continuous.wgsl"
MatmulTiling2DContiguous,
"../../../template/matmul/blocktiling_2d/contiguous.wgsl"
);

View File

@ -7,6 +7,6 @@ use crate::{
};
matmul_tile_2d!(
MatmulTiling2DContinuousVectorized,
"../../../template/matmul/blocktiling_2d/continuous_vectorized.wgsl"
MatmulTiling2DContiguousVectorized,
"../../../template/matmul/blocktiling_2d/contiguous_vectorized.wgsl"
);

View File

@ -1,13 +1,13 @@
mod base;
mod padding;
/// Loading is done in a continuous manner
pub mod continuous;
/// Loading is done in a continuous manner. lhs is transposed
pub mod continuous_vectorized;
/// Loading to shared memory is done in a contiguous manner
pub mod contiguous;
/// Loading is done in a contiguous manner, with left hand tensor being transposed.
pub mod contiguous_vectorized;
/// Loading is done in a tile manner
pub mod tile;
/// Loading is done in a tile manner. lhs is transposed
/// Loading is done in a tile manner, with left hand tensor being transposed.
pub mod tile_vectorized;
pub use tile_vectorized::*;

View File

@ -46,7 +46,7 @@ pub(crate) fn avg_pool2d_backward<E: WgpuElement>(
) -> WgpuTensor<E, 4> {
const WORKGROUP: usize = 32;
let grad = kernel::into_continuous(grad);
let grad = kernel::into_contiguous(grad);
let num_elems = x.shape.num_elements();
let buffer = x

View File

@ -78,8 +78,8 @@ pub(crate) fn max_pool2d_with_indices_backward<E: WgpuElement, I: WgpuElement>(
) -> WgpuTensor<E, 4> {
const WORKGROUP: usize = 32;
let grad = kernel::into_continuous(grad);
let indices = kernel::into_continuous(indices);
let grad = kernel::into_contiguous(grad);
let indices = kernel::into_contiguous(indices);
let num_elems = x.shape.num_elements();
let buffer = x

View File

@ -23,7 +23,7 @@ pub fn from_data<G: GraphicsApi, E: WgpuElement, const D: usize>(
}
pub fn into_data<E: WgpuElement, const D: usize>(tensor: WgpuTensor<E, D>) -> Data<E, D> {
let tensor = kernel::into_continuous(tensor);
let tensor = kernel::into_contiguous(tensor);
let bytes = tensor.context.read_buffer(tensor.buffer);
let values = E::from_bytes(&bytes);
@ -68,7 +68,7 @@ pub fn reshape<E: WgpuElement, const D1: usize, const D2: usize>(
shape: Shape<D2>,
) -> WgpuTensor<E, D2> {
// TODO: Avoid forcing the standard layout every time (to improve performance).
let tensor = kernel::into_continuous(tensor);
let tensor = kernel::into_contiguous(tensor);
WgpuTensor::new(tensor.context, shape, tensor.buffer)
}

View File

@ -136,8 +136,8 @@ where
lhs: FloatTensor<Self, D>,
rhs: FloatTensor<Self, D>,
) -> FloatTensor<Self, D> {
let lhs = kernel::into_continuous(lhs);
let rhs = kernel::into_continuous(rhs);
let lhs = kernel::into_contiguous(lhs);
let rhs = kernel::into_contiguous(rhs);
kernel::matmul::tile_vectorized::matmul_tiling_2d_default(lhs, rhs)
}

View File

@ -93,7 +93,7 @@ impl<E: WgpuElement, const D: usize> WgpuTensor<E, D> {
}
}
pub fn is_continuous(&self) -> bool {
pub fn is_contiguous(&self) -> bool {
let mut current_stride = 0;
for d in 0..D {
let stride = self.strides[D - 1 - d];