diff --git a/hercules_samples/call/build.rs b/hercules_samples/call/build.rs index af48fe64f7c2b778c9841e45ecf13b8a6a5740d2..7f5816ce0992e02e0c9d4da83617d8b8414eb3c2 100644 --- a/hercules_samples/call/build.rs +++ b/hercules_samples/call/build.rs @@ -4,6 +4,8 @@ fn main() { JunoCompiler::new() .ir_in_src("call.hir") .unwrap() + .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .unwrap() .build() .unwrap(); } diff --git a/hercules_samples/call/src/cpu.sch b/hercules_samples/call/src/cpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..4c684da2f176dd3675dc79fa506d2d60fe6e0577 --- /dev/null +++ b/hercules_samples/call/src/cpu.sch @@ -0,0 +1,19 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); + +ip-sroa(*); +sroa(*); +fork-split(*); +unforkify(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/hercules_samples/call/src/gpu.sch b/hercules_samples/call/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..1e654e22664a5b9c79a20888d1f013f9256d7d82 --- /dev/null +++ b/hercules_samples/call/src/gpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + +let out = auto-outline(*); +gpu(out.add); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/hercules_samples/ccp/build.rs b/hercules_samples/ccp/build.rs index f04d48c7d0ea6df8b16d70b05cedabfc04c1f6f2..c98d0551654113ee26523179feae3322383f0d01 100644 --- a/hercules_samples/ccp/build.rs +++ b/hercules_samples/ccp/build.rs @@ -4,6 +4,8 @@ fn main() { JunoCompiler::new() .ir_in_src("ccp.hir") .unwrap() + .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .unwrap() .build() .unwrap(); } diff --git a/hercules_samples/ccp/src/cpu.sch b/hercules_samples/ccp/src/cpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..4c684da2f176dd3675dc79fa506d2d60fe6e0577 --- /dev/null +++ b/hercules_samples/ccp/src/cpu.sch @@ -0,0 +1,19 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); + +ip-sroa(*); +sroa(*); +fork-split(*); +unforkify(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/hercules_samples/ccp/src/gpu.sch b/hercules_samples/ccp/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..d8f6a2d00171255a1596d73dc9bf4ecff7ff308a --- /dev/null +++ b/hercules_samples/ccp/src/gpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + +let out = auto-outline(*); +gpu(out.tricky); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/hercules_samples/dot/src/gpu.sch b/hercules_samples/dot/src/gpu.sch index a1a5108882971f4cc0baabb4646a19002beba0ad..4adbf530a7ce8277cd8c691349f36de65ba4e251 100644 --- a/hercules_samples/dot/src/gpu.sch +++ b/hercules_samples/dot/src/gpu.sch @@ -2,9 +2,8 @@ gvn(*); phi-elim(*); dce(*); -auto-outline(*); -gpu(*); -host(dot); +let out = auto-outline(*); +gpu(out.dot); ip-sroa(*); sroa(*); diff --git a/hercules_samples/fac/build.rs b/hercules_samples/fac/build.rs index 4d8226f11183d9500e6affec4c46110e8626ee69..1986a74679dc43c19ec4638f7f1d96ef8832d769 100644 --- a/hercules_samples/fac/build.rs +++ b/hercules_samples/fac/build.rs @@ -4,6 +4,8 @@ fn main() { JunoCompiler::new() .ir_in_src("fac.hir") .unwrap() + .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .unwrap() .build() .unwrap(); } diff --git a/hercules_samples/fac/src/cpu.sch b/hercules_samples/fac/src/cpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..4c684da2f176dd3675dc79fa506d2d60fe6e0577 --- /dev/null +++ b/hercules_samples/fac/src/cpu.sch @@ -0,0 +1,19 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); + +ip-sroa(*); +sroa(*); +fork-split(*); +unforkify(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/hercules_samples/fac/src/gpu.sch b/hercules_samples/fac/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..1885854ca2d2b05e000d64165244080060b2a4f9 --- /dev/null +++ b/hercules_samples/fac/src/gpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + +let out = auto-outline(*); +gpu(out.fac); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/hercules_samples/matmul/src/gpu.sch b/hercules_samples/matmul/src/gpu.sch index c9d6b3361f7f48bc47f0d6ee2da3fc9018cebf48..9a714789ac768405a2782cac6e68338c2f0697bb 100644 --- a/hercules_samples/matmul/src/gpu.sch +++ b/hercules_samples/matmul/src/gpu.sch @@ -2,9 +2,8 @@ gvn(*); phi-elim(*); dce(*); -auto-outline(*); -gpu(*); -host(matmul); +let out = auto-outline(*); +gpu(out.matmul); ip-sroa(*); sroa(*); diff --git a/hercules_samples/matmul/src/main.rs b/hercules_samples/matmul/src/main.rs index 7b6cfe79048df5f6ff22a83b4e5859b2338f2b9d..abd25ec9cddbd4be508b3f484cffc1df1365dc4d 100644 --- a/hercules_samples/matmul/src/main.rs +++ b/hercules_samples/matmul/src/main.rs @@ -33,15 +33,13 @@ fn main() { } #[cfg(feature = "cuda")] { - let a_box = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a)); - let a = a_box.get_ref(); - let b_box = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b)); - let b = b_box.get_ref(); + let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a)); + let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b)); let mut r = runner!(matmul); - let c = r.run(I as u64, J as u64, K as u64, a, b).await; + let c = r.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()).await; let mut c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice(); c.to_cpu_ref(&mut c_cpu); - assert_eq!(c_cpu.as_ref(), &*correct_c); + assert_eq!(&*c_cpu, &*correct_c); } }); } diff --git a/juno_samples/antideps/build.rs b/juno_samples/antideps/build.rs index 7ed716a444460d7a90965f5b7f5faf3a7aadcb14..92b30c43b385812c02f2685ab3cdc76950dd6960 100644 --- a/juno_samples/antideps/build.rs +++ b/juno_samples/antideps/build.rs @@ -1,6 +1,15 @@ use juno_build::JunoCompiler; fn main() { + #[cfg(feature = "cuda")] + JunoCompiler::new() + .file_in_src("antideps.jn") + .unwrap() + .schedule_in_src("gpu.sch") + .unwrap() + .build() + .unwrap(); + #[cfg(not(feature = "cuda"))] JunoCompiler::new() .file_in_src("antideps.jn") .unwrap() diff --git a/juno_samples/antideps/src/gpu.sch b/juno_samples/antideps/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..d3f4a6c28ccefa44b2dd474e7f54546f1a21eab6 --- /dev/null +++ b/juno_samples/antideps/src/gpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + +let out = auto-outline(*); +gpu(out.simple_antideps, out.loop_antideps, out.complex_antideps1, out.complex_antideps2, out.very_complex_antideps, out.read_chains, out.array_of_structs); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/casts_and_intrinsics/build.rs b/juno_samples/casts_and_intrinsics/build.rs index 16d5c7a4f7fcb00344fc7669b67103a27f71a7c6..e43a2ac82f2dc60dc1198a9884cce617aa3d70f5 100644 --- a/juno_samples/casts_and_intrinsics/build.rs +++ b/juno_samples/casts_and_intrinsics/build.rs @@ -1,6 +1,15 @@ use juno_build::JunoCompiler; fn main() { + #[cfg(feature = "cuda")] + JunoCompiler::new() + .file_in_src("casts_and_intrinsics.jn") + .unwrap() + .schedule_in_src("gpu.sch") + .unwrap() + .build() + .unwrap(); + #[cfg(not(feature = "cuda"))] JunoCompiler::new() .file_in_src("casts_and_intrinsics.jn") .unwrap() diff --git a/juno_samples/casts_and_intrinsics/src/gpu.sch b/juno_samples/casts_and_intrinsics/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..b2fb3449954ca567ef3b24adebcf5610763a687e --- /dev/null +++ b/juno_samples/casts_and_intrinsics/src/gpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + +let out = auto-outline(*); +gpu(out.casts_and_intrinsics); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/cava/build.rs b/juno_samples/cava/build.rs index 929d3eba3e1c83f185c2c0ff256450b05247c80d..03d54160d9a10d9cf43f9b3f057d45696e6e6a22 100644 --- a/juno_samples/cava/build.rs +++ b/juno_samples/cava/build.rs @@ -2,6 +2,15 @@ extern crate juno_build; use juno_build::JunoCompiler; fn main() { + #[cfg(feature = "cuda")] + JunoCompiler::new() + .file_in_src("cava.jn") + .unwrap() + .schedule_in_src("gpu.sch") + .unwrap() + .build() + .unwrap(); + #[cfg(not(feature = "cuda"))] JunoCompiler::new() .file_in_src("cava.jn") .unwrap() diff --git a/juno_samples/cava/src/gpu.sch b/juno_samples/cava/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..07f71c99302c86a9ec3dc5c813b854fe3af983b0 --- /dev/null +++ b/juno_samples/cava/src/gpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + +gpu(scale, demosaic, medianMatrix, transform, gamut, tone_map, descale); + +ip-sroa(*); +sroa(*); + +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/concat/build.rs b/juno_samples/concat/build.rs index f7784b999492955289fc83ad1297907c2d0ce996..c91df94e2fbd0c05226cedb3f437e22c4689a961 100644 --- a/juno_samples/concat/build.rs +++ b/juno_samples/concat/build.rs @@ -4,6 +4,8 @@ fn main() { JunoCompiler::new() .file_in_src("concat.jn") .unwrap() + .schedule_in_src(if cfg!(feature = "cuda") { "gpu.sch" } else { "cpu.sch" }) + .unwrap() .build() .unwrap(); } diff --git a/juno_samples/concat/src/concat.jn b/juno_samples/concat/src/concat.jn index 2471671e69af7e9c73e2347abfe56e2db722d1d0..b9806c9348e73a3467f29f2f39c981b8f347037c 100644 --- a/juno_samples/concat/src/concat.jn +++ b/juno_samples/concat/src/concat.jn @@ -18,15 +18,7 @@ fn sum<t : number, c : usize>(arr : t[c]) -> t { } #[entry] -fn concat_entry(a : i32) -> i32 { - let arr1 : i32[3]; - let arr2 : i32[6]; - arr1[0] = a; - arr1[1] = a; - arr2[0] = a; - arr2[1] = a; - arr2[4] = a; - arr2[5] = a; +fn concat_entry(arr1 : i32[3], arr2 : i32[6]) -> i32 { let arr3 = concat::<i32, 3, 6>(arr1, arr2); return sum::<i32, 9>(arr3); } diff --git a/juno_samples/concat/src/cpu.sch b/juno_samples/concat/src/cpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..680adaeb2070b26b68fba361aafb64e5af2afc03 --- /dev/null +++ b/juno_samples/concat/src/cpu.sch @@ -0,0 +1,17 @@ +gvn(*); +phi-elim(*); +dce(*); + +cpu(concat, sum); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/concat/src/gpu.sch b/juno_samples/concat/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..8ee4ef0e5627aa6a51cb5e86dccd014f18dcfd96 --- /dev/null +++ b/juno_samples/concat/src/gpu.sch @@ -0,0 +1,17 @@ +gvn(*); +phi-elim(*); +dce(*); + +gpu(concat, sum); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/concat/src/main.rs b/juno_samples/concat/src/main.rs index db3f37fdaa6047d146ebd899cc7178e2b135d7ee..d0929fbf0a3d5d5073b51f79ee8ab575208eb7cc 100644 --- a/juno_samples/concat/src/main.rs +++ b/juno_samples/concat/src/main.rs @@ -1,15 +1,35 @@ #![feature(concat_idents)] use hercules_rt::runner; +use hercules_rt::HerculesCPURef; +#[cfg(feature = "cuda")] +use hercules_rt::CUDABox; juno_build::juno!("concat"); fn main() { async_std::task::block_on(async { let mut r = runner!(concat_entry); - let output = r.run(7).await; - println!("{}", output); - assert_eq!(output, 42); + #[cfg(not(feature = "cuda"))] + { + let mut a_data = [7, 7, 0]; + let a = HerculesCPURef::from_slice(&mut a_data); + let mut b_data = [7, 7, 0, 0, 7, 7]; + let b = HerculesCPURef::from_slice(&mut b_data); + let output = r.run(a, b).await; + println!("{}", output); + assert_eq!(output, 42); + } + #[cfg(feature = "cuda")] + { + let mut a_data = [7, 7, 0]; + let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a_data)); + let mut b_data = [7, 7, 0, 0, 7, 7]; + let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b_data)); + let output = r.run(a.get_ref(), b.get_ref()).await; + println!("{}", output); + assert_eq!(output, 42); + } }); } diff --git a/juno_samples/cpu.sch b/juno_samples/cpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..4c684da2f176dd3675dc79fa506d2d60fe6e0577 --- /dev/null +++ b/juno_samples/cpu.sch @@ -0,0 +1,19 @@ +gvn(*); +phi-elim(*); +dce(*); + +auto-outline(*); + +ip-sroa(*); +sroa(*); +fork-split(*); +unforkify(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/gpu.sch b/juno_samples/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..9a714789ac768405a2782cac6e68338c2f0697bb --- /dev/null +++ b/juno_samples/gpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + +let out = auto-outline(*); +gpu(out.matmul); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/implicit_clone/build.rs b/juno_samples/implicit_clone/build.rs index 75c1afc41a75b2006b26042323df3bdc3fcf5a17..dc134e59757a301b2be13ab61ed4d2feb252d1fe 100644 --- a/juno_samples/implicit_clone/build.rs +++ b/juno_samples/implicit_clone/build.rs @@ -1,6 +1,15 @@ use juno_build::JunoCompiler; fn main() { + #[cfg(feature = "cuda")] + JunoCompiler::new() + .file_in_src("implicit_clone.jn") + .unwrap() + .schedule_in_src("gpu.sch") + .unwrap() + .build() + .unwrap(); + #[cfg(not(feature = "cuda"))] JunoCompiler::new() .file_in_src("implicit_clone.jn") .unwrap() diff --git a/juno_samples/implicit_clone/src/gpu.sch b/juno_samples/implicit_clone/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..443fc778fe6b6f4489d6c5a339b691f488f917c2 --- /dev/null +++ b/juno_samples/implicit_clone/src/gpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + +let out = auto-outline(*); +gpu(out.simple_implicit_clone, out.loop_implicit_clone, out.double_loop_implicit_clone, out.tricky_loop_implicit_clone, out.tricky2_loop_implicit_clone, out.tricky3_loop_implicit_clone, out.no_implicit_clone, out.mirage_implicit_clone); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/matmul/build.rs b/juno_samples/matmul/build.rs index 926fbc33ecfa5ab31b40a92f778bb4d3b7f6a77e..ff3e3d8c79091349f9d76716f3d7496bba3d5503 100644 --- a/juno_samples/matmul/build.rs +++ b/juno_samples/matmul/build.rs @@ -1,6 +1,15 @@ use juno_build::JunoCompiler; fn main() { + #[cfg(feature = "cuda")] + JunoCompiler::new() + .file_in_src("matmul.jn") + .unwrap() + .schedule_in_src("gpu.sch") + .unwrap() + .build() + .unwrap(); + #[cfg(not(feature = "cuda"))] JunoCompiler::new() .file_in_src("matmul.jn") .unwrap() diff --git a/juno_samples/matmul/src/gpu.sch b/juno_samples/matmul/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..e85dafdfdbfac2021b0215f625fb8077972be6b4 --- /dev/null +++ b/juno_samples/matmul/src/gpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + +let out = auto-outline(*); +gpu(out.matmul, out.tiled_64_matmul); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/matmul/src/main.rs b/juno_samples/matmul/src/main.rs index fa5d1f04d48cdf48cf377e8f3d08de80d30e688e..50fe1760eeeedc946d510a6d5285d76e1346f3cc 100644 --- a/juno_samples/matmul/src/main.rs +++ b/juno_samples/matmul/src/main.rs @@ -3,6 +3,8 @@ use rand::random; use hercules_rt::{runner, HerculesCPURef}; +#[cfg(feature = "cuda")] +use hercules_rt::CUDABox; juno_build::juno!("matmul"); @@ -11,8 +13,8 @@ fn main() { const I: usize = 256; const J: usize = 64; const K: usize = 128; - let a: Box<[i32]> = (0..I * J).map(|_| random::<i32>() % 100).collect(); - let b: Box<[i32]> = (0..J * K).map(|_| random::<i32>() % 100).collect(); + let mut a: Box<[i32]> = (0..I * J).map(|_| random::<i32>() % 100).collect(); + let mut b: Box<[i32]> = (0..J * K).map(|_| random::<i32>() % 100).collect(); let mut correct_c: Box<[i32]> = (0..I * K).map(|_| 0).collect(); for i in 0..I { for k in 0..K { @@ -21,14 +23,32 @@ fn main() { } } } - let a = HerculesCPURef::from_slice(&a); - let b = HerculesCPURef::from_slice(&b); - let mut r = runner!(matmul); - let c = r.run(I as u64, J as u64, K as u64, a.clone(), b.clone()).await; - assert_eq!(c.as_slice::<i32>(), &*correct_c); - let mut r = runner!(tiled_64_matmul); - let tiled_c = r.run(I as u64, J as u64, K as u64, a.clone(), b.clone()).await; - assert_eq!(tiled_c.as_slice::<i32>(), &*correct_c); + #[cfg(not(feature = "cuda"))] + { + let a = HerculesCPURef::from_slice(&a); + let b = HerculesCPURef::from_slice(&b); + let mut r = runner!(matmul); + let c = r.run(I as u64, J as u64, K as u64, a.clone(), b.clone()).await; + assert_eq!(c.as_slice::<i32>(), &*correct_c); + let mut r = runner!(tiled_64_matmul); + let tiled_c = r.run(I as u64, J as u64, K as u64, a.clone(), b.clone()).await; + assert_eq!(tiled_c.as_slice::<i32>(), &*correct_c); + } + #[cfg(feature = "cuda")] + { + let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut a)); + let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b)); + let mut r = runner!(matmul); + let c = r.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()).await; + let mut c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice(); + c.to_cpu_ref(&mut c_cpu); + assert_eq!(&*c_cpu, &*correct_c); + let mut r = runner!(tiled_64_matmul); + let tiled_c = r.run(I as u64, J as u64, K as u64, a.get_ref(), b.get_ref()).await; + let mut tiled_c_cpu: Box<[i32]> = vec![0; correct_c.len()].into_boxed_slice(); + tiled_c.to_cpu_ref(&mut tiled_c_cpu); + assert_eq!(&*tiled_c_cpu, &*correct_c); + } }); } @@ -36,4 +56,3 @@ fn main() { fn matmul_test() { main(); } - diff --git a/juno_samples/nested_ccp/build.rs b/juno_samples/nested_ccp/build.rs index c5c7ca6a1b9ab5decf6a8cf0b8e8f13ff7122834..2352ddef0a432d42a09fef72042c516843799290 100644 --- a/juno_samples/nested_ccp/build.rs +++ b/juno_samples/nested_ccp/build.rs @@ -1,6 +1,15 @@ use juno_build::JunoCompiler; fn main() { + #[cfg(feature = "cuda")] + JunoCompiler::new() + .file_in_src("nested_ccp.jn") + .unwrap() + .schedule_in_src("gpu.sch") + .unwrap() + .build() + .unwrap(); + #[cfg(not(feature = "cuda"))] JunoCompiler::new() .file_in_src("nested_ccp.jn") .unwrap() diff --git a/juno_samples/nested_ccp/src/gpu.sch b/juno_samples/nested_ccp/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..021a05e3f361e4f5d1ae679c33f24282d431b2cc --- /dev/null +++ b/juno_samples/nested_ccp/src/gpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + +let out = auto-outline(*); +gpu(out.ccp_example, out.median_array, out.no_underflow); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/nested_ccp/src/main.rs b/juno_samples/nested_ccp/src/main.rs index 423b66fb8c84fd1af6b0267c7c63aa204ab1dc6c..412d56a4f4e66af5a6608822219bce16ac5554dc 100644 --- a/juno_samples/nested_ccp/src/main.rs +++ b/juno_samples/nested_ccp/src/main.rs @@ -1,6 +1,8 @@ #![feature(concat_idents)] use hercules_rt::{runner, HerculesCPURef, HerculesCPURefMut}; +#[cfg(feature = "cuda")] +use hercules_rt::CUDABox; juno_build::juno!("nested_ccp"); @@ -8,19 +10,30 @@ fn main() { async_std::task::block_on(async { let a: Box<[f32]> = Box::new([17.0, 18.0, 19.0]); let mut b: Box<[i32]> = Box::new([12, 16, 4, 18, 23, 56, 93, 22, 14]); - let a = HerculesCPURef::from_slice(&a); - let b = HerculesCPURefMut::from_slice(&mut b); - let mut r = runner!(ccp_example); - let output_example = r.run(a).await; - let mut r = runner!(median_array); - let output_median = r.run(9, b).await; + #[cfg(not(feature = "cuda"))] + { + let a = HerculesCPURef::from_slice(&a); + let b = HerculesCPURefMut::from_slice(&mut b); + let mut r = runner!(ccp_example); + let output_example = r.run(a).await; + let mut r = runner!(median_array); + let output_median = r.run(9, b).await; + assert_eq!(output_example, 1.0); + assert_eq!(output_median, 18); + } + #[cfg(feature = "cuda")] + { + let mut a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&a)); + let mut b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&mut b)); + let mut r = runner!(ccp_example); + let output_example = r.run(a.get_ref_mut()).await; + let mut r = runner!(median_array); + let output_median = r.run(9, b.get_ref_mut()).await; + assert_eq!(output_example, 1.0); + assert_eq!(output_median, 18); + } let mut r = runner!(no_underflow); let out_no_underflow = r.run().await; - println!("{}", output_example); - println!("{}", output_median); - println!("{}", out_no_underflow); - assert_eq!(output_example, 1.0); - assert_eq!(output_median, 18); assert_eq!(out_no_underflow, 7); }); } diff --git a/juno_samples/schedule_test/src/main.rs b/juno_samples/schedule_test/src/main.rs index 2e63babf29e84bc74a7649306dc28af88225cf39..1505d4e5ff620a53d1095cdc4185a5a6d665e71e 100644 --- a/juno_samples/schedule_test/src/main.rs +++ b/juno_samples/schedule_test/src/main.rs @@ -3,6 +3,8 @@ use rand::random; use hercules_rt::{runner, HerculesCPURef}; +#[cfg(feature = "cuda")] +use hercules_rt::CUDABox; juno_build::juno!("code"); @@ -26,12 +28,26 @@ fn main() { } } - let a = HerculesCPURef::from_slice(&a); - let b = HerculesCPURef::from_slice(&b); - let c = HerculesCPURef::from_slice(&c); - let mut r = runner!(test); - let res = r.run(N as u64, M as u64, K as u64, a, b, c).await; - assert_eq!(res.as_slice::<i32>(), &*correct_res); + #[cfg(not(feature = "cuda"))] + { + let a = HerculesCPURef::from_slice(&a); + let b = HerculesCPURef::from_slice(&b); + let c = HerculesCPURef::from_slice(&c); + let mut r = runner!(test); + let res = r.run(N as u64, M as u64, K as u64, a, b, c).await; + assert_eq!(res.as_slice::<i32>(), &*correct_res); + } + #[cfg(feature = "cuda")] + { + let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&a)); + let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&b)); + let c = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&c)); + let mut r = runner!(test); + let res = r.run(N as u64, M as u64, K as u64, a.get_ref(), b.get_ref(), c.get_ref()).await; + let mut res_cpu: Box<[i32]> = vec![0; correct_res.len()].into_boxed_slice(); + res.to_cpu_ref(&mut res_cpu); + assert_eq!(&*res_cpu, &*correct_res); + } }); } diff --git a/juno_samples/simple3/build.rs b/juno_samples/simple3/build.rs index 94760025d53abe7e10914052e1a7783386b316b0..a0874af7ecb42b29eea3aaaa5628d1c0ddeb2090 100644 --- a/juno_samples/simple3/build.rs +++ b/juno_samples/simple3/build.rs @@ -1,6 +1,15 @@ use juno_build::JunoCompiler; fn main() { + #[cfg(feature = "cuda")] + JunoCompiler::new() + .file_in_src("simple3.jn") + .unwrap() + .schedule_in_src("gpu.sch") + .unwrap() + .build() + .unwrap(); + #[cfg(not(feature = "cuda"))] JunoCompiler::new() .file_in_src("simple3.jn") .unwrap() diff --git a/juno_samples/simple3/src/gpu.sch b/juno_samples/simple3/src/gpu.sch new file mode 100644 index 0000000000000000000000000000000000000000..e97627d43c0e1dccf67f565c686712acb1f080f8 --- /dev/null +++ b/juno_samples/simple3/src/gpu.sch @@ -0,0 +1,18 @@ +gvn(*); +phi-elim(*); +dce(*); + +let out = auto-outline(*); +gpu(out.simple3); + +ip-sroa(*); +sroa(*); +dce(*); +float-collections(*); +gvn(*); +phi-elim(*); +dce(*); + +infer-schedules(*); + +gcm(*); diff --git a/juno_samples/simple3/src/main.rs b/juno_samples/simple3/src/main.rs index 4f9fe6a708ec50fe65b1c31a2823580a117985ce..8eb78f7c93b0f195ffee1be120376dbe3f9a2a62 100644 --- a/juno_samples/simple3/src/main.rs +++ b/juno_samples/simple3/src/main.rs @@ -1,6 +1,8 @@ #![feature(concat_idents)] use hercules_rt::{runner, HerculesCPURef}; +#[cfg(feature = "cuda")] +use hercules_rt::CUDABox; juno_build::juno!("simple3"); @@ -8,12 +10,22 @@ fn main() { async_std::task::block_on(async { let a: Box<[u32]> = Box::new([1, 2, 3, 4, 5, 6, 7, 8]); let b: Box<[u32]> = Box::new([8, 7, 6, 5, 4, 3, 2, 1]); - let a = HerculesCPURef::from_slice(&a); - let b = HerculesCPURef::from_slice(&b); - let mut r = runner!(simple3); - let c = r.run(8, a, b).await; - println!("{}", c); - assert_eq!(c, 120); + #[cfg(not(feature = "cuda"))] + { + let a = HerculesCPURef::from_slice(&a); + let b = HerculesCPURef::from_slice(&b); + let mut r = runner!(simple3); + let c = r.run(8, a, b).await; + assert_eq!(c, 120); + } + #[cfg(feature = "cuda")] + { + let a = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&a)); + let b = CUDABox::from_cpu_ref(HerculesCPURef::from_slice(&b)); + let mut r = runner!(simple3); + let c = r.run(8, a.get_ref(), b.get_ref()).await; + assert_eq!(c, 120); + } }); }