Skip to content
Snippets Groups Projects
Commit 3f102168 authored by rarbore2
Browse files

Sequential schedules

parent 8c7d8fab
No related branches found
No related tags found
1 merge request: !219 "Sequential schedules"
Showing with 194 additions and 138 deletions
......@@ -13,6 +13,7 @@ path = "src/lib.rs"
[features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[build-dependencies]
juno_build = { path = "../../juno_build" }
......@@ -30,4 +31,4 @@ criterion = { version = "0.5", features = ["html_reports"] }
[[bench]]
name = "cava_bench"
harness = false
\ No newline at end of file
harness = false
......@@ -113,14 +113,18 @@ fixpoint {
simpl!(fuse4);
array-slf(fuse4);
simpl!(fuse4);
let par = fuse4@image_loop \ fuse4@channel_loop;
fork-tile[4, 1, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let fuse4_body = outline(split.cava_3.fj2);
fork-coalesce(fuse4, fuse4_body);
simpl!(fuse4, fuse4_body);
if !feature("seq") {
let par = fuse4@image_loop \ fuse4@channel_loop;
fork-tile[4, 1, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let fuse4_body = outline(split.cava_3.fj2);
fork-coalesce(fuse4, fuse4_body);
simpl!(fuse4, fuse4_body);
fuse4 = fuse4_body;
}
no-memset(fuse5@res1);
no-memset(fuse5@res2);
......@@ -136,8 +140,8 @@ simpl!(fuse5);
delete-uncalled(*);
simpl!(*);
fork-split(fuse1, fuse2, fuse3, fuse4_body, fuse5);
unforkify(fuse1, fuse2, fuse3, fuse4_body, fuse5);
fork-split(fuse1, fuse2, fuse3, fuse4, fuse5);
unforkify(fuse1, fuse2, fuse3, fuse4, fuse5);
simpl!(*);
......
......@@ -7,6 +7,7 @@ edition = "2021"
[features]
opencv = ["dep:opencv"]
cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[[bin]]
name = "juno_edge_detection"
......
......@@ -24,14 +24,18 @@ predication(gaussian_smoothing);
simpl!(gaussian_smoothing);
predication(gaussian_smoothing);
simpl!(gaussian_smoothing);
let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop;
fork-tile[4, 1, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2);
fork-coalesce(gaussian_smoothing, gaussian_smoothing_body);
simpl!(gaussian_smoothing, gaussian_smoothing_body);
if !feature("seq") {
let par = gaussian_smoothing@image_loop \ gaussian_smoothing@filter_loop;
fork-tile[4, 1, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let gaussian_smoothing_body = outline(split._0_gaussian_smoothing.fj2);
fork-coalesce(gaussian_smoothing, gaussian_smoothing_body);
simpl!(gaussian_smoothing, gaussian_smoothing_body);
gaussian_smoothing = gaussian_smoothing_body;
}
no-memset(laplacian_estimate@res);
fixpoint {
......@@ -40,15 +44,19 @@ fixpoint {
fork-coalesce(laplacian_estimate);
}
simpl!(laplacian_estimate);
let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop;
fork-tile[4, 1, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let body = split._1_laplacian_estimate.fj2;
let laplacian_estimate_body = outline(body);
fork-coalesce(laplacian_estimate, laplacian_estimate_body);
simpl!(laplacian_estimate, laplacian_estimate_body);
if !feature("seq") {
let par = laplacian_estimate@image_loop \ laplacian_estimate@filter_loop;
fork-tile[4, 1, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let body = split._1_laplacian_estimate.fj2;
let laplacian_estimate_body = outline(body);
fork-coalesce(laplacian_estimate, laplacian_estimate_body);
simpl!(laplacian_estimate, laplacian_estimate_body);
laplacian_estimate = laplacian_estimate_body;
}
no-memset(zero_crossings@res);
fixpoint {
......@@ -57,15 +65,19 @@ fixpoint {
fork-coalesce(zero_crossings);
}
simpl!(zero_crossings);
let par = zero_crossings@image_loop \ zero_crossings@filter_loop;
fork-tile[4, 1, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let body = split._2_zero_crossings.fj2;
let zero_crossings_body = outline(body);
fork-coalesce(zero_crossings, zero_crossings_body);
simpl!(zero_crossings, zero_crossings_body);
if !feature("seq") {
let par = zero_crossings@image_loop \ zero_crossings@filter_loop;
fork-tile[4, 1, false, false](par);
fork-tile[8, 0, false, false](par);
fork-interchange[1, 2](par);
let split = fork-split(par);
let body = split._2_zero_crossings.fj2;
let zero_crossings_body = outline(body);
fork-coalesce(zero_crossings, zero_crossings_body);
simpl!(zero_crossings, zero_crossings_body);
zero_crossings = zero_crossings_body;
}
no-memset(gradient@res);
fixpoint {
......@@ -84,17 +96,23 @@ fixpoint {
fork-coalesce(max_gradient);
}
simpl!(max_gradient);
fork-dim-merge(max_gradient);
simpl!(max_gradient);
fork-tile[32, 0, false, false](max_gradient);
let split = fork-split(max_gradient);
clean-monoid-reduces(max_gradient);
let out = outline(split._4_max_gradient.fj1);
simpl!(max_gradient, out);
unforkify(out);
let out = fork-fission[split._4_max_gradient.fj0](max_gradient);
simpl!(max_gradient);
unforkify(out._4_max_gradient.fj_bottom);
if !feature("seq") {
fork-dim-merge(max_gradient);
simpl!(max_gradient);
fork-tile[32, 0, false, false](max_gradient);
let split = fork-split(max_gradient);
clean-monoid-reduces(max_gradient);
let out = outline(split._4_max_gradient.fj1);
simpl!(max_gradient, out);
unforkify(out);
let out = fork-fission[split._4_max_gradient.fj0](max_gradient);
simpl!(max_gradient);
unforkify(out._4_max_gradient.fj_bottom);
} else {
fork-split(max_gradient);
unforkify(max_gradient);
}
no-memset(reject_zero_crossings@res);
fixpoint {
......@@ -104,18 +122,22 @@ fixpoint {
}
predication(reject_zero_crossings);
simpl!(reject_zero_crossings);
fork-tile[4, 1, false, false](reject_zero_crossings);
fork-tile[8, 0, false, false](reject_zero_crossings);
fork-interchange[1, 2](reject_zero_crossings);
let split = fork-split(reject_zero_crossings);
let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2);
fork-coalesce(reject_zero_crossings, reject_zero_crossings_body);
simpl!(reject_zero_crossings, reject_zero_crossings_body);
if !feature("seq") {
fork-tile[4, 1, false, false](reject_zero_crossings);
fork-tile[8, 0, false, false](reject_zero_crossings);
fork-interchange[1, 2](reject_zero_crossings);
let split = fork-split(reject_zero_crossings);
let reject_zero_crossings_body = outline(split._5_reject_zero_crossings.fj2);
fork-coalesce(reject_zero_crossings, reject_zero_crossings_body);
simpl!(reject_zero_crossings, reject_zero_crossings_body);
reject_zero_crossings = reject_zero_crossings_body;
}
async-call(edge_detection@le, edge_detection@zc);
fork-split(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings_body);
unforkify(gaussian_smoothing_body, laplacian_estimate_body, zero_crossings_body, gradient, reject_zero_crossings_body);
fork-split(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);
unforkify(gaussian_smoothing, laplacian_estimate, zero_crossings, gradient, reject_zero_crossings);
simpl!(*);
......
......@@ -13,6 +13,7 @@ path = "src/lib.rs"
[features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[build-dependencies]
juno_build = { path = "../../../juno_build" }
......@@ -31,4 +32,4 @@ criterion = { version = "0.5", features = ["html_reports"] }
[[bench]]
name = "backprop_bench"
harness = false
\ No newline at end of file
harness = false
......@@ -37,10 +37,12 @@ inline(backprop@forward_input, backprop@forward_hidden);
let forward_input = outline(backprop@forward_input);
let forward_hidden = outline(backprop@forward_hidden);
fork-tile[16, 0, false, true](forward_input@outer_loop \ forward_input@inner_loop);
let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_input@inner_loop);
let forward_input = outline(inner);
inline(backprop@forward_input);
if !feature("seq") {
fork-tile[16, 0, false, true](forward_input@outer_loop \ forward_input@inner_loop);
let (outer, inner) = fork-reshape[[1], [0]](forward_input@outer_loop \ forward_input@inner_loop);
forward_input = outline(inner);
inline(backprop@forward_input);
}
// The first call to adjust_weights has total loop dimensions of 1 * 17, so not
// worth parallelizing (given that the body is trivial)
......@@ -50,10 +52,12 @@ inline(backprop@adjust_hidden, backprop@adjust_input);
let adjust_hidden = outline(backprop@adjust_hidden);
let adjust_input = outline(backprop@adjust_input);
fork-tile[16, 0, false, true](adjust_input);
let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input);
let adjust_input = outline(inner);
inline(backprop@adjust_input);
if !feature("seq") {
fork-tile[16, 0, false, true](adjust_input);
let (outer, inner) = fork-reshape[[1], [0, 2]](adjust_input);
adjust_input = outline(inner);
inline(backprop@adjust_input);
}
delete-uncalled(*);
const-inline(*);
......
......@@ -13,6 +13,7 @@ path = "src/lib.rs"
[features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[build-dependencies]
juno_build = { path = "../../../juno_build" }
......
......@@ -40,21 +40,24 @@ simpl!(collect);
parallel-fork(traverse, collect);
parallel-reduce(traverse, collect);
fork-tile[32, 0, false, true](traverse, collect);
let (outer, inner) = fork-reshape[[1], [0]](traverse);
let traverse_body = outline(inner);
let (outer, inner) = fork-reshape[[1], [0]](collect);
let collect_body = outline(inner);
fork-tile[32, 0, false, true](init);
let (outer, inner) = fork-reshape[[1], [0]](init);
let init_body = outline(inner);
inline(bfs@cost_init, bfs@loop1, bfs@loop2);
if !feature("seq") {
fork-tile[32, 0, false, true](traverse, collect);
let (outer, inner) = fork-reshape[[1], [0]](traverse);
traverse = outline(inner);
let (outer, inner) = fork-reshape[[1], [0]](collect);
collect = outline(inner);
fork-tile[32, 0, false, true](init);
let (outer, inner) = fork-reshape[[1], [0]](init);
let init_body = outline(inner);
inline(bfs@cost_init, bfs@loop1, bfs@loop2);
init = init_body;
}
delete-uncalled(*);
const-inline(*);
simpl!(*);
unforkify(init_body, traverse_body, collect_body);
unforkify(init, traverse, collect);
simpl!(*);
gcm(*);
gcm(*);
\ No newline at end of file
......@@ -13,6 +13,7 @@ path = "src/lib.rs"
[features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[build-dependencies]
juno_build = { path = "../../../juno_build" }
......
......@@ -27,29 +27,35 @@ fixpoint {
simpl!(*);
unforkify(compute_flux@inner_loop);
fork-tile[32, 0, false, false](compute_step_factor);
let split = fork-split(compute_step_factor);
let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
fork-coalesce(compute_step_factor, compute_step_factor_body);
simpl!(compute_step_factor, compute_step_factor_body);
if !feature("seq") {
fork-tile[32, 0, false, false](compute_step_factor);
let split = fork-split(compute_step_factor);
let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
fork-coalesce(compute_step_factor, compute_step_factor_body);
simpl!(compute_step_factor, compute_step_factor_body);
compute_step_factor = compute_step_factor_body;
fork-tile[32, 0, false, false](compute_flux);
let split = fork-split(compute_flux);
let compute_flux_body = outline(split._6_compute_flux.fj1);
fork-coalesce(compute_flux, compute_flux_body);
simpl!(compute_flux, compute_flux_body);
fork-tile[32, 0, false, false](compute_flux);
let split = fork-split(compute_flux);
let compute_flux_body = outline(split._6_compute_flux.fj1);
fork-coalesce(compute_flux, compute_flux_body);
simpl!(compute_flux, compute_flux_body);
compute_flux = compute_flux_body;
fork-tile[32, 0, false, false](time_step);
let split = fork-split(time_step);
let time_step_body = outline(split._7_time_step.fj1);
fork-coalesce(time_step, time_step_body);
simpl!(time_step, time_step_body);
fork-tile[32, 0, false, false](time_step);
let split = fork-split(time_step);
let time_step_body = outline(split._7_time_step.fj1);
fork-coalesce(time_step, time_step_body);
simpl!(time_step, time_step_body);
time_step = time_step_body;
fork-tile[32, 0, false, false](copy_vars);
let split = fork-split(copy_vars);
let copy_vars_body = outline(split._8_copy_vars.fj1);
fork-coalesce(copy_vars, copy_vars_body);
simpl!(copy_vars, copy_vars_body);
fork-tile[32, 0, false, false](copy_vars);
let split = fork-split(copy_vars);
let copy_vars_body = outline(split._8_copy_vars.fj1);
fork-coalesce(copy_vars, copy_vars_body);
simpl!(copy_vars, copy_vars_body);
copy_vars = copy_vars_body;
}
unforkify(compute_step_factor_body, compute_flux_body, time_step_body, copy_vars_body);
unforkify(compute_step_factor, compute_flux, time_step, copy_vars);
gcm(*);
......@@ -27,35 +27,42 @@ simpl!(*);
no-memset(compute_step_factor@res, compute_flux_contributions@res, compute_flux@res, copy_vars@res);
unforkify(compute_flux@inner_loop);
fork-tile[32, 0, false, false](compute_step_factor);
let split = fork-split(compute_step_factor);
let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
fork-coalesce(compute_step_factor, compute_step_factor_body);
simpl!(compute_step_factor, compute_step_factor_body);
if !feature("seq") {
fork-tile[32, 0, false, false](compute_step_factor);
let split = fork-split(compute_step_factor);
let compute_step_factor_body = outline(split._4_compute_step_factor.fj1);
fork-coalesce(compute_step_factor, compute_step_factor_body);
simpl!(compute_step_factor, compute_step_factor_body);
compute_step_factor = compute_step_factor_body;
fork-tile[32, 0, false, false](compute_flux_contributions);
let split = fork-split(compute_flux_contributions);
let compute_flux_contributions_body = outline(split._6_compute_flux_contributions.fj1);
fork-coalesce(compute_flux_contributions, compute_flux_contributions_body);
simpl!(compute_flux_contributions, compute_flux_contributions_body);
fork-tile[32, 0, false, false](compute_flux_contributions);
let split = fork-split(compute_flux_contributions);
let compute_flux_contributions_body = outline(split._6_compute_flux_contributions.fj1);
fork-coalesce(compute_flux_contributions, compute_flux_contributions_body);
simpl!(compute_flux_contributions, compute_flux_contributions_body);
compute_flux_contributions = compute_flux_contributions_body;
fork-tile[32, 0, false, false](compute_flux);
let split = fork-split(compute_flux);
let compute_flux_body = outline(split._7_compute_flux.fj1);
fork-coalesce(compute_flux, compute_flux_body);
simpl!(compute_flux, compute_flux_body);
fork-tile[32, 0, false, false](compute_flux);
let split = fork-split(compute_flux);
let compute_flux_body = outline(split._7_compute_flux.fj1);
fork-coalesce(compute_flux, compute_flux_body);
simpl!(compute_flux, compute_flux_body);
compute_flux = compute_flux_body;
fork-tile[32, 0, false, false](time_step);
let split = fork-split(time_step);
let time_step_body = outline(split._8_time_step.fj1);
fork-coalesce(time_step, time_step_body);
simpl!(time_step, time_step_body);
fork-tile[32, 0, false, false](time_step);
let split = fork-split(time_step);
let time_step_body = outline(split._8_time_step.fj1);
fork-coalesce(time_step, time_step_body);
simpl!(time_step, time_step_body);
time_step = time_step_body;
fork-tile[32, 0, false, false](copy_vars);
let split = fork-split(copy_vars);
let copy_vars_body = outline(split._9_copy_vars.fj1);
fork-coalesce(copy_vars, copy_vars_body);
simpl!(copy_vars, copy_vars_body);
fork-tile[32, 0, false, false](copy_vars);
let split = fork-split(copy_vars);
let copy_vars_body = outline(split._9_copy_vars.fj1);
fork-coalesce(copy_vars, copy_vars_body);
simpl!(copy_vars, copy_vars_body);
copy_vars = copy_vars_body;
}
unforkify(compute_step_factor_body, compute_flux_contributions_body, compute_flux_body, time_step_body, copy_vars_body);
unforkify(compute_step_factor, compute_flux_contributions, compute_flux, time_step, copy_vars);
gcm(*);
......@@ -13,6 +13,7 @@ path = "src/lib.rs"
[features]
cuda = ["juno_build/cuda", "hercules_rt/cuda"]
seq = []
[build-dependencies]
juno_build = { path = "../../../juno_build" }
......
......@@ -35,20 +35,24 @@ simpl!(*);
slf(*);
simpl!(*);
fork-tile[32, 0, false, false](loop2);
let split = fork-split(loop2);
let loop2_body = outline(split.srad_1.fj1);
simpl!(loop2, loop2_body);
if !feature("seq") {
fork-tile[32, 0, false, false](loop2);
let split = fork-split(loop2);
let loop2_body = outline(split.srad_1.fj1);
simpl!(loop2, loop2_body);
loop2 = loop2_body;
fork-tile[32, 0, false, false](loop3);
let split = fork-split(loop3);
let loop3_body = outline(split.srad_2.fj1);
simpl!(loop3, loop3_body);
fork-tile[32, 0, false, false](loop3);
let split = fork-split(loop3);
let loop3_body = outline(split.srad_2.fj1);
simpl!(loop3, loop3_body);
loop3 = loop3_body;
inline(srad@loop2, srad@loop3);
delete-uncalled(*);
inline(srad@loop2, srad@loop3);
delete-uncalled(*);
}
fork-split(extract, compress, loop1, loop2_body, loop3_body);
unforkify(extract, compress, loop1, loop2_body, loop3_body);
fork-split(extract, compress, loop1, loop2, loop3);
unforkify(extract, compress, loop1, loop2, loop3);
gcm(*);
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment