Skip to content
Snippets Groups Projects

Compare revisions

Changes are shown as if the source revision was being merged into the target revision. Learn more about comparing revisions.

Source

Select target project
No results found

Target

Select target project
  • llvm/hercules
1 result
Show changes
Commits on Source (22)
Showing
with 1951 additions and 640 deletions
# Build stage: compile the whole workspace with default features.
build-job:
  stage: build
  script:
    - cargo build

# Test stage: run the suite twice — once with default features and once
# with the CUDA backend enabled, so both device paths stay green.
test-job:
  stage: test
  script:
    - cargo test
    - cargo test --features=cuda
This diff is collapsed.
......@@ -27,4 +27,7 @@ members = [
"juno_samples/nested_ccp",
"juno_samples/antideps",
"juno_samples/implicit_clone",
"juno_samples/concat",
"juno_samples/cava",
]
......@@ -536,6 +536,17 @@ impl<'a> CPUContext<'a> {
self.get_value(collect, true)
)?;
}
Node::Undef { ty } => {
let body = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap().body;
let ty = self.get_type(ty);
write!(
body,
" {} = bitcast {} undef to {}\n",
self.get_value(id, false),
ty,
ty
)?;
}
_ => panic!("PANIC: Can't lower {:?}.", self.function.nodes[id.idx()]),
}
Ok(())
......@@ -602,6 +613,20 @@ impl<'a> CPUContext<'a> {
left.idx(),
right.idx()
)?,
DynamicConstant::Min(left, right) => write!(
body,
" %dc{} = call @llvm.umin.i64(i64%dc{},i64%dc{})\n",
dc.idx(),
left.idx(),
right.idx()
)?,
DynamicConstant::Max(left, right) => write!(
body,
" %dc{} = call @llvm.umax.i64(i64%dc{},i64%dc{})\n",
dc.idx(),
left.idx(),
right.idx()
)?,
}
}
Ok(())
......@@ -848,7 +873,7 @@ fn convert_intrinsic(intrinsic: &Intrinsic, ty: &Type) -> String {
Intrinsic::Log2 => "log2",
Intrinsic::Max => {
if ty.is_float() {
"max"
"maxnum"
} else if ty.is_unsigned() {
"umax"
} else if ty.is_signed() {
......@@ -859,7 +884,7 @@ fn convert_intrinsic(intrinsic: &Intrinsic, ty: &Type) -> String {
}
Intrinsic::Min => {
if ty.is_float() {
"min"
"minnum"
} else if ty.is_unsigned() {
"umin"
} else if ty.is_signed() {
......
......@@ -295,10 +295,19 @@ impl<'a> RTContext<'a> {
ref dynamic_constants,
ref args,
} => {
match self.devices[callee_id.idx()] {
Device::LLVM => {
let device = self.devices[callee_id.idx()];
match device {
// The device backends ensure that device functions have the
// same C interface.
Device::LLVM | Device::CUDA => {
let block = &mut blocks.get_mut(&self.bbs.0[id.idx()]).unwrap();
let device = match device {
Device::LLVM => "cpu",
Device::CUDA => "cuda",
_ => panic!(),
};
// First, get the raw pointers to collections that the
// device function takes as input.
let callee_objs = &self.collection_objects[&callee_id];
......@@ -308,16 +317,18 @@ impl<'a> RTContext<'a> {
if callee_objs.is_mutated(obj) {
write!(
block,
" let arg_tmp{} = unsafe {{ {}.__cpu_ptr_mut() }};\n",
" let arg_tmp{} = unsafe {{ {}.__{}_ptr_mut() }};\n",
idx,
self.get_value(*arg)
self.get_value(*arg),
device
)?;
} else {
write!(
block,
" let arg_tmp{} = unsafe {{ {}.__cpu_ptr() }};\n",
" let arg_tmp{} = unsafe {{ {}.__{}_ptr() }};\n",
idx,
self.get_value(*arg)
self.get_value(*arg),
device
)?;
}
} else {
......@@ -401,7 +412,6 @@ impl<'a> RTContext<'a> {
}
write!(block, ").await;\n")?;
}
_ => todo!(),
}
}
_ => panic!(
......@@ -459,6 +469,20 @@ impl<'a> RTContext<'a> {
self.codegen_dynamic_constant(right, w)?;
write!(w, ")")?;
}
DynamicConstant::Min(left, right) => {
write!(w, "::core::cmp::min(")?;
self.codegen_dynamic_constant(left, w)?;
write!(w, ",")?;
self.codegen_dynamic_constant(right, w)?;
write!(w, ")")?;
}
DynamicConstant::Max(left, right) => {
write!(w, "::core::cmp::max(")?;
self.codegen_dynamic_constant(left, w)?;
write!(w, ",")?;
self.codegen_dynamic_constant(right, w)?;
write!(w, ")")?;
}
}
Ok(())
}
......
use std::cmp::{max, min};
use std::fmt::Write;
use std::ops::Coroutine;
use std::ops::CoroutineState;
......@@ -121,6 +122,8 @@ pub enum DynamicConstant {
Mul(DynamicConstantID, DynamicConstantID),
Div(DynamicConstantID, DynamicConstantID),
Rem(DynamicConstantID, DynamicConstantID),
Min(DynamicConstantID, DynamicConstantID),
Max(DynamicConstantID, DynamicConstantID),
}
/*
......@@ -130,7 +133,7 @@ pub enum DynamicConstant {
* operate on an index list, composing indices at different levels in a type
* tree. Each type that can be indexed has a unique variant in the index enum.
*/
#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
pub enum Index {
Field(usize),
Variant(usize),
......@@ -329,7 +332,7 @@ pub enum Schedule {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum Device {
LLVM,
NVVM,
CUDA,
// Entry functions are lowered to async Rust code that calls device
// functions (leaf nodes in the call graph), possibly concurrently.
AsyncRust,
......@@ -445,13 +448,17 @@ impl Module {
| DynamicConstant::Sub(x, y)
| DynamicConstant::Mul(x, y)
| DynamicConstant::Div(x, y)
| DynamicConstant::Rem(x, y) => {
| DynamicConstant::Rem(x, y)
| DynamicConstant::Min(x, y)
| DynamicConstant::Max(x, y) => {
match &self.dynamic_constants[dc_id.idx()] {
DynamicConstant::Add(_, _) => write!(w, "+")?,
DynamicConstant::Sub(_, _) => write!(w, "-")?,
DynamicConstant::Mul(_, _) => write!(w, "*")?,
DynamicConstant::Div(_, _) => write!(w, "/")?,
DynamicConstant::Rem(_, _) => write!(w, "%")?,
DynamicConstant::Min(_, _) => write!(w, "min")?,
DynamicConstant::Max(_, _) => write!(w, "max")?,
_ => (),
}
write!(w, "(")?;
......@@ -1014,6 +1021,14 @@ pub fn evaluate_dynamic_constant(
DynamicConstant::Rem(left, right) => {
Some(evaluate_dynamic_constant(left, dcs)? % evaluate_dynamic_constant(right, dcs)?)
}
DynamicConstant::Min(left, right) => Some(min(
evaluate_dynamic_constant(left, dcs)?,
evaluate_dynamic_constant(right, dcs)?,
)),
DynamicConstant::Max(left, right) => Some(max(
evaluate_dynamic_constant(left, dcs)?,
evaluate_dynamic_constant(right, dcs)?,
)),
}
}
......
use std::cmp::{max, min};
use std::collections::HashMap;
use std::iter::zip;
......@@ -193,7 +194,9 @@ fn typeflow(
| DynamicConstant::Sub(x, y)
| DynamicConstant::Mul(x, y)
| DynamicConstant::Div(x, y)
| DynamicConstant::Rem(x, y) => {
| DynamicConstant::Rem(x, y)
| DynamicConstant::Min(x, y)
| DynamicConstant::Max(x, y) => {
check_dynamic_constants(x, dynamic_constants, num_parameters)
&& check_dynamic_constants(y, dynamic_constants, num_parameters)
}
......@@ -513,12 +516,8 @@ fn typeflow(
Constant::Float64(_) => {
Concrete(get_type_id(Type::Float64, types, reverse_type_map))
}
// Product, summation, and array constants are exceptions.
// Technically, only summation constants need to explicitly
// store their type, but product and array constants also
// explicitly store their type specifically to make this code
// simpler (although their type could be derived from the
// constant itself).
// Product, summation, and array constants are exceptions. They
// all explicitly store their type.
Constant::Product(id, _) => {
if let Type::Product(_) = types[id.idx()] {
Concrete(id)
......@@ -537,7 +536,6 @@ fn typeflow(
))
}
}
// Array typechecking also consists of validating the number of constant elements.
Constant::Array(id) => {
if let Type::Array(_, _) = &types[id.idx()] {
Concrete(id)
......@@ -1126,27 +1124,55 @@ fn types_match(
/*
* Determine if the given dynamic constant matches the parameter's dynamic
* constants when the provided dynamic constants are substituted in for the
* dynamic constants used in the parameter's dynamic constant
* dynamic constants used in the parameter's dynamic constant. Implement dynamic
* constant normalization here as well - i.e., 1 * 2 * 3 = 6.
*/
fn dyn_consts_match(
dynamic_constants: &Vec<DynamicConstant>,
dc_args: &Box<[DynamicConstantID]>,
param: DynamicConstantID,
input: DynamicConstantID,
left: DynamicConstantID,
right: DynamicConstantID,
) -> bool {
// First, try evaluating the DCs and seeing if they're the same value.
if let (Some(cons1), Some(cons2)) = (
evaluate_dynamic_constant(left, dynamic_constants),
evaluate_dynamic_constant(right, dynamic_constants),
) {
return cons1 == cons2;
}
match (
&dynamic_constants[param.idx()],
&dynamic_constants[input.idx()],
&dynamic_constants[left.idx()],
&dynamic_constants[right.idx()],
) {
(DynamicConstant::Constant(x), DynamicConstant::Constant(y)) => x == y,
(DynamicConstant::Parameter(i), _) => input == dc_args[*i],
(DynamicConstant::Add(pl, pr), DynamicConstant::Add(il, ir))
| (DynamicConstant::Sub(pl, pr), DynamicConstant::Sub(il, ir))
| (DynamicConstant::Mul(pl, pr), DynamicConstant::Mul(il, ir))
| (DynamicConstant::Div(pl, pr), DynamicConstant::Div(il, ir))
| (DynamicConstant::Rem(pl, pr), DynamicConstant::Rem(il, ir)) => {
dyn_consts_match(dynamic_constants, dc_args, *pl, *il)
&& dyn_consts_match(dynamic_constants, dc_args, *pr, *ir)
(DynamicConstant::Parameter(l), DynamicConstant::Parameter(r)) => l == r,
(DynamicConstant::Parameter(i), _) => dyn_consts_match(
dynamic_constants,
dc_args,
min(right, dc_args[*i]),
max(right, dc_args[*i]),
),
(_, DynamicConstant::Parameter(i)) => dyn_consts_match(
dynamic_constants,
dc_args,
min(left, dc_args[*i]),
max(left, dc_args[*i]),
),
(DynamicConstant::Add(ll, lr), DynamicConstant::Add(rl, rr))
| (DynamicConstant::Mul(ll, lr), DynamicConstant::Mul(rl, rr))
| (DynamicConstant::Min(ll, lr), DynamicConstant::Min(rl, rr))
| (DynamicConstant::Max(ll, lr), DynamicConstant::Max(rl, rr)) => {
// Normalize for associative ops by always looking at smaller DC ID
// as left arm and larger DC ID as right arm.
dyn_consts_match(dynamic_constants, dc_args, min(*ll, *lr), min(*rl, *rr))
&& dyn_consts_match(dynamic_constants, dc_args, max(*ll, *lr), max(*rl, *rr))
}
(DynamicConstant::Sub(ll, lr), DynamicConstant::Sub(rl, rr))
| (DynamicConstant::Div(ll, lr), DynamicConstant::Div(rl, rr))
| (DynamicConstant::Rem(ll, lr), DynamicConstant::Rem(rl, rr)) => {
dyn_consts_match(dynamic_constants, dc_args, *ll, *rl)
&& dyn_consts_match(dynamic_constants, dc_args, *lr, *rr)
}
(_, _) => false,
}
......@@ -1328,5 +1354,27 @@ fn dyn_const_subst(
reverse_dynamic_constant_map,
)
}
DynamicConstant::Min(l, r) => {
let x = *l;
let y = *r;
let sx = dyn_const_subst(dynamic_constants, reverse_dynamic_constant_map, dc_args, x);
let sy = dyn_const_subst(dynamic_constants, reverse_dynamic_constant_map, dc_args, y);
intern_dyn_const(
DynamicConstant::Min(sx, sy),
dynamic_constants,
reverse_dynamic_constant_map,
)
}
DynamicConstant::Max(l, r) => {
let x = *l;
let y = *r;
let sx = dyn_const_subst(dynamic_constants, reverse_dynamic_constant_map, dc_args, x);
let sy = dyn_const_subst(dynamic_constants, reverse_dynamic_constant_map, dc_args, y);
intern_dyn_const(
DynamicConstant::Max(sx, sy),
dynamic_constants,
reverse_dynamic_constant_map,
)
}
}
}
......@@ -11,6 +11,7 @@ tempfile = "*"
either = "*"
itertools = "*"
take_mut = "*"
union-find = "*"
postcard = { version = "*", features = ["alloc"] }
serde = { version = "*", features = ["derive"] }
hercules_cg = { path = "../hercules_cg" }
......
This diff is collapsed.
......@@ -3,6 +3,7 @@ use std::iter::{empty, once, zip, FromIterator};
use bitvec::prelude::*;
use either::Either;
use union_find::{QuickFindUf, UnionBySize, UnionFind};
use hercules_cg::*;
use hercules_ir::*;
......@@ -551,6 +552,35 @@ fn mutating_objects<'a>(
}
}
/*
 * Enumerate the collection values that `mutator` may write into. A write node
 * mutates its `collect` input; a call node mutates any argument whose
 * corresponding parameter object the callee mutates. All other nodes mutate
 * nothing.
 */
fn mutating_writes<'a>(
    function: &'a Function,
    mutator: NodeID,
    objects: &'a CollectionObjects,
) -> Box<dyn Iterator<Item = NodeID> + 'a> {
    match function.nodes[mutator.idx()] {
        Node::Write {
            collect,
            data: _,
            indices: _,
        } => Box::new(once(collect)),
        Node::Call {
            control: _,
            function: callee,
            dynamic_constants: _,
            ref args,
        } => {
            // Keep only the arguments whose parameter object is mutated
            // inside the callee, according to the collection objects analysis.
            let mutated_args = args.into_iter().enumerate().filter_map(move |(position, arg)| {
                let callee_objects = &objects[&callee];
                let param_obj = callee_objects.param_to_object(position)?;
                callee_objects.is_mutated(param_obj).then_some(*arg)
            });
            Box::new(mutated_args)
        }
        _ => Box::new(empty()),
    }
}
type Liveness = BTreeMap<NodeID, Vec<BTreeSet<NodeID>>>;
/*
......@@ -579,27 +609,60 @@ fn spill_clones(
// Step 2: compute an interference graph from the liveness result. This
// graph contains a vertex per node ID producing a collection value and an
// edge per pair of node IDs that interfere. Nodes A and B interfere if node
// A is defined right above a point where node B is live.
// A is defined right above a point where node B is live and A != B. Extra
// edges are drawn for forwarding reads - when there is a node A that is a
// forwarding read of a node B, A and B really have the same live range for
// the purpose of determining when spills are necessary, since forwarding
// reads can be thought of as nothing but pointer math. For this purpose, we
// maintain a union-find of nodes that form a forwarding read DAG (notably,
// phis and reduces are not considered forwarding reads). The more precise
// version of the interference condition is: nodes A and B interfere if node
// A is defined right above a point where a node C is live, where C is in the
// same union-find class as B.
// Assemble the union-find to group forwarding read DAGs.
let mut union_find = QuickFindUf::<UnionBySize>::new(editor.func().nodes.len());
for id in editor.node_ids() {
for forwarding_read in forwarding_reads(editor.func(), editor.func_id(), id, objects) {
union_find.union(id.idx(), forwarding_read.idx());
}
}
// Figure out which classes contain which node IDs, since we need to iterate
// the disjoint sets.
let mut disjoint_sets: BTreeMap<usize, Vec<NodeID>> = BTreeMap::new();
for id in editor.node_ids() {
disjoint_sets
.entry(union_find.find(id.idx()))
.or_default()
.push(id);
}
// Create the graph.
let mut edges = vec![];
for (bb, liveness) in liveness {
let insts = &bbs.1[bb.idx()];
for (node, live) in zip(insts, liveness.into_iter().skip(1)) {
for live_node in live {
if *node != live_node {
edges.push((*node, live_node));
for live_node in disjoint_sets[&union_find.find(live_node.idx())].iter() {
if *node != *live_node {
edges.push((*node, *live_node));
}
}
}
}
}
// Step 3: filter edges (A, B) to just see edges where A uses B and A isn't
// a terminating read. These are the edges that may require a spill.
// Step 3: filter edges (A, B) to just see edges where A uses B and A
// mutates B. These are the edges that may require a spill.
let mut spill_edges = edges.into_iter().filter(|(a, b)| {
get_uses(&editor.func().nodes[a.idx()])
.as_ref()
.into_iter()
.any(|u| *u == *b)
&& !terminating_reads(editor.func(), editor.func_id(), *a, objects).any(|id| id == *b)
mutating_writes(editor.func(), *a, objects).any(|id| id == *b)
|| (get_uses(&editor.func().nodes[a.idx()])
.as_ref()
.into_iter()
.any(|u| *u == *b)
&& (editor.func().nodes[a.idx()].is_phi()
|| editor.func().nodes[a.idx()].is_reduce()))
});
// Step 4: if there is a spill edge, spill it and return true. Otherwise,
......
......@@ -43,9 +43,6 @@ pub fn inline(editors: &mut [FunctionEditor], callgraph: &CallGraph) {
// Step 4: run inlining on each function individually. Iterate the functions
// in topological order.
for to_inline_id in topo {
if editors[to_inline_id.idx()].func().entry {
continue;
}
// Since Rust cannot analyze the accesses into an array of mutable
// references, we need to do some weirdness here to simultaneously get:
// 1. A mutable reference to the function we're modifying.
......
......@@ -319,7 +319,8 @@ fn compress_return_products(editors: &mut Vec<FunctionEditor>, all_callsites_edi
let old_dcs = dc_param_idx_to_dc_id[..new_dcs.len()].to_vec().clone();
let mut substituted = old_return_type_ids[function_id.idx()];
let first_dc = edit.num_dynamic_constants() + 1;
assert_eq!(old_dcs.len(), new_dcs.len());
let first_dc = edit.num_dynamic_constants() + 100;
for (dc_a, dc_n) in zip(old_dcs, first_dc..) {
substituted = substitute_dynamic_constants_in_type(
dc_a,
......@@ -416,12 +417,37 @@ fn remove_return_singletons(editors: &mut Vec<FunctionEditor>, all_callsites_edi
.collect();
for call_node_id in call_node_ids {
let (_, function, _, _) = editor.func().nodes[call_node_id.idx()].try_call().unwrap();
let (_, function, dc_args, _) =
editor.func().nodes[call_node_id.idx()].try_call().unwrap();
let dc_args = dc_args.clone();
if singleton_removed[function.idx()] {
let edit_successful = editor.edit(|mut edit| {
let empty_constant_id =
edit.add_zero_constant(old_return_type_ids[function.idx()]);
let mut substituted = old_return_type_ids[function.idx()];
let first_dc = edit.num_dynamic_constants() + 100;
let dc_params: Vec<_> = (0..dc_args.len())
.map(|param_idx| {
edit.add_dynamic_constant(DynamicConstant::Parameter(param_idx))
})
.collect();
for (dc_a, dc_n) in zip(dc_params, first_dc..) {
substituted = substitute_dynamic_constants_in_type(
dc_a,
DynamicConstantID::new(dc_n),
substituted,
&mut edit,
);
}
for (dc_n, dc_b) in zip(first_dc.., dc_args.iter()) {
substituted = substitute_dynamic_constants_in_type(
DynamicConstantID::new(dc_n),
*dc_b,
substituted,
&mut edit,
);
}
let empty_constant_id = edit.add_zero_constant(substituted);
let empty_node_id = edit.add_node(Node::Constant {
id: empty_constant_id,
});
......
......@@ -17,6 +17,7 @@ pub mod pass;
pub mod phi_elim;
pub mod pred;
pub mod schedule;
pub mod slf;
pub mod sroa;
pub mod unforkify;
pub mod utils;
......@@ -38,6 +39,7 @@ pub use crate::pass::*;
pub use crate::phi_elim::*;
pub use crate::pred::*;
pub use crate::schedule::*;
pub use crate::slf::*;
pub use crate::sroa::*;
pub use crate::unforkify::*;
pub use crate::utils::*;
......@@ -25,6 +25,8 @@ pub enum Pass {
PhiElim,
Forkify,
ForkGuardElim,
SLF,
WritePredication,
Predication,
SROA,
Inline,
......@@ -469,27 +471,90 @@ impl PassManager {
}
self.clear_analyses();
}
Pass::Predication => {
Pass::SLF => {
self.make_def_uses();
self.make_reverse_postorders();
self.make_doms();
self.make_fork_join_maps();
self.make_typing();
let def_uses = self.def_uses.as_ref().unwrap();
let reverse_postorders = self.reverse_postorders.as_ref().unwrap();
let doms = self.doms.as_ref().unwrap();
let fork_join_maps = self.fork_join_maps.as_ref().unwrap();
let typing = self.typing.as_ref().unwrap();
for idx in 0..self.module.functions.len() {
predication(
let constants_ref =
RefCell::new(std::mem::take(&mut self.module.constants));
let dynamic_constants_ref =
RefCell::new(std::mem::take(&mut self.module.dynamic_constants));
let types_ref = RefCell::new(std::mem::take(&mut self.module.types));
let mut editor = FunctionEditor::new(
&mut self.module.functions[idx],
FunctionID::new(idx),
&constants_ref,
&dynamic_constants_ref,
&types_ref,
&def_uses[idx],
&reverse_postorders[idx],
&doms[idx],
&fork_join_maps[idx],
);
let num_nodes = self.module.functions[idx].nodes.len();
self.module.functions[idx]
.schedules
.resize(num_nodes, vec![]);
slf(&mut editor, &reverse_postorders[idx], &typing[idx]);
self.module.constants = constants_ref.take();
self.module.dynamic_constants = dynamic_constants_ref.take();
self.module.types = types_ref.take();
println!("{}", self.module.functions[idx].name);
self.module.functions[idx].delete_gravestones();
}
self.clear_analyses();
}
Pass::WritePredication => {
self.make_def_uses();
let def_uses = self.def_uses.as_ref().unwrap();
for idx in 0..self.module.functions.len() {
let constants_ref =
RefCell::new(std::mem::take(&mut self.module.constants));
let dynamic_constants_ref =
RefCell::new(std::mem::take(&mut self.module.dynamic_constants));
let types_ref = RefCell::new(std::mem::take(&mut self.module.types));
let mut editor = FunctionEditor::new(
&mut self.module.functions[idx],
FunctionID::new(idx),
&constants_ref,
&dynamic_constants_ref,
&types_ref,
&def_uses[idx],
);
write_predication(&mut editor);
self.module.constants = constants_ref.take();
self.module.dynamic_constants = dynamic_constants_ref.take();
self.module.types = types_ref.take();
self.module.functions[idx].delete_gravestones();
}
self.clear_analyses();
}
Pass::Predication => {
self.make_def_uses();
self.make_typing();
let def_uses = self.def_uses.as_ref().unwrap();
let typing = self.typing.as_ref().unwrap();
for idx in 0..self.module.functions.len() {
let constants_ref =
RefCell::new(std::mem::take(&mut self.module.constants));
let dynamic_constants_ref =
RefCell::new(std::mem::take(&mut self.module.dynamic_constants));
let types_ref = RefCell::new(std::mem::take(&mut self.module.types));
let mut editor = FunctionEditor::new(
&mut self.module.functions[idx],
FunctionID::new(idx),
&constants_ref,
&dynamic_constants_ref,
&types_ref,
&def_uses[idx],
);
predication(&mut editor, &typing[idx]);
self.module.constants = constants_ref.take();
self.module.dynamic_constants = dynamic_constants_ref.take();
self.module.types = types_ref.take();
self.module.functions[idx].delete_gravestones();
}
self.clear_analyses();
......@@ -1002,7 +1067,7 @@ impl PassManager {
.expect("PANIC: Unable to write output module file contents.");
}
}
println!("Ran pass: {:?}", pass);
eprintln!("Ran pass: {:?}", pass);
}
}
......
This diff is collapsed.
use std::collections::BTreeMap;
use hercules_ir::*;
use crate::*;
/*
* The SLF lattice tracks what sub-values of a collection are known. Each sub-
* value is a node ID at a set of indices that were written at. A write to a set
* of indices that structurally matches a previous sub-value removes the old sub-
* value, since that write may overwrite the old known sub-value. The lattice
* top corresponds to every value is 0. When the sub-values at a set of indices
* are not known, the `subvalues` map stores `None` for the known value. When a
* write involves array positions, remove sub-values that are clobbered and
* insert an indices set with an empty positions list and a `None` value.
*/
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct SLFLattice {
    // Maps each set of indices that was written at to the node ID whose value
    // is known to be stored there, or `None` when the value stored at those
    // indices is statically unknown.
    subvalues: BTreeMap<Box<[Index]>, Option<NodeID>>,
}
impl Semilattice for SLFLattice {
    /*
     * Meet keeps a sub-value only when both sides agree on it; every other
     * indices set present in either side maps to `None` (unknown).
     */
    fn meet(a: &Self, b: &Self) -> Self {
        let mut merged = BTreeMap::new();
        for (indices, a_subvalue) in a.subvalues.iter() {
            let agreed = match b.subvalues.get(indices) {
                // Both maps know the same sub-value for this indices set, so
                // it survives the meet untouched.
                Some(b_subvalue) if b_subvalue == a_subvalue => *a_subvalue,
                // Either `b` has no entry here or the sub-values disagree;
                // in both cases the stored value is unknown.
                _ => None,
            };
            merged.insert(indices.clone(), agreed);
        }
        // Indices sets appearing only in `b` have no counterpart in `a`, so
        // their sub-values are unknown as well.
        for indices in b.subvalues.keys() {
            merged.entry(indices.clone()).or_insert(None);
        }
        SLFLattice { subvalues: merged }
    }

    /*
     * Top: no sub-values tracked, corresponding to every value being 0.
     */
    fn top() -> Self {
        SLFLattice {
            subvalues: BTreeMap::new(),
        }
    }

    /*
     * Bottom: a single empty indices set mapping to `None`; the empty set
     * overlaps with every possible indices set.
     */
    fn bottom() -> Self {
        let mut subvalues = BTreeMap::new();
        subvalues.insert(Box::new([]) as Box<[Index]>, None);
        SLFLattice { subvalues }
    }
}
/*
* Top level function to run store-to-load forwarding on a function. Looks for
* known values inside collections and replaces reads of those values with the
* values directly.
*/
pub fn slf(editor: &mut FunctionEditor, reverse_postorder: &Vec<NodeID>, typing: &Vec<TypeID>) {
    // First, run a dataflow analysis that looks at known values inside
    // collections. Thanks to the value semantics of Hercules IR, this analysis
    // is relatively simple and straightforward.
    let func = editor.func();
    let lattice = forward_dataflow(func, reverse_postorder, |inputs, id| {
        match func.nodes[id.idx()] {
            // Phis, reduces, and selects merge what is known about each of
            // their data inputs via the lattice meet.
            Node::Phi {
                control: _,
                data: _,
            }
            | Node::Reduce {
                control: _,
                init: _,
                reduct: _,
            }
            | Node::Ternary {
                op: TernaryOperator::Select,
                first: _,
                second: _,
                third: _,
            } => inputs.into_iter().fold(SLFLattice::top(), |acc, input| {
                SLFLattice::meet(&acc, input)
            }),
            Node::Write {
                collect: _,
                data,
                ref indices,
            } => {
                // Start with the indices of the `collect` input.
                let mut value = inputs[0].clone();
                // Any indices sets that overlap with `indices` become `None`,
                // since we no longer know what's stored there.
                for (other_indices, subvalue) in value.subvalues.iter_mut() {
                    if indices_may_overlap(other_indices, indices) {
                        *subvalue = None;
                    }
                }
                // Track `data` at `indices`.
                value.subvalues.insert(indices.clone(), Some(data));
                value
            }
            // Any other node producing a collection tells us nothing about
            // its contents.
            _ => SLFLattice::bottom(),
        }
    });

    // Second, look for reads where the indices set either:
    // 1. Equal the indices of a known sub-value. Then, the read can be replaced
    //    by the known sub-value.
    // 2. Otherwise, if the indices set doesn't overlap with any known or
    //    unknown sub-value, then the read can be replaced by a zero constant.
    // 3. Otherwise, the read can't be replaced.
    // Keep track of which nodes we've already replaced, since a sub-value we
    // knew previously may be the ID of an old node replaced previously.
    let mut replacements = BTreeMap::new();
    for id in editor.node_ids() {
        let Node::Read {
            collect,
            ref indices,
        } = editor.func().nodes[id.idx()]
        else {
            continue;
        };
        let subvalues = &lattice[collect.idx()].subvalues;
        if let Some(sub_value) = subvalues.get(indices)
            && let Some(mut known) = *sub_value
        {
            // Chase the replacement chain so we forward to the live node,
            // not to a node this pass already deleted.
            while let Some(replacement) = replacements.get(&known) {
                known = *replacement;
            }
            editor.edit(|mut edit| {
                edit = edit.replace_all_uses(id, known)?;
                edit.delete_node(id)
            });
            replacements.insert(id, known);
        } else if !subvalues
            .keys()
            .any(|other_indices| indices_may_overlap(other_indices, indices))
        {
            // No write ever touched these indices, so per the lattice top
            // semantics the read produces zero.
            editor.edit(|mut edit| {
                let zero = edit.add_zero_constant(typing[id.idx()]);
                let zero = edit.add_node(Node::Constant { id: zero });
                edit = edit.replace_all_uses(id, zero)?;
                edit.delete_node(id)
            });
        }
    }
}
use std::iter::zip;
use hercules_ir::def_use::*;
use hercules_ir::ir::*;
......@@ -130,6 +132,24 @@ pub(crate) fn substitute_dynamic_constants(
dc_c
}
}
DynamicConstant::Min(left, right) => {
let new_left = substitute_dynamic_constants(dc_a, dc_b, left, edit);
let new_right = substitute_dynamic_constants(dc_a, dc_b, right, edit);
if new_left != left || new_right != right {
edit.add_dynamic_constant(DynamicConstant::Min(new_left, new_right))
} else {
dc_c
}
}
DynamicConstant::Max(left, right) => {
let new_left = substitute_dynamic_constants(dc_a, dc_b, left, edit);
let new_right = substitute_dynamic_constants(dc_a, dc_b, right, edit);
if new_left != left || new_right != right {
edit.add_dynamic_constant(DynamicConstant::Max(new_left, new_right))
} else {
dc_c
}
}
}
}
......@@ -223,7 +243,7 @@ pub(crate) fn substitute_dynamic_constants_in_node(
/*
* Top level function to make a function have only a single return.
*/
pub fn collapse_returns(editor: &mut FunctionEditor) -> Option<NodeID> {
pub(crate) fn collapse_returns(editor: &mut FunctionEditor) -> Option<NodeID> {
let returns: Vec<NodeID> = (0..editor.func().nodes.len())
.filter(|idx| editor.func().nodes[*idx].is_return())
.map(NodeID::new)
......@@ -263,7 +283,7 @@ pub fn collapse_returns(editor: &mut FunctionEditor) -> Option<NodeID> {
new_return
}
pub fn contains_between_control_flow(func: &Function) -> bool {
pub(crate) fn contains_between_control_flow(func: &Function) -> bool {
let num_control = func.nodes.iter().filter(|node| node.is_control()).count();
assert!(num_control >= 2, "PANIC: A Hercules function must have at least two control nodes: a start node and at least one return node.");
num_control > 2
......@@ -273,7 +293,7 @@ pub fn contains_between_control_flow(func: &Function) -> bool {
* Top level function to ensure a Hercules function contains at least one
* control node that isn't the start or return nodes.
*/
pub fn ensure_between_control_flow(editor: &mut FunctionEditor) -> Option<NodeID> {
pub(crate) fn ensure_between_control_flow(editor: &mut FunctionEditor) -> Option<NodeID> {
if !contains_between_control_flow(editor.func()) {
let ret = editor
.node_ids()
......@@ -308,3 +328,51 @@ pub fn ensure_between_control_flow(editor: &mut FunctionEditor) -> Option<NodeID
)
}
}
/*
* Helper function to tell if two lists of indices have the same structure.
*/
/*
 * Two index lists are structurally equivalent when they have the same length
 * and every corresponding pair has the same shape: equal field numbers, equal
 * variant numbers, or position lists (which are asserted to be equally long).
 */
pub(crate) fn indices_structurally_equivalent(indices1: &[Index], indices2: &[Index]) -> bool {
    indices1.len() == indices2.len()
        && zip(indices1, indices2).all(|pair| match pair {
            (Index::Field(idx1), Index::Field(idx2)) => idx1 == idx2,
            (Index::Variant(idx1), Index::Variant(idx2)) => idx1 == idx2,
            (Index::Position(ref pos1), Index::Position(ref pos2)) => {
                assert_eq!(pos1.len(), pos2.len());
                true
            }
            // Mismatched index kinds are never structurally equivalent.
            _ => false,
        })
}
/*
* Helper function to determine if two lists of indices may overlap.
*/
/*
 * Conservatively decide whether two lists of indices may refer to overlapping
 * memory. Only differing field numbers prove disjointness; variants share the
 * same underlying memory and positions may be equal at runtime. `zip` stops at
 * the shorter list - a prefix match means one indexes a larger sub-value
 * containing the other, which overlaps.
 */
pub(crate) fn indices_may_overlap(indices1: &[Index], indices2: &[Index]) -> bool {
    zip(indices1, indices2).all(|pair| match pair {
        // Distinct fields are disjoint; equal fields may overlap.
        (Index::Field(idx1), Index::Field(idx2)) => idx1 == idx2,
        // Variants alias the same memory; position indices may coincide at
        // runtime, so both always may overlap.
        (Index::Variant(_), Index::Variant(_)) | (Index::Position(_), Index::Position(_)) => true,
        // Mixing index kinds at the same depth is a type error upstream.
        _ => panic!(),
    })
}
......@@ -4,5 +4,8 @@ version = "0.1.0"
authors = ["Russel Arbore <rarbore2@illinois.edu>"]
edition = "2021"
[features]
cuda = []
[dependencies]
use std::env::var;
use std::path::Path;
use std::process::Command;
/// Build script: when the `cuda` feature is enabled, compile the CUDA runtime
/// stubs with `nvcc`, archive them into `librtdefs.a`, and emit the link
/// directives the Rust runtime needs. Without the feature this is a no-op.
fn main() {
    if cfg!(feature = "cuda") {
        let out_dir = var("OUT_DIR").unwrap();

        // Compile rtdefs.cu to an object file inside OUT_DIR.
        let nvcc_status = Command::new("nvcc")
            .args(&["src/rtdefs.cu", "-c", "-o"])
            .arg(&format!("{}/rtdefs.o", out_dir))
            .status()
            .expect("PANIC: NVCC failed when building runtime. Is NVCC installed?");
        // `status()` only errors if the process couldn't be spawned - check
        // the exit code too, so a CUDA compile error fails the build instead
        // of being silently ignored.
        assert!(
            nvcc_status.success(),
            "PANIC: NVCC returned a non-zero exit code while building runtime."
        );

        // Archive the object file; paths are relative to OUT_DIR.
        let ar_status = Command::new("ar")
            .args(&["crus", "librtdefs.a", "rtdefs.o"])
            .current_dir(&Path::new(&out_dir))
            .status()
            .expect("PANIC: ar failed when building runtime. Is ar installed?");
        assert!(
            ar_status.success(),
            "PANIC: ar returned a non-zero exit code while building runtime."
        );

        println!("cargo::rustc-link-search=native={}", out_dir);
        // NOTE(review): hard-coded CUDA library path assumes a Debian-style
        // x86-64 layout - confirm for other platforms.
        println!("cargo::rustc-link-search=native=/usr/lib/x86_64-linux-gnu/");
        println!("cargo::rustc-link-lib=static=rtdefs");
        println!("cargo::rustc-link-lib=cudart");
        println!("cargo::rerun-if-changed=src/rtdefs.cu");
    }
}
......@@ -4,6 +4,16 @@ use std::mem::swap;
use std::ptr::{copy_nonoverlapping, NonNull};
use std::slice::from_raw_parts;
#[cfg(feature = "cuda")]
extern "C" {
fn cuda_alloc(size: usize) -> *mut u8;
fn cuda_alloc_zeroed(size: usize) -> *mut u8;
fn cuda_dealloc(ptr: *mut u8);
fn copy_cpu_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
fn copy_cuda_to_cpu(dst: *mut u8, src: *mut u8, size: usize);
fn copy_cuda_to_cuda(dst: *mut u8, src: *mut u8, size: usize);
}
/*
* An in-memory collection object that can be used by functions compiled by the
* Hercules compiler.
......@@ -13,16 +23,23 @@ pub struct HerculesBox<'a> {
cpu_exclusive: Option<NonNull<u8>>,
cpu_owned: Option<NonNull<u8>>,
#[cfg(feature = "cuda")]
cuda_owned: Option<NonNull<u8>>,
size: usize,
_phantom: PhantomData<&'a u8>,
}
impl<'a> HerculesBox<'a> {
impl<'b, 'a: 'b> HerculesBox<'a> {
pub fn from_slice<T>(slice: &'a [T]) -> Self {
HerculesBox {
cpu_shared: Some(unsafe { NonNull::new_unchecked(slice.as_ptr() as *mut u8) }),
cpu_exclusive: None,
cpu_owned: None,
#[cfg(feature = "cuda")]
cuda_owned: None,
size: slice.len() * size_of::<T>(),
_phantom: PhantomData,
}
......@@ -33,36 +50,69 @@ impl<'a> HerculesBox<'a> {
cpu_shared: None,
cpu_exclusive: Some(unsafe { NonNull::new_unchecked(slice.as_mut_ptr() as *mut u8) }),
cpu_owned: None,
#[cfg(feature = "cuda")]
cuda_owned: None,
size: slice.len() * size_of::<T>(),
_phantom: PhantomData,
}
}
pub fn as_slice<T>(&'a self) -> &'a [T] {
pub fn as_slice<T>(&'b mut self) -> &'b [T] {
assert_eq!(self.size % size_of::<T>(), 0);
unsafe { from_raw_parts(self.__cpu_ptr() as *const T, self.size / size_of::<T>()) }
}
unsafe fn into_cpu(&self) -> NonNull<u8> {
self.cpu_shared
.or(self.cpu_exclusive)
.or(self.cpu_owned)
.unwrap()
unsafe fn get_cpu_ptr(&self) -> Option<NonNull<u8>> {
self.cpu_owned.or(self.cpu_exclusive).or(self.cpu_shared)
}
#[cfg(feature = "cuda")]
unsafe fn get_cuda_ptr(&self) -> Option<NonNull<u8>> {
self.cuda_owned
}
unsafe fn into_cpu_mut(&mut self) -> NonNull<u8> {
if let Some(ptr) = self.cpu_exclusive.or(self.cpu_owned) {
unsafe fn allocate_cpu(&mut self) -> NonNull<u8> {
if let Some(ptr) = self.cpu_owned {
ptr
} else {
let ptr =
NonNull::new(alloc(Layout::from_size_align_unchecked(self.size, 16))).unwrap();
copy_nonoverlapping(self.cpu_shared.unwrap().as_ptr(), ptr.as_ptr(), self.size);
self.cpu_owned = Some(ptr);
self.cpu_shared = None;
ptr
}
}
#[cfg(feature = "cuda")]
unsafe fn allocate_cuda(&mut self) -> NonNull<u8> {
if let Some(ptr) = self.cuda_owned {
ptr
} else {
let ptr = cuda_alloc(self.size);
self.cuda_owned = Some(NonNull::new(ptr).unwrap());
self.cuda_owned.unwrap()
}
}
unsafe fn deallocate_cpu(&mut self) {
if let Some(ptr) = self.cpu_owned {
dealloc(
ptr.as_ptr(),
Layout::from_size_align_unchecked(self.size, 16),
);
self.cpu_owned = None;
}
}
#[cfg(feature = "cuda")]
unsafe fn deallocate_cuda(&mut self) {
if let Some(ptr) = self.cuda_owned {
cuda_dealloc(ptr.as_ptr());
self.cuda_owned = None;
}
}
pub unsafe fn __zeros(size: u64) -> Self {
assert_ne!(size, 0);
let size = size as usize;
......@@ -72,6 +122,10 @@ impl<'a> HerculesBox<'a> {
cpu_owned: Some(
NonNull::new(alloc_zeroed(Layout::from_size_align_unchecked(size, 16))).unwrap(),
),
#[cfg(feature = "cuda")]
cuda_owned: None,
size: size,
_phantom: PhantomData,
}
......@@ -82,6 +136,10 @@ impl<'a> HerculesBox<'a> {
cpu_shared: None,
cpu_exclusive: None,
cpu_owned: None,
#[cfg(feature = "cuda")]
cuda_owned: None,
size: 0,
_phantom: PhantomData,
}
......@@ -93,24 +151,61 @@ impl<'a> HerculesBox<'a> {
ret
}
pub unsafe fn __cpu_ptr(&self) -> *mut u8 {
self.into_cpu().as_ptr()
pub unsafe fn __cpu_ptr(&mut self) -> *mut u8 {
if let Some(ptr) = self.get_cpu_ptr() {
return ptr.as_ptr();
}
#[cfg(feature = "cuda")]
{
let cuda_ptr = self.get_cuda_ptr().unwrap();
let cpu_ptr = self.allocate_cpu();
copy_cuda_to_cpu(cpu_ptr.as_ptr(), cuda_ptr.as_ptr(), self.size);
return cpu_ptr.as_ptr();
}
panic!()
}
pub unsafe fn __cpu_ptr_mut(&mut self) -> *mut u8 {
self.into_cpu_mut().as_ptr()
let cpu_ptr = self.__cpu_ptr();
if Some(cpu_ptr) == self.cpu_shared.map(|nn| nn.as_ptr()) {
self.allocate_cpu();
copy_nonoverlapping(cpu_ptr, self.cpu_owned.unwrap().as_ptr(), self.size);
}
self.cpu_shared = None;
self.cpu_exclusive = None;
#[cfg(feature = "cuda")]
self.deallocate_cuda();
cpu_ptr
}
#[cfg(feature = "cuda")]
pub unsafe fn __cuda_ptr(&mut self) -> *mut u8 {
if let Some(ptr) = self.get_cuda_ptr() {
ptr.as_ptr()
} else {
let cpu_ptr = self.get_cpu_ptr().unwrap();
let cuda_ptr = self.allocate_cuda();
copy_cpu_to_cuda(cuda_ptr.as_ptr(), cpu_ptr.as_ptr(), self.size);
cuda_ptr.as_ptr()
}
}
#[cfg(feature = "cuda")]
pub unsafe fn __cuda_ptr_mut(&mut self) -> *mut u8 {
let cuda_ptr = self.__cuda_ptr();
self.cpu_shared = None;
self.cpu_exclusive = None;
self.deallocate_cpu();
cuda_ptr
}
}
impl<'a> Drop for HerculesBox<'a> {
fn drop(&mut self) {
if let Some(ptr) = self.cpu_owned {
unsafe {
dealloc(
ptr.as_ptr(),
Layout::from_size_align_unchecked(self.size, 16),
)
}
unsafe {
self.deallocate_cpu();
#[cfg(feature = "cuda")]
self.deallocate_cuda();
}
}
}