From 6997adf76342b7a6fe03c4bc370ce5fc5082a869 Mon Sep 17 00:00:00 2001 From: Graydon Hoare Date: Fri, 13 May 2011 18:38:28 -0700 Subject: [PATCH] Remove rustboot from the repository. --- configure | 6 - mk/boot.mk | 98 - mk/clean.mk | 1 - mk/platform.mk | 6 - src/README | 11 +- src/boot/README | 405 --- src/boot/be/abi.ml | 253 -- src/boot/be/asm.ml | 831 ----- src/boot/be/elf.ml | 1784 ---------- src/boot/be/il.ml | 937 ------ src/boot/be/macho.ml | 1194 ------- src/boot/be/pe.ml | 1175 ------- src/boot/be/ra.ml | 688 ---- src/boot/be/x86.ml | 2582 --------------- src/boot/driver/glue.ml | 16 - src/boot/driver/lib.ml | 464 --- src/boot/driver/main.ml | 530 --- src/boot/driver/session.ml | 149 - src/boot/fe/ast.ml | 1795 ---------- src/boot/fe/cexp.ml | 771 ----- src/boot/fe/extfmt.ml | 229 -- src/boot/fe/fuzz.ml | 166 - src/boot/fe/item.ml | 1334 -------- src/boot/fe/lexer.mll | 478 --- src/boot/fe/parser.ml | 372 --- src/boot/fe/pexp.ml | 1441 -------- src/boot/fe/token.ml | 334 -- src/boot/me/alias.ml | 156 - src/boot/me/dead.ml | 134 - src/boot/me/dwarf.ml | 3213 ------------------ src/boot/me/layer.ml | 108 - src/boot/me/layout.ml | 480 --- src/boot/me/loop.ml | 164 - src/boot/me/resolve.ml | 935 ------ src/boot/me/semant.ml | 2802 ---------------- src/boot/me/simplify.ml | 109 - src/boot/me/trans.ml | 6367 ------------------------------------ src/boot/me/transutil.ml | 251 -- src/boot/me/type.ml | 1453 -------- src/boot/me/typestate.ml | 1537 --------- src/boot/me/walk.ml | 729 ----- src/boot/util/bits.ml | 116 - src/boot/util/common.ml | 823 ----- src/boot/util/fmt.ml | 86 - src/comp/README | 41 - src/comp/front/parser.rs | 4 +- 46 files changed, 2 insertions(+), 37556 deletions(-) delete mode 100644 mk/boot.mk delete mode 100644 src/boot/README delete mode 100644 src/boot/be/abi.ml delete mode 100644 src/boot/be/asm.ml delete mode 100644 src/boot/be/elf.ml delete mode 100644 src/boot/be/il.ml delete mode 100644 src/boot/be/macho.ml delete mode 100644 src/boot/be/pe.ml delete mode 100644 src/boot/be/ra.ml delete mode 100644 src/boot/be/x86.ml delete mode 100644 src/boot/driver/glue.ml delete mode 100644 src/boot/driver/lib.ml delete mode 100644 src/boot/driver/main.ml delete mode 100644 src/boot/driver/session.ml delete mode 100644 src/boot/fe/ast.ml delete mode 100644 src/boot/fe/cexp.ml delete mode 100644 src/boot/fe/extfmt.ml delete mode 100644 src/boot/fe/fuzz.ml delete mode 100644 src/boot/fe/item.ml delete mode 100644 src/boot/fe/lexer.mll delete mode 100644 src/boot/fe/parser.ml delete mode 100644 src/boot/fe/pexp.ml delete mode 100644 src/boot/fe/token.ml delete mode 100644 src/boot/me/alias.ml delete mode 100644 src/boot/me/dead.ml delete mode 100644 src/boot/me/dwarf.ml delete mode 100644 src/boot/me/layer.ml delete mode 100644 src/boot/me/layout.ml delete mode 100644 src/boot/me/loop.ml delete mode 100644 src/boot/me/resolve.ml delete mode 100644 src/boot/me/semant.ml delete mode 100644 src/boot/me/simplify.ml delete mode 100644 src/boot/me/trans.ml delete mode 100644 src/boot/me/transutil.ml delete mode 100644 src/boot/me/type.ml delete mode 100644 src/boot/me/typestate.ml delete mode 100644 src/boot/me/walk.ml delete mode 100644 src/boot/util/bits.ml delete mode 100644 src/boot/util/common.ml delete mode 100644 src/boot/util/fmt.ml diff --git a/configure b/configure index e83787b5198..00827e6238d 100755 --- a/configure +++ b/configure @@ -176,7 +176,6 @@ fi step_msg "making directories" for i in \ doc \ - boot/fe boot/me boot/be boot/driver boot/util \ rt rt/isaac rt/bigint rt/sync rt/test \ rustllvm \ dl stage0 stage1 stage2 stage3 \ @@ -203,11 +202,6 @@ probe CFG_CLANG clang++ probe CFG_GCC gcc probe CFG_LLVM_CONFIG llvm-config probe CFG_VALGRIND valgrind -probe CFG_OCAMLC ocamlc -probe CFG_OCAMLOPT ocamlopt -probe CFG_OCAMLC_OPT ocamlc.opt -probe CFG_OCAMLOPT_OPT ocamlopt.opt -probe CFG_FLEXLINK flexlink probe CFG_MAKEINFO makeinfo probe CFG_TEXI2PDF texi2pdf probe CFG_TEX tex diff --git a/mk/boot.mk b/mk/boot.mk deleted file mode 100644 index a6ab5ccca59..00000000000 --- a/mk/boot.mk +++ /dev/null @@ -1,98 +0,0 @@ -###################################################################### -# Bootstrap compiler variables and rules -###################################################################### - -ifdef CFG_BOOT_PROFILE - $(info cfg: forcing native bootstrap compiler (CFG_BOOT_PROFILE)) - CFG_BOOT_NATIVE := 1 - CFG_OCAMLOPT_PROFILE_FLAGS := -p -endif - -ifdef CFG_BOOT_DEBUG - $(info cfg: forcing bytecode bootstrap compiler (CFG_BOOT_DEBUG)) - CFG_BOOT_NATIVE := -endif - -ifdef CFG_BOOT_NATIVE - $(info cfg: building native bootstrap compiler) -else - $(info cfg: building bytecode bootstrap compiler) -endif - -GENERATED := boot/fe/lexer.ml boot/version.ml - - -# We must list them in link order. -# Nobody calculates the link-order DAG automatically, sadly. - -BOOT_MLS := \ - $(addsuffix .ml, \ - boot/version \ - $(addprefix boot/util/, fmt common bits) \ - $(addprefix boot/driver/, session) \ - $(addprefix boot/fe/, ast token lexer parser \ - extfmt pexp item cexp fuzz) \ - $(addprefix boot/be/, asm il abi) \ - $(addprefix boot/me/, walk semant resolve alias \ - simplify type dead layer typestate \ - loop layout transutil trans dwarf) \ - $(addprefix boot/be/, x86 ra pe elf macho) \ - $(addprefix boot/driver/, lib glue main)) \ - -BOOT_CMOS := $(BOOT_MLS:.ml=.cmo) -BOOT_CMXS := $(BOOT_MLS:.ml=.cmx) -BOOT_OBJS := $(BOOT_MLS:.ml=.o) -BOOT_CMIS := $(BOOT_MLS:.ml=.cmi) - -BS := $(S)src/boot - -BOOT_ML_DEP_INCS := -I $(BS)/fe -I $(BS)/me \ - -I $(BS)/be -I $(BS)/driver \ - -I $(BS)/util -I boot - -BOOT_ML_INCS := -I boot/fe -I boot/me \ - -I boot/be -I boot/driver \ - -I boot/util -I boot - -BOOT_ML_LIBS := unix.cma nums.cma bigarray.cma -BOOT_ML_NATIVE_LIBS := unix.cmxa nums.cmxa bigarray.cmxa -BOOT_OCAMLC_FLAGS := -g $(BOOT_ML_INCS) -w Ael -warn-error Ael -BOOT_OCAMLOPT_FLAGS := -g $(BOOT_ML_INCS) -w Ael -warn-error Ael - -ifdef CFG_FLEXLINK - BOOT_OCAMLOPT_FLAGS += -cclib -L/usr/lib -endif - -BOOT := $(Q)OCAMLRUNPARAM="b1" boot/rustboot$(X) $(CFG_BOOT_FLAGS) -L stage0 - - -ifdef CFG_BOOT_NATIVE -boot/rustboot$(X): $(BOOT_CMXS) $(MKFILES) - @$(call E, link: $@) - $(Q)ocamlopt$(OPT) -o $@ $(BOOT_OCAMLOPT_FLAGS) $(BOOT_ML_NATIVE_LIBS) \ - $(BOOT_CMXS) -else -boot/rustboot$(X): $(BOOT_CMOS) $(MKFILES) - @$(call E, link: $@) - $(Q)ocamlc$(OPT) -o $@ $(BOOT_OCAMLC_FLAGS) $(BOOT_ML_LIBS) $(BOOT_CMOS) -endif - -boot/version.ml: $(MKFILES) - @$(call E, git: $@) - $(Q)(cd $(S) && git log -1 \ - --pretty=format:'let version = "prerelease (%h %ci)";;') >$@ || exit 1 - -%.cmo: %.ml $(MKFILES) - @$(call E, compile: $@) - $(Q)ocamlc$(OPT) -c -o $@ $(BOOT_OCAMLC_FLAGS) $< - -%.cmo: %.cmi $(MKFILES) - -%.cmx %.o: %.ml $(MKFILES) - @$(call E, compile: $@) - $(Q)ocamlopt$(OPT) -c -o $@ $(BOOT_OCAMLOPT_FLAGS) $< - -%.ml: %.mll $(MKFILES) - @$(call E, lex-gen: $@) - $(Q)ocamllex$(OPT) -q -o $@ $< - diff --git a/mk/clean.mk b/mk/clean.mk index 938bc79eb3c..3f19953929e 100644 --- a/mk/clean.mk +++ b/mk/clean.mk @@ -17,7 +17,6 @@ clean: @$(call E, cleaning) $(Q)rm -f $(RUNTIME_OBJS) $(RUNTIME_DEF) $(Q)rm -f $(RUSTLLVM_LIB_OBJS) $(RUSTLLVM_OBJS_OBJS) $(RUSTLLVM_DEF) - $(Q)rm -f $(BOOT_CMOS) $(BOOT_CMIS) $(BOOT_CMXS) $(BOOT_OBJS) $(Q)rm -f $(ML_DEPFILES) $(C_DEPFILES) $(CRATE_DEPFILES) $(Q)rm -f $(ML_DEPFILES:%.d=%.d.tmp) $(Q)rm -f $(C_DEPFILES:%.d=%.d.tmp) diff --git a/mk/platform.mk b/mk/platform.mk index 0b01f17a8c4..cbab19d3d6d 100644 --- a/mk/platform.mk +++ b/mk/platform.mk @@ -87,9 +87,6 @@ ifdef CFG_WINDOWSY CFG_PATH_MUNGE := $(strip perl -i.bak -p \ -e 's@\\(\S)@/\1@go;' \ -e 's@^/([a-zA-Z])/@\1:/@o;') - ifdef CFG_FLEXLINK - CFG_BOOT_NATIVE := 1 - endif CFG_GCCISH_CFLAGS += -march=i686 -O2 CFG_GCCISH_LINK_FLAGS += -shared -fPIC CFG_DEF_SUFFIX := .def @@ -106,8 +103,6 @@ ifdef CFG_UNIXY $(CFG_LDENV)=$(call CFG_TESTLIB,$(1)):$(CFG_LDPATH) \ $(CFG_VALGRIND) $(1) - CFG_BOOT_NATIVE := 1 - ifdef MINGW_CROSS CFG_EXE_SUFFIX := .exe CFG_LIB_NAME=$(1).dll @@ -118,7 +113,6 @@ ifdef CFG_UNIXY CFG_INFO := $(info cfg: mingw-cross) CFG_GCCISH_CROSS := i586-mingw32msvc- - CFG_BOOT_FLAGS += -t win32-x86-pe ifdef CFG_VALGRIND CFG_VALGRIND += wine endif diff --git a/src/README b/src/README index f3ce4585736..4a843a64a09 100644 --- a/src/README +++ b/src/README @@ -2,16 +2,7 @@ This is preliminary version of the Rust compiler(s). Source layout: -boot/ The bootstrap compiler -boot/README - More-detailed guide to it. -boot/fe - Front end (lexer, parser, AST) -boot/me - Middle end (resolve, check, layout, trans) -boot/be - Back end (IL, RA, insns, asm, objfiles) -boot/util - Ubiquitous helpers -boot/driver - Compiler driver - -comp/ The self-hosted compiler ("rustc": incomplete) -comp/* - Similar structure as in boot/ +comp/ The self-hosted compiler rt/ The runtime system rt/rust_*.cpp - The majority of the runtime services diff --git a/src/boot/README b/src/boot/README deleted file mode 100644 index e17bfd791ac..00000000000 --- a/src/boot/README +++ /dev/null @@ -1,405 +0,0 @@ -An informal guide to reading and working on the rustboot compiler. -================================================================== - -First off, know that our current state of development is "bootstrapping"; -this means we've got two compilers on the go and one of them is being used -to develop the other. Rustboot is written in ocaml and rustc in rust. The -one you *probably* ought to be working on at present is rustc. Rustboot is -more for historical comparison and bug-fixing whenever necessary to un-block -development of rustc. - -There's a document similar to this next door, then, in comp/README. The -comp directory is where we do work on rustc. - -If you wish to expand on this document, or have one of the -slightly-more-familiar authors add anything else to it, please get in touch or -file a bug. Your concerns are probably the same as someone else's. - - - -High-level concepts, invariants, 30,000-ft view -=============================================== - -Rustboot has 3 main subdirectories: fe, me, and be (front, mid, back -end). Helper modules and ubiquitous types are found in util/. - -The entry-point for the compiler is driver/main.ml, and this file sequences -the various parts together. - - -The 4 central data structures: ------------------------------- - -#1: fe/ast.ml defines the AST. The AST is treated as immutable after parsing - despite containing some mutable types (hashtbl and such). Many -- though - not all -- nodes within this data structure are wrapped in the type 'a - identified. This is important. An "identified" AST node is one that the - parser has marked with a unique node_id value. This node_id is used both - to denote a source location and, more importantly, to key into a large - number of tables later in the compiler. Most additional calculated - properties of a program that the compiler derives are keyed to the node_id - of an identified node. - - The types 'a identified, node_id and such are in util/common.ml - - -#2: me/semant.ml defines the Semant.ctxt structure. This is a record of - tables, almost all of which are keyed by node_id. See previous comment - regrding node_id. The Semant module is open in most of the modules within - the me/ directory, and they all refer liberally to the ctxt tables, either - directly or via helper functions in semant. Semant also defines the - mid-end pass-management logic, lookup routines, type folds, and a variety - of other miscallaneous semantic-analysis helpers. - - -#3: be/il.ml defines the IL. This is a small, typed IL based on a type system - that is relatively LLVM-ish, and a control-flow system that is *not* - expression/SSA based like LLVM. It's much dumber than that. The root of - the interesting types in this file is the type 'emitter', which is a - growable buffer along with a few counters. An emitter is essentially a - buffer of quads. A quad, in turn, is a primitive virtual instruction - ('quad' because it is in its limit a 3-address machine, plus opcode) which - we then ... tend to turn directly into x86 anyways. Sorry; it wasn't clear - during initial construction that we'd wind up stopping at x86, so the IL - is probably superfluous, but there it is. - - The IL types are operand = cell | immediate, and cell = reg | mem. Plus a - certain quantity of special-casing and noise for constant-pointer - propagation and addressing modes and whatnot. - - -#4: be/asm.ml defines the Asm.frag type, which is a "chunk of binary-ish - stuff" to put in an output file. Words, bytes, lazily-resolved fixups, - constant expressions, 0-terminated strings, alignment boundaries, etc. You - will hopefully not need to produce a lot of this yourself; most of this is - already being emitted. - - An important type that gets resolved here is fixup, from util/common.ml. - Fixups are things you can wrap around a frag using an Asm.DEF frag, which - get their size and position (both in-file and in-memory) calculated at - asm-time; but you can refer to them before they're resolved. So any time - the compiler needs to refer to "the place / size this thingy will be, when - it finally gets boiled down to frags and emitted" we generate a fixup and - use that. Functions and static data structures, for example, tend to get - fixups assigned to them early on in the middle-end of the compiler. - - -Control and information flow within the compiler: -------------------------------------------------- - -- driver/main.ml assumes control on startup. Options are parsed, platform is - detected, etc. - - -- fe/lexer.ml does lexing in any case; fe/parser.ml holds the fundamental - parser-state and parser-combinator functions. Parsing rules are split - between 3 files: fe/cexp.ml, fe/pexp.ml, and fe/item.ml. This split - represents the general structure of the grammar(s): - - - The outermost grammar is called "cexp" (crate expression), and is an - expression language that describes the crate directives found in crate - files. It's evaluated inside the compiler. - - - The next grammar is "item", which is a statement language that describes - the directives, declarations and statements found in source files. If - you compile a naked source file, you jump straight to item and then - synthesize a simple crate structure around the result. - - - The innermost grammar is "pexp" (parsed expression), and is an - expression language used for the shared expression grammar within both - cexp and item. Pexps within cexp are evaluated in the compiler - (non-constant, complex cexps are errors) whereas pexps within items are - desugared to statements and primitive expressions. - - - The AST is the output from the item grammar. Pexp and cexp do not escape - the front-end. - - -- driver/main.ml then builds a Semant.ctxt and threads it through the various - middle-end passes. Each pass defines one or more visitors, which is an FRU - copy of the empty_visitor in me/walk.ml. Each visitor performs a particular - task, encapsulates some local state in local variables, and leaves its - results in a table. If the table it's calculating is pass-local, it will be - a local binding within the pass; if it's to be shared with later passes, it - will be a table in Semant.ctxt. Pass order is therefore somewhat important, - so I'll describe it here: - - - me/resolve.ml looks up names and connects them to definitions. This - includes expanding all types (as types can occur within names, as part - of a parametric name) and performing all import/export/visibility - judgments. After resolve, we should not be doing any further name-based - lookups (with one exception: typestate does some more name - lookup. Subtle reason, will return to it). - - Resolve populates several of the tables near the top of Semant.ctxt: - - ctxt_all_cast_types - ctxt_all_defns - ctxt_all_item_names - ctxt_all_item_types - ctxt_all_lvals - ctxt_all_stmts - ctxt_all_type_items - ctxt_block_items - ctxt_block_slots - ctxt_frame_args - ctxt_lval_to_referent - ctxt_node_referenced - ctxt_required_items - ctxt_slot_is_arg - ctxt_slot_keys - - The most obviously critical of these are lval_to_referent and all_defns, - which connect subsequent visitors from a reference node to its referent - node, and catalogue all the possible things a referent may be. - - Part of resolving that is perhaps not obvious is the task of resolving - and normalizing recursive types. This is what TY_iso is for. Recursive - types in rust have to pass through a tag type on their recursive edge; - TY_iso is an iso-recursive group of tags that refer only to one another; - within a TY_iso, the type term "TY_idx n" refers to "the nth member of - the current TY_iso". Resolve is responsible for finding such groups and - tying them into such closed-form knots. - - TY_name should be completely eliminated in any of the types exiting - resolve. - - - - me/type.ml is a unification-based typechecker and inference engine. This - is as textbook-y as we could make it. It rewrites "auto" slots in the - ctxt_all_defns table when it completes (these are the slots with None as - their Ast.slot_ty). - - This file is organized around tyspecs and tyvars. A tyspec is a - constraint on an unknown type that is implied by its use; tyspecs are - generated during the AST-walk, placed in ref cells (tyvars), and the - cells are and unified with one another. If two tyvars unify, then a new - constraint is created with the tighter of the two and the two previous - tyvars are updated to point to the unified spec. Ideally all constraints - eventually run into a source of a concrete type (or a type otherwise - uniquely-determined by its tyspecs). If not, the type is underdetermined - and we get a type error. Similarly if two tyvars that are supposed to - unify clash in some way (integer unify-with string, say) then there is - also a type error. - - - - me/typestate.ml is a dataflow-based typestate checker. It is responsible - for ensuring all preconditions are met, including init-before-use. It - also determines slot lifecycle boundaries, and populates the context - tables: - - ctxt_constr_ids - ctxt_constrs - ctxt_copy_stmt_is_init - ctxt_post_stmt_slot_drops - ctxt_postconditions - ctxt_poststates - ctxt_preconditions - ctxt_prestates - - It is organized around constr_keys, a bunch of bitsets, and a CFG. - - A constr_key is a normalized value representing a single constraint that - we wish to be able to refer to within a typestate. Every constr_key gets - a bit number assigned to it. A condition (and a typestate) is a - bit-vector, in which the set bits indicate the constr_keys (indexed by - associatd number) that hold in the condition/typestate. - - There are 4 such bitsets generated for each node in the CFG: - precondition/postcondition and prestate/poststate. The visitors here - figure out all the constr_keys we'll need, then assign all the pre/post - conditions, generate the CFG, calculate the typestates from the CFG, and - check that every typestate satisfies its precondition. - - (Due to the peculiarity that types are pure terms and are not 'a - identified in our AST, we have to do some name-lookup in here as well - when normalizing the const_keys). - - - - Effect is relatively simple: it calculates the effect of each type and - item, and checks that they either match their declarations or are - authorized to be lying. - - - - Loop is even simpler: it calculates loop-depth information for later use - generating foreach loops. It populates the context tables: - - ctxt_block_is_loop_body - ctxt_slot_loop_depths - ctxt_stmt_loop_depths - - - - Alias checks slot-aliasing to ensure none of the rules are broken about - simultaneous aliases and such. It also populates the table - ctxt_slot_is_aliased. - - - - Layout determines the layout of frames, arguments, objects, closures and - such. This includes deciding which slot should go in a vreg and - generating fixups for all frame-spill regions. It populates the context - tables: - - ctxt_block_is_loop_body - ctxt_call_sizes - ctxt_frame_blocks - ctxt_frame_sizes - ctxt_slot_is_obj_state - ctxt_slot_offsets - ctxt_slot_vregs - ctxt_spill_fixups - - There is a useful chunk of ASCII-art in the leading comment of layout, - if you want to see how a frame goes together, I recommend reading it. - - - - Trans is the big one. This is the "translate AST to IL" pass, and it's a - bit of a dumping ground, sadly. Probably 4x the size of any other - pass. Stuff that is common to the x86 and LLVM backends is factored out - into transutil.ml, but it hardly helps. Suggestions welcome for - splitting it further. - - Trans works *imperatively*. It maintains a stack of emitters, one per - function (or helper-function) and emits Il.quads into the top-of-stack - emitter into while it walks the statements of each function. If at any - point it needs to pause to emit a helper function ("glue function") it - pushes a new emitter onto the stack and emits into that. - - Trans populates the context tables: - - ctxt_all_item_code - ctxt_block_fixups - ctxt_data - ctxt_file_code - ctxt_file_fixups - ctxt_fn_fixups - ctxt_glue_code - - The entries in the tables ending in _code are of type Semant.code, which - is an abstract type covering both function and glue-function code; each - holds an executable block of quads, plus an aggregate count of vregs and - a reference to the spill fixup for that code. - - -- Once it completes trans, driver/main.ml does the "finishing touches": - register allocates each emitted code value (be/ra.ml), emits dwarf for the - crate (me/dwarf.ml), selects instructions (be/x86.ml), then selects one of - the object-file backends (be/elf.ml, be/macho.ml or be/pe.ml) and emits the - selected Asm.frag to it. Hopefully little of this will require further work; - the most incomplete module here is probably dwarf.ml but the remainder are - mostly stable and don't tend to change much, aside from picking bugs out of - them. - - - -Details and curiosities to note along the way: -============================================== - -- Where you might expect there to be a general recursive expression type for - 'expr', you'll find only a very limited non-recursive 3-way switch: binary, - unary, or atom; where atom is either a literal or an lval. This is because - all the "big" expressions (pexps) were boiled off during the desugaring - phase in the frontend. - - -- There are multiple ways to refer to a path. Names, lvals and cargs all - appear to have similar structure (and do). They're all subsets of the - general path grammar, so all follow the rough shape of being either a base - anchor-path or an ext (extension) path with structural recursion to the - left. - - Cargs (constraint arguments) are the sort of paths that can be passed to - constraints in the typestate system, and can contain the special symbol "*" - in the grammar, meaning "thing I am attached to". This is the symbol - BASE_formal in the carg_base type. - - Names are the sort of paths that refer to types or other items. Not slots. - - Lvals are the sort of paths that *might* refer to slots, but we don't - generally know. So they can contain the dynamic-indexing component - COMP_atom. For example, x.(1 + 2).y is an lval. - - -- Only one of these forms is 'a identified: an lval. And moreover, only the - lval *base* is identified; the remainder of the path has to be projected - forward through the referent after lookup. This also means that when you - lookup anything else by name, you have to be using the result immediately, - not storing it in a table for later. - - -- Types are not 'a identified. This means that you (generally) cannot refer to - a *particular* occurrence of a type in the AST and associate information - with it. Instead, we treat types as "pure terms" (not carrying identity) and - calculate properties of them on the fly. For this we use a general fold - defined in me/semant.ml, the family of functions held in a ty_fold - structure, and passed to fold_ty. - - -- There is a possibly-surprising type called "size" in util/common. This is a - type representing a "size expression" that may depend on runtime - information, such as the type descriptors passed to a frame at runtime. This - exists because our type-parameterization scheme is, at the moment, - implemented by passing type descriptors around at runtime, not - code-expansion a la C++ templates. So any time we have a translated indexing - operation or such that depends on a type parameter, we wind up with a size - expression including SIZE_param_size or SIZE_param_align, and have to do - size arithmetic at runtime. Upstream of trans, we generate sizes willy-nilly - and then decide in trans, x86, and dwarf whether they can be emitted - statically or via runtime calculation at the point of use. - - -- Trans generates position-independent code (PIC). This means that it never - refers to the exact position of a fixup in memory at load-time, always the - distance-to-a-fixup from some other fixup, and/or current PC. On x86 this - means we wind up copying the "get next pc thunk" trick used on linux - systems, and/or storing "crate relative" addresses. The runtime and compiler - "know" (unfortunately sometimes quite obscurely) that an immediate pointer - should be encoded as relative-to a given displacement base, and work with - those as necessary. Similarly, they emit code to reify pointer immediates - (add the displacements to displacement-bases) before handing them off to - (say) C library functions that expect "real" pointers. This is all somewhat - messy. - - -- There is one central static data structure, "rust_crate", which is emitted - into the final loadable object and contains pointers to all subsequent - information the runtime may be interested in. It also serves as the - displacement base for a variety of PIC-ish displacements stored - elsewhere. When the runtime loads a crate, it dlsym()s rust_crate, and then - digs around in there. It's the entry-point for crawling the crate's - structure from outside. Importantly: it also contains pointers to the dwarf. - - -- Currently we drive linking off dwarf. That is: when a crate needs to 'use' - an item from another dwarf crate, we dlopen / LoadLibrary and find the - "rust_crate" value, follow its pointers to dwarf tables, and scan around the - dwarf DIE tree resolving the hierarchical name of the used item. This may - change, we decided to recycle dwarf for this purpose early in the language - evolution and may, given the number of simplifications that have occurred - along the way, be able to fall back to C "mangled name" linkage at some - point. Though that decision carries a number of serious constraints, and - should not be taken lightly. - - - -Probably-bad ideas we will want to do differently in the self-hosted compiler: -============================================================================== - -- We desugar too early in rustboot and should preserve the pexp structure - until later. Dherman is likely to argue for movement to a more - expression-focused grammar. This may well happen. - -- Multiple kinds of paths enforced by numerous nearly-isomorphic ML type - constructors is pointless once we're in rust; we can just make type - abbreviations that carry constraints like path : is_name(*) or such. - -- Storing auxiliary information in semant tables is awkward, and we should - figure out a suitably rusty idiom for decorating AST nodes in-place. - Inter-pass dependencies should be managed by augmenting the AST with - ever-more constraints (is_resolved(ast), is_typechecked(ast), etc.) - -- Trans should be organized as pure and value-producing code, not imperatively - emitting quads into emitters. LLVM will enforce this anyways. See what - happened in lltrans.ml if you're curious what it'll look (more) like. - -- The PIC scheme will have to change, hopefully get much easier. - diff --git a/src/boot/be/abi.ml b/src/boot/be/abi.ml deleted file mode 100644 index 3ae3b84350a..00000000000 --- a/src/boot/be/abi.ml +++ /dev/null @@ -1,253 +0,0 @@ - -(* - * The 'abi' structure is pretty much just a grab-bag of machine - * dependencies and structure-layout information. Part of the latter - * is shared with trans and semant. - * - * Make some attempt to factor it as time goes by. - *) - -(* Word offsets for structure fields in rust-internal.h, and elsewhere in - compiler. *) - -let rc_base_field_refcnt = 0;; - -(* FIXME: this needs updating if you ever want to work on 64 bit. *) -let const_refcount = 0x7badfaceL;; - -let task_field_refcnt = rc_base_field_refcnt;; -let task_field_stk = task_field_refcnt + 2;; -let task_field_runtime_sp = task_field_stk + 1;; -let task_field_rust_sp = task_field_runtime_sp + 1;; -let task_field_gc_alloc_chain = task_field_rust_sp + 1;; -let task_field_dom = task_field_gc_alloc_chain + 1;; -let n_visible_task_fields = task_field_dom + 1;; - -let dom_field_interrupt_flag = 1;; - -let frame_glue_fns_field_mark = 0;; -let frame_glue_fns_field_drop = 1;; -let frame_glue_fns_field_reloc = 2;; - -let box_rc_field_refcnt = 0;; -let box_rc_field_body = 1;; - -let box_gc_alloc_base = (-3);; -let box_gc_field_prev = (-3);; -let box_gc_field_next = (-2);; -let box_gc_field_ctrl = (-1);; -let box_gc_field_refcnt = 0;; -let box_gc_field_body = 1;; - -let box_rc_header_size = 1;; -let box_gc_header_size = 4;; - -let box_gc_malloc_return_adjustment = 3;; - -let stk_field_valgrind_id = 0;; -let stk_field_limit = stk_field_valgrind_id + 1;; -let stk_field_data = stk_field_limit + 1;; - -(* Both obj and fn are two-word "bindings": One word points to some static - * dispatch information (vtbl, thunk, callee), and the other points to some - * box of bound data (object-body or closure). - *) - -let binding_field_dispatch = 0;; -let binding_field_bound_data = 1;; - -let obj_field_vtbl = binding_field_dispatch;; -let obj_field_box = binding_field_bound_data;; - -let obj_body_elt_tydesc = 0;; -let obj_body_elt_fields = 1;; - -let fn_field_code = binding_field_dispatch;; -let fn_field_box = binding_field_bound_data;; - -(* NB: bound ty params come last to facilitate ignoring them on - * closure-dropping. *) -let closure_body_elt_bound_args_tydesc = 0;; -let closure_body_elt_target = 1;; -let closure_body_elt_bound_args = 2;; -let closure_body_elt_bound_ty_params = 3;; - -let tag_elt_discriminant = 0;; -let tag_elt_variant = 1;; - -let general_code_alignment = 16;; - -let tydesc_field_first_param = 0;; -let tydesc_field_size = 1;; -let tydesc_field_align = 2;; -let tydesc_field_take_glue = 3;; -let tydesc_field_drop_glue = 4;; -let tydesc_field_free_glue = 5;; -let tydesc_field_sever_glue = 6;; -let tydesc_field_mark_glue = 7;; -let tydesc_field_obj_drop_glue = 8;; -let tydesc_field_cmp_glue = 9;; (* FIXME these two aren't in the *) -let tydesc_field_hash_glue = 10;; (* runtime's type_desc struct. *) -let tydesc_field_stateflag = 11;; - -let vec_elt_rc = 0;; -let vec_elt_alloc = 1;; -let vec_elt_fill = 2;; -let vec_elt_pad = 3;; -let vec_elt_data = 4;; - -let calltup_elt_out_ptr = 0;; -let calltup_elt_task_ptr = 1;; -let calltup_elt_indirect_args = 2;; -let calltup_elt_ty_params = 3;; -let calltup_elt_args = 4;; -let calltup_elt_iterator_args = 5;; - -let iterator_args_elt_block_fn = 0;; -let iterator_args_elt_outer_frame_ptr = 1;; - -let indirect_args_elt_closure = 0;; - -(* Current worst case is by vec grow glue *) -let worst_case_glue_call_args = 8;; - -(* - * ABI tags used to inform the runtime which sort of frame to set up for new - * spawned functions. FIXME: There is almost certainly a better abstraction to - * use. - *) -let abi_x86_rustboot_cdecl = 1;; -let abi_x86_rustc_fastcall = 2;; - -type abi = - { - abi_word_sz: int64; - abi_word_bits: Il.bits; - abi_word_ty: Common.ty_mach; - - abi_tag: int; - - abi_has_pcrel_data: bool; - abi_has_pcrel_code: bool; - - abi_n_hardregs: int; - abi_str_of_hardreg: (int -> string); - - abi_emit_target_specific: (Il.emitter -> Il.quad -> unit); - abi_constrain_vregs: (Il.quad -> (Il.vreg,Bits.t) Hashtbl.t -> unit); - - abi_emit_fn_prologue: (Il.emitter - -> Common.size (* framesz *) - -> Common.size (* callsz *) - -> Common.nabi - -> Common.fixup (* grow_task *) - -> bool (* is_obj_fn *) - -> bool (* minimal *) - -> unit); - - abi_emit_fn_epilogue: (Il.emitter -> unit); - - abi_emit_fn_tail_call: (Il.emitter - -> int64 (* caller_callsz *) - -> int64 (* caller_argsz *) - -> Il.code (* callee_code *) - -> int64 (* callee_argsz *) - -> unit); - - abi_clobbers: (Il.quad -> Il.hreg list); - - abi_emit_native_call: (Il.emitter - -> Il.cell (* ret *) - -> Common.nabi - -> Common.fixup (* callee *) - -> Il.operand array (* args *) - -> unit); - - abi_emit_native_void_call: (Il.emitter - -> Common.nabi - -> Common.fixup (* callee *) - -> Il.operand array (* args *) - -> unit); - - abi_emit_native_call_in_thunk: (Il.emitter - -> Il.cell option (* ret *) - -> Common.nabi - -> Il.operand (* callee *) - -> Il.operand array (* args *) - -> unit); - abi_emit_inline_memcpy: (Il.emitter - -> int64 (* n_bytes *) - -> Il.reg (* dst_ptr *) - -> Il.reg (* src_ptr *) - -> Il.reg (* tmp_reg *) - -> bool (* ascending *) - -> unit); - - (* Global glue. *) - abi_activate: (Il.emitter -> unit); - abi_yield: (Il.emitter -> unit); - abi_unwind: (Il.emitter -> Common.nabi -> Common.fixup -> unit); - abi_gc: (Il.emitter -> unit); - abi_get_next_pc_thunk: - ((Il.reg (* output *) - * Common.fixup (* thunk in objfile *) - * (Il.emitter -> unit)) (* fn to make thunk *) - option); - - abi_sp_reg: Il.reg; - abi_fp_reg: Il.reg; - abi_dwarf_fp_reg: int; - abi_tp_cell: Il.cell; - abi_implicit_args_sz: int64; - abi_frame_base_sz: int64; - abi_callee_saves_sz: int64; - abi_frame_info_sz: int64; - abi_spill_slot: (Il.spill -> Il.mem); - } -;; - -let load_fixup_addr - (e:Il.emitter) - (out_reg:Il.reg) - (fix:Common.fixup) - (rty:Il.referent_ty) - : unit = - - let cell = Il.Reg (out_reg, Il.AddrTy rty) in - let op = Il.ImmPtr (fix, rty) in - Il.emit e (Il.lea cell op); -;; - -let load_fixup_codeptr - (e:Il.emitter) - (out_reg:Il.reg) - (fixup:Common.fixup) - (has_pcrel_code:bool) - (indirect:bool) - : Il.code = - if indirect - then - begin - load_fixup_addr e out_reg fixup (Il.ScalarTy (Il.AddrTy Il.CodeTy)); - Il.CodePtr (Il.Cell (Il.Mem (Il.RegIn (out_reg, None), - Il.ScalarTy (Il.AddrTy Il.CodeTy)))) - end - else - if has_pcrel_code - then (Il.CodePtr (Il.ImmPtr (fixup, Il.CodeTy))) - else - begin - load_fixup_addr e out_reg fixup Il.CodeTy; - Il.CodePtr (Il.Cell (Il.Reg (out_reg, Il.AddrTy Il.CodeTy))) - end -;; - - -(* - * Local Variables: - * fill-column: 78; - * indent-tabs-mode: nil - * buffer-file-coding-system: utf-8-unix - * compile-command: "make -k -C $RBUILD 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; - * End: - *) diff --git a/src/boot/be/asm.ml b/src/boot/be/asm.ml deleted file mode 100644 index f8284beffa0..00000000000 --- a/src/boot/be/asm.ml +++ /dev/null @@ -1,831 +0,0 @@ -(* - - Our assembler is an all-at-once, buffer-in-memory job, very simple - minded. I have 1gb of memory on my laptop: I don't expect to ever - emit a program that large with this code. - - It is based on the 'frag' type, which has a variant for every major - type of machine-blob we know how to write (bytes, zstrings, BSS - blocks, words of various sorts). - - A frag can contain symbolic references between the sub-parts of - it. These are accomplished through ref cells we call fixups, and a - 2-pass (resolution and writing) process defined recursively over - the frag structure. - - Fixups are defined by wrapping a frag in a DEF pseudo-frag with - a fixup attached. This will record information about the wrapped - frag -- positions and sizes -- in the fixup during resolution. - - We say "positions" and "sizes" there, in plural, because both a - file number and a memory number is recorded for each concept. - - File numbers refer to positions and sizes in the file we're - generating, and are based on the native int type for the host - platform -- usually 31 or 62 bits -- whereas the expressions that - *use* position fixups tend to promote them up to 32 or 64 bits - somehow. On a 32 bit platform, you can't generate output buffers - with 64-bit positions (ocaml limitation!) - - Memory numbers are 64 bit, always, and refer to sizes and positions - of frags when they are loaded into memory in the target. When - you're generating code for a 32-bit target, or using a memory - number in a context that's less than 64 bits, the value is - range-checked and truncated. But in all other respects, we imagine - a 32-bit address space is just the prefix of the continuing 64-bit - address space. If you need to pin an object at a particular place - from the point 2^32-1, say, you will need to do arithmetic and use - the MEMPOS pseudo-frag, that sets the current memory position as - it's being processed. - - Fixups can be *used* anywhere else in the frag tree, as many times - as you like. If you try to write an unresolved fixup, the emitter - faults. When you specify the use of a fixup, you need to specify - whether you want to use its file size, file position, memory size, - or memory position. - - Positions, addresses, sizes and such, of course, are in bytes. - - Expressions are evaluated to an int64 (signed), even if the - expression is an int32 or less. Depending on how you use the result - of the expression, a range check error may fire (for example, if - the expression evaluates to -2^24 and you're emitting a word16). - - Word endianness is per-file. At the moment this seems acceptable. - - Because we want to be *very specific* about the time and place - arithmetic promotions occur, we define two separate expression-tree - types (with the same polymorphic constructors) and two separate - evaluation functions, with an explicit operator for marking the - promotion-points. - -*) - -open Common;; -open Fmt;; - -let log (sess:Session.sess) = - Session.log "asm" - sess.Session.sess_log_asm - sess.Session.sess_log_out -;; - -let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = - if sess.Session.sess_log_asm - then thunk () - else () -;; - -exception Bad_fit of string;; -exception Undef_sym of string;; - -type ('a, 'b) expr = - IMM of 'a - | ADD of (('a, 'b) expr) * (('a, 'b) expr) - | SUB of (('a, 'b) expr) * (('a, 'b) expr) - | MUL of (('a, 'b) expr) * (('a, 'b) expr) - | DIV of (('a, 'b) expr) * (('a, 'b) expr) - | REM of (('a, 'b) expr) * (('a, 'b) expr) - | MAX of (('a, 'b) expr) * (('a, 'b) expr) - | ALIGN of (('a, 'b) expr) * (('a, 'b) expr) - | SLL of (('a, 'b) expr) * int - | SLR of (('a, 'b) expr) * int - | SAR of (('a, 'b) expr) * int - | AND of (('a, 'b) expr) * (('a, 'b) expr) - | XOR of (('a, 'b) expr) * (('a, 'b) expr) - | OR of (('a, 'b) expr) * (('a, 'b) expr) - | NOT of (('a, 'b) expr) - | NEG of (('a, 'b) expr) - | F_POS of fixup - | F_SZ of fixup - | M_POS of fixup - | M_SZ of fixup - | EXT of 'b - -type expr32 = (int32, int) expr -;; - -type expr64 = (int64, expr32) expr -;; - - -let rec eval32 (e:expr32) - : int32 = - let chop64 kind name v = - let x = Int64.to_int32 v in - if (Int64.compare v (Int64.of_int32 x)) = 0 then - x - else raise (Bad_fit (kind - ^ " fixup " - ^ name - ^ " overflowed 32 bits in eval32: " - ^ Int64.to_string v)) - in - let expandInt _ _ v = Int32.of_int v in - let checkdef kind name v inj = - match v with - None -> - raise (Undef_sym (kind ^ " fixup " ^ name - ^ " undefined in eval32")) - | Some x -> inj kind name x - in - match e with - IMM i -> i - | ADD (a, b) -> Int32.add (eval32 a) (eval32 b) - | SUB (a, b) -> Int32.sub (eval32 a) (eval32 b) - | MUL (a, b) -> Int32.mul (eval32 a) (eval32 b) - | DIV (a, b) -> Int32.div (eval32 a) (eval32 b) - | REM (a, b) -> Int32.rem (eval32 a) (eval32 b) - | MAX (a, b) -> i32_max (eval32 a) (eval32 b) - | ALIGN (a, b) -> i32_align (eval32 a) (eval32 b) - | SLL (a, b) -> Int32.shift_left (eval32 a) b - | SLR (a, b) -> Int32.shift_right_logical (eval32 a) b - | SAR (a, b) -> Int32.shift_right (eval32 a) b - | AND (a, b) -> Int32.logand (eval32 a) (eval32 b) - | XOR (a, b) -> Int32.logxor (eval32 a) (eval32 b) - | OR (a, b) -> Int32.logor (eval32 a) (eval32 b) - | NOT a -> Int32.lognot (eval32 a) - | NEG a -> Int32.neg (eval32 a) - | F_POS f -> - checkdef "file position" - f.fixup_name f.fixup_file_pos expandInt - | F_SZ f -> - checkdef "file size" - f.fixup_name f.fixup_file_sz expandInt - | M_POS f -> - checkdef "mem position" - f.fixup_name f.fixup_mem_pos chop64 - | M_SZ f -> - checkdef "mem size" f.fixup_name f.fixup_mem_sz chop64 - | EXT i -> Int32.of_int i -;; - -let rec eval64 (e:expr64) - : int64 = - let checkdef kind name v inj = - match v with - None -> - raise (Undef_sym (kind ^ " fixup '" - ^ name ^ "' undefined in eval64")) - | Some x -> inj x - in - match e with - IMM i -> i - | ADD (a, b) -> Int64.add (eval64 a) (eval64 b) - | SUB (a, b) -> Int64.sub (eval64 a) (eval64 b) - | MUL (a, b) -> Int64.mul (eval64 a) (eval64 b) - | DIV (a, b) -> Int64.div (eval64 a) (eval64 b) - | REM (a, b) -> Int64.rem (eval64 a) (eval64 b) - | MAX (a, b) -> i64_max (eval64 a) (eval64 b) - | ALIGN (a, b) -> i64_align (eval64 a) (eval64 b) - | SLL (a, b) -> Int64.shift_left (eval64 a) b - | SLR (a, b) -> Int64.shift_right_logical (eval64 a) b - | SAR (a, b) -> Int64.shift_right (eval64 a) b - | AND (a, b) -> Int64.logand (eval64 a) (eval64 b) - | XOR (a, b) -> Int64.logxor (eval64 a) (eval64 b) - | OR (a, b) -> Int64.logor (eval64 a) (eval64 b) - | NOT a -> Int64.lognot (eval64 a) - | NEG a -> Int64.neg (eval64 a) - | F_POS f -> - checkdef "file position" - f.fixup_name f.fixup_file_pos Int64.of_int - | F_SZ f -> - checkdef "file size" - f.fixup_name f.fixup_file_sz Int64.of_int - | M_POS f -> - checkdef "mem position" - f.fixup_name f.fixup_mem_pos (fun x -> x) - | M_SZ f -> - checkdef "mem size" - f.fixup_name f.fixup_mem_sz (fun x -> x) - | EXT e -> Int64.of_int32 (eval32 e) -;; - -let rec string_of_expr64 (e64:expr64) : string = - let bin op a b = - Printf.sprintf "(%s %s %s)" (string_of_expr64 a) op (string_of_expr64 b) - in - let bini op a b = - Printf.sprintf "(%s %s %d)" (string_of_expr64 a) op b - in - match e64 with - IMM i when (i64_lt i 0L) -> Printf.sprintf "-0x%Lx" (Int64.neg i) - | IMM i -> Printf.sprintf "0x%Lx" i - | ADD (a,b) -> bin "+" a b - | SUB (a,b) -> bin "-" a b - | MUL (a,b) -> bin "*" a b - | DIV (a,b) -> bin "/" a b - | REM (a,b) -> bin "%" a b - | MAX (a,b) -> - Printf.sprintf "(max %s %s)" - (string_of_expr64 a) (string_of_expr64 b) - | ALIGN (a,b) -> - Printf.sprintf "(align %s %s)" - (string_of_expr64 a) (string_of_expr64 b) - | SLL (a,b) -> bini "<<" a b - | SLR (a,b) -> bini ">>" a b - | SAR (a,b) -> bini ">>>" a b - | AND (a,b) -> bin "&" a b - | XOR (a,b) -> bin "xor" a b - | OR (a,b) -> bin "|" a b - | NOT a -> Printf.sprintf "(not %s)" (string_of_expr64 a) - | NEG a -> Printf.sprintf "-%s" (string_of_expr64 a) - | F_POS f -> Printf.sprintf "<%s>.fpos" f.fixup_name - | F_SZ f -> Printf.sprintf "<%s>.fsz" f.fixup_name - | M_POS f -> Printf.sprintf "<%s>.mpos" f.fixup_name - | M_SZ f -> Printf.sprintf "<%s>.msz" f.fixup_name - | EXT _ -> "??ext??" -;; - -type frag = - MARK (* MARK == 'PAD (IMM 0L)' *) - | SEQ of frag array - | PAD of int - | BSS of int64 - | MEMPOS of int64 - | BYTE of int - | BYTES of int array - | CHAR of char - | STRING of string - | ZSTRING of string - | ULEB128 of expr64 - | SLEB128 of expr64 - | WORD of (ty_mach * expr64) - | ALIGN_FILE of (int * frag) - | ALIGN_MEM of (int * frag) - | DEF of (fixup * frag) - | RELAX of relaxation - -and relaxation = - { relax_options: frag array; - relax_choice: int ref; } -;; - - -let rec fmt_frag (ff:Format.formatter) (f:frag) : unit = - match f with - MARK -> fmt ff "MARK" - | SEQ fs -> fmt_bracketed_arr_sep "[" "]" ", " fmt_frag ff fs - | PAD i -> fmt ff "PAD(%d)" i - | BSS i -> fmt ff "BSZ(%Ld)" i - | MEMPOS i -> fmt ff "MEMPOS(%Ld)" i - | BYTE i -> fmt ff "0x%x" i - | BYTES iz -> - fmt ff "BYTES"; - fmt_bracketed_arr_sep "(" ")" ", " - (fun ff i -> fmt ff "0x%x" i) ff iz - | CHAR c -> fmt ff "CHAR(%s)" (Char.escaped c) - | STRING s -> fmt ff "STRING(%s)" (String.escaped s) - | ZSTRING s -> fmt ff "ZSTRING(%s)" (String.escaped s) - | ULEB128 e -> fmt ff "ULEB128(%s)" (string_of_expr64 e) - | SLEB128 e -> fmt ff "SLEB128(%s)" (string_of_expr64 e) - | WORD (tm, e) -> - fmt ff "%s:%s" - (string_of_ty_mach tm) (string_of_expr64 e) - | ALIGN_FILE (i, f) -> - fmt ff "ALIGN_FILE(%d, " i; - fmt_frag ff f; - fmt ff ")" - | ALIGN_MEM (i, f) -> - fmt ff "ALIGN_MEM(%d, " i; - fmt_frag ff f; - fmt ff ")" - | DEF (fix, f) -> - fmt ff "DEF(%s, " fix.fixup_name; - fmt_frag ff f; - fmt ff ")" - | RELAX r -> - fmt ff "RELAX("; - fmt_arr_sep ", " fmt_frag ff r.relax_options -;; - -let sprintf_frag = Fmt.sprintf_fmt fmt_frag;; - -exception Relax_more of relaxation;; - -let new_relaxation (frags:frag array) = - RELAX { relax_options = frags; - relax_choice = ref ((Array.length frags) - 1); } -;; - - -let rec write_frag - ~(sess:Session.sess) - ~(lsb0:bool) - ~(buf:Buffer.t) - ~(frag:frag) - : unit = - let relax = Queue.create () in - let bump_relax r = - iflog sess (fun _ -> - log sess "bumping relaxation to position %d" - ((!(r.relax_choice)) - 1)); - r.relax_choice := (!(r.relax_choice)) - 1; - if !(r.relax_choice) < 0 - then bug () "relaxation ran out of options" - in - let rec loop _ = - Queue.clear relax; - Buffer.clear buf; - resolve_frag_full relax frag; - lower_frag ~sess ~lsb0 ~buf ~relax ~frag; - if Queue.is_empty relax - then () - else - begin - iflog sess (fun _ -> log sess "relaxing"); - Queue.iter bump_relax relax; - loop () - end - in - loop () - - -and resolve_frag_full (relax:relaxation Queue.t) (frag:frag) - : unit = - let file_pos = ref 0 in - let mem_pos = ref 0L in - let bump i = - mem_pos := Int64.add (!mem_pos) (Int64.of_int i); - file_pos := (!file_pos) + i - in - - let uleb (e:expr64) : unit = - let rec loop value = - let value = Int64.shift_right_logical value 7 in - if value = 0L - then bump 1 - else - begin - bump 1; - loop value - end - in - loop (eval64 e) - in - - let sleb (e:expr64) : unit = - let rec loop value = - let byte = Int64.logand value 0xf7L in - let value = Int64.shift_right value 7 in - let signbit = Int64.logand byte 0x40L in - if (((value = 0L) && (signbit = 0L)) || - ((value = -1L) && (signbit = 0x40L))) - then bump 1 - else - begin - bump 1; - loop value - end - in - loop (eval64 e) - in - let rec resolve_frag it = - match it with - | MARK -> () - | SEQ frags -> Array.iter resolve_frag frags - | PAD i -> bump i - | BSS i -> mem_pos := Int64.add (!mem_pos) i - | MEMPOS i -> mem_pos := i - | BYTE _ -> bump 1 - | BYTES ia -> bump (Array.length ia) - | CHAR _ -> bump 1 - | STRING s -> bump (String.length s) - | ZSTRING s -> bump ((String.length s) + 1) - | ULEB128 e -> uleb e - | SLEB128 e -> sleb e - | WORD (mach,_) -> bump (bytes_of_ty_mach mach) - | ALIGN_FILE (n, frag) -> - let spill = (!file_pos) mod n in - let pad = (n - spill) mod n in - file_pos := (!file_pos) + pad; - (* - * NB: aligning the file *causes* likewise alignment of - * memory, since we implement "file alignment" by - * padding! - *) - mem_pos := Int64.add (!mem_pos) (Int64.of_int pad); - resolve_frag frag - - | ALIGN_MEM (n, frag) -> - let n64 = Int64.of_int n in - let spill = Int64.rem (!mem_pos) n64 in - let pad = Int64.rem (Int64.sub n64 spill) n64 in - mem_pos := Int64.add (!mem_pos) pad; - resolve_frag frag - - | DEF (f, i) -> - let fpos1 = !file_pos in - let mpos1 = !mem_pos in - resolve_frag i; - f.fixup_file_pos <- Some fpos1; - f.fixup_mem_pos <- Some mpos1; - f.fixup_file_sz <- Some ((!file_pos) - fpos1); - f.fixup_mem_sz <- Some (Int64.sub (!mem_pos) mpos1) - - | RELAX rel -> - begin - try - resolve_frag rel.relax_options.(!(rel.relax_choice)) - with - Bad_fit _ -> Queue.add rel relax - end - in - resolve_frag frag - -and lower_frag - ~(sess:Session.sess) - ~(lsb0:bool) - ~(buf:Buffer.t) - ~(relax:relaxation Queue.t) - ~(frag:frag) - : unit = - let byte (i:int) = - if i < 0 - then raise (Bad_fit "byte underflow") - else - if i > 255 - then raise (Bad_fit "byte overflow") - else Buffer.add_char buf (Char.chr i) - in - - let uleb (e:expr64) : unit = - let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in - let rec loop value = - let byte = Int64.logand value 0x7fL in - let value = Int64.shift_right_logical value 7 in - if value = 0L - then emit1 byte - else - begin - emit1 (Int64.logor byte 0x80L); - loop value - end - in - loop (eval64 e) - in - - let sleb (e:expr64) : unit = - let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in - let rec loop value = - let byte = Int64.logand value 0x7fL in - let value = Int64.shift_right value 7 in - let signbit = Int64.logand byte 0x40L in - if (((value = 0L) && (signbit = 0L)) || - ((value = -1L) && (signbit = 0x40L))) - then emit1 byte - else - begin - emit1 (Int64.logor byte 0x80L); - loop value - end - in - loop (eval64 e) - in - - let word (nbytes:int) (signed:bool) (e:expr64) = - let i = eval64 e in - - (* - FIXME: - - We should really base the entire assembler and memory-position - system on Big_int.big_int, but in ocaml the big_int type lacks, - oh, just about every useful function (no format string spec, no - bitwise ops, blah blah) so it's useless; we're stuck on int64 - for bootstrapping. - - For the time being we're just going to require you to represent - those few unsigned 64 bit terms you have in mind via their - signed bit pattern. Suboptimal but it's the best we can do. - *) - - let (top,bot) = - if nbytes >= 8 - then - if signed - then (Int64.max_int,Int64.min_int) - else (Int64.max_int,0L) - else - if signed - then - let bound = (Int64.shift_left 1L ((8 * nbytes) - 1)) in - (Int64.sub bound 1L, Int64.neg bound) - else - let bound = (Int64.shift_left 1L (8 * nbytes)) in - (Int64.sub bound 1L, 0L) - in - - let mask1 = Int64.logand 0xffL in - let shift = Int64.shift_right_logical in - let emit1 k = Buffer.add_char buf (Char.chr (Int64.to_int k)) in - if Int64.compare i bot = (-1) - then raise (Bad_fit ("word underflow: " - ^ (Int64.to_string i) - ^ " into " - ^ (string_of_int nbytes) - ^ (if signed then " signed" else " unsigned") - ^ " bytes")) - else - if Int64.compare i top = 1 - then raise (Bad_fit ("word overflow: " - ^ (Int64.to_string i) - ^ " into " - ^ (string_of_int nbytes) - ^ (if signed then " signed" else " unsigned") - ^ " bytes")) - else - if lsb0 - then - for n = 0 to (nbytes - 1) do - emit1 (mask1 (shift i (8*n))) - done - else - for n = (nbytes - 1) downto 0 do - emit1 (mask1 (shift i (8*n))) - done - in - match frag with - MARK -> () - - | SEQ frags -> - Array.iter - begin - fun frag -> - lower_frag ~sess ~lsb0 ~buf ~relax ~frag - end frags - - | PAD c -> - for i = 1 to c do - Buffer.add_char buf '\x00' - done - - | BSS _ -> () - - | MEMPOS _ -> () - - | BYTE i -> byte i - - | BYTES bs -> - iflog sess (fun _ -> log sess "lowering %d bytes" - (Array.length bs)); - Array.iter byte bs - - | CHAR c -> - iflog sess (fun _ -> log sess "lowering char: %c" c); - Buffer.add_char buf c - - | STRING s -> - iflog sess (fun _ -> log sess "lowering string: %s" s); - Buffer.add_string buf s - - | ZSTRING s -> - iflog sess (fun _ -> log sess "lowering zstring: %s" s); - Buffer.add_string buf s; - byte 0 - - | ULEB128 e -> uleb e - | SLEB128 e -> sleb e - - | WORD (m,e) -> - iflog sess - (fun _ -> - log sess "lowering word %s with val %s" - (string_of_ty_mach m) - (fmt_to_str fmt_frag frag)); - word (bytes_of_ty_mach m) (mach_is_signed m) e - - | ALIGN_FILE (n, frag) -> - let spill = (Buffer.length buf) mod n in - let pad = (n - spill) mod n in - for i = 1 to pad do - Buffer.add_char buf '\x00' - done; - lower_frag sess lsb0 buf relax frag - - | ALIGN_MEM (_, i) -> lower_frag sess lsb0 buf relax i - | DEF (f, i) -> - iflog sess (fun _ -> log sess "lowering fixup: %s" f.fixup_name); - lower_frag sess lsb0 buf relax i; - - | RELAX rel -> - begin - try - lower_frag sess lsb0 buf relax - rel.relax_options.(!(rel.relax_choice)) - with - Bad_fit _ -> Queue.add rel relax - end -;; - -let fold_flags (f:'a -> int64) (flags:'a list) : int64 = - List.fold_left (Int64.logor) 0x0L (List.map f flags) -;; - -let write_out_frag sess lsb0 frag = - let buf = Buffer.create 0xffff in - let file = Session.filename_of sess.Session.sess_out in - let out = open_out_bin file in - write_frag ~sess ~lsb0 ~buf ~frag; - Buffer.output_buffer out buf; - flush out; - close_out out; - Unix.chmod file 0o755 -;; - -(* Asm-reader stuff for loading info back from mapped files. *) -(* - * Unfortunately the ocaml Bigarray interface takes 'int' indices, so - * f.e. can't do 64-bit offsets / files when running on a 32bit platform. - * Despite the fact that we can possibly produce them. Sigh. Yet another - * "bootstrap compiler limitation". - *) -type asm_reader = - { - asm_seek: int -> unit; - asm_get_u32: unit -> int; - asm_get_u16: unit -> int; - asm_get_u8: unit -> int; - asm_get_uleb: unit -> int; - asm_get_zstr: unit -> string; - asm_get_zstr_padded: int -> string; - asm_get_off: unit -> int; - asm_adv: int -> unit; - asm_adv_u32: unit -> unit; - asm_adv_u16: unit -> unit; - asm_adv_u8: unit -> unit; - asm_adv_zstr: unit -> unit; - asm_close: unit -> unit; - } -;; - -type mmap_arr = - (int, Bigarray.int8_unsigned_elt, Bigarray.c_layout) - Bigarray.Array1.t -;; - -let new_asm_reader (sess:Session.sess) (s:filename) : asm_reader = - iflog sess (fun _ -> log sess "opening file %s" s); - let fd = Unix.openfile s [ Unix.O_RDONLY ] 0 in - let arr = (Bigarray.Array1.map_file - fd ~pos:0L - Bigarray.int8_unsigned - Bigarray.c_layout - false (-1)) - in - let tmp = ref Nativeint.zero in - let buf = Buffer.create 16 in - let off = ref 0 in - let is_open = ref true in - let get_word_as_int (nbytes:int) : int = - assert (!is_open); - let lsb0 = true in - tmp := Nativeint.zero; - if lsb0 - then - for j = nbytes-1 downto 0 do - tmp := Nativeint.shift_left (!tmp) 8; - tmp := Nativeint.logor (!tmp) (Nativeint.of_int arr.{(!off) + j}) - done - else - for j = 0 to nbytes-1 do - tmp := Nativeint.shift_left (!tmp) 8; - tmp := Nativeint.logor (!tmp) (Nativeint.of_int arr.{(!off) + j}) - done; - off := (!off) + nbytes; - Nativeint.to_int (!tmp) - in - let get_zstr_padded pad_opt = - assert (!is_open); - let i = ref (!off) in - Buffer.clear buf; - let buflen_ok _ = - match pad_opt with - None -> true - | Some pad -> (Buffer.length buf) < pad - in - while arr.{!i} != 0 && (buflen_ok()) do - Buffer.add_char buf (Char.chr arr.{!i}); - incr i - done; - begin - match pad_opt with - None -> off := (!off) + (Buffer.length buf) + 1 - | Some pad -> - begin - assert ((Buffer.length buf) <= pad); - off := (!off) + pad - end - end; - Buffer.contents buf - in - let bump i = - assert (!is_open); - off := (!off) + i - in - { - asm_seek = (fun i -> off := i); - asm_get_u32 = (fun _ -> get_word_as_int 4); - asm_get_u16 = (fun _ -> get_word_as_int 2); - asm_get_u8 = (fun _ -> get_word_as_int 1); - asm_get_uleb = - begin - fun _ -> - let rec loop result shift = - let byte = arr.{!off} in - incr off; - let result = result lor ((byte land 0x7f) lsl shift) in - if (byte land 0x80) = 0 - then result - else loop result (shift+7) - in - loop 0 0 - end; - asm_get_zstr = (fun _ -> get_zstr_padded None); - asm_get_zstr_padded = (fun pad -> get_zstr_padded (Some pad)); - asm_get_off = (fun _ -> !off); - asm_adv = bump; - asm_adv_u32 = (fun _ -> bump 4); - asm_adv_u16 = (fun _ -> bump 2); - asm_adv_u8 = (fun _ -> bump 1); - asm_adv_zstr = (fun _ -> while arr.{!off} != 0 - do incr off done); - asm_close = (fun _ -> - assert (!is_open); - Unix.close fd; - is_open := false) - } -;; - - -(* - * Metadata note-section encoding / decoding. - * - * Since the only object format that defines a "note" section at all is - * ELF, we model the contents of the metadata section on ELF's - * notes. But the same blob of data is stuck into PE and Mach-O files - * too. - * - * The format is essentially just the ELF note format: - * - * - * - * - * - * <0-pad to 4-byte boundary> - * - * - * ... - * - * <0-pad to 4-byte boundary> - * - *) -let note_rust_frags (meta:(Ast.ident * string) array) : frag = - let desc_fixup = new_fixup ".rust.note metadata" in - let desc = - DEF (desc_fixup, - SEQ [| - WORD (TY_u32, IMM (Int64.of_int (Array.length meta))); - SEQ (Array.map - (fun (k,v) -> SEQ [| ZSTRING k; ZSTRING v; |]) - meta); - ALIGN_FILE (4, MARK) |]) - in - let name = "rust" in - let ty = 0L in - let padded_name = SEQ [| ZSTRING name; - ALIGN_FILE (4, MARK) |] - in - let name_sz = IMM (Int64.of_int ((String.length name) + 1)) in - SEQ [| WORD (TY_u32, name_sz); - WORD (TY_u32, F_SZ desc_fixup); - WORD (TY_u32, IMM ty); - padded_name; - desc;|] -;; - -let read_rust_note (ar:asm_reader) : (Ast.ident * string) array = - ar.asm_adv_u32 (); - ar.asm_adv_u32 (); - assert ((ar.asm_get_u32 ()) = 0); - let rust_name = ar.asm_get_zstr_padded 8 in - assert (rust_name = "rust"); - let n = ar.asm_get_u32() in - let meta = Queue.create () in - for i = 1 to n - do - let k = ar.asm_get_zstr() in - let v = ar.asm_get_zstr() in - Queue.add (k,v) meta - done; - queue_to_arr meta -;; - -(* - * Local Variables: - * fill-column: 78; - * indent-tabs-mode: nil - * buffer-file-coding-system: utf-8-unix - * compile-command: "make -k -C $RBUILD 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; - * End: - *) diff --git a/src/boot/be/elf.ml b/src/boot/be/elf.ml deleted file mode 100644 index 9ca1b3b6f88..00000000000 --- a/src/boot/be/elf.ml +++ /dev/null @@ -1,1784 +0,0 @@ -(* - * Module for writing System V ELF files. - * - * FIXME: Presently heavily infected with x86 and elf32 specificities, - * though they are reasonably well marked. Needs to be refactored to - * depend on abi fields if it's to be usable for other elf - * configurations. - *) - -open Asm;; -open Common;; - -let log (sess:Session.sess) = - Session.log "obj (elf)" - sess.Session.sess_log_obj - sess.Session.sess_log_out -;; - -let iflog (sess:Session.sess) (thunk:(unit -> unit)) : unit = - if sess.Session.sess_log_obj - then thunk () - else () -;; - - -(* Fixed sizes of structs involved in elf32 spec. *) -let elf32_ehsize = 52L;; -let elf32_phentsize = 32L;; -let elf32_shentsize = 40L;; -let elf32_symsize = 16L;; -let elf32_rela_entsz = 0xcL;; - -type ei_class = - ELFCLASSNONE - | ELFCLASS32 - | ELFCLASS64 -;; - - -type ei_data = - ELFDATANONE - | ELFDATA2LSB - | ELFDATA2MSB -;; - - -let elf_identification sess ei_class ei_data = - SEQ - [| - STRING "\x7fELF"; - BYTES - [| - (match ei_class with (* EI_CLASS *) - ELFCLASSNONE -> 0 - | ELFCLASS32 -> 1 - | ELFCLASS64 -> 2); - (match ei_data with (* EI_DATA *) - ELFDATANONE -> 0 - | ELFDATA2LSB -> 1 - | ELFDATA2MSB -> 2); - - 1; (* EI_VERSION = EV_CURRENT *) - - (* EI_OSABI *) - (match sess.Session.sess_targ with - FreeBSD_x86_elf -> 9 - | _ -> 0); - - 0; (* EI_ABIVERSION *) - - 0; (* EI_PAD #9 *) - 0; (* EI_PAD #A *) - 0; (* EI_PAD #B *) - 0; (* EI_PAD #C *) - 0; (* EI_PAD #D *) - 0; (* EI_PAD #E *) - 0; (* EI_PAD #F *) - |] - |] -;; - - -type e_type = - ET_NONE - | ET_REL - | ET_EXEC - | ET_DYN - | ET_CORE -;; - - -type e_machine = - (* Maybe support more later. *) - EM_NONE - | EM_386 - | EM_X86_64 -;; - - -type e_version = - EV_NONE - | EV_CURRENT -;; - - -let elf32_header - ~(sess:Session.sess) - ~(ei_data:ei_data) - ~(e_type:e_type) - ~(e_machine:e_machine) - ~(e_version:e_version) - ~(e_entry_fixup:fixup) - ~(e_phoff_fixup:fixup) - ~(e_shoff_fixup:fixup) - ~(e_phnum:int64) - ~(e_shnum:int64) - ~(e_shstrndx:int64) - : frag = - let elf_header_fixup = new_fixup "elf header" in - let entry_pos = - if sess.Session.sess_library_mode - then (IMM 0L) - else (M_POS e_entry_fixup) - in - DEF - (elf_header_fixup, - SEQ [| elf_identification sess ELFCLASS32 ei_data; - WORD (TY_u16, (IMM (match e_type with - ET_NONE -> 0L - | ET_REL -> 1L - | ET_EXEC -> 2L - | ET_DYN -> 3L - | ET_CORE -> 4L))); - WORD (TY_u16, (IMM (match e_machine with - EM_NONE -> 0L - | EM_386 -> 3L - | EM_X86_64 -> 62L))); - WORD (TY_u32, (IMM (match e_version with - EV_NONE -> 0L - | EV_CURRENT -> 1L))); - WORD (TY_u32, entry_pos); - WORD (TY_u32, (F_POS e_phoff_fixup)); - WORD (TY_u32, (F_POS e_shoff_fixup)); - WORD (TY_u32, (IMM 0L)); (* e_flags *) - WORD (TY_u16, (IMM elf32_ehsize)); - WORD (TY_u16, (IMM elf32_phentsize)); - WORD (TY_u16, (IMM e_phnum)); - WORD (TY_u16, (IMM elf32_shentsize)); - WORD (TY_u16, (IMM e_shnum)); - WORD (TY_u16, (IMM e_shstrndx)); - |]) -;; - - -type sh_type = - SHT_NULL - | SHT_PROGBITS - | SHT_SYMTAB - | SHT_STRTAB - | SHT_RELA - | SHT_HASH - | SHT_DYNAMIC - | SHT_NOTE - | SHT_NOBITS - | SHT_REL - | SHT_SHLIB - | SHT_DYNSYM -;; - - -type sh_flags = - SHF_WRITE - | SHF_ALLOC - | SHF_EXECINSTR -;; - - -let section_header - ?(sh_link:int64 option=None) - ?(sh_info:int64 option=None) - ?(zero_sh_addr:bool=false) - ?(sh_flags:sh_flags list=[]) - ?(section_fixup:fixup option=None) - ?(sh_addralign:int64=1L) - ?(sh_entsize:int64=0L) - ~(shstring_table_fixup:fixup) - ~(shname_string_fixup:fixup) - (sh_type:sh_type) - : frag = - SEQ - [| - WORD (TY_i32, (SUB - ((F_POS shname_string_fixup), - (F_POS shstring_table_fixup)))); - WORD (TY_u32, (IMM (match sh_type with - SHT_NULL -> 0L - | SHT_PROGBITS -> 1L - | SHT_SYMTAB -> 2L - | SHT_STRTAB -> 3L - | SHT_RELA -> 4L - | SHT_HASH -> 5L - | SHT_DYNAMIC -> 6L - | SHT_NOTE -> 7L - | SHT_NOBITS -> 8L - | SHT_REL -> 9L - | SHT_SHLIB -> 10L - | SHT_DYNSYM -> 11L))); - WORD (TY_u32, (IMM (fold_flags - (fun f -> match f with - SHF_WRITE -> 0x1L - | SHF_ALLOC -> 0x2L - | SHF_EXECINSTR -> 0x4L) sh_flags))); - WORD (TY_u32, - if zero_sh_addr - then IMM 0L - else (match section_fixup with - None -> (IMM 0L) - | Some s -> (M_POS s))); - WORD (TY_u32, (match section_fixup with - None -> (IMM 0L) - | Some s -> (F_POS s))); - WORD (TY_u32, (match section_fixup with - None -> (IMM 0L) - | Some s -> (F_SZ s))); - WORD (TY_u32, (IMM (match sh_link with - None -> 0L - | Some i -> i))); - WORD (TY_u32, (IMM (match sh_info with - None -> 0L - | Some i -> i))); - WORD (TY_u32, (IMM sh_addralign)); - WORD (TY_u32, (IMM sh_entsize)); - |] -;; - - -type p_type = - PT_NULL - | PT_LOAD - | PT_DYNAMIC - | PT_INTERP - | PT_NOTE - | PT_SHLIB - | PT_PHDR -;; - - -type p_flag = - PF_X - | PF_W - | PF_R -;; - - -let program_header - ~(p_type:p_type) - ~(segment_fixup:fixup) - ~(p_flags:p_flag list) - ~(p_align:int64) - : frag = - SEQ - [| - WORD (TY_u32, (IMM (match p_type with - PT_NULL -> 0L - | PT_LOAD -> 1L - | PT_DYNAMIC -> 2L - | PT_INTERP -> 3L - | PT_NOTE -> 4L - | PT_SHLIB -> 5L - | PT_PHDR -> 6L))); - WORD (TY_u32, (F_POS segment_fixup)); - WORD (TY_u32, (M_POS segment_fixup)); - WORD (TY_u32, (M_POS segment_fixup)); - WORD (TY_u32, (F_SZ segment_fixup)); - WORD (TY_u32, (M_SZ segment_fixup)); - WORD (TY_u32, (IMM (fold_flags - (fun f -> - match f with - PF_X -> 0x1L - | PF_W -> 0x2L - | PF_R -> 0x4L) - p_flags))); - WORD (TY_u32, (IMM p_align)); - |] -;; - - -type st_bind = - STB_LOCAL - | STB_GLOBAL - | STB_WEAK -;; - - -type st_type = - STT_NOTYPE - | STT_OBJECT - | STT_FUNC - | STT_SECTION - | STT_FILE -;; - - -(* Special symbol-section indices *) -let shn_UNDEF = 0L;; -let shn_ABS = 0xfff1L;; -let shn_ABS = 0xfff2L;; - - -let symbol - ~(string_table_fixup:fixup) - ~(name_string_fixup:fixup) - ~(sym_target_fixup:fixup option) - ~(st_bind:st_bind) - ~(st_type:st_type) - ~(st_shndx:int64) - : frag = - let st_bind_num = - match st_bind with - STB_LOCAL -> 0L - | STB_GLOBAL -> 1L - | STB_WEAK -> 2L - in - let st_type_num = - match st_type with - STT_NOTYPE -> 0L - | STT_OBJECT -> 1L - | STT_FUNC -> 2L - | STT_SECTION -> 3L - | STT_FILE -> 4L - in - SEQ - [| - WORD (TY_u32, (SUB - ((F_POS name_string_fixup), - (F_POS string_table_fixup)))); - WORD (TY_u32, (match sym_target_fixup with - None -> (IMM 0L) - | Some f -> (M_POS f))); - WORD (TY_u32, (match sym_target_fixup with - None -> (IMM 0L) - | Some f -> (M_SZ f))); - WORD (TY_u8, (* st_info *) - (OR - ((SLL ((IMM st_bind_num), 4)), - (AND ((IMM st_type_num), (IMM 0xfL)))))); - WORD (TY_u8, (IMM 0L)); (* st_other *) - WORD (TY_u16, (IMM st_shndx)); - |] -;; - -type d_tag = - DT_NULL - | DT_NEEDED - | DT_PLTRELSZ - | DT_PLTGOT - | DT_HASH - | DT_STRTAB - | DT_SYMTAB - | DT_RELA - | DT_RELASZ - | DT_RELAENT - | DT_STRSZ - | DT_SYMENT - | DT_INIT - | DT_FINI - | DT_SONAME - | DT_RPATH - | DT_SYMBOLIC - | DT_REL - | DT_RELSZ - | DT_RELENT - | DT_PLTREL - | DT_DEBUG - | DT_TEXTREL - | DT_JMPREL - | DT_BIND_NOW - | DT_INIT_ARRAY - | DT_FINI_ARRAY - | DT_INIT_ARRAYSZ - | DT_FINI_ARRAYSZ - | DT_RUNPATH - | DT_FLAGS - | DT_ENCODING - | DT_PREINIT_ARRAY - | DT_PREINIT_ARRAYSZ -;; - -type elf32_dyn = (d_tag * expr64);; - -let elf32_num_of_dyn_tag tag = - match tag with - DT_NULL -> 0L - | DT_NEEDED -> 1L - | DT_PLTRELSZ -> 2L - | DT_PLTGOT -> 3L - | DT_HASH -> 4L - | DT_STRTAB -> 5L - | DT_SYMTAB -> 6L - | DT_RELA -> 7L - | DT_RELASZ -> 8L - | DT_RELAENT -> 9L - | DT_STRSZ -> 10L - | DT_SYMENT -> 11L - | DT_INIT -> 12L - | DT_FINI -> 13L - | DT_SONAME -> 14L - | DT_RPATH -> 15L - | DT_SYMBOLIC -> 16L - | DT_REL -> 17L - | DT_RELSZ -> 18L - | DT_RELENT -> 19L - | DT_PLTREL -> 20L - | DT_DEBUG -> 21L - | DT_TEXTREL -> 22L - | DT_JMPREL -> 23L - | DT_BIND_NOW -> 24L - | DT_INIT_ARRAY -> 25L - | DT_FINI_ARRAY -> 26L - | DT_INIT_ARRAYSZ -> 27L - | DT_FINI_ARRAYSZ -> 28L - | DT_RUNPATH -> 29L - | DT_FLAGS -> 30L - | DT_ENCODING -> 31L - | DT_PREINIT_ARRAY -> 32L - | DT_PREINIT_ARRAYSZ -> 33L -;; - -let elf32_dyn_frag d = - let (tag, expr) = d in - let tagval = elf32_num_of_dyn_tag tag in - SEQ [| WORD (TY_u32, (IMM tagval)); WORD (TY_u32, expr) |] -;; - -type elf32_386_reloc_type = - R_386_NONE - | R_386_32 - | R_386_PC32 - | R_386_GOT32 - | R_386_PLT32 - | R_386_COPY - | R_386_GLOB_DAT - | R_386_JMP_SLOT - | R_386_RELATIVE - | R_386_GOTOFF - | R_386_GOTPC -;; - - -type elf32_386_rela = - { elf32_386_rela_type: elf32_386_reloc_type; - elf32_386_rela_offset: expr64; - elf32_386_rela_sym: expr64; - elf32_386_rela_addend: expr64 } -;; - -let elf32_386_rela_frag r = - let type_val = - match r.elf32_386_rela_type with - R_386_NONE -> 0L - | R_386_32 -> 1L - | R_386_PC32 -> 2L - | R_386_GOT32 -> 3L - | R_386_PLT32 -> 4L - | R_386_COPY -> 5L - | R_386_GLOB_DAT -> 6L - | R_386_JMP_SLOT -> 7L - | R_386_RELATIVE -> 8L - | R_386_GOTOFF -> 9L - | R_386_GOTPC -> 10L - in - let info_expr = - WORD (TY_u32, - (OR - (SLL ((r.elf32_386_rela_sym), 8), - AND ((IMM 0xffL), (IMM type_val))))) - in - SEQ [| WORD (TY_u32, r.elf32_386_rela_offset); - info_expr; - WORD (TY_u32, r.elf32_386_rela_addend) |] -;; - - -let elf32_linux_x86_file - ~(sess:Session.sess) - ~(crate:Ast.crate) - ~(entry_name:string) - ~(text_frags:(string option, frag) Hashtbl.t) - ~(data_frags:(string option, frag) Hashtbl.t) - ~(bss_frags:(string option, frag) Hashtbl.t) - ~(rodata_frags:(string option, frag) Hashtbl.t) - ~(required_fixups:(string, fixup) Hashtbl.t) - ~(dwarf:Dwarf.debug_records) - ~(sem:Semant.ctxt) - ~(needed_libs:string array) - : frag = - - (* Procedure Linkage Tables (PLTs), Global Offset Tables - * (GOTs), and the relocations that set them up: - * - * The PLT goes in a section called .plt and GOT in a section called - * .got. The portion of the GOT that holds PLT jump slots goes in a - * section called .got.plt. Dynamic relocations for these jump slots go in - * section .rela.plt. - * - * The easiest way to understand the PLT/GOT system is to draw it: - * - * PLT GOT - * +----------------------+ +----------------------+ - * 0| push & 0| - * | jmp *GOT[2] 1| - * | 2| & - * 1| jmp *GOT[3] 3| & <'push 0' in PLT[1]> - * | push 0 4| & <'push 1' in PLT[2]> - * | jmp *PLT[0] 5| & <'push 2' in PLT[3]> - * | - * 2| jmp *GOT[4] - * | push 1 - * | jmp *PLT[0] - * | - * 2| jmp *GOT[5] - * | push 2 - * | jmp *PLT[0] - * - * - * In normal user code, we call PLT entries with a call to a - * PC-relative address, the PLT entry, which itself does an indirect - * jump through a slot in the GOT that it also addresses - * PC-relative. This makes the whole scheme PIC. - * - * The linker fills in the GOT on startup. For the first 3, it uses - * its own thinking. For the remainder it needs to be instructed to - * fill them in with "jump slot relocs", type R_386_JUMP_SLOT, each - * of which says in effect which PLT entry it's to point back to and - * which symbol it's to be resolved to later. These relocs go in the - * section .rela.plt. - *) - - let plt0_fixup = new_fixup "PLT[0]" in - let got_prefix = SEQ [| WORD (TY_u32, (IMM 0L)); - WORD (TY_u32, (IMM 0L)); - WORD (TY_u32, (IMM 0L)); |] - in - - let got_cell reg i = - let got_entry_off = Int64.of_int (i*4) in - let got_entry_mem = Il.RegIn (reg, (Some (Asm.IMM got_entry_off))) in - Il.Mem (got_entry_mem, Il.ScalarTy (Il.AddrTy Il.CodeTy)) - in - - let got_code_cell reg i = - Il.CodePtr (Il.Cell (got_cell reg i)) - in - - let plt0_frag = - let reg = Il.Hreg X86.eax in - let e = X86.new_emitter_without_vregs () in - Il.emit e (Il.Push (Il.Cell (got_cell reg 1))); - Il.emit e (Il.jmp Il.JMP (got_code_cell reg 2)); - Il.emit e Il.Nop; - Il.emit e Il.Nop; - Il.emit e Il.Nop; - Il.emit e Il.Nop; - DEF (plt0_fixup, (X86.frags_of_emitted_quads sess e)) - in - - (* - * The existence of the GOT/PLT mish-mash causes, therefore, the - * following new sections: - * - * .plt - the PLT itself, in the r/x text segment - * .got.plt - the PLT-used portion of the GOT, in the r/w segment - * .rela.plt - the dynamic relocs for the GOT-PLT, in the r/x segment - * - * In addition, because we're starting up a dynamically linked executable, - * we have to have several more sections! - * - * .interp - the read-only section that names ld.so - * .dynsym - symbols named by the PLT/GOT entries, r/x segment - * .dynstr - string-names used in those symbols, r/x segment - * .hash - hashtable in which to look these up, r/x segment - * .dynamic - the machine-readable description of the dynamic - * linkage requirements of this elf file, in the - * r/w _DYNAMIC segment - * - * The Dynamic section contains a sequence of 2-word records of type - * d_tag. - * - *) - - (* There are 17 official section headers in the file we're making: *) - (* *) - (* section 0: *) - (* *) - (* section 1: .interp (segment 1: R+X, INTERP) *) - (* *) - (* section 2: .text (segment 2: R+X, LOAD) *) - (* section 3: .rodata ... *) - (* section 4: .dynsym ... *) - (* section 5: .dynstr ... *) - (* section 6: .hash ... *) - (* section 7: .plt ... *) - (* section 8: .got ... *) - (* section 9: .rela.plt ... *) - (* *) - (* section 10: .data (segment 3: R+W, LOAD) *) - (* section 11: .bss ... *) - (* *) - (* section 12: .dynamic (segment 4: R+W, DYNAMIC) *) - (* *) - (* section 13: .shstrtab (not in a segment) *) - (* section 14: .debug_aranges (segment 2: cont'd) *) - (* section 15: .debug_pubnames ... *) - (* section 14: .debug_info ... *) - (* section 15: .debug_abbrev ... *) - (* section 14: .debug_line ... *) - (* section 15: .debug_frame ... *) - (* section 16: .note..rust (segment 5: NOTE) *) - - let sname s = - new_fixup (Printf.sprintf "string name of '%s' section" s) - in - let null_section_name_fixup = sname "" in - let interp_section_name_fixup = sname ".interp"in - let text_section_name_fixup = sname ".text" in - let rodata_section_name_fixup = sname ".rodata" in - let dynsym_section_name_fixup = sname ".dynsym" in - let dynstr_section_name_fixup = sname ".dynstr" in - let hash_section_name_fixup = sname ".hash" in - let plt_section_name_fixup = sname ".plt" in - let got_plt_section_name_fixup = sname ".got.plt" in - let rela_plt_section_name_fixup = sname ".rela.plt" in - let data_section_name_fixup = sname ".data" in - let bss_section_name_fixup = sname ".bss" in - let dynamic_section_name_fixup = sname ".dynamic" in - let shstrtab_section_name_fixup = sname ".shstrtab" in - let debug_aranges_section_name_fixup = sname ".debug_aranges" in - let debug_pubnames_section_name_fixup = sname ".debug_pubnames" in - let debug_info_section_name_fixup = sname ".debug_info" in - let debug_abbrev_section_name_fixup = sname ".debug_abbrev" in - let debug_line_section_name_fixup = sname ".debug_line" in - let debug_frame_section_name_fixup = sname ".debug_frame" in - let note_rust_section_name_fixup = sname ".note.rust" in - - (* let interpndx = 1L in *) (* Section index of .interp *) - let textndx = 2L in (* Section index of .text *) - let rodatandx = 3L in (* Section index of .rodata *) - let dynsymndx = 4L in (* Section index of .dynsym *) - let dynstrndx = 5L in (* Section index of .dynstr *) - (* let hashndx = 6L in *) (* Section index of .hash *) - let pltndx = 7L in (* Section index of .plt *) - (* let gotpltndx = 8L in *) (* Section index of .got.plt *) - (* let relapltndx = 9L in *) (* Section index of .rela.plt *) - let datandx = 10L in (* Section index of .data *) - let bssndx = 11L in (* Section index of .bss *) - (* let dynamicndx = 12L in *) (* Section index of .dynamic *) - let shstrtabndx = 13L in (* Section index of .shstrtab *) - - let section_header_table_fixup = new_fixup ".section header table" in - let interp_section_fixup = new_fixup ".interp section" in - let text_section_fixup = new_fixup ".text section" in - let rodata_section_fixup = new_fixup ".rodata section" in - let dynsym_section_fixup = new_fixup ".dynsym section" in - let dynstr_section_fixup = new_fixup ".dynstr section" in - let hash_section_fixup = new_fixup ".hash section" in - let plt_section_fixup = new_fixup ".plt section" in - let got_plt_section_fixup = new_fixup ".got.plt section" in - let rela_plt_section_fixup = new_fixup ".rela.plt section" in - let data_section_fixup = new_fixup ".data section" in - let bss_section_fixup = new_fixup ".bss section" in - let dynamic_section_fixup = new_fixup ".dynamic section" in - let shstrtab_section_fixup = new_fixup ".shstrtab section" in - let note_rust_section_fixup = new_fixup ".shstrtab section" in - - let shstrtab_section = - SEQ - [| - DEF (null_section_name_fixup, ZSTRING ""); - DEF (interp_section_name_fixup, ZSTRING ".interp"); - DEF (text_section_name_fixup, ZSTRING ".text"); - DEF (rodata_section_name_fixup, ZSTRING ".rodata"); - DEF (dynsym_section_name_fixup, ZSTRING ".dynsym"); - DEF (dynstr_section_name_fixup, ZSTRING ".dynstr"); - DEF (hash_section_name_fixup, ZSTRING ".hash"); - DEF (plt_section_name_fixup, ZSTRING ".plt"); - DEF (got_plt_section_name_fixup, ZSTRING ".got.plt"); - DEF (rela_plt_section_name_fixup, ZSTRING ".rela.plt"); - DEF (data_section_name_fixup, ZSTRING ".data"); - DEF (bss_section_name_fixup, ZSTRING ".bss"); - DEF (dynamic_section_name_fixup, ZSTRING ".dynamic"); - DEF (shstrtab_section_name_fixup, ZSTRING ".shstrtab"); - DEF (debug_aranges_section_name_fixup, ZSTRING ".debug_aranges"); - DEF (debug_pubnames_section_name_fixup, ZSTRING ".debug_pubnames"); - DEF (debug_info_section_name_fixup, ZSTRING ".debug_info"); - DEF (debug_abbrev_section_name_fixup, ZSTRING ".debug_abbrev"); - DEF (debug_line_section_name_fixup, ZSTRING ".debug_line"); - DEF (debug_frame_section_name_fixup, ZSTRING ".debug_frame"); - DEF (note_rust_section_name_fixup, ZSTRING ".note.rust"); - |] - in - - let section_headers = - [| - (* *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: null_section_name_fixup - ~section_fixup: None - ~sh_addralign: 0L - SHT_NULL); - - (* .interp *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: interp_section_name_fixup - ~sh_flags: [ SHF_ALLOC ] - ~section_fixup: (Some interp_section_fixup) - SHT_PROGBITS); - - (* .text *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: text_section_name_fixup - ~sh_flags: [ SHF_ALLOC; SHF_EXECINSTR ] - ~section_fixup: (Some text_section_fixup) - ~sh_addralign: 32L - SHT_PROGBITS); - - (* .rodata *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: rodata_section_name_fixup - ~sh_flags: [ SHF_ALLOC ] - ~section_fixup: (Some rodata_section_fixup) - ~sh_addralign: 32L - SHT_PROGBITS); - - (* .dynsym *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: dynsym_section_name_fixup - ~sh_flags: [ SHF_ALLOC ] - ~section_fixup: (Some dynsym_section_fixup) - ~sh_addralign: 4L - ~sh_entsize: elf32_symsize - ~sh_link: (Some dynstrndx) - SHT_DYNSYM); - - (* .dynstr *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: dynstr_section_name_fixup - ~sh_flags: [ SHF_ALLOC ] - ~section_fixup: (Some dynstr_section_fixup) - SHT_STRTAB); - - (* .hash *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: hash_section_name_fixup - ~sh_flags: [ SHF_ALLOC ] - ~section_fixup: (Some hash_section_fixup) - ~sh_addralign: 4L - ~sh_entsize: 4L - SHT_PROGBITS); - - (* .plt *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: plt_section_name_fixup - ~sh_flags: [ SHF_ALLOC; SHF_EXECINSTR ] - ~section_fixup: (Some plt_section_fixup) - ~sh_addralign: 4L - SHT_PROGBITS); - - (* .got.plt *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: got_plt_section_name_fixup - ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] - ~section_fixup: (Some got_plt_section_fixup) - ~sh_addralign: 4L - SHT_PROGBITS); - - (* .rela.plt *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: rela_plt_section_name_fixup - ~sh_flags: [ SHF_ALLOC ] - ~section_fixup: (Some rela_plt_section_fixup) - ~sh_addralign: 4L - ~sh_entsize: elf32_rela_entsz - ~sh_link: (Some dynsymndx) - ~sh_info: (Some pltndx) - SHT_RELA); - - (* .data *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: data_section_name_fixup - ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] - ~section_fixup: (Some data_section_fixup) - ~sh_addralign: 32L - SHT_PROGBITS); - - (* .bss *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: bss_section_name_fixup - ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] - ~section_fixup: (Some bss_section_fixup) - ~sh_addralign: 32L - SHT_NOBITS); - - (* .dynamic *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: dynamic_section_name_fixup - ~sh_flags: [ SHF_ALLOC; SHF_WRITE ] - ~section_fixup: (Some dynamic_section_fixup) - ~sh_addralign: 8L - ~sh_link: (Some dynstrndx) - SHT_DYNAMIC); - - (* .shstrtab *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: shstrtab_section_name_fixup - ~section_fixup: (Some shstrtab_section_fixup) - SHT_STRTAB); - -(* - FIXME: uncomment the dwarf section headers as you make use of them; - recent gdb versions have got fussier about parsing dwarf and don't - like seeing junk there. -*) - - (* .debug_aranges *) -(* - - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: debug_aranges_section_name_fixup - ~section_fixup: (Some sem.Semant.ctxt_debug_aranges_fixup) - ~sh_addralign: 8L - ~zero_sh_addr: true - SHT_PROGBITS); -*) - (* .debug_pubnames *) -(* - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: debug_pubnames_section_name_fixup - ~section_fixup: (Some sem.Semant.ctxt_debug_pubnames_fixup) - ~zero_sh_addr: true - SHT_PROGBITS); -*) - - (* .debug_info *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: debug_info_section_name_fixup - ~section_fixup: (Some sem.Semant.ctxt_debug_info_fixup) - ~zero_sh_addr: true - SHT_PROGBITS); - - (* .debug_abbrev *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: debug_abbrev_section_name_fixup - ~section_fixup: (Some sem.Semant.ctxt_debug_abbrev_fixup) - ~zero_sh_addr: true - SHT_PROGBITS); - - (* .debug_line *) -(* - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: debug_line_section_name_fixup - ~section_fixup: (Some sem.Semant.ctxt_debug_line_fixup) - ~zero_sh_addr: true - SHT_PROGBITS); -*) - - (* .debug_frame *) -(* - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: debug_frame_section_name_fixup - ~section_fixup: (Some sem.Semant.ctxt_debug_frame_fixup) - ~sh_addralign: 4L - ~zero_sh_addr: true - SHT_PROGBITS); -*) - - (* .note.rust *) - (section_header - ~shstring_table_fixup: shstrtab_section_fixup - ~shname_string_fixup: note_rust_section_name_fixup - ~section_fixup: (Some note_rust_section_fixup) - SHT_NOTE); - - |] - in - let section_header_table = SEQ section_headers in - - - (* There are 6 official program headers in the file we're making: *) - (* segment 0: RX / PHDR *) - (* segment 1: R / INTERP *) - (* segment 2: RX / LOAD *) - (* segment 3: RW / LOAD *) - (* segment 4: RW / DYNAMIC *) - (* segment 5: R *) - - let program_header_table_fixup = new_fixup "program header table" in - let segment_0_fixup = new_fixup "segment 0" in - let segment_1_fixup = new_fixup "segment 1" in - let segment_2_fixup = new_fixup "segment 2" in - let segment_3_fixup = new_fixup "segment 3" in - let segment_4_fixup = new_fixup "segment 4" in - let segment_5_fixup = new_fixup "segment 5" in - - let segment_0_align = 4 in - let segment_1_align = 1 in - let segment_2_align = 0x1000 in - let segment_3_align = 0x1000 in - let segment_4_align = 0x1000 in - let segment_5_align = 1 in - - let program_headers = [| - (program_header - ~p_type: PT_PHDR - ~segment_fixup: segment_0_fixup - ~p_flags: [ PF_R; PF_X ] - ~p_align: (Int64.of_int segment_0_align)); - (program_header - ~p_type: PT_INTERP - ~segment_fixup: segment_1_fixup - ~p_flags: [ PF_R ] - ~p_align: (Int64.of_int segment_1_align)); - (program_header - ~p_type: PT_LOAD - ~segment_fixup: segment_2_fixup - ~p_flags: [ PF_R; PF_X ] - ~p_align: (Int64.of_int segment_2_align)); - (program_header - ~p_type: PT_LOAD - ~segment_fixup: segment_3_fixup - ~p_flags: [ PF_R; PF_W ] - ~p_align: (Int64.of_int segment_3_align)); - (program_header - ~p_type: PT_DYNAMIC - ~segment_fixup: segment_4_fixup - ~p_flags: [ PF_R; PF_W ] - ~p_align: (Int64.of_int segment_4_align)); - (program_header - ~p_type: PT_NOTE - ~segment_fixup: segment_5_fixup - ~p_flags: [ PF_R;] - ~p_align: (Int64.of_int segment_5_align)); - |] - in - let program_header_table = SEQ program_headers in - - let e_entry_fixup = new_fixup "entry symbol" in - - let elf_header = - elf32_header - ~sess - ~ei_data: ELFDATA2LSB - ~e_type: (if sess.Session.sess_library_mode then ET_DYN else ET_EXEC) - ~e_machine: EM_386 - ~e_version: EV_CURRENT - - ~e_entry_fixup: e_entry_fixup - ~e_phoff_fixup: program_header_table_fixup - ~e_shoff_fixup: section_header_table_fixup - ~e_phnum: (Int64.of_int (Array.length program_headers)) - ~e_shnum: (Int64.of_int (Array.length section_headers)) - ~e_shstrndx: shstrtabndx - in - - let n_syms = ref 1 in (* The empty symbol, implicit. *) - - let data_sym name st_bind fixup = - let name_fixup = new_fixup ("data symbol name fixup: '" ^ name ^ "'") in - let strtab_entry = DEF (name_fixup, ZSTRING name) in - let symtab_entry = - symbol - ~string_table_fixup: dynstr_section_fixup - ~name_string_fixup: name_fixup - ~sym_target_fixup: (Some fixup) - ~st_bind - ~st_type: STT_OBJECT - ~st_shndx: datandx - in - incr n_syms; - (strtab_entry, symtab_entry) - in - - let bss_sym name st_bind fixup = - let name_fixup = new_fixup ("bss symbol name fixup: '" ^ name ^ "'") in - let strtab_entry = DEF (name_fixup, ZSTRING name) in - let symtab_entry = - symbol - ~string_table_fixup: dynstr_section_fixup - ~name_string_fixup: name_fixup - ~sym_target_fixup: (Some fixup) - ~st_bind - ~st_type: STT_OBJECT - ~st_shndx: bssndx - in - incr n_syms; - (strtab_entry, symtab_entry) - in - - let rodata_sym name st_bind fixup = - let name_fixup = new_fixup ("rodata symbol name fixup: '" ^ name ^ "'") in - let strtab_entry = DEF (name_fixup, ZSTRING name) in - let symtab_entry = - symbol - ~string_table_fixup: dynstr_section_fixup - ~name_string_fixup: name_fixup - ~sym_target_fixup: (Some fixup) - ~st_bind - ~st_type: STT_OBJECT - ~st_shndx: rodatandx - in - incr n_syms; - (strtab_entry, symtab_entry) - in - - let text_sym name st_bind fixup = - let name_fixup = new_fixup ("text symbol name fixup: '" ^ name ^ "'") in - let strtab_frag = DEF (name_fixup, ZSTRING name) in - let symtab_frag = - symbol - ~string_table_fixup: dynstr_section_fixup - ~name_string_fixup: name_fixup - ~sym_target_fixup: (Some fixup) - ~st_bind: st_bind - ~st_type: STT_FUNC - ~st_shndx: textndx - in - incr n_syms; - (strtab_frag, symtab_frag) - in - - let require_sym name st_bind _(*fixup*) = - let name_fixup = - new_fixup ("require symbol name fixup: '" ^ name ^ "'") - in - let strtab_frag = DEF (name_fixup, ZSTRING name) in - let symtab_frag = - symbol - ~string_table_fixup: dynstr_section_fixup - ~name_string_fixup: name_fixup - ~sym_target_fixup: None - ~st_bind - ~st_type: STT_FUNC - ~st_shndx: shn_UNDEF - in - incr n_syms; - (strtab_frag, symtab_frag) - in - - let frags_of_symbol sym_emitter st_bind symname_opt symbody x = - let (strtab_frags, symtab_frags, body_frags) = x in - let (strtab_frag, symtab_frag, body_frag) = - match symname_opt with - None -> (MARK, MARK, symbody) - | Some symname -> - let body_fixup = - new_fixup ("symbol body fixup: '" ^ symname ^ "'") - in - let body = - if symname = entry_name - then DEF (e_entry_fixup, DEF (body_fixup, symbody)) - else DEF (body_fixup, symbody) - in - let (str, sym) = sym_emitter symname st_bind body_fixup in - (str, sym, body) - in - ((strtab_frag :: strtab_frags), - (symtab_frag :: symtab_frags), - (body_frag :: body_frags)) - in - - let frags_of_require_symbol sym_emitter st_bind symname plt_entry_fixup x = - let (i, strtab_frags, symtab_frags, - plt_frags, got_plt_frags, rela_plt_frags) = x in - let (strtab_frag, symtab_frag) = sym_emitter symname st_bind None in - let e = X86.new_emitter_without_vregs () in - let jump_slot_fixup = new_fixup ("jump slot #" ^ string_of_int i) in - let jump_slot_initial_target_fixup = - new_fixup ("jump slot #" ^ string_of_int i ^ " initial target") in - - (* You may notice this PLT entry doesn't look like either of the - * types of "normal" PLT entries outlined in the ELF manual. It is, - * however, just what you get when you combine a PIC PLT entry with - * inline calls to the horrible __i686.get_pc_thunk.ax kludge used - * on x86 to support entering PIC PLTs. We're just doing it *in* - * the PLT entries rather than infecting all the callers with the - * obligation of having the GOT address in a register on - * PLT-entry. - *) - - let plt_frag = - let (reg, _, _) = X86.get_next_pc_thunk in - - Il.emit_full e (Some plt_entry_fixup) Il.Dead; - - Abi.load_fixup_addr e reg got_plt_section_fixup Il.CodeTy; - - Il.emit e (Il.jmp Il.JMP (got_code_cell reg (2+i))); - - Il.emit_full e (Some jump_slot_initial_target_fixup) - (Il.Push (X86.immi (Int64.of_int i))); - - Il.emit e (Il.jmp Il.JMP (Il.direct_code_ptr plt0_fixup)); - X86.frags_of_emitted_quads sess e - in - let got_plt_frag = - DEF (jump_slot_fixup, - WORD (TY_u32, (M_POS jump_slot_initial_target_fixup))) - in - let rela_plt = - { elf32_386_rela_type = R_386_JMP_SLOT; - elf32_386_rela_offset = (M_POS jump_slot_fixup); - elf32_386_rela_sym = (IMM (Int64.of_int i)); - elf32_386_rela_addend = (IMM 0L) } - in - let rela_plt_frag = elf32_386_rela_frag rela_plt in - (i+1, - (strtab_frag :: strtab_frags), - (symtab_frag :: symtab_frags), - (plt_frag :: plt_frags), - (got_plt_frag :: got_plt_frags), - (rela_plt_frag :: rela_plt_frags)) - in - - (* Emit text export symbols. *) - let (global_text_strtab_frags, global_text_symtab_frags) = - match htab_search sem.Semant.ctxt_native_provided SEG_text with - None -> ([], []) - | Some etab -> - Hashtbl.fold - begin - fun name fix x -> - let (strtab_frags, symtab_frags) = x in - let (str, sym) = text_sym name STB_GLOBAL fix in - (str :: strtab_frags, - sym :: symtab_frags) - end - etab - ([],[]) - in - - (* Emit text fragments (possibly named). *) - let (global_text_strtab_frags, - global_text_symtab_frags, - text_body_frags) = - Hashtbl.fold - (frags_of_symbol text_sym STB_GLOBAL) - text_frags - (global_text_strtab_frags, global_text_symtab_frags, []) - in - - let (local_text_strtab_frags, - local_text_symtab_frags) = - - let symbol_frags_of_code _ code accum = - let (strtab_frags, symtab_frags) = accum in - let fix = code.Semant.code_fixup in - let (strtab_frag, symtab_frag) = - text_sym fix.fixup_name STB_LOCAL fix - in - (strtab_frag :: strtab_frags, - symtab_frag :: symtab_frags) - in - - let symbol_frags_of_glue_code g code accum = - let (strtab_frags, symtab_frags) = accum in - let fix = code.Semant.code_fixup in - let (strtab_frag, symtab_frag) = - text_sym (Semant.glue_str sem g) STB_LOCAL fix - in - (strtab_frag :: strtab_frags, - symtab_frag :: symtab_frags) - in - - let item_str_frags, item_sym_frags = - Hashtbl.fold symbol_frags_of_code - sem.Semant.ctxt_all_item_code ([], []) - in - let glue_str_frags, glue_sym_frags = - Hashtbl.fold symbol_frags_of_glue_code - sem.Semant.ctxt_glue_code ([], []) - in - (item_str_frags @ glue_str_frags, - item_sym_frags @ glue_sym_frags) - in - - (* Emit rodata export symbols. *) - let (rodata_strtab_frags, rodata_symtab_frags) = - match htab_search sem.Semant.ctxt_native_provided SEG_data with - None -> ([], []) - | Some etab -> - Hashtbl.fold - begin - fun name fix x -> - let (strtab_frags, symtab_frags) = x in - let (str, sym) = rodata_sym name STB_GLOBAL fix in - (str :: strtab_frags, - sym :: symtab_frags) - end - etab - ([],[]) - in - - (* Emit rodata fragments (possibly named). *) - let (rodata_strtab_frags, - rodata_symtab_frags, - rodata_body_frags) = - Hashtbl.fold - (frags_of_symbol rodata_sym STB_GLOBAL) - rodata_frags - (rodata_strtab_frags, rodata_symtab_frags, []) - in - - - let (data_strtab_frags, - data_symtab_frags, - data_body_frags) = - Hashtbl.fold (frags_of_symbol data_sym STB_GLOBAL) data_frags ([],[],[]) - in - - let (bss_strtab_frags, - bss_symtab_frags, - bss_body_frags) = - Hashtbl.fold (frags_of_symbol bss_sym STB_GLOBAL) bss_frags ([],[],[]) - in - - let (_, - require_strtab_frags, - require_symtab_frags, - plt_frags, - got_plt_frags, - rela_plt_frags) = - Hashtbl.fold (frags_of_require_symbol require_sym STB_GLOBAL) - required_fixups - (1,[],[],[plt0_frag],[got_prefix],[]) - in - let require_symtab_frags = List.rev require_symtab_frags in - let plt_frags = List.rev plt_frags in - let got_plt_frags = List.rev got_plt_frags in - let rela_plt_frags = List.rev rela_plt_frags in - - let dynamic_needed_strtab_frags = - Array.make (Array.length needed_libs) MARK - in - - let dynamic_frags = - let dynamic_needed_frags = Array.make (Array.length needed_libs) MARK in - for i = 0 to (Array.length needed_libs) - 1 do - let fixup = - new_fixup ("needed library name fixup: " ^ needed_libs.(i)) - in - dynamic_needed_frags.(i) <- - elf32_dyn_frag (DT_NEEDED, SUB (M_POS fixup, - M_POS dynstr_section_fixup)); - dynamic_needed_strtab_frags.(i) <- - DEF (fixup, ZSTRING needed_libs.(i)) - done; - (SEQ [| - SEQ dynamic_needed_frags; - elf32_dyn_frag (DT_STRTAB, M_POS dynstr_section_fixup); - elf32_dyn_frag (DT_STRSZ, M_SZ dynstr_section_fixup); - - elf32_dyn_frag (DT_SYMTAB, M_POS dynsym_section_fixup); - elf32_dyn_frag (DT_SYMENT, IMM elf32_symsize); - - elf32_dyn_frag (DT_HASH, M_POS hash_section_fixup); - elf32_dyn_frag (DT_PLTGOT, M_POS got_plt_section_fixup); - - elf32_dyn_frag (DT_PLTREL, IMM (elf32_num_of_dyn_tag DT_RELA)); - elf32_dyn_frag (DT_PLTRELSZ, M_SZ rela_plt_section_fixup); - elf32_dyn_frag (DT_JMPREL, M_POS rela_plt_section_fixup); - - elf32_dyn_frag (DT_NULL, IMM 0L) - |]) - in - - let null_strtab_fixup = new_fixup "null dynstrtab entry" in - let null_strtab_frag = DEF (null_strtab_fixup, ZSTRING "") in - let null_symtab_frag = (symbol - ~string_table_fixup: dynstr_section_fixup - ~name_string_fixup: null_strtab_fixup - ~sym_target_fixup: None - ~st_bind: STB_LOCAL - ~st_type: STT_NOTYPE - ~st_shndx: 0L) in - - let dynsym_frags = (null_symtab_frag :: - (require_symtab_frags @ - global_text_symtab_frags @ - local_text_symtab_frags @ - rodata_symtab_frags @ - data_symtab_frags @ - bss_symtab_frags)) - in - - let dynstr_frags = (null_strtab_frag :: - (require_strtab_frags @ - global_text_strtab_frags @ - local_text_strtab_frags @ - rodata_strtab_frags @ - data_strtab_frags @ - bss_strtab_frags @ - (Array.to_list dynamic_needed_strtab_frags))) - in - - let interp_section = - - DEF (interp_section_fixup, ZSTRING - (if sess.Session.sess_targ = FreeBSD_x86_elf - then "/libexec/ld-elf.so.1" - else "/lib/ld-linux.so.2")) - in - - let text_section = - DEF (text_section_fixup, - SEQ (Array.of_list text_body_frags)) - in - let rodata_section = - DEF (rodata_section_fixup, - SEQ (Array.of_list rodata_body_frags)) - in - let data_section = - DEF (data_section_fixup, - SEQ (Array.of_list data_body_frags)) - in - let bss_section = - DEF (bss_section_fixup, - SEQ (Array.of_list bss_body_frags)) - in - let dynsym_section = - DEF (dynsym_section_fixup, - SEQ (Array.of_list dynsym_frags)) - in - let dynstr_section = - DEF (dynstr_section_fixup, - SEQ (Array.of_list dynstr_frags)) - in - - let hash_section = - let n_syms = !n_syms in - - DEF (hash_section_fixup, - (* Worst hashtable ever: one chain. *) - SEQ [| - WORD (TY_u32, IMM 1L); (* nbucket *) - WORD (TY_u32, (* nchain *) - IMM (Int64.of_int n_syms)); - WORD (TY_u32, IMM 1L); (* bucket 0 => symbol 1. *) - SEQ - begin - Array.init - n_syms - (fun i -> - let next = (* chain[i] => if last then 0 else i+1 *) - if i > 0 && i < (n_syms-1) - then Int64.of_int (i+1) - else 0L - in - WORD (TY_u32, IMM next)) - end; - |]) - in - - let plt_section = - DEF (plt_section_fixup, - SEQ (Array.of_list plt_frags)) - in - - let got_plt_section = - DEF (got_plt_section_fixup, - SEQ (Array.of_list got_plt_frags)) - in - - let rela_plt_section = - DEF (rela_plt_section_fixup, - SEQ (Array.of_list rela_plt_frags)) - in - - let dynamic_section = - DEF (dynamic_section_fixup, dynamic_frags) - in - - let note_rust_section = - DEF (note_rust_section_fixup, - (Asm.note_rust_frags crate.node.Ast.crate_meta)) - in - - - let page_alignment = 0x1000 in - - let align_both i = - ALIGN_FILE (page_alignment, - (ALIGN_MEM (page_alignment, i))) - in - - let def_aligned f i = - align_both - (SEQ [| DEF(f,i); - (align_both MARK)|]) - in - - let debug_aranges_section = - def_aligned - sem.Semant.ctxt_debug_aranges_fixup - dwarf.Dwarf.debug_aranges - in - let debug_pubnames_section = - def_aligned - sem.Semant.ctxt_debug_pubnames_fixup - dwarf.Dwarf.debug_pubnames - in - let debug_info_section = - def_aligned - sem.Semant.ctxt_debug_info_fixup - dwarf.Dwarf.debug_info - in - let debug_abbrev_section = - def_aligned - sem.Semant.ctxt_debug_abbrev_fixup - dwarf.Dwarf.debug_abbrev - in - let debug_line_section = - def_aligned - sem.Semant.ctxt_debug_line_fixup - dwarf.Dwarf.debug_line - in - let debug_frame_section = - def_aligned sem.Semant.ctxt_debug_frame_fixup dwarf.Dwarf.debug_frame - in - - let load_address = 0x0804_8000L in - - SEQ - [| - MEMPOS load_address; - ALIGN_FILE - (segment_2_align, - DEF - (segment_2_fixup, - SEQ - [| - DEF (sem.Semant.ctxt_image_base_fixup, MARK); - elf_header; - ALIGN_FILE - (segment_0_align, - DEF - (segment_0_fixup, - SEQ - [| - DEF (program_header_table_fixup, - program_header_table); - |])); - ALIGN_FILE - (segment_1_align, - DEF (segment_1_fixup, interp_section)); - text_section; - rodata_section; - dynsym_section; - dynstr_section; - hash_section; - plt_section; - rela_plt_section; - debug_aranges_section; - debug_pubnames_section; - debug_info_section; - debug_abbrev_section; - debug_line_section; - debug_frame_section; - |])); - ALIGN_FILE - (segment_3_align, - DEF - (segment_3_fixup, - SEQ - [| - data_section; - got_plt_section; - bss_section; - ALIGN_FILE - (segment_4_align, - DEF (segment_4_fixup, - dynamic_section)); - ALIGN_FILE - (segment_5_align, - DEF (segment_5_fixup, - note_rust_section)); - |])); - DEF (shstrtab_section_fixup, - shstrtab_section); - DEF (section_header_table_fixup, - section_header_table); - |] -;; - -let emit_file - (sess:Session.sess) - (crate:Ast.crate) - (code:Asm.frag) - (data:Asm.frag) - (sem:Semant.ctxt) - (dwarf:Dwarf.debug_records) - : unit = - - let text_frags = Hashtbl.create 4 in - let rodata_frags = Hashtbl.create 4 in - let data_frags = Hashtbl.create 4 in - let bss_frags = Hashtbl.create 4 in - let required_fixups = Hashtbl.create 4 in - - (* - * Startup on elf-linux is more complex than in win32. It's - * thankfully documented in some detail around the net. - * - * - The elf entry address is for _start. - * - * - _start pushes: - * - * eax (should be zero) - * esp (holding the kernel-provided stack end) - * edx (address of _rtld_fini) - * address of _fini - * address of _init - * ecx (argv) - * esi (argc) - * address of main - * - * and then calls __libc_start_main@plt. - * - * - This means any sensible binary has a PLT. Fun. So - * We call into the PLT, which itself is just a bunch - * of indirect jumps through slots in the GOT, and wind - * up in __libc_start_main. Which calls _init, then - * essentially exit(main(argc,argv)). - *) - - - let init_fixup = new_fixup "_init function entry" in - let fini_fixup = new_fixup "_fini function entry" in - let (start_fixup, rust_start_fixup) = - if sess.Session.sess_library_mode - then (None, None) - else (Some (new_fixup "start function entry"), - Some (Semant.require_native sem REQUIRED_LIB_rustrt "rust_start")) - in - let libc_start_main_fixup = new_fixup "__libc_start_main@plt stub" in - - let start_fn _ = - let start_fixup = - match start_fixup with - None -> bug () "missing start fixup in non-library mode" - | Some s -> s - in - let e = X86.new_emitter_without_vregs () in - let push_r32 r = Il.emit e - (Il.Push (Il.Cell (Il.Reg (Il.Hreg r, Il.ValTy Il.Bits32)))) - in - let push_pos32 = X86.push_pos32 e in - - Il.emit e (Il.unary Il.UMOV (X86.rc X86.ebp) (X86.immi 0L)); - Il.emit e (Il.Pop (X86.rc X86.esi)); - Il.emit e (Il.unary Il.UMOV (X86.rc X86.ecx) (X86.ro X86.esp)); - Il.emit e (Il.binary Il.AND - (X86.rc X86.esp) (X86.ro X86.esp) - (X86.immi 0xfffffffffffffff0L)); - - push_r32 X86.eax; - push_r32 X86.esp; - push_r32 X86.edx; - push_pos32 fini_fixup; - push_pos32 init_fixup; - push_r32 X86.ecx; - push_r32 X86.esi; - push_pos32 start_fixup; - Il.emit e (Il.call - (Il.Reg (Il.Hreg X86.eax, Il.ValTy Il.Bits32)) - (Il.direct_code_ptr libc_start_main_fixup)); - X86.frags_of_emitted_quads sess e - in - - let do_nothing_fn _ = - let e = X86.new_emitter_without_vregs () in - Il.emit e Il.Ret; - X86.frags_of_emitted_quads sess e - in - - let main_fn _ = - match (start_fixup, rust_start_fixup, sem.Semant.ctxt_main_fn_fixup) with - (None, _, _) - | (_, None, _) - | (_, _, None) -> MARK - | (Some start_fixup, - Some rust_start_fixup, - Some main_fn_fixup) -> - let e = X86.new_emitter_without_vregs () in - X86.objfile_start e - ~start_fixup - ~rust_start_fixup - ~main_fn_fixup - ~crate_fixup: sem.Semant.ctxt_crate_fixup - ~indirect_start: false; - X86.frags_of_emitted_quads sess e - in - - let needed_libs = - [| - if sess.Session.sess_targ = FreeBSD_x86_elf - then "libc.so.7" - else "libc.so.6"; - "librustrt.so" - |] - in - - let _ = - if not sess.Session.sess_library_mode - then - begin - htab_put text_frags (Some "_start") (start_fn()); - htab_put text_frags (Some "_init") - (DEF (init_fixup, do_nothing_fn())); - htab_put text_frags (Some "_fini") - (DEF (fini_fixup, do_nothing_fn())); - htab_put text_frags (Some "main") (main_fn ()); - htab_put required_fixups "__libc_start_main" libc_start_main_fixup; - end; - htab_put text_frags None code; - htab_put rodata_frags None data; - - if sess.Session.sess_targ = FreeBSD_x86_elf - then - (* - * FreeBSD wants some extra symbols in .bss so its libc can fill - * them in, I think. - *) - List.iter - (fun x -> htab_put bss_frags (Some x) (WORD (TY_u32, (IMM 0L)))) - [ - "environ"; - "optind"; - "optarg"; - "_CurrentRuneLocale"; - "__stack_chk_guard"; - "__mb_sb_limit"; - "__isthreaded"; - "__stdinp"; - "__stderrp"; - "__stdoutp"; - ]; - - Hashtbl.iter - begin - fun _ tab -> - Hashtbl.iter - begin - fun name fixup -> - htab_put required_fixups name fixup - end - tab - end - sem.Semant.ctxt_native_required - in - - let all_frags = - elf32_linux_x86_file - ~sess - ~crate - ~entry_name: "_start" - ~text_frags - ~data_frags - ~bss_frags - ~dwarf - ~sem - ~rodata_frags - ~required_fixups - ~needed_libs - in - write_out_frag sess true all_frags -;; - -let elf_magic = "\x7fELF";; - -let sniff - (sess:Session.sess) - (filename:filename) - : asm_reader option = - try - let stat = Unix.stat filename in - if (stat.Unix.st_kind = Unix.S_REG) && - (stat.Unix.st_size > 4) - then - let ar = new_asm_reader sess filename in - let _ = log sess "sniffing ELF file" in - if (ar.asm_get_zstr_padded 4) = elf_magic - then (ar.asm_seek 0; Some ar) - else None - else - None - with - _ -> None -;; - -let get_sections - (sess:Session.sess) - (ar:asm_reader) - : (string,(int*int)) Hashtbl.t = - let sects = Hashtbl.create 0 in - let _ = log sess "reading sections" in - let elf_id = ar.asm_get_zstr_padded 4 in - let _ = assert (elf_id = elf_magic) in - - let _ = ar.asm_seek 0x10 in - let _ = ar.asm_adv_u16 () in (* e_type *) - let _ = ar.asm_adv_u16 () in (* e_machine *) - let _ = ar.asm_adv_u32 () in (* e_version *) - let _ = ar.asm_adv_u32 () in (* e_entry *) - let _ = ar.asm_adv_u32 () in (* e_phoff *) - let e_shoff = ar.asm_get_u32 () in (* e_shoff *) - let _ = ar.asm_adv_u32 () in (* e_flags *) - let _ = ar.asm_adv_u16 () in (* e_ehsize *) - let _ = ar.asm_adv_u16 () in (* e_phentsize *) - let _ = ar.asm_adv_u16 () in (* e_phnum *) - let e_shentsize = ar.asm_get_u16 () in - let e_shnum = ar.asm_get_u16 () in - let e_shstrndx = ar.asm_get_u16 () in - let _ = log sess - "%d ELF section headers, %d bytes each, starting at 0x%x" - e_shnum e_shentsize e_shoff - in - let _ = log sess "section %d is .shstrtab" e_shstrndx in - - let read_section_hdr n = - let _ = ar.asm_seek (e_shoff + n * e_shentsize) in - let str_off = ar.asm_get_u32() in - let _ = ar.asm_adv_u32() in (* sh_type *) - let _ = ar.asm_adv_u32() in (* sh_flags *) - let _ = ar.asm_adv_u32() in (* sh_addr *) - let off = ar.asm_get_u32() in (* sh_off *) - let size = ar.asm_get_u32() in (* sh_size *) - let _ = ar.asm_adv_u32() in (* sh_link *) - let _ = ar.asm_adv_u32() in (* sh_info *) - let _ = ar.asm_adv_u32() in (* sh_addralign *) - let _ = ar.asm_adv_u32() in (* sh_entsize *) - (str_off, off, size) - in - - let (_, str_base, _) = read_section_hdr e_shstrndx in - - let _ = ar.asm_seek e_shoff in - for i = 0 to (e_shnum - 1) do - let (str_off, off, size) = read_section_hdr i in - let _ = ar.asm_seek (str_base + str_off) in - let name = ar.asm_get_zstr() in - log sess "section %d: %s, size %d, offset 0x%x" i name size off; - Hashtbl.add sects name (off, size); - done; - sects -;; - - -(* - * Local Variables: - * fill-column: 78; - * indent-tabs-mode: nil - * buffer-file-coding-system: utf-8-unix - * compile-command: "make -k -C $RBUILD 2>&1 | sed -e 's/\\/x\\//x:\\//g'"; - * End: - *) diff --git a/src/boot/be/il.ml b/src/boot/be/il.ml deleted file mode 100644 index 1539c00b87c..00000000000 --- a/src/boot/be/il.ml +++ /dev/null @@ -1,937 +0,0 @@ -open Common;; - -(* FIXME (issue #1): thread a session object through this eventually. *) -let log_iltypes = ref false;; - -(* IL type system, very rudimentary. *) - -type bits = - Bits8 - | Bits16 - | Bits32 - | Bits64 -;; - -type scalar_ty = - ValTy of bits - | AddrTy of referent_ty - -and referent_ty = - ScalarTy of scalar_ty - | StructTy of referent_ty array - | UnionTy of referent_ty array - | ParamTy of ty_param_idx (* Thing of current-frame type-param #n *) - | OpaqueTy (* Unknown memory-resident thing. *) - | CodeTy (* Executable machine code. *) - | NilTy (* 0 bits of space. *) -;; - -let (voidptr_t:scalar_ty) = AddrTy OpaqueTy;; -let (codeptr_t:scalar_ty) = AddrTy CodeTy;; - -(* Operands. *) - -type vreg = int ;; -type hreg = int ;; -type label = int ;; -type spill = int ;; - -type reg = - Vreg of vreg - | Hreg of hreg -;; - -type mem = - Abs of Asm.expr64 - | RegIn of (reg * (Asm.expr64 option)) - | Spill of spill -;; - -type typed_reg = (reg * scalar_ty);; -type typed_mem = (mem * referent_ty);; -type typed_imm = (Asm.expr64 * ty_mach);; -type typed_imm_ptr = (fixup * referent_ty);; - -type cell = - Reg of typed_reg - | Mem of typed_mem -;; - -(* - * ImmPtr (a, rty) can be assigned to anything of scalar_ty - * AddrTy rty; the difference is that ImmAddr carries its value - * so can be used in cases where we want to have an immediate - * address constant-propagated through the code to the backend. - *) -type operand = - Cell of cell - | Imm of typed_imm - | ImmPtr of typed_imm_ptr -;; - - -type code = - CodeLabel of label (* Index into current quad block. *) - | CodePtr of operand - | CodeNone -;; - -(* NB: for the most part, we let the register allocator assign spills - * from vregs, and we permanently allocate aliased slots to stack - * locations by static aliasing information early, in layout. - * - * The one awkward case this doesn't handle is when someone tries to - * pass a literal-atom to an alias-slot. This *requires* a memory slot - * but we only realize it rather late, much later than we'd normally - * have thougt to desugar the literal into a temporary. - * - * So in these cases, we let the trans module explicitly demand a - * "Spill n" operand, which the register allocator mops up before it - * gets started on the vregs. - * - * NOTE: if we were more clever we'd integrate vregs and spills like - * this together along with the general notion of a temporary way back - * at the desugaring stage, and use some kind of size-class - * consolidation so that spills with non-overlapping lifetimes could - * share memory. But we're not that clever yet. - *) - - -(* Helpers. *) - -let direct_code_ptr fix = - (CodePtr (ImmPtr (fix, CodeTy))) -;; - -let cell_referent_ty c = - match c with - Reg (_, st) -> ScalarTy st - | Mem (_, rt) -> rt -;; - -let cell_is_nil c = - match c with - Mem (_, NilTy) -> true - | Reg (_, AddrTy NilTy) -> true - | _ -> false -;; - -let operand_is_nil o = - match o with - Cell c -> cell_is_nil c - | _ -> false -;; - -let mem_off (mem:mem) (off:Asm.expr64) : mem = - let addto e = Asm.ADD (off, e) in - match mem with - Abs e -> Abs (addto e) - | RegIn (r, None) -> RegIn (r, Some off) - | RegIn (r, Some e) -> RegIn (r, Some (addto e)) - | Spill _ -> - bug () "Adding offset %s to spill slot" - (Asm.string_of_expr64 off) -;; - -let mem_off_imm (mem:mem) (imm:int64) : mem = - mem_off mem (Asm.IMM imm) -;; - - -(* Quads. *) - -type binop = - ADD | SUB - | IMUL | UMUL - | IDIV | UDIV - | IMOD | UMOD - | AND | OR | XOR - | LSL | LSR | ASR -;; - -type unop = - NEG | NOT - | UMOV | IMOV - | ZERO -;; - -type jmpop = - JE | JNE - | JZ | JNZ (* FIXME: Synonyms with JE/JNE in x86, others? *) - | JL | JLE | JG | JGE (* Signed. *) - | JB | JBE | JA | JAE (* Unsigned. *) - | JC | JNC | JO | JNO - | JMP -;; - -type binary = - { - binary_op: binop; - binary_dst: cell; - binary_lhs: operand; - binary_rhs: operand - } -;; - -type unary = - { - unary_op: unop; - unary_dst: cell; - unary_src: operand - } -;; - -type cmp = - { - cmp_lhs: operand; - cmp_rhs: operand - } -;; - -type lea = - { - lea_dst: cell; - lea_src: operand - } -;; - -type jmp = - { - jmp_op: jmpop; - jmp_targ: code; - } -;; - -type call = - { - call_dst: cell; - call_targ: code - } - -type quad' = - Binary of binary - | Unary of unary - | Lea of lea - | Cmp of cmp - | Jmp of jmp - | Push of operand - | Pop of cell - | Call of call - | Debug (* Debug-break pseudo-instruction. *) - | Enter of fixup (* Enter-fixup-block pseudo-instruction. *) - | Leave (* Leave-fixup-block pseudo-instruction. *) - | Ret (* Return to caller. *) - | Nop (* Keep this quad here, emit CPU nop. *) - | Dead (* Keep this quad but emit nothing. *) - | Regfence (* Clobber all hregs. *) - | End (* Space past the end of quads to emit. *) -;; - -type quad = - { quad_fixup: fixup option; - quad_body: quad'; } - -type quads = quad array ;; - -(* Query functions. *) - -let cell_is_scalar (c:cell) : bool = - match c with - Reg (_, _) -> true - | Mem (_, ScalarTy _) -> true - | _ -> false -;; - - -let bits_of_ty_mach (tm:ty_mach) : bits = - match tm with - | TY_u8 -> Bits8 - | TY_i8 -> Bits8 - | TY_u16 -> Bits16 - | TY_i16 -> Bits16 - | TY_u32 -> Bits32 - | TY_i32 -> Bits32 - | TY_u64 -> Bits64 - | TY_i64 -> Bits64 - | TY_f32 -> Bits32 - | TY_f64 -> Bits64 -;; - -let cell_scalar_ty (c:cell) : scalar_ty = - match c with - Reg (_, st) -> st - | Mem (_, ScalarTy st) -> st - | _ -> bug () "mem of non-scalar in Il.cell_scalar_ty" -;; - -let operand_scalar_ty (op:operand) : scalar_ty = - match op with - Cell c -> cell_scalar_ty c - | Imm (_, t) -> ValTy (bits_of_ty_mach t) - | ImmPtr (_, t) -> AddrTy t -;; - - -let scalar_ty_bits (word_bits:bits) (st:scalar_ty) : bits = - match st with - ValTy bits -> bits - | AddrTy _ -> word_bits -;; - -let cell_bits (word_bits:bits) (c:cell) : bits = - match c with - Reg (_, st) -> scalar_ty_bits word_bits st - | Mem (_, ScalarTy st) -> scalar_ty_bits word_bits st - | Mem _ -> bug () "mem of non-scalar in Il.cell_bits" -;; - -let operand_bits (word_bits:bits) (op:operand) : bits = - match op with - Cell cell -> cell_bits word_bits cell - | Imm (_, tm) -> bits_of_ty_mach tm - | ImmPtr _ -> word_bits -;; - -let bits_size (bits:bits) : int64 = - match bits with - Bits8 -> 1L - | Bits16 -> 2L - | Bits32 -> 4L - | Bits64 -> 8L -;; - -let bits_align (bits:bits) : int64 = - match bits with - Bits8 -> 1L - | Bits16 -> 2L - | Bits32 -> 4L - | Bits64 -> 8L -;; - -let scalar_ty_size (word_bits:bits) (st:scalar_ty) : int64 = - bits_size (scalar_ty_bits word_bits st) -;; - -let scalar_ty_align (word_bits:bits) (st:scalar_ty) : int64 = - bits_align (scalar_ty_bits word_bits st) -;; - -let rec referent_ty_layout (word_bits:bits) (rt:referent_ty) : (size * size) = - match rt with - ScalarTy st -> (SIZE_fixed (scalar_ty_size word_bits st), - SIZE_fixed (scalar_ty_align word_bits st)) - | StructTy rts -> - begin - let accum (off,align) rt : (size * size) = - let (elt_size, elt_align) = referent_ty_layout word_bits rt in - let elt_off = align_sz elt_align off in - (add_sz elt_off elt_size, max_sz elt_align align) - in - Array.fold_left accum (SIZE_fixed 0L, SIZE_fixed 1L) rts - end - | UnionTy rts -> - begin - let accum (sz,align) rt : (size * size) = - let (elt_size, elt_align) = referent_ty_layout word_bits rt in - (max_sz sz elt_size, max_sz elt_align align) - in - Array.fold_left accum (SIZE_fixed 0L, SIZE_fixed 1L) rts - end - | OpaqueTy -> bug () "opaque ty in referent_ty_layout" - | CodeTy -> bug () "code ty in referent_ty_layout" - | ParamTy i -> (SIZE_param_size i, SIZE_param_align i) - | NilTy -> (SIZE_fixed 0L, SIZE_fixed 1L) - -and referent_ty_size (word_bits:bits) (rt:referent_ty) : size = - (fst (referent_ty_layout word_bits rt)) - -and referent_ty_align (word_bits:bits) (rt:referent_ty) : size = - (snd (referent_ty_layout word_bits rt)) - -;; - -let get_element_offset - (word_bits:bits) - (elts:referent_ty array) - (i:int) - : size = - let elts_before = Array.sub elts 0 i in - let elt_rty = elts.(i) in - let elts_before_size = referent_ty_size word_bits (StructTy elts_before) in - let elt_align = referent_ty_align word_bits elt_rty in - let elt_off = align_sz elt_align elts_before_size in - elt_off -;; - -(* Processor. *) - -type quad_processor = - { qp_reg: (quad_processor -> reg -> reg); - qp_mem: (quad_processor -> mem -> mem); - qp_cell_read: (quad_processor -> cell -> cell); - qp_cell_write: (quad_processor -> cell -> cell); - qp_code: (quad_processor -> code -> code); - qp_op: (quad_processor -> operand -> operand); } -;; - -let identity_processor = - let qp_cell = (fun qp c -> match c with - Reg (r, b) -> Reg (qp.qp_reg qp r, b) - | Mem (a, b) -> Mem (qp.qp_mem qp a, b)) - in - { qp_reg = (fun _ r -> r); - qp_mem = (fun qp a -> match a with - RegIn (r, o) -> RegIn (qp.qp_reg qp r, o) - | Abs _ - | Spill _ -> a); - qp_cell_read = qp_cell; - qp_cell_write = qp_cell; - qp_code = (fun qp c -> match c with - CodePtr op -> CodePtr (qp.qp_op qp op) - | CodeLabel _ - | CodeNone -> c); - qp_op = (fun qp op -> match op with - Cell c -> Cell (qp.qp_cell_read qp c) - | ImmPtr _ -> op - | Imm _ -> op) } -;; - -let process_quad (qp:quad_processor) (q:quad) : quad = - { q with - quad_body = match q.quad_body with - Binary b -> - Binary { b with - binary_dst = qp.qp_cell_write qp b.binary_dst; - binary_lhs = qp.qp_op qp b.binary_lhs; - binary_rhs = qp.qp_op qp b.binary_rhs } - | Unary u -> - Unary { u with - unary_dst = qp.qp_cell_write qp u.unary_dst; - unary_src = qp.qp_op qp u.unary_src } - - | Lea le -> - Lea { lea_dst = qp.qp_cell_write qp le.lea_dst; - lea_src = qp.qp_op qp le.lea_src } - - | Cmp c -> - Cmp { cmp_lhs = qp.qp_op qp c.cmp_lhs; - cmp_rhs = qp.qp_op qp c.cmp_rhs } - - | Jmp j -> - Jmp { j with - jmp_targ = qp.qp_code qp j.jmp_targ } - - | Push op -> - Push (qp.qp_op qp op) - - | Pop c -> - Pop (qp.qp_cell_write qp c) - - | Call c -> - Call { call_dst = qp.qp_cell_write qp c.call_dst; - call_targ = qp.qp_code qp c.call_targ } - - | Ret -> Ret - | Nop -> Nop - | Debug -> Debug - | Regfence -> Regfence - | Enter f -> Enter f - | Leave -> Leave - | Dead -> Dead - | End -> End } -;; - -let visit_quads (qp:quad_processor) (qs:quads) : unit = - Array.iter (fun x ->ignore ( process_quad qp x); ()) qs -;; - -let process_quads (qp:quad_processor) (qs:quads) : quads = - Array.map (process_quad qp) qs -;; - -let rewrite_quads (qp:quad_processor) (qs:quads) : unit = - for i = 0 to ((Array.length qs) - 1) do - qs.(i) <- process_quad qp qs.(i) - done -;; - - -(* A little partial-evaluator to help lowering sizes. *) - -let rec size_to_expr64 (a:size) : Asm.expr64 option = - let binary a b f = - match (size_to_expr64 a, size_to_expr64 b) with - (Some a, Some b) -> Some (f a b) - | _ -> None - in - match a with - SIZE_fixed i -> Some (Asm.IMM i) - | SIZE_fixup_mem_sz f -> Some (Asm.M_SZ f) - | SIZE_fixup_mem_pos f -> Some (Asm.M_POS f) - | SIZE_rt_neg s -> - begin - match (size_to_expr64 s) with - None -> None - | Some s -> Some (Asm.NEG s) - end - | SIZE_rt_add (a, b) -> binary a b (fun a b -> Asm.ADD (a,b)) - | SIZE_rt_mul (a, b) -> binary a b (fun a b -> Asm.MUL (a,b)) - | SIZE_rt_max (a, b) -> binary a b (fun a b -> Asm.MAX (a,b)) - | SIZE_rt_align (a, b) -> binary a b (fun a b -> Asm.ALIGN (a,b)) - | _ -> None -;; - - -(* Formatters. *) - -let string_of_bits (b:bits) : string = - match b with - Bits8 -> "b8" - | Bits16 -> "b16" - | Bits32 -> "b32" - | Bits64 -> "b64" -;; - -let rec string_of_scalar_ty (s:scalar_ty) : string = - match s with - ValTy b -> (string_of_bits b) - | AddrTy r -> (string_of_referent_ty r) ^ "*" - -and string_of_referent_ty (r:referent_ty) : string = - match r with - ScalarTy s -> (string_of_scalar_ty s) - | StructTy rs -> - Printf.sprintf "[%s]" - (String.concat "," - (Array.to_list (Array.map string_of_referent_ty rs))) - | UnionTy rs -> - Printf.sprintf "(%s)" - (String.concat "|" - (Array.to_list (Array.map string_of_referent_ty rs))) - | ParamTy i -> Printf.sprintf "#%d" i - | OpaqueTy -> "?" - | CodeTy -> "!" - | NilTy -> "()" -;; - - -type hreg_formatter = hreg -> string;; - -let string_of_reg (f:hreg_formatter) (r:reg) : string = - match r with - Vreg i -> Printf.sprintf "" i - | Hreg i -> f i -;; - -let string_of_off (e:Asm.expr64 option) : string = - match e with - None -> "" - | Some (Asm.IMM i) when (i64_lt i 0L) -> - Printf.sprintf " - 0x%Lx" (Int64.neg i) - | Some e' -> " + " ^ (Asm.string_of_expr64 e') -;; - -let string_of_mem (f:hreg_formatter) (a:mem) : string = - match a with - Abs e -> - Printf.sprintf "[%s]" (Asm.string_of_expr64 e) - | RegIn (r, off) -> - Printf.sprintf "[%s%s]" (string_of_reg f r) (string_of_off off) - | Spill i -> - Printf.sprintf "[]" i -;; -let string_of_cell (f:hreg_formatter) (c:cell) : string = - match c with - Reg (r,ty) -> - if !log_iltypes - then - Printf.sprintf "%s:%s" (string_of_reg f r) (string_of_scalar_ty ty) - else - Printf.sprintf "%s" (string_of_reg f r) - | Mem (a,ty) -> - if !log_iltypes - then - Printf.sprintf "%s:%s" - (string_of_mem f a) (string_of_referent_ty ty) - else - Printf.sprintf "%s" (string_of_mem f a) -;; - -let string_of_operand (f:hreg_formatter) (op:operand) : string = - match op with - Cell c -> string_of_cell f c - | ImmPtr (f, ty) -> - if !log_iltypes - then - Printf.sprintf "$<%s>.mpos:%s*" - f.fixup_name (string_of_referent_ty ty) - else - Printf.sprintf "$<%s>.mpos" f.fixup_name - | Imm (i, ty) -> - if !log_iltypes - then - Printf.sprintf "$%s:%s" - (Asm.string_of_expr64 i) (string_of_ty_mach ty) - else - Printf.sprintf "$%s" (Asm.string_of_expr64 i) -;; - - -let string_of_code (f:hreg_formatter) (c:code) : string = - match c with - CodeLabel lab -> Printf.sprintf "