diff --git a/include/llvm/Analysis/NaCl.h b/include/llvm/Analysis/NaCl.h new file mode 100644 index 000000000000..f174e72608e0 --- /dev/null +++ b/include/llvm/Analysis/NaCl.h @@ -0,0 +1,71 @@ +//===-- NaCl.h - NaCl Analysis ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_NACL_H +#define LLVM_ANALYSIS_NACL_H + +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { + +class FunctionPass; +class ModulePass; +extern cl::opt PNaClABIAllowDebugMetadata; + +class PNaClABIErrorReporter { + public: + PNaClABIErrorReporter() : ErrorCount(0), Errors(ErrorString), + UseFatalErrors(true) {} + // Return the number of verification errors from the last run. + int getErrorCount() { return ErrorCount; } + // Print the error messages to O + void printErrors(llvm::raw_ostream &O) { + Errors.flush(); + O << ErrorString; + } + // Increments the error count and returns an ostream to which the error + // message can be streamed. + raw_ostream &addError() { + ErrorCount++; + return Errors; + } + // Reset the error count and error messages. + void reset() { + ErrorCount = 0; + Errors.flush(); + ErrorString.clear(); + } + void setNonFatal() { + UseFatalErrors = false; + } + void checkForFatalErrors() { + if (UseFatalErrors && ErrorCount != 0) { + printErrors(errs()); + report_fatal_error("PNaCl ABI verification failed"); + } + } + private: + int ErrorCount; + std::string ErrorString; + raw_string_ostream Errors; + bool UseFatalErrors; +}; + +FunctionPass *createPNaClABIVerifyFunctionsPass( + PNaClABIErrorReporter *Reporter); +ModulePass *createPNaClABIVerifyModulePass(PNaClABIErrorReporter *Reporter, + bool StreamingMode = false); + +} + + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitCodes.h b/include/llvm/Bitcode/NaCl/NaClBitCodes.h new file mode 100644 index 000000000000..87f1b858cdd4 --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitCodes.h @@ -0,0 +1,270 @@ +//===- NaClBitCodes.h - Enum values for the bitcode format ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header Bitcode enum values. +// +// The enum values defined in this file should be considered permanent. If +// new features are added, they should have values added at the end of the +// respective lists. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLBITCODES_H +#define LLVM_BITCODE_NACL_NACLBITCODES_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include + +namespace llvm { +namespace naclbitc { + enum StandardWidths { + BlockIDWidth = 8, // We use VBR-8 for block IDs. + CodeLenWidth = 4, // Codelen are VBR-4. + BlockSizeWidth = 32 // BlockSize up to 2^32 32-bit words = 16GB per block. + }; + + // The standard abbrev namespace always has a way to exit a block, enter a + // nested block, define abbrevs, and define an unabbreviated record. 
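+  // For example, with no application-defined abbreviations a block's code
+  // selector only needs to distinguish the four reserved IDs below (values
+  // 0 through 3), so NaClBitcodeSelectorAbbrev (defined later in this
+  // header) sizes the selector as
+  // NaClBitsNeededForValue(DEFAULT_MAX_ABBREV) == 2 bits.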
+ enum FixedAbbrevIDs { + END_BLOCK = 0, // Must be zero to guarantee termination for broken bitcode. + ENTER_SUBBLOCK = 1, + + /// DEFINE_ABBREV - Defines an abbrev for the current block. It consists + /// of a vbr5 for # operand infos. Each operand info is emitted with a + /// single bit to indicate if it is a literal encoding. If so, the value is + /// emitted with a vbr8. If not, the encoding is emitted as 3 bits followed + /// by the info value as a vbr5 if needed. + DEFINE_ABBREV = 2, + + // UNABBREV_RECORDs are emitted with a vbr6 for the record code, followed by + // a vbr6 for the # operands, followed by vbr6's for each operand. + UNABBREV_RECORD = 3, + + // This is not a code, this is a marker for the first abbrev assignment. + // In addition, we assume up to two additional enumerated constants are + // added for each extension. These constants are: + // + // PREFIX_MAX_FIXED_ABBREV + // PREFIX_MAX_ABBREV + // + // PREFIX_MAX_ABBREV defines the maximal enumeration value used for + // the code selector of a block. If Both PREFIX_MAX_FIXED_ABBREV + // and PREFIX_MAX_ABBREV is defined, then PREFIX_MAX_FIXED_ABBREV + // defines the last code selector of the block that must be read using + // a single read (i.e. a FIXED read, or the first chunk of a VBR read. + FIRST_APPLICATION_ABBREV = 4, + // Defines default values for code length, if no additional selectors + // are added. + DEFAULT_MAX_ABBREV = FIRST_APPLICATION_ABBREV-1 + }; + + /// StandardBlockIDs - All bitcode files can optionally include a BLOCKINFO + /// block, which contains metadata about other blocks in the file. + enum StandardBlockIDs { + /// BLOCKINFO_BLOCK is used to define metadata about blocks, for example, + /// standard abbrevs that should be available to all blocks of a specified + /// ID. + BLOCKINFO_BLOCK_ID = 0, + + // Block IDs 1-7 are reserved for future expansion. + FIRST_APPLICATION_BLOCKID = 8 + }; + + /// BlockInfoCodes - The blockinfo block contains metadata about user-defined + /// blocks. + enum BlockInfoCodes { + // DEFINE_ABBREV has magic semantics here, applying to the current SETBID'd + // block, instead of the BlockInfo block. + + BLOCKINFO_CODE_SETBID = 1, // SETBID: [blockid#] + // The following two codes were removed + // because the PNaCl reader could read + // them, but couldn't be generated by + // the writer. + BLOCKINFO_CODE_BLOCKNAME = 2, // Not used in PNaCl. + BLOCKINFO_CODE_SETRECORDNAME = 3 // Not used in PNaCl. + }; + +} // End naclbitc namespace + +/// NaClBitCodeAbbrevOp - This describes one or more operands in an abbreviation. +/// This is actually a union of two different things: +/// 1. It could be a literal integer value ("the operand is always 17"). +/// 2. It could be an encoding specification ("this operand encoded like so"). +/// +class NaClBitCodeAbbrevOp { + uint64_t Val; // A literal value or data for an encoding. + bool IsLiteral : 1; // Indicate whether this is a literal value or not. + unsigned Enc : 3; // The encoding to use. +public: + enum Encoding { + Fixed = 1, // A fixed width field, Val specifies number of bits. + VBR = 2, // A VBR field where Val specifies the width of each chunk. + Array = 3, // A sequence of fields, next field species elt encoding. + Char6 = 4, // A 6-bit fixed field which maps to [a-zA-Z0-9._]. + Blob = 5 // Not used in PNaCl. Used to be supported by the reader, + // but was never generated by the writer. 
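+    // For example (illustrative sketch), an abbreviation describing records
+    // whose code is always 4, followed by an array of VBR-6 values, mixes
+    // literal and encoded operands (NaClBitCodeAbbrev is defined below in
+    // this header):
+    //
+    //   NaClBitCodeAbbrev *Abbrev = new NaClBitCodeAbbrev();
+    //   Abbrev->Add(NaClBitCodeAbbrevOp(4));                           // literal record code
+    //   Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array));  // array of ...
+    //   Abbrev->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // ... VBR-6 elements
+    //   // ... use the abbreviation, then Abbrev->dropRef() when done.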
+ }; + + explicit NaClBitCodeAbbrevOp(uint64_t V) : Val(V), IsLiteral(true) {} + explicit NaClBitCodeAbbrevOp(Encoding E, uint64_t Data = 0) + : Val(Data), IsLiteral(false), Enc(E) {} + + // For whatever reason, this isn't being autogenerated. + bool operator==(const NaClBitCodeAbbrevOp& rhs) const { + return Val == rhs.Val && IsLiteral == rhs.IsLiteral && Enc == rhs.Enc; + } + + bool isLiteral() const { return IsLiteral; } + bool isEncoding() const { return !IsLiteral; } + + // Accessors for literals. + uint64_t getLiteralValue() const { assert(isLiteral()); return Val; } + + // Accessors for encoding info. + Encoding getEncoding() const { assert(isEncoding()); return (Encoding)Enc; } + uint64_t getEncodingData() const { + assert(isEncoding() && hasEncodingData()); + return Val; + } + + bool hasEncodingData() const { return hasEncodingData(getEncoding()); } + static bool hasEncodingData(Encoding E) { + switch (E) { + case Fixed: + case VBR: + return true; + case Array: + case Char6: + return false; + default: + break; + } + report_fatal_error("Invalid encoding"); + } + + /// isChar6 - Return true if this character is legal in the Char6 encoding. + static bool isChar6(char C) { + if (C >= 'a' && C <= 'z') return true; + if (C >= 'A' && C <= 'Z') return true; + if (C >= '0' && C <= '9') return true; + if (C == '.' || C == '_') return true; + return false; + } + static unsigned EncodeChar6(char C) { + if (C >= 'a' && C <= 'z') return C-'a'; + if (C >= 'A' && C <= 'Z') return C-'A'+26; + if (C >= '0' && C <= '9') return C-'0'+26+26; + if (C == '.') return 62; + if (C == '_') return 63; + llvm_unreachable("Not a value Char6 character!"); + } + + static char DecodeChar6(unsigned V) { + assert((V & ~63) == 0 && "Not a Char6 encoded character!"); + if (V < 26) return V+'a'; + if (V < 26+26) return V-26+'A'; + if (V < 26+26+10) return V-26-26+'0'; + if (V == 62) return '.'; + if (V == 63) return '_'; + llvm_unreachable("Not a value Char6 character!"); + } + +}; + +template <> struct isPodLike { + static const bool value=true; +}; + +/// NaClBitCodeAbbrev - This class represents an abbreviation record. An +/// abbreviation allows a complex record that has redundancy to be stored in a +/// specialized format instead of the fully-general, fully-vbr, format. +class NaClBitCodeAbbrev { + SmallVector OperandList; + unsigned char RefCount; // Number of things using this. + ~NaClBitCodeAbbrev() {} +public: + NaClBitCodeAbbrev() : RefCount(1) {} + + void addRef() { ++RefCount; } + void dropRef() { if (--RefCount == 0) delete this; } + + unsigned getNumOperandInfos() const { + return static_cast(OperandList.size()); + } + const NaClBitCodeAbbrevOp &getOperandInfo(unsigned N) const { + return OperandList[N]; + } + + void Add(const NaClBitCodeAbbrevOp &OpInfo) { + OperandList.push_back(OpInfo); + } + bool operator==(const NaClBitCodeAbbrev& rhs) const { + return OperandList == rhs.OperandList; + } +}; + +/// \brief Returns number of bits needed to encode +/// value for dense FIXED encoding. +inline unsigned NaClBitsNeededForValue(unsigned Value) { + // Note: Need to handle case where Value=0xFFFFFFFF as special case, + // since we can't add 1 to it. + if (Value >= 0x80000000) return 32; + return Log2_32_Ceil(Value+1); +} + +/// \brief Encode a signed value by moving the sign to the LSB for dense +/// VBR encoding. +inline uint64_t NaClEncodeSignRotatedValue(int64_t V) { + return (V >= 0) ? 
(V << 1) : ((-V << 1) | 1); +} + +/// \brief Decode a signed value stored with the sign bit in +/// the LSB for dense VBR encoding. +inline uint64_t NaClDecodeSignRotatedValue(uint64_t V) { + if ((V & 1) == 0) + return V >> 1; + if (V != 1) + return -(V >> 1); + // There is no such thing as -0 with integers. "-0" really means MININT. + return 1ULL << 63; +} + +/// \brief This class determines whether a FIXED or VBR +/// abbreviation should be used for the selector, and the number of bits +/// needed to capture such selectors. +class NaClBitcodeSelectorAbbrev { + +public: + // If true, use a FIXED abbreviation. Otherwise, use a VBR abbreviation. + bool IsFixed; + // Number of bits needed for selector. + unsigned NumBits; + + // Creates a selector range for the given values. + NaClBitcodeSelectorAbbrev(bool IF, unsigned NB) + : IsFixed(IF), NumBits(NB) {} + + // Creates a selector range when no abbreviations are defined. + NaClBitcodeSelectorAbbrev() + : IsFixed(true), + NumBits(NaClBitsNeededForValue(naclbitc::DEFAULT_MAX_ABBREV)) {} + + // Creates a selector range to handle fixed abbrevations up to + // the specified value. + explicit NaClBitcodeSelectorAbbrev(unsigned MaxAbbrev) + : IsFixed(true), + NumBits(NaClBitsNeededForValue(MaxAbbrev)) {} +}; +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeAnalyzer.h b/include/llvm/Bitcode/NaCl/NaClBitcodeAnalyzer.h new file mode 100644 index 000000000000..e6eada3ff4e0 --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitcodeAnalyzer.h @@ -0,0 +1,54 @@ +//===-- NaClBitcodeAnalyzer.h - Bitcode Analyzer --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// Analytical information about a bitcode file. Intended as an aid to developers +// of bitcode reading and writing software. It produces a summary of the bitcode +// file that shows various statistics about the contents of the file. By default +// this information is detailed and contains information about individual +// bitcode blocks and the functions in the module. The tool is also able to +// print a bitcode file in a straight forward text format that shows the +// containment and relationships of the information in the bitcode file (-dump +// option). +// +//===----------------------------------------------------------------------===// + +#ifndef NACL_BITCODE_ANALYZER_H +#define NACL_BITCODE_ANALYZER_H + +namespace llvm { + +class MemoryBuffer; +class StringRef; +class raw_ostream; + +// Analysis options. See the command-line documentation in pnacl-bcanalyzer +// for a description. +struct AnalysisDumpOptions { + AnalysisDumpOptions() + : DoDump(false), DumpOnlyRecords(false), OpsPerLine(0), + NoHistogram(false), NonSymbolic(false) + {} + + bool DoDump; + bool DumpOnlyRecords; + unsigned OpsPerLine; + bool NoHistogram; + bool NonSymbolic; +}; + +/// Run analysis on the given file. Output goes to OS. +int AnalyzeBitcodeInFile(const StringRef &InputFilename, raw_ostream &OS, + const AnalysisDumpOptions &DumpOptions); + +/// Run analysis on a memory buffer with bitcode. The buffer is owned by the +/// caller. 
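+/// Either entry point is typically driven as follows (illustrative sketch;
+/// the input file name is hypothetical, and a nonzero return value is
+/// assumed to indicate failure):
+/// \code
+///   AnalysisDumpOptions Opts;
+///   Opts.DoDump = true;   // request the full textual dump
+///   int Result = AnalyzeBitcodeInFile(StringRef("input.pexe"), errs(), Opts);
+///   if (Result != 0)
+///     errs() << "bitcode analysis failed\n";
+/// \endcode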
+int AnalyzeBitcodeInBuffer(const MemoryBuffer &Buf, raw_ostream &OS, + const AnalysisDumpOptions &DumpOptions); + +} // namespace llvm + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h b/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h new file mode 100644 index 000000000000..214971e701fa --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitcodeHeader.h @@ -0,0 +1,232 @@ +//===-- llvm/Bitcode/NaCl/NaClBitcodeHeader.h - ----------------*- C++ -*-===// +// NaCl Bitcode header reader. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines interfaces to read and write NaCl bitcode wire format +// file headers. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLBITCODEHEADER_H +#define LLVM_BITCODE_NACL_NACLBITCODEHEADER_H + +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/DataTypes.h" +#include +#include + +namespace llvm { +class StreamableMemoryObject; + +// Class representing a variable-size metadata field in the bitcode header. +// Also contains the list of known (typed) Tag IDs. +// +// The serialized format has 2 fixed subfields (ID:type and data length) and the +// variable-length data subfield +class NaClBitcodeHeaderField { + NaClBitcodeHeaderField(const NaClBitcodeHeaderField &) LLVM_DELETED_FUNCTION; + void operator=(const NaClBitcodeHeaderField &)LLVM_DELETED_FUNCTION; + +public: + // Defines the ID associated with the value. Valid values are in + // {0x0, ..., 0xFFF} + typedef enum { + kInvalid = 0, // KUnknownType. + kPNaClVersion = 1 // kUint32. + } Tag; + // Defines the type of value. + typedef enum { + kBufferType, // Buffer of form uint8_t[len]. + kUInt32Type + } FieldType; + // Defines the number of bytes in a (32-bit) word. + static const int WordSize = 4; + + // Defines the encoding of the fixed fields {i.e. ID:type and data length). + typedef uint16_t FixedSubfield; + + // Create an invalid header field. + NaClBitcodeHeaderField(); + + // Create a header field with an uint32_t value. + NaClBitcodeHeaderField(Tag MyID, uint32_t value); + + // Create a header field for the given data. + NaClBitcodeHeaderField(Tag MyID, size_t MyLen, uint8_t *MyData); + + virtual ~NaClBitcodeHeaderField() { + if (Data) + delete[] Data; + } + + /// \brief Number of bytes used to represent header field. + size_t GetTotalSize() const { + // Round up to 4 byte alignment + return (kTagLenSize + Len + (WordSize - 1)) & ~(WordSize - 1); + } + + /// \brief Write field into Buf[BufLen]. + bool Write(uint8_t *Buf, size_t BufLen) const; + + /// \brief Read field form Buf[BufLen]. + bool Read(const uint8_t *Buf, size_t BufLen); + + /// \brief Returns string describing field. + std::string Contents() const; + + /// \brief Get the data size from a serialized field to allow allocation. + static size_t GetDataSizeFromSerialized(const uint8_t *Buf) { + FixedSubfield Length; + ReadFixedSubfield(&Length, Buf + sizeof(FixedSubfield)); + return Length; + } + + /// \brief Return the ID of the field. + Tag GetID() const { return ID; } + + FieldType GetType() const { return FType; } + + /// \brief Return the length of the data (in bytes). + size_t GetLen() const { return Len; } + + /// \brief Return the data. Data is array getData()[getLen()]. 
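+  /// For example (illustrative sketch), a version field can be built and
+  /// serialized into a caller-provided buffer (the version value 2 is only
+  /// an example):
+  /// \code
+  ///   NaClBitcodeHeaderField Version(NaClBitcodeHeaderField::kPNaClVersion, 2);
+  ///   uint8_t Buffer[16];
+  ///   bool Written = Version.Write(Buffer, sizeof(Buffer));
+  /// \endcode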
+ const uint8_t *GetData() const { return Data; } + + /// \brief Returns the uint32_t value stored. Requires that + /// getType() == kUint32Type + uint32_t GetUInt32Value() const; + +private: + // Convert ID:Type into a fixed subfield + FixedSubfield EncodeTypedID() const { return (ID << 4) | FType; } + // Extract out ID and Type from a fixed subfield. + void DecodeTypedID(FixedSubfield Subfield, Tag &ID, FieldType &FType) { + ID = static_cast(Subfield >> 4); + FType = static_cast(Subfield & 0xF); + } + // Combined size of the fixed subfields + const static size_t kTagLenSize = 2 * sizeof(FixedSubfield); + static void WriteFixedSubfield(FixedSubfield Value, uint8_t *Buf) { + Buf[0] = Value & 0xFF; + Buf[1] = (Value >> 8) & 0xFF; + } + static void ReadFixedSubfield(FixedSubfield *Value, const uint8_t *Buf) { + *Value = Buf[0] | Buf[1] << 8; + } + Tag ID; + FieldType FType; + size_t Len; + uint8_t *Data; +}; + +/// \brief Class holding parsed header fields in PNaCl bitcode file. +class NaClBitcodeHeader { + // The set of parsed header fields. The header takes ownership of + // all fields in this vector. + std::vector Fields; + // The number of bytes in the PNaCl header. + size_t HeaderSize; + // String defining why it is unsupported (if unsupported). + std::string UnsupportedMessage; + // Flag defining if header is supported. + bool IsSupportedFlag; + // Flag defining if the corresponding bitcode file is readable. + bool IsReadableFlag; + // Defines the PNaCl version defined by the header file. + uint32_t PNaClVersion; + +public: + static const int WordSize = NaClBitcodeHeaderField::WordSize; + + NaClBitcodeHeader(); + ~NaClBitcodeHeader(); + + /// \brief Installs the fields of the header, defining if the header + /// is readable and supported. Sets UnsupportedMessage on failure. + void InstallFields(); + + /// \brief Adds a field to the list of fields in a header. Takes ownership + /// of fields added. + void push_back(NaClBitcodeHeaderField *Field) { + Fields.push_back(Field); + } + + /// \brief Read the PNaCl bitcode header, The format of the header is: + /// + /// 1) 'PEXE' - The four character sequence defining the magic number. + /// 2) uint_16 num_fields - The number of NaClBitcodeHeaderField's. + /// 3) uint_16 num_bytes - The number of bytes to hold fields in + /// the header. + /// 4) NaClBitcodeHeaderField f1 - The first bitcode header field. + /// ... + /// 2 + num_fields) NaClBitcodeHeaderField fn - The last bitcode header + /// field. + /// + /// Returns false if able to read (all of) the bitcode header. + bool Read(const unsigned char *&BufPtr, const unsigned char *&BufEnd); + + // \brief Read the PNaCl bitcode header, recording the fields found + // in the header. Returns false if able to read (all of) the bitcode header. + bool Read(StreamableMemoryObject *Bytes); + + // \brief Returns the number of bytes read to consume the header. + size_t getHeaderSize() { return HeaderSize; } + + /// \brief Returns string describing why the header describes + /// an unsupported PNaCl Bitcode file. + const std::string &Unsupported() const { return UnsupportedMessage; } + + /// \brief Returns true if supported. That is, it can be run in the + /// browser. + bool IsSupported() const { return IsSupportedFlag; } + + /// \brief Returns true if the bitcode file should be readable. Note + /// that just because it is readable, it doesn't necessarily mean that + /// it is supported. + bool IsReadable() const { return IsReadableFlag; } + + /// \brief Returns number of fields defined. 
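+  /// For example (illustrative sketch), a header can be read and checked
+  /// against a hypothetical in-memory bitcode image delimited by BufPtr and
+  /// BufEnd (both of type const unsigned char *):
+  /// \code
+  ///   NaClBitcodeHeader Header;
+  ///   if (Header.Read(BufPtr, BufEnd))   // true indicates a read failure
+  ///     report_fatal_error("invalid PNaCl bitcode header");
+  ///   if (!Header.IsReadable())
+  ///     report_fatal_error(Header.Unsupported());
+  ///   uint32_t Version = Header.GetPNaClVersion();
+  /// \endcode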
+ size_t NumberFields() const { return Fields.size(); } + + /// \brief Returns a pointer to the field with the given ID + /// (0 if no such field). + NaClBitcodeHeaderField *GetTaggedField(NaClBitcodeHeaderField::Tag ID) const; + + /// \brief Returns a pointer to the Nth field in the header + /// (0 if no such field). + NaClBitcodeHeaderField *GetField(size_t index) const; + + /// \brief Returns the PNaClVersion, as defined by the header. + uint32_t GetPNaClVersion() const { return PNaClVersion; } + +private: + // Reads and verifies the first 8 bytes of the header, consisting + // of the magic number 'PEXE', and the value defining the number + // of fields and number of bytes used to hold fields. + // Returns false if successful, sets UnsupportedMessage otherwise. + bool ReadPrefix(const unsigned char *BufPtr, const unsigned char *BufEnd, + unsigned &NumFields, unsigned &NumBytes); + + // Reads and verifies the fields in the header. + // Returns false if successful, sets UnsupportedMessage otherwise. + bool ReadFields(const unsigned char *BufPtr, const unsigned char *BufEnd, + unsigned NumFields, unsigned NumBytes); + + // Sets the Unsupported error message and returns true. + bool UnsupportedError(StringRef Message) { + UnsupportedMessage = Message.str(); + return true; + } + +}; + +} // namespace llvm + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeParser.h b/include/llvm/Bitcode/NaCl/NaClBitcodeParser.h new file mode 100644 index 000000000000..ee8a3080c0ee --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitcodeParser.h @@ -0,0 +1,364 @@ +//===- NaClBitcodeParser.h -----------------------------------*- C++ -*-===// +// Low-level bitcode driver to parse PNaCl bitcode files. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Parses and processes low-level PNaCl bitcode files. Defines class +// NaClBitcodeParser. +// +// The concepts of PNaCl bitcode files are basically the same as for +// LLVM bitcode files (see http://llvm.org/docs/BitCodeFormat.html for +// details). +// +// The bitstream format is an abstract encoding of structured data, +// very similar to XML in some ways. Like XML, bitstream files contain +// tags, and nested structures, and you can parse the file without +// having to understand the tags. Unlike XML, the bitstream format is +// a binary encoding, and provides a mechanism for the file to +// self-describe "abbreviations". Abbreviations are effectively size +// optimizations for the content. +// +// The bitcode file is conceptually a sequence of "blocks", defining +// the content. Blocks contain a sequence of records and +// blocks. Nested content is defined using nested blocks. A (data) +// "record" is a tag, and a vector of (unsigned integer) values. +// +// Blocks are identified using Block IDs. Each kind of block has a +// unique block "ID". Records have two elements: +// +// a) A "code" identifying what type of record it is. +// b) A vector of "values" defining the contents of the record. +// +// The bitstream "reader" (defined in NaClBitstreamReader.h) defines +// the implementation that converts the low-level bit file into +// records and blocks. The bit stream is processed by moving a +// "cursor" over the sequence of bits. +// +// The bitstream reader assumes that each block/record is read in by +// first reading the "entry". 
The entry defines whether it corresponds +// to one of the following: +// +// a) At the beginning of a (possibly nested) block +// b) At the end of the current block. +// c) The input defines an abberviation. +// d) The input defines a record. +// +// An entry contains two values, a "kind" and an "ID". The kind +// defines which of the four cases above occurs. The ID provides +// identifying information on how to further process the input. For +// case (a), the ID is the identifier associated with the the block +// being processed. For case (b) and (c) the ID is ignored. For case +// (d) the ID identifies the abbreviation that should be used to parse +// the values. +// +// The class NaClBitcodeParser defines a bitcode parser that extracts +// the blocks and records, which are then processed using virtual +// callbacks. In general, you will want to implement derived classes +// for each type of block, so that the corresponding data is processed +// appropriately. +// +// The class NaClBitcodeParser parses a bitcode block, and defines a +// set of callbacks for that block, including: +// +// a) EnterBlock: What to do once we have entered the block. +// b) ProcessRecord: What to do with each parsed record. +// c) ProcessAbbrevRecord: What to do with a parsed abbreviation. +// d) ParseBlock: Parse the (nested) block with the given ID. +// e) ExitBlock: What to do once we have finished processing the block. +// +// Note that a separate instance of NaClBitcodeParser (or a +// corresponding derived class) is created for each nested block. Each +// instance is responsible for only parsing a single block. Method +// ParseBlock creates new instances to parse nested blocks. Method +// GetEnclosingParser() can be used to refer to the parser associated +// with the enclosing block. +// +// TODO(kschimpf): Define an intermediate derived class of +// NaClBitcodeParser that defines callbacks based on the actual +// structure of PNaCl bitcode files. That is, it has callbacks for +// each of the types of blocks (i.e. module, types, global variables, +// function, symbol tables etc). This derivied class can then be used +// as the base class for the bitcode reader. +// +// TODO(kschimpf): Currently, the processing of abbreviations is +// handled by the PNaCl bitstream reader, rather than by the +// parser. Hence, we currently require defining methods +// EnterBlockInfo, ExitBlockInfo, and ProcessRecordAbbrev. BlockInfo +// is a special block that defines abbreviations to be applied to all +// blocks. Record abbreviations (which are a special kind of record) +// define abbreviations for a the current block. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLBITCODEPARSER_H +#define LLVM_BITCODE_NACL_NACLBITCODEPARSER_H + +#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h" +#include "llvm/Support/raw_ostream.h" +#include + +namespace llvm { + +class NaClBitcodeParser; + +/// Defines the data associated with reading a block record in the +/// PNaCl bitcode stream. +class NaClBitcodeRecord { +public: + /// Type for vector of values representing a record. + typedef SmallVector RecordVector; + + NaClBitcodeRecord(unsigned BlockID, NaClBitstreamCursor &Cursor) + : BlockID(BlockID), + Cursor(Cursor), + StartBit(Cursor.GetCurrentBitNo()) { + } + + /// Print the contents out to the given stream (for debugging). + void Print(raw_ostream& os) const; + + /// Returns the bitstream reader being used. 
+ NaClBitstreamReader &GetReader() const { + return *Cursor.getBitStreamReader(); + } + + /// Returns the cursor position within the bitstream. + NaClBitstreamCursor &GetCursor() const { + return Cursor; + } + + /// Returns the block ID of the record. + unsigned GetBlockID() const { + return BlockID; + } + + /// Returns the kind of entry read from the input stream. + unsigned GetEntryKind() const { + return Entry.Kind; + } + + /// Returns the code value (i.e. selector) associated with the + /// record. + unsigned GetCode() const { + return Code; + } + + /// Returns the EntryID (e.g. abbreviation if != + /// naclbitc::UNABBREV_RECORD) associated with the record. Note: + /// for block-enter, block-exit, and define-abbreviation, EntryID is + /// not the corresponding abbreviation. + unsigned GetEntryID() const { + return Entry.ID; + } + + /// Returns the (value) record associated with the read record. + const RecordVector &GetValues() const { + return Values; + } + RecordVector &GetValues() { + return Values; + } + + /// Returns the number of bits in this record. + unsigned GetNumBits() const { + return GetCursor().GetCurrentBitNo() - StartBit; + } + + /// Returns true if the record was read using an abbreviation. + bool UsedAnAbbreviation() const { + return GetEntryKind() == NaClBitstreamEntry::Record && + GetEntryID() != naclbitc::UNABBREV_RECORD; + } + + /// Returns the abbrevation index used to read the record. + /// Returns naclbitc::UNABBREV_RECORD if not applicable. + unsigned GetAbbreviationIndex() const { + return UsedAnAbbreviation() + ? GetEntryID() : static_cast(naclbitc::UNABBREV_RECORD); + } + +protected: + // The block ID associated with this record. + unsigned BlockID; + // The bitstream cursor defining location within the bitcode file. + NaClBitstreamCursor &Cursor; + // The entry ID associated with the record. + unsigned EntryID; + // The selector code associated with the record. + unsigned Code; + // The sequence of values defining the parsed record. + RecordVector Values; + // The entry (i.e. value(s) preceding the record that define what + // value comes next). + NaClBitstreamEntry Entry; + // Start bit for the record. + uint64_t StartBit; + + /// Returns the position of the start bit for this record. + unsigned GetStartBit() const { + return StartBit; + } + +private: + // Allows class NaClBitcodeParser to read values into the + // record, thereby hiding the details of how to read values. + friend class NaClBitcodeParser; + + /// Read bitstream entry. Defines what construct appears next in the + /// bitstream. + void ReadEntry() { + StartBit = GetCursor().GetCurrentBitNo(); + Entry = GetCursor().advance(NaClBitstreamCursor::AF_DontAutoprocessAbbrevs); + } + + /// Reads in a record's values, if the entry defines a record (Must + /// be called after ReadEntry). + void ReadValues() { + Values.clear(); + Code = GetCursor().readRecord(Entry.ID, Values); + } + + NaClBitcodeRecord(const NaClBitcodeRecord &Rcd) LLVM_DELETED_FUNCTION; + void operator=(const NaClBitcodeRecord &Rcd) LLVM_DELETED_FUNCTION; +}; + +/// Parses a block in the PNaCL bitcode stream. +class NaClBitcodeParser { +public: + + // Creates a parser to parse the the block at the given cursor in + // the PNaCl bitcode stream. This instance is a "dummy" instance + // that starts the parser. 
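+  //
+  // A typical client (illustrative sketch) derives from this class,
+  // overrides the callbacks it cares about, and drives parsing from a
+  // cursor positioned at the start of a top-level block:
+  //
+  //   class MyParser : public NaClBitcodeParser {
+  //   public:
+  //     explicit MyParser(NaClBitstreamCursor &Cursor)
+  //         : NaClBitcodeParser(Cursor) {}
+  //     virtual void ProcessRecord() {
+  //       errs() << "record code: " << Record.GetCode() << "\n";
+  //     }
+  //   };
+  //
+  //   MyParser Parser(Cursor);
+  //   if (Parser.Parse())
+  //     errs() << "unable to parse block\n";
+  //
+  // A fuller client would also override ParseBlock(unsigned) to build
+  // block-specific parsers for nested blocks.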
+ explicit NaClBitcodeParser(NaClBitstreamCursor &Cursor) + : EnclosingParser(0), + Record(ILLEGAL_BLOCK_ID, Cursor), + StartBit(Cursor.GetCurrentBitNo()) { + BlockStart = StartBit; + } + + virtual ~NaClBitcodeParser(); + + /// Reads the (top-level) block associated with the given block + /// record at the stream cursor. Returns true if unable to parse. + /// Can be called multiple times to parse multiple blocks. + bool Parse(); + + // Called once the bitstream reader has entered the corresponding + // subblock. Argument NumWords is set to the number of words in the + // corresponding subblock. + virtual void EnterBlock(unsigned NumWords) {} + + // Called when the corresponding EndBlock of the block being parsed + // is found. + virtual void ExitBlock() {} + + // Called before a BlockInfo block is parsed. Note: BlockInfo blocks + // are special. They include abbreviations to be used for blocks. + // After this routine is called, the NaClBitstreamParser is called + // to parse the BlockInfo block (rather than making a call to + // Parser->Parse()). + virtual void EnterBlockInfo() {} + + // Called after a BlockInfo block is parsed. + virtual void ExitBlockInfo() { ExitBlock(); } + + // Called after each record (within the block) is read (into field Record). + virtual void ProcessRecord() {} + + // Called if a block-specific abbreviation is read (into field + // Record), after processing by the bitstream reader. + virtual void ProcessRecordAbbrev() {} + + // Creates an instance of the NaClBitcodeParser to use to parse the + // block with the given block ID, and then call's method + // ParseThisBlock() to parse the corresponding block. Note: + // Each derived class should define it's own version of this + // method, following the pattern below. + virtual bool ParseBlock(unsigned BlockID) { + // Default implementation just builds a parser that does nothing. + NaClBitcodeParser Parser(BlockID, this); + return Parser.ParseThisBlock(); + } + + // Called when error occurs. Message is the error to report. Always + // returns true (the error return value of Parse). + virtual bool Error(const std::string &Message) { + errs() << "Error: " << Message << "\n"; + return true; + } + + // Returns the number of bits in this block. + unsigned GetNumBits() { + return Record.GetCursor().GetCurrentBitNo() - StartBit; + } + + // Returns the number of bits in this block, but not subblocks + // within this block. + unsigned GetLocalNumBits() { + return Record.GetCursor().GetCurrentBitNo() - BlockStart; + } + + /// Returns the block ID associated with the Parser. + unsigned GetBlockID() { + return Record.GetBlockID(); + } + + /// Returns the enclosing block parser of this block. + NaClBitcodeParser *GetEnclosingParser() const { + // Note: The top-level parser instance is a dummy instance + // and is not considered an enclosing parser. + return EnclosingParser->EnclosingParser ? EnclosingParser : 0; + } + + // Parses the block using the parser defined by + // ParseBlock(unsigned). Returns true if unable to parse the + // block. Note: Should only be called by virtual + // ParseBlock(unsigned). + bool ParseThisBlock(); + +protected: + // The containing parser. + NaClBitcodeParser *EnclosingParser; + + // The current record (within the block) being processed. + NaClBitcodeRecord Record; + + // Creates a block parser to parse the block associated with the + // bitcode entry that defines the beginning of a block. This + // instance actually parses the corresponding block. 
+ NaClBitcodeParser(unsigned BlockID, + NaClBitcodeParser *EnclosingParser) + : EnclosingParser(EnclosingParser), + Record(BlockID, EnclosingParser->Record.GetCursor()), + StartBit(EnclosingParser->Record.GetStartBit()) { + BlockStart = StartBit; + } + +private: + // Special constant identifying the top-level instance. + static const unsigned ILLEGAL_BLOCK_ID = UINT_MAX; + + // The start bit of the block. + unsigned StartBit; + // The start bit of the block, plus the bits in all subblocks. Used + // to compute the number of (block local) bits. + unsigned BlockStart; + + // Updates BlockStart in the enclosingblock, so that bits in this + // block are not counted as local bits for the enclosing block. + void RemoveBlockBitsFromEnclosingBlock() { + EnclosingParser->BlockStart += GetNumBits(); + } + + void operator=(const NaClBitcodeParser &Parser) LLVM_DELETED_FUNCTION; + NaClBitcodeParser(const NaClBitcodeParser &Parser) LLVM_DELETED_FUNCTION; +}; + +} // namespace llvm + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeRecordBitsDist.h b/include/llvm/Bitcode/NaCl/NaClBitcodeRecordBitsDist.h new file mode 100644 index 000000000000..7c08d9c85508 --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitcodeRecordBitsDist.h @@ -0,0 +1,122 @@ +//===- NaClBitcodeRecordBitsDist.h -----------------------------*- C++ -*-===// +// Maps distributions of values and corresponding number of +// bits in PNaCl bitcode records. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Creates a (nestable) distribution map of values, and the correspdonding +// bits, in PNaCl bitcode records. These distributions are built directly +// on top of NaClBitcodeRecordDist and NaClBitcodeRecordDistElement classes. +// See (included) file NaClBitcodeRecordDist.h for more details on these +// classes, and how you should use them. + +#ifndef LLVM_BITCODE_NACL_NACLBITCODERECORDBITSDIST_H +#define LLVM_BITCODE_NACL_NACLBITCODERECORDBITSDIST_H + +#include "llvm/Bitcode/NaCl/NaClBitcodeRecordDist.h" + +namespace llvm { + +/// Defines the element type of a PNaCl bitcode distribution map when +/// we want to count both the number of instances, and the number of +/// bits used by each record. Also tracks the number to times an +/// abbreviation was used to parse the corresponding record. +class NaClBitcodeRecordBitsDistElement : public NaClBitcodeRecordDistElement { + NaClBitcodeRecordBitsDistElement(const NaClBitcodeRecordBitsDistElement&) + LLVM_DELETED_FUNCTION; + void operator=(const NaClBitcodeRecordBitsDistElement&) + LLVM_DELETED_FUNCTION; + +public: + // Create an element with no instances. + NaClBitcodeRecordBitsDistElement( + NaClBitcodeRecordDist* NestedDist) + : NaClBitcodeRecordDistElement(NestedDist), TotalBits(0), NumAbbrevs(0) + {} + + virtual ~NaClBitcodeRecordBitsDistElement() {} + + virtual void Add(const NaClBitcodeRecord &Record) { + NaClBitcodeRecordDistElement::Add(Record); + TotalBits += Record.GetNumBits(); + if (Record.UsedAnAbbreviation()) { + ++NumAbbrevs; + } + } + + // Returns the total number of bits used to represent all instances + // of this value. + uint64_t GetTotalBits() const { + return TotalBits; + } + + // Returns the number of times an abbreviation was used to represent + // the value. 
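+  //
+  // For example (illustrative sketch), the fraction of instances of a value
+  // that were read via an abbreviation can be computed from an element E
+  // taken out of the containing distribution map as
+  //
+  //   double Fraction = (double) E->GetNumAbbrevs() / E->GetNumInstances();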
+ unsigned GetNumAbbrevs() const { + return NumAbbrevs; + } + +private: + // Number of bits used to represent all instances of the value. + uint64_t TotalBits; + // Number of times an abbreviation is used for the value. + unsigned NumAbbrevs; +}; + +/// Defines a PNaCl bitcode distribution map when we want to count +/// both the number of instances, and the number of bits used by each +/// record. +/// +/// ElementType is assumed to be a derived class of +/// NaClBitcodeRecordBitsDistElement. +template +class NaClBitcodeRecordBitsDist : public NaClBitcodeRecordDist { + NaClBitcodeRecordBitsDist(const NaClBitcodeRecordBitsDist&) + LLVM_DELETED_FUNCTION; + void operator=(const NaClBitcodeRecordBitsDist&) + LLVM_DELETED_FUNCTION; + +public: + NaClBitcodeRecordBitsDist() + : NaClBitcodeRecordDist() {} + + virtual ~NaClBitcodeRecordBitsDist() {} + +protected: + virtual void PrintRowStats(raw_ostream &Stream, + std::string Indent, + NaClBitcodeRecordDistValue Value) const { + + ElementType *Element = this->at(Value); + Stream << Indent + << format("%7d %6.2f %9lu ", + Element->GetNumInstances(), + (double) Element->GetNumInstances()/ + this->GetTotal()*100.0, + (unsigned long) Element->GetTotalBits()) + << format("%9.2f", + (double) Element->GetTotalBits()/ + Element->GetNumInstances()); + if (Element->GetNumAbbrevs()) + Stream << format(" %7.2f ", + (double) Element->GetNumAbbrevs()/ + Element->GetNumInstances()*100.0); + else + Stream << " "; + } + + virtual void PrintHeader(raw_ostream &Stream, std::string Indent) const { + Stream << Indent + << " Count %Total # Bits Bits/Elmt % Abv " + << this->GetValueHeader() << "\n"; + } +}; + +} + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeRecordDist.h b/include/llvm/Bitcode/NaCl/NaClBitcodeRecordDist.h new file mode 100644 index 000000000000..42b3e2384b6f --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitcodeRecordDist.h @@ -0,0 +1,431 @@ +//===- NaClBitcodeRecordDist.h -----------------------------------*- C++ -*-===// +// Maps distributions of values in PNaCl bitcode records. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Creates a (nestable) distribution map of values in PNaCl bitcode +// records. The domain of these maps is the set of record values being +// tracked. The range is the information associated with each record +// value, including the number of instances of that value. The +// distribution map is nested if the range element contains another +// distribution map. +// +// The goal of distribution maps is to build a (histogram) +// distribution of values in bitcode records, of a PNaCl bitcode +// file. From appropriately built distribution maps, one can infer +// possible new abbreviations that can be used in the PNaCl bitcode +// file. Hence, one of the primary goals of distribution maps is to +// support tools pnacl-bcanalyzer and pnacl-bccompress. +// +// To make the API uniform, distribution maps are updated using +// NaClBitcodeRecords (in NaClBitcodeParser.h). The values from the +// record are defined by the extract method GetValueList, and added +// via Method Add. This same API makes handling of nested +// distribution maps easy by allowing the nested map to extract the +// values it needs, for the distribution it is modeling, independent +// of the distribution map it appears in. 
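+//
+// For example (illustrative sketch), a simple distribution of record codes
+// only needs to define how values are extracted from each record:
+//
+//   class CodeDist
+//       : public NaClBitcodeRecordDist<NaClBitcodeRecordDistElement> {
+//     virtual void GetValueList(const NaClBitcodeRecord &Record,
+//                               ValueListType &ValueList) const {
+//       ValueList.push_back(Record.GetCode());
+//     }
+//   };
+//
+// An instance (say, CodeDist Dist) then accumulates records with
+// Dist.Add(Record) and prints the resulting histogram with
+// Dist.Print(errs()).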
+// +// In addition, there is the virtual method CreateElement that +// creates a new range element in the distribution map. This +// allows the distribution map to do two things: +// +// 1) Add any additional information needed by the element, based +// on the distribution map. +// +// 2) Add a nested distribution map to the created element if +// appropriate. +// +// Distribution maps are sortable (via method GetDistribution). The +// purpose of sorting is to find interesting elements. This is done by +// sorting the values in the domain of the distribution map, based on +// the GetImportance method of the range element. +// +// Method GetImportance defines how (potentially) interesting the +// value is in the distribution. "Interesting" is based on the notion +// of how likely will the value show a case where adding an +// abbreviation will shrink the size of the corresponding bitcode +// file. For most distributions, the number of instances associated +// with the value is the best measure. +// +// However, for cases where multiple domain entries are created for +// the same NaClBitcodeRecord (i.e. method GetValueList defines more +// than one value), things are not so simple. + +// However, for some distributions (such as value index distributions) +// the numbers of instances isn't sufficient. In such cases, you may +// have to look at nested distributions to find important cases. +// +// In the case of value index distributions, when the size of the +// records is the same, all value indices have the same number of +// instances. In this case, "interesting" may be measured in terms of +// the (nested) distribution of the values that can appear at that +// index, and how often each value appears. +// +// The larger the importance, the more interesting the value is +// considered, and sorting is based on moving interesting values to +// the front of the sorted list. +// +// When printing distribution maps, the values are sorted based on +// the importance. By default, importance is based on the number of +// times the value appears in records, putting the most used values +// at the top of the printed list. +// +// Since sorting is expensive, the sorted distribution is built once +// and cached. This cache is flushed whenever the distribution map is +// updated, so that a new sorted distribuition will be generated. +// +// Printing of distribution maps are stylized, so that virtuals can +// easily fill in the necessary data. Each distribution map (nested +// or top-level) has a title, that is retrieved from method GetTitle, +// and is printed first. +// +// Then, a header (showing what each column in the printed histogram +// includes) is printed. This header is generated by method +// PrintHeader. In addition, the (domain) value of the histogram is +// always printed as the last element in a row, and the header +// descriptor for this value is provided by method GetValueHeader. +// +// After the header, rows of the (sorted) distribution map are +// printed. Each row contains a value and a sequence of statistics +// based on the corresponding range element. To allow the printing of +// (optional) nested distributions, The statistics are printed first, +// followed by the value. Method PrintRowStats prints the statistics +// of the range element, and PrintRowValue prints the corresponding +// (domain) value. 
Unless PrintRowValue is overridden, this method +// uses a format string that will right justify the value based on the +// length of the header name (the value returned by GetValueHeader). +// +// If the range element contains a nested distribution map, it is then +// printed below that row, indented further than the current +// distribution map. +// +// Distribution maps are implemented as subclasses of the class +// NaClBitcodeRecordDist, whose domain type is +// NaClBitcodeRecordDistValue, and range elements are subclasses of +// the class NaClBitcodeRecordDistElement. +// +// Simple (non-nested) distribution maps must, at a minimum define +// method GetValueList, to extract values out of the bitcode record. +// In addition, only if the range element needs a non-default +// constructor, one must override the method CreateElement to call the +// appropriate constructor with the appropriate arguments. +// +// Nested distribution maps are created by defining a derived class of +// another distribution map. This derived class must implement +// CreateNestedDistributionMap, which returns the corresponding +// (dynamically) allocated nested distribution map to be associated +// with the element created by CreateElement. + +#ifndef LLVM_BITCODE_NACL_NACLBITCODERECORDDIST_H +#define LLVM_BITCODE_NACL_NACLBITCODERECORDDIST_H + +#include "llvm/Bitcode/NaCl/NaClBitcodeParser.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +namespace llvm { + +/// The domain type of PNaCl bitcode record distribution maps. +typedef uint64_t NaClBitcodeRecordDistValue; + +/// Base class of the range type of PNaCl bitcode record distribution +/// maps. +class NaClBitcodeRecordDistElement; + +/// Type defining the list of values extracted from the corresponding +/// bitcode record. Typically, the list size is one. However, there +/// are cases where a record defines more than one value (i.e. value +/// indices). Hence, this type defines the more generic API for +/// values. +typedef std::vector ValueListType; + +typedef ValueListType::const_iterator ValueListIterator; + +/// Defines a PNaCl bitcode record distribution map. The distribution +/// map is a map from a (record) value, to the corresponding data +/// associated with that value. +/// +/// ElementType is assumed to be a derived class of +/// NaClBitcodeRecordDistElement. +template +class NaClBitcodeRecordDist { + NaClBitcodeRecordDist(const NaClBitcodeRecordDist&) + LLVM_DELETED_FUNCTION; + void operator=(const NaClBitcodeRecordDist&) + LLVM_DELETED_FUNCTION; + +public: + /// Type defining the mapping used to define the distribution. + typedef typename + std::map MappedElement; + + typedef typename MappedElement::const_iterator const_iterator; + + /// Type defining a pair of values used to sort the + /// distribution. The first element is defined by method + /// GetImportance, and the second is the distribution value + /// associated with that importance. + typedef std::pair DistPair; + + /// Type defining the sorted list of (domain) values in the + /// corresponding distribution map. + typedef std::vector Distribution; + + NaClBitcodeRecordDist() + : TableMap(), CachedDistribution(0), Total(0) { + } + + virtual ~NaClBitcodeRecordDist() { + RemoveCachedDistribution(); + for (const_iterator Iter = begin(), IterEnd = end(); + Iter != IterEnd; ++Iter) { + delete Iter->second; + } + } + + /// Number of elements in the distribution map. 
+ size_t size() const { + return TableMap.size(); + } + + /// Iterator at beginning of distribution map. + const_iterator begin() const { + return TableMap.begin(); + } + + /// Iterator at end of distribution map. + const_iterator end() const { + return TableMap.end(); + } + + /// Returns true if the distribution map is empty. + bool empty() const { + return TableMap.empty(); + } + + /// Returns the element associated with the given distribution + /// value. Creates the element if needed. + ElementType *GetElement(NaClBitcodeRecordDistValue Value) { + if (TableMap.find(Value) == TableMap.end()) { + TableMap[Value] = CreateElement(Value); + } + return TableMap[Value]; + } + + /// Returns the element associated with the given distribution + /// value. + ElementType *at(NaClBitcodeRecordDistValue Value) const { + return TableMap.at(Value); + } + + /// Returns the total number of instances held in the distribution + /// map. + unsigned GetTotal() const { + return Total; + } + + /// Adds the value(s) in the given bitcode record to the + /// distribution map. The value(s) based on method GetValueList. + virtual void Add(const NaClBitcodeRecord &Record) { + ValueListType ValueList; + this->GetValueList(Record, ValueList); + if (!ValueList.empty()) { + RemoveCachedDistribution(); + ++Total; + for (ValueListIterator + Iter = ValueList.begin(), + IterEnd = ValueList.end(); + Iter != IterEnd; ++Iter) { + GetElement(*Iter)->Add(Record); + } + } + } + + /// Builds the distribution associated with the distribution map. + /// Warning: The distribution is cached, and hence, only valid while + /// it's contents is not changed. + Distribution *GetDistribution() const { + if (CachedDistribution == 0) Sort(); + return CachedDistribution; + } + + /// Prints out the contents of the distribution map to Stream. + void Print(raw_ostream &Stream, std::string Indent="") const { + Distribution *Dist = this->GetDistribution(); + PrintTitle(Stream, Indent); + PrintHeader(Stream, Indent); + for (size_t I = 0, E = Dist->size(); I != E; ++I) { + const DistPair &Pair = Dist->at(I); + PrintRow(Stream, Indent, Pair.second); + } + } + +protected: + /// Creates a distribution element for the given value. + virtual ElementType *CreateElement(NaClBitcodeRecordDistValue Value) { + return new ElementType(CreateNestedDistributionMap()); + } + + /// Returns the (optional) nested distribution map to be associated + // with the element. Returning 0 implies that no nested distribution map + // will be added to the element. + virtual NaClBitcodeRecordDist* + CreateNestedDistributionMap() { + return 0; + } + + /// If the distribution is cached, remove it. Should be called + /// whenever the distribution map is changed. + void RemoveCachedDistribution() const { + if (CachedDistribution) { + delete CachedDistribution; + CachedDistribution = 0; + } + } + + /// Interrogates the block record, and returns the corresponding + /// values that are being tracked by the distribution map. Must be + /// defined in derived classes. + virtual void GetValueList(const NaClBitcodeRecord &Record, + ValueListType &ValueList) const = 0; + + /// Returns the title to use when printing the distribution map. + virtual const char *GetTitle() const { + return "Distribution"; + } + + /// Returns the header to use when printing the value in the + /// distribution map. + virtual const char *GetValueHeader() const { + return "Value"; + } + + /// Prints out the title of the distribution map. 
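+  /// These hooks are driven by the public Print method. For example
+  /// (illustrative sketch), a populated distribution map Dist can be
+  /// printed at the top level, or indented when nested under another
+  /// report:
+  /// \code
+  ///   Dist.Print(errs());        // top-level report
+  ///   Dist.Print(errs(), "  ");  // indented nested report
+  /// \endcode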
+ virtual void PrintTitle(raw_ostream &Stream, std::string Indent) const { + Stream << Indent << GetTitle() << " (" << size() << " elements):\n\n"; + } + + /// Prints out statistics for the row with the given value. + virtual void PrintRowStats(raw_ostream &Stream, + std::string Indent, + NaClBitcodeRecordDistValue Value) const { + Stream << Indent << format("%7d ", at(Value)->GetNumInstances()) << " "; + } + + /// Prints out Value (in a row) to Stream. If the element contains a + /// nested distribution, that nested distribution will use the given + /// Indent for this distribution to properly indent the nested + /// distribution. + virtual void PrintRowValue(raw_ostream &Stream, + std::string Indent, + NaClBitcodeRecordDistValue Value) const { + std::string ValueFormat; + raw_string_ostream StrStream(ValueFormat); + StrStream << "%" << strlen(GetValueHeader()) << "d"; + StrStream.flush(); + Stream << format(ValueFormat.c_str(), (int) Value); + // TODO(kschimpf): Print nested distribution here if applicable. + // Note: Indent would be used in this context. + } + + // Prints out the header to the printed distribution map. + virtual void PrintHeader(raw_ostream &Stream, std::string Indent) const { + Stream << Indent << " Count " << GetValueHeader() << "\n"; + } + + // Prints out a row in the printed distribution map. + virtual void PrintRow(raw_ostream &Stream, + std::string Indent, + NaClBitcodeRecordDistValue Value) const { + PrintRowStats(Stream, Indent, Value); + PrintRowValue(Stream, Indent, Value); + Stream << "\n"; + } + + /// Sorts the distribution, based on the importance of each element. + void Sort() const { + RemoveCachedDistribution(); + CachedDistribution = new Distribution(); + for (const_iterator Iter = begin(), IterEnd = end(); + Iter != IterEnd; ++Iter) { + const ElementType *Elmt = Iter->second; + if (double Importance = Elmt->GetImportance()) + CachedDistribution->push_back(std::make_pair(Importance, Iter->first)); + } + // Sort in ascending order, based on importance. + std::stable_sort(CachedDistribution->begin(), + CachedDistribution->end()); + // Reverse so most important appear first. + std::reverse(CachedDistribution->begin(), + CachedDistribution->end()); + } + +private: + // Map from the distribution value to the corresponding distribution + // element. + MappedElement TableMap; + // Pointer to the cached distribution. + mutable Distribution *CachedDistribution; + // The total number of instances in the map. + unsigned Total; +}; + +/// Defines the element type of a PNaCl bitcode distribution map. +/// This is the base class for all element types used in +/// NaClBitcodeRecordDist. By default, only the number of instances +/// of the corresponding distribution values is recorded. +class NaClBitcodeRecordDistElement { + NaClBitcodeRecordDistElement(const NaClBitcodeRecordDistElement &) + LLVM_DELETED_FUNCTION; + void operator=(const NaClBitcodeRecordDistElement &) + LLVM_DELETED_FUNCTION; + +public: + // Create an element with no instances. + NaClBitcodeRecordDistElement( + NaClBitcodeRecordDist* NestedDist) + : NestedDist(NestedDist), NumInstances(0) + {} + + virtual ~NaClBitcodeRecordDistElement() { + delete NestedDist; + } + + // Adds an instance of the given record to this instance. + virtual void Add(const NaClBitcodeRecord &Record) { + if (NestedDist) NestedDist->Add(Record); + ++NumInstances; + } + + // Returns the number of instances associated with this element. 
+ unsigned GetNumInstances() const { + return NumInstances; + } + + // Returns the importance of this element, and the number of + // instances associated with it. Used to sort the distribution map, + // where values with larger importance appear first. + virtual double GetImportance() const { + return static_cast(NumInstances); + } + +protected: + // The (optional) nested distribution. + NaClBitcodeRecordDist *NestedDist; + +private: + // The number of instances associated with this element. + unsigned NumInstances; +}; + +} + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitcodeWriterPass.h b/include/llvm/Bitcode/NaCl/NaClBitcodeWriterPass.h new file mode 100644 index 000000000000..33f1990dde6b --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitcodeWriterPass.h @@ -0,0 +1,51 @@ +//===-- NaClBitcodeWriterPass.h - Bitcode writing pass ----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// +/// This file provides a NaCl format bitcode writing pass. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACLBITCODE_WRITER_PASS_H +#define LLVM_BITCODE_NACLBITCODE_WRITER_PASS_H + +#include "llvm/ADT/StringRef.h" + +namespace llvm { +class Module; +class ModulePass; +class raw_ostream; +class PreservedAnalyses; + +/// \brief Create and return a pass that writes the module to the specified +/// ostream. Note that this pass is designed for use with the legacy pass +/// manager. +ModulePass *createNaClBitcodeWriterPass(raw_ostream &Str); + +/// \brief Pass for writing a module of IR out to a bitcode file. +/// +/// Note that this is intended for use with the new pass manager. To construct +/// a pass for the legacy pass manager, use the function above. +class NaClBitcodeWriterPass { + raw_ostream &OS; + +public: + /// \brief Construct a bitcode writer pass around a particular output stream. + explicit NaClBitcodeWriterPass(raw_ostream &OS) : OS(OS) {} + + /// \brief Run the bitcode writer pass, and output the module to the selected + /// output stream. + PreservedAnalyses run(Module *M); + + static StringRef name() { return "NaClBitcodeWriterPass"; } +}; + +} + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h new file mode 100644 index 000000000000..86324dc0d8ec --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitstreamReader.h @@ -0,0 +1,562 @@ +//===- NaClBitstreamReader.h -----------------------------------*- C++ -*-===// +// Low-level bitstream reader interface +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the BitstreamReader class. This class can be used to +// read an arbitrary bitstream, regardless of its contents. 
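+//
+// A reader is typically constructed over the raw bytes of the bitcode image
+// (illustrative sketch; BufStart and BufEnd are hypothetical bounds of the
+// word-aligned region that follows the NaCl bitcode file header):
+//
+//   NaClBitstreamReader Reader(BufStart, BufEnd);
+//
+// Individual NaClBitstreamCursor objects (defined below) are then used to
+// walk the stream.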
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H +#define LLVM_BITCODE_NACL_NACLBITSTREAMREADER_H + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/StreamableMemoryObject.h" +#include +#include + +namespace llvm { + + class Deserializer; + +/// NaClBitstreamReader - This class is used to read from a NaCl +/// bitcode wire format stream, maintaining information that is global +/// to decoding the entire file. While a file is being read, multiple +/// cursors can be independently advanced or skipped around within the +/// file. These are represented by the NaClBitstreamCursor class. +class NaClBitstreamReader { +public: + /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. + /// These describe abbreviations that all blocks of the specified ID inherit. + struct BlockInfo { + unsigned BlockID; + std::vector Abbrevs; + }; +private: + OwningPtr BitcodeBytes; + + std::vector BlockInfoRecords; + + /// \brief Holds the offset of the first byte after the header. + size_t InitialAddress; + + NaClBitstreamReader(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION; + void operator=(const NaClBitstreamReader&) LLVM_DELETED_FUNCTION; +public: + NaClBitstreamReader() : InitialAddress(0) {} + + NaClBitstreamReader(const unsigned char *Start, const unsigned char *End) { + InitialAddress = 0; + init(Start, End); + } + + NaClBitstreamReader(StreamableMemoryObject *Bytes, + size_t MyInitialAddress=0) + : InitialAddress(MyInitialAddress) + { + BitcodeBytes.reset(Bytes); + } + + void init(const unsigned char *Start, const unsigned char *End) { + assert(((End-Start) & 3) == 0 &&"Bitcode stream not a multiple of 4 bytes"); + BitcodeBytes.reset(getNonStreamedMemoryObject(Start, End)); + } + + StreamableMemoryObject &getBitcodeBytes() { return *BitcodeBytes; } + + ~NaClBitstreamReader() { + // Free the BlockInfoRecords. + while (!BlockInfoRecords.empty()) { + BlockInfo &Info = BlockInfoRecords.back(); + // Free blockinfo abbrev info. + for (unsigned i = 0, e = static_cast(Info.Abbrevs.size()); + i != e; ++i) + Info.Abbrevs[i]->dropRef(); + BlockInfoRecords.pop_back(); + } + } + + /// \brief Returns the initial address (after the header) of the input stream. + size_t getInitialAddress() const { + return InitialAddress; + } + + //===--------------------------------------------------------------------===// + // Block Manipulation + //===--------------------------------------------------------------------===// + + /// hasBlockInfoRecords - Return true if we've already read and processed the + /// block info block for this Bitstream. We only process it for the first + /// cursor that walks over it. + bool hasBlockInfoRecords() const { return !BlockInfoRecords.empty(); } + + /// Gets the set of blocks defined in the block info records structure. + void GetBlockInfoBlockIDs(SmallVectorImpl &Out) { + for (size_t i = 0, e = BlockInfoRecords.size(); i != e; ++i) { + Out.push_back(BlockInfoRecords[i].BlockID); + } + } + + /// getBlockInfo - If there is block info for the specified ID, return it, + /// otherwise return null. + const BlockInfo *getBlockInfo(unsigned BlockID) const { + // Common case, the most recent entry matches BlockID. 
+ if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) + return &BlockInfoRecords.back(); + + for (unsigned i = 0, e = static_cast(BlockInfoRecords.size()); + i != e; ++i) + if (BlockInfoRecords[i].BlockID == BlockID) + return &BlockInfoRecords[i]; + return 0; + } + + BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { + if (const BlockInfo *BI = getBlockInfo(BlockID)) + return *const_cast(BI); + + // Otherwise, add a new record. + BlockInfoRecords.push_back(BlockInfo()); + BlockInfoRecords.back().BlockID = BlockID; + return BlockInfoRecords.back(); + } +}; + + +/// NaClBitstreamEntry - When advancing through a bitstream cursor, +/// each advance can discover a few different kinds of entries: +/// Error - Malformed bitcode was found. +/// EndBlock - We've reached the end of the current block, (or the end of the +/// file, which is treated like a series of EndBlock records. +/// SubBlock - This is the start of a new subblock of a specific ID. +/// Record - This is a record with a specific AbbrevID. +/// +struct NaClBitstreamEntry { + enum { + Error, + EndBlock, + SubBlock, + Record + } Kind; + + unsigned ID; + + static NaClBitstreamEntry getError() { + NaClBitstreamEntry E; E.Kind = Error; return E; + } + static NaClBitstreamEntry getEndBlock() { + NaClBitstreamEntry E; E.Kind = EndBlock; return E; + } + static NaClBitstreamEntry getSubBlock(unsigned ID) { + NaClBitstreamEntry E; E.Kind = SubBlock; E.ID = ID; return E; + } + static NaClBitstreamEntry getRecord(unsigned AbbrevID) { + NaClBitstreamEntry E; E.Kind = Record; E.ID = AbbrevID; return E; + } +}; + +/// NaClBitstreamCursor - This represents a position within a bitcode +/// file. There may be multiple independent cursors reading within +/// one bitstream, each maintaining their own local state. +/// +/// Unlike iterators, NaClBitstreamCursors are heavy-weight objects +/// that should not be passed by value. +class NaClBitstreamCursor { + friend class Deserializer; + NaClBitstreamReader *BitStream; + size_t NextChar; + + /// CurWord/word_t - This is the current data we have pulled from the stream + /// but have not returned to the client. This is specifically and + /// intentionally defined to follow the word size of the host machine for + /// efficiency. We use word_t in places that are aware of this to make it + /// perfectly explicit what is going on. + typedef uint32_t word_t; + word_t CurWord; + + /// BitsInCurWord - This is the number of bits in CurWord that are valid. This + /// is always from [0...31/63] inclusive (depending on word size). + unsigned BitsInCurWord; + + // CurCodeSize - This is the declared size of code values used for the current + // block, in bits. + NaClBitcodeSelectorAbbrev CurCodeSize; + + /// CurAbbrevs - Abbrevs installed at in this block. + std::vector CurAbbrevs; + + struct Block { + NaClBitcodeSelectorAbbrev PrevCodeSize; + std::vector PrevAbbrevs; + explicit Block() : PrevCodeSize() {} + explicit Block(const NaClBitcodeSelectorAbbrev& PCS) + : PrevCodeSize(PCS) {} + }; + + /// BlockScope - This tracks the codesize of parent blocks. 
+ SmallVector BlockScope; + +public: + NaClBitstreamCursor() : BitStream(0), NextChar(0) { + } + NaClBitstreamCursor(const NaClBitstreamCursor &RHS) + : BitStream(0), NextChar(0) { + operator=(RHS); + } + + explicit NaClBitstreamCursor(NaClBitstreamReader &R) : BitStream(&R) { + NextChar = R.getInitialAddress(); + CurWord = 0; + BitsInCurWord = 0; + } + + void init(NaClBitstreamReader &R) { + freeState(); + + BitStream = &R; + NextChar = R.getInitialAddress(); + CurWord = 0; + BitsInCurWord = 0; + } + + ~NaClBitstreamCursor() { + freeState(); + } + + void operator=(const NaClBitstreamCursor &RHS); + + void freeState(); + + bool isEndPos(size_t pos) { + return BitStream->getBitcodeBytes().isObjectEnd(static_cast(pos)); + } + + bool canSkipToPos(size_t pos) const { + // pos can be skipped to if it is a valid address or one byte past the end. + return pos == 0 || BitStream->getBitcodeBytes().isValidAddress( + static_cast(pos - 1)); + } + + bool AtEndOfStream() { + return BitsInCurWord == 0 && isEndPos(NextChar); + } + + /// getAbbrevIDWidth - Return the number of bits used to encode an abbrev #. + unsigned getAbbrevIDWidth() const { return CurCodeSize.NumBits; } + + /// GetCurrentBitNo - Return the bit # of the bit we are reading. + uint64_t GetCurrentBitNo() const { + return NextChar*CHAR_BIT - BitsInCurWord; + } + + NaClBitstreamReader *getBitStreamReader() { + return BitStream; + } + const NaClBitstreamReader *getBitStreamReader() const { + return BitStream; + } + + /// Flags that modify the behavior of advance(). + enum { + /// AF_DontPopBlockAtEnd - If this flag is used, the advance() method does + /// not automatically pop the block scope when the end of a block is + /// reached. + AF_DontPopBlockAtEnd = 1, + + /// AF_DontAutoprocessAbbrevs - If this flag is used, abbrev entries are + /// returned just like normal records. + AF_DontAutoprocessAbbrevs = 2 + }; + + /// advance - Advance the current bitstream, returning the next entry in the + /// stream. + NaClBitstreamEntry advance(unsigned Flags = 0) { + while (1) { + unsigned Code = ReadCode(); + if (Code == naclbitc::END_BLOCK) { + // Pop the end of the block unless Flags tells us not to. + if (!(Flags & AF_DontPopBlockAtEnd) && ReadBlockEnd()) + return NaClBitstreamEntry::getError(); + return NaClBitstreamEntry::getEndBlock(); + } + + if (Code == naclbitc::ENTER_SUBBLOCK) + return NaClBitstreamEntry::getSubBlock(ReadSubBlockID()); + + if (Code == naclbitc::DEFINE_ABBREV && + !(Flags & AF_DontAutoprocessAbbrevs)) { + // We read and accumulate abbrev's, the client can't do anything with + // them anyway. + ReadAbbrevRecord(); + continue; + } + + return NaClBitstreamEntry::getRecord(Code); + } + } + + /// advanceSkippingSubblocks - This is a convenience function for clients that + /// don't expect any subblocks. This just skips over them automatically. + NaClBitstreamEntry advanceSkippingSubblocks(unsigned Flags = 0) { + while (1) { + // If we found a normal entry, return it. + NaClBitstreamEntry Entry = advance(Flags); + if (Entry.Kind != NaClBitstreamEntry::SubBlock) + return Entry; + + // If we found a sub-block, just skip over it and check the next entry. + if (SkipBlock()) + return NaClBitstreamEntry::getError(); + } + } + + /// JumpToBit - Reset the stream to the specified bit number. 
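[Illustrative note, not part of the patch] A small standalone sketch of the bit-address arithmetic behind GetCurrentBitNo/JumpToBit: a bit position is split into a word-aligned byte offset plus a bit offset inside that word, shown here for the 32-bit stream word this reader uses.

#include <cassert>
#include <cstdio>
#include <stdint.h>

int main() {
  const size_t WordSize = sizeof(uint32_t);   // 4-byte stream words
  uint64_t BitNo = 77;                        // arbitrary bit position

  // Round the byte offset down to a word boundary, keep the rest as bits.
  uint64_t ByteNo    = (BitNo / 8) & ~(uint64_t)(WordSize - 1);
  unsigned WordBitNo = (unsigned)(BitNo & (WordSize * 8 - 1));

  assert(ByteNo * 8 + WordBitNo == BitNo);
  std::printf("bit %llu -> byte %llu, bit-in-word %u\n",
              (unsigned long long)BitNo, (unsigned long long)ByteNo, WordBitNo);
  return 0;
}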
+ void JumpToBit(uint64_t BitNo) { + uintptr_t ByteNo = uintptr_t(BitNo/8) & ~(sizeof(word_t)-1); + unsigned WordBitNo = unsigned(BitNo & (sizeof(word_t)*8-1)); + assert(canSkipToPos(ByteNo) && "Invalid location"); + + // Move the cursor to the right word. + NextChar = ByteNo; + BitsInCurWord = 0; + CurWord = 0; + + // Skip over any bits that are already consumed. + if (WordBitNo) { + if (sizeof(word_t) > 4) + Read64(WordBitNo); + else + Read(WordBitNo); + } + } + + uint32_t Read(unsigned NumBits) { + assert(NumBits && NumBits <= 32 && + "Cannot return zero or more than 32 bits!"); + + // If the field is fully contained by CurWord, return it quickly. + if (BitsInCurWord >= NumBits) { + uint32_t R = uint32_t(CurWord) & (~0U >> (32-NumBits)); + CurWord >>= NumBits; + BitsInCurWord -= NumBits; + return R; + } + + // If we run out of data, stop at the end of the stream. + if (isEndPos(NextChar)) { + CurWord = 0; + BitsInCurWord = 0; + return 0; + } + + uint32_t R = uint32_t(CurWord); + + // Read the next word from the stream. + uint8_t Array[sizeof(word_t)] = {0}; + + BitStream->getBitcodeBytes().readBytes(NextChar, sizeof(Array), + Array); + + // Handle big-endian byte-swapping if necessary. + support::detail::packed_endian_specific_integral + EndianValue; + memcpy(&EndianValue, Array, sizeof(Array)); + + CurWord = EndianValue; + + NextChar += sizeof(word_t); + + // Extract NumBits-BitsInCurWord from what we just read. + unsigned BitsLeft = NumBits-BitsInCurWord; + + // Be careful here, BitsLeft is in the range [1..32]/[1..64] inclusive. + R |= uint32_t((CurWord & (word_t(~0ULL) >> (sizeof(word_t)*8-BitsLeft))) + << BitsInCurWord); + + // BitsLeft bits have just been used up from CurWord. BitsLeft is in the + // range [1..32]/[1..64] so be careful how we shift. + if (BitsLeft != sizeof(word_t)*8) + CurWord >>= BitsLeft; + else + CurWord = 0; + BitsInCurWord = sizeof(word_t)*8-BitsLeft; + return R; + } + + uint64_t Read64(unsigned NumBits) { + if (NumBits <= 32) return Read(NumBits); + + uint64_t V = Read(32); + return V | (uint64_t)Read(NumBits-32) << 32; + } + + uint32_t ReadVBR(unsigned NumBits) { + uint32_t Piece = Read(NumBits); + if ((Piece & (1U << (NumBits-1))) == 0) + return Piece; + + uint32_t Result = 0; + unsigned NextBit = 0; + while (1) { + Result |= (Piece & ((1U << (NumBits-1))-1)) << NextBit; + + if ((Piece & (1U << (NumBits-1))) == 0) + return Result; + + NextBit += NumBits-1; + Piece = Read(NumBits); + } + } + + // ReadVBR64 - Read a VBR that may have a value up to 64-bits in size. The + // chunk size of the VBR must still be <= 32 bits though. + uint64_t ReadVBR64(unsigned NumBits) { + uint32_t Piece = Read(NumBits); + if ((Piece & (1U << (NumBits-1))) == 0) + return uint64_t(Piece); + + uint64_t Result = 0; + unsigned NextBit = 0; + while (1) { + Result |= uint64_t(Piece & ((1U << (NumBits-1))-1)) << NextBit; + + if ((Piece & (1U << (NumBits-1))) == 0) + return Result; + + NextBit += NumBits-1; + Piece = Read(NumBits); + } + } + +private: + void SkipToFourByteBoundary() { + // If word_t is 64-bits and if we've read less than 32 bits, just dump + // the bits we have up to the next 32-bit boundary. + if (sizeof(word_t) > 4 && + BitsInCurWord >= 32) { + CurWord >>= BitsInCurWord-32; + BitsInCurWord = 32; + return; + } + + BitsInCurWord = 0; + CurWord = 0; + } +public: + + unsigned ReadCode() { + return CurCodeSize.IsFixed + ? 
Read(CurCodeSize.NumBits) + : ReadVBR(CurCodeSize.NumBits); + } + + // Block header: + // [ENTER_SUBBLOCK, blockid, newcodelen, , blocklen] + + /// ReadSubBlockID - Having read the ENTER_SUBBLOCK code, read the BlockID for + /// the block. + unsigned ReadSubBlockID() { + return ReadVBR(naclbitc::BlockIDWidth); + } + + /// SkipBlock - Having read the ENTER_SUBBLOCK abbrevid and a BlockID, skip + /// over the body of this block. If the block record is malformed, return + /// true. + bool SkipBlock() { + // Read and ignore the codelen value. Since we are skipping this block, we + // don't care what code widths are used inside of it. + ReadVBR(naclbitc::CodeLenWidth); + SkipToFourByteBoundary(); + unsigned NumFourBytes = Read(naclbitc::BlockSizeWidth); + + // Check that the block wasn't partially defined, and that the offset isn't + // bogus. + size_t SkipTo = GetCurrentBitNo() + NumFourBytes*4*8; + if (AtEndOfStream() || !canSkipToPos(SkipTo/8)) + return true; + + JumpToBit(SkipTo); + return false; + } + + /// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter + /// the block, and return true if the block has an error. + bool EnterSubBlock(unsigned BlockID, unsigned *NumWordsP = 0); + + bool ReadBlockEnd() { + if (BlockScope.empty()) return true; + + // Block tail: + // [END_BLOCK, ] + SkipToFourByteBoundary(); + + popBlockScope(); + return false; + } + +private: + + void popBlockScope() { + CurCodeSize = BlockScope.back().PrevCodeSize; + + // Delete abbrevs from popped scope. + for (unsigned i = 0, e = static_cast(CurAbbrevs.size()); + i != e; ++i) + CurAbbrevs[i]->dropRef(); + + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + BlockScope.pop_back(); + } + + //===--------------------------------------------------------------------===// + // Record Processing + //===--------------------------------------------------------------------===// + +private: + void readAbbreviatedLiteral(const NaClBitCodeAbbrevOp &Op, + SmallVectorImpl &Vals); + void readAbbreviatedField(const NaClBitCodeAbbrevOp &Op, + SmallVectorImpl &Vals); + void skipAbbreviatedField(const NaClBitCodeAbbrevOp &Op); + +public: + + /// getAbbrev - Return the abbreviation for the specified AbbrevId. + const NaClBitCodeAbbrev *getAbbrev(unsigned AbbrevID) { + unsigned AbbrevNo = AbbrevID-naclbitc::FIRST_APPLICATION_ABBREV; + assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); + return CurAbbrevs[AbbrevNo]; + } + + /// Returns the last (i.e. newest) abbreviation added to the current + /// block. + const NaClBitCodeAbbrev *GetNewestAbbrev() { + assert(CurAbbrevs.size() && "No newest abbrev!"); + return CurAbbrevs.back(); + } + + /// skipRecord - Read the current record and discard it. 
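[Illustrative note, not part of the patch] A hedged usage sketch of the cursor API declared in this header: walk the top-level entries of a stream, skipping nested blocks and decoding records. It assumes an already-loaded, valid PNaCl bitcode image and the usual SmallVectorImpl<uint64_t> operand list for readRecord (the element type is elided in this copy of the header); error handling is minimal.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h"

using namespace llvm;

// Walk every top-level entry; nested blocks are skipped rather than entered.
static bool walkStream(const unsigned char *Start, const unsigned char *End) {
  NaClBitstreamReader Reader(Start, End);
  NaClBitstreamCursor Cursor(Reader);
  SmallVector<uint64_t, 64> Vals;

  while (!Cursor.AtEndOfStream()) {
    NaClBitstreamEntry Entry = Cursor.advance();
    switch (Entry.Kind) {
    case NaClBitstreamEntry::Error:
      return false;                        // malformed bitcode
    case NaClBitstreamEntry::EndBlock:
      break;                               // block scope was already popped
    case NaClBitstreamEntry::SubBlock:
      if (Cursor.SkipBlock())
        return false;                      // bogus block length
      break;
    case NaClBitstreamEntry::Record:
      Vals.clear();
      Cursor.readRecord(Entry.ID, Vals);   // fills Vals; returns the record code
      break;
    }
  }
  return true;
}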
+ void skipRecord(unsigned AbbrevID); + + unsigned readRecord(unsigned AbbrevID, SmallVectorImpl &Vals); + + //===--------------------------------------------------------------------===// + // Abbrev Processing + //===--------------------------------------------------------------------===// + void ReadAbbrevRecord(); + + bool ReadBlockInfoBlock(); +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h new file mode 100644 index 000000000000..a89def8fd216 --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClBitstreamWriter.h @@ -0,0 +1,504 @@ +//===- NaClBitstreamWriter.h - NaCl bitstream writer ------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the BitstreamWriter class. This class can be used to +// write an arbitrary bitstream, regardless of its contents. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLBITSTREAMWRITER_H +#define LLVM_BITCODE_NACL_NACLBITSTREAMWRITER_H + +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Bitcode/NaCl/NaClBitCodes.h" +#include + +namespace llvm { + +class NaClBitstreamWriter { + SmallVectorImpl &Out; + + /// CurBit - Always between 0 and 31 inclusive, specifies the next bit to use. + unsigned CurBit; + + /// CurValue - The current value. Only bits < CurBit are valid. + uint32_t CurValue; + + /// CurCodeSize - This is the declared size of code values used for the + /// current block, in bits. + NaClBitcodeSelectorAbbrev CurCodeSize; + + /// BlockInfoCurBID - When emitting a BLOCKINFO_BLOCK, this is the currently + /// selected BLOCK ID. + unsigned BlockInfoCurBID; + + /// CurAbbrevs - Abbrevs installed at in this block. + std::vector CurAbbrevs; + + struct Block { + NaClBitcodeSelectorAbbrev PrevCodeSize; + unsigned StartSizeWord; + std::vector PrevAbbrevs; + Block(const NaClBitcodeSelectorAbbrev& PCS, unsigned SSW) + : PrevCodeSize(PCS), StartSizeWord(SSW) {} + }; + + /// BlockScope - This tracks the current blocks that we have entered. + std::vector BlockScope; + + /// BlockInfo - This contains information emitted to BLOCKINFO_BLOCK blocks. + /// These describe abbreviations that all blocks of the specified ID inherit. + struct BlockInfo { + unsigned BlockID; + std::vector Abbrevs; + }; + std::vector BlockInfoRecords; + +public: + // BackpatchWord - Backpatch a 32-bit word in the output with the specified + // value. 
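[Illustrative note, not part of the patch] A standalone sketch, in plain C++, of the backpatching trick documented below: reserve a 32-bit little-endian word in the output, learn the real value later, then patch the four bytes in place. This is the same idea the writer uses to fill in a block's length after the block body has been emitted.

#include <cassert>
#include <stdint.h>
#include <vector>

static void writeWord(std::vector<unsigned char> &Out, uint32_t V) {
  Out.push_back((unsigned char)(V >> 0));
  Out.push_back((unsigned char)(V >> 8));
  Out.push_back((unsigned char)(V >> 16));
  Out.push_back((unsigned char)(V >> 24));
}

static void patchWord(std::vector<unsigned char> &Out, size_t ByteNo,
                      uint32_t V) {
  Out[ByteNo + 0] = (unsigned char)(V >> 0);
  Out[ByteNo + 1] = (unsigned char)(V >> 8);
  Out[ByteNo + 2] = (unsigned char)(V >> 16);
  Out[ByteNo + 3] = (unsigned char)(V >> 24);
}

int main() {
  std::vector<unsigned char> Out;
  size_t LenPos = Out.size();
  writeWord(Out, 0);                   // placeholder for a not-yet-known length
  writeWord(Out, 0xdeadbeefu);         // "body" of the block
  writeWord(Out, 0xcafef00du);
  uint32_t BodyWords = (uint32_t)((Out.size() - LenPos) / 4 - 1);
  patchWord(Out, LenPos, BodyWords);   // backpatch: body is 2 words long
  assert(Out[0] == 2);
  return 0;
}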
+ void BackpatchWord(unsigned ByteNo, unsigned NewWord) { + Out[ByteNo++] = (unsigned char)(NewWord >> 0); + Out[ByteNo++] = (unsigned char)(NewWord >> 8); + Out[ByteNo++] = (unsigned char)(NewWord >> 16); + Out[ByteNo ] = (unsigned char)(NewWord >> 24); + } + +private: + void WriteByte(unsigned char Value) { + Out.push_back(Value); + } + + void WriteWord(unsigned Value) { + unsigned char Bytes[4] = { + (unsigned char)(Value >> 0), + (unsigned char)(Value >> 8), + (unsigned char)(Value >> 16), + (unsigned char)(Value >> 24) }; + Out.append(&Bytes[0], &Bytes[4]); + } + + unsigned GetBufferOffset() const { + return Out.size(); + } + + unsigned GetWordIndex() const { + unsigned Offset = GetBufferOffset(); + assert((Offset & 3) == 0 && "Not 32-bit aligned"); + return Offset / 4; + } + +public: + explicit NaClBitstreamWriter(SmallVectorImpl &O) + : Out(O), CurBit(0), CurValue(0), CurCodeSize() {} + + ~NaClBitstreamWriter() { + assert(CurBit == 0 && "Unflused data remaining"); + assert(BlockScope.empty() && CurAbbrevs.empty() && "Block imbalance"); + + // Free the BlockInfoRecords. + while (!BlockInfoRecords.empty()) { + BlockInfo &Info = BlockInfoRecords.back(); + // Free blockinfo abbrev info. + for (unsigned i = 0, e = static_cast(Info.Abbrevs.size()); + i != e; ++i) + Info.Abbrevs[i]->dropRef(); + BlockInfoRecords.pop_back(); + } + } + + /// \brief Retrieve the current position in the stream, in bits. + uint64_t GetCurrentBitNo() const { return GetBufferOffset() * 8 + CurBit; } + + //===--------------------------------------------------------------------===// + // Basic Primitives for emitting bits to the stream. + //===--------------------------------------------------------------------===// + + void Emit(uint32_t Val, unsigned NumBits) { + assert(NumBits && NumBits <= 32 && "Invalid value size!"); + assert((Val & ~(~0U >> (32-NumBits))) == 0 && "High bits set!"); + CurValue |= Val << CurBit; + if (CurBit + NumBits < 32) { + CurBit += NumBits; + return; + } + + // Add the current word. + WriteWord(CurValue); + + if (CurBit) + CurValue = Val >> (32-CurBit); + else + CurValue = 0; + CurBit = (CurBit+NumBits) & 31; + } + + void Emit64(uint64_t Val, unsigned NumBits) { + if (NumBits <= 32) + Emit((uint32_t)Val, NumBits); + else { + Emit((uint32_t)Val, 32); + Emit((uint32_t)(Val >> 32), NumBits-32); + } + } + + void FlushToWord() { + if (CurBit) { + WriteWord(CurValue); + CurBit = 0; + CurValue = 0; + } + } + + void EmitVBR(uint32_t Val, unsigned NumBits) { + assert(NumBits <= 32 && "Too many bits to emit!"); + assert(NumBits > 1 && "Too few bits to emit!"); + uint32_t Threshold = 1U << (NumBits-1); + + // Emit the bits with VBR encoding, NumBits-1 bits at a time. + while (Val >= Threshold) { + Emit((Val & ((1 << (NumBits-1))-1)) | (1 << (NumBits-1)), NumBits); + Val >>= NumBits-1; + } + + Emit(Val, NumBits); + } + + void EmitVBR64(uint64_t Val, unsigned NumBits) { + assert(NumBits <= 32 && "Too many bits to emit!"); + assert(NumBits > 1 && "Too few bits to emit!"); + if ((uint32_t)Val == Val) + return EmitVBR((uint32_t)Val, NumBits); + + uint32_t Threshold = 1U << (NumBits-1); + + // Emit the bits with VBR encoding, NumBits-1 bits at a time. + while (Val >= Threshold) { + Emit(((uint32_t)Val & ((1 << (NumBits-1))-1)) | + (1 << (NumBits-1)), NumBits); + Val >>= NumBits-1; + } + + Emit((uint32_t)Val, NumBits); + } + + /// EmitCode - Emit the specified code. 
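[Illustrative note, not part of the patch] A standalone round-trip sketch of the VBR chunk format used by EmitVBR above and ReadVBR in the reader: each N-bit chunk carries N-1 payload bits plus a high "more chunks follow" bit. Chunks are collected in a plain vector here rather than packed into a real bitstream.

#include <cassert>
#include <stdint.h>
#include <vector>

// Append Val as VBR chunks of NumBits bits each (low chunk first).
static void emitVBR(std::vector<uint32_t> &Chunks, uint32_t Val,
                    unsigned NumBits) {
  uint32_t Threshold = 1u << (NumBits - 1);
  while (Val >= Threshold) {
    Chunks.push_back((Val & (Threshold - 1)) | Threshold);  // continue bit set
    Val >>= NumBits - 1;
  }
  Chunks.push_back(Val);                                     // final chunk
}

// Decode the chunks written by emitVBR.
static uint32_t readVBR(const std::vector<uint32_t> &Chunks,
                        unsigned NumBits) {
  uint32_t Threshold = 1u << (NumBits - 1), Result = 0;
  unsigned Shift = 0;
  for (size_t i = 0; i < Chunks.size(); ++i) {
    Result |= (Chunks[i] & (Threshold - 1)) << Shift;
    if (!(Chunks[i] & Threshold))
      break;                                                 // no continue bit
    Shift += NumBits - 1;
  }
  return Result;
}

int main() {
  std::vector<uint32_t> Chunks;
  emitVBR(Chunks, 1000000, 6);             // VBR6, as used for record operands
  assert(readVBR(Chunks, 6) == 1000000);
  return 0;
}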
+ void EmitCode(unsigned Val) { + if (CurCodeSize.IsFixed) + Emit(Val, CurCodeSize.NumBits); + else + EmitVBR(Val, CurCodeSize.NumBits); + } + + //===--------------------------------------------------------------------===// + // Block Manipulation + //===--------------------------------------------------------------------===// + + /// getBlockInfo - If there is block info for the specified ID, return it, + /// otherwise return null. + BlockInfo *getBlockInfo(unsigned BlockID) { + // Common case, the most recent entry matches BlockID. + if (!BlockInfoRecords.empty() && BlockInfoRecords.back().BlockID == BlockID) + return &BlockInfoRecords.back(); + + for (unsigned i = 0, e = static_cast(BlockInfoRecords.size()); + i != e; ++i) + if (BlockInfoRecords[i].BlockID == BlockID) + return &BlockInfoRecords[i]; + return 0; + } + +private: + // Enter block using CodeLen bits to read the size of the code + // selector associated with the block. + void EnterSubblock(unsigned BlockID, + const NaClBitcodeSelectorAbbrev& CodeLen, + BlockInfo *Info) { + // Block header: + // [ENTER_SUBBLOCK, blockid, newcodelen, , blocklen] + EmitCode(naclbitc::ENTER_SUBBLOCK); + EmitVBR(BlockID, naclbitc::BlockIDWidth); + assert(CodeLen.IsFixed && "Block codelens must be fixed"); + EmitVBR(CodeLen.NumBits, naclbitc::CodeLenWidth); + FlushToWord(); + + unsigned BlockSizeWordIndex = GetWordIndex(); + NaClBitcodeSelectorAbbrev OldCodeSize(CurCodeSize); + + // Emit a placeholder, which will be replaced when the block is popped. + Emit(0, naclbitc::BlockSizeWidth); + + CurCodeSize = CodeLen; + + // Push the outer block's abbrev set onto the stack, start out with an + // empty abbrev set. + BlockScope.push_back(Block(OldCodeSize, BlockSizeWordIndex)); + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + + // If there is a blockinfo for this BlockID, add all the predefined abbrevs + // to the abbrev list. + if (Info) { + for (unsigned i = 0, e = static_cast(Info->Abbrevs.size()); + i != e; ++i) { + CurAbbrevs.push_back(Info->Abbrevs[i]); + Info->Abbrevs[i]->addRef(); + } + } + } + +public: + /// \brief Enter block using CodeLen bits to read the size of the code + /// selector associated with the block. + void EnterSubblock(unsigned BlockID, + const NaClBitcodeSelectorAbbrev& CodeLen) { + EnterSubblock(BlockID, CodeLen, getBlockInfo(BlockID)); + } + + /// \brief Enter block, using a code length based on the number of + /// (global) BlockInfo entries defined for the block. Note: This + /// should be used only if the block doesn't define any local abbreviations. + void EnterSubblock(unsigned BlockID) { + BlockInfo *Info = getBlockInfo(BlockID); + size_t NumAbbrevs = Info ? Info->Abbrevs.size() : 0; + NaClBitcodeSelectorAbbrev DefaultCodeLen( + naclbitc::DEFAULT_MAX_ABBREV+NumAbbrevs); + EnterSubblock(BlockID, DefaultCodeLen, Info); + } + + /// \brief Enter block with the given number of abbreviations. + void EnterSubblock(unsigned BlockID, unsigned NumAbbrev) { + NaClBitcodeSelectorAbbrev CodeLenAbbrev(NumAbbrev); + EnterSubblock(BlockID, CodeLenAbbrev); + } + + void ExitBlock() { + assert(!BlockScope.empty() && "Block scope imbalance!"); + + // Delete all abbrevs. + for (unsigned i = 0, e = static_cast(CurAbbrevs.size()); + i != e; ++i) + CurAbbrevs[i]->dropRef(); + + const Block &B = BlockScope.back(); + + // Block tail: + // [END_BLOCK, ] + EmitCode(naclbitc::END_BLOCK); + FlushToWord(); + + // Compute the size of the block, in words, not counting the size field. 
+ unsigned SizeInWords = GetWordIndex() - B.StartSizeWord - 1; + unsigned ByteNo = B.StartSizeWord*4; + + // Update the block size field in the header of this sub-block. + BackpatchWord(ByteNo, SizeInWords); + + // Restore the inner block's code size and abbrev table. + CurCodeSize = B.PrevCodeSize; + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + BlockScope.pop_back(); + } + + //===--------------------------------------------------------------------===// + // Record Emission + //===--------------------------------------------------------------------===// + +private: + /// EmitAbbreviatedLiteral - Emit a literal value according to its abbrev + /// record. This is a no-op, since the abbrev specifies the literal to use. + template + void EmitAbbreviatedLiteral(const NaClBitCodeAbbrevOp &Op, uintty V) { + assert(Op.isLiteral() && "Not a literal"); + // If the abbrev specifies the literal value to use, don't emit + // anything. + assert(V == Op.getLiteralValue() && + "Invalid abbrev for record!"); + } + + /// EmitAbbreviatedField - Emit a single scalar field value with the specified + /// encoding. + template + void EmitAbbreviatedField(const NaClBitCodeAbbrevOp &Op, uintty V) { + assert(!Op.isLiteral() && "Literals should use EmitAbbreviatedLiteral!"); + + // Encode the value as we are commanded. + switch (Op.getEncoding()) { + default: llvm_unreachable("Unknown encoding!"); + case NaClBitCodeAbbrevOp::Fixed: + if (Op.getEncodingData()) + Emit((unsigned)V, (unsigned)Op.getEncodingData()); + break; + case NaClBitCodeAbbrevOp::VBR: + if (Op.getEncodingData()) + EmitVBR64(V, (unsigned)Op.getEncodingData()); + break; + case NaClBitCodeAbbrevOp::Char6: + Emit(NaClBitCodeAbbrevOp::EncodeChar6((char)V), 6); + break; + } + } + + /// EmitRecordWithAbbrevImpl - This is the core implementation of the record + /// emission code. + template + void EmitRecordWithAbbrevImpl(unsigned Abbrev, + SmallVectorImpl &Vals) { + unsigned AbbrevNo = Abbrev-naclbitc::FIRST_APPLICATION_ABBREV; + assert(AbbrevNo < CurAbbrevs.size() && "Invalid abbrev #!"); + NaClBitCodeAbbrev *Abbv = CurAbbrevs[AbbrevNo]; + + EmitCode(Abbrev); + + unsigned RecordIdx = 0; + for (unsigned i = 0, e = static_cast(Abbv->getNumOperandInfos()); + i != e; ++i) { + const NaClBitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral()) { + assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); + EmitAbbreviatedLiteral(Op, Vals[RecordIdx]); + ++RecordIdx; + } else if (Op.getEncoding() == NaClBitCodeAbbrevOp::Array) { + // Array case. + assert(i+2 == e && "array op not second to last?"); + const NaClBitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // Emit a vbr6 to indicate the number of elements present. + EmitVBR(static_cast(Vals.size()-RecordIdx), 6); + + // Emit each field. + for (unsigned e = Vals.size(); RecordIdx != e; ++RecordIdx) + EmitAbbreviatedField(EltEnc, Vals[RecordIdx]); + } else if (Op.getEncoding() == NaClBitCodeAbbrevOp::Blob) { + report_fatal_error("Blob not allowed"); + } else { // Single scalar field. + assert(RecordIdx < Vals.size() && "Invalid abbrev/record"); + EmitAbbreviatedField(Op, Vals[RecordIdx]); + ++RecordIdx; + } + } + assert(RecordIdx == Vals.size() && "Not all record operands emitted!"); + } + +public: + + /// EmitRecord - Emit the specified record to the stream, using an abbrev if + /// we have one to compress the output. 
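[Illustrative note, not part of the patch] A hedged usage sketch of the writer API above: open a block, emit one unabbreviated record, close the block. It assumes the output buffer element type is char (elided in this copy of the header); the block ID and record code are hypothetical and chosen purely for illustration.

#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h"

using namespace llvm;

static void emitTinyBlock(SmallVectorImpl<char> &Buffer) {
  NaClBitstreamWriter Writer(Buffer);

  const unsigned MyBlockID = 17;        // hypothetical block ID, illustration only
  Writer.EnterSubblock(MyBlockID);      // no local abbreviations in this block

  SmallVector<uint64_t, 4> Vals;
  Vals.push_back(42);                   // a single operand
  Writer.EmitRecord(/*Code=*/1u, Vals); // emitted as an UNABBREV_RECORD

  Writer.ExitBlock();                   // backpatches the block length word
}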
+ template + void EmitRecord(unsigned Code, SmallVectorImpl &Vals, + unsigned Abbrev = 0) { + if (!Abbrev) { + // If we don't have an abbrev to use, emit this in its fully unabbreviated + // form. + EmitCode(naclbitc::UNABBREV_RECORD); + EmitVBR(Code, 6); + EmitVBR(static_cast(Vals.size()), 6); + for (unsigned i = 0, e = static_cast(Vals.size()); i != e; ++i) + EmitVBR64(Vals[i], 6); + return; + } + + // Insert the code into Vals to treat it uniformly. + Vals.insert(Vals.begin(), Code); + + EmitRecordWithAbbrev(Abbrev, Vals); + } + + /// EmitRecordWithAbbrev - Emit a record with the specified abbreviation. + /// Unlike EmitRecord, the code for the record should be included in Vals as + /// the first entry. + template + void EmitRecordWithAbbrev(unsigned Abbrev, SmallVectorImpl &Vals) { + EmitRecordWithAbbrevImpl(Abbrev, Vals); + } + + //===--------------------------------------------------------------------===// + // Abbrev Emission + //===--------------------------------------------------------------------===// + +private: + // Emit the abbreviation as a DEFINE_ABBREV record. + void EncodeAbbrev(NaClBitCodeAbbrev *Abbv) { + EmitCode(naclbitc::DEFINE_ABBREV); + EmitVBR(Abbv->getNumOperandInfos(), 5); + for (unsigned i = 0, e = static_cast(Abbv->getNumOperandInfos()); + i != e; ++i) { + const NaClBitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + Emit(Op.isLiteral(), 1); + if (Op.isLiteral()) { + EmitVBR64(Op.getLiteralValue(), 8); + } else { + Emit(Op.getEncoding(), 3); + if (Op.hasEncodingData()) + EmitVBR64(Op.getEncodingData(), 5); + } + } + } +public: + + /// EmitAbbrev - This emits an abbreviation to the stream. Note that this + /// method takes ownership of the specified abbrev. + unsigned EmitAbbrev(NaClBitCodeAbbrev *Abbv) { + // Emit the abbreviation as a record. + EncodeAbbrev(Abbv); + CurAbbrevs.push_back(Abbv); + return static_cast(CurAbbrevs.size())-1 + + naclbitc::FIRST_APPLICATION_ABBREV; + } + + //===--------------------------------------------------------------------===// + // BlockInfo Block Emission + //===--------------------------------------------------------------------===// + + /// EnterBlockInfoBlock - Start emitting the BLOCKINFO_BLOCK. + void EnterBlockInfoBlock() { + EnterSubblock(naclbitc::BLOCKINFO_BLOCK_ID); + BlockInfoCurBID = ~0U; + } +private: + /// SwitchToBlockID - If we aren't already talking about the specified block + /// ID, emit a BLOCKINFO_CODE_SETBID record. + void SwitchToBlockID(unsigned BlockID) { + if (BlockInfoCurBID == BlockID) return; + SmallVector V; + V.push_back(BlockID); + EmitRecord(naclbitc::BLOCKINFO_CODE_SETBID, V); + BlockInfoCurBID = BlockID; + } + + BlockInfo &getOrCreateBlockInfo(unsigned BlockID) { + if (BlockInfo *BI = getBlockInfo(BlockID)) + return *BI; + + // Otherwise, add a new record. + BlockInfoRecords.push_back(BlockInfo()); + BlockInfoRecords.back().BlockID = BlockID; + return BlockInfoRecords.back(); + } + +public: + + /// EmitBlockInfoAbbrev - Emit a DEFINE_ABBREV record for the specified + /// BlockID. + unsigned EmitBlockInfoAbbrev(unsigned BlockID, NaClBitCodeAbbrev *Abbv) { + SwitchToBlockID(BlockID); + EncodeAbbrev(Abbv); + + // Add the abbrev to the specified block record. 
+ BlockInfo &Info = getOrCreateBlockInfo(BlockID); + Info.Abbrevs.push_back(Abbv); + + return Info.Abbrevs.size()-1+naclbitc::FIRST_APPLICATION_ABBREV; + } +}; + + +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClCommonBitcodeRecordDists.h b/include/llvm/Bitcode/NaCl/NaClCommonBitcodeRecordDists.h new file mode 100644 index 000000000000..cf298742cfa6 --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClCommonBitcodeRecordDists.h @@ -0,0 +1,91 @@ +//===-- NaClCommonBitcodeRecordDists.cpp - Bitcode Analyzer ---------------===// +// Defines distribution maps for various values in bitcode records. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Defines simple (non-nested) distribution maps for (common) values +// appearing in bitcode records (instances of class NaClBitcodeRecord +// in NaClBitcodeParser.h). This includes records for tracking: +// +// 1) BlockID's appearing in the bitcode file. +// +// 2) Record Code's appearing in blocks with a given BlockID. +// +// 3) Record abbreviations used for records in blocks with a given +// BlockID. +// +// 4) Value indicies defined in records, in blocks with a given +// BlockID. +// +// 5) Values in records, in blocks with a given BlockID. +// +// TODO(kschimpf) Define records 1, 3, 4, and 5. + +#ifndef LLVM_BITCODE_NACL_NACLCOMMONBITCODERECORDDISTS_H +#define LLVM_BITCODE_NACL_NACLCOMMONBITCODERECORDDISTS_H + +#include "llvm/Bitcode/NaCl/NaClBitcodeRecordBitsDist.h" + +namespace llvm { + +// Collects the distribution of record codes/number of bits used for +// a particular blockID. +class NaClBitcodeRecordCodeDist + : public NaClBitcodeRecordBitsDist { + NaClBitcodeRecordCodeDist(const NaClBitcodeRecordCodeDist&) + LLVM_DELETED_FUNCTION; + void operator=(const NaClBitcodeRecordCodeDist&) + LLVM_DELETED_FUNCTION; + +public: + NaClBitcodeRecordCodeDist(unsigned BlockID) : BlockID(BlockID) {} + + virtual ~NaClBitcodeRecordCodeDist() {} + + virtual void GetValueList(const NaClBitcodeRecord &Record, + ValueListType &ValueList) const { + if (Record.GetEntryKind() == NaClBitstreamEntry::Record) { + ValueList.push_back(Record.GetCode()); + } + } + + virtual const char *GetTitle() const { + return "Record Histogram:"; + } + + virtual const char *GetValueHeader() const { + return "Record Kind"; + } + + virtual void PrintRowValue(raw_ostream &Stream, + std::string Indent, + NaClBitcodeRecordDistValue Value) const { + Stream << GetCodeName(Value, BlockID); + // TODO(kschimpf) handle nested distribution maps if defined. + } + + // Returns true if there is a known printable name for record code + // CodeID in block associated with BlockID. + static bool HasKnownCodeName(unsigned CodeID, unsigned BlockID); + + // Returns the printable name for record code CodeID in blocks + // associated with BlockID. + // + // Note: If the name is not known, an "UnknownCode" name is + // generated and return. + static std::string GetCodeName(unsigned CodeID, unsigned BlockID); + +private: + // The blockID associated with the record code distribution. + // Used so that we can look up the print name for each record code. 
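[Illustrative note, not part of the patch] A trivial standalone sketch of the fallback naming convention GetCodeName documents above, for record codes with no known printable name.

#include <sstream>
#include <string>

// Mirrors the "UnknownCode" convention: generate a name from the raw code.
static std::string unknownCodeName(unsigned CodeID) {
  std::ostringstream Name;
  Name << "UnknownCode" << CodeID;
  return Name.str();
}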
+ unsigned BlockID; +}; + +} + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClLLVMBitCodes.h b/include/llvm/Bitcode/NaCl/NaClLLVMBitCodes.h new file mode 100644 index 000000000000..b11c4676e248 --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClLLVMBitCodes.h @@ -0,0 +1,369 @@ +//===- NaClLLVMBitCodes.h ---------------------------------------*- C++ -*-===// +// Enum values for the NaCl bitcode wire format +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines Bitcode enum values for NaCl bitcode wire format. +// +// The enum values defined in this file should be considered permanent. If +// new features are added, they should have values added at the end of the +// respective lists. +// +// Note: PNaCl version 1 is no longer supported, and has been removed from +// comments. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLLLVMBITCODES_H +#define LLVM_BITCODE_NACL_NACLLLVMBITCODES_H + +#include "llvm/Bitcode/NaCl/NaClBitCodes.h" + +namespace llvm { +namespace naclbitc { + // The only top-level block type defined is for a module. + enum NaClBlockIDs { + // Blocks + MODULE_BLOCK_ID = FIRST_APPLICATION_BLOCKID, + + // Module sub-block id's. + PARAMATTR_BLOCK_ID, // Not used in PNaCl. + PARAMATTR_GROUP_BLOCK_ID, // Not used in PNaCl. + + CONSTANTS_BLOCK_ID, + FUNCTION_BLOCK_ID, + + UNUSED_ID1, + + VALUE_SYMTAB_BLOCK_ID, + METADATA_BLOCK_ID, // Not used in PNaCl. + METADATA_ATTACHMENT_ID, // Not used in PNaCl. + + TYPE_BLOCK_ID_NEW, + + USELIST_BLOCK_ID, // Not used in PNaCl. + GLOBALVAR_BLOCK_ID + }; + + + /// MODULE blocks have a number of optional fields and subblocks. + enum NaClModuleCodes { + MODULE_CODE_VERSION = 1, // VERSION: [version#] + MODULE_CODE_TRIPLE = 2, // Not used in PNaCl + MODULE_CODE_DATALAYOUT = 3, // Not used in PNaCl + MODULE_CODE_ASM = 4, // Not used in PNaCl + MODULE_CODE_SECTIONNAME = 5, // Not used in PNaCl + MODULE_CODE_DEPLIB = 6, // Not used in PNaCl + MODULE_CODE_GLOBALVAR = 7, // Not used in PNaCl + // FUNCTION: [type, callingconv, isproto, linkage] + MODULE_CODE_FUNCTION = 8, + MODULE_CODE_ALIAS = 9, // Not used in PNaCl + MODULE_CODE_PURGEVALS = 10, // Not used in PNaCl + MODULE_CODE_GCNAME = 11 // Not used in PNaCl + }; + + /// PARAMATTR blocks have code for defining a parameter attribute set. + enum NaClAttributeCodes { + // FIXME: Remove `PARAMATTR_CODE_ENTRY_OLD' in 4.0 + PARAMATTR_CODE_ENTRY_OLD = 1, // ENTRY: [paramidx0, attr0, + // paramidx1, attr1...] + PARAMATTR_CODE_ENTRY = 2, // ENTRY: [paramidx0, attrgrp0, + // paramidx1, attrgrp1, ...] + PARAMATTR_GRP_CODE_ENTRY = 3 // ENTRY: [id, attr0, att1, ...] + }; + + /// TYPE blocks have codes for each type primitive they use. + enum NaClTypeCodes { + TYPE_CODE_NUMENTRY = 1, // NUMENTRY: [numentries] + + // Type Codes + TYPE_CODE_VOID = 2, // VOID + TYPE_CODE_FLOAT = 3, // FLOAT + TYPE_CODE_DOUBLE = 4, // DOUBLE + // TODO(mseaborn): Remove LABEL when we drop support for v1 of the + // PNaCl bitcode format. The writer no longer generates it. + TYPE_CODE_LABEL = 5, // LABEL + TYPE_CODE_OPAQUE = 6, // Not used in PNaCl. + TYPE_CODE_INTEGER = 7, // INTEGER: [width] + TYPE_CODE_POINTER = 8, // POINTER: [pointee type] + + TYPE_CODE_FUNCTION_OLD = 9, // Not used in PNaCl. + + TYPE_CODE_HALF = 10, // Not used in PNaCl. 
+ + TYPE_CODE_ARRAY = 11, // Not used in PNaCl. + TYPE_CODE_VECTOR = 12, // Not used in PNaCl. + + // These are not with the other floating point types because they're + // a late addition, and putting them in the right place breaks + // binary compatibility. + TYPE_CODE_X86_FP80 = 13, // Not used in PNaCl. + TYPE_CODE_FP128 = 14, // Not used in PNaCl. + TYPE_CODE_PPC_FP128= 15, // Not used in PNaCl. + + TYPE_CODE_METADATA = 16, // Not used in PNaCl. + + TYPE_CODE_X86_MMX = 17, // Not used in PNaCl. + + TYPE_CODE_STRUCT_ANON = 18, // Not used in PNaCl. + TYPE_CODE_STRUCT_NAME = 19, // Not used in PNaCl. + TYPE_CODE_STRUCT_NAMED = 20,// Not used in PNaCl. + + TYPE_CODE_FUNCTION = 21 // FUNCTION: [vararg, retty, paramty x N] + }; + + // The type symbol table only has one code (TST_ENTRY_CODE). + enum NaClTypeSymtabCodes { + TST_CODE_ENTRY = 1 // TST_ENTRY: [typeid, namechar x N] + }; + + // The value symbol table only has one code (VST_ENTRY_CODE). + enum NaClValueSymtabCodes { + VST_CODE_ENTRY = 1, // VST_ENTRY: [valid, namechar x N] + VST_CODE_BBENTRY = 2 // VST_BBENTRY: [bbid, namechar x N] + }; + + // Not used in PNaCl. + enum NaClMetadataCodes { + METADATA_STRING = 1, // MDSTRING: [values] + // 2 is unused. + // 3 is unused. + METADATA_NAME = 4, // STRING: [values] + // 5 is unused. + METADATA_KIND = 6, // [n x [id, name]] + // 7 is unused. + METADATA_NODE = 8, // NODE: [n x (type num, value num)] + METADATA_FN_NODE = 9, // FN_NODE: [n x (type num, value num)] + METADATA_NAMED_NODE = 10, // NAMED_NODE: [n x mdnodes] + METADATA_ATTACHMENT = 11 // [m x [value, [n x [id, mdnode]]] + }; + + // The constants block (CONSTANTS_BLOCK_ID) describes emission for each + // constant and maintains an implicit current type value. + enum NaClConstantsCodes { + CST_CODE_SETTYPE = 1, // SETTYPE: [typeid] + CST_CODE_NULL = 2, // Not used in PNaCl. + CST_CODE_UNDEF = 3, // UNDEF + CST_CODE_INTEGER = 4, // INTEGER: [intval] + CST_CODE_WIDE_INTEGER = 5, // Not used in PNaCl. + CST_CODE_FLOAT = 6, // FLOAT: [fpval] + CST_CODE_AGGREGATE = 7, // Not used in PNaCl. + CST_CODE_STRING = 8, // Not used in PNaCl. + CST_CODE_CSTRING = 9, // Not used in PNaCl. + CST_CODE_CE_BINOP = 10, // Not used in PNaCl. + CST_CODE_CE_CAST = 11, // Not used in PNaCl. + CST_CODE_CE_GEP = 12, // Not used in PNaCl. + CST_CODE_CE_SELECT = 13, // Not used in PNaCl. + CST_CODE_CE_EXTRACTELT = 14, // Not used in PNaCl. + CST_CODE_CE_INSERTELT = 15, // Not used in PNaCl. + CST_CODE_CE_SHUFFLEVEC = 16, // Not used in PNaCl. + CST_CODE_CE_CMP = 17, // Not used in PNaCl. + CST_CODE_INLINEASM_OLD = 18, // No longer used. + CST_CODE_CE_SHUFVEC_EX = 19, // Not used in PNaCl. + CST_CODE_CE_INBOUNDS_GEP = 20,// Not used in PNaCl. + CST_CODE_BLOCKADDRESS = 21, // Not used in PNaCl. + CST_CODE_DATA = 22, // Not used in PNaCl. + CST_CODE_INLINEASM = 23 // Not used in PNaCl. + }; + + /// GlobalVarOpcodes - These are values used in the bitcode files to + /// encode records defining global variables. + /// + /// The structure of global variables can be summarized as follows: + /// + /// The global variable block begins with a GLOBALVAR_COUNT, defining + /// the number of global variables in the bitcode file. After that, + /// each global variable is defined. + /// + /// Global variables are defined by a GLOBALVAR_VAR record, followed + /// by 1 or more records defining its initial value. Simple + /// variables have a single initializer. 
Structured variables are + /// defined by an initial GLOBALVAR_COMPOUND record defining the + /// number of fields in the structure, followed by an initializer + /// for each of its fields. In this context, a field is either data, + /// or a relocation. A data field is defined by a + /// GLOBALVAR_ZEROFILL or GLOBALVAR_DATA record. A relocation field + /// is defined by a GLOBALVAR_RELOC record. + enum NaClGlobalVarOpcodes { + GLOBALVAR_VAR = 0, // VAR: [align, isconst] + GLOBALVAR_COMPOUND = 1, // COMPOUND: [size] + GLOBALVAR_ZEROFILL = 2, // ZEROFILL: [size] + GLOBALVAR_DATA = 3, // DATA: [b0, b1, ...] + GLOBALVAR_RELOC = 4, // RELOC: [val, [addend]] + GLOBALVAR_COUNT = 5, // COUNT: [n] + }; + + /// CastOpcodes - These are values used in the bitcode files to encode which + /// cast a CST_CODE_CE_CAST or a XXX refers to. The values of these enums + /// have no fixed relation to the LLVM IR enum values. Changing these will + /// break compatibility with old files. + enum NaClCastOpcodes { + CAST_TRUNC = 0, + CAST_ZEXT = 1, + CAST_SEXT = 2, + CAST_FPTOUI = 3, + CAST_FPTOSI = 4, + CAST_UITOFP = 5, + CAST_SITOFP = 6, + CAST_FPTRUNC = 7, + CAST_FPEXT = 8, + // 9 was CAST_PTRTOINT; not used in PNaCl. + // 10 was CAST_INTTOPTR; not used in PNaCl. + CAST_BITCAST = 11 + }; + + /// BinaryOpcodes - These are values used in the bitcode files to encode which + /// binop a CST_CODE_CE_BINOP or a XXX refers to. The values of these enums + /// have no fixed relation to the LLVM IR enum values. Changing these will + /// break compatibility with old files. + enum NaClBinaryOpcodes { + BINOP_ADD = 0, + BINOP_SUB = 1, + BINOP_MUL = 2, + BINOP_UDIV = 3, + BINOP_SDIV = 4, // overloaded for FP + BINOP_UREM = 5, + BINOP_SREM = 6, // overloaded for FP + BINOP_SHL = 7, + BINOP_LSHR = 8, + BINOP_ASHR = 9, + BINOP_AND = 10, + BINOP_OR = 11, + BINOP_XOR = 12 + }; + + /// OverflowingBinaryOperatorOptionalFlags - Flags for serializing + /// OverflowingBinaryOperator's SubclassOptionalData contents. + /// Note: This enum is no longer used in PNaCl, because these + /// flags can't exist in files that meet the PNaCl ABI. + enum NaClOverflowingBinaryOperatorOptionalFlags { + OBO_NO_UNSIGNED_WRAP = 0, + OBO_NO_SIGNED_WRAP = 1 + }; + + /// PossiblyExactOperatorOptionalFlags - Flags for serializing + /// PossiblyExactOperator's SubclassOptionalData contents. + /// Note: This enum is no longer used in PNaCl, because these + /// flags can't exist in files that meet the PNaCl ABI. + enum NaClPossiblyExactOperatorOptionalFlags { + PEO_EXACT = 0 + }; + + /// \brief Flags for serializing floating point binary operators's + /// SubclassOptionalData contents. + /// Note: This enum is no longer used in PNaCl, because these + /// flags shouldn't exist in files that meet the PNaCl ABI, unless + /// they are old. In the latter case, they are ignored by the reader. + enum NaClFloatingPointBinaryOperatorOptionalFlags { + FPO_UNSAFE_ALGEBRA = 0, + FPO_NO_NANS = 1, + FPO_NO_INFS = 2, + FPO_NO_SIGNED_ZEROS = 3, + FPO_ALLOW_RECIPROCAL = 4 + }; + + /// Encoded function calling conventions. + enum NaClCallingConventions { + C_CallingConv = 0 + }; + + /// Encoded comparison predicates. 
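[Illustrative note, not part of the patch] Referring back to the GLOBALVAR_* records documented above, a hedged sketch of the record sequence one would expect for a module with a single structured global consisting of a relocation followed by zero padding. The concrete operand values, in particular the alignment encoding in the VAR record, are illustrative only and not taken from a real pexe.

#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h"
#include <stdint.h>
#include <utility>
#include <vector>

typedef std::pair<unsigned, std::vector<uint64_t> > Record;  // (code, operands)

static std::vector<Record> buildGlobalVarBlock() {
  using namespace llvm::naclbitc;
  std::vector<Record> Recs;
  Recs.push_back(Record(GLOBALVAR_COUNT,    std::vector<uint64_t>(1, 1)));  // one global follows
  Recs.push_back(Record(GLOBALVAR_VAR,      std::vector<uint64_t>(2, 0)));  // [align, isconst] (illustrative)
  Recs.push_back(Record(GLOBALVAR_COMPOUND, std::vector<uint64_t>(1, 2)));  // two fields follow
  Recs.push_back(Record(GLOBALVAR_RELOC,    std::vector<uint64_t>(1, 5)));  // field 1: reloc to value #5
  Recs.push_back(Record(GLOBALVAR_ZEROFILL, std::vector<uint64_t>(1, 12))); // field 2: 12 zero bytes
  return Recs;
}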
+ enum NaClComparisonPredicates { + // Opcode U L G E Intuitive operation + FCMP_FALSE = 0, ///< 0 0 0 0 Always false (always folded) + FCMP_OEQ = 1, ///< 0 0 0 1 True if ordered and equal + FCMP_OGT = 2, ///< 0 0 1 0 True if ordered and greater than + FCMP_OGE = 3, ///< 0 0 1 1 True if ordered and greater than or equal + FCMP_OLT = 4, ///< 0 1 0 0 True if ordered and less than + FCMP_OLE = 5, ///< 0 1 0 1 True if ordered and less than or equal + FCMP_ONE = 6, ///< 0 1 1 0 True if ordered and operands are unequal + FCMP_ORD = 7, ///< 0 1 1 1 True if ordered (no nans) + FCMP_UNO = 8, ///< 1 0 0 0 True if unordered: isnan(X) | isnan(Y) + FCMP_UEQ = 9, ///< 1 0 0 1 True if unordered or equal + FCMP_UGT = 10, ///< 1 0 1 0 True if unordered or greater than + FCMP_UGE = 11, ///< 1 0 1 1 True if unordered, greater than, or equal + FCMP_ULT = 12, ///< 1 1 0 0 True if unordered or less than + FCMP_ULE = 13, ///< 1 1 0 1 True if unordered, less than, or equal + FCMP_UNE = 14, ///< 1 1 1 0 True if unordered or not equal + FCMP_TRUE = 15, ///< 1 1 1 1 Always true (always folded) + ICMP_EQ = 32, ///< equal + ICMP_NE = 33, ///< not equal + ICMP_UGT = 34, ///< unsigned greater than + ICMP_UGE = 35, ///< unsigned greater or equal + ICMP_ULT = 36, ///< unsigned less than + ICMP_ULE = 37, ///< unsigned less or equal + ICMP_SGT = 38, ///< signed greater than + ICMP_SGE = 39, ///< signed greater or equal + ICMP_SLT = 40, ///< signed less than + ICMP_SLE = 41 ///< signed less or equal + }; + + // The function body block (FUNCTION_BLOCK_ID) describes function bodies. It + // can contain a constant block (CONSTANTS_BLOCK_ID). + enum NaClFunctionCodes { + FUNC_CODE_DECLAREBLOCKS = 1, // DECLAREBLOCKS: [n] + + FUNC_CODE_INST_BINOP = 2, // BINOP: [opval, opval, opcode] + // Note: because old PNaCl bitcode files + // may contain flags (which we now ignore), + // the reader must also support: + // BINOP: [opval, opval, opcode, flags] + FUNC_CODE_INST_CAST = 3, // CAST: [opval, destty, castopc] + FUNC_CODE_INST_GEP = 4, // Not used in PNaCl. + FUNC_CODE_INST_SELECT = 5, // Not used in PNaCl. Replaced by VSELECT. + FUNC_CODE_INST_EXTRACTELT = 6, // Not used in PNaCl. + FUNC_CODE_INST_INSERTELT = 7, // Not used in PNaCl. + FUNC_CODE_INST_SHUFFLEVEC = 8, // Not used in PNaCl. + FUNC_CODE_INST_CMP = 9, // Not used in PNaCl. Replaced by CMP2. + FUNC_CODE_INST_RET = 10, // RET: [opval] + FUNC_CODE_INST_BR = 11, // BR: [bb#, bb#, cond] or [bb#] + FUNC_CODE_INST_SWITCH = 12, // SWITCH: [opty, op0, op1, ...] + FUNC_CODE_INST_INVOKE = 13, // Not used in PNaCl. + // 14 is unused. + FUNC_CODE_INST_UNREACHABLE = 15, // UNREACHABLE + + FUNC_CODE_INST_PHI = 16, // PHI: [ty, val0,bb0, ...] + // 17 is unused. + // 18 is unused. + FUNC_CODE_INST_ALLOCA = 19, // ALLOCA: [op, align] + FUNC_CODE_INST_LOAD = 20, // LOAD: [op, align, ty] + // 21 is unused. + // 22 is unused. + FUNC_CODE_INST_VAARG = 23, // Not used in PNaCl. + FUNC_CODE_INST_STORE = 24, // STORE: [ptr, val, align] + // 25 is unused. + FUNC_CODE_INST_EXTRACTVAL = 26, // Not used in PNaCl. + FUNC_CODE_INST_INSERTVAL = 27, // Not used in PNaCl. + // fcmp/icmp returning Int1TY or vector of Int1Ty. Same as CMP, exists to + // support legacy vicmp/vfcmp instructions. + FUNC_CODE_INST_CMP2 = 28, // CMP2: [opval, opval, pred] + // new select on i1 or [N x i1] + FUNC_CODE_INST_VSELECT = 29, // VSELECT: [opval, opval, pred] + FUNC_CODE_INST_INBOUNDS_GEP= 30, // Not used in PNaCl. + FUNC_CODE_INST_INDIRECTBR = 31, // Not used in PNaCl. + // 32 is unused. 
+ FUNC_CODE_DEBUG_LOC_AGAIN = 33, // Not used in PNaCl. + + FUNC_CODE_INST_CALL = 34, // CALL: [cc, fnid, args...] + // See FUNC_CODE_INST_CALL_INDIRECT below. + FUNC_CODE_DEBUG_LOC = 35, // Not used in PNaCl. + FUNC_CODE_INST_FENCE = 36, // Not used in PNaCl. + FUNC_CODE_INST_CMPXCHG = 37, // Not used in PNaCl. + FUNC_CODE_INST_ATOMICRMW = 38, // Not used in PNaCl. + FUNC_CODE_INST_RESUME = 39, // Not used in PNaCl. + FUNC_CODE_INST_LANDINGPAD = 40, // Not used in PNaCl. + FUNC_CODE_INST_LOADATOMIC = 41, // Not used in PNaCl. + FUNC_CODE_INST_STOREATOMIC = 42, // Not used in PNaCl. + FUNC_CODE_INST_FORWARDTYPEREF = 43, // TYPE: [opval, ty] + // CALL_INDIRECT: [cc, fnid, returnty, args...] + FUNC_CODE_INST_CALL_INDIRECT = 44 + }; +} // End naclbitc namespace +} // End llvm namespace + +#endif diff --git a/include/llvm/Bitcode/NaCl/NaClReaderWriter.h b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h new file mode 100644 index 000000000000..c3c15851a044 --- /dev/null +++ b/include/llvm/Bitcode/NaCl/NaClReaderWriter.h @@ -0,0 +1,109 @@ +//===-- llvm/Bitcode/NaCl/NaClReaderWriter.h - ------------------*- C++ -*-===// +// NaCl Bitcode reader/writer. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines interfaces to read and write NaCl bitcode wire format +// files. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_BITCODE_NACL_NACLREADERWRITER_H +#define LLVM_BITCODE_NACL_NACLREADERWRITER_H + +#include + +namespace llvm { + class MemoryBuffer; + class DataStreamer; + class LLVMContext; + class Module; + class raw_ostream; + class NaClBitcodeHeader; + class NaClBitstreamWriter; + + /// \brief Defines the integer bit size used to model pointers in PNaCl. + static const unsigned PNaClIntPtrTypeBitSize = 32; + + /// getNaClLazyBitcodeModule - Read the header of the specified bitcode buffer + /// and prepare for lazy deserialization of function bodies. If successful, + /// this takes ownership of 'buffer' and returns a non-null pointer. On + /// error, this returns null, *does not* take ownership of Buffer, and fills + /// in *ErrMsg with an error description if ErrMsg is non-null. + /// + /// The AcceptSupportedOnly argument is used to decide which PNaCl versions + /// of the PNaCl bitcode to accept. There are three forms: + /// 1) Readable and supported. + /// 2) Readable and unsupported. Allows testing of code before becoming + /// supported, as well as running experiments on the bitcode format. + /// 3) Unreadable. + /// When AcceptSupportedOnly is true, only form 1 is allowed. When + /// AcceptSupportedOnly is false, forms 1 and 2 are allowed. + Module *getNaClLazyBitcodeModule(MemoryBuffer *Buffer, + LLVMContext &Context, + std::string *ErrMsg = 0, + bool AcceptSupportedOnly = true); + + /// getNaClStreamedBitcodeModule - Read the header of the specified stream + /// and prepare for lazy deserialization and streaming of function bodies. + /// On error, this returns null, and fills in *ErrMsg with an error + /// description if ErrMsg is non-null. + /// + /// See getNaClLazyBitcodeModule for an explanation of argument + /// AcceptSupportedOnly. 
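[Illustrative note, not part of the patch] A hedged usage sketch of the reader entry points declared in this header: check the magic bytes with isNaClBitcode, then parse with NaClParseBitcodeFile, which never takes ownership of the buffer. Error handling is minimal and the default AcceptSupportedOnly behaviour is kept.

#include "llvm/Bitcode/NaCl/NaClReaderWriter.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include <string>

using namespace llvm;

static Module *parsePexe(MemoryBuffer *Buffer, LLVMContext &Context,
                         std::string &ErrMsg) {
  const unsigned char *Start =
      (const unsigned char *)Buffer->getBufferStart();
  const unsigned char *End =
      (const unsigned char *)Buffer->getBufferEnd();
  if (!isNaClBitcode(Start, End)) {      // 'PEXE' magic bytes
    ErrMsg = "not a PNaCl bitcode file";
    return 0;
  }
  // By default only supported PNaCl bitcode versions are accepted.
  return NaClParseBitcodeFile(Buffer, Context, &ErrMsg);
}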
+ Module *getNaClStreamedBitcodeModule(const std::string &name, + DataStreamer *streamer, + LLVMContext &Context, + std::string *ErrMsg = 0, + bool AcceptSupportedOnly = true); + + /// NaClParseBitcodeFile - Read the specified bitcode file, + /// returning the module. If an error occurs, this returns null and + /// fills in *ErrMsg if it is non-null. This method *never* takes + /// ownership of Buffer. + /// + /// See getNaClLazyBitcodeModule for an explanation of argument + /// AcceptSupportedOnly. + Module *NaClParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext &Context, + std::string *ErrMsg = 0, + bool AcceptSupportedOnly = true); + + /// NaClWriteBitcodeToFile - Write the specified module to the + /// specified raw output stream, using PNaCl wire format. For + /// streams where it matters, the given stream should be in "binary" + /// mode. + /// + /// The AcceptSupportedOnly argument is used to decide which PNaCl versions + /// of the PNaCl bitcode to generate. There are two forms: + /// 1) Writable and supported. + /// 2) Writable and unsupported. Allows testing of code before becoming + /// supported, as well as running experiments on the bitcode format. + /// When AcceptSupportedOnly is true, only form 1 is allowed. When + /// AcceptSupportedOnly is false, forms 1 and 2 are allowed. + void NaClWriteBitcodeToFile(const Module *M, raw_ostream &Out, + bool AcceptSupportedOnly = true); + + /// isNaClBitcode - Return true if the given bytes are the magic bytes for + /// PNaCl bitcode wire format. + /// + inline bool isNaClBitcode(const unsigned char *BufPtr, + const unsigned char *BufEnd) { + return BufPtr+4 <= BufEnd && + BufPtr[0] == 'P' && + BufPtr[1] == 'E' && + BufPtr[2] == 'X' && + BufPtr[3] == 'E'; + } + + // NaClWriteHeader - Write the contents of the bitcode header to the + // corresponding bitcode stream. + void NaClWriteHeader(const NaClBitcodeHeader &Header, + NaClBitstreamWriter &Stream); + +} // end llvm namespace +#endif diff --git a/include/llvm/IR/InlineAsm.h b/include/llvm/IR/InlineAsm.h index 3398a83e365e..3312fb677188 100644 --- a/include/llvm/IR/InlineAsm.h +++ b/include/llvm/IR/InlineAsm.h @@ -92,6 +92,13 @@ class InlineAsm : public Value { /// static bool Verify(FunctionType *Ty, StringRef Constraints); + // @LOCALMOD-START + /// isAsmMemory - Returns true if the Instruction corresponds to + /// ``asm("":::"memory")``, which is often used as a compiler barrier. + /// + bool isAsmMemory() const; + // @LOCALMOD-END + // Constraint String Parsing enum ConstraintPrefix { isInput, // 'x' diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td index ca05db4982ac..da049d965424 100644 --- a/include/llvm/IR/Intrinsics.td +++ b/include/llvm/IR/Intrinsics.td @@ -511,6 +511,66 @@ def int_convertus : Intrinsic<[llvm_anyint_ty], def int_convertuu : Intrinsic<[llvm_anyint_ty], [llvm_anyint_ty, llvm_i32_ty, llvm_i32_ty]>; +// @LOCALMOD-BEGIN +//===----------------------- Native Client Intrinsics ---------------------===// +// NaCl-specific setjmp/longjmp intrinsics. +// See https://code.google.com/p/nativeclient/issues/detail?id=3429 +def int_nacl_setjmp : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty]>; +def int_nacl_longjmp : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], + [IntrNoReturn]>; + +// Fast built-in version of NaCl's tls_get() IRT interface. +def int_nacl_read_tp : Intrinsic<[llvm_ptr_ty], [], [IntrNoMem]>; + +// The following intrinsics provide target-specific implementations of +// the interface in native_client/src/untrusted/nacl/tls_params.h. 
+// The intrinsic names are basically the functions there without the +// leading underscores. +def int_nacl_tp_tls_offset : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>, + GCCBuiltin<"__builtin_nacl_tp_tls_offset">; +def int_nacl_tp_tdb_offset : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>, + GCCBuiltin<"__builtin_nacl_tp_tdb_offset">; + +// The following intrinsic provides a target-specific constant value to +// indicate the target platform compiled to. The enum values are enumerated +// pnaclintrin.h. +def int_nacl_target_arch : Intrinsic<[llvm_i32_ty], []>, + GCCBuiltin<"__builtin_nacl_target_arch">; + +// Atomic intrinsics. +// +// Volatiles and atomics are encoded through these intrinsics to make +// them platform-independent, remove some of LLVM's legacy, and isolate +// PNaCl from future changes to IR. The intrinsics allow user code to +// use `__sync_*` builtins as well as C11/C++11 atomics. +// +// These are further documented in docs/PNaClLangRef.rst. +// +// Note that IntrReadWriteArgMem is used in all cases to prevent +// reordering. +def int_nacl_atomic_load : Intrinsic<[llvm_anyint_ty], + [LLVMPointerType>, llvm_i32_ty], + [IntrReadWriteArgMem]>; +def int_nacl_atomic_store : Intrinsic<[], + [llvm_anyint_ty, LLVMPointerType>, llvm_i32_ty], + [IntrReadWriteArgMem]>; +def int_nacl_atomic_rmw : Intrinsic<[llvm_anyint_ty], + [llvm_i32_ty, LLVMPointerType>, LLVMMatchType<0>, + llvm_i32_ty], + [IntrReadWriteArgMem]>; +def int_nacl_atomic_cmpxchg : Intrinsic<[llvm_anyint_ty], + [LLVMPointerType>, LLVMMatchType<0>, LLVMMatchType<0>, + llvm_i32_ty, llvm_i32_ty], + [IntrReadWriteArgMem]>; +def int_nacl_atomic_fence : Intrinsic<[], [llvm_i32_ty], + [IntrReadWriteArgMem]>; +def int_nacl_atomic_fence_all : Intrinsic<[], [], + [IntrReadWriteArgMem]>; +def int_nacl_atomic_is_lock_free : Intrinsic<[llvm_i1_ty], + [llvm_i32_ty, llvm_ptr_ty], [IntrNoMem]>, + GCCBuiltin<"__nacl_atomic_is_lock_free">; +// @LOCALMOD-END + //===----------------------------------------------------------------------===// // Target-specific intrinsics //===----------------------------------------------------------------------===// diff --git a/include/llvm/IR/NaClAtomicIntrinsics.h b/include/llvm/IR/NaClAtomicIntrinsics.h new file mode 100644 index 000000000000..b87c1ad77f5c --- /dev/null +++ b/include/llvm/IR/NaClAtomicIntrinsics.h @@ -0,0 +1,110 @@ +//===-- llvm/IR/NaClAtomicIntrinsics.h - NaCl Atomic Intrinsics -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes atomic intrinsic functions that are specific to NaCl. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_NACL_ATOMIC_INTRINSICS_H +#define LLVM_IR_NACL_ATOMIC_INTRINSICS_H + +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Compiler.h" +#include + +namespace llvm { + +namespace NaCl { + +static const size_t NumAtomicIntrinsics = 6; +static const size_t NumAtomicIntrinsicOverloadTypes = 4; +static const size_t MaxAtomicIntrinsicsParameters = 5; + +/// Describe all the atomic intrinsics and their type signature. Most +/// can be overloaded on a type. +class AtomicIntrinsics { +public: + enum ParamType { + NoP, /// No parameter. + Int, /// Overloaded. + Ptr, /// Overloaded. + RMW, /// Atomic RMW operation type. + Mem /// Memory order. 
+ }; + + struct AtomicIntrinsic { + Type *OverloadedType; + Intrinsic::ID ID : 16; + uint8_t Overloaded : 1; + uint8_t NumParams : 7; + uint8_t ParamType[MaxAtomicIntrinsicsParameters]; + + Function *getDeclaration(Module *M) const { + // The atomic intrinsic can be overloaded on zero or one type, + // which is needed to create the function's declaration. + return Intrinsic::getDeclaration( + M, ID, ArrayRef(&OverloadedType, Overloaded ? 1 : 0)); + } + }; + + AtomicIntrinsics(LLVMContext &C); + ~AtomicIntrinsics() {} + + typedef ArrayRef View; + + /// The following three methods give access to atomic intrinsics, or a + /// subset of them, and allows iteration through them. + View allIntrinsicsAndOverloads() const; + View overloadsFor(Intrinsic::ID ID) const; + const AtomicIntrinsic *find(Intrinsic::ID ID, Type *OverloadedType) const; + +private: + AtomicIntrinsic I[NumAtomicIntrinsics][NumAtomicIntrinsicOverloadTypes]; + + AtomicIntrinsics() LLVM_DELETED_FUNCTION; + AtomicIntrinsics(const AtomicIntrinsics &) LLVM_DELETED_FUNCTION; + AtomicIntrinsics &operator=(const AtomicIntrinsics &) LLVM_DELETED_FUNCTION; +}; + +/// Operations that can be represented by the @llvm.nacl.atomic.rmw +/// intrinsic. +/// +/// Do not reorder these values: their order offers forward +/// compatibility of bitcode targeted to NaCl. +enum AtomicRMWOperation { + AtomicInvalid = 0, // Invalid, keep first. + AtomicAdd, + AtomicSub, + AtomicOr, + AtomicAnd, + AtomicXor, + AtomicExchange, + AtomicNum // Invalid, keep last. +}; + +/// Memory orderings supported by C11/C++11. +/// +/// Do not reorder these values: their order offers forward +/// compatibility of bitcode targeted to NaCl. +enum MemoryOrder { + MemoryOrderInvalid = 0, // Invalid, keep first. + MemoryOrderRelaxed, + MemoryOrderConsume, + MemoryOrderAcquire, + MemoryOrderRelease, + MemoryOrderAcquireRelease, + MemoryOrderSequentiallyConsistent, + MemoryOrderNum // Invalid, keep last. +}; + +} // End NaCl namespace + +} // End llvm namespace + +#endif diff --git a/include/llvm/IRReader/IRReader.h b/include/llvm/IRReader/IRReader.h index e2ae5f7164b2..6357a6c77489 100644 --- a/include/llvm/IRReader/IRReader.h +++ b/include/llvm/IRReader/IRReader.h @@ -8,7 +8,8 @@ //===----------------------------------------------------------------------===// // // This file defines functions for reading LLVM IR. They support both -// Bitcode and Assembly, automatically detecting the input format. +// Bitcode, Assembly, and PNaCl file formats, automatically detecting +// the input format. // //===----------------------------------------------------------------------===// @@ -24,31 +25,42 @@ class MemoryBuffer; class SMDiagnostic; class LLVMContext; +// \brief Define the expected format of the file. +enum FileFormat { + // LLVM IR source or bitcode file (as appropriate). + LLVMFormat, + // PNaCl bitcode file. + PNaClFormat +}; + /// If the given MemoryBuffer holds a bitcode image, return a Module for it /// which does lazy deserialization of function bodies. Otherwise, attempt to /// parse it as LLVM Assembly and return a fully populated Module. This /// function *always* takes ownership of the given MemoryBuffer. Module *getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err, - LLVMContext &Context); + LLVMContext &Context, FileFormat Format = LLVMFormat); /// If the given file holds a bitcode image, return a Module /// for it which does lazy deserialization of function bodies. Otherwise, /// attempt to parse it as LLVM Assembly and return a fully populated /// Module. 
Module *getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err, - LLVMContext &Context); + LLVMContext &Context, FileFormat Format = LLVMFormat); /// If the given MemoryBuffer holds a bitcode image, return a Module /// for it. Otherwise, attempt to parse it as LLVM Assembly and return /// a Module for it. This function *always* takes ownership of the given /// MemoryBuffer. -Module *ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, LLVMContext &Context); +Module *ParseIR(MemoryBuffer *Buffer, + SMDiagnostic &Err, + LLVMContext &Context, + FileFormat Format = LLVMFormat); /// If the given file holds a bitcode image, return a Module for it. /// Otherwise, attempt to parse it as LLVM Assembly and return a Module /// for it. Module *ParseIRFile(const std::string &Filename, SMDiagnostic &Err, - LLVMContext &Context); + LLVMContext &Context, FileFormat Format = LLVMFormat); } diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 923571ea4b90..35c5c930f8e0 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -268,6 +268,42 @@ void initializeSLPVectorizerPass(PassRegistry&); void initializeBBVectorizePass(PassRegistry&); void initializeMachineFunctionPrinterPassPass(PassRegistry&); void initializeStackMapLivenessPass(PassRegistry&); +// @LOCALMOD-BEGIN +void initializeAddPNaClExternalDeclsPass(PassRegistry&); +void initializeCanonicalizeMemIntrinsicsPass(PassRegistry&); +void initializeExpandArithWithOverflowPass(PassRegistry&); +void initializeExpandByValPass(PassRegistry&); +void initializeExpandConstantExprPass(PassRegistry&); +void initializeExpandCtorsPass(PassRegistry&); +void initializeExpandGetElementPtrPass(PassRegistry&); +void initializeExpandSmallArgumentsPass(PassRegistry&); +void initializeExpandStructRegsPass(PassRegistry&); +void initializeExpandTlsConstantExprPass(PassRegistry&); +void initializeExpandTlsPass(PassRegistry&); +void initializeExpandVarArgsPass(PassRegistry&); +void initializeFlattenGlobalsPass(PassRegistry&); +void initializeGlobalCleanupPass(PassRegistry&); +void initializeInsertDivideCheckPass(PassRegistry&); +void initializeNaClCcRewritePass(PassRegistry&); +void initializePNaClABIVerifyFunctionsPass(PassRegistry&); +void initializePNaClABIVerifyModulePass(PassRegistry&); +void initializePNaClSjLjEHPass(PassRegistry&); +void initializePromoteI1OpsPass(PassRegistry&); +void initializePromoteIntegersPass(PassRegistry&); +void initializePromoteSimpleStructsPass(PassRegistry&); +void initializePromoteReturnedStructsPass(PassRegistry&); +void initializePromoteStructureArgsPass(PassRegistry&); +void initializeReplaceAggregatesWithIntsPass(PassRegistry&); +void initializeRemoveAsmMemoryPass(PassRegistry&); +void initializeReplacePtrsWithIntsPass(PassRegistry&); +void initializeResolveAliasesPass(PassRegistry&); +void initializeResolvePNaClIntrinsicsPass(PassRegistry&); +void initializeRewriteAtomicsPass(PassRegistry&); +void initializeRewriteLLVMIntrinsicsPass(PassRegistry&); +void initializeRewritePNaClLibraryCallsPass(PassRegistry&); +void initializeStripAttributesPass(PassRegistry&); +void initializeStripMetadataPass(PassRegistry&); +// @LOCALMOD-END } #endif diff --git a/include/llvm/Transforms/NaCl.h b/include/llvm/Transforms/NaCl.h new file mode 100644 index 000000000000..a005a8ccc2fb --- /dev/null +++ b/include/llvm/Transforms/NaCl.h @@ -0,0 +1,92 @@ +//===-- NaCl.h - NaCl Transformations ---------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file 
is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_NACL_H
+#define LLVM_TRANSFORMS_NACL_H
+
+#include "llvm/PassManager.h"
+#include "llvm/IR/LLVMContext.h"
+
+namespace llvm {
+
+class BasicBlockPass;
+class Function;
+class FunctionPass;
+class FunctionType;
+class Instruction;
+class ModulePass;
+class Use;
+class Value;
+
+BasicBlockPass *createExpandGetElementPtrPass();
+BasicBlockPass *createPromoteI1OpsPass();
+FunctionPass *createExpandConstantExprPass();
+FunctionPass *createExpandStructRegsPass();
+FunctionPass *createInsertDivideCheckPass();
+FunctionPass *createPromoteIntegersPass();
+FunctionPass *createRemoveAsmMemoryPass();
+FunctionPass *createResolvePNaClIntrinsicsPass();
+FunctionPass *createReplaceAggregatesWithIntsPass();
+ModulePass *createPromoteReturnedStructsPass();
+ModulePass *createPromoteStructureArgsPass();
+ModulePass *createAddPNaClExternalDeclsPass();
+ModulePass *createCanonicalizeMemIntrinsicsPass();
+ModulePass *createExpandArithWithOverflowPass();
+ModulePass *createExpandByValPass();
+ModulePass *createExpandCtorsPass();
+ModulePass *createExpandSmallArgumentsPass();
+ModulePass *createExpandTlsConstantExprPass();
+ModulePass *createExpandTlsPass();
+ModulePass *createExpandVarArgsPass();
+ModulePass *createFlattenGlobalsPass();
+ModulePass *createGlobalCleanupPass();
+ModulePass *createPNaClSjLjEHPass();
+ModulePass *createPromoteSimpleStructsPass();
+ModulePass *createReplacePtrsWithIntsPass();
+ModulePass *createResolveAliasesPass();
+ModulePass *createRewriteAtomicsPass();
+ModulePass *createRewriteLLVMIntrinsicsPass();
+ModulePass *createRewritePNaClLibraryCallsPass();
+ModulePass *createStripAttributesPass();
+ModulePass *createStripMetadataPass();
+
+void PNaClABISimplifyAddPreOptPasses(PassManagerBase &PM, const bool BuildingLib = false);
+void PNaClABISimplifyAddPostOptPasses(PassManagerBase &PM);
+
+Instruction *PhiSafeInsertPt(Use *U);
+void PhiSafeReplaceUses(Use *U, Value *NewVal);
+
+// Copy debug information from Original to New, and return New.
+template <class T, class U>
+T *CopyDebug(T *New, U *Original) {
+  if (static_cast<Value *>(New) != static_cast<Value *>(Original) &&
+      isa<Instruction>(New) && isa<Instruction>(Original))
+    cast<Instruction>(New)->setMetadata(
+        LLVMContext::MD_dbg,
+        cast<Instruction>(Original)->getMetadata(LLVMContext::MD_dbg));
+  return New;
+}
+
+template <class InstType>
+static void CopyLoadOrStoreAttrs(InstType *Dest, InstType *Src) {
+  Dest->setVolatile(Src->isVolatile());
+  Dest->setAlignment(Src->getAlignment());
+  Dest->setOrdering(Src->getOrdering());
+  Dest->setSynchScope(Src->getSynchScope());
+}
+
+// In order to change a function's type, the function must be
+// recreated. RecreateFunction() recreates Func with type NewType.
+// It copies or moves across everything except the argument values,
+// which the caller must update because the argument types might be
+// different.
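+//
+// For illustration only (a hypothetical caller, not part of this header),
+// a pass that retypes a function's single argument might do roughly:
+//
+//   Type *I32 = Type::getInt32Ty(Func->getContext());
+//   FunctionType *NewType =
+//       FunctionType::get(Func->getReturnType(), I32, /*isVarArg=*/false);
+//   Function *NewFunc = RecreateFunction(Func, NewType);
+//   NewFunc->arg_begin()->takeName(&*Func->arg_begin());
+//   // ...then rewrite uses of the old argument (inserting casts as
+//   // needed) before erasing Func.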
+Function *RecreateFunction(Function *Func, FunctionType *NewType); + +} + +#endif diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index c6d45738852d..601529fd7dc5 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -56,3 +56,4 @@ add_llvm_library(LLVMAnalysis add_dependencies(LLVMAnalysis intrinsics_gen) add_subdirectory(IPA) +add_subdirectory(NaCl) # LOCALMOD diff --git a/lib/Analysis/LLVMBuild.txt b/lib/Analysis/LLVMBuild.txt index a8a8079d1e5a..de734ec3f724 100644 --- a/lib/Analysis/LLVMBuild.txt +++ b/lib/Analysis/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = IPA +subdirectories = IPA NaCl [component_0] type = Library diff --git a/lib/Analysis/Makefile b/lib/Analysis/Makefile index 4af6d350a645..426ed1699dd6 100644 --- a/lib/Analysis/Makefile +++ b/lib/Analysis/Makefile @@ -9,7 +9,7 @@ LEVEL = ../.. LIBRARYNAME = LLVMAnalysis -DIRS = IPA +DIRS = IPA NaCl BUILD_ARCHIVE = 1 include $(LEVEL)/Makefile.common diff --git a/lib/Analysis/NaCl/CMakeLists.txt b/lib/Analysis/NaCl/CMakeLists.txt new file mode 100644 index 000000000000..f62a4e5c0217 --- /dev/null +++ b/lib/Analysis/NaCl/CMakeLists.txt @@ -0,0 +1,7 @@ +add_llvm_library(LLVMNaClAnalysis + PNaClABITypeChecker.cpp + PNaClABIVerifyFunctions.cpp + PNaClABIVerifyModule.cpp + ) + +add_dependencies(LLVMNaClAnalysis intrinsics_gen) diff --git a/lib/Analysis/NaCl/LLVMBuild.txt b/lib/Analysis/NaCl/LLVMBuild.txt new file mode 100644 index 000000000000..b5e7c8a5eaf6 --- /dev/null +++ b/lib/Analysis/NaCl/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Analysis/NaCl/LLVMBuild.txt ----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NaClAnalysis +parent = Analysis +library_name = NaClAnalysis +required_libraries = Analysis Core Support diff --git a/lib/Analysis/NaCl/Makefile b/lib/Analysis/NaCl/Makefile new file mode 100644 index 000000000000..7d03b1e92eb1 --- /dev/null +++ b/lib/Analysis/NaCl/Makefile @@ -0,0 +1,14 @@ +##===- lib/Analysis/NaCl/Makefile-------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMNaClAnalysis +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common \ No newline at end of file diff --git a/lib/Analysis/NaCl/PNaClABITypeChecker.cpp b/lib/Analysis/NaCl/PNaClABITypeChecker.cpp new file mode 100644 index 000000000000..8749abcaa6f5 --- /dev/null +++ b/lib/Analysis/NaCl/PNaClABITypeChecker.cpp @@ -0,0 +1,64 @@ +//===- PNaClABITypeChecker.cpp - Verify PNaCl ABI rules -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+//
+// Common type-checking code for module and function-level passes
+//
+//
+//===----------------------------------------------------------------------===//
+
+#include "PNaClABITypeChecker.h"
+#include "llvm/IR/Constant.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/Metadata.h"
+
+using namespace llvm;
+
+bool PNaClABITypeChecker::isValidParamType(const Type *Ty) {
+  if (!isValidScalarType(Ty))
+    return false;
+  if (const IntegerType *IntTy = dyn_cast<IntegerType>(Ty)) {
+    // PNaCl requires function arguments and return values to be 32
+    // bits or larger. This avoids exposing architecture
+    // ABI-dependent differences about whether arguments or return
+    // values are zero-extended when calling a function with the wrong
+    // prototype.
+    if (IntTy->getBitWidth() < 32)
+      return false;
+  }
+  return true;
+}
+
+bool PNaClABITypeChecker::isValidFunctionType(const FunctionType *FTy) {
+  if (FTy->isVarArg())
+    return false;
+  if (!isValidParamType(FTy->getReturnType()))
+    return false;
+  for (unsigned I = 0, E = FTy->getNumParams(); I < E; ++I) {
+    if (!isValidParamType(FTy->getParamType(I)))
+      return false;
+  }
+  return true;
+}
+
+bool PNaClABITypeChecker::isValidScalarType(const Type *Ty) {
+  switch (Ty->getTypeID()) {
+    case Type::IntegerTyID: {
+      unsigned Width = cast<IntegerType>(Ty)->getBitWidth();
+      return Width == 1 || Width == 8 || Width == 16 ||
+             Width == 32 || Width == 64;
+    }
+    case Type::VoidTyID:
+    case Type::FloatTyID:
+    case Type::DoubleTyID:
+      return true;
+    default:
+      return false;
+  }
+}
diff --git a/lib/Analysis/NaCl/PNaClABITypeChecker.h b/lib/Analysis/NaCl/PNaClABITypeChecker.h
new file mode 100644
index 000000000000..ac3cf850e5e0
--- /dev/null
+++ b/lib/Analysis/NaCl/PNaClABITypeChecker.h
@@ -0,0 +1,47 @@
+//===- PNaClABITypeChecker.h - Verify PNaCl ABI rules ---------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Common type-checking code for module and function-level passes
+//
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIB_ANALYSIS_NACL_CHECKTYPES_H
+#define LIB_ANALYSIS_NACL_CHECKTYPES_H
+
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+class FunctionType;
+
+class PNaClABITypeChecker {
+  // Returns true if Ty is a valid argument or return value type for PNaCl.
+  static bool isValidParamType(const Type *Ty);
+
+ public:
+  // Returns true if Ty is a valid function type for PNaCl.
+  static bool isValidFunctionType(const FunctionType *FTy);
+
+  // Returns true if Ty is a valid non-derived type for PNaCl.
+  static bool isValidScalarType(const Type *Ty);
+
+  // There's no built-in way to get the name of a type, so use a
+  // string ostream to print it.
+ static std::string getTypeName(const Type *T) { + std::string TypeName; + raw_string_ostream N(TypeName); + T->print(N); + return N.str(); + } +}; +} // namespace llvm + +#endif // LIB_ANALYSIS_NACL_CHECKTYPES_H diff --git a/lib/Analysis/NaCl/PNaClABIVerifyFunctions.cpp b/lib/Analysis/NaCl/PNaClABIVerifyFunctions.cpp new file mode 100644 index 000000000000..c524bd43ddcf --- /dev/null +++ b/lib/Analysis/NaCl/PNaClABIVerifyFunctions.cpp @@ -0,0 +1,564 @@ +//===- PNaClABIVerifyFunctions.cpp - Verify PNaCl ABI rules ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Verify function-level PNaCl ABI requirements. +// +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/NaCl.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/NaClAtomicIntrinsics.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" + +#include "PNaClABITypeChecker.h" +using namespace llvm; + +namespace { + +// Checks that examine anything in the function body should be in +// FunctionPasses to make them streaming-friendly +class PNaClABIVerifyFunctions : public FunctionPass { + public: + static char ID; + PNaClABIVerifyFunctions() : + FunctionPass(ID), + Reporter(new PNaClABIErrorReporter), + ReporterIsOwned(true) { + initializePNaClABIVerifyFunctionsPass(*PassRegistry::getPassRegistry()); + } + explicit PNaClABIVerifyFunctions(PNaClABIErrorReporter *Reporter_) : + FunctionPass(ID), + Reporter(Reporter_), + ReporterIsOwned(false) { + initializePNaClABIVerifyFunctionsPass(*PassRegistry::getPassRegistry()); + } + ~PNaClABIVerifyFunctions() { + if (ReporterIsOwned) + delete Reporter; + } + virtual bool doInitialization(Module &M) { + AtomicIntrinsics.reset(new NaCl::AtomicIntrinsics(M.getContext())); + return false; + } + bool runOnFunction(Function &F); + virtual void print(raw_ostream &O, const Module *M) const; + private: + bool IsWhitelistedMetadata(unsigned MDKind); + const char *checkInstruction(const Instruction *Inst); + PNaClABIErrorReporter *Reporter; + bool ReporterIsOwned; + OwningPtr AtomicIntrinsics; +}; + +} // and anonymous namespace + +// There's no built-in way to get the name of an MDNode, so use a +// string ostream to print it. +static std::string getMDNodeString(unsigned Kind, + const SmallVectorImpl &MDNames) { + std::string MDName; + raw_string_ostream N(MDName); + if (Kind < MDNames.size()) { + N << "!" << MDNames[Kind]; + } else { + N << "!"; + } + return N.str(); +} + +bool PNaClABIVerifyFunctions::IsWhitelistedMetadata(unsigned MDKind) { + return MDKind == LLVMContext::MD_dbg && PNaClABIAllowDebugMetadata; +} + +// A valid pointer type is either: +// * a pointer to a valid PNaCl scalar type (except i1), or +// * a function pointer (with valid argument and return types). +// +// i1 is disallowed so that all loads and stores are a whole number of +// bytes, and so that we do not need to define whether a store of i1 +// zero-extends. 
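+//
+// For example (illustrative types, not from this patch): i32*, double* and
+// void (i32)* would be accepted below, while i1*, i32** and
+// i32 addrspace(1)* would be rejected.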
+static bool isValidPointerType(Type *Ty) { + if (PointerType *PtrTy = dyn_cast(Ty)) { + if (PtrTy->getAddressSpace() != 0) + return false; + Type *EltTy = PtrTy->getElementType(); + if (PNaClABITypeChecker::isValidScalarType(EltTy) && + !EltTy->isIntegerTy(1)) + return true; + if (FunctionType *FTy = dyn_cast(EltTy)) + return PNaClABITypeChecker::isValidFunctionType(FTy); + } + return false; +} + +static bool isIntrinsicFunc(const Value *Val) { + if (const Function *F = dyn_cast(Val)) + return F->isIntrinsic(); + return false; +} + +// InherentPtrs may be referenced by casts -- PtrToIntInst and +// BitCastInst -- that produce NormalizedPtrs. +// +// InherentPtrs exclude intrinsic functions in order to prevent taking +// the address of an intrinsic function. InherentPtrs include +// intrinsic calls because some intrinsics return pointer types +// (e.g. nacl.read.tp returns i8*). +static bool isInherentPtr(const Value *Val) { + return isa(Val) || + (isa(Val) && !isIntrinsicFunc(Val)) || + isa(Val); +} + +// NormalizedPtrs may be used where pointer types are required -- for +// loads, stores, etc. Note that this excludes ConstantExprs, +// ConstantPointerNull and UndefValue. +static bool isNormalizedPtr(const Value *Val) { + if (!isValidPointerType(Val->getType())) + return false; + // The bitcast must also be a bitcast of an InherentPtr, but we + // check that when visiting the bitcast instruction. + return isa(Val) || isa(Val) || isInherentPtr(Val); +} + +static bool isValidScalarOperand(const Value *Val) { + // The types of Instructions and Arguments are checked elsewhere + // (when visiting the Instruction or the Function). BasicBlocks are + // included here because branch instructions have BasicBlock + // operands. + if (isa(Val) || isa(Val) || isa(Val)) + return true; + + // Allow some Constants. Note that this excludes ConstantExprs. + return PNaClABITypeChecker::isValidScalarType(Val->getType()) && + (isa(Val) || + isa(Val) || + isa(Val)); +} + +static bool isAllowedAlignment(unsigned Alignment, Type *Ty) { + // Non-atomic integer operations must always use "align 1", since we + // do not want the backend to generate code with non-portable + // undefined behaviour (such as misaligned access faults) if user + // code specifies "align 4" but uses a misaligned pointer. As a + // concession to performance, we allow larger alignment values for + // floating point types. + // + // To reduce the set of alignment values that need to be encoded in + // pexes, we disallow other alignment values. We require alignments + // to be explicit by disallowing Alignment == 0. 
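+  //
+  // For example (illustrative only): "load i32* %p, align 1" and
+  // "load double* %p, align 8" satisfy this check, while
+  // "load i32* %p, align 4" and any access with align 0 do not.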
+ return Alignment == 1 || + (Ty->isDoubleTy() && Alignment == 8) || + (Ty->isFloatTy() && Alignment == 4); +} + +static bool hasAllowedAtomicRMWOperation( + const NaCl::AtomicIntrinsics::AtomicIntrinsic *I, const CallInst *Call) { + for (size_t P = 0; P != I->NumParams; ++P) { + if (I->ParamType[P] != NaCl::AtomicIntrinsics::RMW) + continue; + + const Value *Operation = Call->getOperand(P); + if (!Operation) + return false; + const Constant *C = dyn_cast(Operation); + if (!C) + return false; + const APInt &I = C->getUniqueInteger(); + if (I.ule(NaCl::AtomicInvalid) || I.uge(NaCl::AtomicNum)) + return false; + } + return true; +} + +static bool hasAllowedAtomicMemoryOrder( + const NaCl::AtomicIntrinsics::AtomicIntrinsic *I, const CallInst *Call) { + for (size_t P = 0; P != I->NumParams; ++P) { + if (I->ParamType[P] != NaCl::AtomicIntrinsics::Mem) + continue; + + const Value *MemoryOrder = Call->getOperand(P); + if (!MemoryOrder) + return false; + const Constant *C = dyn_cast(MemoryOrder); + if (!C) + return false; + const APInt &I = C->getUniqueInteger(); + if (I.ule(NaCl::MemoryOrderInvalid) || I.uge(NaCl::MemoryOrderNum)) + return false; + // TODO For now only sequential consistency is allowed. When more + // are allowed we need to validate that the memory order is + // allowed on the specific atomic operation (e.g. no store + // acquire, and relationship between success/failure memory + // order on compare exchange). + if (I != NaCl::MemoryOrderSequentiallyConsistent) + return false; + } + return true; +} + +static bool hasAllowedLockFreeByteSize(const CallInst *Call) { + if (!Call->getType()->isIntegerTy()) + return false; + const Value *Operation = Call->getOperand(0); + if (!Operation) + return false; + const Constant *C = dyn_cast(Operation); + if (!C) + return false; + const APInt &I = C->getUniqueInteger(); + // PNaCl currently only supports atomics of byte size {1,2,4,8} (which + // may or may not be lock-free). These values coincide with + // C11/C++11's supported atomic types. + if (I == 1 || I == 2 || I == 4 || I == 8) + return true; + return false; +} + +// Check the instruction's opcode and its operands. The operands may +// require opcode-specific checking. +// +// This returns an error string if the instruction is rejected, or +// NULL if the instruction is allowed. +const char *PNaClABIVerifyFunctions::checkInstruction(const Instruction *Inst) { + // If the instruction has a single pointer operand, PtrOperandIndex is + // set to its operand index. + unsigned PtrOperandIndex = -1; + + switch (Inst->getOpcode()) { + // Disallowed instructions. Default is to disallow. + // We expand GetElementPtr out into arithmetic. + case Instruction::GetElementPtr: + // VAArg is expanded out by ExpandVarArgs. + case Instruction::VAArg: + // Zero-cost C++ exception handling is not supported yet. + case Instruction::Invoke: + case Instruction::LandingPad: + case Instruction::Resume: + // indirectbr may interfere with streaming + case Instruction::IndirectBr: + // No vector instructions yet + case Instruction::ExtractElement: + case Instruction::InsertElement: + case Instruction::ShuffleVector: + // ExtractValue and InsertValue operate on struct values. + case Instruction::ExtractValue: + case Instruction::InsertValue: + // Atomics should become NaCl intrinsics. 
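+  // (For illustration: an input such as "atomicrmw add i32* %p, i32 1
+  // seq_cst" is expected to have been rewritten into a call to
+  // @llvm.nacl.atomic.rmw, e.g. by the RewriteAtomics pass, before this
+  // verifier runs.)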
+ case Instruction::AtomicCmpXchg: + case Instruction::AtomicRMW: + case Instruction::Fence: + return "bad instruction opcode"; + default: + return "unknown instruction opcode"; + + // Terminator instructions + case Instruction::Ret: + case Instruction::Br: + case Instruction::Unreachable: + // Binary operations + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::FDiv: + case Instruction::FRem: + // Bitwise binary operations + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // Conversion operations + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::FPTrunc: + case Instruction::FPExt: + case Instruction::FPToUI: + case Instruction::FPToSI: + case Instruction::UIToFP: + case Instruction::SIToFP: + // Other operations + case Instruction::FCmp: + case Instruction::PHI: + case Instruction::Select: + break; + + // The following operations are of dubious usefulness on 1-bit + // values. Use of the i1 type is disallowed here so that code + // generators do not need to support these corner cases. + case Instruction::ICmp: + // Binary operations + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::Shl: + case Instruction::LShr: + case Instruction::AShr: + if (Inst->getOperand(0)->getType()->isIntegerTy(1)) + return "arithmetic on i1"; + break; + + // Memory accesses. + case Instruction::Load: { + const LoadInst *Load = cast(Inst); + PtrOperandIndex = Load->getPointerOperandIndex(); + if (Load->isAtomic()) + return "atomic load"; + if (Load->isVolatile()) + return "volatile load"; + if (!isAllowedAlignment(Load->getAlignment(), + Load->getType())) + return "bad alignment"; + if (!isNormalizedPtr(Inst->getOperand(PtrOperandIndex))) + return "bad pointer"; + break; + } + case Instruction::Store: { + const StoreInst *Store = cast(Inst); + PtrOperandIndex = Store->getPointerOperandIndex(); + if (Store->isAtomic()) + return "atomic store"; + if (Store->isVolatile()) + return "volatile store"; + if (!isAllowedAlignment(Store->getAlignment(), + Store->getValueOperand()->getType())) + return "bad alignment"; + if (!isNormalizedPtr(Inst->getOperand(PtrOperandIndex))) + return "bad pointer"; + break; + } + + // Casts. 
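+  // (Illustrative examples: with "%buf = alloca i8, i32 4",
+  // "%a = ptrtoint i8* %buf to i32" and "%p = inttoptr i32 %a to i32*"
+  // are accepted; a ptrtoint to i64, or a bitcast whose pointer operand
+  // is not an InherentPtr, is rejected.)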
+ case Instruction::BitCast: + if (Inst->getType()->isPointerTy()) { + PtrOperandIndex = 0; + if (!isInherentPtr(Inst->getOperand(PtrOperandIndex))) + return "operand not InherentPtr"; + } + break; + case Instruction::IntToPtr: + if (!cast(Inst)->getSrcTy()->isIntegerTy(32)) + return "non-i32 inttoptr"; + break; + case Instruction::PtrToInt: + PtrOperandIndex = 0; + if (!isInherentPtr(Inst->getOperand(PtrOperandIndex))) + return "operand not InherentPtr"; + if (!Inst->getType()->isIntegerTy(32)) + return "non-i32 ptrtoint"; + break; + + case Instruction::Alloca: { + const AllocaInst *Alloca = cast(Inst); + if (!Alloca->getAllocatedType()->isIntegerTy(8)) + return "non-i8 alloca"; + if (!Alloca->getArraySize()->getType()->isIntegerTy(32)) + return "alloca array size is not i32"; + break; + } + + case Instruction::Call: { + const CallInst *Call = cast(Inst); + if (Call->isInlineAsm()) + return "inline assembly"; + if (!Call->getAttributes().isEmpty()) + return "bad call attributes"; + if (Call->getCallingConv() != CallingConv::C) + return "bad calling convention"; + + // Intrinsic calls can have multiple pointer arguments and + // metadata arguments, so handle them specially. + if (const IntrinsicInst *Call = dyn_cast(Inst)) { + for (unsigned ArgNum = 0, E = Call->getNumArgOperands(); + ArgNum < E; ++ArgNum) { + const Value *Arg = Call->getArgOperand(ArgNum); + if (!(isValidScalarOperand(Arg) || + isNormalizedPtr(Arg) || + isa(Arg))) + return "bad intrinsic operand"; + } + + // Disallow alignments other than 1 on memcpy() etc., for the + // same reason that we disallow them on integer loads and + // stores. + if (const MemIntrinsic *MemOp = dyn_cast(Call)) { + // Avoid the getAlignment() method here because it aborts if + // the alignment argument is not a Constant. + Value *AlignArg = MemOp->getArgOperand(3); + if (!isa(AlignArg) || + cast(AlignArg)->getZExtValue() != 1) { + return "bad alignment"; + } + } + + switch (Call->getIntrinsicID()) { + default: break; // Other intrinsics don't require checks. + // Disallow NaCl atomic intrinsics which don't have valid + // constant NaCl::AtomicOperation and NaCl::MemoryOrder + // parameters. + case Intrinsic::nacl_atomic_load: + case Intrinsic::nacl_atomic_store: + case Intrinsic::nacl_atomic_rmw: + case Intrinsic::nacl_atomic_cmpxchg: + case Intrinsic::nacl_atomic_fence: + case Intrinsic::nacl_atomic_fence_all: { + // All overloads have memory order and RMW operation in the + // same parameter, arbitrarily use the I32 overload. + Type *T = Type::getInt32Ty( + Inst->getParent()->getParent()->getContext()); + const NaCl::AtomicIntrinsics::AtomicIntrinsic *I = + AtomicIntrinsics->find(Call->getIntrinsicID(), T); + if (!hasAllowedAtomicMemoryOrder(I, Call)) + return "invalid memory order"; + if (!hasAllowedAtomicRMWOperation(I, Call)) + return "invalid atomicRMW operation"; + } break; + // Disallow NaCl atomic_is_lock_free intrinsics which don't + // have valid constant size type. + case Intrinsic::nacl_atomic_is_lock_free: + if (!hasAllowedLockFreeByteSize(Call)) + return "invalid atomic lock-free byte size"; + break; + } + + // Allow the instruction and skip the later checks. + return NULL; + } + + // The callee is the last operand. 
+ PtrOperandIndex = Inst->getNumOperands() - 1; + if (!isNormalizedPtr(Inst->getOperand(PtrOperandIndex))) + return "bad function callee operand"; + break; + } + + case Instruction::Switch: { + // SwitchInst represents switch cases using array and vector + // constants, which we normally reject, so we must check + // SwitchInst specially here. + const SwitchInst *Switch = cast(Inst); + if (!isValidScalarOperand(Switch->getCondition())) + return "bad switch condition"; + if (Switch->getCondition()->getType()->isIntegerTy(1)) + return "switch on i1"; + + // SwitchInst requires the cases to be ConstantInts, but it + // doesn't require their types to be the same as the condition + // value, so check all the cases too. + for (SwitchInst::ConstCaseIt Case = Switch->case_begin(), + E = Switch->case_end(); Case != E; ++Case) { + if(!isValidScalarOperand(Case.getCaseValue())) + return "bad switch case"; + } + + // Allow the instruction and skip the later checks. + return NULL; + } + } + + // Check the instruction's operands. We have already checked any + // pointer operands. Any remaining operands must be scalars. + for (unsigned OpNum = 0, E = Inst->getNumOperands(); OpNum < E; ++OpNum) { + if (OpNum != PtrOperandIndex && + !isValidScalarOperand(Inst->getOperand(OpNum))) + return "bad operand"; + } + + // Check arithmetic attributes. + if (const OverflowingBinaryOperator *Op = + dyn_cast(Inst)) { + if (Op->hasNoUnsignedWrap()) + return "has \"nuw\" attribute"; + if (Op->hasNoSignedWrap()) + return "has \"nsw\" attribute"; + } + if (const PossiblyExactOperator *Op = + dyn_cast(Inst)) { + if (Op->isExact()) + return "has \"exact\" attribute"; + } + + // Allow the instruction. + return NULL; +} + +bool PNaClABIVerifyFunctions::runOnFunction(Function &F) { + SmallVector MDNames; + F.getContext().getMDKindNames(MDNames); + + for (Function::const_iterator FI = F.begin(), FE = F.end(); + FI != FE; ++FI) { + for (BasicBlock::const_iterator BBI = FI->begin(), BBE = FI->end(); + BBI != BBE; ++BBI) { + const Instruction *Inst = BBI; + // Check the instruction opcode first. This simplifies testing, + // because some instruction opcodes must be rejected out of hand + // (regardless of the instruction's result type) and the tests + // check the reason for rejection. + const char *Error = checkInstruction(BBI); + // Check the instruction's result type. + if (!Error && !(PNaClABITypeChecker::isValidScalarType(Inst->getType()) || + isNormalizedPtr(Inst) || + isa(Inst))) { + Error = "bad result type"; + } + if (Error) { + Reporter->addError() << "Function " << F.getName() << + " disallowed: " << Error << ": " << *BBI << "\n"; + } + + // Check instruction attachment metadata. + SmallVector, 4> MDForInst; + BBI->getAllMetadata(MDForInst); + + for (unsigned i = 0, e = MDForInst.size(); i != e; i++) { + if (!IsWhitelistedMetadata(MDForInst[i].first)) { + Reporter->addError() + << "Function " << F.getName() + << " has disallowed instruction metadata: " + << getMDNodeString(MDForInst[i].first, MDNames) << "\n"; + } + } + } + } + + Reporter->checkForFatalErrors(); + return false; +} + +// This method exists so that the passes can easily be run with opt -analyze. +// In this case the default constructor is used and we want to reset the error +// messages after each print. 
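+// For example (illustrative invocation; exact flags may vary):
+//   opt -analyze -verify-pnaclabi-functions input.ll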
+void PNaClABIVerifyFunctions::print(llvm::raw_ostream &O, const Module *M) + const { + Reporter->printErrors(O); + Reporter->reset(); +} + +char PNaClABIVerifyFunctions::ID = 0; +INITIALIZE_PASS(PNaClABIVerifyFunctions, "verify-pnaclabi-functions", + "Verify functions for PNaCl", false, true) + +FunctionPass *llvm::createPNaClABIVerifyFunctionsPass( + PNaClABIErrorReporter *Reporter) { + return new PNaClABIVerifyFunctions(Reporter); +} diff --git a/lib/Analysis/NaCl/PNaClABIVerifyModule.cpp b/lib/Analysis/NaCl/PNaClABIVerifyModule.cpp new file mode 100644 index 000000000000..712d64fb2f7e --- /dev/null +++ b/lib/Analysis/NaCl/PNaClABIVerifyModule.cpp @@ -0,0 +1,535 @@ +//===- PNaClABIVerifyModule.cpp - Verify PNaCl ABI rules ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Verify module-level PNaCl ABI requirements (specifically those that do not +// require looking at the function bodies) +// +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Analysis/NaCl.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#include "PNaClABITypeChecker.h" +using namespace llvm; + +namespace llvm { +cl::opt +PNaClABIAllowDebugMetadata("pnaclabi-allow-debug-metadata", + cl::desc("Allow debug metadata during PNaCl ABI verification."), + cl::init(false)); + +} + +static cl::opt +PNaClABIAllowDevIntrinsics("pnaclabi-allow-dev-intrinsics", + cl::desc("Allow dev LLVM intrinsics during PNaCl ABI verification."), + cl::init(false)); + +namespace { +// This pass should not touch function bodies, to stay streaming-friendly +class PNaClABIVerifyModule : public ModulePass { + public: + static char ID; + PNaClABIVerifyModule() : + ModulePass(ID), + Reporter(new PNaClABIErrorReporter), + ReporterIsOwned(true) { + initializePNaClABIVerifyModulePass(*PassRegistry::getPassRegistry()); + } + explicit PNaClABIVerifyModule(PNaClABIErrorReporter *Reporter_, + bool StreamingMode) : + ModulePass(ID), + Reporter(Reporter_), + ReporterIsOwned(false), + StreamingMode(StreamingMode) { + initializePNaClABIVerifyModulePass(*PassRegistry::getPassRegistry()); + } + ~PNaClABIVerifyModule() { + if (ReporterIsOwned) + delete Reporter; + } + bool runOnModule(Module &M); + virtual void print(raw_ostream &O, const Module *M) const; + private: + void checkGlobalValueCommon(const GlobalValue *GV); + bool isWhitelistedMetadata(const NamedMDNode *MD); + + /// Returns whether \p GV is an allowed external symbol in stable bitcode. + bool isWhitelistedExternal(const GlobalValue *GV); + + void checkGlobalIsFlattened(const GlobalVariable *GV); + PNaClABIErrorReporter *Reporter; + bool ReporterIsOwned; + bool StreamingMode; +}; + +class AllowedIntrinsics { + LLVMContext *Context; + // Maps from an allowed intrinsic's name to its type. + StringMap Mapping; + + // Tys is an array of type parameters for the intrinsic. This + // defaults to an empty array. 
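+  //
+  // For example (illustrative only), addIntrinsic(Intrinsic::bswap, I32)
+  // would record the name and type of @llvm.bswap.i32, while
+  // addIntrinsic(Intrinsic::nacl_read_tp) records a non-overloaded
+  // intrinsic with no type parameters.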
+ void addIntrinsic(Intrinsic::ID ID, + ArrayRef Tys = ArrayRef()) { + Mapping[Intrinsic::getName(ID, Tys)] = + Intrinsic::getType(*Context, ID, Tys); + } +public: + AllowedIntrinsics(LLVMContext *Context); + bool isAllowed(const Function *Func); +}; + +static const char *linkageName(GlobalValue::LinkageTypes LT) { + // This logic is taken from PrintLinkage in lib/VMCore/AsmWriter.cpp + switch (LT) { + case GlobalValue::ExternalLinkage: return "external"; + case GlobalValue::PrivateLinkage: return "private "; + case GlobalValue::LinkerPrivateLinkage: return "linker_private "; + case GlobalValue::LinkerPrivateWeakLinkage: return "linker_private_weak "; + case GlobalValue::InternalLinkage: return "internal "; + case GlobalValue::LinkOnceAnyLinkage: return "linkonce "; + case GlobalValue::LinkOnceODRLinkage: return "linkonce_odr "; + case GlobalValue::WeakAnyLinkage: return "weak "; + case GlobalValue::WeakODRLinkage: return "weak_odr "; + case GlobalValue::CommonLinkage: return "common "; + case GlobalValue::AppendingLinkage: return "appending "; + case GlobalValue::ExternalWeakLinkage: return "extern_weak "; + case GlobalValue::AvailableExternallyLinkage: + return "available_externally "; + default: + return "unknown"; + } +} + +} // end anonymous namespace + +// Check linkage type and section attributes, which are the same for +// GlobalVariables and Functions. +void PNaClABIVerifyModule::checkGlobalValueCommon(const GlobalValue *GV) { + assert(!isa(GV)); + const char *GVTypeName = isa(GV) ? + "Variable " : "Function "; + switch (GV->getLinkage()) { + case GlobalValue::ExternalLinkage: + if (!isWhitelistedExternal(GV)) { + Reporter->addError() + << GV->getName() + << " is not a valid external symbol (disallowed)\n"; + } + break; + case GlobalValue::InternalLinkage: + break; + default: + Reporter->addError() << GVTypeName << GV->getName() + << " has disallowed linkage type: " + << linkageName(GV->getLinkage()) << "\n"; + } + if (GV->getVisibility() != GlobalValue::DefaultVisibility) { + std::string Text = "unknown"; + if (GV->getVisibility() == GlobalValue::HiddenVisibility) { + Text = "hidden"; + } else if (GV->getVisibility() == GlobalValue::ProtectedVisibility) { + Text = "protected"; + } + Reporter->addError() << GVTypeName << GV->getName() + << " has disallowed visibility: " << Text << "\n"; + } + if (GV->hasSection()) { + Reporter->addError() << GVTypeName << GV->getName() << + " has disallowed \"section\" attribute\n"; + } + if (GV->getType()->getAddressSpace() != 0) { + Reporter->addError() << GVTypeName << GV->getName() + << " has addrspace attribute (disallowed)\n"; + } + // The "unnamed_addr" attribute can be used to merge duplicate + // definitions, but that should be done by user-toolchain + // optimization passes, not by the PNaCl translator. + if (GV->hasUnnamedAddr()) { + Reporter->addError() << GVTypeName << GV->getName() + << " has disallowed \"unnamed_addr\" attribute\n"; + } +} + +AllowedIntrinsics::AllowedIntrinsics(LLVMContext *Context) : Context(Context) { + // Note that new intrinsics added here may also need to be added to + // NaClBitcodeReader.cpp if they contain pointer-typed parameters. + // TODO(mseaborn): Change NaClBitcodeReader.cpp to reuse the list + // below. 
+ + Type *I8Ptr = Type::getInt8PtrTy(*Context); + Type *I8 = Type::getInt8Ty(*Context); + Type *I16 = Type::getInt16Ty(*Context); + Type *I32 = Type::getInt32Ty(*Context); + Type *I64 = Type::getInt64Ty(*Context); + Type *Float = Type::getFloatTy(*Context); + Type *Double = Type::getDoubleTy(*Context); + + // We accept bswap for a limited set of types (i16, i32, i64). The + // various backends are able to generate instructions to implement + // the intrinsic. Also, i16 and i64 are easy to implement as along + // as there is a way to do i32. + addIntrinsic(Intrinsic::bswap, I16); + addIntrinsic(Intrinsic::bswap, I32); + addIntrinsic(Intrinsic::bswap, I64); + + // We accept cttz, ctlz, and ctpop for a limited set of types (i32, i64). + addIntrinsic(Intrinsic::ctlz, I32); + addIntrinsic(Intrinsic::ctlz, I64); + addIntrinsic(Intrinsic::cttz, I32); + addIntrinsic(Intrinsic::cttz, I64); + addIntrinsic(Intrinsic::ctpop, I32); + addIntrinsic(Intrinsic::ctpop, I64); + + addIntrinsic(Intrinsic::nacl_read_tp); + addIntrinsic(Intrinsic::nacl_longjmp); + addIntrinsic(Intrinsic::nacl_setjmp); + + // For native sqrt instructions. Must guarantee when x < -0.0, sqrt(x) = NaN. + addIntrinsic(Intrinsic::sqrt, Float); + addIntrinsic(Intrinsic::sqrt, Double); + + Type *AtomicTypes[] = { I8, I16, I32, I64 }; + for (size_t T = 0, E = array_lengthof(AtomicTypes); T != E; ++T) { + addIntrinsic(Intrinsic::nacl_atomic_load, AtomicTypes[T]); + addIntrinsic(Intrinsic::nacl_atomic_store, AtomicTypes[T]); + addIntrinsic(Intrinsic::nacl_atomic_rmw, AtomicTypes[T]); + addIntrinsic(Intrinsic::nacl_atomic_cmpxchg, AtomicTypes[T]); + } + addIntrinsic(Intrinsic::nacl_atomic_fence); + addIntrinsic(Intrinsic::nacl_atomic_fence_all); + + addIntrinsic(Intrinsic::nacl_atomic_is_lock_free); + + // Stack save and restore are used to support C99 VLAs. + addIntrinsic(Intrinsic::stacksave); + addIntrinsic(Intrinsic::stackrestore); + + addIntrinsic(Intrinsic::trap); + + // We only allow the variants of memcpy/memmove/memset with an i32 + // "len" argument, not an i64 argument. + Type *MemcpyTypes[] = { I8Ptr, I8Ptr, I32 }; + addIntrinsic(Intrinsic::memcpy, MemcpyTypes); + addIntrinsic(Intrinsic::memmove, MemcpyTypes); + Type *MemsetTypes[] = { I8Ptr, I32 }; + addIntrinsic(Intrinsic::memset, MemsetTypes); +} + +bool AllowedIntrinsics::isAllowed(const Function *Func) { + // Keep 3 categories of intrinsics for now. + // (1) Allowed always, provided the exact name and type match. + // (2) Never allowed. + // (3) "Dev": intrinsics in the development or prototype stage, + // or private intrinsics used for building special programs. + // (4) Debug info intrinsics. + // + // Please keep these sorted or grouped in a sensible way, within + // each category. + + // (1) Allowed always, provided the exact name and type match. + if (Mapping.count(Func->getName()) == 1) + return Func->getFunctionType() == Mapping[Func->getName()]; + + switch (Func->getIntrinsicID()) { + // Disallow by default. + default: return false; + + // (2) Known to be never allowed. + case Intrinsic::not_intrinsic: + // Trampolines depend on a target-specific-sized/aligned buffer. + case Intrinsic::adjust_trampoline: + case Intrinsic::init_trampoline: + // CXX exception handling is not stable. 
+ case Intrinsic::eh_dwarf_cfa: + case Intrinsic::eh_return_i32: + case Intrinsic::eh_return_i64: + case Intrinsic::eh_sjlj_callsite: + case Intrinsic::eh_sjlj_functioncontext: + case Intrinsic::eh_sjlj_longjmp: + case Intrinsic::eh_sjlj_lsda: + case Intrinsic::eh_sjlj_setjmp: + case Intrinsic::eh_typeid_for: + case Intrinsic::eh_unwind_init: + // We do not want to expose addresses to the user. + case Intrinsic::frameaddress: + case Intrinsic::returnaddress: + // Not supporting stack protectors. + case Intrinsic::stackprotector: + // Var-args handling is done w/out intrinsics. + case Intrinsic::vacopy: + case Intrinsic::vaend: + case Intrinsic::vastart: + // Disallow the *_with_overflow intrinsics because they return + // struct types. All of them can be introduced by passing -ftrapv + // to Clang, which we do not support for now. umul_with_overflow + // and uadd_with_overflow are introduced by Clang for C++'s new[], + // but ExpandArithWithOverflow expands out this use. + case Intrinsic::sadd_with_overflow: + case Intrinsic::ssub_with_overflow: + case Intrinsic::uadd_with_overflow: + case Intrinsic::usub_with_overflow: + case Intrinsic::smul_with_overflow: + case Intrinsic::umul_with_overflow: + // Disallow lifetime.start/end because the semantics of what + // arguments they accept are not very well defined, and because it + // would be better to do merging of stack slots in the user + // toolchain than in the PNaCl translator. + // See https://code.google.com/p/nativeclient/issues/detail?id=3443 + case Intrinsic::lifetime_end: + case Intrinsic::lifetime_start: + case Intrinsic::invariant_end: + case Intrinsic::invariant_start: + // Some transcendental functions not needed yet. + case Intrinsic::cos: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::pow: + case Intrinsic::powi: + case Intrinsic::sin: + // We run -lower-expect to convert Intrinsic::expect into branch weights + // and consume in the middle-end. The backend just ignores llvm.expect. + case Intrinsic::expect: + // For FLT_ROUNDS macro from float.h. It works for ARM and X86 + // (but not MIPS). Also, wait until we add a set_flt_rounds intrinsic + // before we bless this. + case Intrinsic::flt_rounds: + return false; + + // (3) Dev intrinsics. + case Intrinsic::nacl_target_arch: // Used by translator self-build. + return PNaClABIAllowDevIntrinsics; + + // (4) Debug info intrinsics. + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + return PNaClABIAllowDebugMetadata; + } +} + +bool PNaClABIVerifyModule::isWhitelistedMetadata(const NamedMDNode *MD) { + return MD->getName().startswith("llvm.dbg.") && PNaClABIAllowDebugMetadata; +} + +bool PNaClABIVerifyModule::isWhitelistedExternal(const GlobalValue *GV) { + if (const Function *Func = dyn_cast(GV)) { + if (Func->getName().equals("_start") || Func->isIntrinsic()) { + return true; + } + } + return false; +} + +static bool isPtrToIntOfGlobal(const Constant *C) { + if (const ConstantExpr *CE = dyn_cast(C)) { + return CE->getOpcode() == Instruction::PtrToInt && + isa(CE->getOperand(0)); + } + return false; +} + +// This checks for part of the normal form produced by FlattenGlobals. 
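+// For illustration (hypothetical IR, not from this patch), a flattened
+// module typically contains globals such as:
+//   @bytes = internal global [4 x i8] c"\01\00\00\00"
+//   @addr  = internal global i32 ptrtoint ([4 x i8]* @bytes to i32)
+//   @both  = internal global <{ [4 x i8], i32 }>
+//     <{ [4 x i8] c"abcd", i32 add (i32 ptrtoint ([4 x i8]* @bytes to i32), i32 2) }>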
+static bool isSimpleElement(const Constant *C) { + // A SimpleElement is one of the following: + // 1) An i8 array literal or zeroinitializer: + // [SIZE x i8] c"DATA" + // [SIZE x i8] zeroinitializer + if (ArrayType *Ty = dyn_cast(C->getType())) { + return Ty->getElementType()->isIntegerTy(8) && + (isa(C) || + isa(C)); + } + // 2) A reference to a GlobalValue (a function or global variable) + // with an optional byte offset added to it (the addend). + if (C->getType()->isIntegerTy(32)) { + const ConstantExpr *CE = dyn_cast(C); + if (!CE) + return false; + // Without addend: ptrtoint (TYPE* @GLOBAL to i32) + if (isPtrToIntOfGlobal(CE)) + return true; + // With addend: add (i32 ptrtoint (TYPE* @GLOBAL to i32), i32 ADDEND) + if (CE->getOpcode() == Instruction::Add && + isPtrToIntOfGlobal(CE->getOperand(0)) && + isa(CE->getOperand(1))) + return true; + } + return false; +} + +// This checks for part of the normal form produced by FlattenGlobals. +static bool isCompoundElement(const Constant *C) { + const ConstantStruct *CS = dyn_cast(C); + if (!CS || !CS->getType()->isPacked() || CS->getType()->hasName() || + CS->getNumOperands() <= 1) + return false; + for (unsigned I = 0; I < CS->getNumOperands(); ++I) { + if (!isSimpleElement(CS->getOperand(I))) + return false; + } + return true; +} + +static std::string getAttributesAsString(AttributeSet Attrs) { + std::string AttrsAsString; + for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { + for (AttributeSet::iterator Attr = Attrs.begin(Slot), + E = Attrs.end(Slot); Attr != E; ++Attr) { + AttrsAsString += " "; + AttrsAsString += Attr->getAsString(); + } + } + return AttrsAsString; +} + +// This checks that the GlobalVariable has the normal form produced by +// the FlattenGlobals pass. +void PNaClABIVerifyModule::checkGlobalIsFlattened(const GlobalVariable *GV) { + if (!GV->hasInitializer()) { + Reporter->addError() << "Global variable " << GV->getName() + << " has no initializer (disallowed)\n"; + return; + } + const Constant *InitVal = GV->getInitializer(); + if (isSimpleElement(InitVal) || isCompoundElement(InitVal)) + return; + Reporter->addError() << "Global variable " << GV->getName() + << " has non-flattened initializer (disallowed): " + << *InitVal << "\n"; +} + +bool PNaClABIVerifyModule::runOnModule(Module &M) { + AllowedIntrinsics Intrinsics(&M.getContext()); + + if (!M.getModuleInlineAsm().empty()) { + Reporter->addError() << + "Module contains disallowed top-level inline assembly\n"; + } + + for (Module::const_global_iterator MI = M.global_begin(), ME = M.global_end(); + MI != ME; ++MI) { + checkGlobalIsFlattened(MI); + checkGlobalValueCommon(MI); + + if (MI->isThreadLocal()) { + Reporter->addError() << "Variable " << MI->getName() << + " has disallowed \"thread_local\" attribute\n"; + } + if (MI->isExternallyInitialized()) { + Reporter->addError() << "Variable " << MI->getName() << + " has disallowed \"externally_initialized\" attribute\n"; + } + } + + // No aliases allowed for now. + for (Module::alias_iterator MI = M.alias_begin(), + E = M.alias_end(); MI != E; ++MI) { + Reporter->addError() << "Variable " << MI->getName() << + " is an alias (disallowed)\n"; + } + + for (Module::const_iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) { + if (MI->isIntrinsic()) { + // Check intrinsics. + if (!Intrinsics.isAllowed(MI)) { + Reporter->addError() << "Function " << MI->getName() + << " is a disallowed LLVM intrinsic\n"; + } + } else { + // Check types of functions and their arguments. 
Not necessary + // for intrinsics, whose types are fixed anyway, and which have + // argument types that we disallow such as i8. + if (!PNaClABITypeChecker::isValidFunctionType(MI->getFunctionType())) { + Reporter->addError() << "Function " << MI->getName() + << " has disallowed type: " + << PNaClABITypeChecker::getTypeName(MI->getFunctionType()) + << "\n"; + } + // This check is disabled in streaming mode because it would + // reject a function that is defined but not read in yet. + // Unfortunately this means we simply don't check this property + // when translating a pexe in the browser. + // TODO(mseaborn): Enforce this property in the bitcode reader. + if (!StreamingMode && MI->isDeclaration()) { + Reporter->addError() << "Function " << MI->getName() + << " is declared but not defined (disallowed)\n"; + } + if (!MI->getAttributes().isEmpty()) { + Reporter->addError() + << "Function " << MI->getName() << " has disallowed attributes:" + << getAttributesAsString(MI->getAttributes()) << "\n"; + } + if (MI->getCallingConv() != CallingConv::C) { + Reporter->addError() + << "Function " << MI->getName() + << " has disallowed calling convention: " + << MI->getCallingConv() << "\n"; + } + } + + checkGlobalValueCommon(MI); + + if (MI->hasGC()) { + Reporter->addError() << "Function " << MI->getName() << + " has disallowed \"gc\" attribute\n"; + } + // Knowledge of what function alignments are useful is + // architecture-specific and sandbox-specific, so PNaCl pexes + // should not be able to specify function alignment. + if (MI->getAlignment() != 0) { + Reporter->addError() << "Function " << MI->getName() << + " has disallowed \"align\" attribute\n"; + } + } + + // Check named metadata nodes + for (Module::const_named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); I != E; ++I) { + if (!isWhitelistedMetadata(I)) { + Reporter->addError() << "Named metadata node " << I->getName() + << " is disallowed\n"; + } + } + + Reporter->checkForFatalErrors(); + return false; +} + +// This method exists so that the passes can easily be run with opt -analyze. 
+// In this case the default constructor is used and we want to reset the error +// messages after each print (this is more of an issue for the FunctionPass +// than the ModulePass) +void PNaClABIVerifyModule::print(llvm::raw_ostream &O, const Module *M) const { + Reporter->printErrors(O); + Reporter->reset(); +} + +char PNaClABIVerifyModule::ID = 0; +INITIALIZE_PASS(PNaClABIVerifyModule, "verify-pnaclabi-module", + "Verify module for PNaCl", false, true) + +ModulePass *llvm::createPNaClABIVerifyModulePass( + PNaClABIErrorReporter *Reporter, bool StreamingMode) { + return new PNaClABIVerifyModule(Reporter, StreamingMode); +} diff --git a/lib/Bitcode/CMakeLists.txt b/lib/Bitcode/CMakeLists.txt index ff7e290cad1b..8969ec83f563 100644 --- a/lib/Bitcode/CMakeLists.txt +++ b/lib/Bitcode/CMakeLists.txt @@ -1,2 +1,3 @@ add_subdirectory(Reader) add_subdirectory(Writer) +add_subdirectory(NaCl) diff --git a/lib/Bitcode/LLVMBuild.txt b/lib/Bitcode/LLVMBuild.txt index af9936bbe829..415a33dfdff3 100644 --- a/lib/Bitcode/LLVMBuild.txt +++ b/lib/Bitcode/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = Reader Writer +subdirectories = Reader Writer NaCl [component_0] type = Group diff --git a/lib/Bitcode/Makefile b/lib/Bitcode/Makefile index 2d6b5ad1fe88..cbaab3578cd0 100644 --- a/lib/Bitcode/Makefile +++ b/lib/Bitcode/Makefile @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. -PARALLEL_DIRS = Reader Writer +PARALLEL_DIRS = Reader Writer NaCl include $(LEVEL)/Makefile.common diff --git a/lib/Bitcode/NaCl/CMakeLists.txt b/lib/Bitcode/NaCl/CMakeLists.txt new file mode 100644 index 000000000000..5a8b272befa2 --- /dev/null +++ b/lib/Bitcode/NaCl/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(Writer) +add_subdirectory(Reader) diff --git a/lib/Bitcode/NaCl/LLVMBuild.txt b/lib/Bitcode/NaCl/LLVMBuild.txt new file mode 100644 index 000000000000..a29928d2a0a7 --- /dev/null +++ b/lib/Bitcode/NaCl/LLVMBuild.txt @@ -0,0 +1,24 @@ +;===- ./lib/Bitcode/NaCl/LLVMBuild.txt ------------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[common] +subdirectories = Writer Reader + +[component_0] +type = Group +name = NaClBitcode +parent = Bitcode diff --git a/lib/Bitcode/NaCl/Makefile b/lib/Bitcode/NaCl/Makefile new file mode 100644 index 000000000000..5bbbc351a1fd --- /dev/null +++ b/lib/Bitcode/NaCl/Makefile @@ -0,0 +1,14 @@ +##===- lib/Bitcode/NaCl/Makefile ---------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. 
+PARALLEL_DIRS = Writer Reader + +include $(LEVEL)/Makefile.common + diff --git a/lib/Bitcode/NaCl/Reader/CMakeLists.txt b/lib/Bitcode/NaCl/Reader/CMakeLists.txt new file mode 100644 index 000000000000..24d03aa3fe52 --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/CMakeLists.txt @@ -0,0 +1,10 @@ +add_llvm_library(LLVMNaClBitReader + NaClBitcodeHeader.cpp + NaClBitcodeReader.cpp + NaClBitstreamReader.cpp + NaClBitcodeParser.cpp + NaClBitcodeAnalyzer.cpp + NaClCommonBitcodeRecordDists.cpp + ) + +add_dependencies(LLVMNaClBitReader intrinsics_gen) diff --git a/lib/Bitcode/NaCl/Reader/LLVMBuild.txt b/lib/Bitcode/NaCl/Reader/LLVMBuild.txt new file mode 100644 index 000000000000..acf354f5b5d8 --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Bitcode/NaClReader/LLVMBuild.txt -------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NaClBitReader +parent = NaClBitcode +required_libraries = Core Support diff --git a/lib/Bitcode/NaCl/Reader/Makefile b/lib/Bitcode/NaCl/Reader/Makefile new file mode 100644 index 000000000000..92c75c29a412 --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/Makefile @@ -0,0 +1,15 @@ +##===- lib/Bitcode/NaCl/Reader/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. +LIBRARYNAME = LLVMNaClBitReader +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeAnalyzer.cpp b/lib/Bitcode/NaCl/Reader/NaClBitcodeAnalyzer.cpp new file mode 100644 index 000000000000..6e09e9ee135a --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeAnalyzer.cpp @@ -0,0 +1,439 @@ +//===-- NaClBitcodeAnalyzer.cpp - Bitcode Analyzer ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "nacl-bitcode-analyzer" + +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Bitcode/NaCl/NaClBitcodeAnalyzer.h" +#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" +#include "llvm/Bitcode/NaCl/NaClBitcodeParser.h" +#include "llvm/Bitcode/NaCl/NaClCommonBitcodeRecordDists.h" +#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h" +#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Support/system_error.h" +#include +#include + +/// Error - All bitcode analysis errors go through this function, making this a +/// good place to breakpoint if debugging. 
+static bool Error(const llvm::Twine &Err) { + llvm::errs() << Err << "\n"; + return true; +} + +namespace llvm { + +/// GetBlockName - Return a symbolic block name if known, otherwise return +/// null. +static const char *GetBlockName(unsigned BlockID) { + // Standard blocks for all bitcode files. + if (BlockID < naclbitc::FIRST_APPLICATION_BLOCKID) { + if (BlockID == naclbitc::BLOCKINFO_BLOCK_ID) + return "BLOCKINFO_BLOCK"; + return 0; + } + + switch (BlockID) { + default: return 0; + case naclbitc::MODULE_BLOCK_ID: return "MODULE_BLOCK"; + case naclbitc::PARAMATTR_BLOCK_ID: return "PARAMATTR_BLOCK"; + case naclbitc::PARAMATTR_GROUP_BLOCK_ID: return "PARAMATTR_GROUP_BLOCK_ID"; + case naclbitc::TYPE_BLOCK_ID_NEW: return "TYPE_BLOCK_ID"; + case naclbitc::CONSTANTS_BLOCK_ID: return "CONSTANTS_BLOCK"; + case naclbitc::FUNCTION_BLOCK_ID: return "FUNCTION_BLOCK"; + case naclbitc::VALUE_SYMTAB_BLOCK_ID: return "VALUE_SYMTAB"; + case naclbitc::METADATA_BLOCK_ID: return "METADATA_BLOCK"; + case naclbitc::METADATA_ATTACHMENT_ID: return "METADATA_ATTACHMENT_BLOCK"; + case naclbitc::USELIST_BLOCK_ID: return "USELIST_BLOCK_ID"; + case naclbitc::GLOBALVAR_BLOCK_ID: return "GLOBALVAR_BLOCK"; + } +} + +struct PerBlockIDStats { +private: + PerBlockIDStats(const PerBlockIDStats&) LLVM_DELETED_FUNCTION; + void operator=(const PerBlockIDStats&) LLVM_DELETED_FUNCTION; + +public: + /// NumInstances - This the number of times this block ID has been + /// seen. + unsigned NumInstances; + + /// NumBits - The total size in bits of all of these blocks. + uint64_t NumBits; + + /// NumSubBlocks - The total number of blocks these blocks contain. + unsigned NumSubBlocks; + + /// NumAbbrevs - The total number of abbreviations. + unsigned NumAbbrevs; + + /// NumRecords - The total number of records these blocks contain, + /// and the number that are abbreviated. + unsigned NumRecords, NumAbbreviatedRecords; + + /// RecordCodeDist - Distribution of each record code for this + /// block. + NaClBitcodeRecordCodeDist RecordCodeDist; + + explicit PerBlockIDStats(unsigned BlockID) + : NumInstances(0), NumBits(0), + NumSubBlocks(0), NumAbbrevs(0), NumRecords(0), NumAbbreviatedRecords(0), + RecordCodeDist(BlockID) + {} +}; + +// Parses all bitcode blocks, and collects distribution of records in +// each block. Also dumps bitcode structure if specified (via global +// variables). +class PNaClBitcodeAnalyzerParser : public NaClBitcodeParser { +public: + PNaClBitcodeAnalyzerParser(NaClBitstreamCursor &Cursor, + raw_ostream &OS, + const AnalysisDumpOptions &DumpOptions) + : NaClBitcodeParser(Cursor), + IndentLevel(0), + OS(OS), + DumpOptions(DumpOptions) { + } + + virtual ~PNaClBitcodeAnalyzerParser() {} + + virtual bool Error(const std::string Message) { + // Use local error routine so that all errors are treated uniformly. + return ::Error(Message); + } + + virtual bool ParseBlock(unsigned BlockID); + + // Returns the string defining the indentation to use with respect + // to the current indent level. + const std::string &GetIndentation() { + size_t Size = IndentationCache.size(); + if (IndentLevel >= Size) { + IndentationCache.resize(IndentLevel+1); + for (size_t i = Size; i <= IndentLevel; ++i) { + IndentationCache[i] = std::string(i*2, ' '); + } + } + return IndentationCache[IndentLevel]; + } + + // Keeps track of current indentation level based on block nesting. + unsigned IndentLevel; + // The output stream to print to. + raw_ostream &OS; + // The dump options to use. 
+  const AnalysisDumpOptions &DumpOptions;
+  // The statistics collected for each block ID.
+  std::map<unsigned, PerBlockIDStats*> BlockIDStats;
+
+private:
+  // The set of cached, indentation strings. Used for indenting
+  // records when dumping.
+  std::vector<std::string> IndentationCache;
+};
+
+// Parses a bitcode block, and collects distribution of records in that block.
+// Also dumps bitcode structure if specified (via global variables).
+class PNaClBitcodeAnalyzerBlockParser : public NaClBitcodeParser {
+public:
+  // Parses top-level block.
+  PNaClBitcodeAnalyzerBlockParser(
+      unsigned BlockID,
+      PNaClBitcodeAnalyzerParser *Parser)
+      : NaClBitcodeParser(BlockID, Parser) {
+    Initialize(BlockID, Parser);
+  }
+
+  virtual ~PNaClBitcodeAnalyzerBlockParser() {}
+
+protected:
+  // Parses nested blocks.
+  PNaClBitcodeAnalyzerBlockParser(
+      unsigned BlockID,
+      PNaClBitcodeAnalyzerBlockParser *EnclosingBlock)
+      : NaClBitcodeParser(BlockID, EnclosingBlock),
+        Context(EnclosingBlock->Context) {
+    Initialize(BlockID, EnclosingBlock->Context);
+  }
+
+  // Initialize data associated with a block.
+  void Initialize(unsigned BlockID, PNaClBitcodeAnalyzerParser *Parser) {
+    Context = Parser;
+    if (Context->DumpOptions.DoDump) {
+      Indent = Parser->GetIndentation();
+    }
+    NumWords = 0;
+    BlockName = 0;
+    BlockStats = Context->BlockIDStats[BlockID];
+    if (BlockStats == 0) {
+      BlockStats = new PerBlockIDStats(BlockID);
+      Context->BlockIDStats[BlockID] = BlockStats;
+    }
+    BlockStats->NumInstances++;
+  }
+
+  // Increment the indentation level for dumping.
+  void IncrementIndent() {
+    Context->IndentLevel++;
+    Indent = Context->GetIndentation();
+  }
+
+  // Decrement the indentation level for dumping.
+  void DecrementIndent() {
+    Context->IndentLevel--;
+    Indent = Context->GetIndentation();
+  }
+
+  virtual bool Error(const std::string Message) {
+    // Use local error routine so that all errors are treated uniformly.
+    return ::Error(Message);
+  }
+
+  // Called once the block has been entered by the bitstream reader.
+  // Argument NumWords is set to the number of words in the
+  // corresponding block.
+  virtual void EnterBlock(unsigned NumberWords) {
+    NumWords = NumberWords;
+    IncrementCallingBlock();
+    BlockName = 0;
+    if (Context->DumpOptions.DoDump) {
+      raw_ostream &OS = Context->OS;
+      unsigned BlockID = GetBlockID();
+      OS << Indent << "<";
+      if ((BlockName = GetBlockName(BlockID)))
+        OS << BlockName;
+      else
+        OS << "UnknownBlock" << BlockID;
+
+      if (Context->DumpOptions.NonSymbolic && BlockName)
+        OS << " BlockID=" << BlockID;
+
+      if (!Context->DumpOptions.DumpOnlyRecords) {
+        OS << " NumWords=" << NumberWords
+           << " BlockCodeSize="
+           << Record.GetCursor().getAbbrevIDWidth();
+      }
+      OS << ">\n";
+      IncrementIndent();
+    }
+  }
+
+  // Called when the corresponding EndBlock of the block being parsed
+  // is found.
+  virtual void ExitBlock() {
+    BlockStats->NumBits += GetLocalNumBits();
+    if (Context->DumpOptions.DoDump) {
+      DecrementIndent();
+      raw_ostream &OS = Context->OS;
+      if (BlockName)
+        OS << Indent << "</" << BlockName << ">\n";
+      else
+        OS << Indent << "</UnknownBlock" << GetBlockID() << ">\n";
+    }
+  }
+
+  // Called after a BlockInfo block is parsed.
+  virtual void ExitBlockInfo() {
+    BlockStats->NumBits += GetLocalNumBits();
+    if (Context->DumpOptions.DoDump)
+      Context->OS << Indent << "</BLOCKINFO_BLOCK>\n";
+    IncrementCallingBlock();
+  }
+
+  // Process the last read record in the block.
+  virtual void ProcessRecord() {
+    ++BlockStats->NumRecords;
+    unsigned Code = Record.GetCode();
+
+    // Increment the # occurrences of this code.
+ BlockStats->RecordCodeDist.Add(Record); + + if (Context->DumpOptions.DoDump) { + raw_ostream &OS = Context->OS; + std::string CodeName = + NaClBitcodeRecordCodeDist::GetCodeName(Code, GetBlockID()); + OS << Indent << "<" << CodeName; + if (Context->DumpOptions.NonSymbolic && + !NaClBitcodeRecordCodeDist::HasKnownCodeName(Code, GetBlockID())) + OS << " codeid=" << Code; + if (!Context->DumpOptions.DumpOnlyRecords && + Record.GetEntryID() != naclbitc::UNABBREV_RECORD) + OS << " abbrevid=" << Record.GetEntryID(); + + const NaClBitcodeRecord::RecordVector &Values = Record.GetValues(); + for (unsigned i = 0, e = Values.size(); i != e; ++i) { + if (Context->DumpOptions.OpsPerLine + && (i % Context->DumpOptions.OpsPerLine) == 0 + && i > 0) { + OS << "\n" << Indent << " "; + for (unsigned j = 0; j < CodeName.size(); ++j) + OS << " "; + } + OS << " op" << i << "=" << (int64_t)Values[i]; + } + + OS << "/>\n"; + } + } + + virtual bool ParseBlock(unsigned BlockID) { + PNaClBitcodeAnalyzerBlockParser Parser(BlockID, this); + return Parser.ParseThisBlock(); + } + + std::string Indent; + unsigned NumWords; + const char *BlockName; + PerBlockIDStats *BlockStats; + PNaClBitcodeAnalyzerParser *Context; + + void IncrementCallingBlock() { + if (NaClBitcodeParser *Parser = GetEnclosingParser()) { + PNaClBitcodeAnalyzerBlockParser *PNaClBlock = + static_cast(Parser); + ++PNaClBlock->BlockStats->NumSubBlocks; + } + } +}; + +bool PNaClBitcodeAnalyzerParser::ParseBlock(unsigned BlockID) { + PNaClBitcodeAnalyzerBlockParser Parser(BlockID, this); + return Parser.ParseThisBlock(); +} + +static void PrintSize(double Bits, raw_ostream &OS) { + OS << format("%.2f/%.2fB/%luW", Bits, Bits/8,(unsigned long)(Bits/32)); +} +static void PrintSize(uint64_t Bits, raw_ostream &OS) { + OS << format("%lub/%.2fB/%luW", (unsigned long)Bits, + (double)Bits/8, (unsigned long)(Bits/32)); +} + +int AnalyzeBitcodeInBuffer(const MemoryBuffer &Buf, raw_ostream &OS, + const AnalysisDumpOptions &DumpOptions) { + DEBUG(dbgs() << "-> AnalyzeBitcodeInBuffer\n"); + + if (Buf.getBufferSize() & 3) + return Error("Bitcode stream should be a multiple of 4 bytes in length"); + + const unsigned char *BufPtr = (const unsigned char *)Buf.getBufferStart(); + const unsigned char *EndBufPtr = BufPtr+Buf.getBufferSize(); + + NaClBitcodeHeader Header; + if (Header.Read(BufPtr, EndBufPtr)) + return Error("Invalid PNaCl bitcode header"); + + if (!Header.IsSupported()) + errs() << "Warning: " << Header.Unsupported() << "\n"; + + if (!Header.IsReadable()) + Error("Bitcode file is not readable"); + + NaClBitstreamReader StreamFile(BufPtr, EndBufPtr); + NaClBitstreamCursor Stream(StreamFile); + + unsigned NumTopBlocks = 0; + + // Print out header information. + for (size_t i = 0, limit = Header.NumberFields(); i < limit; ++i) { + OS << Header.GetField(i)->Contents() << "\n"; + } + if (Header.NumberFields()) OS << "\n"; + + PNaClBitcodeAnalyzerParser Parser(Stream, OS, DumpOptions); + // Parse the top-level structure. We only allow blocks at the top-level. + while (!Stream.AtEndOfStream()) { + ++NumTopBlocks; + if (Parser.Parse()) return 1; + } + + if (DumpOptions.DoDump) OS << "\n\n"; + + if (DumpOptions.DumpOnlyRecords) return 0; + + uint64_t BufferSizeBits = (EndBufPtr-BufPtr)*CHAR_BIT; + // Print a summary + OS << " Total size: "; + PrintSize(BufferSizeBits, OS); + OS << "\n"; + OS << " # Toplevel Blocks: " << NumTopBlocks << "\n"; + OS << "\n"; + + // Emit per-block stats. 
+ OS << "Per-block Summary:\n"; + for (std::map::iterator + I = Parser.BlockIDStats.begin(), + E = Parser.BlockIDStats.end(); + I != E; ++I) { + OS << " Block ID #" << I->first; + if (const char *BlockName = GetBlockName(I->first)) + OS << " (" << BlockName << ")"; + OS << ":\n"; + + const PerBlockIDStats &Stats = *I->second; + OS << " Num Instances: " << Stats.NumInstances << "\n"; + OS << " Total Size: "; + PrintSize(Stats.NumBits, OS); + OS << "\n"; + double pct = (Stats.NumBits * 100.0) / BufferSizeBits; + OS << " Percent of file: " << format("%2.4f%%", pct) << "\n"; + if (Stats.NumInstances > 1) { + OS << " Average Size: "; + PrintSize(Stats.NumBits/(double)Stats.NumInstances, OS); + OS << "\n"; + OS << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/" + << Stats.NumSubBlocks/(double)Stats.NumInstances << "\n"; + OS << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/" + << Stats.NumAbbrevs/(double)Stats.NumInstances << "\n"; + OS << " Tot/Avg Records: " << Stats.NumRecords << "/" + << Stats.NumRecords/(double)Stats.NumInstances << "\n"; + } else { + OS << " Num SubBlocks: " << Stats.NumSubBlocks << "\n"; + OS << " Num Abbrevs: " << Stats.NumAbbrevs << "\n"; + OS << " Num Records: " << Stats.NumRecords << "\n"; + } + if (Stats.NumRecords) { + double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords; + OS << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n"; + } + OS << "\n"; + + // Print a histogram of the codes we see. + if (!DumpOptions.NoHistogram && !Stats.RecordCodeDist.empty()) { + Stats.RecordCodeDist.Print(OS, " "); + OS << "\n"; + } + } + DEBUG(dbgs() << "<- AnalyzeBitcode\n"); + return 0; +} + +int AnalyzeBitcodeInFile(const StringRef &InputFilename, raw_ostream &OS, + const AnalysisDumpOptions &DumpOptions) { + // Read the input file. + OwningPtr MemBuf; + + if (error_code ec = + MemoryBuffer::getFileOrSTDIN(InputFilename, MemBuf)) + return Error(Twine("Error reading '") + InputFilename + "': " + + ec.message()); + + return AnalyzeBitcodeInBuffer(*MemBuf, OS, DumpOptions); +} + +} // namespace llvm diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeHeader.cpp b/lib/Bitcode/NaCl/Reader/NaClBitcodeHeader.cpp new file mode 100644 index 000000000000..cabda7db09f9 --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeHeader.cpp @@ -0,0 +1,273 @@ +//===- NaClBitcodeHeader.cpp ----------------------------------------------===// +// PNaCl bitcode header reader. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h"
+#include "llvm/Bitcode/NaCl/NaClReaderWriter.h"
+#include "llvm/Bitcode/ReaderWriter.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/StreamableMemoryObject.h"
+
+#include <cassert>
+#include <cstring>
+#include <limits>
+
+using namespace llvm;
+
+NaClBitcodeHeaderField::NaClBitcodeHeaderField()
+    : ID(kInvalid), FType(kBufferType), Len(0), Data(0) {}
+
+NaClBitcodeHeaderField::NaClBitcodeHeaderField(Tag MyID, uint32_t MyValue)
+    : ID(MyID), FType(kUInt32Type), Len(4), Data(new uint8_t[4]) {
+  Data[0] = static_cast<uint8_t>(MyValue & 0xFF);
+  Data[1] = static_cast<uint8_t>((MyValue >> 8) & 0xFF);
+  Data[2] = static_cast<uint8_t>((MyValue >> 16) & 0xFF);
+  Data[3] = static_cast<uint8_t>((MyValue >> 24) & 0xFF);
+}
+
+uint32_t NaClBitcodeHeaderField::GetUInt32Value() const {
+  assert(FType == kUInt32Type && "Header field must be uint32");
+  return static_cast<uint32_t>(Data[0]) |
+      (static_cast<uint32_t>(Data[1]) << 8) |
+      (static_cast<uint32_t>(Data[2]) << 16) |
+      (static_cast<uint32_t>(Data[3]) << 24);
+}
+
+NaClBitcodeHeaderField::NaClBitcodeHeaderField(Tag MyID, size_t MyLen,
+                                               uint8_t *MyData)
+    : ID(MyID), FType(kBufferType), Len(MyLen), Data(new uint8_t[MyLen]) {
+  for (size_t i = 0; i < MyLen; ++i) {
+    Data[i] = MyData[i];
+  }
+}
+
+bool NaClBitcodeHeaderField::Write(uint8_t *Buf, size_t BufLen) const {
+  size_t FieldsLen = kTagLenSize + Len;
+  size_t PadLen = (WordSize - (FieldsLen & (WordSize-1))) & (WordSize-1);
+  // Ensure buffer is large enough and that length can be represented
+  // in 32 bits
+  if (BufLen < FieldsLen + PadLen ||
+      Len > std::numeric_limits<FixedSubfield>::max())
+    return false;
+
+  WriteFixedSubfield(EncodeTypedID(), Buf);
+  WriteFixedSubfield(static_cast<FixedSubfield>(Len),
+                     Buf + sizeof(FixedSubfield));
+  memcpy(Buf + kTagLenSize, Data, Len);
+  // Pad out to word alignment
+  if (PadLen) {
+    memset(Buf + FieldsLen, 0, PadLen);
+  }
+  return true;
+}
+
+bool NaClBitcodeHeaderField::Read(const uint8_t *Buf, size_t BufLen) {
+  if (BufLen < kTagLenSize)
+    return false;
+  FixedSubfield IdField;
+  ReadFixedSubfield(&IdField, Buf);
+  FixedSubfield LengthField;
+  ReadFixedSubfield(&LengthField, Buf + sizeof(FixedSubfield));
+  size_t Length = static_cast<size_t>(LengthField);
+  if (BufLen < kTagLenSize + Length)
+    return false;
+  if (Len != Length) {
+    // Need to reallocate data buffer.
+ if (Data) + delete[] Data; + Data = new uint8_t[Length]; + } + Len = Length; + DecodeTypedID(IdField, ID, FType); + memcpy(Data, Buf + kTagLenSize, Len); + return true; +} + +std::string NaClBitcodeHeaderField::Contents() const { + std::string buffer; + raw_string_ostream ss(buffer); + switch (ID) { + case kPNaClVersion: + ss << "PNaCl Version"; + break; + case kInvalid: + ss << "Invalid"; + break; + default: + report_fatal_error("PNaCl bitcode file contains unknown field tag"); + } + ss << ": "; + switch (FType) { + case kUInt32Type: + ss << GetUInt32Value(); + break; + case kBufferType: + ss << "["; + for (size_t i = 0; i < Len; ++i) { + if (i) + ss << " "; + ss << format("%02x", Data[i]); + } + ss << "]"; + break; + default: + report_fatal_error("PNaCl bitcode file contains unknown field type"); + } + return ss.str(); +} + +NaClBitcodeHeader::NaClBitcodeHeader() + : HeaderSize(0), UnsupportedMessage(), IsSupportedFlag(false), + IsReadableFlag(false), PNaClVersion(0) {} + +NaClBitcodeHeader::~NaClBitcodeHeader() { + for (std::vector::const_iterator + Iter = Fields.begin(), + IterEnd = Fields.end(); + Iter != IterEnd; ++Iter) { + delete *Iter; + } +} + +bool NaClBitcodeHeader::ReadPrefix(const unsigned char *BufPtr, + const unsigned char *BufEnd, + unsigned &NumFields, unsigned &NumBytes) { + // Must contain PEXE. + if (!isNaClBitcode(BufPtr, BufEnd)) { + UnsupportedMessage = "Invalid PNaCl bitcode header"; + if (isBitcode(BufPtr, BufEnd)) { + UnsupportedMessage += " (to run in Chrome, bitcode files must be " + "finalized using pnacl-finalize)"; + } + return true; + } + BufPtr += WordSize; + + // Read #Fields and number of bytes needed for the header. + if (BufPtr + WordSize > BufEnd) + return UnsupportedError("Bitcode read failure"); + NumFields = static_cast(BufPtr[0]) | + (static_cast(BufPtr[1]) << 8); + NumBytes = static_cast(BufPtr[2]) | + (static_cast(BufPtr[3]) << 8); + BufPtr += WordSize; + return false; +} + +bool NaClBitcodeHeader::ReadFields(const unsigned char *BufPtr, + const unsigned char *BufEnd, + unsigned NumFields, unsigned NumBytes) { + HeaderSize = NumBytes + (2 * WordSize); + + // Read in each field. 
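+  // Each field, as laid out by NaClBitcodeHeaderField::Write above, is a
+  // fixed-width ID/type subfield, a fixed-width length subfield, the data
+  // bytes, and zero padding up to word alignment.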
+ for (size_t i = 0; i < NumFields; ++i) { + NaClBitcodeHeaderField *Field = new NaClBitcodeHeaderField(); + Fields.push_back(Field); + if (!Field->Read(BufPtr, BufEnd - BufPtr)) + return UnsupportedError("Bitcode read failure"); + size_t FieldSize = Field->GetTotalSize(); + BufPtr += FieldSize; + } + return false; +} + +bool NaClBitcodeHeader::Read(const unsigned char *&BufPtr, + const unsigned char *&BufEnd) { + unsigned NumFields; + unsigned NumBytes; + if (ReadPrefix(BufPtr, BufEnd, NumFields, NumBytes)) + return true; // ReadPrefix sets UnsupportedMessage + BufPtr += 2 * WordSize; + + if (ReadFields(BufPtr, BufEnd, NumFields, NumBytes)) + return true; // ReadFields sets UnsupportedMessage + BufPtr += NumBytes; + InstallFields(); + return false; +} + +bool NaClBitcodeHeader::Read(StreamableMemoryObject *Bytes) { + unsigned NumFields; + unsigned NumBytes; + { + unsigned char Buffer[2 * WordSize]; + if (Bytes->readBytes(0, sizeof(Buffer), Buffer)) + return UnsupportedError("Bitcode read failure"); + if (ReadPrefix(Buffer, Buffer + sizeof(Buffer), NumFields, NumBytes)) + return true; // ReadPrefix sets UnsupportedMessage + } + uint8_t *Header = new uint8_t[NumBytes]; + bool failed = + Bytes->readBytes(2 * WordSize, NumBytes, Header) || + ReadFields(Header, Header + NumBytes, NumFields, NumBytes); + delete[] Header; + if (failed) + return UnsupportedError("Bitcode read failure"); + InstallFields(); + return false; +} + +NaClBitcodeHeaderField * +NaClBitcodeHeader::GetTaggedField(NaClBitcodeHeaderField::Tag ID) const { + for (std::vector::const_iterator + Iter = Fields.begin(), + IterEnd = Fields.end(); + Iter != IterEnd; ++Iter) { + if ((*Iter)->GetID() == ID) { + return *Iter; + } + } + return 0; +} + +NaClBitcodeHeaderField *NaClBitcodeHeader::GetField(size_t index) const { + if (index >= Fields.size()) + return 0; + return Fields[index]; +} + +NaClBitcodeHeaderField *GetPNaClVersionPtr(NaClBitcodeHeader *Header) { + if (NaClBitcodeHeaderField *Version = + Header->GetTaggedField(NaClBitcodeHeaderField::kPNaClVersion)) { + if (Version->GetType() == NaClBitcodeHeaderField::kUInt32Type) { + return Version; + } + } + return 0; +} + +void NaClBitcodeHeader::InstallFields() { + // Assume supported until contradicted. + bool UpdatedUnsupportedMessage = false; + IsSupportedFlag = true; + IsReadableFlag = true; + UnsupportedMessage = "Supported"; + PNaClVersion = 0; + if (NaClBitcodeHeaderField *Version = GetPNaClVersionPtr(this)) { + PNaClVersion = Version->GetUInt32Value(); + } + if (PNaClVersion != 2) { + IsSupportedFlag = false; + IsReadableFlag = false; + UpdatedUnsupportedMessage = true; + UnsupportedMessage.clear(); + raw_string_ostream UnsupportedStream(UnsupportedMessage); + UnsupportedStream << "Unsupported PNaCl bitcode version: " + << PNaClVersion << "\n"; + UnsupportedStream.flush(); + } + if (Fields.size() != 1) { + IsSupportedFlag = false; + IsReadableFlag = false; + if (!UpdatedUnsupportedMessage) + UnsupportedMessage = "Unknown header field(s) found"; + } +} diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeParser.cpp b/lib/Bitcode/NaCl/Reader/NaClBitcodeParser.cpp new file mode 100644 index 000000000000..9d7ef39d32b2 --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeParser.cpp @@ -0,0 +1,97 @@ +//===- NaClBitcodeParser.cpp ----------------------------------------------===// +// Low-level bitcode driver to parse PNaCl bitcode files. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "NaClBitcodeParser" + +#include "llvm/Bitcode/NaCl/NaClBitcodeParser.h" +#include "llvm/Support/Debug.h" + +using namespace llvm; + +void NaClBitcodeRecord::Print(raw_ostream& os) const { + DEBUG(os << "Block " << GetBlockID() << ", Code " << Code + << ", EntryID " << Entry.ID << ", <"; + for (unsigned i = 0, e = Values.size(); i != e; ++i) { + if (i > 0) os << " "; + os << Values[i]; + } + os << ">"); +} + +NaClBitcodeParser::~NaClBitcodeParser() {} + +bool NaClBitcodeParser::Parse() { + Record.ReadEntry(); + + if (Record.GetEntryKind() != NaClBitstreamEntry::SubBlock) + return Error("Expected block, but not found"); + + return ParseBlock(Record.GetEntryID()); +} + +bool NaClBitcodeParser::ParseThisBlock() { + if (GetBlockID() == naclbitc::BLOCKINFO_BLOCK_ID) { + // BLOCKINFO is a special part of the stream. Let the bitstream + // reader process this block. + // + // TODO(kschimpf): Move this out of the bitstream reader, so that + // we have simplier API's for this class. + EnterBlockInfo(); + if (Record.GetCursor().ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + RemoveBlockBitsFromEnclosingBlock(); + ExitBlockInfo(); + return false; + } + + // Regular block. Enter subblock. + unsigned NumWords; + if (Record.GetCursor().EnterSubBlock(GetBlockID(), &NumWords)) { + return Error("Malformed block record"); + } + + EnterBlock(NumWords); + + // Process records. + while (1) { + if (Record.GetCursor().AtEndOfStream()) + return Error("Premature end of bitstream"); + + // Read entry defining type of entry. + Record.ReadEntry(); + + switch (Record.GetEntryKind()) { + case NaClBitstreamEntry::Error: + return Error("malformed bitcode file"); + case NaClBitstreamEntry::EndBlock: { + ExitBlock(); + RemoveBlockBitsFromEnclosingBlock(); + return false; + } + case NaClBitstreamEntry::SubBlock: { + if (ParseBlock(Record.GetEntryID())) return true; + break; + } + case NaClBitstreamEntry::Record: + // The interesting case. + if (Record.GetEntryID() == naclbitc::DEFINE_ABBREV) { + //Process any block-local abbreviation definitions. + Record.GetCursor().ReadAbbrevRecord(); + ProcessRecordAbbrev(); + } else { + // Read in a record. + Record.ReadValues(); + ProcessRecord(); + } + break; + } + } + return false; +} diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp new file mode 100644 index 000000000000..4238575b5821 --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.cpp @@ -0,0 +1,1737 @@ +//===- NaClBitcodeReader.cpp ----------------------------------------------===// +// Internal NaClBitcodeReader implementation +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "NaClBitcodeReader" + +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" +#include "NaClBitcodeReader.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/AutoUpgrade.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DataStream.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +void NaClBitcodeReader::FreeState() { + if (BufferOwned) + delete Buffer; + Buffer = 0; + std::vector().swap(TypeList); + ValueList.clear(); + + std::vector().swap(FunctionsWithBodies); + DeferredFunctionInfo.clear(); +} + +//===----------------------------------------------------------------------===// +// Helper functions to implement forward reference resolution, etc. +//===----------------------------------------------------------------------===// + +/// ConvertToString - Convert a string from a record into an std::string, return +/// true on failure. +template +static bool ConvertToString(ArrayRef Record, unsigned Idx, + StrTy &Result) { + if (Idx > Record.size()) + return true; + + for (unsigned i = Idx, e = Record.size(); i != e; ++i) + Result += (char)Record[i]; + return false; +} + +static void SetDecodedLinkage(GlobalValue* Global, unsigned Val) { + switch (Val) { + default: // Map unknown/new linkages to external + case 0: Global->setLinkage(GlobalValue::ExternalLinkage); break; + case 1: Global->setLinkage(GlobalValue::WeakAnyLinkage); break; + case 2: Global->setLinkage(GlobalValue::AppendingLinkage); break; + case 3: Global->setLinkage(GlobalValue::InternalLinkage); break; + case 4: Global->setLinkage(GlobalValue::LinkOnceAnyLinkage); break; + case 5: Global->setDLLStorageClass(GlobalValue::DLLImportStorageClass); break; + case 6: Global->setDLLStorageClass(GlobalValue::DLLExportStorageClass); break; + case 7: Global->setLinkage(GlobalValue::ExternalWeakLinkage); break; + case 8: Global->setLinkage(GlobalValue::CommonLinkage); break; + case 9: Global->setLinkage(GlobalValue::PrivateLinkage); break; + case 10: Global->setLinkage(GlobalValue::WeakODRLinkage); break; + case 11: Global->setLinkage(GlobalValue::LinkOnceODRLinkage); break; + case 12: Global->setLinkage(GlobalValue::AvailableExternallyLinkage); break; + case 13: Global->setLinkage(GlobalValue::LinkerPrivateLinkage); break; + case 14: Global->setLinkage(GlobalValue::LinkerPrivateWeakLinkage); break; + case 15: + // Whoops. 
just use LinkOnceODRLinkage for now + Global->setLinkage(GlobalValue::LinkOnceODRLinkage); + break; + } +} + +static int GetDecodedCastOpcode(unsigned Val) { + switch (Val) { + default: return -1; + case naclbitc::CAST_TRUNC : return Instruction::Trunc; + case naclbitc::CAST_ZEXT : return Instruction::ZExt; + case naclbitc::CAST_SEXT : return Instruction::SExt; + case naclbitc::CAST_FPTOUI : return Instruction::FPToUI; + case naclbitc::CAST_FPTOSI : return Instruction::FPToSI; + case naclbitc::CAST_UITOFP : return Instruction::UIToFP; + case naclbitc::CAST_SITOFP : return Instruction::SIToFP; + case naclbitc::CAST_FPTRUNC : return Instruction::FPTrunc; + case naclbitc::CAST_FPEXT : return Instruction::FPExt; + case naclbitc::CAST_BITCAST : return Instruction::BitCast; + } +} +static int GetDecodedBinaryOpcode(unsigned Val, Type *Ty) { + switch (Val) { + default: return -1; + case naclbitc::BINOP_ADD: + return Ty->isFPOrFPVectorTy() ? Instruction::FAdd : Instruction::Add; + case naclbitc::BINOP_SUB: + return Ty->isFPOrFPVectorTy() ? Instruction::FSub : Instruction::Sub; + case naclbitc::BINOP_MUL: + return Ty->isFPOrFPVectorTy() ? Instruction::FMul : Instruction::Mul; + case naclbitc::BINOP_UDIV: return Instruction::UDiv; + case naclbitc::BINOP_SDIV: + return Ty->isFPOrFPVectorTy() ? Instruction::FDiv : Instruction::SDiv; + case naclbitc::BINOP_UREM: return Instruction::URem; + case naclbitc::BINOP_SREM: + return Ty->isFPOrFPVectorTy() ? Instruction::FRem : Instruction::SRem; + case naclbitc::BINOP_SHL: return Instruction::Shl; + case naclbitc::BINOP_LSHR: return Instruction::LShr; + case naclbitc::BINOP_ASHR: return Instruction::AShr; + case naclbitc::BINOP_AND: return Instruction::And; + case naclbitc::BINOP_OR: return Instruction::Or; + case naclbitc::BINOP_XOR: return Instruction::Xor; + } +} + +static CallingConv::ID GetDecodedCallingConv(unsigned Val) { + switch (Val) { + default: + report_fatal_error("PNaCl bitcode contains invalid calling conventions."); + case naclbitc::C_CallingConv: return CallingConv::C; + } +} + +static FCmpInst::Predicate GetDecodedFCmpPredicate(unsigned Val) { + switch (Val) { + default: + report_fatal_error( + "PNaCl bitcode contains invalid floating comparison predicate"); + case naclbitc::FCMP_FALSE: return FCmpInst::FCMP_FALSE; + case naclbitc::FCMP_OEQ: return FCmpInst::FCMP_OEQ; + case naclbitc::FCMP_OGT: return FCmpInst::FCMP_OGT; + case naclbitc::FCMP_OGE: return FCmpInst::FCMP_OGE; + case naclbitc::FCMP_OLT: return FCmpInst::FCMP_OLT; + case naclbitc::FCMP_OLE: return FCmpInst::FCMP_OLE; + case naclbitc::FCMP_ONE: return FCmpInst::FCMP_ONE; + case naclbitc::FCMP_ORD: return FCmpInst::FCMP_ORD; + case naclbitc::FCMP_UNO: return FCmpInst::FCMP_UNO; + case naclbitc::FCMP_UEQ: return FCmpInst::FCMP_UEQ; + case naclbitc::FCMP_UGT: return FCmpInst::FCMP_UGT; + case naclbitc::FCMP_UGE: return FCmpInst::FCMP_UGE; + case naclbitc::FCMP_ULT: return FCmpInst::FCMP_ULT; + case naclbitc::FCMP_ULE: return FCmpInst::FCMP_ULE; + case naclbitc::FCMP_UNE: return FCmpInst::FCMP_UNE; + case naclbitc::FCMP_TRUE: return FCmpInst::FCMP_TRUE; + } +} + +static ICmpInst::Predicate GetDecodedICmpPredicate(unsigned Val) { + switch (Val) { + default: + report_fatal_error( + "PNaCl bitcode contains invalid integer comparison predicate"); + case naclbitc::ICMP_EQ: return ICmpInst::ICMP_EQ; + case naclbitc::ICMP_NE: return ICmpInst::ICMP_NE; + case naclbitc::ICMP_UGT: return ICmpInst::ICMP_UGT; + case naclbitc::ICMP_UGE: return ICmpInst::ICMP_UGE; + case naclbitc::ICMP_ULT: return 
ICmpInst::ICMP_ULT; + case naclbitc::ICMP_ULE: return ICmpInst::ICMP_ULE; + case naclbitc::ICMP_SGT: return ICmpInst::ICMP_SGT; + case naclbitc::ICMP_SGE: return ICmpInst::ICMP_SGE; + case naclbitc::ICMP_SLT: return ICmpInst::ICMP_SLT; + case naclbitc::ICMP_SLE: return ICmpInst::ICMP_SLE; + } +} + +void NaClBitcodeReaderValueList::AssignValue(Value *V, unsigned Idx) { + assert(V); + if (Idx == size()) { + push_back(V); + return; + } + + if (Idx >= size()) + resize(Idx+1); + + WeakVH &OldV = ValuePtrs[Idx]; + if (OldV == 0) { + OldV = V; + return; + } + + // If there was a forward reference to this value, replace it. + Value *PrevVal = OldV; + OldV->replaceAllUsesWith(V); + delete PrevVal; +} + +void NaClBitcodeReaderValueList::AssignGlobalVar(GlobalVariable *GV, + unsigned Idx) { + assert(GV); + + if (Idx == size()) { + push_back(GV); + return; + } + + if (Idx >= size()) + resize(Idx+1); + + WeakVH &OldV = ValuePtrs[Idx]; + if (OldV == 0) { + OldV = GV; + return; + } + + // If there was a forward reference to this value, replace it. + Value *PrevVal = OldV; + GlobalVariable *Placeholder = cast(PrevVal); + Placeholder->replaceAllUsesWith( + ConstantExpr::getBitCast(GV, Placeholder->getType())); + Placeholder->eraseFromParent(); + ValuePtrs[Idx] = GV; +} + +void NaClBitcodeReaderValueList::OverwriteValue(Value *V, unsigned Idx) { + ValuePtrs[Idx] = V; +} + +Value *NaClBitcodeReaderValueList::getValueFwdRef(unsigned Idx) { + if (Idx >= size()) + return 0; + + if (Value *V = ValuePtrs[Idx]) + return V; + + return 0; +} + +bool NaClBitcodeReaderValueList::createValueFwdRef(unsigned Idx, Type *Ty) { + if (Idx >= size()) + resize(Idx + 1); + + // Return an error if this a duplicate definition of Idx. + if (ValuePtrs[Idx]) + return true; + + // No type specified, must be invalid reference. + if (Ty == 0) + return true; + + // Create a placeholder, which will later be RAUW'd. + ValuePtrs[Idx] = new Argument(Ty); + return false; +} + +Constant *NaClBitcodeReaderValueList::getOrCreateGlobalVarRef( + unsigned Idx, Module *M) { + // First make sure the element for Idx is defined. + if (Idx >= size()) + resize(Idx + 1); + + // Now get its value (if applicable). + if (Value *V = ValuePtrs[Idx]) + return dyn_cast(V); + + // Create a placeholder, which will later be RAUW'd. + Type *PlaceholderType = Type::getInt8Ty(Context); + + Constant *C = + new GlobalVariable(*M, PlaceholderType, false, + GlobalValue::ExternalLinkage, 0); + ValuePtrs[Idx] = C; + return C; +} + +Type *NaClBitcodeReader::getTypeByID(unsigned ID) { + // The type table size is always specified correctly. + if (ID >= TypeList.size()) + return 0; + + if (Type *Ty = TypeList[ID]) + return Ty; + + // If we have a forward reference, the only possible case is when it is to a + // named struct. Just create a placeholder for now. 
+ return TypeList[ID] = StructType::create(Context); +} + + +//===----------------------------------------------------------------------===// +// Functions for parsing blocks from the bitcode file +//===----------------------------------------------------------------------===// + + +bool NaClBitcodeReader::ParseTypeTable() { + DEBUG(dbgs() << "-> ParseTypeTable\n"); + if (Stream.EnterSubBlock(naclbitc::TYPE_BLOCK_ID_NEW)) + return Error("Malformed block record"); + + bool result = ParseTypeTableBody(); + if (!result) + DEBUG(dbgs() << "<- ParseTypeTable\n"); + return result; +} + +bool NaClBitcodeReader::ParseTypeTableBody() { + if (!TypeList.empty()) + return Error("Multiple TYPE_BLOCKs found!"); + + SmallVector Record; + unsigned NumRecords = 0; + + // Read all the records for this type table. + while (1) { + NaClBitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case NaClBitstreamEntry::SubBlock: // Handled for us already. + case NaClBitstreamEntry::Error: + Error("Error in the type table block"); + return true; + case NaClBitstreamEntry::EndBlock: + if (NumRecords != TypeList.size()) + return Error("Invalid type forward reference in TYPE_BLOCK"); + return false; + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + Type *ResultTy = 0; + unsigned TypeCode = Stream.readRecord(Entry.ID, Record); + switch (TypeCode) { + default: { + std::string Message; + raw_string_ostream StrM(Message); + StrM << "Unknown type code in type table: " << TypeCode; + StrM.flush(); + return Error(Message); + } + case naclbitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] + // TYPE_CODE_NUMENTRY contains a count of the number of types in the + // type list. This allows us to reserve space. + if (Record.size() < 1) + return Error("Invalid TYPE_CODE_NUMENTRY record"); + TypeList.resize(Record[0]); + continue; + case naclbitc::TYPE_CODE_VOID: // VOID + ResultTy = Type::getVoidTy(Context); + break; + case naclbitc::TYPE_CODE_FLOAT: // FLOAT + ResultTy = Type::getFloatTy(Context); + break; + case naclbitc::TYPE_CODE_DOUBLE: // DOUBLE + ResultTy = Type::getDoubleTy(Context); + break; + case naclbitc::TYPE_CODE_INTEGER: // INTEGER: [width] + if (Record.size() < 1) + return Error("Invalid Integer type record"); + + ResultTy = IntegerType::get(Context, Record[0]); + break; + case naclbitc::TYPE_CODE_FUNCTION: { + // FUNCTION: [vararg, retty, paramty x N] + if (Record.size() < 2) + return Error("Invalid FUNCTION type record"); + SmallVector ArgTys; + for (unsigned i = 2, e = Record.size(); i != e; ++i) { + if (Type *T = getTypeByID(Record[i])) + ArgTys.push_back(T); + else + break; + } + + ResultTy = getTypeByID(Record[1]); + if (ResultTy == 0 || ArgTys.size() < Record.size()-2) + return Error("invalid type in function type"); + + ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); + break; + } + } + + if (NumRecords >= TypeList.size()) + return Error("invalid TYPE table"); + assert(ResultTy && "Didn't read a type?"); + assert(TypeList[NumRecords] == 0 && "Already read type?"); + TypeList[NumRecords++] = ResultTy; + } +} + +bool NaClBitcodeReader::ParseGlobalVars() { + if (Stream.EnterSubBlock(naclbitc::GLOBALVAR_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // True when processing a global variable. Stays true until all records + // are processed, and the global variable is created. 
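+  // A single global variable is described by a short record sequence
+  // (sketch, based on the record handling below):
+  //   GLOBALVAR_VAR [alignment, isconst]
+  //   GLOBALVAR_COMPOUND [n]        (optional; only when n > 1 initializers)
+  //   n initializer records: GLOBALVAR_ZEROFILL, GLOBALVAR_DATA, and/or
+  //   GLOBALVAR_RELOC.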
+ bool ProcessingGlobal = false; + // The alignment value defined for the global variable. + unsigned VarAlignment = 0; + // True if the variable is read-only. + bool VarIsConstant = false; + // The initializer for the global variable. + SmallVector VarInit; + // The number of initializers needed for the global variable. + unsigned VarInitializersNeeded = 0; + unsigned FirstValueNo = ValueList.size(); + // The index of the next global variable. + unsigned NextValueNo = FirstValueNo; + // The number of expected global variable definitions. + unsigned NumGlobals = 0; + + // Read all global variable records. + while (1) { + NaClBitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + switch (Entry.Kind) { + case NaClBitstreamEntry::SubBlock: + case NaClBitstreamEntry::Error: + return Error("Error in the global vars block"); + case NaClBitstreamEntry::EndBlock: + if (ProcessingGlobal || NumGlobals != (NextValueNo - FirstValueNo)) + return Error("Error in the global vars block"); + return false; + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + unsigned Bitcode = Stream.readRecord(Entry.ID, Record); + switch (Bitcode) { + default: return Error("Unknown global variable entry"); + case naclbitc::GLOBALVAR_VAR: + // Start the definition of a global variable. + if (ProcessingGlobal || Record.size() != 2) + return Error("Bad GLOBALVAR_VAR record"); + ProcessingGlobal = true; + VarAlignment = (1 << Record[0]) >> 1; + VarIsConstant = Record[1] != 0; + // Assume (by default) there is a single initializer. + VarInitializersNeeded = 1; + break; + case naclbitc::GLOBALVAR_COMPOUND: + // Global variable has multiple initializers. Changes the + // default number of initializers to the given value in + // Record[0]. + if (!ProcessingGlobal || !VarInit.empty() || + VarInitializersNeeded != 1 || Record.size() != 1) + return Error("Bad GLOBALVAR_COMPOUND record"); + VarInitializersNeeded = Record[0]; + break; + case naclbitc::GLOBALVAR_ZEROFILL: { + // Define an initializer that defines a sequence of zero-filled bytes. + if (!ProcessingGlobal || Record.size() != 1) + return Error("Bad GLOBALVAR_ZEROFILL record"); + Type *Ty = ArrayType::get(Type::getInt8Ty(Context), Record[0]); + Constant *Zero = ConstantAggregateZero::get(Ty); + VarInit.push_back(Zero); + break; + } + case naclbitc::GLOBALVAR_DATA: { + // Defines an initializer defined by a sequence of byte values. + if (!ProcessingGlobal || Record.size() < 1) + return Error("Bad GLOBALVAR_DATA record"); + unsigned Size = Record.size(); + uint8_t *Buf = new uint8_t[Size]; + assert(Buf); + for (unsigned i = 0; i < Size; ++i) + Buf[i] = Record[i]; + Constant *Init = ConstantDataArray::get( + Context, ArrayRef(Buf, Buf + Size)); + VarInit.push_back(Init); + delete[] Buf; + break; + } + case naclbitc::GLOBALVAR_RELOC: { + // Define a relocation initializer. 
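+      // Record layout (checked below): [value index of the base, optional
+      // 32-bit addend].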
+ if (!ProcessingGlobal || Record.size() < 1 || Record.size() > 2) + return Error("Bad GLOBALVAR_RELOC record"); + Constant *BaseVal = + ValueList.getOrCreateGlobalVarRef(Record[0], TheModule); + if (BaseVal == 0) + return Error("Bad base value in GLOBALVAR_RELOC record"); + Type *IntPtrType = IntegerType::get(Context, 32); + Constant *Val = ConstantExpr::getPtrToInt(BaseVal, IntPtrType); + if (Record.size() == 2) { + uint32_t Addend = Record[1]; + Val = ConstantExpr::getAdd(Val, ConstantInt::get(IntPtrType, + Addend)); + } + VarInit.push_back(Val); + break; + } + case naclbitc::GLOBALVAR_COUNT: + if (Record.size() != 1 || NumGlobals != 0) + return Error("Invalid global count record"); + NumGlobals = Record[0]; + break; + } + + // If more initializers needed for global variable, continue processing. + if (!ProcessingGlobal || VarInit.size() < VarInitializersNeeded) + continue; + + Constant *Init = 0; + switch (VarInit.size()) { + case 0: + return Error("No initializer for global variable in global vars block"); + case 1: + Init = VarInit[0]; + break; + default: + Init = ConstantStruct::getAnon(Context, VarInit, true); + break; + } + GlobalVariable *GV = new GlobalVariable( + *TheModule, Init->getType(), VarIsConstant, + GlobalValue::InternalLinkage, Init, ""); + GV->setAlignment(VarAlignment); + ValueList.AssignGlobalVar(GV, NextValueNo); + ++NextValueNo; + ProcessingGlobal = false; + VarAlignment = 0; + VarIsConstant = false; + VarInitializersNeeded = 0; + VarInit.clear(); + } +} + +bool NaClBitcodeReader::ParseValueSymbolTable() { + DEBUG(dbgs() << "-> ParseValueSymbolTable\n"); + if (Stream.EnterSubBlock(naclbitc::VALUE_SYMTAB_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // Read all the records for this value table. + SmallString<128> ValueName; + while (1) { + NaClBitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case NaClBitstreamEntry::SubBlock: // Handled for us already. + case NaClBitstreamEntry::Error: + return Error("malformed value symbol table block"); + case NaClBitstreamEntry::EndBlock: + DEBUG(dbgs() << "<- ParseValueSymbolTable\n"); + return false; + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: unknown type. 
+ break; + case naclbitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N] + if (ConvertToString(Record, 1, ValueName)) + return Error("Invalid VST_ENTRY record"); + unsigned ValueID = Record[0]; + if (ValueID >= ValueList.size()) + return Error("Invalid Value ID in VST_ENTRY record"); + Value *V = ValueList[ValueID]; + + V->setName(StringRef(ValueName.data(), ValueName.size())); + ValueName.clear(); + break; + } + case naclbitc::VST_CODE_BBENTRY: { + if (ConvertToString(Record, 1, ValueName)) + return Error("Invalid VST_BBENTRY record"); + BasicBlock *BB = getBasicBlock(Record[0]); + if (BB == 0) + return Error("Invalid BB ID in VST_BBENTRY record"); + + BB->setName(StringRef(ValueName.data(), ValueName.size())); + ValueName.clear(); + break; + } + } + } +} + +static APInt ReadWideAPInt(ArrayRef Vals, unsigned TypeBits) { + SmallVector Words(Vals.size()); + std::transform(Vals.begin(), Vals.end(), Words.begin(), + NaClDecodeSignRotatedValue); + + return APInt(TypeBits, Words); +} + +bool NaClBitcodeReader::ParseConstants() { + DEBUG(dbgs() << "-> ParseConstants\n"); + if (Stream.EnterSubBlock(naclbitc::CONSTANTS_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // Read all the records for this value table. + Type *CurTy = Type::getInt32Ty(Context); + unsigned NextCstNo = ValueList.size(); + while (1) { + NaClBitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case NaClBitstreamEntry::SubBlock: // Handled for us already. + case NaClBitstreamEntry::Error: + return Error("malformed block record in AST file"); + case NaClBitstreamEntry::EndBlock: + if (NextCstNo != ValueList.size()) + return Error("Invalid constant reference!"); + DEBUG(dbgs() << "<- ParseConstants\n"); + return false; + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + Record.clear(); + Value *V = 0; + unsigned BitCode = Stream.readRecord(Entry.ID, Record); + switch (BitCode) { + default: { + std::string Message; + raw_string_ostream StrM(Message); + StrM << "Invalid Constant code: " << BitCode; + StrM.flush(); + return Error(Message); + } + case naclbitc::CST_CODE_UNDEF: // UNDEF + V = UndefValue::get(CurTy); + break; + case naclbitc::CST_CODE_SETTYPE: // SETTYPE: [typeid] + if (Record.empty()) + return Error("Malformed CST_SETTYPE record"); + if (Record[0] >= TypeList.size()) + return Error("Invalid Type ID in CST_SETTYPE record"); + CurTy = TypeList[Record[0]]; + continue; // Skip the ValueList manipulation. + case naclbitc::CST_CODE_INTEGER: // INTEGER: [intval] + if (!CurTy->isIntegerTy() || Record.empty()) + return Error("Invalid CST_INTEGER record"); + V = ConstantInt::get(CurTy, NaClDecodeSignRotatedValue(Record[0])); + break; + case naclbitc::CST_CODE_FLOAT: { // FLOAT: [fpval] + if (Record.empty()) + return Error("Invalid FLOAT record"); + if (CurTy->isFloatTy()) + V = ConstantFP::get(Context, APFloat(APFloat::IEEEsingle, + APInt(32, (uint32_t)Record[0]))); + else if (CurTy->isDoubleTy()) + V = ConstantFP::get(Context, APFloat(APFloat::IEEEdouble, + APInt(64, Record[0]))); + else + return Error("Unknown type for FLOAT record"); + break; + } + } + + ValueList.AssignValue(V, NextCstNo); + ++NextCstNo; + } +} + +/// RememberAndSkipFunctionBody - When we see the block for a function body, +/// remember where it is and then skip it. This lets us lazily deserialize the +/// functions. 
+bool NaClBitcodeReader::RememberAndSkipFunctionBody() { + DEBUG(dbgs() << "-> RememberAndSkipFunctionBody\n"); + // Get the function we are talking about. + if (FunctionsWithBodies.empty()) + return Error("Insufficient function protos"); + + Function *Fn = FunctionsWithBodies.back(); + FunctionsWithBodies.pop_back(); + + // Save the current stream state. + uint64_t CurBit = Stream.GetCurrentBitNo(); + DeferredFunctionInfo[Fn] = CurBit; + + // Skip over the function block for now. + if (Stream.SkipBlock()) + return Error("Malformed block record"); + DEBUG(dbgs() << "<- RememberAndSkipFunctionBody\n"); + return false; +} + +bool NaClBitcodeReader::GlobalCleanup() { + // Look for intrinsic functions which need to be upgraded at some point + for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); + FI != FE; ++FI) { + Function *NewFn; + if (UpgradeIntrinsicFunction(FI, NewFn)) + UpgradedIntrinsics.push_back(std::make_pair(FI, NewFn)); + } + + // Look for global variables which need to be renamed. + for (Module::global_iterator + GI = TheModule->global_begin(), GE = TheModule->global_end(); + GI != GE; ++GI) + UpgradeGlobalVariable(GI); + return false; +} + +FunctionType *NaClBitcodeReader::AddPointerTypesToIntrinsicType( + StringRef Name, FunctionType *FTy) { + Type *ReturnTy = FTy->getReturnType(); + SmallVector ArgTypes(FTy->param_begin(), FTy->param_end()); + + // Ideally we wouldn't need a list of supported intrinsics here, but + // Intrinsic::* doesn't provide a function for recovering the + // expected type of an intrinsic given its full name. + // TODO(mseaborn): We could reuse the intrinsic list from + // PNaClABIVerifyModule.cpp here. + if (Name == "llvm.nacl.read.tp" || + Name == "llvm.stacksave") { + ReturnTy = Type::getInt8PtrTy(Context); + } else if (Name == "llvm.nacl.setjmp" || + Name == "llvm.nacl.longjmp" || + Name == "llvm.stackrestore" || + Name.startswith("llvm.memset.")) { + assert(ArgTypes.size() >= 1); + ArgTypes[0] = Type::getInt8PtrTy(Context); + } else if (Name.startswith("llvm.memcpy.") || + Name.startswith("llvm.memmove.")) { + assert(ArgTypes.size() >= 2); + ArgTypes[0] = Type::getInt8PtrTy(Context); + ArgTypes[1] = Type::getInt8PtrTy(Context); + } else if (Name.startswith("llvm.nacl.atomic.load.") || + Name.startswith("llvm.nacl.atomic.cmpxchg.")) { + assert(ArgTypes.size() >= 1); + ArgTypes[0] = ReturnTy->getPointerTo(); + } else if (Name.startswith("llvm.nacl.atomic.store.")) { + assert(ArgTypes.size() >= 2); + ArgTypes[1] = ArgTypes[0]->getPointerTo(); + } else if (Name.startswith("llvm.nacl.atomic.rmw.")) { + assert(ArgTypes.size() >= 3); + ArgTypes[1] = ArgTypes[2]->getPointerTo(); + } else if (Name == "llvm.nacl.atomic.is.lock.free") { + assert(ArgTypes.size() >= 2); + ArgTypes[1] = Type::getInt8PtrTy(Context); + } + return FunctionType::get(ReturnTy, ArgTypes, false); +} + +void NaClBitcodeReader::AddPointerTypesToIntrinsicParams() { + for (unsigned Index = 0, E = ValueList.size(); Index < E; ++Index) { + if (Function *Func = dyn_cast(ValueList[Index])) { + if (Func->isIntrinsic()) { + FunctionType *FTy = AddPointerTypesToIntrinsicType( + Func->getName(), Func->getFunctionType()); + Function *NewIntrinsic = Function::Create( + FTy, GlobalValue::ExternalLinkage, "", TheModule); + NewIntrinsic->takeName(Func); + ValueList.OverwriteValue(NewIntrinsic, Index); + Func->eraseFromParent(); + } + } + } +} + +bool NaClBitcodeReader::ParseModule(bool Resume) { + DEBUG(dbgs() << "-> ParseModule\n"); + if (Resume) + Stream.JumpToBit(NextUnreadBit); + 
else if (Stream.EnterSubBlock(naclbitc::MODULE_BLOCK_ID)) + return Error("Malformed block record"); + + SmallVector Record; + + // Read all the records for this module. + while (1) { + NaClBitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + case NaClBitstreamEntry::Error: + Error("malformed module block"); + return true; + case NaClBitstreamEntry::EndBlock: + DEBUG(dbgs() << "<- ParseModule\n"); + return GlobalCleanup(); + + case NaClBitstreamEntry::SubBlock: + switch (Entry.ID) { + default: { + std::string Message; + raw_string_ostream StrM(Message); + StrM << "Unknown block ID: " << Entry.ID; + return Error(StrM.str()); + } + case naclbitc::BLOCKINFO_BLOCK_ID: + if (Stream.ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + break; + case naclbitc::TYPE_BLOCK_ID_NEW: + if (ParseTypeTable()) + return true; + break; + case naclbitc::GLOBALVAR_BLOCK_ID: + if (ParseGlobalVars()) + return true; + break; + case naclbitc::VALUE_SYMTAB_BLOCK_ID: + if (ParseValueSymbolTable()) + return true; + SeenValueSymbolTable = true; + // Now that we know the names of the intrinsics, we can add + // pointer types to the intrinsic declarations' types. + AddPointerTypesToIntrinsicParams(); + break; + case naclbitc::FUNCTION_BLOCK_ID: + // If this is the first function body we've seen, reverse the + // FunctionsWithBodies list. + if (!SeenFirstFunctionBody) { + std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end()); + if (GlobalCleanup()) + return true; + SeenFirstFunctionBody = true; + } + + if (RememberAndSkipFunctionBody()) + return true; + + // For streaming bitcode, suspend parsing when we reach the function + // bodies. Subsequent materialization calls will resume it when + // necessary. For streaming, the function bodies must be at the end of + // the bitcode. If the bitcode file is old, the symbol table will be + // at the end instead and will not have been seen yet. In this case, + // just finish the parse now. + if (LazyStreamer && SeenValueSymbolTable) { + NextUnreadBit = Stream.GetCurrentBitNo(); + DEBUG(dbgs() << "<- ParseModule\n"); + return false; + } + break; + } + continue; + + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. + unsigned Selector = Stream.readRecord(Entry.ID, Record); + switch (Selector) { + default: { + std::string Message; + raw_string_ostream StrM(Message); + StrM << "Invalid MODULE_CODE: " << Selector; + StrM.flush(); + return Error(Message); + } + case naclbitc::MODULE_CODE_VERSION: { // VERSION: [version#] + if (Record.size() < 1) + return Error("Malformed MODULE_CODE_VERSION"); + // Only version #1 is supported for PNaCl. Version #0 is not supported. 
+ unsigned module_version = Record[0]; + if (module_version != 1) + return Error("Unknown bitstream version!"); + break; + } + // FUNCTION: [type, callingconv, isproto, linkage] + case naclbitc::MODULE_CODE_FUNCTION: { + if (Record.size() < 4) + return Error("Invalid MODULE_CODE_FUNCTION record"); + Type *Ty = getTypeByID(Record[0]); + if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record"); + FunctionType *FTy = dyn_cast(Ty); + if (!FTy) + return Error("Function not declared with a function type!"); + + Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, + "", TheModule); + + Func->setCallingConv(GetDecodedCallingConv(Record[1])); + bool isProto = Record[2]; + SetDecodedLinkage(Func, Record[3]); + ValueList.push_back(Func); + + // If this is a function with a body, remember the prototype we are + // creating now, so that we can match up the body with them later. + if (!isProto) { + FunctionsWithBodies.push_back(Func); + if (LazyStreamer) DeferredFunctionInfo[Func] = 0; + } + break; + } + } + Record.clear(); + } +} + +bool NaClBitcodeReader::ParseBitcodeInto(Module *M) { + TheModule = 0; + + // PNaCl does not support different DataLayouts in pexes, so we + // implicitly set the DataLayout to the following default. + // + // This is not usually needed by the backend, but it might be used + // by IR passes that the PNaCl translator runs. We set this in the + // reader rather than in pnacl-llc so that 'opt' will also use the + // correct DataLayout if it is run on a pexe. + M->setDataLayout("e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-" + "f32:32:32-f64:64:64-p:32:32:32-v128:32:32"); + + if (InitStream()) return true; // InitSream will set the error string. + + // We expect a number of well-defined blocks, though we don't necessarily + // need to understand them all. + while (1) { + if (Stream.AtEndOfStream()) + return false; + + NaClBitstreamEntry Entry = + Stream.advance(NaClBitstreamCursor::AF_DontAutoprocessAbbrevs); + + switch (Entry.Kind) { + case NaClBitstreamEntry::Error: + Error("malformed module file"); + return true; + case NaClBitstreamEntry::EndBlock: + return false; + + case NaClBitstreamEntry::SubBlock: + switch (Entry.ID) { + case naclbitc::BLOCKINFO_BLOCK_ID: + if (Stream.ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + break; + case naclbitc::MODULE_BLOCK_ID: + // Reject multiple MODULE_BLOCK's in a single bitstream. + if (TheModule) + return Error("Multiple MODULE_BLOCKs in same stream"); + TheModule = M; + if (ParseModule(false)) + return true; + if (LazyStreamer) return false; + break; + default: + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + } + continue; + case NaClBitstreamEntry::Record: + // There should be no records in the top-level of blocks. + return Error("Invalid record at top-level"); + } + } +} + +// Returns true if error occured installing I into BB. +bool NaClBitcodeReader::InstallInstruction( + BasicBlock *BB, Instruction *I) { + // Add instruction to end of current BB. If there is no current BB, reject + // this file. 
+ if (BB == 0) { + delete I; + return Error("Invalid instruction with no BB"); + } + BB->getInstList().push_back(I); + return false; +} + +CastInst * +NaClBitcodeReader::CreateCast(unsigned BBIndex, Instruction::CastOps Op, + Type *CT, Value *V, bool DeferInsertion) { + if (BBIndex >= FunctionBBs.size()) + report_fatal_error("CreateCast on unknown basic block"); + BasicBlockInfo &BBInfo = FunctionBBs[BBIndex]; + NaClBitcodeReaderCast ModeledCast(Op, CT, V); + CastInst *Cast = BBInfo.CastMap[ModeledCast]; + if (Cast == NULL) { + Cast = CastInst::Create(Op, V, CT); + BBInfo.CastMap[ModeledCast] = Cast; + if (DeferInsertion) { + BBInfo.PhiCasts.push_back(Cast); + } + } + if (!DeferInsertion && Cast->getParent() == 0) { + InstallInstruction(BBInfo.BB, Cast); + } + return Cast; +} + +Value *NaClBitcodeReader::ConvertOpToScalar(Value *Op, unsigned BBIndex, + bool DeferInsertion) { + if (Op->getType()->isPointerTy()) { + return CreateCast(BBIndex, Instruction::PtrToInt, IntPtrType, Op, + DeferInsertion); + } + return Op; +} + +Value *NaClBitcodeReader::ConvertOpToType(Value *Op, Type *T, + unsigned BBIndex) { + Type *OpTy = Op->getType(); + if (OpTy == T) return Op; + + if (OpTy->isPointerTy()) { + if (T == IntPtrType) { + return ConvertOpToScalar(Op, BBIndex); + } else { + return CreateCast(BBIndex, Instruction::BitCast, T, Op); + } + } else if (OpTy == IntPtrType) { + return CreateCast(BBIndex, Instruction::IntToPtr, T, Op); + } + + std::string Message; + raw_string_ostream StrM(Message); + StrM << "Can't convert " << *Op << " to type " << *T << "\n"; + report_fatal_error(StrM.str()); +} + +/// ParseFunctionBody - Lazily parse the specified function body block. +bool NaClBitcodeReader::ParseFunctionBody(Function *F) { + DEBUG(dbgs() << "-> ParseFunctionBody\n"); + if (Stream.EnterSubBlock(naclbitc::FUNCTION_BLOCK_ID)) + return Error("Malformed block record"); + + unsigned ModuleValueListSize = ValueList.size(); + + // Add all the function arguments to the value table. + for(Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) + ValueList.push_back(I); + + unsigned NextValueNo = ValueList.size(); + BasicBlock *CurBB = 0; + unsigned CurBBNo = 0; + + // Read all the records. + SmallVector Record; + while (1) { + NaClBitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + case NaClBitstreamEntry::Error: + return Error("Bitcode error in function block"); + case NaClBitstreamEntry::EndBlock: + goto OutOfRecordLoop; + + case NaClBitstreamEntry::SubBlock: + switch (Entry.ID) { + default: // Skip unknown content. + dbgs() << "default skip block\n"; + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + case naclbitc::CONSTANTS_BLOCK_ID: + if (ParseConstants()) + return true; + NextValueNo = ValueList.size(); + break; + case naclbitc::VALUE_SYMTAB_BLOCK_ID: + if (ParseValueSymbolTable()) + return true; + break; + } + continue; + + case NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read a record. 
+ Record.clear(); + Instruction *I = 0; + unsigned BitCode = Stream.readRecord(Entry.ID, Record); + switch (BitCode) { + default: {// Default behavior: reject + std::string Message; + raw_string_ostream StrM(Message); + StrM << "Unknown instruction record: <" << BitCode; + for (unsigned I = 0, E = Record.size(); I != E; ++I) { + StrM << " " << Record[I]; + } + StrM << ">"; + return Error(StrM.str()); + } + + case naclbitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks] + if (Record.size() < 1 || Record[0] == 0) + return Error("Invalid DECLAREBLOCKS record"); + // Create all the basic blocks for the function. + FunctionBBs.resize(Record[0]); + for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) { + BasicBlockInfo &BBInfo = FunctionBBs[i]; + BBInfo.BB = BasicBlock::Create(Context, "", F); + } + CurBB = FunctionBBs.at(0).BB; + continue; + + case naclbitc::FUNC_CODE_INST_BINOP: { + // BINOP: [opval, opval, opcode[, flags]] + // Note: Only old PNaCl bitcode files may contain flags. If + // they are found, we ignore them. + unsigned OpNum = 0; + Value *LHS, *RHS; + if (popValue(Record, &OpNum, NextValueNo, &LHS) || + popValue(Record, &OpNum, NextValueNo, &RHS) || + OpNum+1 > Record.size()) + return Error("Invalid BINOP record"); + + LHS = ConvertOpToScalar(LHS, CurBBNo); + RHS = ConvertOpToScalar(RHS, CurBBNo); + + int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType()); + if (Opc == -1) return Error("Invalid BINOP record"); + I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); + break; + } + case naclbitc::FUNC_CODE_INST_CAST: { // CAST: [opval, destty, castopc] + unsigned OpNum = 0; + Value *Op; + if (popValue(Record, &OpNum, NextValueNo, &Op) || + OpNum+2 != Record.size()) + return Error("Invalid CAST record: bad record size"); + + Type *ResTy = getTypeByID(Record[OpNum]); + if (ResTy == 0) + return Error("Invalid CAST record: bad type ID"); + int Opc = GetDecodedCastOpcode(Record[OpNum+1]); + if (Opc == -1) + return Error("Invalid CAST record: bad opcode"); + + // If a ptrtoint cast was elided on the argument of the cast, + // add it back. Note: The casts allowed here should match the + // casts in NaClValueEnumerator::ExpectsScalarValue. 
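+      // Illustrative sketch (hypothetical IR, not taken from the sources):
+      // the writer elides a ptrtoint whose result only feeds a scalar-only
+      // operation, so
+      //   %i = ptrtoint i8* %p to i32
+      //   %z = zext i32 %i to i64
+      // is emitted as a single zext record whose operand is %p.  The switch
+      // below recognizes such opcodes and calls ConvertOpToScalar, which
+      // re-creates (or reuses a cached copy of) the elided ptrtoint before
+      // the zext is constructed.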
+ switch (Opc) { + case Instruction::Trunc: + case Instruction::ZExt: + case Instruction::SExt: + case Instruction::UIToFP: + case Instruction::SIToFP: + Op = ConvertOpToScalar(Op, CurBBNo); + break; + default: + break; + } + + I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy); + break; + } + + case naclbitc::FUNC_CODE_INST_VSELECT: {// VSELECT: [opval, opval, pred] + // new form of select + // handles select i1 or select [N x i1] + unsigned OpNum = 0; + Value *TrueVal, *FalseVal, *Cond; + if (popValue(Record, &OpNum, NextValueNo, &TrueVal) || + popValue(Record, &OpNum, NextValueNo, &FalseVal) || + popValue(Record, &OpNum, NextValueNo, &Cond)) + return Error("Invalid SELECT record"); + + TrueVal = ConvertOpToScalar(TrueVal, CurBBNo); + FalseVal = ConvertOpToScalar(FalseVal, CurBBNo); + + // expect i1 + if (Cond->getType() != Type::getInt1Ty(Context)) + return Error("Invalid SELECT condition type"); + + I = SelectInst::Create(Cond, TrueVal, FalseVal); + break; + } + + case naclbitc::FUNC_CODE_INST_CMP2: { // CMP2: [opval, opval, pred] + // FCmp/ICmp returning bool or vector of bool + + unsigned OpNum = 0; + Value *LHS, *RHS; + if (popValue(Record, &OpNum, NextValueNo, &LHS) || + popValue(Record, &OpNum, NextValueNo, &RHS) || + OpNum+1 != Record.size()) + return Error("Invalid CMP record"); + + LHS = ConvertOpToScalar(LHS, CurBBNo); + RHS = ConvertOpToScalar(RHS, CurBBNo); + + if (LHS->getType()->isFPOrFPVectorTy()) + I = new FCmpInst(GetDecodedFCmpPredicate(Record[OpNum]), LHS, RHS); + else + I = new ICmpInst(GetDecodedICmpPredicate(Record[OpNum]), LHS, RHS); + break; + } + + case naclbitc::FUNC_CODE_INST_RET: // RET: [opval] + { + unsigned Size = Record.size(); + if (Size == 0) { + I = ReturnInst::Create(Context); + break; + } + + unsigned OpNum = 0; + Value *Op = NULL; + if (popValue(Record, &OpNum, NextValueNo, &Op)) + return Error("Invalid RET record"); + if (OpNum != Record.size()) + return Error("Invalid RET record"); + + I = ReturnInst::Create(Context, ConvertOpToScalar(Op, CurBBNo)); + break; + } + case naclbitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#] + if (Record.size() != 1 && Record.size() != 3) + return Error("Invalid BR record"); + BasicBlock *TrueDest = getBasicBlock(Record[0]); + if (TrueDest == 0) + return Error("Invalid BR record"); + + if (Record.size() == 1) { + I = BranchInst::Create(TrueDest); + } + else { + BasicBlock *FalseDest = getBasicBlock(Record[1]); + Value *Cond = getValue(Record, 2, NextValueNo); + if (FalseDest == 0 || Cond == 0) + return Error("Invalid BR record"); + I = BranchInst::Create(TrueDest, FalseDest, Cond); + } + break; + } + case naclbitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] + // New SwitchInst format with case ranges. 
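+      // Sketch of the record layout parsed below (values are illustrative,
+      // not taken from a real pexe):
+      //   switch i32 %x, label %bb0 [ i32 1, label %bb1
+      //                               i32 2, label %bb1 ]
+      // is stored roughly as
+      //   [type(i32), cond(%x), default(bb0), numcases(1),
+      //    numitems(2), single, 1, single, 2, dest(bb1)]
+      // A range item (single == 0) carries a low and a high value and is
+      // expanded below into one addCase call per member, since LLVM no
+      // longer supports case ranges directly.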
+ if (Record.size() < 4) + return Error("Invalid SWITCH record"); + Type *OpTy = getTypeByID(Record[0]); + unsigned ValueBitWidth = cast(OpTy)->getBitWidth(); + + Value *Cond = getValue(Record, 1, NextValueNo); + BasicBlock *Default = getBasicBlock(Record[2]); + if (OpTy == 0 || Cond == 0 || Default == 0) + return Error("Invalid SWITCH record"); + + unsigned NumCases = Record[3]; + + SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); + + unsigned CurIdx = 4; + for (unsigned i = 0; i != NumCases; ++i) { + unsigned NumItems = Record[CurIdx++]; + + std::vector > CaseRanges; + CaseRanges.reserve(NumItems); + + for (unsigned ci = 0; ci != NumItems; ++ci) { + bool isSingleNumber = Record[CurIdx++]; + + APInt Low; + APInt High; + + unsigned ActiveWords = 1; + if (ValueBitWidth > 64) + ActiveWords = Record[CurIdx++]; + Low = ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords), + ValueBitWidth); + CurIdx += ActiveWords; + + if (!isSingleNumber) { + ActiveWords = 1; + if (ValueBitWidth > 64) + ActiveWords = Record[CurIdx++]; + + High = ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords), + ValueBitWidth); + + CurIdx += ActiveWords; + } else + High = Low; + + CaseRanges.push_back(std::make_pair(Low, High)); + } + BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]); + /// LLVM doesn't support case ranges anymore, so we have to emulate: + for(size_t j = 0, j_end = CaseRanges.size(); j < j_end; ++j) { + for(APInt k(CaseRanges[j].first), + k_end(CaseRanges[j].second + 1); k != k_end; ++k) { + SI->addCase(cast(ConstantInt::get(OpTy, k)), DestBB); + } + } + } + I = SI; + break; + } + case naclbitc::FUNC_CODE_INST_UNREACHABLE: // UNREACHABLE + I = new UnreachableInst(Context); + break; + case naclbitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] + if (Record.size() < 1 || ((Record.size()-1)&1)) + return Error("Invalid PHI record"); + Type *Ty = getTypeByID(Record[0]); + if (!Ty) return Error("Invalid PHI record"); + + PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); + + for (unsigned i = 0, e = Record.size()-1; i != e; i += 2) { + Value *V; + // With relative value IDs, it is possible that operands have + // negative IDs (for forward references). Use a signed VBR + // representation to keep the encoding small. + V = getValueSigned(Record, 1+i, NextValueNo); + unsigned BBIndex = Record[2+i]; + BasicBlock *BB = getBasicBlock(BBIndex); + if (!V || !BB) return Error("Invalid PHI record"); + if (Ty == IntPtrType) { + // Delay installing scalar casts until all instructions of + // the function are rendered. This guarantees that we insert + // the conversion just before the incoming edge (or use an + // existing conversion if already installed). + V = ConvertOpToScalar(V, BBIndex, /* DeferInsertion = */ true); + } + PN->addIncoming(V, BB); + } + I = PN; + break; + } + + case naclbitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [op, align] + if (Record.size() != 2) + return Error("Invalid ALLOCA record"); + Value *Size; + unsigned OpNum = 0; + if (popValue(Record, &OpNum, NextValueNo, &Size)) + return Error("Invalid ALLOCA record"); + unsigned Align = Record[1]; + I = new AllocaInst(Type::getInt8Ty(Context), Size, (1 << Align) >> 1); + break; + } + case naclbitc::FUNC_CODE_INST_LOAD: { + // LOAD: [op, align, ty] + unsigned OpNum = 0; + Value *Op; + if (popValue(Record, &OpNum, NextValueNo, &Op) || + Record.size() != 3) + return Error("Invalid LOAD record"); + + // Add pointer cast to op. 
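+      // For illustration (hypothetical IR, not from the sources): the LOAD
+      // record carries its pointer operand as a plain i32 value, so
+      // ConvertOpToType below re-inserts the elided cast, e.g.
+      //   %p = inttoptr i32 %addr to i32*
+      //   %v = load i32* %p
+      // The alignment field holds log2(alignment) + 1; the expression
+      // (1 << A) >> 1 used below decodes A=0 -> 0 (unspecified), A=1 -> 1,
+      // A=3 -> 4, A=6 -> 32.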
+ Type *T = getTypeByID(Record[2]); + if (T == 0) + return Error("Invalid type for load instruction"); + Op = ConvertOpToType(Op, T->getPointerTo(), CurBBNo); + if (Op == 0) return true; + I = new LoadInst(Op, "", false, (1 << Record[OpNum]) >> 1); + break; + } + case naclbitc::FUNC_CODE_INST_STORE: { + // STORE: [ptr, val, align] + unsigned OpNum = 0; + Value *Val, *Ptr; + if (popValue(Record, &OpNum, NextValueNo, &Ptr) || + popValue(Record, &OpNum, NextValueNo, &Val)) + return Error("Invalid STORE record"); + if (OpNum+1 != Record.size()) + return Error("Invalid STORE record"); + Val = ConvertOpToScalar(Val, CurBBNo); + Ptr = ConvertOpToType(Ptr, Val->getType()->getPointerTo(), CurBBNo); + I = new StoreInst(Val, Ptr, false, (1 << Record[OpNum]) >> 1); + break; + } + case naclbitc::FUNC_CODE_INST_CALL: + case naclbitc::FUNC_CODE_INST_CALL_INDIRECT: { + // CALL: [cc, fnid, arg0, arg1...] + // CALL_INDIRECT: [cc, fnid, fnty, args...] + if ((Record.size() < 2) || + (BitCode == naclbitc::FUNC_CODE_INST_CALL_INDIRECT && + Record.size() < 3)) + return Error("Invalid CALL record"); + + unsigned CCInfo = Record[0]; + + unsigned OpNum = 1; + Value *Callee; + if (popValue(Record, &OpNum, NextValueNo, &Callee)) + return Error("Invalid CALL record"); + + // Build function type for call. + FunctionType *FTy = 0; + Type *ReturnType = 0; + if (BitCode == naclbitc::FUNC_CODE_INST_CALL_INDIRECT) { + // Callee type has been elided, add back in. + ReturnType = getTypeByID(Record[2]); + ++OpNum; + } else { + // Get type signature from callee. + if (PointerType *OpTy = dyn_cast(Callee->getType())) { + FTy = dyn_cast(OpTy->getElementType()); + } + if (FTy == 0) + return Error("Invalid type for CALL record"); + } + + unsigned NumParams = Record.size() - OpNum; + if (FTy && NumParams != FTy->getNumParams()) + return Error("Invalid CALL record"); + + // Process call arguments. + SmallVector Args; + for (unsigned Index = 0; Index < NumParams; ++Index) { + Value *Arg; + if (popValue(Record, &OpNum, NextValueNo, &Arg)) + Error("Invalid argument in CALL record"); + if (FTy) { + // Add a cast, to a pointer type if necessary, in case this + // is an intrinsic call that takes a pointer argument. + Arg = ConvertOpToType(Arg, FTy->getParamType(Index), CurBBNo); + } else { + Arg = ConvertOpToScalar(Arg, CurBBNo); + } + Args.push_back(Arg); + } + + if (FTy == 0) { + // Reconstruct the function type and cast the function pointer + // to it. + SmallVector ArgTypes; + for (unsigned Index = 0; Index < NumParams; ++Index) + ArgTypes.push_back(Args[Index]->getType()); + FTy = FunctionType::get(ReturnType, ArgTypes, false); + Callee = ConvertOpToType(Callee, FTy->getPointerTo(), CurBBNo); + } + + // Construct call. + I = CallInst::Create(Callee, Args); + cast(I)->setCallingConv(GetDecodedCallingConv(CCInfo>>1)); + cast(I)->setTailCall(CCInfo & 1); + break; + } + case naclbitc::FUNC_CODE_INST_FORWARDTYPEREF: + // Build corresponding forward reference. + if (Record.size() != 2 || + ValueList.createValueFwdRef(Record[0], getTypeByID(Record[1]))) + return Error("Invalid FORWARDTYPEREF record"); + continue; + } + + if (InstallInstruction(CurBB, I)) + return true; + + // If this was a terminator instruction, move to the next block. + if (isa(I)) { + ++CurBBNo; + CurBB = getBasicBlock(CurBBNo); + } + + // Non-void values get registered in the value table for future use. 
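+    // For illustration: operand IDs inside function records are relative to
+    // the value currently being defined.  If the instruction below becomes
+    // value #12 (NextValueNo == 12), an operand encoded as 2 refers to
+    // absolute value #10 (see popValue/getValue in NaClBitcodeReader.h).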
+    if (I && !I->getType()->isVoidTy()) {
+      Value *NewVal = I;
+      if (NewVal->getType()->isPointerTy() &&
+          ValueList.getValueFwdRef(NextValueNo)) {
+        // Forward-referenced values cannot have pointer type.
+        NewVal = ConvertOpToScalar(NewVal, CurBBNo);
+      }
+      ValueList.AssignValue(NewVal, NextValueNo++);
+    }
+  }
+
+OutOfRecordLoop:
+
+  // Add PHI conversions to corresponding incoming block, if not
+  // already in the block.  Also clear all conversions after fixing
+  // PHI conversions.
+  for (unsigned I = 0, NumBBs = FunctionBBs.size(); I < NumBBs; ++I) {
+    BasicBlockInfo &BBInfo = FunctionBBs[I];
+    std::vector<CastInst*> &PhiCasts = BBInfo.PhiCasts;
+    for (std::vector<CastInst*>::iterator Iter = PhiCasts.begin(),
+           IterEnd = PhiCasts.end(); Iter != IterEnd; ++Iter) {
+      CastInst *Cast = *Iter;
+      if (Cast->getParent() == 0) {
+        BasicBlock *BB = BBInfo.BB;
+        BB->getInstList().insert(BB->getTerminator(), Cast);
+      }
+    }
+    PhiCasts.clear();
+    BBInfo.CastMap.clear();
+  }
+
+  // Check the function list for unresolved values.
+  if (Argument *A = dyn_cast<Argument>(ValueList.back())) {
+    if (A->getParent() == 0) {
+      // We found at least one unresolved value.  Nuke them all to avoid leaks.
+      for (unsigned i = ModuleValueListSize, e = ValueList.size(); i != e; ++i) {
+        if ((A = dyn_cast<Argument>(ValueList[i])) && A->getParent() == 0) {
+          A->replaceAllUsesWith(UndefValue::get(A->getType()));
+          delete A;
+        }
+      }
+      return Error("Never resolved value found in function!");
+    }
+  }
+
+  // Trim the value list down to the size it was before we parsed this function.
+  ValueList.shrinkTo(ModuleValueListSize);
+  FunctionBBs.clear();
+  DEBUG(dbgs() << "<- ParseFunctionBody\n");
+  return false;
+}
+
+/// FindFunctionInStream - Find the function body in the bitcode stream.
+bool NaClBitcodeReader::FindFunctionInStream(Function *F,
+       DenseMap<Function*, uint64_t>::iterator DeferredFunctionInfoIterator) {
+  while (DeferredFunctionInfoIterator->second == 0) {
+    if (Stream.AtEndOfStream())
+      return Error("Could not find Function in stream");
+    // ParseModule will parse the next body in the stream and set its
+    // position in the DeferredFunctionInfo map.
+    if (ParseModule(true)) return true;
+  }
+  return false;
+}
+
+//===----------------------------------------------------------------------===//
+// GVMaterializer implementation
+//===----------------------------------------------------------------------===//
+
+
+bool NaClBitcodeReader::isMaterializable(const GlobalValue *GV) const {
+  if (const Function *F = dyn_cast<Function>(GV)) {
+    return F->isDeclaration() &&
+           DeferredFunctionInfo.count(const_cast<Function*>(F));
+  }
+  return false;
+}
+
+error_code NaClBitcodeReader::Materialize(GlobalValue *GV) {
+  Function *F = dyn_cast<Function>(GV);
+  // If it's not a function or is already materialized, ignore the request.
+  if (!F || !F->isMaterializable())
+    return error_code::success();
+
+  DenseMap<Function*, uint64_t>::iterator DFII = DeferredFunctionInfo.find(F);
+  assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!");
+  // If its position is recorded as 0, its body is somewhere in the stream
+  // but we haven't seen it yet.
+  if (DFII->second == 0)
+    if (LazyStreamer && FindFunctionInStream(F, DFII))
+      return make_error_code(errc::no_stream_resources);
+
+  // Move the bit stream to the saved position of the deferred function body.
+  Stream.JumpToBit(DFII->second);
+
+  if (ParseFunctionBody(F)) {
+    return make_error_code(errc::protocol_error);
+  }
+
+  // Upgrade any old intrinsic calls in the function.
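+  // Note on the loop below: the use iterator is advanced (*UI++) before
+  // UpgradeIntrinsicCall runs, because upgrading may erase the old call and
+  // would otherwise invalidate the iterator that referenced it.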
+ for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(), + E = UpgradedIntrinsics.end(); I != E; ++I) { + if (I->first != I->second) { + for (Value::use_iterator UI = I->first->use_begin(), + UE = I->first->use_end(); UI != UE; ) { + if (CallInst* CI = dyn_cast(*UI++)) + UpgradeIntrinsicCall(CI, I->second); + } + } + } + + return error_code::success(); +} + +bool NaClBitcodeReader::isDematerializable(const GlobalValue *GV) const { + const Function *F = dyn_cast(GV); + if (!F || F->isDeclaration()) + return false; + return DeferredFunctionInfo.count(const_cast(F)); +} + +void NaClBitcodeReader::Dematerialize(GlobalValue *GV) { + Function *F = dyn_cast(GV); + // If this function isn't dematerializable, this is a noop. + if (!F || !isDematerializable(F)) + return; + + assert(DeferredFunctionInfo.count(F) && "No info to read function later?"); + + // Just forget the function body, we can remat it later. + F->deleteBody(); +} + + +error_code NaClBitcodeReader::MaterializeModule(Module *M) { + assert(M == TheModule && + "Can only Materialize the Module this NaClBitcodeReader is attached to."); + // Iterate over the module, deserializing any functions that are still on + // disk. + for (Module::iterator F = TheModule->begin(), E = TheModule->end(); + F != E; ++F) + if (F->isMaterializable() && + Materialize(F)) + return make_error_code(errc::protocol_error); + + // At this point, if there are any function bodies, the current bit is + // pointing to the END_BLOCK record after them. Now make sure the rest + // of the bits in the module have been read. + if (NextUnreadBit) + ParseModule(true); + + // Upgrade any intrinsic calls that slipped through (should not happen!) and + // delete the old functions to clean up. We can't do this unless the entire + // module is materialized because there could always be another function body + // with calls to the old function. 
+ for (std::vector >::iterator I = + UpgradedIntrinsics.begin(), E = UpgradedIntrinsics.end(); I != E; ++I) { + if (I->first != I->second) { + for (Value::use_iterator UI = I->first->use_begin(), + UE = I->first->use_end(); UI != UE; ) { + if (CallInst* CI = dyn_cast(*UI++)) + UpgradeIntrinsicCall(CI, I->second); + } + if (!I->first->use_empty()) + I->first->replaceAllUsesWith(I->second); + I->first->eraseFromParent(); + } + } + std::vector >().swap(UpgradedIntrinsics); + + return error_code::success(); +} + +bool NaClBitcodeReader::InitStream() { + if (LazyStreamer) return InitLazyStream(); + return InitStreamFromBuffer(); +} + +bool NaClBitcodeReader::InitStreamFromBuffer() { + const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart(); + const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); + + if (Buffer->getBufferSize() & 3) + return Error("Bitcode stream should be a multiple of 4 bytes in length"); + + if (Header.Read(BufPtr, BufEnd)) + return Error(Header.Unsupported()); + + StreamFile.reset(new NaClBitstreamReader(BufPtr, BufEnd)); + Stream.init(*StreamFile); + + if (AcceptHeader()) + return Error(Header.Unsupported()); + return false; +} + +bool NaClBitcodeReader::InitLazyStream() { + StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer); + if (Header.Read(Bytes)) + return Error(Header.Unsupported()); + + StreamFile.reset(new NaClBitstreamReader(Bytes, Header.getHeaderSize())); + Stream.init(*StreamFile); + if (AcceptHeader()) + return Error(Header.Unsupported()); + return false; +} + +//===----------------------------------------------------------------------===// +// External interface +//===----------------------------------------------------------------------===// + +/// getNaClLazyBitcodeModule - lazy function-at-a-time loading from a file. +/// +Module *llvm::getNaClLazyBitcodeModule(MemoryBuffer *Buffer, + LLVMContext& Context, + std::string *ErrMsg, + bool AcceptSupportedOnly) { + Module *M = new Module(Buffer->getBufferIdentifier(), Context); + NaClBitcodeReader *R = + new NaClBitcodeReader(Buffer, Context, AcceptSupportedOnly); + M->setMaterializer(R); + if (R->ParseBitcodeInto(M)) { + if (ErrMsg) + *ErrMsg = R->getErrorString(); + + delete M; // Also deletes R. + return 0; + } + // Have the NaClBitcodeReader dtor delete 'Buffer'. + R->setBufferOwned(true); + + return M; +} + + +Module *llvm::getNaClStreamedBitcodeModule(const std::string &name, + DataStreamer *streamer, + LLVMContext &Context, + std::string *ErrMsg, + bool AcceptSupportedOnly) { + Module *M = new Module(name, Context); + NaClBitcodeReader *R = + new NaClBitcodeReader(streamer, Context, AcceptSupportedOnly); + M->setMaterializer(R); + if (R->ParseBitcodeInto(M)) { + if (ErrMsg) + *ErrMsg = R->getErrorString(); + delete M; // Also deletes R. + return 0; + } + R->setBufferOwned(false); // no buffer to delete + + return M; +} + +/// NaClParseBitcodeFile - Read the specified bitcode file, returning the module. +/// If an error occurs, return null and fill in *ErrMsg if non-null. +Module *llvm::NaClParseBitcodeFile(MemoryBuffer *Buffer, LLVMContext& Context, + std::string *ErrMsg, + bool AcceptSupportedOnly){ + Module *M = getNaClLazyBitcodeModule(Buffer, Context, ErrMsg, + AcceptSupportedOnly); + if (!M) return 0; + + // Don't let the NaClBitcodeReader dtor delete 'Buffer', regardless of whether + // there was an error. + static_cast(M->getMaterializer())->setBufferOwned(false); + + // Read in the entire module, and destroy the NaClBitcodeReader. 
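+  // Hypothetical caller of the entry points defined above (illustrative
+  // only; names such as Buf, Buf2, and Ctx are assumptions):
+  //   std::string Err;
+  //   Module *Eager = NaClParseBitcodeFile(Buf, Ctx, &Err);       // all bodies
+  //   Module *Lazy  = getNaClLazyBitcodeModule(Buf2, Ctx, &Err);  // on demand;
+  //   // lazy bodies are read later through the attached GVMaterializer.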
+ error_code result = M->materializeAll(); + if (result) { + delete M; + if(ErrMsg) + *ErrMsg = result.message(); + return 0; + } + + // TODO: Restore the use-lists to the in-memory state when the bitcode was + // written. We must defer until the Module has been fully materialized. + + return M; +} diff --git a/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h new file mode 100644 index 000000000000..c6e83e9b54db --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/NaClBitcodeReader.h @@ -0,0 +1,343 @@ +//===- NaClBitcodeReader.h ------------------------------------*- C++ -*-===// +// Internal NaClBitcodeReader implementation +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This header defines the NaClBitcodeReader class. +// +//===----------------------------------------------------------------------===// + +#ifndef NACL_BITCODE_READER_H +#define NACL_BITCODE_READER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" +#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h" +#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" +#include "llvm/GVMaterializer.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/ValueHandle.h" +#include + +namespace llvm { + class MemoryBuffer; + class LLVMContext; + class CastInst; + +// Models a Cast. Used to cache casts created in a basic block by the +// PNaCl bitcode reader. +struct NaClBitcodeReaderCast { + // Fields of the conversion. + Instruction::CastOps Op; + Type *Ty; + Value *Val; + + NaClBitcodeReaderCast(Instruction::CastOps Op, Type *Ty, Value *Val) + : Op(Op), Ty(Ty), Val(Val) {} +}; + +// Models the data structure used to hash/compare Casts in a DenseMap. 
+template<> +struct DenseMapInfo { +public: + static NaClBitcodeReaderCast getEmptyKey() { + return NaClBitcodeReaderCast(Instruction::CastOpsEnd, + DenseMapInfo::getEmptyKey(), + DenseMapInfo::getEmptyKey()); + } + static NaClBitcodeReaderCast getTombstoneKey() { + return NaClBitcodeReaderCast(Instruction::CastOpsEnd, + DenseMapInfo::getTombstoneKey(), + DenseMapInfo::getTombstoneKey()); + } + static unsigned getHashValue(const NaClBitcodeReaderCast &C) { + std::pair > Tuple; + Tuple.first = C.Op; + Tuple.second.first = C.Ty; + Tuple.second.second = C.Val; + return DenseMapInfo > >::getHashValue(Tuple); + } + static bool isEqual(const NaClBitcodeReaderCast &LHS, + const NaClBitcodeReaderCast &RHS) { + return LHS.Op == RHS.Op && LHS.Ty == RHS.Ty && LHS.Val == RHS.Val; + } +}; + +//===----------------------------------------------------------------------===// +// NaClBitcodeReaderValueList Class +//===----------------------------------------------------------------------===// + +class NaClBitcodeReaderValueList { + std::vector ValuePtrs; + LLVMContext &Context; +public: + NaClBitcodeReaderValueList(LLVMContext &C) : Context(C) {} + ~NaClBitcodeReaderValueList() {} + + // vector compatibility methods + unsigned size() const { return ValuePtrs.size(); } + void resize(unsigned N) { ValuePtrs.resize(N); } + void push_back(Value *V) { + ValuePtrs.push_back(V); + } + + void clear() { + ValuePtrs.clear(); + } + + Value *operator[](unsigned i) const { + assert(i < ValuePtrs.size()); + return ValuePtrs[i]; + } + + Value *back() const { return ValuePtrs.back(); } + void pop_back() { ValuePtrs.pop_back(); } + bool empty() const { return ValuePtrs.empty(); } + void shrinkTo(unsigned N) { + assert(N <= size() && "Invalid shrinkTo request!"); + ValuePtrs.resize(N); + } + + // Declares the type of the forward-referenced value Idx. Returns + // true if an error occurred. It is an error if Idx's type has + // already been declared. + bool createValueFwdRef(unsigned Idx, Type *Ty); + + // Gets the forward reference value for Idx. + Value *getValueFwdRef(unsigned Idx); + + // Gets the corresponding constant defining the address of the + // corresponding global variable defined by Idx, if already defined. + // Otherwise, creates a forward reference for Idx, and returns the + // placeholder constant for the address of the corresponding global + // variable defined by Idx. + Constant *getOrCreateGlobalVarRef(unsigned Idx, Module* M); + + // Assigns Idx to the given value (if new), or assigns V to Idx (if Idx + // was forward referenced). + void AssignValue(Value *V, unsigned Idx); + + // Assigns Idx to the given global variable. If the Idx currently has + // a forward reference (built by createGlobalVarFwdRef(unsigned Idx)), + // replaces uses of the global variable forward reference with the + // value GV. + void AssignGlobalVar(GlobalVariable *GV, unsigned Idx); + + // Assigns Idx to the given value, overwriting the existing entry + // and possibly modifying the type of the entry. + void OverwriteValue(Value *V, unsigned Idx); +}; + + +class NaClBitcodeReader : public GVMaterializer { + NaClBitcodeHeader Header; // Header fields of the PNaCl bitcode file. 
+  LLVMContext &Context;
+  Module *TheModule;
+  MemoryBuffer *Buffer;
+  bool BufferOwned;
+  OwningPtr<NaClBitstreamReader> StreamFile;
+  NaClBitstreamCursor Stream;
+  DataStreamer *LazyStreamer;
+  uint64_t NextUnreadBit;
+  bool SeenValueSymbolTable;
+
+  std::string ErrorString;
+
+  std::vector<Type*> TypeList;
+  NaClBitcodeReaderValueList ValueList;
+
+  // Holds information about each BasicBlock in the function being read.
+  struct BasicBlockInfo {
+    // A basic block within the function being modeled.
+    BasicBlock *BB;
+    // The set of generated conversions.
+    DenseMap<NaClBitcodeReaderCast, CastInst*> CastMap;
+    // The set of generated conversions that were added for phi nodes,
+    // and may need their parent basic block defined.
+    std::vector<CastInst*> PhiCasts;
+  };
+
+  /// FunctionBBs - While parsing a function body, this is a list of the basic
+  /// blocks for the function.
+  std::vector<BasicBlockInfo> FunctionBBs;
+
+  // When reading the module header, this list is populated with functions that
+  // have bodies later in the file.
+  std::vector<Function*> FunctionsWithBodies;
+
+  // When intrinsic functions are encountered which require upgrading they are
+  // stored here with their replacement function.
+  typedef std::vector<std::pair<Function*, Function*> > UpgradedIntrinsicMap;
+  UpgradedIntrinsicMap UpgradedIntrinsics;
+
+  // Several operations happen after the module header has been read, but
+  // before function bodies are processed.  This keeps track of whether
+  // we've done this yet.
+  bool SeenFirstFunctionBody;
+
+  /// DeferredFunctionInfo - When function bodies are initially scanned, this
+  /// map contains info about where to find each deferred function body in the
+  /// stream.
+  DenseMap<Function*, uint64_t> DeferredFunctionInfo;
+
+  /// \brief True if we should only accept supported bitcode format.
+  bool AcceptSupportedBitcodeOnly;
+
+  /// \brief Integer type used for PNaCl conversion of pointers.
+  Type *IntPtrType;
+
+public:
+  explicit NaClBitcodeReader(MemoryBuffer *buffer, LLVMContext &C,
+                             bool AcceptSupportedOnly = true)
+      : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false),
+        LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false),
+        ValueList(C),
+        SeenFirstFunctionBody(false),
+        AcceptSupportedBitcodeOnly(AcceptSupportedOnly),
+        IntPtrType(IntegerType::get(C, PNaClIntPtrTypeBitSize)) {
+  }
+  explicit NaClBitcodeReader(DataStreamer *streamer, LLVMContext &C,
+                             bool AcceptSupportedOnly = true)
+      : Context(C), TheModule(0), Buffer(0), BufferOwned(false),
+        LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false),
+        ValueList(C),
+        SeenFirstFunctionBody(false),
+        AcceptSupportedBitcodeOnly(AcceptSupportedOnly),
+        IntPtrType(IntegerType::get(C, PNaClIntPtrTypeBitSize)) {
+  }
+  ~NaClBitcodeReader() {
+    FreeState();
+  }
+
+  void FreeState();
+
+  /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer
+  /// when the reader is destroyed.
+  void setBufferOwned(bool Owned) { BufferOwned = Owned; }
+
+  virtual bool isMaterializable(const GlobalValue *GV) const;
+  virtual bool isDematerializable(const GlobalValue *GV) const;
+  virtual error_code Materialize(GlobalValue *GV);
+  virtual error_code MaterializeModule(Module *M);
+  virtual void Dematerialize(GlobalValue *GV);
+
+  bool Error(const std::string &Str) {
+    ErrorString = Str;
+    return true;
+  }
+  const std::string &getErrorString() const { return ErrorString; }
+
+  /// @brief Main interface to parsing a bitcode buffer.
+  /// @returns true if an error occurred.
+  bool ParseBitcodeInto(Module *M);
+
+private:
+  // Returns false if the bitcode header is acceptable.
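+  // For illustration, the cases below work out as follows:
+  //   header supported                                    -> false (accept)
+  //   only readable, AcceptSupportedBitcodeOnly == true   -> true  (reject)
+  //   only readable, AcceptSupportedBitcodeOnly == false  -> false (accept)
+  //   not readable                                        -> true  (reject)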
+  bool AcceptHeader() const {
+    return !(Header.IsSupported() ||
+             (!AcceptSupportedBitcodeOnly && Header.IsReadable()));
+  }
+  uint32_t GetPNaClVersion() const {
+    return Header.GetPNaClVersion();
+  }
+  Type *getTypeByID(unsigned ID);
+  // Returns the value associated with ID.  The value must already exist,
+  // or be a forward-referenced value created by getOrCreateFnValueByID.
+  Value *getFnValueByID(unsigned ID) {
+    return ValueList.getValueFwdRef(ID);
+  }
+  BasicBlock *getBasicBlock(unsigned ID) const {
+    if (ID >= FunctionBBs.size()) return 0; // Invalid ID
+    return FunctionBBs[ID].BB;
+  }
+
+  /// \brief Read a value out of the specified record from slot '*Slot'.
+  /// Increment *Slot past the number of slots used by the value in the record.
+  /// Return true if there is an error.
+  bool popValue(const SmallVector<uint64_t, 64> &Record, unsigned *Slot,
+                unsigned InstNum, Value **ResVal) {
+    if (*Slot == Record.size()) return true;
+    // ValNo is encoded relative to the InstNum.
+    unsigned ValNo = InstNum - (unsigned)Record[(*Slot)++];
+    *ResVal = getFnValueByID(ValNo);
+    return *ResVal == 0;
+  }
+
+  /// getValue -- Like popValue, but returns the value directly rather than
+  /// through ResVal.  Returns 0 if there is an error.
+  Value *getValue(const SmallVector<uint64_t, 64> &Record, unsigned Slot,
+                  unsigned InstNum) {
+    if (Slot == Record.size()) return 0;
+    // ValNo is encoded relative to the InstNum.
+    unsigned ValNo = InstNum - (unsigned)Record[Slot];
+    return getFnValueByID(ValNo);
+  }
+
+  /// getValueSigned -- Like getValue, but decodes signed VBRs.
+  Value *getValueSigned(const SmallVector<uint64_t, 64> &Record, unsigned Slot,
+                        unsigned InstNum) {
+    if (Slot == Record.size()) return 0;
+    // ValNo is encoded relative to the InstNum.
+    unsigned ValNo = InstNum -
+        (unsigned) NaClDecodeSignRotatedValue(Record[Slot]);
+    return getFnValueByID(ValNo);
+  }
+
+  /// \brief Create an (elided) cast instruction for basic block
+  /// BBIndex.  Op is the type of cast.  V is the value to cast.  CT
+  /// is the type to convert V to.  DeferInsertion defines whether the
+  /// generated conversion should also be installed into basic block
+  /// BBIndex.  Note: For PHI nodes, we don't insert when created
+  /// (i.e. DeferInsertion=true), since they must be inserted at the end
+  /// of the corresponding incoming basic block.
+  CastInst *CreateCast(unsigned BBIndex, Instruction::CastOps Op,
+                       Type *CT, Value *V, bool DeferInsertion = false);
+
+  /// \brief Add instructions to cast Op to the given type T into
+  /// block BBIndex.  Follows rules for pointer conversion as defined
+  /// in llvm/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp.
+  ///
+  /// Reports a fatal error if no such conversion is possible.
+  Value *ConvertOpToType(Value *Op, Type *T, unsigned BBIndex);
+
+  /// \brief If Op is a scalar value, this is a nop.  If Op is a
+  /// pointer value, a PtrToInt instruction is inserted (in BBIndex)
+  /// to convert Op to an integer.  For defaults on DeferInsertion,
+  /// see comments for method CreateCast.
+  Value *ConvertOpToScalar(Value *Op, unsigned BBIndex,
+                           bool DeferInsertion = false);
+
+  /// \brief Install instruction I into basic block BB.
+ bool InstallInstruction(BasicBlock *BB, Instruction *I); + + FunctionType *AddPointerTypesToIntrinsicType(StringRef Name, + FunctionType *FTy); + void AddPointerTypesToIntrinsicParams(); + bool ParseModule(bool Resume); + bool ParseTypeTable(); + bool ParseTypeTableBody(); + bool ParseGlobalVars(); + bool ParseValueSymbolTable(); + bool ParseConstants(); + bool RememberAndSkipFunctionBody(); + bool ParseFunctionBody(Function *F); + bool GlobalCleanup(); + bool InitStream(); + bool InitStreamFromBuffer(); + bool InitLazyStream(); + bool FindFunctionInStream(Function *F, + DenseMap::iterator DeferredFunctionInfoIterator); +}; + +} // End llvm namespace + +#endif diff --git a/lib/Bitcode/NaCl/Reader/NaClBitstreamReader.cpp b/lib/Bitcode/NaCl/Reader/NaClBitstreamReader.cpp new file mode 100644 index 000000000000..6de98c490dc1 --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/NaClBitstreamReader.cpp @@ -0,0 +1,306 @@ +//===- NaClBitstreamReader.cpp --------------------------------------------===// +// NaClBitstreamReader implementation +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/NaCl/NaClBitstreamReader.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// NaClBitstreamCursor implementation +//===----------------------------------------------------------------------===// + +void NaClBitstreamCursor::operator=(const NaClBitstreamCursor &RHS) { + freeState(); + + BitStream = RHS.BitStream; + NextChar = RHS.NextChar; + CurWord = RHS.CurWord; + BitsInCurWord = RHS.BitsInCurWord; + CurCodeSize = RHS.CurCodeSize; + + // Copy abbreviations, and bump ref counts. + CurAbbrevs = RHS.CurAbbrevs; + for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i) + CurAbbrevs[i]->addRef(); + + // Copy block scope and bump ref counts. + BlockScope = RHS.BlockScope; + for (size_t S = 0, e = BlockScope.size(); S != e; ++S) { + std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; + for (size_t i = 0, e = Abbrevs.size(); i != e; ++i) + Abbrevs[i]->addRef(); + } +} + +void NaClBitstreamCursor::freeState() { + // Free all the Abbrevs. + for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i) + CurAbbrevs[i]->dropRef(); + CurAbbrevs.clear(); + + // Free all the Abbrevs in the block scope. + for (size_t S = 0, e = BlockScope.size(); S != e; ++S) { + std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; + for (size_t i = 0, e = Abbrevs.size(); i != e; ++i) + Abbrevs[i]->dropRef(); + } + BlockScope.clear(); +} + +/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter +/// the block, and return true if the block has an error. +bool NaClBitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { + // Save the current block's state on BlockScope. + BlockScope.push_back(Block(CurCodeSize)); + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + + // Add the abbrevs specific to this block to the CurAbbrevs list. + if (const NaClBitstreamReader::BlockInfo *Info = + BitStream->getBlockInfo(BlockID)) { + for (size_t i = 0, e = Info->Abbrevs.size(); i != e; ++i) { + CurAbbrevs.push_back(Info->Abbrevs[i]); + CurAbbrevs.back()->addRef(); + } + } + + // Get the codesize of this block. 
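+  // For illustration: by this point the caller has already consumed the
+  // ENTER_SUBBLOCK abbrev id and the block id.  What remains, and is read
+  // below, is
+  //   newabbrevlen(vbr4)  <align to 32 bits>  blocklen(count of 32-bit words)
+  // e.g. a block announcing a 4-bit code width and 100 words of payload.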
+ CurCodeSize.IsFixed = true; + CurCodeSize.NumBits = ReadVBR(naclbitc::CodeLenWidth); + SkipToFourByteBoundary(); + unsigned NumWords = Read(naclbitc::BlockSizeWidth); + if (NumWordsP) *NumWordsP = NumWords; + + // Validate that this block is sane. + if (CurCodeSize.NumBits == 0 || AtEndOfStream()) + return true; + + return false; +} + +void NaClBitstreamCursor::readAbbreviatedLiteral( + const NaClBitCodeAbbrevOp &Op, + SmallVectorImpl &Vals) { + assert(Op.isLiteral() && "Not a literal"); + // If the abbrev specifies the literal value to use, use it. + Vals.push_back(Op.getLiteralValue()); +} + +void NaClBitstreamCursor::readAbbreviatedField( + const NaClBitCodeAbbrevOp &Op, + SmallVectorImpl &Vals) { + assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); + + // Decode the value as we are commanded. + switch (Op.getEncoding()) { + default: + report_fatal_error("Should not reach here"); + case NaClBitCodeAbbrevOp::Fixed: + Vals.push_back(Read((unsigned)Op.getEncodingData())); + break; + case NaClBitCodeAbbrevOp::VBR: + Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData())); + break; + case NaClBitCodeAbbrevOp::Char6: + Vals.push_back(NaClBitCodeAbbrevOp::DecodeChar6(Read(6))); + break; + } +} + +void NaClBitstreamCursor::skipAbbreviatedField(const NaClBitCodeAbbrevOp &Op) { + assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); + + // Decode the value as we are commanded. + switch (Op.getEncoding()) { + default: + report_fatal_error("Should not reach here"); + case NaClBitCodeAbbrevOp::Fixed: + (void)Read((unsigned)Op.getEncodingData()); + break; + case NaClBitCodeAbbrevOp::VBR: + (void)ReadVBR64((unsigned)Op.getEncodingData()); + break; + case NaClBitCodeAbbrevOp::Char6: + (void)Read(6); + break; + } +} + + + +/// skipRecord - Read the current record and discard it. +void NaClBitstreamCursor::skipRecord(unsigned AbbrevID) { + // Skip unabbreviated records by reading past their entries. + if (AbbrevID == naclbitc::UNABBREV_RECORD) { + unsigned Code = ReadVBR(6); + (void)Code; + unsigned NumElts = ReadVBR(6); + for (unsigned i = 0; i != NumElts; ++i) + (void)ReadVBR64(6); + return; + } + + const NaClBitCodeAbbrev *Abbv = getAbbrev(AbbrevID); + + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + const NaClBitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral()) + continue; + + if (Op.getEncoding() == NaClBitCodeAbbrevOp::Blob) + report_fatal_error("Should not reach here"); + + if (Op.getEncoding() != NaClBitCodeAbbrevOp::Array) { + skipAbbreviatedField(Op); + continue; + } + + if (Op.getEncoding() == NaClBitCodeAbbrevOp::Array) { + // Array case. Read the number of elements as a vbr6. + unsigned NumElts = ReadVBR(6); + + // Get the element encoding. + assert(i+2 == e && "array op not second to last?"); + const NaClBitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // Read all the elements. 
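+      // For illustration (hypothetical abbreviation): an Array operand is a
+      // vbr6 element count followed by each element in the encoding of the
+      // operand that follows the Array.  With [..., Array, Fixed(8)], the
+      // payload "abc" is stored as the count 3 and the 8-bit fields
+      // 0x61 0x62 0x63.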
+ for (; NumElts; --NumElts) + skipAbbreviatedField(EltEnc); + continue; + } + } +} + +unsigned NaClBitstreamCursor::readRecord(unsigned AbbrevID, + SmallVectorImpl &Vals) { + if (AbbrevID == naclbitc::UNABBREV_RECORD) { + unsigned Code = ReadVBR(6); + unsigned NumElts = ReadVBR(6); + for (unsigned i = 0; i != NumElts; ++i) + Vals.push_back(ReadVBR64(6)); + return Code; + } + + const NaClBitCodeAbbrev *Abbv = getAbbrev(AbbrevID); + + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + const NaClBitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral()) { + readAbbreviatedLiteral(Op, Vals); + continue; + } + + if (Op.getEncoding() == NaClBitCodeAbbrevOp::Blob) + report_fatal_error("Should not reach here"); + + if (Op.getEncoding() != NaClBitCodeAbbrevOp::Array) { + readAbbreviatedField(Op, Vals); + continue; + } + + if (Op.getEncoding() == NaClBitCodeAbbrevOp::Array) { + // Array case. Read the number of elements as a vbr6. + unsigned NumElts = ReadVBR(6); + + // Get the element encoding. + assert(i+2 == e && "array op not second to last?"); + const NaClBitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // Read all the elements. + for (; NumElts; --NumElts) + readAbbreviatedField(EltEnc, Vals); + continue; + } + } + + unsigned Code = (unsigned)Vals[0]; + Vals.erase(Vals.begin()); + return Code; +} + + +void NaClBitstreamCursor::ReadAbbrevRecord() { + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + unsigned NumOpInfo = ReadVBR(5); + for (unsigned i = 0; i != NumOpInfo; ++i) { + bool IsLiteral = Read(1) ? true : false; + if (IsLiteral) { + Abbv->Add(NaClBitCodeAbbrevOp(ReadVBR64(8))); + continue; + } + + NaClBitCodeAbbrevOp::Encoding E = (NaClBitCodeAbbrevOp::Encoding)Read(3); + if (NaClBitCodeAbbrevOp::hasEncodingData(E)) { + unsigned Data = ReadVBR64(5); + + // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) + // and vbr(0) as a literal zero. This is decoded the same way, and avoids + // a slow path in Read() to have to handle reading zero bits. + if ((E == NaClBitCodeAbbrevOp::Fixed || E == NaClBitCodeAbbrevOp::VBR) && + Data == 0) { + Abbv->Add(NaClBitCodeAbbrevOp(0)); + continue; + } + + Abbv->Add(NaClBitCodeAbbrevOp(E, Data)); + } else + Abbv->Add(NaClBitCodeAbbrevOp(E)); + } + CurAbbrevs.push_back(Abbv); +} + +bool NaClBitstreamCursor::ReadBlockInfoBlock() { + // If this is the second stream to get to the block info block, skip it. + if (BitStream->hasBlockInfoRecords()) + return SkipBlock(); + + if (EnterSubBlock(naclbitc::BLOCKINFO_BLOCK_ID)) return true; + + SmallVector Record; + NaClBitstreamReader::BlockInfo *CurBlockInfo = 0; + + // Read all the records for this module. + while (1) { + NaClBitstreamEntry Entry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); + + switch (Entry.Kind) { + case llvm::NaClBitstreamEntry::SubBlock: // Handled for us already. + case llvm::NaClBitstreamEntry::Error: + return true; + case llvm::NaClBitstreamEntry::EndBlock: + return false; + case llvm::NaClBitstreamEntry::Record: + // The interesting case. + break; + } + + // Read abbrev records, associate them with CurBID. + if (Entry.ID == naclbitc::DEFINE_ABBREV) { + if (!CurBlockInfo) return true; + ReadAbbrevRecord(); + + // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the + // appropriate BlockInfo. + NaClBitCodeAbbrev *Abbv = CurAbbrevs.back(); + CurAbbrevs.pop_back(); + CurBlockInfo->Abbrevs.push_back(Abbv); + continue; + } + + // Read a record. 
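+    // For illustration: records in the BLOCKINFO block apply to the block
+    // most recently named by SETBID.  A typical sequence such as
+    //   [SETBID, <function block id>] [DEFINE_ABBREV ...] [DEFINE_ABBREV ...]
+    // registers those abbreviations for every block with that id in the
+    // file; DEFINE_ABBREV is handled just above, SETBID just below.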
+ Record.clear(); + switch (readRecord(Entry.ID, Record)) { + default: break; // Default behavior, ignore unknown content. + case naclbitc::BLOCKINFO_CODE_SETBID: + if (Record.size() < 1) return true; + CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]); + break; + } + } +} diff --git a/lib/Bitcode/NaCl/Reader/NaClCommonBitcodeRecordDists.cpp b/lib/Bitcode/NaCl/Reader/NaClCommonBitcodeRecordDists.cpp new file mode 100644 index 000000000000..722a3ff03332 --- /dev/null +++ b/lib/Bitcode/NaCl/Reader/NaClCommonBitcodeRecordDists.cpp @@ -0,0 +1,188 @@ +//===-- NaClCommonBitcodeRecordDists.cpp - Bitcode Analyzer ---------------===// +// Defines distribution maps for various values in bitcode records. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/NaCl/NaClCommonBitcodeRecordDists.h" +#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +/// GetCodeName - Return a symbolic code name if known, otherwise return +/// null. +static const char *GetCodeName(unsigned CodeID, unsigned BlockID) { + // Standard blocks for all bitcode files. + if (BlockID < naclbitc::FIRST_APPLICATION_BLOCKID) { + if (BlockID == naclbitc::BLOCKINFO_BLOCK_ID) { + switch (CodeID) { + default: return 0; + case naclbitc::BLOCKINFO_CODE_SETBID: return "SETBID"; + } + } + return 0; + } + + switch (BlockID) { + default: return 0; + case naclbitc::MODULE_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::MODULE_CODE_VERSION: return "VERSION"; + case naclbitc::MODULE_CODE_TRIPLE: return "TRIPLE"; + case naclbitc::MODULE_CODE_DATALAYOUT: return "DATALAYOUT"; + case naclbitc::MODULE_CODE_ASM: return "ASM"; + case naclbitc::MODULE_CODE_SECTIONNAME: return "SECTIONNAME"; + case naclbitc::MODULE_CODE_DEPLIB: return "DEPLIB"; // FIXME: Remove in 4.0 + case naclbitc::MODULE_CODE_GLOBALVAR: return "GLOBALVAR"; + case naclbitc::MODULE_CODE_FUNCTION: return "FUNCTION"; + case naclbitc::MODULE_CODE_ALIAS: return "ALIAS"; + case naclbitc::MODULE_CODE_PURGEVALS: return "PURGEVALS"; + case naclbitc::MODULE_CODE_GCNAME: return "GCNAME"; + } + case naclbitc::PARAMATTR_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::PARAMATTR_CODE_ENTRY_OLD: return "ENTRY"; + case naclbitc::PARAMATTR_CODE_ENTRY: return "ENTRY"; + case naclbitc::PARAMATTR_GRP_CODE_ENTRY: return "ENTRY"; + } + case naclbitc::TYPE_BLOCK_ID_NEW: + switch (CodeID) { + default: return 0; + case naclbitc::TYPE_CODE_NUMENTRY: return "NUMENTRY"; + case naclbitc::TYPE_CODE_VOID: return "VOID"; + case naclbitc::TYPE_CODE_FLOAT: return "FLOAT"; + case naclbitc::TYPE_CODE_DOUBLE: return "DOUBLE"; + case naclbitc::TYPE_CODE_LABEL: return "LABEL"; + case naclbitc::TYPE_CODE_OPAQUE: return "OPAQUE"; + case naclbitc::TYPE_CODE_INTEGER: return "INTEGER"; + case naclbitc::TYPE_CODE_POINTER: return "POINTER"; + case naclbitc::TYPE_CODE_ARRAY: return "ARRAY"; + case naclbitc::TYPE_CODE_VECTOR: return "VECTOR"; + case naclbitc::TYPE_CODE_X86_FP80: return "X86_FP80"; + case naclbitc::TYPE_CODE_FP128: return "FP128"; + case naclbitc::TYPE_CODE_PPC_FP128: return "PPC_FP128"; + case naclbitc::TYPE_CODE_METADATA: return "METADATA"; + case naclbitc::TYPE_CODE_STRUCT_ANON: return "STRUCT_ANON"; + case naclbitc::TYPE_CODE_STRUCT_NAME: return "STRUCT_NAME"; + case 
naclbitc::TYPE_CODE_STRUCT_NAMED: return "STRUCT_NAMED"; + case naclbitc::TYPE_CODE_FUNCTION: return "FUNCTION"; + } + + case naclbitc::CONSTANTS_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::CST_CODE_SETTYPE: return "SETTYPE"; + case naclbitc::CST_CODE_NULL: return "NULL"; + case naclbitc::CST_CODE_UNDEF: return "UNDEF"; + case naclbitc::CST_CODE_INTEGER: return "INTEGER"; + case naclbitc::CST_CODE_WIDE_INTEGER: return "WIDE_INTEGER"; + case naclbitc::CST_CODE_FLOAT: return "FLOAT"; + case naclbitc::CST_CODE_AGGREGATE: return "AGGREGATE"; + case naclbitc::CST_CODE_STRING: return "STRING"; + case naclbitc::CST_CODE_CSTRING: return "CSTRING"; + case naclbitc::CST_CODE_CE_BINOP: return "CE_BINOP"; + case naclbitc::CST_CODE_CE_CAST: return "CE_CAST"; + case naclbitc::CST_CODE_CE_GEP: return "CE_GEP"; + case naclbitc::CST_CODE_CE_INBOUNDS_GEP: return "CE_INBOUNDS_GEP"; + case naclbitc::CST_CODE_CE_SELECT: return "CE_SELECT"; + case naclbitc::CST_CODE_CE_EXTRACTELT: return "CE_EXTRACTELT"; + case naclbitc::CST_CODE_CE_INSERTELT: return "CE_INSERTELT"; + case naclbitc::CST_CODE_CE_SHUFFLEVEC: return "CE_SHUFFLEVEC"; + case naclbitc::CST_CODE_CE_CMP: return "CE_CMP"; + case naclbitc::CST_CODE_INLINEASM: return "INLINEASM"; + case naclbitc::CST_CODE_CE_SHUFVEC_EX: return "CE_SHUFVEC_EX"; + case naclbitc::CST_CODE_BLOCKADDRESS: return "CST_CODE_BLOCKADDRESS"; + case naclbitc::CST_CODE_DATA: return "DATA"; + } + case naclbitc::FUNCTION_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::FUNC_CODE_DECLAREBLOCKS: return "DECLAREBLOCKS"; + + case naclbitc::FUNC_CODE_INST_BINOP: return "INST_BINOP"; + case naclbitc::FUNC_CODE_INST_CAST: return "INST_CAST"; + case naclbitc::FUNC_CODE_INST_GEP: return "INST_GEP"; + case naclbitc::FUNC_CODE_INST_INBOUNDS_GEP: return "INST_INBOUNDS_GEP"; + case naclbitc::FUNC_CODE_INST_SELECT: return "INST_SELECT"; + case naclbitc::FUNC_CODE_INST_EXTRACTELT: return "INST_EXTRACTELT"; + case naclbitc::FUNC_CODE_INST_INSERTELT: return "INST_INSERTELT"; + case naclbitc::FUNC_CODE_INST_SHUFFLEVEC: return "INST_SHUFFLEVEC"; + case naclbitc::FUNC_CODE_INST_CMP: return "INST_CMP"; + + case naclbitc::FUNC_CODE_INST_RET: return "INST_RET"; + case naclbitc::FUNC_CODE_INST_BR: return "INST_BR"; + case naclbitc::FUNC_CODE_INST_SWITCH: return "INST_SWITCH"; + case naclbitc::FUNC_CODE_INST_INVOKE: return "INST_INVOKE"; + case naclbitc::FUNC_CODE_INST_UNREACHABLE: return "INST_UNREACHABLE"; + + case naclbitc::FUNC_CODE_INST_PHI: return "INST_PHI"; + case naclbitc::FUNC_CODE_INST_ALLOCA: return "INST_ALLOCA"; + case naclbitc::FUNC_CODE_INST_LOAD: return "INST_LOAD"; + case naclbitc::FUNC_CODE_INST_VAARG: return "INST_VAARG"; + case naclbitc::FUNC_CODE_INST_STORE: return "INST_STORE"; + case naclbitc::FUNC_CODE_INST_EXTRACTVAL: return "INST_EXTRACTVAL"; + case naclbitc::FUNC_CODE_INST_INSERTVAL: return "INST_INSERTVAL"; + case naclbitc::FUNC_CODE_INST_CMP2: return "INST_CMP2"; + case naclbitc::FUNC_CODE_INST_VSELECT: return "INST_VSELECT"; + case naclbitc::FUNC_CODE_DEBUG_LOC_AGAIN: return "DEBUG_LOC_AGAIN"; + case naclbitc::FUNC_CODE_INST_CALL: return "INST_CALL"; + case naclbitc::FUNC_CODE_INST_CALL_INDIRECT: return "INST_CALL_INDIRECT"; + case naclbitc::FUNC_CODE_DEBUG_LOC: return "DEBUG_LOC"; + case naclbitc::FUNC_CODE_INST_FORWARDTYPEREF: return "FORWARDTYPEREF"; + } + case naclbitc::VALUE_SYMTAB_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::VST_CODE_ENTRY: return "ENTRY"; + case naclbitc::VST_CODE_BBENTRY: return "BBENTRY"; + 
} + case naclbitc::METADATA_ATTACHMENT_ID: + switch(CodeID) { + default:return 0; + case naclbitc::METADATA_ATTACHMENT: return "METADATA_ATTACHMENT"; + } + case naclbitc::METADATA_BLOCK_ID: + switch(CodeID) { + default:return 0; + case naclbitc::METADATA_STRING: return "METADATA_STRING"; + case naclbitc::METADATA_NAME: return "METADATA_NAME"; + case naclbitc::METADATA_KIND: return "METADATA_KIND"; + case naclbitc::METADATA_NODE: return "METADATA_NODE"; + case naclbitc::METADATA_FN_NODE: return "METADATA_FN_NODE"; + case naclbitc::METADATA_NAMED_NODE: return "METADATA_NAMED_NODE"; + } + case naclbitc::GLOBALVAR_BLOCK_ID: + switch (CodeID) { + default: return 0; + case naclbitc::GLOBALVAR_VAR: return "VAR"; + case naclbitc::GLOBALVAR_COMPOUND: return "COMPOUND"; + case naclbitc::GLOBALVAR_ZEROFILL: return "ZEROFILL"; + case naclbitc::GLOBALVAR_DATA: return "DATA"; + case naclbitc::GLOBALVAR_RELOC: return "RELOC"; + case naclbitc::GLOBALVAR_COUNT: return "COUNT"; + } + } +} + +bool NaClBitcodeRecordCodeDist::HasKnownCodeName(unsigned CodeID, + unsigned BlockID) { + return ::GetCodeName(CodeID, BlockID) != 0; +} + +std::string NaClBitcodeRecordCodeDist::GetCodeName(unsigned CodeID, + unsigned BlockID) { + if (const char *CodeName = ::GetCodeName(CodeID, BlockID)) + return CodeName; + + std::string Str; + raw_string_ostream StrStrm(Str); + StrStrm << "UnknownCode" << CodeID; + return StrStrm.str(); +} diff --git a/lib/Bitcode/NaCl/Writer/CMakeLists.txt b/lib/Bitcode/NaCl/Writer/CMakeLists.txt new file mode 100644 index 000000000000..f5718fdb888c --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/CMakeLists.txt @@ -0,0 +1,5 @@ +add_llvm_library(LLVMNaClBitWriter + NaClBitcodeWriter.cpp + NaClValueEnumerator.cpp + ) +add_dependencies(LLVMNaClBitWriter intinsics_gen) diff --git a/lib/Bitcode/NaCl/Writer/LLVMBuild.txt b/lib/Bitcode/NaCl/Writer/LLVMBuild.txt new file mode 100644 index 000000000000..4bcdfdc87e57 --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/LLVMBuild.txt @@ -0,0 +1,22 @@ +;===- ./lib/Bitcode/NaCl/Writer/LLVMBuild.txt -----------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NaClBitWriter +parent = NaClBitcode +required_libraries = Core Support NaClBitReader diff --git a/lib/Bitcode/NaCl/Writer/Makefile b/lib/Bitcode/NaCl/Writer/Makefile new file mode 100644 index 000000000000..60da2d1b7162 --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/Makefile @@ -0,0 +1,15 @@ +##===- lib/Bitcode/NaCl/Writer/Makefile --------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. +# +##===----------------------------------------------------------------------===## + +LEVEL = ../../../.. 
+LIBRARYNAME = LLVMNaClBitWriter +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp new file mode 100644 index 000000000000..984aa3f59577 --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp @@ -0,0 +1,1246 @@ +//===--- Bitcode/NaCl/Writer/NaClBitcodeWriter.cpp - Bitcode Writer -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Bitcode writer implementation. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "NaClBitcodeWriter" + +#include "llvm/Bitcode/NaCl/NaClBitcodeHeader.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" +#include "NaClValueEnumerator.h" +#include "llvm/Bitcode/NaCl/NaClBitstreamWriter.h" +#include "llvm/Bitcode/NaCl/NaClLLVMBitCodes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" +#include +#include +using namespace llvm; + +static cl::opt +PNaClVersion("pnacl-version", + cl::desc("Specify PNaCl bitcode version to write"), + cl::init(2)); + +/// These are manifest constants used by the bitcode writer. They do +/// not need to be kept in sync with the reader, but need to be +/// consistent within this file. +/// +/// Note that for each block type GROUP, the last entry should be of +/// the form: +/// +/// GROUP_MAX_ABBREV = GROUP_LAST_ABBREV, +/// +/// where GROUP_LAST_ABBREV is the last defined abbreviation. See +/// include file "llvm/Bitcode/NaCl/NaClBitCodes.h" for more +/// information on how groups should be defined. +enum { + // VALUE_SYMTAB_BLOCK abbrev id's. + VST_ENTRY_8_ABBREV = naclbitc::FIRST_APPLICATION_ABBREV, + VST_ENTRY_7_ABBREV, + VST_ENTRY_6_ABBREV, + VST_BBENTRY_6_ABBREV, + VST_MAX_ABBREV = VST_BBENTRY_6_ABBREV, + + // CONSTANTS_BLOCK abbrev id's. + CONSTANTS_SETTYPE_ABBREV = naclbitc::FIRST_APPLICATION_ABBREV, + CONSTANTS_INTEGER_ABBREV, + CONSTANTS_INTEGER_ZERO_ABBREV, + CONSTANTS_FLOAT_ABBREV, + CONSTANTS_MAX_ABBREV = CONSTANTS_FLOAT_ABBREV, + + // GLOBALVAR BLOCK abbrev id's. + GLOBALVAR_VAR_ABBREV = naclbitc::FIRST_APPLICATION_ABBREV, + GLOBALVAR_COMPOUND_ABBREV, + GLOBALVAR_ZEROFILL_ABBREV, + GLOBALVAR_DATA_ABBREV, + GLOBALVAR_RELOC_ABBREV, + GLOBALVAR_RELOC_WITH_ADDEND_ABBREV, + GLOBALVAR_MAX_ABBREV = GLOBALVAR_RELOC_WITH_ADDEND_ABBREV, + + // FUNCTION_BLOCK abbrev id's. + FUNCTION_INST_LOAD_ABBREV = naclbitc::FIRST_APPLICATION_ABBREV, + FUNCTION_INST_BINOP_ABBREV, + FUNCTION_INST_CAST_ABBREV, + FUNCTION_INST_RET_VOID_ABBREV, + FUNCTION_INST_RET_VAL_ABBREV, + FUNCTION_INST_UNREACHABLE_ABBREV, + FUNCTION_INST_FORWARDTYPEREF_ABBREV, + FUNCTION_INST_STORE_ABBREV, + FUNCTION_INST_MAX_ABBREV = FUNCTION_INST_STORE_ABBREV, + + // TYPE_BLOCK_ID_NEW abbrev id's. 
+ TYPE_POINTER_ABBREV = naclbitc::FIRST_APPLICATION_ABBREV, + TYPE_FUNCTION_ABBREV, + TYPE_ARRAY_ABBREV, + TYPE_MAX_ABBREV = TYPE_ARRAY_ABBREV +}; + +LLVM_ATTRIBUTE_NORETURN +static void ReportIllegalValue(const char *ValueMessage, + const Value &Value) { + std::string Message; + raw_string_ostream StrM(Message); + StrM << "Illegal "; + if (ValueMessage != 0) + StrM << ValueMessage << " "; + StrM << ": " << Value; + report_fatal_error(StrM.str()); +} + +static unsigned GetEncodedCastOpcode(unsigned Opcode, const Value &V) { + switch (Opcode) { + default: ReportIllegalValue("cast", V); + case Instruction::Trunc : return naclbitc::CAST_TRUNC; + case Instruction::ZExt : return naclbitc::CAST_ZEXT; + case Instruction::SExt : return naclbitc::CAST_SEXT; + case Instruction::FPToUI : return naclbitc::CAST_FPTOUI; + case Instruction::FPToSI : return naclbitc::CAST_FPTOSI; + case Instruction::UIToFP : return naclbitc::CAST_UITOFP; + case Instruction::SIToFP : return naclbitc::CAST_SITOFP; + case Instruction::FPTrunc : return naclbitc::CAST_FPTRUNC; + case Instruction::FPExt : return naclbitc::CAST_FPEXT; + case Instruction::BitCast : return naclbitc::CAST_BITCAST; + } +} + +static unsigned GetEncodedBinaryOpcode(unsigned Opcode, const Value &V) { + switch (Opcode) { + default: ReportIllegalValue("binary opcode", V); + case Instruction::Add: + case Instruction::FAdd: return naclbitc::BINOP_ADD; + case Instruction::Sub: + case Instruction::FSub: return naclbitc::BINOP_SUB; + case Instruction::Mul: + case Instruction::FMul: return naclbitc::BINOP_MUL; + case Instruction::UDiv: return naclbitc::BINOP_UDIV; + case Instruction::FDiv: + case Instruction::SDiv: return naclbitc::BINOP_SDIV; + case Instruction::URem: return naclbitc::BINOP_UREM; + case Instruction::FRem: + case Instruction::SRem: return naclbitc::BINOP_SREM; + case Instruction::Shl: return naclbitc::BINOP_SHL; + case Instruction::LShr: return naclbitc::BINOP_LSHR; + case Instruction::AShr: return naclbitc::BINOP_ASHR; + case Instruction::And: return naclbitc::BINOP_AND; + case Instruction::Or: return naclbitc::BINOP_OR; + case Instruction::Xor: return naclbitc::BINOP_XOR; + } +} + +static unsigned GetEncodedCallingConv(CallingConv::ID conv) { + switch (conv) { + default: report_fatal_error( + "Calling convention not supported by PNaCL bitcode"); + case CallingConv::C: return naclbitc::C_CallingConv; + } +} + +// Converts LLVM encoding of comparison predicates to the +// corresponding bitcode versions. 
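+// Note that both icmp and fcmp instructions are written with the same
+// FUNC_CODE_INST_CMP2 record code (see WriteInstruction below), so the
+// predicate value emitted here is what distinguishes integer from floating
+// comparisons.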
+static unsigned GetEncodedCmpPredicate(const CmpInst &Cmp) { + switch (Cmp.getPredicate()) { + default: report_fatal_error( + "Comparison predicate not supported by PNaCl bitcode"); + case CmpInst::FCMP_FALSE: + return naclbitc::FCMP_FALSE; + case CmpInst::FCMP_OEQ: + return naclbitc::FCMP_OEQ; + case CmpInst::FCMP_OGT: + return naclbitc::FCMP_OGT; + case CmpInst::FCMP_OGE: + return naclbitc::FCMP_OGE; + case CmpInst::FCMP_OLT: + return naclbitc::FCMP_OLT; + case CmpInst::FCMP_OLE: + return naclbitc::FCMP_OLE; + case CmpInst::FCMP_ONE: + return naclbitc::FCMP_ONE; + case CmpInst::FCMP_ORD: + return naclbitc::FCMP_ORD; + case CmpInst::FCMP_UNO: + return naclbitc::FCMP_UNO; + case CmpInst::FCMP_UEQ: + return naclbitc::FCMP_UEQ; + case CmpInst::FCMP_UGT: + return naclbitc::FCMP_UGT; + case CmpInst::FCMP_UGE: + return naclbitc::FCMP_UGE; + case CmpInst::FCMP_ULT: + return naclbitc::FCMP_ULT; + case CmpInst::FCMP_ULE: + return naclbitc::FCMP_ULE; + case CmpInst::FCMP_UNE: + return naclbitc::FCMP_UNE; + case CmpInst::FCMP_TRUE: + return naclbitc::FCMP_TRUE; + case CmpInst::ICMP_EQ: + return naclbitc::ICMP_EQ; + case CmpInst::ICMP_NE: + return naclbitc::ICMP_NE; + case CmpInst::ICMP_UGT: + return naclbitc::ICMP_UGT; + case CmpInst::ICMP_UGE: + return naclbitc::ICMP_UGE; + case CmpInst::ICMP_ULT: + return naclbitc::ICMP_ULT; + case CmpInst::ICMP_ULE: + return naclbitc::ICMP_ULE; + case CmpInst::ICMP_SGT: + return naclbitc::ICMP_SGT; + case CmpInst::ICMP_SGE: + return naclbitc::ICMP_SGE; + case CmpInst::ICMP_SLT: + return naclbitc::ICMP_SLT; + case CmpInst::ICMP_SLE: + return naclbitc::ICMP_SLE; + } +} + +// The type of encoding to use for type ids. +static NaClBitCodeAbbrevOp::Encoding TypeIdEncoding = NaClBitCodeAbbrevOp::VBR; + +// The cutoff (in number of bits) from Fixed to VBR. +static const unsigned TypeIdVBRCutoff = 6; + +// The number of bits to use in the encoding of type ids. +static unsigned TypeIdNumBits = TypeIdVBRCutoff; + +// Optimizes the value for TypeIdEncoding and TypeIdNumBits based +// the actual number of types. +static inline void OptimizeTypeIdEncoding(const NaClValueEnumerator &VE) { + // Note: modify to use maximum number of bits if under cutoff. Otherwise, + // use VBR to take advantage that frequently referenced types have + // small IDs. + unsigned NumBits = NaClBitsNeededForValue(VE.getTypes().size()); + TypeIdNumBits = (NumBits < TypeIdVBRCutoff ? NumBits : TypeIdVBRCutoff); + TypeIdEncoding = NaClBitCodeAbbrevOp::Encoding( + NumBits <= TypeIdVBRCutoff + ? NaClBitCodeAbbrevOp::Fixed : NaClBitCodeAbbrevOp::VBR); +} + +/// WriteTypeTable - Write out the type table for a module. +static void WriteTypeTable(const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + DEBUG(dbgs() << "-> WriteTypeTable\n"); + const NaClValueEnumerator::TypeList &TypeList = VE.getTypes(); + + Stream.EnterSubblock(naclbitc::TYPE_BLOCK_ID_NEW, TYPE_MAX_ABBREV); + + SmallVector TypeVals; + + // Abbrev for TYPE_CODE_POINTER. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_POINTER)); + Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits)); + Abbv->Add(NaClBitCodeAbbrevOp(0)); // Addrspace = 0 + if (TYPE_POINTER_ABBREV != Stream.EmitAbbrev(Abbv)) + llvm_unreachable("Unexpected abbrev ordering!"); + + // Abbrev for TYPE_CODE_FUNCTION. 
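+  // For illustration (assuming NaClBitsNeededForValue returns the number of
+  // bits needed to represent the type count): a module with 20 types gets
+  // Fixed(5) type-ID operands in these abbreviations via
+  // OptimizeTypeIdEncoding above, while a module with several hundred types
+  // falls back to VBR-6 type IDs (the TypeIdVBRCutoff).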
+ Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::TYPE_CODE_FUNCTION)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); // isvararg + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits)); + if (TYPE_FUNCTION_ABBREV != Stream.EmitAbbrev(Abbv)) + llvm_unreachable("Unexpected abbrev ordering!"); + + // Emit an entry count so the reader can reserve space. + TypeVals.push_back(TypeList.size()); + Stream.EmitRecord(naclbitc::TYPE_CODE_NUMENTRY, TypeVals); + TypeVals.clear(); + + // Loop over all of the types, emitting each in turn. + for (unsigned i = 0, e = TypeList.size(); i != e; ++i) { + Type *T = TypeList[i]; + int AbbrevToUse = 0; + unsigned Code = 0; + + switch (T->getTypeID()) { + default: llvm_unreachable("Unknown type!"); + case Type::VoidTyID: Code = naclbitc::TYPE_CODE_VOID; break; + case Type::FloatTyID: Code = naclbitc::TYPE_CODE_FLOAT; break; + case Type::DoubleTyID: Code = naclbitc::TYPE_CODE_DOUBLE; break; + case Type::IntegerTyID: + // INTEGER: [width] + Code = naclbitc::TYPE_CODE_INTEGER; + TypeVals.push_back(cast(T)->getBitWidth()); + break; + case Type::FunctionTyID: { + FunctionType *FT = cast(T); + // FUNCTION: [isvararg, retty, paramty x N] + Code = naclbitc::TYPE_CODE_FUNCTION; + TypeVals.push_back(FT->isVarArg()); + TypeVals.push_back(VE.getTypeID(FT->getReturnType())); + for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) + TypeVals.push_back(VE.getTypeID(FT->getParamType(i))); + AbbrevToUse = TYPE_FUNCTION_ABBREV; + break; + } + case Type::StructTyID: + report_fatal_error("Struct types are not supported in PNaCl bitcode"); + case Type::ArrayTyID: + report_fatal_error("Array types are not supported in PNaCl bitcode"); + case Type::VectorTyID: + report_fatal_error("Vector types are not supported in PNaCl bitcode"); + } + + // Emit the finished record. + Stream.EmitRecord(Code, TypeVals, AbbrevToUse); + TypeVals.clear(); + } + + Stream.ExitBlock(); + DEBUG(dbgs() << "<- WriteTypeTable\n"); +} + +static unsigned getEncodedLinkage(const GlobalValue *GV) { + if(GV->hasDLLImportStorageClass()) + return 5; + else if(GV->hasDLLExportStorageClass()) + return 6; + + switch (GV->getLinkage()) { + case GlobalValue::ExternalLinkage: return 0; + case GlobalValue::WeakAnyLinkage: return 1; + case GlobalValue::AppendingLinkage: return 2; + case GlobalValue::InternalLinkage: return 3; + case GlobalValue::LinkOnceAnyLinkage: return 4; + case GlobalValue::ExternalWeakLinkage: return 7; + case GlobalValue::CommonLinkage: return 8; + case GlobalValue::PrivateLinkage: return 9; + case GlobalValue::WeakODRLinkage: return 10; + case GlobalValue::LinkOnceODRLinkage: return 11; + case GlobalValue::AvailableExternallyLinkage: return 12; + case GlobalValue::LinkerPrivateLinkage: return 13; + case GlobalValue::LinkerPrivateWeakLinkage: return 14; + } + llvm_unreachable("Invalid linkage"); +} + +/// \brief Function to convert constant initializers for global +/// variables into corresponding bitcode. Takes advantage that these +/// global variable initializations are normalized (see +/// lib/Transforms/NaCl/FlattenGlobals.cpp). 
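+///
+/// Illustrative example (assuming the flattened normal form): an initializer
+///
+///   <{ [4 x i8] c"abcd", i32 ptrtoint (void ()* @f to i32) }>
+///
+/// is announced by the caller with a COMPOUND record of two fields, and this
+/// function then writes a DATA record holding the four bytes and a RELOC
+/// record holding the value ID of @f (plus an addend field if the ptrtoint
+/// is wrapped in an add).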
+void WriteGlobalInit(const Constant *C, unsigned GlobalVarID, + SmallVectorImpl &Vals, + const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + if (ArrayType *Ty = dyn_cast(C->getType())) { + if (!Ty->getElementType()->isIntegerTy(8)) + report_fatal_error("Global array initializer not i8"); + uint32_t Size = Ty->getNumElements(); + if (isa(C)) { + Vals.push_back(Size); + Stream.EmitRecord(naclbitc::GLOBALVAR_ZEROFILL, Vals, + GLOBALVAR_ZEROFILL_ABBREV); + Vals.clear(); + } else { + const ConstantDataSequential *CD = cast(C); + StringRef Data = CD->getRawDataValues(); + for (size_t i = 0; i < Size; ++i) { + Vals.push_back(Data[i] & 0xFF); + } + Stream.EmitRecord(naclbitc::GLOBALVAR_DATA, Vals, + GLOBALVAR_DATA_ABBREV); + Vals.clear(); + } + return; + } + if (VE.IsIntPtrType(C->getType())) { + // This constant defines a relocation. Start by verifying the + // relocation is of the right form. + const ConstantExpr *CE = dyn_cast(C); + if (CE == 0) + report_fatal_error("Global i32 initializer not constant"); + assert(CE); + int32_t Addend = 0; + if (CE->getOpcode() == Instruction::Add) { + const ConstantInt *AddendConst = dyn_cast(CE->getOperand(1)); + if (AddendConst == 0) + report_fatal_error("Malformed addend in global relocation initializer"); + Addend = AddendConst->getSExtValue(); + CE = dyn_cast(CE->getOperand(0)); + if (CE == 0) + report_fatal_error( + "Base of global relocation initializer not constant"); + } + if (CE->getOpcode() != Instruction::PtrToInt) + report_fatal_error("Global relocation base doesn't contain ptrtoint"); + GlobalValue *GV = dyn_cast(CE->getOperand(0)); + if (GV == 0) + report_fatal_error( + "Argument of ptrtoint in global relocation no global value"); + + // Now generate the corresponding relocation record. + unsigned RelocID = VE.getValueID(GV); + // This is a value index. + unsigned AbbrevToUse = GLOBALVAR_RELOC_ABBREV; + Vals.push_back(RelocID); + if (Addend) { + Vals.push_back(Addend); + AbbrevToUse = GLOBALVAR_RELOC_WITH_ADDEND_ABBREV; + } + Stream.EmitRecord(naclbitc::GLOBALVAR_RELOC, Vals, AbbrevToUse); + Vals.clear(); + return; + } + report_fatal_error("Global initializer is not a SimpleElement"); +} + +// Emit global variables. +static void WriteGlobalVars(const Module *M, + const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + Stream.EnterSubblock(naclbitc::GLOBALVAR_BLOCK_ID); + SmallVector Vals; + unsigned GlobalVarID = VE.getFirstGlobalVarID(); + + // Emit the number of global variables. + + Vals.push_back(M->getGlobalList().size()); + Stream.EmitRecord(naclbitc::GLOBALVAR_COUNT, Vals); + Vals.clear(); + + // Now emit each global variable. + for (Module::const_global_iterator + GV = M->global_begin(), E = M->global_end(); + GV != E; ++GV, ++GlobalVarID) { + // Define the global variable. + Vals.push_back(Log2_32(GV->getAlignment()) + 1); + Vals.push_back(GV->isConstant()); + Stream.EmitRecord(naclbitc::GLOBALVAR_VAR, Vals, GLOBALVAR_VAR_ABBREV); + Vals.clear(); + + // Add the field(s). 
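+    // A simple initializer (an i8 array or an i32 relocation) is a single
+    // SimpleElement handled directly by WriteGlobalInit; a packed anonymous
+    // struct first gets a COMPOUND record announcing how many field records
+    // follow.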
+ const Constant *C = GV->getInitializer(); + if (C == 0) + report_fatal_error("Global variable initializer not a constant"); + if (const ConstantStruct *CS = dyn_cast(C)) { + if (!CS->getType()->isPacked()) + report_fatal_error("Global variable type not packed"); + if (CS->getType()->hasName()) + report_fatal_error("Global variable type is named"); + Vals.push_back(CS->getNumOperands()); + Stream.EmitRecord(naclbitc::GLOBALVAR_COMPOUND, Vals, + GLOBALVAR_COMPOUND_ABBREV); + Vals.clear(); + for (unsigned I = 0; I < CS->getNumOperands(); ++I) { + WriteGlobalInit(dyn_cast(CS->getOperand(I)), GlobalVarID, + Vals, VE, Stream); + } + } else { + WriteGlobalInit(C, GlobalVarID, Vals, VE, Stream); + } + } + + assert(GlobalVarID == VE.getFirstGlobalVarID() + VE.getNumGlobalVarIDs()); + Stream.ExitBlock(); +} + +// Emit top-level description of module, including inline asm, +// descriptors for global variables, and function prototype info. +static void WriteModuleInfo(const Module *M, const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + DEBUG(dbgs() << "-> WriteModuleInfo\n"); + + // Emit the function proto information. Note: We do this before + // global variables, so that global variable initializations can + // refer to the functions without a forward reference. + SmallVector Vals; + for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { + // FUNCTION: [type, callingconv, isproto, linkage] + Type *Ty = F->getType()->getPointerElementType(); + Vals.push_back(VE.getTypeID(Ty)); + Vals.push_back(GetEncodedCallingConv(F->getCallingConv())); + Vals.push_back(F->isDeclaration()); + Vals.push_back(getEncodedLinkage(F)); + + unsigned AbbrevToUse = 0; + Stream.EmitRecord(naclbitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse); + Vals.clear(); + } + + // Emit the global variable information. + WriteGlobalVars(M, VE, Stream); + DEBUG(dbgs() << "<- WriteModuleInfo\n"); +} + +static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { + Vals.push_back(NaClEncodeSignRotatedValue((int64_t)V)); +} + +static void EmitAPInt(SmallVectorImpl &Vals, + unsigned &Code, unsigned &AbbrevToUse, const APInt &Val) { + if (Val.getBitWidth() <= 64) { + uint64_t V = Val.getSExtValue(); + emitSignedInt64(Vals, V); + Code = naclbitc::CST_CODE_INTEGER; + AbbrevToUse = + Val == 0 ? CONSTANTS_INTEGER_ZERO_ABBREV : CONSTANTS_INTEGER_ABBREV; + } else { + report_fatal_error("Wide integers are not supported"); + } +} + +static void WriteConstants(unsigned FirstVal, unsigned LastVal, + const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + if (FirstVal == LastVal) return; + + Stream.EnterSubblock(naclbitc::CONSTANTS_BLOCK_ID, CONSTANTS_MAX_ABBREV); + + SmallVector Record; + + const NaClValueEnumerator::ValueList &Vals = VE.getValues(); + Type *LastTy = 0; + for (unsigned i = FirstVal; i != LastVal; ++i) { + const Value *V = Vals[i].first; + // If we need to switch types, do so now. 
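+    // Since OptimizeConstants sorts the pool by type plane, one SETTYPE
+    // record typically covers a whole run of constants (e.g. all i32
+    // literals, then all floats), keeping the number of SETTYPE records low.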
+ if (V->getType() != LastTy) { + LastTy = V->getType(); + Record.push_back(VE.getTypeID(LastTy)); + Stream.EmitRecord(naclbitc::CST_CODE_SETTYPE, Record, + CONSTANTS_SETTYPE_ABBREV); + Record.clear(); + } + + if (isa(V)) { + ReportIllegalValue("inline assembly", *V); + } + const Constant *C = cast(V); + unsigned Code = -1U; + unsigned AbbrevToUse = 0; + if (isa(C)) { + Code = naclbitc::CST_CODE_UNDEF; + } else if (const ConstantInt *IV = dyn_cast(C)) { + EmitAPInt(Record, Code, AbbrevToUse, IV->getValue()); + } else if (const ConstantFP *CFP = dyn_cast(C)) { + Code = naclbitc::CST_CODE_FLOAT; + AbbrevToUse = CONSTANTS_FLOAT_ABBREV; + Type *Ty = CFP->getType(); + if (Ty->isFloatTy() || Ty->isDoubleTy()) { + Record.push_back(CFP->getValueAPF().bitcastToAPInt().getZExtValue()); + } else { + report_fatal_error("Unknown FP type"); + } + } else { +#ifndef NDEBUG + C->dump(); +#endif + ReportIllegalValue("constant", *C); + } + Stream.EmitRecord(Code, Record, AbbrevToUse); + Record.clear(); + } + + Stream.ExitBlock(); + DEBUG(dbgs() << "<- WriteConstants\n"); +} + +/// \brief Emits a type for the forward value reference. That is, if +/// the ID for the given value is larger than or equal to the BaseID, +/// the corresponding forward reference is generated. +static void EmitFnForwardTypeRef(const Value *V, + unsigned BaseID, + NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + unsigned ValID = VE.getValueID(V); + if (ValID >= BaseID && + VE.InsertFnForwardTypeRef(ValID)) { + SmallVector Vals; + Vals.push_back(ValID); + Vals.push_back(VE.getTypeID(VE.NormalizeType(V->getType()))); + Stream.EmitRecord(naclbitc::FUNC_CODE_INST_FORWARDTYPEREF, Vals, + FUNCTION_INST_FORWARDTYPEREF_ABBREV); + } +} + +/// pushValue - The file has to encode both the value and type id for +/// many values, because we need to know what type to create for forward +/// references. However, most operands are not forward references, so this type +/// field is not needed. +/// +/// This function adds V's value ID to Vals. If the value ID is higher than the +/// instruction ID, then it is a forward reference, and it also includes the +/// type ID. The value ID that is written is encoded relative to the InstID. +static void pushValue(const Value *V, unsigned InstID, + SmallVector &Vals, + NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + const Value *VElided = VE.ElideCasts(V); + EmitFnForwardTypeRef(VElided, InstID, VE, Stream); + unsigned ValID = VE.getValueID(VElided); + // Make encoding relative to the InstID. + Vals.push_back(InstID - ValID); +} + +static void pushValue64(const Value *V, unsigned InstID, + SmallVector &Vals, + NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + const Value *VElided = VE.ElideCasts(V); + EmitFnForwardTypeRef(VElided, InstID, VE, Stream); + uint64_t ValID = VE.getValueID(VElided); + Vals.push_back(InstID - ValID); +} + +static void pushValueSigned(const Value *V, unsigned InstID, + SmallVector &Vals, + NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + const Value *VElided = VE.ElideCasts(V); + EmitFnForwardTypeRef(VElided, InstID, VE, Stream); + unsigned ValID = VE.getValueID(VElided); + int64_t diff = ((int32_t)InstID - (int32_t)ValID); + emitSignedInt64(Vals, diff); +} + +/// WriteInstruction - Emit an instruction to the specified stream. +/// Returns true if instruction actually emitted. 
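+///
+/// Example of the relative operand encoding used below: an instruction with
+/// ID 42 that uses the value with ID 40 emits the operand as 42 - 40 = 2
+/// (see pushValue above); PHI nodes use pushValueSigned instead, since
+/// incoming values defined in later blocks would make the delta negative.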
+static bool WriteInstruction(const Instruction &I, unsigned InstID, + NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream, + SmallVector &Vals) { + unsigned Code = 0; + unsigned AbbrevToUse = 0; + VE.setInstructionID(&I); + switch (I.getOpcode()) { + default: + if (Instruction::isCast(I.getOpcode())) { + // CAST: [opval, destty, castopc] + if (VE.IsElidedCast(&I)) + return false; + Code = naclbitc::FUNC_CODE_INST_CAST; + AbbrevToUse = FUNCTION_INST_CAST_ABBREV; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + Vals.push_back(VE.getTypeID(I.getType())); + unsigned Opcode = I.getOpcode(); + Vals.push_back(GetEncodedCastOpcode(Opcode, I)); + if (Opcode == Instruction::PtrToInt || + Opcode == Instruction::IntToPtr || + (Opcode == Instruction::BitCast && + (I.getOperand(0)->getType()->isPointerTy() || + I.getType()->isPointerTy()))) { + ReportIllegalValue("(PNaCl ABI) pointer cast", I); + } + } else if (isa(I)) { + // BINOP: [opval, opval, opcode] + Code = naclbitc::FUNC_CODE_INST_BINOP; + AbbrevToUse = FUNCTION_INST_BINOP_ABBREV; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); + Vals.push_back(GetEncodedBinaryOpcode(I.getOpcode(), I)); + } else { + ReportIllegalValue("instruction", I); + } + break; + case Instruction::Select: + Code = naclbitc::FUNC_CODE_INST_VSELECT; + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); + pushValue(I.getOperand(2), InstID, Vals, VE, Stream); + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + break; + case Instruction::ICmp: + case Instruction::FCmp: + // compare returning Int1Ty or vector of Int1Ty + Code = naclbitc::FUNC_CODE_INST_CMP2; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); + Vals.push_back(GetEncodedCmpPredicate(cast(I))); + break; + + case Instruction::Ret: + { + Code = naclbitc::FUNC_CODE_INST_RET; + unsigned NumOperands = I.getNumOperands(); + if (NumOperands == 0) + AbbrevToUse = FUNCTION_INST_RET_VOID_ABBREV; + else if (NumOperands == 1) { + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + AbbrevToUse = FUNCTION_INST_RET_VAL_ABBREV; + } else { + for (unsigned i = 0, e = NumOperands; i != e; ++i) + pushValue(I.getOperand(i), InstID, Vals, VE, Stream); + } + } + break; + case Instruction::Br: + { + Code = naclbitc::FUNC_CODE_INST_BR; + const BranchInst &II = cast(I); + Vals.push_back(VE.getValueID(II.getSuccessor(0))); + if (II.isConditional()) { + Vals.push_back(VE.getValueID(II.getSuccessor(1))); + pushValue(II.getCondition(), InstID, Vals, VE, Stream); + } + } + break; + case Instruction::Switch: + { + // Redefine Vals, since here we need to use 64 bit values + // explicitly to store large APInt numbers. + SmallVector Vals64; + + Code = naclbitc::FUNC_CODE_INST_SWITCH; + const SwitchInst &SI = cast(I); + + Vals64.push_back(VE.getTypeID(SI.getCondition()->getType())); + pushValue64(SI.getCondition(), InstID, Vals64, VE, Stream); + Vals64.push_back(VE.getValueID(SI.getDefaultDest())); + Vals64.push_back(SI.getNumCases()); + for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); + i != e; ++i) { + const ConstantInt* CaseValue = i.getCaseValue(); + unsigned Code, Abbrev; // will unused. 
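+        // Each case is written as [NumItems, IsSingleNumber, value,
+        // successor]; PNaCl only emits single-value cases, hence the
+        // constants below. Code/Abbrev exist only to satisfy EmitAPInt's
+        // signature.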
+ + Vals64.push_back(1/*NumItems = 1*/); + Vals64.push_back(true/*IsSingleNumber = true*/); + EmitAPInt(Vals64, Code, Abbrev, CaseValue->getValue()); + Vals64.push_back(VE.getValueID(i.getCaseSuccessor())); + } + + Stream.EmitRecord(Code, Vals64, AbbrevToUse); + + // Also do expected action - clear external Vals collection: + Vals.clear(); + return true; + } + break; + case Instruction::Unreachable: + Code = naclbitc::FUNC_CODE_INST_UNREACHABLE; + AbbrevToUse = FUNCTION_INST_UNREACHABLE_ABBREV; + break; + + case Instruction::PHI: { + const PHINode &PN = cast(I); + Code = naclbitc::FUNC_CODE_INST_PHI; + // With the newer instruction encoding, forward references could give + // negative valued IDs. This is most common for PHIs, so we use + // signed VBRs. + SmallVector Vals64; + Vals64.push_back(VE.getTypeID(PN.getType())); + for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i) { + pushValueSigned(PN.getIncomingValue(i), InstID, Vals64, VE, Stream); + Vals64.push_back(VE.getValueID(PN.getIncomingBlock(i))); + } + // Emit a Vals64 vector and exit. + Stream.EmitRecord(Code, Vals64, AbbrevToUse); + Vals64.clear(); + return true; + } + + case Instruction::Alloca: + if (!cast(&I)->getAllocatedType()->isIntegerTy(8)) + report_fatal_error("Type of alloca instruction is not i8"); + Code = naclbitc::FUNC_CODE_INST_ALLOCA; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); // size. + Vals.push_back(Log2_32(cast(I).getAlignment())+1); + break; + case Instruction::Load: + // LOAD: [op, align, ty] + Code = naclbitc::FUNC_CODE_INST_LOAD; + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + AbbrevToUse = FUNCTION_INST_LOAD_ABBREV; + Vals.push_back(Log2_32(cast(I).getAlignment())+1); + Vals.push_back(VE.getTypeID(I.getType())); + break; + case Instruction::Store: + // STORE: [ptr, val, align] + Code = naclbitc::FUNC_CODE_INST_STORE; + AbbrevToUse = FUNCTION_INST_STORE_ABBREV; + pushValue(I.getOperand(1), InstID, Vals, VE, Stream); + pushValue(I.getOperand(0), InstID, Vals, VE, Stream); + Vals.push_back(Log2_32(cast(I).getAlignment())+1); + break; + case Instruction::Call: { + // CALL: [cc, fnid, args...] + // CALL_INDIRECT: [cc, fnid, fnty, args...] + + const CallInst &Call = cast(I); + const Value* Callee = Call.getCalledValue(); + Vals.push_back((GetEncodedCallingConv(Call.getCallingConv()) << 1) + | unsigned(Call.isTailCall())); + + pushValue(Callee, InstID, Vals, VE, Stream); + + if (Callee == VE.ElideCasts(Callee)) { + // Since the call pointer has not been elided, we know that + // the call pointer has the type signature of the called + // function. This implies that the reader can use the type + // signature of the callee to figure out how to add casts to + // the arguments. + Code = naclbitc::FUNC_CODE_INST_CALL; + } else { + // If the cast was elided, a pointer conversion to a pointer + // was applied, meaning that this is an indirect call. For the + // reader, this implies that we can't use the type signature + // of the callee to resolve elided call arguments, since it is + // not known. Hence, we must send the type signature to the + // reader. + Code = naclbitc::FUNC_CODE_INST_CALL_INDIRECT; + Vals.push_back(VE.getTypeID(I.getType())); + } + + for (unsigned I = 0, E = Call.getNumArgOperands(); I < E; ++I) { + pushValue(Call.getArgOperand(I), InstID, Vals, VE, Stream); + } + break; + } + } + + Stream.EmitRecord(Code, Vals, AbbrevToUse); + Vals.clear(); + return true; +} + +// Emit names for globals/functions etc. 
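+// Each entry is a VST_CODE_ENTRY or VST_CODE_BBENTRY record of the form
+// [valueid, namechar x N]; the narrowest usable abbreviation is chosen per
+// name (char6 when the name is purely [a-zA-Z0-9._], else 7-bit, else the
+// 8-bit fixed form).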
+static void WriteValueSymbolTable(const ValueSymbolTable &VST, + const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + if (VST.empty()) return; + Stream.EnterSubblock(naclbitc::VALUE_SYMTAB_BLOCK_ID); + + // FIXME: Set up the abbrev, we know how many values there are! + // FIXME: We know if the type names can use 7-bit ascii. + SmallVector NameVals; + + for (ValueSymbolTable::const_iterator SI = VST.begin(), SE = VST.end(); + SI != SE; ++SI) { + + const ValueName &Name = *SI; + + // Figure out the encoding to use for the name. + bool is7Bit = true; + bool isChar6 = true; + for (const char *C = Name.getKeyData(), *E = C+Name.getKeyLength(); + C != E; ++C) { + if (isChar6) + isChar6 = NaClBitCodeAbbrevOp::isChar6(*C); + if ((unsigned char)*C & 128) { + is7Bit = false; + break; // don't bother scanning the rest. + } + } + + unsigned AbbrevToUse = VST_ENTRY_8_ABBREV; + + // VST_ENTRY: [valueid, namechar x N] + // VST_BBENTRY: [bbid, namechar x N] + unsigned Code; + if (isa(SI->getValue())) { + Code = naclbitc::VST_CODE_BBENTRY; + if (isChar6) + AbbrevToUse = VST_BBENTRY_6_ABBREV; + } else { + Code = naclbitc::VST_CODE_ENTRY; + if (isChar6) + AbbrevToUse = VST_ENTRY_6_ABBREV; + else if (is7Bit) + AbbrevToUse = VST_ENTRY_7_ABBREV; + } + + NameVals.push_back(VE.getValueID(SI->getValue())); + for (const char *P = Name.getKeyData(), + *E = Name.getKeyData()+Name.getKeyLength(); P != E; ++P) + NameVals.push_back((unsigned char)*P); + + // Emit the finished record. + Stream.EmitRecord(Code, NameVals, AbbrevToUse); + NameVals.clear(); + } + Stream.ExitBlock(); +} + +/// WriteFunction - Emit a function body to the module stream. +static void WriteFunction(const Function &F, NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + Stream.EnterSubblock(naclbitc::FUNCTION_BLOCK_ID); + VE.incorporateFunction(F); + + SmallVector Vals; + + // Emit the number of basic blocks, so the reader can create them ahead of + // time. + Vals.push_back(VE.getBasicBlocks().size()); + Stream.EmitRecord(naclbitc::FUNC_CODE_DECLAREBLOCKS, Vals); + Vals.clear(); + + // If there are function-local constants, emit them now. + unsigned CstStart, CstEnd; + VE.getFunctionConstantRange(CstStart, CstEnd); + WriteConstants(CstStart, CstEnd, VE, Stream); + + // Keep a running idea of what the instruction ID is. + unsigned InstID = CstEnd; + + // Finally, emit all the instructions, in order. + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); + I != E; ++I) { + if (WriteInstruction(*I, InstID, VE, Stream, Vals) && + !I->getType()->isVoidTy()) + ++InstID; + } + + // Emit names for all the instructions etc. + WriteValueSymbolTable(F.getValueSymbolTable(), VE, Stream); + + VE.purgeFunction(); + Stream.ExitBlock(); +} + +// Emit blockinfo, which defines the standard abbreviations etc. +static void WriteBlockInfo(const NaClValueEnumerator &VE, + NaClBitstreamWriter &Stream) { + // We only want to emit block info records for blocks that have multiple + // instances: CONSTANTS_BLOCK, FUNCTION_BLOCK and VALUE_SYMTAB_BLOCK. + // Other blocks can define their abbrevs inline. + Stream.EnterBlockInfoBlock(); + + { // 8-bit fixed-width VST_ENTRY/VST_BBENTRY strings. 
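+    // Unlike the 7-bit and char6 abbreviations below, this one starts with a
+    // Fixed(3) operand for the record code itself, so the same abbreviation
+    // can carry both VST_CODE_ENTRY and VST_CODE_BBENTRY records.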
+ NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 3)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 8)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::VALUE_SYMTAB_BLOCK_ID, + Abbv) != VST_ENTRY_8_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + { // 7-bit fixed width VST_ENTRY strings. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::VST_CODE_ENTRY)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 7)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::VALUE_SYMTAB_BLOCK_ID, + Abbv) != VST_ENTRY_7_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // 6-bit char6 VST_ENTRY strings. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::VST_CODE_ENTRY)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Char6)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::VALUE_SYMTAB_BLOCK_ID, + Abbv) != VST_ENTRY_6_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // 6-bit char6 VST_BBENTRY strings. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::VST_CODE_BBENTRY)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Char6)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::VALUE_SYMTAB_BLOCK_ID, + Abbv) != VST_BBENTRY_6_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + + + { // SETTYPE abbrev for CONSTANTS_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_SETTYPE)); + Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::CONSTANTS_BLOCK_ID, + Abbv) != CONSTANTS_SETTYPE_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + { // INTEGER abbrev for CONSTANTS_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_INTEGER)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::CONSTANTS_BLOCK_ID, + Abbv) != CONSTANTS_INTEGER_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INTEGER_ZERO abbrev for CONSTANTS_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_INTEGER)); + Abbv->Add(NaClBitCodeAbbrevOp(0)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::CONSTANTS_BLOCK_ID, + Abbv) != CONSTANTS_INTEGER_ZERO_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // FLOAT abbrev for CONSTANTS_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::CST_CODE_FLOAT)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::CONSTANTS_BLOCK_ID, + Abbv) != CONSTANTS_FLOAT_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + // FIXME: This should only use space for first class types! + + { // INST_LOAD abbrev for FUNCTION_BLOCK. 
+ NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_LOAD)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // Ptr + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 4)); // Align + // Note: The vast majority of load operations are only on integers + // and floats. In addition, no function types are allowed. In + // addition, the type IDs have been sorted based on usage, moving + // type IDs associated integers and floats to very low + // indices. Hence, we assume that we can use a smaller width for + // the typecast. + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 4)); // TypeCast + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_LOAD_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_BINOP abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_BINOP)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // LHS + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // RHS + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 4)); // opc + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_BINOP_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_CAST abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_CAST)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // OpVal + Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits)); // dest ty + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 4)); // opc + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_CAST_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + { // INST_RET abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_RET)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_RET_VOID_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_RET abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_RET)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // ValID + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_RET_VAL_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_UNREACHABLE abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_UNREACHABLE)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_UNREACHABLE_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_FORWARDTYPEREF abbrev for FUNCTION_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_FORWARDTYPEREF)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); + Abbv->Add(NaClBitCodeAbbrevOp(TypeIdEncoding, TypeIdNumBits)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_FORWARDTYPEREF_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // INST_STORE abbrev for FUNCTION_BLOCK. 
+ NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::FUNC_CODE_INST_STORE)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // Ptr + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); // Value + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 4)); // Align + if (Stream.EmitBlockInfoAbbrev(naclbitc::FUNCTION_BLOCK_ID, + Abbv) != FUNCTION_INST_STORE_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + { // VAR abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_VAR)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 1)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_VAR_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // COMPOUND abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_COMPOUND)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_COMPOUND_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // ZEROFILL abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_ZEROFILL)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 8)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_ZEROFILL_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // DATA abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_DATA)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Array)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::Fixed, 8)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_DATA_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // RELOC abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_RELOC)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); + if (Stream.EmitBlockInfoAbbrev(naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_RELOC_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + { // RELOC_WITH_ADDEND_ABBREV abbrev for GLOBALVAR_BLOCK. + NaClBitCodeAbbrev *Abbv = new NaClBitCodeAbbrev(); + Abbv->Add(NaClBitCodeAbbrevOp(naclbitc::GLOBALVAR_RELOC)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); + Abbv->Add(NaClBitCodeAbbrevOp(NaClBitCodeAbbrevOp::VBR, 6)); + if (Stream.EmitBlockInfoAbbrev( + naclbitc::GLOBALVAR_BLOCK_ID, + Abbv) != GLOBALVAR_RELOC_WITH_ADDEND_ABBREV) + llvm_unreachable("Unexpected abbrev ordering!"); + } + + Stream.ExitBlock(); +} + +/// WriteModule - Emit the specified module to the bitstream. +static void WriteModule(const Module *M, NaClBitstreamWriter &Stream) { + DEBUG(dbgs() << "-> WriteModule\n"); + Stream.EnterSubblock(naclbitc::MODULE_BLOCK_ID); + + SmallVector Vals; + unsigned CurVersion = 1; + Vals.push_back(CurVersion); + Stream.EmitRecord(naclbitc::MODULE_CODE_VERSION, Vals); + + // Analyze the module, enumerating globals, functions, etc. + NaClValueEnumerator VE(M, PNaClVersion); + OptimizeTypeIdEncoding(VE); + + // Emit blockinfo, which defines the standard abbreviations etc. 
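+  // The sub-block order below is deliberate: the blockinfo abbreviations and
+  // the type table are written first so that the module info, global
+  // variables and function bodies that follow can use them.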
+ WriteBlockInfo(VE, Stream); + + // Emit information describing all of the types in the module. + WriteTypeTable(VE, Stream); + + // Emit top-level description of module, including inline asm, + // descriptors for global variables, and function prototype info. + WriteModuleInfo(M, VE, Stream); + + // Emit names for globals/functions etc. + WriteValueSymbolTable(M->getValueSymbolTable(), VE, Stream); + + // Emit function bodies. + for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) + if (!F->isDeclaration()) + WriteFunction(*F, VE, Stream); + + Stream.ExitBlock(); + DEBUG(dbgs() << "<- WriteModule\n"); +} + +// Max size for variable fields. Currently only used for writing them +// out to files (the parsing works for arbitrary sizes). +static const size_t kMaxVariableFieldSize = 256; + +// Write out the given Header to the bitstream. +void llvm::NaClWriteHeader(const NaClBitcodeHeader &Header, + NaClBitstreamWriter &Stream) { + // Emit the file magic number; + Stream.Emit((unsigned)'P', 8); + Stream.Emit((unsigned)'E', 8); + Stream.Emit((unsigned)'X', 8); + Stream.Emit((unsigned)'E', 8); + + // Emit placeholder for number of bytes used to hold header fields. + // This value is necessary so that the streamable reader can preallocate + // a buffer to read the fields. + Stream.Emit(0, naclbitc::BlockSizeWidth); + unsigned BytesForHeader = 0; + + unsigned NumberFields = Header.NumberFields(); + if (NumberFields > 0xFFFF) + report_fatal_error("Too many header fields"); + + uint8_t Buffer[kMaxVariableFieldSize]; + for (unsigned F = 0; F < NumberFields; ++F) { + NaClBitcodeHeaderField *Field = Header.GetField(F); + if (!Field->Write(Buffer, kMaxVariableFieldSize)) + report_fatal_error("Header field too big to generate"); + size_t limit = Field->GetTotalSize(); + for (size_t i = 0; i < limit; i++) { + Stream.Emit(Buffer[i], 8); + } + BytesForHeader += limit; + } + + if (BytesForHeader > 0xFFFF) + report_fatal_error("Header fields to big to save"); + + // Encode #fields in top two bytes, and #bytes to hold fields in + // bottom two bytes. Then backpatch into second word. + unsigned Value = NumberFields | (BytesForHeader << 16); + Stream.BackpatchWord(NaClBitcodeHeader::WordSize, Value); +} + +/// WriteBitcodeToFile - Write the specified module to the specified output +/// stream. +void llvm::NaClWriteBitcodeToFile(const Module *M, raw_ostream &Out, + bool AcceptSupportedOnly) { + SmallVector Buffer; + Buffer.reserve(256*1024); + + // Emit the module into the buffer. + { + NaClBitstreamWriter Stream(Buffer); + + // Define header and install into stream. + { + NaClBitcodeHeader Header; + Header.push_back( + new NaClBitcodeHeaderField(NaClBitcodeHeaderField::kPNaClVersion, + PNaClVersion)); + Header.InstallFields(); + if (!(Header.IsSupported() || + (!AcceptSupportedOnly && Header.IsReadable()))) { + report_fatal_error(Header.Unsupported()); + } + NaClWriteHeader(Header, Stream); + } + + // Emit the module. + WriteModule(M, Stream); + } + + // Write the generated bitstream to "Out". 
+ Out.write((char*)&Buffer.front(), Buffer.size()); +} diff --git a/lib/Bitcode/NaCl/Writer/NaClBitcodeWriterPass.cpp b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriterPass.cpp new file mode 100644 index 000000000000..37cb6a0d0b8b --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/NaClBitcodeWriterPass.cpp @@ -0,0 +1,47 @@ +//===- NaClBitcodeWriterPass.cpp - Bitcode writing pass -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// BitcodeWriterPass implementation. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/NaCl/NaClBitcodeWriterPass.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Pass.h" +using namespace llvm; + +PreservedAnalyses NaClBitcodeWriterPass::run(Module *M) { + NaClWriteBitcodeToFile(M, OS); + return PreservedAnalyses::all(); +} + +namespace { + class WriteBitcodePass : public ModulePass { + raw_ostream &OS; // raw_ostream to print on + public: + static char ID; // Pass identification, replacement for typeid + explicit WriteBitcodePass(raw_ostream &o) + : ModulePass(ID), OS(o) {} + + const char *getPassName() const { return "NaCl Bitcode Writer"; } + + bool runOnModule(Module &M) { + NaClWriteBitcodeToFile(&M, OS); + return false; + } + }; +} + +char WriteBitcodePass::ID = 0; + +ModulePass *llvm::createNaClBitcodeWriterPass(raw_ostream &Str) { + return new WriteBitcodePass(Str); +} diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp new file mode 100644 index 000000000000..6a2af3eaccb0 --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.cpp @@ -0,0 +1,506 @@ +//===-- NaClValueEnumerator.cpp ------------------------------------------===// +// Number values and types for bitcode writer +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the NaClValueEnumerator class. +// +//===----------------------------------------------------------------------===// + +#include "NaClValueEnumerator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; + +static bool isIntOrIntVectorValue(const std::pair &V) { + return V.first->getType()->isIntOrIntVectorTy(); +} + +/// NaClValueEnumerator - Enumerate module-level information. +NaClValueEnumerator::NaClValueEnumerator(const Module *M, uint32_t PNaClVersion) + : PNaClVersion(PNaClVersion) { + // Create map for counting frequency of types, and set field + // TypeCountMap accordingly. Note: Pointer field TypeCountMap is + // used to deal with the fact that types are added through various + // method calls in this routine. Rather than pass it as an argument, + // we use a field. 
The field is a pointer so that the memory + // footprint of count_map can be garbage collected when this + // constructor completes. + TypeCountMapType count_map; + TypeCountMap = &count_map; + + IntPtrType = IntegerType::get(M->getContext(), PNaClIntPtrTypeBitSize); + + // Enumerate the functions. Note: We do this before global + // variables, so that global variable initializations can refer to + // the functions without a forward reference. + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { + EnumerateValue(I); + } + + // Enumerate the global variables. + FirstGlobalVarID = Values.size(); + for (Module::const_global_iterator I = M->global_begin(), + E = M->global_end(); I != E; ++I) + EnumerateValue(I); + NumGlobalVarIDs = Values.size() - FirstGlobalVarID; + + // Enumerate the aliases. + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) + EnumerateValue(I); + + // Remember what is the cutoff between globalvalue's and other constants. + unsigned FirstConstant = Values.size(); + + // Skip global variable initializers since they are handled within + // WriteGlobalVars of file NaClBitcodeWriter.cpp. + + // Enumerate the aliasees. + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) + EnumerateValue(I->getAliasee()); + + // Insert constants that are named at module level into the slot + // pool so that the module symbol table can refer to them... + EnumerateValueSymbolTable(M->getValueSymbolTable()); + + // Enumerate types used by function bodies and argument lists. + for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { + + for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) + EnumerateType(I->getType()); + + for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E;++I){ + // Don't generate types for elided pointer casts! + if (IsElidedCast(I)) + continue; + + if (const SwitchInst *SI = dyn_cast(I)) { + // Handle switch instruction specially, so that we don't + // write out unnecessary vector/array types used to model case + // selectors. + EnumerateOperandType(SI->getCondition()); + } else { + for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); + OI != E; ++OI) { + EnumerateOperandType(*OI); + } + } + EnumerateType(I->getType()); + } + } + + // Optimized type indicies to put "common" expected types in with small + // indices. + OptimizeTypes(M); + TypeCountMap = NULL; + + // Optimize constant ordering. + OptimizeConstants(FirstConstant, Values.size()); +} + +void NaClValueEnumerator::OptimizeTypes(const Module *M) { + + // Sort types by count, so that we can index them based on + // frequency. Use indices of built TypeMap, so that order of + // construction is repeatable. + std::set type_counts; + typedef std::set TypeSetType; + std::map usage_count_map; + TypeList IdType(Types); + + for (TypeCountMapType::iterator iter = TypeCountMap->begin(); + iter != TypeCountMap->end(); ++ iter) { + type_counts.insert(iter->second); + usage_count_map[iter->second].insert(TypeMap[iter->first]-1); + } + + // Reset type tracking maps, so that we can re-enter based + // on fequency ordering. + TypeCountMap = NULL; + Types.clear(); + TypeMap.clear(); + + // Reinsert types, based on frequency. 
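+  // E.g. if i32 appeared 500 times, float 40 times and i8 12 times in the
+  // first pass, the loop below re-enumerates them in that order, so i32
+  // ends up with the smallest type ID.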
+ for (std::set::reverse_iterator count_iter = type_counts.rbegin(); + count_iter != type_counts.rend(); ++count_iter) { + TypeSetType& count_types = usage_count_map[*count_iter]; + for (TypeSetType::iterator type_iter = count_types.begin(); + type_iter != count_types.end(); ++type_iter) + EnumerateType((IdType[*type_iter]), true); + } +} + +unsigned NaClValueEnumerator::getInstructionID(const Instruction *Inst) const { + InstructionMapType::const_iterator I = InstructionMap.find(Inst); + assert(I != InstructionMap.end() && "Instruction is not mapped!"); + return I->second; +} + +void NaClValueEnumerator::setInstructionID(const Instruction *I) { + InstructionMap[I] = InstructionCount++; +} + +unsigned NaClValueEnumerator::getValueID(const Value *V) const { + ValueMapType::const_iterator I = ValueMap.find(V); + assert(I != ValueMap.end() && "Value not in slotcalculator!"); + return I->second-1; +} + +void NaClValueEnumerator::dump() const { + print(dbgs(), ValueMap, "Default"); + dbgs() << '\n'; +} + +void NaClValueEnumerator::print(raw_ostream &OS, const ValueMapType &Map, + const char *Name) const { + + OS << "Map Name: " << Name << "\n"; + OS << "Size: " << Map.size() << "\n"; + for (ValueMapType::const_iterator I = Map.begin(), + E = Map.end(); I != E; ++I) { + + const Value *V = I->first; + if (V->hasName()) + OS << "Value: " << V->getName(); + else + OS << "Value: [null]\n"; + V->dump(); + + OS << " Uses(" << std::distance(V->use_begin(),V->use_end()) << "):"; + for (Value::const_use_iterator UI = V->use_begin(), UE = V->use_end(); + UI != UE; ++UI) { + if (UI != V->use_begin()) + OS << ","; + if((*UI)->hasName()) + OS << " " << (*UI)->getName(); + else + OS << " [null]"; + + } + OS << "\n\n"; + } +} + +// Optimize constant ordering. +namespace { + struct CstSortPredicate { + NaClValueEnumerator &VE; + explicit CstSortPredicate(NaClValueEnumerator &ve) : VE(ve) {} + bool operator()(const std::pair &LHS, + const std::pair &RHS) { + // Sort by plane. + if (LHS.first->getType() != RHS.first->getType()) + return VE.getTypeID(LHS.first->getType()) < + VE.getTypeID(RHS.first->getType()); + // Then by frequency. + return LHS.second > RHS.second; + } + }; +} + +/// OptimizeConstants - Reorder constant pool for denser encoding. +void NaClValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { + if (CstStart == CstEnd || CstStart+1 == CstEnd) return; + + CstSortPredicate P(*this); + std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P); + + // Ensure that integer and vector of integer constants are at the start of the + // constant pool. This is important so that GEP structure indices come before + // gep constant exprs. + std::partition(Values.begin()+CstStart, Values.begin()+CstEnd, + isIntOrIntVectorValue); + + // Rebuild the modified portion of ValueMap. + for (; CstStart != CstEnd; ++CstStart) + ValueMap[Values[CstStart].first] = CstStart+1; +} + + +/// EnumerateValueSymbolTable - Insert all of the values in the specified symbol +/// table into the values table. +void NaClValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable &VST) { + for (ValueSymbolTable::const_iterator VI = VST.begin(), VE = VST.end(); + VI != VE; ++VI) + EnumerateValue(VI->getValue()); +} + +void NaClValueEnumerator::EnumerateValue(const Value *VIn) { + // Skip over elided values. 
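+  // For example, a pointer bitcast or inttoptr that ElideCasts (below) folds
+  // away is never given an ID of its own here; the value it forwards to is
+  // what gets enumerated.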
+ const Value *V = ElideCasts(VIn); + if (V != VIn) return; + + assert(!V->getType()->isVoidTy() && "Can't insert void values!"); + assert(!isa(V) && !isa(V) && + "EnumerateValue doesn't handle Metadata!"); + + // Check to see if it's already in! + unsigned &ValueID = ValueMap[V]; + if (ValueID) { + // Increment use count. + Values[ValueID-1].second++; + return; + } + + // Enumerate the type of this value. Skip global values since no + // types are dumped for global variables. + if (!isa(V)) + EnumerateType(V->getType()); + + if (const Constant *C = dyn_cast(V)) { + if (isa(C)) { + // Initializers for globals are handled explicitly elsewhere. + } else if (C->getNumOperands()) { + // If a constant has operands, enumerate them. This makes sure that if a + // constant has uses (for example an array of const ints), that they are + // inserted also. + + // We prefer to enumerate them with values before we enumerate the user + // itself. This makes it more likely that we can avoid forward references + // in the reader. We know that there can be no cycles in the constants + // graph that don't go through a global variable. + for (User::const_op_iterator I = C->op_begin(), E = C->op_end(); + I != E; ++I) + if (!isa(*I)) // Don't enumerate BB operand to BlockAddress. + EnumerateValue(*I); + + // Finally, add the value. Doing this could make the ValueID reference be + // dangling, don't reuse it. + Values.push_back(std::make_pair(V, 1U)); + ValueMap[V] = Values.size(); + return; + } + } + + // Add the value. + Values.push_back(std::make_pair(V, 1U)); + ValueID = Values.size(); +} + + +Type *NaClValueEnumerator::NormalizeType(Type *Ty) const { + if (Ty->isPointerTy()) + return IntPtrType; + if (FunctionType *FTy = dyn_cast(Ty)) { + SmallVector ArgTypes; + for (unsigned I = 0, E = FTy->getNumParams(); I < E; ++I) + ArgTypes.push_back(NormalizeType(FTy->getParamType(I))); + return FunctionType::get(NormalizeType(FTy->getReturnType()), + ArgTypes, false); + } + return Ty; +} + +void NaClValueEnumerator::EnumerateType(Type *Ty, bool InsideOptimizeTypes) { + // Pointer types do not need to be given type IDs. + if (Ty->isPointerTy()) + Ty = Ty->getPointerElementType(); + + Ty = NormalizeType(Ty); + + // The label type does not need to be given a type ID. + if (Ty->isLabelTy()) + return; + + // This function is used to enumerate types referenced by the given + // module. This function is called in two phases, based on the value + // of TypeCountMap. These phases are: + // + // (1) In this phase, InsideOptimizeTypes=false. We are collecting types + // and all corresponding (implicitly) referenced types. In addition, + // we are keeping track of the number of references to each type in + // TypeCountMap. These reference counts will be used by method + // OptimizeTypes to associate the smallest type ID's with the most + // referenced types. + // + // (2) In this phase, InsideOptimizeTypes=true. We are registering types + // based on frequency. To minimize type IDs for frequently used + // types, (unlike the other context) we are inserting the minimal + // (implicitly) referenced types needed for each type. + unsigned *TypeID = &TypeMap[Ty]; + + if (TypeCountMap) ++((*TypeCountMap)[Ty]); + + // We've already seen this type. + if (*TypeID) + return; + + // If it is a non-anonymous struct, mark the type as being visited so that we + // don't recursively visit it. This is safe because we allow forward + // references of these in the bitcode reader. 
+ if (StructType *STy = dyn_cast(Ty)) + if (!STy->isLiteral()) + *TypeID = ~0U; + + // If in the second phase (i.e. inside optimize types), don't expand + // pointers to structures, since we can just generate a forward + // reference to it. This way, we don't use up unnecessary (small) ID + // values just to define the pointer. + bool EnumerateSubtypes = true; + if (InsideOptimizeTypes) + if (PointerType *PTy = dyn_cast(Ty)) + if (StructType *STy = dyn_cast(PTy->getElementType())) + if (!STy->isLiteral()) + EnumerateSubtypes = false; + + // Enumerate all of the subtypes before we enumerate this type. This ensures + // that the type will be enumerated in an order that can be directly built. + if (EnumerateSubtypes) { + for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); + I != E; ++I) + EnumerateType(*I, InsideOptimizeTypes); + } + + // Refresh the TypeID pointer in case the table rehashed. + TypeID = &TypeMap[Ty]; + + // Check to see if we got the pointer another way. This can happen when + // enumerating recursive types that hit the base case deeper than they start. + // + // If this is actually a struct that we are treating as forward ref'able, + // then emit the definition now that all of its contents are available. + if (*TypeID && *TypeID != ~0U) + return; + + // Add this type now that its contents are all happily enumerated. + Types.push_back(Ty); + + *TypeID = Types.size(); +} + +// Enumerate the types for the specified value. If the value is a constant, +// walk through it, enumerating the types of the constant. +void NaClValueEnumerator::EnumerateOperandType(const Value *V) { + // Note: We intentionally don't create a type id for global variables, + // since the type is automatically generated by the reader before any + // use of the global variable. + if (isa(V)) return; + + EnumerateType(V->getType()); + + if (const Constant *C = dyn_cast(V)) { + // If this constant is already enumerated, ignore it, we know its type must + // be enumerated. + if (ValueMap.count(V)) return; + + // This constant may have operands, make sure to enumerate the types in + // them. + for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { + const Value *Op = C->getOperand(i); + + // Don't enumerate basic blocks here, this happens as operands to + // blockaddress. + if (isa(Op)) continue; + + EnumerateOperandType(Op); + } + } +} + +void NaClValueEnumerator::incorporateFunction(const Function &F) { + InstructionCount = 0; + NumModuleValues = Values.size(); + + // Make sure no insertions outside of a function. + assert(FnForwardTypeRefs.empty()); + + // Adding function arguments to the value table. + for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); + I != E; ++I) + EnumerateValue(I); + + FirstFuncConstantID = Values.size(); + + // Add all function-level constants to the value table. + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { + if (const SwitchInst *SI = dyn_cast(I)) { + // Handle switch instruction specially, so that we don't write + // out unnecessary vector/array constants used to model case selectors. 
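+        // Only the condition is enumerated here (and only when it is a
+        // constant); the case values themselves are written inline by
+        // WriteInstruction and never receive value IDs.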
+ if (isa(SI->getCondition())) { + EnumerateValue(SI->getCondition()); + } + } else { + for (User::const_op_iterator OI = I->op_begin(), E = I->op_end(); + OI != E; ++OI) { + if ((isa(*OI) && !isa(*OI)) || + isa(*OI)) + EnumerateValue(*OI); + } + } + } + BasicBlocks.push_back(BB); + ValueMap[BB] = BasicBlocks.size(); + } + + // Optimize the constant layout. + OptimizeConstants(FirstFuncConstantID, Values.size()); + + FirstInstID = Values.size(); + + // Add all of the instructions. + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I!=E; ++I) { + if (!I->getType()->isVoidTy()) + EnumerateValue(I); + } + } +} + +void NaClValueEnumerator::purgeFunction() { + /// Remove purged values from the ValueMap. + for (unsigned i = NumModuleValues, e = Values.size(); i != e; ++i) + ValueMap.erase(Values[i].first); + for (unsigned i = 0, e = BasicBlocks.size(); i != e; ++i) + ValueMap.erase(BasicBlocks[i]); + + Values.resize(NumModuleValues); + BasicBlocks.clear(); + FnForwardTypeRefs.clear(); +} + +// The normal form required by the PNaCl ABI verifier (documented in +// ReplacePtrsWithInts.cpp) allows us to omit the following pointer +// casts from the bitcode file. +const Value *NaClValueEnumerator::ElideCasts(const Value *V) { + if (const Instruction *I = dyn_cast(V)) { + switch (I->getOpcode()) { + default: + break; + case Instruction::BitCast: + if (I->getType()->isPointerTy()) { + V = I->getOperand(0); + } + break; + case Instruction::IntToPtr: + V = ElideCasts(I->getOperand(0)); + break; + case Instruction::PtrToInt: + if (IsIntPtrType(I->getType())) { + V = I->getOperand(0); + } + break; + } + } + return V; +} diff --git a/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h new file mode 100644 index 000000000000..cc43c7a1e234 --- /dev/null +++ b/lib/Bitcode/NaCl/Writer/NaClValueEnumerator.h @@ -0,0 +1,172 @@ +//===-- Bitcode/NaCl/Writer/NaClValueEnumerator.h - ----------*- C++ -*-===// +// Number values. +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This class gives values and types Unique ID's. +// +//===----------------------------------------------------------------------===// + +#ifndef NACL_VALUE_ENUMERATOR_H +#define NACL_VALUE_ENUMERATOR_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" +#include + +namespace llvm { + +class Type; +class Value; +class Instruction; +class BasicBlock; +class Function; +class Module; +class ValueSymbolTable; +class raw_ostream; + +class NaClValueEnumerator { +public: + typedef std::vector TypeList; + + // For each value, we remember its Value* and occurrence frequency. + typedef std::vector > ValueList; +private: + // Defines unique ID's for each type. + typedef DenseMap TypeMapType; + TypeMapType TypeMap; + // Defines the number of references to each type. If defined, + // we are in the first pass of collecting types, and reference counts + // should be added to the map. If undefined, we are in the second pass + // that actually assigns type IDs, based on frequency counts found in + // the first pass. 
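
ElideCasts above is what lets the writer drop the pointer casts that the PNaCl normal form makes redundant. A simplified standalone mirror of the rule (pointer bitcasts, inttoptr, and ptrtoint-to-intptr are skipped); the loop below is looser than the real per-opcode handling:

```cpp
#include <cstdio>
#include <string>

// Toy mirror of the cast-elision rule: when a value is one of these casts,
// record the original operand instead of the cast itself.
struct ToyInst {
  std::string Op;        // "bitcast", "inttoptr", "ptrtoint", or other
  bool ResultIsPtr;      // does the result have pointer type?
  bool ResultIsIntPtr;   // is the result the integer used to model pointers?
  const ToyInst *Src;    // operand 0, if any
};

static const ToyInst *elideCasts(const ToyInst *V) {
  while (V->Src) {
    if (V->Op == "bitcast" && V->ResultIsPtr)
      V = V->Src;
    else if (V->Op == "inttoptr")
      V = V->Src;
    else if (V->Op == "ptrtoint" && V->ResultIsIntPtr)
      V = V->Src;
    else
      break;
  }
  return V;
}

int main() {
  ToyInst Alloca{"alloca", true, false, nullptr};
  ToyInst Cast{"ptrtoint", false, true, &Alloca};
  std::printf("elided to: %s\n", elideCasts(&Cast)->Op.c_str());
  return 0;
}
```
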
+ typedef TypeMapType TypeCountMapType; + TypeCountMapType* TypeCountMap; + + TypeList Types; + + typedef DenseMap ValueMapType; + ValueMapType ValueMap; + ValueList Values; + + typedef DenseMap InstructionMapType; + InstructionMapType InstructionMap; + unsigned InstructionCount; + + /// BasicBlocks - This contains all the basic blocks for the currently + /// incorporated function. Their reverse mapping is stored in ValueMap. + std::vector BasicBlocks; + + /// When a function is incorporated, this is the size of the Values list + /// before incorporation. + unsigned NumModuleValues; + + unsigned FirstFuncConstantID; + unsigned FirstInstID; + + /// Holds values that have been forward referenced within a function. + /// Used to make sure we don't generate more forward reference declarations + /// than necessary. + SmallSet FnForwardTypeRefs; + + // The index of the first global variable ID in the bitcode file. + unsigned FirstGlobalVarID; + // The number of global variable IDs defined in the bitcode file. + unsigned NumGlobalVarIDs; + + // The version of PNaCl bitcode to generate. + uint32_t PNaClVersion; + + /// \brief Integer type use for PNaCl conversion of pointers. + Type *IntPtrType; + + NaClValueEnumerator(const NaClValueEnumerator &) LLVM_DELETED_FUNCTION; + void operator=(const NaClValueEnumerator &) LLVM_DELETED_FUNCTION; +public: + NaClValueEnumerator(const Module *M, uint32_t PNaClVersion); + + void dump() const; + void print(raw_ostream &OS, const ValueMapType &Map, const char *Name) const; + + unsigned getFirstGlobalVarID() const { + return FirstGlobalVarID; + } + + unsigned getNumGlobalVarIDs() const { + return NumGlobalVarIDs; + } + + unsigned getValueID(const Value *V) const; + + unsigned getTypeID(Type *T) const { + TypeMapType::const_iterator I = TypeMap.find(NormalizeType(T)); + assert(I != TypeMap.end() && "Type not in NaClValueEnumerator!"); + return I->second-1; + } + + unsigned getInstructionID(const Instruction *I) const; + void setInstructionID(const Instruction *I); + + /// getFunctionConstantRange - Return the range of values that corresponds to + /// function-local constants. + void getFunctionConstantRange(unsigned &Start, unsigned &End) const { + Start = FirstFuncConstantID; + End = FirstInstID; + } + + /// \brief Inserts the give value into the set of known function forward + /// value type refs. Returns true if the value id is added to the set. + bool InsertFnForwardTypeRef(unsigned ValID) { + return FnForwardTypeRefs.insert(ValID); + } + + const ValueList &getValues() const { return Values; } + const TypeList &getTypes() const { return Types; } + const std::vector &getBasicBlocks() const { + return BasicBlocks; + } + + /// incorporateFunction/purgeFunction - If you'd like to deal with a function, + /// use these two methods to get its data into the NaClValueEnumerator! + /// + void incorporateFunction(const Function &F); + void purgeFunction(); + + /// \brief Returns the value after elided (cast) operations have been + /// removed. Returns V if unable to elide the cast. + const Value *ElideCasts(const Value *V); + + /// \brief Returns true if value V is an elided (cast) operation. + bool IsElidedCast(const Value *V) { + return V != ElideCasts(V); + } + + /// \brief Returns true if the type of V is the integer used to + /// model pointers in PNaCl. 
+ bool IsIntPtrType(Type *T) const { + return T == IntPtrType; + } + + Type *NormalizeType(Type *Ty) const; + +private: + void OptimizeTypes(const Module *M); + void OptimizeConstants(unsigned CstStart, unsigned CstEnd); + + void EnumerateValue(const Value *V); + void EnumerateType(Type *T, bool InsideOptimizeTypes=false); + void EnumerateOperandType(const Value *V); + + void EnumerateValueSymbolTable(const ValueSymbolTable &ST); +}; + +} // End llvm namespace + +#endif diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index 1ee7140ae42e..f2278d4ac733 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -30,6 +30,7 @@ add_llvm_library(LLVMCore Mangler.cpp Metadata.cpp Module.cpp + NaClAtomicIntrinsics.cpp Pass.cpp PassManager.cpp PassRegistry.cpp diff --git a/lib/IR/InlineAsm.cpp b/lib/IR/InlineAsm.cpp index 9f2a9fea4b93..121fe1528ebd 100644 --- a/lib/IR/InlineAsm.cpp +++ b/lib/IR/InlineAsm.cpp @@ -293,3 +293,18 @@ bool InlineAsm::Verify(FunctionType *Ty, StringRef ConstStr) { return true; } +// @LOCALMOD-START +bool InlineAsm::isAsmMemory() const { + bool retVoid = getFunctionType()->getReturnType()->isVoidTy(); + bool noArgs = getFunctionType()->getNumParams() == 0 && + !getFunctionType()->isVarArg(); + bool isEmptyAsm = AsmString.empty(); + // Different triples will encode "touch everything" differently, e.g.: + // - le32-unknown-nacl has "~{memory}". + // - x86 "~{memory},~{dirflag},~{fpsr},~{flags}". + // The following code therefore only searches for memory. + bool touchesMemory = Constraints.find("~{memory}") != std::string::npos; + + return retVoid && noArgs && hasSideEffects() && isEmptyAsm && touchesMemory; +} +// @LOCALMOD-END diff --git a/lib/IR/NaClAtomicIntrinsics.cpp b/lib/IR/NaClAtomicIntrinsics.cpp new file mode 100644 index 000000000000..02fe295fad11 --- /dev/null +++ b/lib/IR/NaClAtomicIntrinsics.cpp @@ -0,0 +1,85 @@ +//=== llvm/IR/NaClAtomicIntrinsics.cpp - NaCl Atomic Intrinsics -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes atomic intrinsic functions that are specific to NaCl. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/NaClAtomicIntrinsics.h" +#include "llvm/IR/Type.h" + +namespace llvm { + +namespace NaCl { + +AtomicIntrinsics::AtomicIntrinsics(LLVMContext &C) { + Type *IT[NumAtomicIntrinsicOverloadTypes] = { Type::getInt8Ty(C), + Type::getInt16Ty(C), + Type::getInt32Ty(C), + Type::getInt64Ty(C) }; + size_t CurIntrin = 0; + + // Initialize each of the atomic intrinsics and their overloads. They + // have up to 5 parameters, the following macro will take care of + // overloading. 
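
The isAsmMemory() helper added above recognizes the empty `asm` with a "~{memory}" clobber that Clang emits for a compiler barrier. A string-level sketch of the same predicate, independent of the InlineAsm class:

```cpp
#include <cstdio>
#include <string>

// An empty asm body with side effects, no arguments, a void result, and a
// "~{memory}" clobber is treated as a pure compiler memory barrier.
static bool looksLikeAsmMemoryBarrier(const std::string &AsmString,
                                      const std::string &Constraints,
                                      bool HasSideEffects, bool ReturnsVoid,
                                      unsigned NumParams) {
  bool TouchesMemory = Constraints.find("~{memory}") != std::string::npos;
  return ReturnsVoid && NumParams == 0 && HasSideEffects &&
         AsmString.empty() && TouchesMemory;
}

int main() {
  // asm volatile("" ::: "memory") roughly as Clang encodes it for le32 NaCl.
  std::printf("%d\n", looksLikeAsmMemoryBarrier("", "~{memory}",
                                                /*HasSideEffects=*/true,
                                                /*ReturnsVoid=*/true,
                                                /*NumParams=*/0));
  return 0;
}
```
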
+#define INIT(P0, P1, P2, P3, P4, INTRIN) \ + do { \ + for (size_t CurType = 0; CurType != NumAtomicIntrinsicOverloadTypes; \ + ++CurType) { \ + size_t Param = 0; \ + I[CurIntrin][CurType].OverloadedType = IT[CurType]; \ + I[CurIntrin][CurType].ID = Intrinsic::nacl_atomic_##INTRIN; \ + I[CurIntrin][CurType].Overloaded = \ + P0 == Int || P0 == Ptr || P1 == Int || P1 == Ptr || P2 == Int || \ + P2 == Ptr || P3 == Int || P3 == Ptr || P4 == Int || P4 == Ptr; \ + I[CurIntrin][CurType].NumParams = \ + (P0 != NoP) + (P1 != NoP) + (P2 != NoP) + (P3 != NoP) + (P4 != NoP); \ + I[CurIntrin][CurType].ParamType[Param++] = P0; \ + I[CurIntrin][CurType].ParamType[Param++] = P1; \ + I[CurIntrin][CurType].ParamType[Param++] = P2; \ + I[CurIntrin][CurType].ParamType[Param++] = P3; \ + I[CurIntrin][CurType].ParamType[Param++] = P4; \ + } \ + ++CurIntrin; \ + } while (0) + + INIT(Ptr, Mem, NoP, NoP, NoP, load); + INIT(Ptr, Int, Mem, NoP, NoP, store); + INIT(RMW, Ptr, Int, Mem, NoP, rmw); + INIT(Ptr, Int, Int, Mem, Mem, cmpxchg); + INIT(Mem, NoP, NoP, NoP, NoP, fence); + INIT(NoP, NoP, NoP, NoP, NoP, fence_all); +} + +AtomicIntrinsics::View AtomicIntrinsics::allIntrinsicsAndOverloads() const { + return View(&I[0][0], NumAtomicIntrinsics * NumAtomicIntrinsicOverloadTypes); +} + +AtomicIntrinsics::View AtomicIntrinsics::overloadsFor(Intrinsic::ID ID) const { + // Overloads are stored consecutively. + View R = allIntrinsicsAndOverloads(); + for (const AtomicIntrinsic *AI = R.begin(), *E = R.end(); AI != E; ++AI) + if (AI->ID == ID) + return View(AI, NumAtomicIntrinsicOverloadTypes); + llvm_unreachable("unhandled atomic intrinsic"); +} + +const AtomicIntrinsics::AtomicIntrinsic * +AtomicIntrinsics::find(Intrinsic::ID ID, Type *OverloadedType) const { + View R = allIntrinsicsAndOverloads(); + for (const AtomicIntrinsic *AI = R.begin(), *E = R.end(); AI != E; ++AI) + if (AI->ID == ID && AI->OverloadedType == OverloadedType) + return AI; + llvm_unreachable("unhandled atomic intrinsic"); +} + +} // End NaCl namespace + +} // End llvm namespace diff --git a/lib/IRReader/IRReader.cpp b/lib/IRReader/IRReader.cpp index 70801c43f1a3..7615d5abad90 100644 --- a/lib/IRReader/IRReader.cpp +++ b/lib/IRReader/IRReader.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/AsmParser/Parser.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/Bitcode/NaCl/NaClReaderWriter.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" @@ -32,27 +33,44 @@ static const char *const TimeIRParsingName = "Parse IR"; Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err, - LLVMContext &Context) { - if (isBitcode((const unsigned char *)Buffer->getBufferStart(), - (const unsigned char *)Buffer->getBufferEnd())) { + LLVMContext &Context, FileFormat Format) { + if (Format == LLVMFormat) { + if (isBitcode((const unsigned char *)Buffer->getBufferStart(), + (const unsigned char *)Buffer->getBufferEnd())) { + std::string ErrMsg; + ErrorOr ModuleOrErr = getLazyBitcodeModule(Buffer, Context); + if (error_code EC = ModuleOrErr.getError()) { + Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, + EC.message()); + // ParseBitcodeFile does not take ownership of the Buffer in the + // case of an error. 
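
The AtomicIntrinsics table above stores the overloads of each intrinsic consecutively, so overloadsFor() is a sub-range and find() is a linear scan. A toy version of that layout and lookup (the enum names here are illustrative, not the real Intrinsic IDs):

```cpp
#include <cstdio>

// One row per (intrinsic, overloaded integer width), rows for the same
// intrinsic stored contiguously.
enum IntrinID { AtomicLoad, AtomicStore, AtomicRMW };
enum OverloadTy { I8, I16, I32, I64, NumOverloads };

struct Entry { IntrinID ID; OverloadTy Ty; };

static const Entry Table[] = {
  {AtomicLoad, I8},  {AtomicLoad, I16},  {AtomicLoad, I32},  {AtomicLoad, I64},
  {AtomicStore, I8}, {AtomicStore, I16}, {AtomicStore, I32}, {AtomicStore, I64},
  {AtomicRMW, I8},   {AtomicRMW, I16},   {AtomicRMW, I32},   {AtomicRMW, I64},
};

static const Entry *findEntry(IntrinID ID, OverloadTy Ty) {
  for (const Entry &E : Table)
    if (E.ID == ID && E.Ty == Ty)
      return &E;
  return nullptr;                 // the real code calls llvm_unreachable()
}

int main() {
  const Entry *E = findEntry(AtomicRMW, I32);
  std::printf("found row %d\n", (int)(E - Table));
  return 0;
}
```
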
+ delete Buffer; + return NULL; + } + return ModuleOrErr.get(); + } + + return ParseAssembly(Buffer, 0, Err, Context); + } else if ((Format == PNaClFormat) && + isNaClBitcode((const unsigned char *)Buffer->getBufferStart(), + (const unsigned char *)Buffer->getBufferEnd())) { std::string ErrMsg; - ErrorOr ModuleOrErr = getLazyBitcodeModule(Buffer, Context); - if (error_code EC = ModuleOrErr.getError()) { + Module *M = getNaClLazyBitcodeModule(Buffer, Context, &ErrMsg); + if (M == 0) Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, - EC.message()); - // ParseBitcodeFile does not take ownership of the Buffer in the - // case of an error. - delete Buffer; - return NULL; - } - return ModuleOrErr.get(); + ErrMsg); + // NaClParseBitcodeFile does not take ownership of the Buffer. + delete Buffer; + return M; + } else { + Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, + "Did not specify correct format for file"); + return 0; } - - return ParseAssembly(Buffer, 0, Err, Context); } Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err, - LLVMContext &Context) { + LLVMContext &Context, FileFormat Format) { OwningPtr File; if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) { Err = SMDiagnostic(Filename, SourceMgr::DK_Error, @@ -60,32 +78,49 @@ Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err return 0; } - return getLazyIRModule(File.take(), Err, Context); + return getLazyIRModule(File.take(), Err, Context, Format); } Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, - LLVMContext &Context) { + LLVMContext &Context, FileFormat Format) { NamedRegionTimer T(TimeIRParsingName, TimeIRParsingGroupName, TimePassesIsEnabled); - if (isBitcode((const unsigned char *)Buffer->getBufferStart(), - (const unsigned char *)Buffer->getBufferEnd())) { - ErrorOr ModuleOrErr = parseBitcodeFile(Buffer, Context); - Module *M = 0; - if (error_code EC = ModuleOrErr.getError()) + if (Format == LLVMFormat) { + if (isBitcode((const unsigned char *)Buffer->getBufferStart(), + (const unsigned char *)Buffer->getBufferEnd())) { + ErrorOr ModuleOrErr = parseBitcodeFile(Buffer, Context); + Module *M = 0; + if (error_code EC = ModuleOrErr.getError()) + Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, + EC.message()); + else + M = ModuleOrErr.get(); + // parseBitcodeFile does not take ownership of the Buffer. + delete Buffer; + return M; + } + + return ParseAssembly(Buffer, 0, Err, Context); + } else if ((Format == PNaClFormat) && + isNaClBitcode((const unsigned char *)Buffer->getBufferStart(), + (const unsigned char *)Buffer->getBufferEnd())) { + std::string ErrMsg; + Module *M = NaClParseBitcodeFile(Buffer, Context, &ErrMsg); + if (M == 0) Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, - EC.message()); - else - M = ModuleOrErr.get(); - // parseBitcodeFile does not take ownership of the Buffer. + ErrMsg); + // NaClParseBitcodeFile does not take ownership of the Buffer. 
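
The new FileFormat parameter makes the reader dispatch on both the requested format and the buffer's magic number. A rough sketch of that dispatch; the 'BC\xC0\xDE' magic is LLVM's, and 'PEXE' is assumed here to be what isNaClBitcode() checks for:

```cpp
#include <cstdio>
#include <cstring>

enum FileFormat { LLVMFormat, PNaClFormat };

static bool looksLikeLLVMBitcode(const unsigned char *Buf, size_t Size) {
  return Size >= 4 && Buf[0] == 'B' && Buf[1] == 'C' &&
         Buf[2] == 0xC0 && Buf[3] == 0xDE;
}

static bool looksLikePNaClBitcode(const unsigned char *Buf, size_t Size) {
  return Size >= 4 && std::memcmp(Buf, "PEXE", 4) == 0;  // assumed magic
}

// Caller states which format it expects; the magic decides which reader runs.
static const char *classify(FileFormat Format, const unsigned char *Buf,
                            size_t Size) {
  if (Format == LLVMFormat)
    return looksLikeLLVMBitcode(Buf, Size) ? "LLVM bitcode" : "LLVM assembly";
  if (looksLikePNaClBitcode(Buf, Size))
    return "PNaCl bitcode";
  return "error: did not specify correct format for file";
}

int main() {
  const unsigned char Pexe[] = {'P', 'E', 'X', 'E'};
  std::printf("%s\n", classify(PNaClFormat, Pexe, sizeof(Pexe)));
  return 0;
}
```
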
delete Buffer; return M; + } else { + Err = SMDiagnostic(Buffer->getBufferIdentifier(), SourceMgr::DK_Error, + "Did not specify correct format for file"); + return 0; } - - return ParseAssembly(Buffer, 0, Err, Context); } Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err, - LLVMContext &Context) { + LLVMContext &Context, FileFormat Format) { OwningPtr File; if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) { Err = SMDiagnostic(Filename, SourceMgr::DK_Error, @@ -93,7 +128,7 @@ Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err, return 0; } - return ParseIR(File.take(), Err, Context); + return ParseIR(File.take(), Err, Context, Format); } //===----------------------------------------------------------------------===// diff --git a/lib/IRReader/LLVMBuild.txt b/lib/IRReader/LLVMBuild.txt index b7bc74d61649..a1f37d3f7cb8 100644 --- a/lib/IRReader/LLVMBuild.txt +++ b/lib/IRReader/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = IRReader parent = Libraries -required_libraries = AsmParser BitReader Core Support +required_libraries = AsmParser BitReader NaClBitReader Core Support diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index df9d9c984264..ef32d00820d5 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -21,6 +21,7 @@ #include "llvm/IR/TypeFinder.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Path.h" // @LOCALMOD #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/Cloning.h" #include diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 2bfde5fbe486..a1f81bbf3e38 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -1661,6 +1661,41 @@ multiclass AI_str1nopc; + +let Defs = [CPSR] in +def SFI_GUARD_LOADSTORE_TST : +PseudoInst<(outs), (ins GPR:$a), NoItinerary, []>; + +// Like SFI_GUARD_LOADSTORE, but reserved for loads into SP. +def SFI_GUARD_SP_LOAD : +PseudoInst<(outs GPR:$dst), (ins GPR:$src, pred:$p), NoItinerary, []>; + +def SFI_GUARD_INDIRECT_CALL : +PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>; + +def SFI_GUARD_INDIRECT_JMP : +PseudoInst<(outs GPR:$dst), (ins GPR:$a, pred:$p), NoItinerary, []>; + +def SFI_GUARD_CALL : +PseudoInst<(outs), (ins pred:$p), NoItinerary, []>; + +// NOTE: the BX_RET instruction hardcodes lr as well +def SFI_GUARD_RETURN : +PseudoInst<(outs), (ins pred:$p), NoItinerary, []>; + +def SFI_NOP_IF_AT_BUNDLE_END : +PseudoInst<(outs), (ins), NoItinerary, []>; + +// Note: intention is that $src and $dst are the same register. +def SFI_DATA_MASK : +PseudoInst<(outs GPR:$dst), (ins GPR:$src, pred:$p), NoItinerary, []>; + +// @LOCALMOD-END + //===----------------------------------------------------------------------===// // Miscellaneous Instructions. 
// diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index a240d1916f49..f13ec0eb7bdb 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -27,7 +27,8 @@ using namespace llvm; -static cl::opt +// @LOCALMOD: remove 'static' to make this flag visible elsewhere +cl::opt ReserveR9("arm-reserve-r9", cl::Hidden, cl::desc("Reserve R9, making it unavailable as GPR")); diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 85656d809147..4edb089fb917 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1909,9 +1909,9 @@ X86TargetLowering::LowerReturn(SDValue Chain, "SRetReturnReg should have been set in LowerFormalArguments()."); SDValue Val = DAG.getCopyFromReg(Chain, dl, Reg, getPointerTy()); - unsigned RetValReg - = (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ? - X86::RAX : X86::EAX; + unsigned RetValReg = + (Subtarget->is64Bit() && !Subtarget->isTarget64BitILP32()) ? + X86::RAX : X86::EAX; Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); Flag = Chain.getValue(1); diff --git a/lib/Transforms/CMakeLists.txt b/lib/Transforms/CMakeLists.txt index 2bb6e9059094..328bc13cdd08 100644 --- a/lib/Transforms/CMakeLists.txt +++ b/lib/Transforms/CMakeLists.txt @@ -6,3 +6,4 @@ add_subdirectory(IPO) add_subdirectory(Vectorize) add_subdirectory(Hello) add_subdirectory(ObjCARC) +add_subdirectory(NaCl) diff --git a/lib/Transforms/InstCombine/InstCombine.h b/lib/Transforms/InstCombine/InstCombine.h index 4021f292d970..8a091a586cfe 100644 --- a/lib/Transforms/InstCombine/InstCombine.h +++ b/lib/Transforms/InstCombine/InstCombine.h @@ -86,6 +86,8 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner bool MadeIRChange; LibCallSimplifier *Simplifier; bool MinimizeSize; + /// Are we allowed to add llvm.*.with.overflow.*? + bool NoOverflowSafeArithmetric; public: /// Worklist - All of the instructions that need to be simplified. InstCombineWorklist Worklist; diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp index 5dd3325a0bc8..41d37274eb9d 100644 --- a/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -2193,12 +2193,12 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { // // sum = a + b // if (sum+128 >u 255) ... -> llvm.sadd.with.overflow.i8 - { - ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI - if (I.getPredicate() == ICmpInst::ICMP_UGT && - match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) - if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) - return Res; + if (!NoOverflowSafeArithmetric) { + ConstantInt *CI2; // I = icmp ugt (add (add A, B), CI2), CI + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op0, m_Add(m_Add(m_Value(A), m_Value(B)), m_ConstantInt(CI2)))) + if (Instruction *Res = ProcessUGT_ADDCST_ADD(I, A, B, CI2, CI, *this)) + return Res; } // (icmp ne/eq (sub A B) 0) -> (icmp ne/eq A, B) @@ -2863,21 +2863,23 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) { return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A); } - // (a+b) llvm.uadd.with.overflow. - // (a+b) llvm.uadd.with.overflow. - if (I.getPredicate() == ICmpInst::ICMP_ULT && - match(Op0, m_Add(m_Value(A), m_Value(B))) && - (Op1 == A || Op1 == B)) - if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) - return R; + if (!NoOverflowSafeArithmetric) { + // (a+b) llvm.uadd.with.overflow. 
+ // (a+b) llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_ULT && + match(Op0, m_Add(m_Value(A), m_Value(B))) && + (Op1 == A || Op1 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op0, *this)) + return R; - // a >u (a+b) --> llvm.uadd.with.overflow. - // b >u (a+b) --> llvm.uadd.with.overflow. - if (I.getPredicate() == ICmpInst::ICMP_UGT && - match(Op1, m_Add(m_Value(A), m_Value(B))) && - (Op0 == A || Op0 == B)) - if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) - return R; + // a >u (a+b) --> llvm.uadd.with.overflow. + // b >u (a+b) --> llvm.uadd.with.overflow. + if (I.getPredicate() == ICmpInst::ICMP_UGT && + match(Op1, m_Add(m_Value(A), m_Value(B))) && + (Op0 == A || Op0 == B)) + if (Instruction *R = ProcessUAddIdiom(I, Op1, *this)) + return R; + } } if (I.isEquality()) { diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 04c1499220c8..14711e7245e3 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -40,6 +40,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" @@ -2518,6 +2519,8 @@ bool InstCombiner::runOnFunction(Function &F) { MinimizeSize = F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + NoOverflowSafeArithmetric = Triple(F.getParent()->getTargetTriple()).isOSNaCl(); + /// Builder - This is an IRBuilder that automatically inserts new /// instructions into the worklist when they are created. IRBuilder diff --git a/lib/Transforms/LLVMBuild.txt b/lib/Transforms/LLVMBuild.txt index 15e9fba0a765..3594de54a257 100644 --- a/lib/Transforms/LLVMBuild.txt +++ b/lib/Transforms/LLVMBuild.txt @@ -16,7 +16,7 @@ ;===------------------------------------------------------------------------===; [common] -subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC +subdirectories = IPO InstCombine Instrumentation Scalar Utils Vectorize ObjCARC NaCl [component_0] type = Group diff --git a/lib/Transforms/Makefile b/lib/Transforms/Makefile index c390517d07cd..b4bb7cba723b 100644 --- a/lib/Transforms/Makefile +++ b/lib/Transforms/Makefile @@ -8,7 +8,7 @@ ##===----------------------------------------------------------------------===## LEVEL = ../.. -PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello ObjCARC +PARALLEL_DIRS = Utils Instrumentation Scalar InstCombine IPO Vectorize Hello ObjCARC NaCl include $(LEVEL)/Makefile.config diff --git a/lib/Transforms/NaCl/AddPNaClExternalDecls.cpp b/lib/Transforms/NaCl/AddPNaClExternalDecls.cpp new file mode 100644 index 000000000000..f96db09b2f3c --- /dev/null +++ b/lib/Transforms/NaCl/AddPNaClExternalDecls.cpp @@ -0,0 +1,71 @@ +//===- AddPNaClExternalDecls.cpp - Add decls for PNaCl external functions -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass adds function declarations for external functions used by PNaCl. +// These externals are implemented in native libraries and calls to them are +// created as part of the translation process. 
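
The guarded InstCombine patterns above are the ones that turn the classic unsigned overflow checks into llvm.uadd.with.overflow; on NaCl targets the rewrite is suppressed so that the later ExpandArithWithOverflow pass only has to handle the constant-operand forms Clang emits. The source-level idiom being matched:

```cpp
#include <cstdint>
#include <cstdio>

// Unsigned addition wrapped modulo 2^N overflows exactly when the sum is
// smaller than either operand; this is the (a+b) <u a pattern above.
static bool uaddOverflows(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;   // wraps on overflow
  return Sum < A;         // carry out of the addition
}

int main() {
  std::printf("%d %d\n", uaddOverflows(1u, 2u), uaddOverflows(0xFFFFFFFFu, 1u));
  return 0;
}
```
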
+// +// Running this pass is a precondition for running ResolvePNaClIntrinsics. They +// are separate because one is a ModulePass and the other is a FunctionPass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a module pass because it adds declarations to the module. + class AddPNaClExternalDecls : public ModulePass { + public: + static char ID; + AddPNaClExternalDecls() : ModulePass(ID) { + initializeAddPNaClExternalDeclsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +bool AddPNaClExternalDecls::runOnModule(Module &M) { + // Add declarations for a pre-defined set of external functions to the module. + // The function names must match the functions implemented in native code (in + // pnacl/support). The function types must match the types of the LLVM + // intrinsics. + // We expect these declarations not to exist in the module before this pass + // runs, but don't assert it; it will be handled by the ABI verifier. + LLVMContext &C = M.getContext(); + M.getOrInsertFunction("setjmp", + // return type + Type::getInt32Ty(C), + // arguments + Type::getInt8Ty(C)->getPointerTo(), + NULL); + M.getOrInsertFunction("longjmp", + // return type + Type::getVoidTy(C), + // arguments + Type::getInt8Ty(C)->getPointerTo(), + Type::getInt32Ty(C), + NULL); + return true; +} + +char AddPNaClExternalDecls::ID = 0; +INITIALIZE_PASS(AddPNaClExternalDecls, "add-pnacl-external-decls", + "Add declarations of external functions used by PNaCl", + false, false) + +ModulePass *llvm::createAddPNaClExternalDeclsPass() { + return new AddPNaClExternalDecls(); +} diff --git a/lib/Transforms/NaCl/CMakeLists.txt b/lib/Transforms/NaCl/CMakeLists.txt new file mode 100644 index 000000000000..f57558a1e747 --- /dev/null +++ b/lib/Transforms/NaCl/CMakeLists.txt @@ -0,0 +1,33 @@ +add_llvm_library(LLVMNaClTransforms + AddPNaClExternalDecls.cpp + CanonicalizeMemIntrinsics.cpp + ExceptionInfoWriter.cpp + ExpandArithWithOverflow.cpp + ExpandByVal.cpp + ExpandConstantExpr.cpp + ExpandCtors.cpp + ExpandGetElementPtr.cpp + ExpandSmallArguments.cpp + ExpandStructRegs.cpp + ExpandTls.cpp + ExpandTlsConstantExpr.cpp + ExpandUtils.cpp + ExpandVarArgs.cpp + FlattenGlobals.cpp + GlobalCleanup.cpp + InsertDivideCheck.cpp + PNaClABISimplify.cpp + PNaClSjLjEH.cpp + PromoteI1Ops.cpp + PromoteIntegers.cpp + RemoveAsmMemory.cpp + ReplacePtrsWithInts.cpp + ResolvePNaClIntrinsics.cpp + RewriteAtomics.cpp + RewriteLLVMIntrinsics.cpp + RewritePNaClLibraryCalls.cpp + StripAttributes.cpp + StripMetadata.cpp + ) + +add_dependencies(LLVMNaClTransforms intrinsics_gen) diff --git a/lib/Transforms/NaCl/CanonicalizeMemIntrinsics.cpp b/lib/Transforms/NaCl/CanonicalizeMemIntrinsics.cpp new file mode 100644 index 000000000000..fd44c65434c4 --- /dev/null +++ b/lib/Transforms/NaCl/CanonicalizeMemIntrinsics.cpp @@ -0,0 +1,95 @@ +//===- CanonicalizeMemIntrinsics.cpp - Make memcpy's "len" arg consistent--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
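
The CanonicalizeMemIntrinsics pass that starts below rewrites the 64-bit-length variants of llvm.memcpy, llvm.memmove and llvm.memset to the 32-bit ones, truncating the length with no overflow check. A standalone sketch of that truncation at the C level:

```cpp
#include <cstdint>
#include <cstdio>
#include <cstring>

// Stand-in for the rewrite the pass performs: a 64-bit length is truncated to
// 32 bits before calling the 32-bit variant.  As in the pass, there is
// deliberately no overflow check; PNaCl targets are 32-bit, so in-range
// lengths always fit.
static void memcpy32(void *Dst, const void *Src, uint32_t Len) {
  std::memcpy(Dst, Src, Len);
}

static void memcpy64(void *Dst, const void *Src, uint64_t Len) {
  memcpy32(Dst, Src, static_cast<uint32_t>(Len));  // the "mem_len_truncate"
}

int main() {
  char Src[8] = "pnacl", Dst[8] = {};
  memcpy64(Dst, Src, sizeof(Src));
  std::printf("%s\n", Dst);
  return 0;
}
```
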
+// +//===----------------------------------------------------------------------===// +// +// This pass canonicalizes uses of the llvm.memset, llvm.memcpy and +// llvm.memmove intrinsics so that the variants with 64-bit "len" +// arguments aren't used, and the 32-bit variants are used instead. +// +// This means the PNaCl translator won't need to handle two versions +// of each of these intrinsics, and it won't need to do any implicit +// truncations from 64-bit to 32-bit. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass because that makes it easier to find all + // uses of intrinsics efficiently. + class CanonicalizeMemIntrinsics : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + CanonicalizeMemIntrinsics() : ModulePass(ID) { + initializeCanonicalizeMemIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char CanonicalizeMemIntrinsics::ID = 0; +INITIALIZE_PASS(CanonicalizeMemIntrinsics, "canonicalize-mem-intrinsics", + "Make memcpy() et al's \"len\" argument consistent", + false, false) + +static bool expandIntrinsic(Module *M, Intrinsic::ID ID) { + SmallVector Types; + Types.push_back(Type::getInt8PtrTy(M->getContext())); + if (ID != Intrinsic::memset) + Types.push_back(Type::getInt8PtrTy(M->getContext())); + unsigned LengthTypePos = Types.size(); + Types.push_back(Type::getInt64Ty(M->getContext())); + + std::string OldName = Intrinsic::getName(ID, Types); + Function *OldIntrinsic = M->getFunction(OldName); + if (!OldIntrinsic) + return false; + + Types[LengthTypePos] = Type::getInt32Ty(M->getContext()); + Function *NewIntrinsic = Intrinsic::getDeclaration(M, ID, Types); + + for (Value::use_iterator CallIter = OldIntrinsic->use_begin(), + E = OldIntrinsic->use_end(); CallIter != E; ) { + CallInst *Call = dyn_cast(*CallIter++); + if (!Call) { + report_fatal_error("CanonicalizeMemIntrinsics: Taking the address of an " + "intrinsic is not allowed: " + OldName); + } + // This temporarily leaves Call non-well-typed. + Call->setCalledFunction(NewIntrinsic); + // Truncate the "len" argument. No overflow check. + IRBuilder<> Builder(Call); + Value *Length = Builder.CreateTrunc(Call->getArgOperand(2), + Type::getInt32Ty(M->getContext()), + "mem_len_truncate"); + Call->setArgOperand(2, Length); + } + OldIntrinsic->eraseFromParent(); + return true; +} + +bool CanonicalizeMemIntrinsics::runOnModule(Module &M) { + bool Changed = false; + Changed |= expandIntrinsic(&M, Intrinsic::memset); + Changed |= expandIntrinsic(&M, Intrinsic::memcpy); + Changed |= expandIntrinsic(&M, Intrinsic::memmove); + return Changed; +} + +ModulePass *llvm::createCanonicalizeMemIntrinsicsPass() { + return new CanonicalizeMemIntrinsics(); +} diff --git a/lib/Transforms/NaCl/ExceptionInfoWriter.cpp b/lib/Transforms/NaCl/ExceptionInfoWriter.cpp new file mode 100644 index 000000000000..cd734642240c --- /dev/null +++ b/lib/Transforms/NaCl/ExceptionInfoWriter.cpp @@ -0,0 +1,291 @@ +//===- ExceptionInfoWriter.cpp - Generate C++ exception info for PNaCl-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. 
See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The ExceptionInfoWriter class converts the clauses of a +// "landingpad" instruction into data tables stored in global +// variables. These tables are interpreted by PNaCl's C++ runtime +// library (either libsupc++ or libcxxabi), which is linked into a +// pexe. +// +// This is similar to the lowering that the LLVM backend does to +// convert landingpad clauses into ".gcc_except_table" sections. The +// difference is that ExceptionInfoWriter is an IR-to-IR +// transformation that runs on the PNaCl user toolchain side. The +// format it produces is not part of PNaCl's stable ABI; the PNaCl +// translator and LLVM backend do not know about this format. +// +// Encoding: +// +// A landingpad instruction contains a list of clauses. +// ExceptionInfoWriter encodes each clause as a 32-bit "clause ID". A +// clause is one of the following forms: +// +// 1) "catch i8* @ExcType" +// * This clause means that the landingpad should be entered if +// the C++ exception being thrown has type @ExcType (or a +// subtype of @ExcType). @ExcType is a pointer to the +// std::type_info object (an RTTI object) for the C++ exception +// type. +// * Clang generates this for a "catch" block in the C++ source. +// * @ExcType is NULL for "catch (...)" (catch-all) blocks. +// * This is encoded as the "type ID" for @ExcType, defined below, +// which is a positive integer. +// +// 2) "filter [i8* @ExcType1, ..., i8* @ExcTypeN]" +// * This clause means that the landingpad should be entered if +// the C++ exception being thrown *doesn't* match any of the +// types in the list (which are again specified as +// std::type_info pointers). +// * Clang uses this to implement C++ exception specifications, e.g. +// void foo() throw(ExcType1, ..., ExcTypeN) { ... } +// * This is encoded as the filter ID, X, where X < 0, and +// &__pnacl_eh_filter_table[-X-1] points to a 0-terminated +// array of integer "type IDs". +// +// 3) "cleanup" +// * This means that the landingpad should always be entered. +// * Clang uses this for calling objects' destructors. +// * This is encoded as 0. +// * The runtime may treat "cleanup" differently from "catch i8* +// null" (a catch-all). In C++, if an unhandled exception +// occurs, the language runtime may abort execution without +// running any destructors. The runtime may implement this by +// searching for a matching non-"cleanup" clause, and aborting +// if it does not find one, before entering any landingpad +// blocks. +// +// The "type ID" for a type @ExcType is a 1-based index into the array +// __pnacl_eh_type_table[]. That is, the type ID is a value X such +// that __pnacl_eh_type_table[X-1] == @ExcType, and X >= 1. +// +// ExceptionInfoWriter generates the following data structures: +// +// struct action_table_entry { +// int32_t clause_id; +// uint32_t next_clause_list_id; +// }; +// +// // Represents singly linked lists of clauses. +// extern const struct action_table_entry __pnacl_eh_action_table[]; +// +// // Allows std::type_infos to be represented using small integer IDs. +// extern std::type_info *const __pnacl_eh_type_table[]; +// +// // Used to represent type arrays for "filter" clauses. +// extern const uint32_t __pnacl_eh_filter_table[]; +// +// A "clause list ID" is either: +// * 0, representing the empty list; or +// * an index into __pnacl_eh_action_table[] with 1 added, which +// specifies a node in the clause list. 
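
A hypothetical runtime-side walk of the three tables described above, to make the encoding concrete; the real matching logic lives in libsupc++/libcxxabi and is not part of this patch:

```cpp
#include <cstdio>

struct ActionEntry { int ClauseID; unsigned NextClauseListID; };

// Fake std::type_info addresses; index 0 plays ExcA, index 1 plays ExcB.
static const void *TypeTable[]    = { (void *)0x1, (void *)0x2 };
static const unsigned FilterTable[] = { 1, 0 };          // clause -1: filter [ExcA]
static const ActionEntry ActionTable[] = {
  { -1, 0 },   // clause list ID 1: filter [ExcA], end of list
  {  2, 1 },   // clause list ID 2: catch ExcB, else go to list ID 1
};

// Returns true if some clause matches, writing the clause ID the landingpad
// receives; returns false if unwinding should continue past this frame.
static bool matchClauseList(unsigned ClauseListID, const void *ThrownType,
                            int &ClauseOut) {
  while (ClauseListID != 0) {
    const ActionEntry &E = ActionTable[ClauseListID - 1];
    bool Matches;
    if (E.ClauseID == 0) {                        // "cleanup": always enter
      Matches = true;
    } else if (E.ClauseID > 0) {                  // "catch TypeTable[ID-1]"
      const void *CatchType = TypeTable[E.ClauseID - 1];
      Matches = (CatchType == nullptr || CatchType == ThrownType);
    } else {                                      // "filter": enter on mismatch
      Matches = true;
      for (const unsigned *P = &FilterTable[-E.ClauseID - 1]; *P; ++P)
        if (TypeTable[*P - 1] == ThrownType)
          Matches = false;
    }
    if (Matches) {
      ClauseOut = E.ClauseID;
      return true;
    }
    ClauseListID = E.NextClauseListID;
  }
  return false;
}

int main() {
  int Clause;
  if (matchClauseList(2, (const void *)0x2, Clause))   // "throw ExcB"
    std::printf("enter landingpad with clause ID %d\n", Clause);
  return 0;
}
```
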
+// +// Example: +// +// std::type_info *const __pnacl_eh_type_table[] = { +// // defines type ID 1 == ExcA and clause ID 1 == "catch ExcA" +// &typeinfo(ExcA), +// // defines type ID 2 == ExcB and clause ID 2 == "catch ExcB" +// &typeinfo(ExcB), +// // defines type ID 3 == ExcC and clause ID 3 == "catch ExcC" +// &typeinfo(ExcC), +// }; +// +// const uint32_t __pnacl_eh_filter_table[] = { +// 1, // refers to ExcA; defines clause ID -1 as "filter [ExcA, ExcB]" +// 2, // refers to ExcB; defines clause ID -2 as "filter [ExcB]" +// 0, // list terminator; defines clause ID -3 as "filter []" +// 3, // refers to ExcC; defines clause ID -4 as "filter [ExcC]" +// 0, // list terminator; defines clause ID -5 as "filter []" +// }; +// +// const struct action_table_entry __pnacl_eh_action_table[] = { +// // defines clause list ID 1: +// { +// -4, // "filter [ExcC]" +// 0, // end of list (no more actions) +// }, +// // defines clause list ID 2: +// { +// -1, // "filter [ExcA, ExcB]" +// 1, // else go to clause list ID 1 +// }, +// // defines clause list ID 3: +// { +// 2, // "catch ExcB" +// 2, // else go to clause list ID 2 +// }, +// // defines clause list ID 4: +// { +// 1, // "catch ExcA" +// 3, // else go to clause list ID 3 +// }, +// }; +// +// So if a landingpad contains the clause list: +// [catch ExcA, +// catch ExcB, +// filter [ExcA, ExcB], +// filter [ExcC]] +// then this can be represented as clause list ID 4 using the tables above. +// +// The C++ runtime library checks the clauses in order to decide +// whether to enter the landingpad. If a clause matches, the +// landingpad BasicBlock is passed the clause ID. The landingpad code +// can use the clause ID to decide which C++ catch() block (if any) to +// execute. +// +// The purpose of these exception tables is to keep code sizes +// relatively small. The landingpad code only needs to check a small +// integer clause ID, rather than having to call a function to check +// whether the C++ exception matches a type. +// +// ExceptionInfoWriter's encoding corresponds loosely to the format of +// GCC's .gcc_except_table sections. One difference is that +// ExceptionInfoWriter writes fixed-width 32-bit integers, whereas +// .gcc_except_table uses variable-length LEB128 encodings. We could +// switch to LEB128 to save space in the future. +// +//===----------------------------------------------------------------------===// + +#include "ExceptionInfoWriter.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/Constants.h" + +using namespace llvm; + +ExceptionInfoWriter::ExceptionInfoWriter(LLVMContext *Context): + Context(Context) { + Type *I32 = Type::getInt32Ty(*Context); + Type *Fields[] = { I32, I32 }; + ActionTableEntryTy = StructType::create(Fields, "action_table_entry"); +} + +unsigned ExceptionInfoWriter::getIDForExceptionType(Value *ExcTy) { + Constant *ExcTyConst = dyn_cast(ExcTy); + if (!ExcTyConst) + report_fatal_error("Exception type not a constant"); + + // Reuse existing ID if one has already been assigned. + TypeTableIDMapType::iterator Iter = TypeTableIDMap.find(ExcTyConst); + if (Iter != TypeTableIDMap.end()) + return Iter->second; + + unsigned Index = TypeTableData.size() + 1; + TypeTableIDMap[ExcTyConst] = Index; + TypeTableData.push_back(ExcTyConst); + return Index; +} + +unsigned ExceptionInfoWriter::getIDForClauseListNode( + unsigned ClauseID, unsigned NextClauseListID) { + // Reuse existing ID if one has already been assigned. 
+ ActionTableEntry Key(ClauseID, NextClauseListID); + ActionTableIDMapType::iterator Iter = ActionTableIDMap.find(Key); + if (Iter != ActionTableIDMap.end()) + return Iter->second; + + Type *I32 = Type::getInt32Ty(*Context); + Constant *Fields[] = { ConstantInt::get(I32, ClauseID), + ConstantInt::get(I32, NextClauseListID) }; + Constant *Entry = ConstantStruct::get(ActionTableEntryTy, Fields); + + // Add 1 so that the empty list can be represented as 0. + unsigned ClauseListID = ActionTableData.size() + 1; + ActionTableIDMap[Key] = ClauseListID; + ActionTableData.push_back(Entry); + return ClauseListID; +} + +unsigned ExceptionInfoWriter::getIDForFilterClause(Value *Filter) { + unsigned FilterClauseID = -(FilterTableData.size() + 1); + Type *I32 = Type::getInt32Ty(*Context); + ArrayType *ArrayTy = dyn_cast(Filter->getType()); + if (!ArrayTy) + report_fatal_error("Landingpad filter clause is not of array type"); + unsigned FilterLength = ArrayTy->getNumElements(); + // Don't try the dyn_cast if the FilterLength is zero, because Array + // could be a zeroinitializer. + if (FilterLength > 0) { + ConstantArray *Array = dyn_cast(Filter); + if (!Array) + report_fatal_error("Landingpad filter clause is not a ConstantArray"); + for (unsigned I = 0; I < FilterLength; ++I) { + unsigned TypeID = getIDForExceptionType(Array->getOperand(I)); + assert(TypeID > 0); + FilterTableData.push_back(ConstantInt::get(I32, TypeID)); + } + } + // Add array terminator. + FilterTableData.push_back(ConstantInt::get(I32, 0)); + return FilterClauseID; +} + +unsigned ExceptionInfoWriter::getIDForLandingPadClauseList(LandingPadInst *LP) { + unsigned NextClauseListID = 0; // ID for empty list. + + if (LP->isCleanup()) { + // Add cleanup clause at the end of the list. + NextClauseListID = getIDForClauseListNode(0, NextClauseListID); + } + + for (int I = (int) LP->getNumClauses() - 1; I >= 0; --I) { + unsigned ClauseID; + if (LP->isCatch(I)) { + ClauseID = getIDForExceptionType(LP->getClause(I)); + } else if (LP->isFilter(I)) { + ClauseID = getIDForFilterClause(LP->getClause(I)); + } else { + report_fatal_error("Unknown kind of landingpad clause"); + } + assert(ClauseID > 0); + NextClauseListID = getIDForClauseListNode(ClauseID, NextClauseListID); + } + + return NextClauseListID; +} + +static void defineArray(Module *M, const char *Name, + const SmallVectorImpl &Elements, + Type *ElementType) { + ArrayType *ArrayTy = ArrayType::get(ElementType, Elements.size()); + Constant *ArrayData = ConstantArray::get(ArrayTy, Elements); + GlobalVariable *OldGlobal = M->getGlobalVariable(Name); + if (OldGlobal) { + if (OldGlobal->hasInitializer()) { + report_fatal_error(std::string("Variable ") + Name + + " already has an initializer"); + } + Constant *NewGlobal = new GlobalVariable( + *M, ArrayTy, /* isConstant= */ true, + GlobalValue::InternalLinkage, ArrayData); + NewGlobal->takeName(OldGlobal); + OldGlobal->replaceAllUsesWith(ConstantExpr::getBitCast( + NewGlobal, OldGlobal->getType())); + OldGlobal->eraseFromParent(); + } else { + if (Elements.size() > 0) { + // This warning could happen for a program that does not link + // against the C++ runtime libraries. Such a program might + // contain "invoke" instructions but never throw any C++ + // exceptions. 
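
getIDForClauseListNode() and getIDForLandingPadClauseList() above amount to folding the clauses in reverse order through an interning map of (clause ID, next list ID) pairs. A standalone sketch of that fold:

```cpp
#include <cstdio>
#include <map>
#include <utility>
#include <vector>

// Each (clause ID, next list ID) pair becomes a row of the action table
// exactly once; the list ID is the row index plus one so 0 means "empty list".
typedef std::pair<int, unsigned> Node;                 // (clause, next list)

static std::vector<Node> ActionTable;
static std::map<Node, unsigned> NodeIDs;

static unsigned getIDForNode(int ClauseID, unsigned NextListID) {
  Node Key(ClauseID, NextListID);
  std::map<Node, unsigned>::iterator It = NodeIDs.find(Key);
  if (It != NodeIDs.end())
    return It->second;                                 // reuse existing row
  ActionTable.push_back(Key);
  return NodeIDs[Key] = ActionTable.size();            // 1-based list ID
}

static unsigned getIDForClauseList(const std::vector<int> &Clauses) {
  unsigned ListID = 0;                                 // empty list
  for (size_t I = Clauses.size(); I > 0; --I)          // fold in reverse
    ListID = getIDForNode(Clauses[I - 1], ListID);
  return ListID;
}

int main() {
  // Two landingpads sharing a common tail reuse the same table rows.
  unsigned A = getIDForClauseList({1, 2, -1});
  unsigned B = getIDForClauseList({2, -1});
  std::printf("list A=%u list B=%u rows=%zu\n", A, B, ActionTable.size());
  return 0;
}
```
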
+ errs() << "Warning: Variable " << Name << " not referenced\n"; + } + } +} + +void ExceptionInfoWriter::defineGlobalVariables(Module *M) { + defineArray(M, "__pnacl_eh_type_table", TypeTableData, + Type::getInt8PtrTy(M->getContext())); + + defineArray(M, "__pnacl_eh_action_table", ActionTableData, + ActionTableEntryTy); + + defineArray(M, "__pnacl_eh_filter_table", FilterTableData, + Type::getInt32Ty(M->getContext())); +} diff --git a/lib/Transforms/NaCl/ExceptionInfoWriter.h b/lib/Transforms/NaCl/ExceptionInfoWriter.h new file mode 100644 index 000000000000..dadaaf76158c --- /dev/null +++ b/lib/Transforms/NaCl/ExceptionInfoWriter.h @@ -0,0 +1,71 @@ +//===-- ExceptionInfoWriter.h - Generate C++ exception info------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef TRANSFORMS_NACL_EXCEPTIONINFOWRITER_H +#define TRANSFORMS_NACL_EXCEPTIONINFOWRITER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" + +namespace llvm { + +// The ExceptionInfoWriter class converts the clauses of a +// "landingpad" instruction into data tables stored in global +// variables, which are interpreted by PNaCl's C++ runtime library. +// See ExceptionInfoWriter.cpp for a full description. +class ExceptionInfoWriter { + LLVMContext *Context; + StructType *ActionTableEntryTy; + + // Data for populating __pnacl_eh_type_table[], which is an array of + // std::type_info* pointers. Each of these pointers represents a + // C++ exception type. + SmallVector TypeTableData; + // Mapping from std::type_info* pointer to type ID (index in + // TypeTableData). + typedef DenseMap TypeTableIDMapType; + TypeTableIDMapType TypeTableIDMap; + + // Data for populating __pnacl_eh_action_table[], which is an array + // of pairs. + SmallVector ActionTableData; + // Pair of (clause_id, clause_list_id). + typedef std::pair ActionTableEntry; + // Mapping from (clause_id, clause_list_id) to clause_id (index in + // ActionTableData). + typedef DenseMap ActionTableIDMapType; + ActionTableIDMapType ActionTableIDMap; + + // Data for populating __pnacl_eh_filter_table[], which is an array + // of integers. + SmallVector FilterTableData; + + // Get the interned ID for an action. + unsigned getIDForClauseListNode(unsigned ClauseID, unsigned NextClauseListID); + + // Get the clause ID for a "filter" clause. + unsigned getIDForFilterClause(Value *Filter); + +public: + explicit ExceptionInfoWriter(LLVMContext *Context); + + // Get the interned type ID (a small integer) for a C++ exception type. + unsigned getIDForExceptionType(Value *Ty); + + // Get the clause list ID for a landingpad's clause list. + unsigned getIDForLandingPadClauseList(LandingPadInst *LP); + + // Add the exception info tables to the module. + void defineGlobalVariables(Module *M); +}; + +} + +#endif diff --git a/lib/Transforms/NaCl/ExpandArithWithOverflow.cpp b/lib/Transforms/NaCl/ExpandArithWithOverflow.cpp new file mode 100644 index 000000000000..52ea4f6a76fb --- /dev/null +++ b/lib/Transforms/NaCl/ExpandArithWithOverflow.cpp @@ -0,0 +1,152 @@ +//===- ExpandArithWithOverflow.cpp - Expand out uses of *.with.overflow----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// The llvm.*.with.overflow.*() intrinsics are awkward for PNaCl +// support because they return structs, and we want to omit struct +// types from IR in PNaCl's stable ABI. +// +// However, llvm.{umul,uadd}.with.overflow.*() are used by Clang to +// implement an overflow check for C++'s new[] operator. This pass +// expands out these uses so that PNaCl does not have to support +// *.with.overflow as part of PNaCl's stable ABI. +// +// This pass only handles adding/multiplying by a constant, which is +// the only use of *.with.overflow that is currently generated by +// Clang (unless '-ftrapv' is passed to Clang). +// +// X * Const overflows iff X > UINT_MAX / Const, where UINT_MAX is the +// maximum value for the integer type being used. +// +// Similarly, X + Const overflows iff X > UINT_MAX - Const. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Constants.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass so that the pass can easily iterate over all + // uses of the intrinsics. + class ExpandArithWithOverflow : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandArithWithOverflow() : ModulePass(ID) { + initializeExpandArithWithOverflowPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandArithWithOverflow::ID = 0; +INITIALIZE_PASS(ExpandArithWithOverflow, "expand-arith-with-overflow", + "Expand out some uses of *.with.overflow intrinsics", + false, false) + +static uint64_t UintTypeMax(unsigned Bits) { + // Avoid doing 1 << 64 because that is undefined on a uint64_t. + if (Bits == 64) + return ~(uint64_t) 0; + return (((uint64_t) 1) << Bits) - 1; +} + +static Value *CreateInsertValue(Value *StructVal, unsigned Index, + Value *Field, Instruction *BasedOn) { + SmallVector EVIndexes; + EVIndexes.push_back(Index); + return CopyDebug(InsertValueInst::Create( + StructVal, Field, EVIndexes, + BasedOn->getName() + ".insert", BasedOn), BasedOn); +} + +static bool ExpandOpForIntSize(Module *M, unsigned Bits, bool Mul) { + IntegerType *IntTy = IntegerType::get(M->getContext(), Bits); + SmallVector Types; + Types.push_back(IntTy); + Intrinsic::ID ID = (Mul ? 
Intrinsic::umul_with_overflow + : Intrinsic::uadd_with_overflow); + std::string Name = Intrinsic::getName(ID, Types); + Function *Intrinsic = M->getFunction(Name); + if (!Intrinsic) + return false; + for (Value::use_iterator CallIter = Intrinsic->use_begin(), + E = Intrinsic->use_end(); CallIter != E; ) { + CallInst *Call = dyn_cast(*CallIter++); + if (!Call) { + report_fatal_error("ExpandArithWithOverflow: Taking the address of a " + "*.with.overflow intrinsic is not allowed"); + } + Value *VariableArg; + ConstantInt *ConstantArg; + if (ConstantInt *C = dyn_cast(Call->getArgOperand(0))) { + VariableArg = Call->getArgOperand(1); + ConstantArg = C; + } else if (ConstantInt *C = dyn_cast(Call->getArgOperand(1))) { + VariableArg = Call->getArgOperand(0); + ConstantArg = C; + } else { + errs() << "Use: " << *Call << "\n"; + report_fatal_error("ExpandArithWithOverflow: At least one argument of " + "*.with.overflow must be a constant"); + } + + Value *ArithResult = BinaryOperator::Create( + (Mul ? Instruction::Mul : Instruction::Add), VariableArg, ConstantArg, + Call->getName() + ".arith", Call); + CopyDebug(cast(ArithResult), Call); + + uint64_t ArgMax; + if (Mul) { + ArgMax = UintTypeMax(Bits) / ConstantArg->getZExtValue(); + } else { + ArgMax = UintTypeMax(Bits) - ConstantArg->getZExtValue(); + } + Value *OverflowResult = new ICmpInst( + Call, CmpInst::ICMP_UGT, VariableArg, ConstantInt::get(IntTy, ArgMax), + Call->getName() + ".overflow"); + CopyDebug(cast(OverflowResult), Call); + + // Construct the struct result. + Value *NewStruct = UndefValue::get(Call->getType()); + NewStruct = CreateInsertValue(NewStruct, 0, ArithResult, Call); + NewStruct = CreateInsertValue(NewStruct, 1, OverflowResult, Call); + Call->replaceAllUsesWith(NewStruct); + Call->eraseFromParent(); + } + Intrinsic->eraseFromParent(); + return true; +} + +static bool ExpandForIntSize(Module *M, unsigned Bits) { + bool Modified = false; + Modified |= ExpandOpForIntSize(M, Bits, true); // Expand umul + Modified |= ExpandOpForIntSize(M, Bits, false); // Expand uadd + return Modified; +} + +bool ExpandArithWithOverflow::runOnModule(Module &M) { + bool Modified = false; + Modified |= ExpandForIntSize(&M, 64); + Modified |= ExpandForIntSize(&M, 32); + Modified |= ExpandForIntSize(&M, 16); + Modified |= ExpandForIntSize(&M, 8); + return Modified; +} + +ModulePass *llvm::createExpandArithWithOverflowPass() { + return new ExpandArithWithOverflow(); +} diff --git a/lib/Transforms/NaCl/ExpandByVal.cpp b/lib/Transforms/NaCl/ExpandByVal.cpp new file mode 100644 index 000000000000..c9ca637f3f14 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandByVal.cpp @@ -0,0 +1,204 @@ +//===- ExpandByVal.cpp - Expand out use of "byval" and "sret" attributes---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out by-value passing of structs as arguments and +// return values. In LLVM IR terms, it expands out the "byval" and +// "sret" function argument attributes. +// +// The semantics of the "byval" attribute are that the callee function +// gets a private copy of the pointed-to argument that it is allowed +// to modify. In implementing this, we have a choice between making +// the caller responsible for making the copy or making the callee +// responsible for making the copy. 
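
The expansion above replaces each *.with.overflow call with a plain add/mul plus one unsigned comparison whose bound depends only on the constant operand. A sketch of the arithmetic (C is assumed non-zero, which holds for the calls Clang generates):

```cpp
#include <cstdint>
#include <cstdio>

// For an N-bit unsigned X and constant C:
//   X * C overflows  iff  X > UINT_MAX / C
//   X + C overflows  iff  X > UINT_MAX - C
// where UINT_MAX is the largest N-bit value.
static uint64_t uintTypeMax(unsigned Bits) {
  // Avoid 1 << 64, which is undefined on uint64_t.
  return Bits == 64 ? ~(uint64_t)0 : (((uint64_t)1) << Bits) - 1;
}

static bool mulOverflows(uint64_t X, uint64_t C, unsigned Bits) {
  return X > uintTypeMax(Bits) / C;   // C must be non-zero
}

static bool addOverflows(uint64_t X, uint64_t C, unsigned Bits) {
  return X > uintTypeMax(Bits) - C;
}

int main() {
  // 200 * 2 overflows i8 (max 255); 100 * 2 does not; 250 + 10 overflows.
  std::printf("%d %d %d\n", mulOverflows(200, 2, 8), mulOverflows(100, 2, 8),
              addOverflows(250, 10, 8));
  return 0;
}
```
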
We choose the former, because +// this matches how the normal native calling conventions work, and +// because it often allows the caller to write struct contents +// directly into the stack slot that it passes the callee, without an +// additional copy. +// +// Note that this pass does not attempt to modify functions that pass +// structs by value without using "byval" or "sret", such as: +// +// define %struct.X @func() ; struct return +// define void @func(%struct.X %arg) ; struct arg +// +// The pass only handles functions such as: +// +// define void @func(%struct.X* sret %result_buffer) ; struct return +// define void @func(%struct.X* byval %ptr_to_arg) ; struct arg +// +// This is because PNaCl Clang generates the latter and not the former. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass so that it can strip attributes from + // declared functions as well as defined functions. + class ExpandByVal : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandByVal() : ModulePass(ID) { + initializeExpandByValPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + virtual void getAnalysisUsage(AnalysisUsage &Info) const { + Info.addRequired(); + } + }; +} + +char ExpandByVal::ID = 0; +INITIALIZE_PASS(ExpandByVal, "expand-byval", + "Expand out by-value passing of structs", + false, false) + +// removeAttribute() currently does not work on Attribute::Alignment +// (it fails with an assertion error), so we have to take a more +// convoluted route to removing this attribute by recreating the +// AttributeSet. +AttributeSet RemoveAttrs(LLVMContext &Context, AttributeSet Attrs) { + SmallVector AttrList; + for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { + unsigned Index = Attrs.getSlotIndex(Slot); + AttrBuilder AB; + for (AttributeSet::iterator Attr = Attrs.begin(Slot), E = Attrs.end(Slot); + Attr != E; ++Attr) { + if (!Attr->isAlignAttribute() && + Attr->isEnumAttribute() && + Attr->getKindAsEnum() != Attribute::ByVal && + Attr->getKindAsEnum() != Attribute::StructRet) { + AB.addAttribute(*Attr); + } + // IR semantics require that ByVal implies NoAlias. However, IR + // semantics do not require StructRet to imply NoAlias. For + // example, a global variable address can be passed as a + // StructRet argument, although Clang does not do so and Clang + // explicitly adds NoAlias to StructRet arguments. + if (Attr->isEnumAttribute() && + Attr->getKindAsEnum() == Attribute::ByVal) { + AB.addAttribute(Attribute::get(Context, Attribute::NoAlias)); + } + } + AttrList.push_back(AttributeSet::get(Context, Index, AB)); + } + return AttributeSet::get(Context, AttrList); +} + +// ExpandCall() can take a CallInst or an InvokeInst. It returns +// whether the instruction was modified. 
+template +static bool ExpandCall(DataLayout *DL, InstType *Call) { + bool Modify = false; + AttributeSet Attrs = Call->getAttributes(); + for (unsigned ArgIdx = 0; ArgIdx < Call->getNumArgOperands(); ++ArgIdx) { + unsigned AttrIdx = ArgIdx + 1; + + if (Attrs.hasAttribute(AttrIdx, Attribute::StructRet)) + Modify = true; + + if (Attrs.hasAttribute(AttrIdx, Attribute::ByVal)) { + Modify = true; + + Value *ArgPtr = Call->getArgOperand(ArgIdx); + Type *ArgType = ArgPtr->getType()->getPointerElementType(); + ConstantInt *ArgSize = ConstantInt::get( + Call->getContext(), APInt(64, DL->getTypeStoreSize(ArgType))); + unsigned Alignment = Attrs.getParamAlignment(AttrIdx); + // In principle, using the alignment from the argument attribute + // should be enough. However, Clang is not emitting this + // attribute for PNaCl. LLVM alloca instructions do not use the + // ABI alignment of the type, so this must be specified + // explicitly. + // See https://code.google.com/p/nativeclient/issues/detail?id=3403 + unsigned AllocAlignment = + std::max(Alignment, DL->getABITypeAlignment(ArgType)); + + // Make a copy of the byval argument. + Instruction *CopyBuf = CopyDebug(new AllocaInst(ArgType, 0, AllocAlignment, + ArgPtr->getName() + ".byval_copy"), + Call); + Function *Func = Call->getParent()->getParent(); + Func->getEntryBlock().getInstList().push_front(CopyBuf); + IRBuilder<> Builder(Call); + Builder.CreateLifetimeStart(CopyBuf, ArgSize); + // Using the argument's alignment attribute for the memcpy + // should be OK because the LLVM Language Reference says that + // the alignment attribute specifies "the alignment of the stack + // slot to form and the known alignment of the pointer specified + // to the call site". + CopyDebug(Builder.CreateMemCpy(CopyBuf, ArgPtr, ArgSize, + Alignment), Call); + + Call->setArgOperand(ArgIdx, CopyBuf); + + // Mark the argument copy as unused using llvm.lifetime.end. + if (isa(Call)) { + BasicBlock::iterator It = BasicBlock::iterator(Call); + Builder.SetInsertPoint(++It); + Builder.CreateLifetimeEnd(CopyBuf, ArgSize); + } else if (InvokeInst *Invoke = dyn_cast(Call)) { + Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstInsertionPt()); + Builder.CreateLifetimeEnd(CopyBuf, ArgSize); + Builder.SetInsertPoint(Invoke->getUnwindDest()->getFirstInsertionPt()); + Builder.CreateLifetimeEnd(CopyBuf, ArgSize); + } + } + } + if (Modify) { + Call->setAttributes(RemoveAttrs(Call->getContext(), Attrs)); + + if (CallInst *CI = dyn_cast(Call)) { + // This is no longer a tail call because the callee references + // memory alloca'd by the caller. 
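
At the source level, the byval expansion above corresponds to the caller making a private copy of the struct and passing a pointer to the copy, so callee writes never reach the caller's object. A small illustration:

```cpp
#include <cstdio>
#include <cstring>

struct X { int A; int B; };

static void callee(struct X *ByValCopy) {
  ByValCopy->A = 42;              // may scribble on its private copy
}

static void caller(const struct X *Arg) {
  struct X Copy;                  // the alloca created in the entry block
  std::memcpy(&Copy, Arg, sizeof(Copy));
  callee(&Copy);                  // caller's struct is unaffected
}

int main() {
  struct X Val = {1, 2};
  caller(&Val);
  std::printf("%d %d\n", Val.A, Val.B);   // still "1 2"
  return 0;
}
```
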
+ CI->setTailCall(false); + } + } + return Modify; +} + +bool ExpandByVal::runOnModule(Module &M) { + bool Modified = false; + DataLayout DL = getAnalysis(); + + for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { + AttributeSet NewAttrs = RemoveAttrs(Func->getContext(), + Func->getAttributes()); + Modified |= (NewAttrs != Func->getAttributes()); + Func->setAttributes(NewAttrs); + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Inst = BB->begin(), E = BB->end(); + Inst != E; ++Inst) { + if (CallInst *Call = dyn_cast(Inst)) { + Modified |= ExpandCall(&DL, Call); + } else if (InvokeInst *Call = dyn_cast(Inst)) { + Modified |= ExpandCall(&DL, Call); + } + } + } + } + + return Modified; +} + +ModulePass *llvm::createExpandByValPass() { + return new ExpandByVal(); +} diff --git a/lib/Transforms/NaCl/ExpandConstantExpr.cpp b/lib/Transforms/NaCl/ExpandConstantExpr.cpp new file mode 100644 index 000000000000..5f06719de289 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandConstantExpr.cpp @@ -0,0 +1,94 @@ +//===- ExpandConstantExpr.cpp - Convert ConstantExprs to Instructions------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out ConstantExprs into Instructions. +// +// Note that this only converts ConstantExprs that are referenced by +// Instructions. It does not convert ConstantExprs that are used as +// initializers for global variables. +// +// This simplifies the language so that the PNaCl translator does not +// need to handle ConstantExprs as part of a stable wire format for +// PNaCl. +// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/IR/Constants.h" + +using namespace llvm; + +static bool expandInstruction(Instruction *Inst); + +namespace { + // This is a FunctionPass because our handling of PHI nodes means + // that our modifications may cross BasicBlocks. + struct ExpandConstantExpr : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + ExpandConstantExpr() : FunctionPass(ID) { + initializeExpandConstantExprPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &Func); + }; +} + +char ExpandConstantExpr::ID = 0; +INITIALIZE_PASS(ExpandConstantExpr, "expand-constant-expr", + "Expand out ConstantExprs into Instructions", + false, false) + +static Value *expandConstantExpr(Instruction *InsertPt, ConstantExpr *Expr) { + Instruction *NewInst = Expr->getAsInstruction(); + NewInst->insertBefore(InsertPt); + NewInst->setName("expanded"); + expandInstruction(NewInst); + return NewInst; +} + +static bool expandInstruction(Instruction *Inst) { + // A landingpad can only accept ConstantExprs, so it should remain + // unmodified. 
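+  // (Its clause operands are required to be constants, for example
+  // typeinfo globals, so rewriting them into instructions would
+  // produce invalid IR.)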
+ if (isa(Inst)) + return false; + + bool Modified = false; + for (unsigned OpNum = 0; OpNum < Inst->getNumOperands(); OpNum++) { + if (ConstantExpr *Expr = + dyn_cast(Inst->getOperand(OpNum))) { + Modified = true; + Use *U = &Inst->getOperandUse(OpNum); + PhiSafeReplaceUses(U, expandConstantExpr(PhiSafeInsertPt(U), Expr)); + } + } + return Modified; +} + +bool ExpandConstantExpr::runOnFunction(Function &Func) { + bool Modified = false; + for (llvm::Function::iterator BB = Func.begin(), E = Func.end(); + BB != E; + ++BB) { + for (BasicBlock::InstListType::iterator Inst = BB->begin(), E = BB->end(); + Inst != E; + ++Inst) { + Modified |= expandInstruction(Inst); + } + } + return Modified; +} + +FunctionPass *llvm::createExpandConstantExprPass() { + return new ExpandConstantExpr(); +} diff --git a/lib/Transforms/NaCl/ExpandCtors.cpp b/lib/Transforms/NaCl/ExpandCtors.cpp new file mode 100644 index 000000000000..fd38e2f0f1eb --- /dev/null +++ b/lib/Transforms/NaCl/ExpandCtors.cpp @@ -0,0 +1,161 @@ +//===- ExpandCtors.cpp - Convert ctors/dtors to concrete arrays -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass converts LLVM's special symbols llvm.global_ctors and +// llvm.global_dtors to concrete arrays, __init_array_start/end and +// __fini_array_start/end, that are usable by a C library. +// +// This pass sorts the contents of global_ctors/dtors according to the +// priority values they contain and removes the priority values. +// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/TypeBuilder.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + struct ExpandCtors : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ExpandCtors() : ModulePass(ID) { + initializeExpandCtorsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandCtors::ID = 0; +INITIALIZE_PASS(ExpandCtors, "nacl-expand-ctors", + "Hook up constructor and destructor arrays to libc", + false, false) + +static void setGlobalVariableValue(Module &M, const char *Name, + Constant *Value) { + GlobalVariable *Var = M.getNamedGlobal(Name); + if (!Var) { + // This warning can happen in a program that does not use a libc + // and so does not call the functions in __init_array_start or + // __fini_array_end. Such a program might be linked with + // "-nostdlib". 
+ errs() << "Warning: Variable " << Name << " not referenced\n"; + } else { + if (Var->hasInitializer()) { + report_fatal_error(std::string("Variable ") + Name + + " already has an initializer"); + } + Var->replaceAllUsesWith(ConstantExpr::getBitCast(Value, Var->getType())); + Var->eraseFromParent(); + } +} + +struct FuncArrayEntry { + uint64_t priority; + Constant *func; +}; + +static bool compareEntries(FuncArrayEntry Entry1, FuncArrayEntry Entry2) { + return Entry1.priority < Entry2.priority; +} + +static void readFuncList(GlobalVariable *Array, std::vector *Funcs) { + if (!Array->hasInitializer()) + return; + Constant *Init = Array->getInitializer(); + ArrayType *Ty = dyn_cast(Init->getType()); + if (!Ty) { + errs() << "Initializer: " << *Array->getInitializer() << "\n"; + report_fatal_error("ExpandCtors: Initializer is not of array type"); + } + if (Ty->getNumElements() == 0) + return; + ConstantArray *InitList = dyn_cast(Init); + if (!InitList) { + errs() << "Initializer: " << *Array->getInitializer() << "\n"; + report_fatal_error("ExpandCtors: Unexpected initializer ConstantExpr"); + } + std::vector FuncsToSort; + for (unsigned Index = 0; Index < InitList->getNumOperands(); ++Index) { + ConstantStruct *CS = cast(InitList->getOperand(Index)); + FuncArrayEntry Entry; + Entry.priority = cast(CS->getOperand(0))->getZExtValue(); + Entry.func = CS->getOperand(1); + FuncsToSort.push_back(Entry); + } + + std::sort(FuncsToSort.begin(), FuncsToSort.end(), compareEntries); + for (std::vector::iterator Iter = FuncsToSort.begin(); + Iter != FuncsToSort.end(); + ++Iter) { + Funcs->push_back(Iter->func); + } +} + +static void defineFuncArray(Module &M, const char *LlvmArrayName, + const char *StartSymbol, + const char *EndSymbol) { + std::vector Funcs; + + GlobalVariable *Array = M.getNamedGlobal(LlvmArrayName); + if (Array) { + readFuncList(Array, &Funcs); + // No code should be referencing global_ctors/global_dtors, + // because this symbol is internal to LLVM. + Array->eraseFromParent(); + } + + Type *FuncTy = FunctionType::get(Type::getVoidTy(M.getContext()), false); + Type *FuncPtrTy = FuncTy->getPointerTo(); + ArrayType *ArrayTy = ArrayType::get(FuncPtrTy, Funcs.size()); + GlobalVariable *NewArray = + new GlobalVariable(M, ArrayTy, /* isConstant= */ true, + GlobalValue::InternalLinkage, + ConstantArray::get(ArrayTy, Funcs)); + setGlobalVariableValue(M, StartSymbol, NewArray); + // We do this last so that LLVM gives NewArray the name + // "__{init,fini}_array_start" without adding any suffixes to + // disambiguate from the original GlobalVariable's name. This is + // not essential -- it just makes the output easier to understand + // when looking at symbols for debugging. + NewArray->setName(StartSymbol); + + // We replace "__{init,fini}_array_end" with the address of the end + // of NewArray. This removes the name "__{init,fini}_array_end" + // from the output, which is not ideal for debugging. Ideally we + // would convert "__{init,fini}_array_end" to being a GlobalAlias + // that points to the end of the array. However, unfortunately LLVM + // does not generate correct code when a GlobalAlias contains a + // GetElementPtr ConstantExpr. 
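+  // A getelementptr with index 1 over the whole array yields the address
+  // one past the last element.  A C library is then expected to walk the
+  // array roughly like this (illustrative code, not part of this pass):
+  //
+  //   typedef void (*init_func_t)(void);
+  //   extern init_func_t __init_array_start[], __init_array_end[];
+  //   for (init_func_t *f = __init_array_start; f != __init_array_end; f++)
+  //     (*f)();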
+ Constant *NewArrayEnd = + ConstantExpr::getGetElementPtr(NewArray, + ConstantInt::get(M.getContext(), + APInt(32, 1))); + setGlobalVariableValue(M, EndSymbol, NewArrayEnd); +} + +bool ExpandCtors::runOnModule(Module &M) { + defineFuncArray(M, "llvm.global_ctors", + "__init_array_start", "__init_array_end"); + defineFuncArray(M, "llvm.global_dtors", + "__fini_array_start", "__fini_array_end"); + return true; +} + +ModulePass *llvm::createExpandCtorsPass() { + return new ExpandCtors(); +} diff --git a/lib/Transforms/NaCl/ExpandGetElementPtr.cpp b/lib/Transforms/NaCl/ExpandGetElementPtr.cpp new file mode 100644 index 000000000000..8cc1501d8bfd --- /dev/null +++ b/lib/Transforms/NaCl/ExpandGetElementPtr.cpp @@ -0,0 +1,148 @@ +//===- ExpandGetElementPtr.cpp - Expand GetElementPtr into arithmetic------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out GetElementPtr instructions into ptrtoint, +// inttoptr and arithmetic instructions. +// +// This simplifies the language so that the PNaCl translator does not +// need to handle GetElementPtr and struct types as part of a stable +// wire format for PNaCl. +// +// Note that we drop the "inbounds" attribute of GetElementPtr. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Constants.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class ExpandGetElementPtr : public BasicBlockPass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandGetElementPtr() : BasicBlockPass(ID) { + initializeExpandGetElementPtrPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnBasicBlock(BasicBlock &BB); + }; +} + +char ExpandGetElementPtr::ID = 0; +INITIALIZE_PASS(ExpandGetElementPtr, "expand-getelementptr", + "Expand out GetElementPtr instructions into arithmetic", + false, false) + +static Value *CastToPtrSize(Value *Val, Instruction *InsertPt, + Type *PtrType) { + unsigned ValSize = Val->getType()->getIntegerBitWidth(); + unsigned PtrSize = PtrType->getIntegerBitWidth(); + if (ValSize == PtrSize) + return Val; + Instruction *Inst; + if (ValSize > PtrSize) { + Inst = new TruncInst(Val, PtrType, "gep_trunc", InsertPt); + } else { + // GEP indexes must be sign-extended. + Inst = new SExtInst(Val, PtrType, "gep_sext", InsertPt); + } + return CopyDebug(Inst, InsertPt); +} + +static void FlushOffset(Instruction **Ptr, uint64_t *CurrentOffset, + Instruction *InsertPt, Type *PtrType) { + if (*CurrentOffset) { + *Ptr = BinaryOperator::Create(Instruction::Add, *Ptr, + ConstantInt::get(PtrType, *CurrentOffset), + "gep", InsertPt); + CopyDebug(*Ptr, InsertPt); + *CurrentOffset = 0; + } +} + +static void ExpandGEP(GetElementPtrInst *GEP, DataLayout *DL, Type *PtrType) { + Instruction *Ptr = new PtrToIntInst(GEP->getPointerOperand(), PtrType, + "gep_int", GEP); + CopyDebug(Ptr, GEP); + + Type *CurrentTy = GEP->getPointerOperand()->getType(); + // We do some limited constant folding ourselves. An alternative + // would be to generate verbose, unfolded output (e.g. 
multiple + // adds; adds of zero constants) and use a later pass such as + // "-instcombine" to clean that up. However, "-instcombine" can + // reintroduce GetElementPtr instructions. + uint64_t CurrentOffset = 0; + + for (GetElementPtrInst::op_iterator Op = GEP->op_begin() + 1; + Op != GEP->op_end(); + ++Op) { + Value *Index = *Op; + if (StructType *StTy = dyn_cast(CurrentTy)) { + uint64_t Field = cast(Op)->getZExtValue(); + CurrentTy = StTy->getElementType(Field); + CurrentOffset += DL->getStructLayout(StTy)->getElementOffset(Field); + } else { + CurrentTy = cast(CurrentTy)->getElementType(); + uint64_t ElementSize = DL->getTypeAllocSize(CurrentTy); + if (ConstantInt *C = dyn_cast(Index)) { + CurrentOffset += C->getSExtValue() * ElementSize; + } else { + FlushOffset(&Ptr, &CurrentOffset, GEP, PtrType); + Index = CastToPtrSize(Index, GEP, PtrType); + if (ElementSize != 1) { + Index = CopyDebug( + BinaryOperator::Create(Instruction::Mul, Index, + ConstantInt::get(PtrType, ElementSize), + "gep_array", GEP), + GEP); + } + Ptr = BinaryOperator::Create(Instruction::Add, Ptr, + Index, "gep", GEP); + CopyDebug(Ptr, GEP); + } + } + } + FlushOffset(&Ptr, &CurrentOffset, GEP, PtrType); + + assert(CurrentTy == GEP->getType()->getElementType()); + Instruction *Result = new IntToPtrInst(Ptr, GEP->getType(), "", GEP); + CopyDebug(Result, GEP); + Result->takeName(GEP); + GEP->replaceAllUsesWith(Result); + GEP->eraseFromParent(); +} + +bool ExpandGetElementPtr::runOnBasicBlock(BasicBlock &BB) { + bool Modified = false; + DataLayout DL(BB.getParent()->getParent()); + Type *PtrType = DL.getIntPtrType(BB.getContext()); + + for (BasicBlock::InstListType::iterator Iter = BB.begin(); + Iter != BB.end(); ) { + Instruction *Inst = Iter++; + if (GetElementPtrInst *GEP = dyn_cast(Inst)) { + Modified = true; + ExpandGEP(GEP, &DL, PtrType); + } + } + return Modified; +} + +BasicBlockPass *llvm::createExpandGetElementPtrPass() { + return new ExpandGetElementPtr(); +} diff --git a/lib/Transforms/NaCl/ExpandSmallArguments.cpp b/lib/Transforms/NaCl/ExpandSmallArguments.cpp new file mode 100644 index 000000000000..c8a321edb944 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandSmallArguments.cpp @@ -0,0 +1,217 @@ +//===- ExpandSmallArguments.cpp - Expand out arguments smaller than i32----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// LLVM IR allows function return types and argument types such as +// "zeroext i8" and "signext i8". The Language Reference says that +// zeroext "indicates to the code generator that the parameter or +// return value should be zero-extended to the extent required by the +// target's ABI (which is usually 32-bits, but is 8-bits for a i1 on +// x86-64) by the caller (for a parameter) or the callee (for a return +// value)". +// +// This can lead to non-portable behaviour when calling functions +// without C prototypes or with wrong C prototypes. +// +// In order to remove this non-portability from PNaCl, and to simplify +// the language that the PNaCl translator accepts, the +// ExpandSmallArguments pass widens integer arguments and return types +// to be at least 32 bits. The pass inserts explicit cast +// instructions (ZExtInst/SExtInst/TruncInst) as needed. +// +// The pass chooses between ZExtInst and SExtInst widening based on +// whether a "signext" attribute is present. 
However, in principle +// the pass could always use zero-extension, because the extent to +// which either zero-extension or sign-extension is done is up to the +// target ABI, which is up to PNaCl to specify. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass because the pass recreates functions in + // order to change their arguments' types. + class ExpandSmallArguments : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandSmallArguments() : ModulePass(ID) { + initializeExpandSmallArgumentsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandSmallArguments::ID = 0; +INITIALIZE_PASS(ExpandSmallArguments, "expand-small-arguments", + "Expand function arguments to be at least 32 bits in size", + false, false) + +// Returns the normalized version of the given argument/return type. +static Type *NormalizeType(Type *Ty) { + if (IntegerType *IntTy = dyn_cast(Ty)) { + if (IntTy->getBitWidth() < 32) { + return IntegerType::get(Ty->getContext(), 32); + } + } + return Ty; +} + +// Returns the normalized version of the given function type. +static FunctionType *NormalizeFunctionType(FunctionType *FTy) { + if (FTy->isVarArg()) { + report_fatal_error( + "ExpandSmallArguments does not handle varargs functions"); + } + SmallVector ArgTypes; + for (unsigned I = 0; I < FTy->getNumParams(); ++I) { + ArgTypes.push_back(NormalizeType(FTy->getParamType(I))); + } + return FunctionType::get(NormalizeType(FTy->getReturnType()), + ArgTypes, false); +} + +// Convert the given function to use normalized argument/return types. +static bool ConvertFunction(Function *Func) { + FunctionType *FTy = Func->getFunctionType(); + FunctionType *NFTy = NormalizeFunctionType(FTy); + if (NFTy == FTy) + return false; // No change needed. + Function *NewFunc = RecreateFunction(Func, NFTy); + + // Move the arguments across to the new function. + for (Function::arg_iterator Arg = Func->arg_begin(), E = Func->arg_end(), + NewArg = NewFunc->arg_begin(); + Arg != E; ++Arg, ++NewArg) { + NewArg->takeName(Arg); + if (Arg->getType() == NewArg->getType()) { + Arg->replaceAllUsesWith(NewArg); + } else { + Instruction *Trunc = new TruncInst( + NewArg, Arg->getType(), NewArg->getName() + ".arg_trunc", + NewFunc->getEntryBlock().getFirstInsertionPt()); + Arg->replaceAllUsesWith(Trunc); + } + } + + if (FTy->getReturnType() != NFTy->getReturnType()) { + // Fix up return instructions. + Instruction::CastOps CastType = + Func->getAttributes().hasAttribute(0, Attribute::SExt) ? + Instruction::SExt : Instruction::ZExt; + for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); + BB != E; + ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (ReturnInst *Ret = dyn_cast(Inst)) { + Value *Ext = CopyDebug( + CastInst::Create(CastType, Ret->getReturnValue(), + NFTy->getReturnType(), + Ret->getReturnValue()->getName() + ".ret_ext", + Ret), + Ret); + CopyDebug(ReturnInst::Create(Ret->getContext(), Ext, Ret), Ret); + Ret->eraseFromParent(); + } + } + } + } + + Func->eraseFromParent(); + return true; +} + +// Convert the given call to use normalized argument/return types. 
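+// For illustration, a sketch of the call-site rewrite with invented
+// names (attribute and calling-convention copying is omitted here):
+//
+//   %r = call signext i8 @f(i8 signext %x)
+//
+// becomes roughly
+//
+//   %x.ext = sext i8 %x to i32
+//   %f.cast = bitcast i8 (i8)* @f to i32 (i32)*
+//   %r.full = call i32 %f.cast(i32 %x.ext)
+//   %r = trunc i32 %r.full to i8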
+static bool ConvertCall(CallInst *Call) { + // Don't try to change calls to intrinsics. + if (isa(Call)) + return false; + FunctionType *FTy = cast( + Call->getCalledValue()->getType()->getPointerElementType()); + FunctionType *NFTy = NormalizeFunctionType(FTy); + if (NFTy == FTy) + return false; // No change needed. + + // Convert arguments. + SmallVector Args; + for (unsigned I = 0; I < Call->getNumArgOperands(); ++I) { + Value *Arg = Call->getArgOperand(I); + if (NFTy->getParamType(I) != FTy->getParamType(I)) { + Instruction::CastOps CastType = + Call->getAttributes().hasAttribute(I + 1, Attribute::SExt) ? + Instruction::SExt : Instruction::ZExt; + Arg = CopyDebug(CastInst::Create(CastType, Arg, NFTy->getParamType(I), + "arg_ext", Call), Call); + } + Args.push_back(Arg); + } + Value *CastFunc = + CopyDebug(new BitCastInst(Call->getCalledValue(), NFTy->getPointerTo(), + Call->getName() + ".arg_cast", Call), Call); + CallInst *NewCall = CallInst::Create(CastFunc, Args, "", Call); + CopyDebug(NewCall, Call); + NewCall->takeName(Call); + NewCall->setAttributes(Call->getAttributes()); + NewCall->setCallingConv(Call->getCallingConv()); + NewCall->setTailCall(Call->isTailCall()); + Value *Result = NewCall; + if (FTy->getReturnType() != NFTy->getReturnType()) { + Result = CopyDebug(new TruncInst(NewCall, FTy->getReturnType(), + NewCall->getName() + ".ret_trunc", + Call), Call); + } + Call->replaceAllUsesWith(Result); + Call->eraseFromParent(); + return true; +} + +bool ExpandSmallArguments::runOnModule(Module &M) { + bool Changed = false; + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { + Function *Func = Iter++; + // Don't try to change intrinsic declarations because intrinsics + // will continue to have non-normalized argument types. For + // example, memset() takes an i8 argument. It shouldn't matter + // whether we modify the types of other function declarations, but + // we don't expect to see non-intrinsic function declarations in a + // PNaCl pexe. + if (Func->empty()) + continue; + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (CallInst *Call = dyn_cast(Inst)) { + Changed |= ConvertCall(Call); + } else if (isa(Inst)) { + report_fatal_error( + "ExpandSmallArguments does not handle invoke instructions"); + } + } + } + + Changed |= ConvertFunction(Func); + } + return Changed; +} + +ModulePass *llvm::createExpandSmallArgumentsPass() { + return new ExpandSmallArguments(); +} diff --git a/lib/Transforms/NaCl/ExpandStructRegs.cpp b/lib/Transforms/NaCl/ExpandStructRegs.cpp new file mode 100644 index 000000000000..73a584ac614a --- /dev/null +++ b/lib/Transforms/NaCl/ExpandStructRegs.cpp @@ -0,0 +1,297 @@ +//===- ExpandStructRegs.cpp - Expand out variables with struct type--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out some uses of LLVM variables +// (a.k.a. registers) of struct type. It replaces loads and stores of +// structs with separate loads and stores of the structs' fields. The +// motivation is to omit struct types from PNaCl's stable ABI. +// +// ExpandStructRegs does not yet handle all possible uses of struct +// values. 
It is intended to handle the uses that Clang and the SROA +// pass generate. Clang generates struct loads and stores, along with +// extractvalue instructions, in its implementation of C++ method +// pointers, and the SROA pass sometimes converts this code to using +// insertvalue instructions too. +// +// ExpandStructRegs does not handle: +// +// * Nested struct types. +// * Array types. +// * Function types containing arguments or return values of struct +// type without the "byval" or "sret" attributes. Since by-value +// struct-passing generally uses "byval"/"sret", this does not +// matter. +// +// Other limitations: +// +// * ExpandStructRegs does not attempt to use memcpy() where that +// might be more appropriate than copying fields individually. +// * ExpandStructRegs does not preserve the contents of padding +// between fields when copying structs. However, the contents of +// padding fields are not defined anyway. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + struct ExpandStructRegs : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + ExpandStructRegs() : FunctionPass(ID) { + initializeExpandStructRegsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); + }; +} + +char ExpandStructRegs::ID = 0; +INITIALIZE_PASS(ExpandStructRegs, "expand-struct-regs", + "Expand out variables with struct types", false, false) + +static void SplitUpPHINode(PHINode *Phi) { + StructType *STy = cast(Phi->getType()); + + Value *NewStruct = UndefValue::get(STy); + Instruction *NewStructInsertPt = Phi->getParent()->getFirstInsertionPt(); + + // Create a separate PHINode for each struct field. + for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector EVIndexes; + EVIndexes.push_back(Index); + + PHINode *NewPhi = PHINode::Create( + STy->getElementType(Index), Phi->getNumIncomingValues(), + Phi->getName() + ".index", Phi); + CopyDebug(NewPhi, Phi); + for (unsigned PhiIndex = 0; PhiIndex < Phi->getNumIncomingValues(); + ++PhiIndex) { + BasicBlock *IncomingBB = Phi->getIncomingBlock(PhiIndex); + Value *EV = CopyDebug( + ExtractValueInst::Create( + Phi->getIncomingValue(PhiIndex), EVIndexes, + Phi->getName() + ".extract", IncomingBB->getTerminator()), Phi); + NewPhi->addIncoming(EV, IncomingBB); + } + + // Reconstruct the original struct value. + NewStruct = CopyDebug( + InsertValueInst::Create(NewStruct, NewPhi, EVIndexes, + Phi->getName() + ".insert", NewStructInsertPt), + Phi); + } + Phi->replaceAllUsesWith(NewStruct); + Phi->eraseFromParent(); +} + +static void SplitUpSelect(SelectInst *Select) { + StructType *STy = cast(Select->getType()); + Value *NewStruct = UndefValue::get(STy); + + // Create a separate SelectInst for each struct field. 
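+  // Illustrative sketch, assuming a struct of two i32 fields and using
+  // invented names:
+  //
+  //   %s = select i1 %c, %struct.S %a, %struct.S %b
+  //
+  // becomes, for each field,
+  //
+  //   %a.0 = extractvalue %struct.S %a, 0
+  //   %b.0 = extractvalue %struct.S %b, 0
+  //   %s.0 = select i1 %c, i32 %a.0, i32 %b.0
+  //
+  // and the per-field results are reassembled with insertvalue
+  // instructions, which a later stage of this pass removes.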
+ for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector EVIndexes; + EVIndexes.push_back(Index); + + Value *TrueVal = CopyDebug( + ExtractValueInst::Create(Select->getTrueValue(), EVIndexes, + Select->getName() + ".extract", Select), + Select); + Value *FalseVal = CopyDebug( + ExtractValueInst::Create(Select->getFalseValue(), EVIndexes, + Select->getName() + ".extract", Select), + Select); + Value *NewSelect = CopyDebug( + SelectInst::Create(Select->getCondition(), TrueVal, FalseVal, + Select->getName() + ".index", Select), + Select); + + // Reconstruct the original struct value. + NewStruct = CopyDebug( + InsertValueInst::Create(NewStruct, NewSelect, EVIndexes, + Select->getName() + ".insert", Select), + Select); + } + Select->replaceAllUsesWith(NewStruct); + Select->eraseFromParent(); +} + +template +static void ProcessLoadOrStoreAttrs(InstType *Dest, InstType *Src) { + CopyDebug(Dest, Src); + Dest->setVolatile(Src->isVolatile()); + if (Src->isAtomic()) { + errs() << "Use: " << *Src << "\n"; + report_fatal_error("Atomic struct loads/stores not supported"); + } + // Make a pessimistic assumption about alignment. Preserving + // alignment information here is tricky and is not really desirable + // for PNaCl because mistakes here could lead to non-portable + // behaviour. + Dest->setAlignment(1); +} + +static void SplitUpStore(StoreInst *Store) { + StructType *STy = cast(Store->getValueOperand()->getType()); + // Create a separate store instruction for each struct field. + for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector Indexes; + Indexes.push_back(ConstantInt::get(Store->getContext(), APInt(32, 0))); + Indexes.push_back(ConstantInt::get(Store->getContext(), APInt(32, Index))); + Value *GEP = CopyDebug(GetElementPtrInst::Create( + Store->getPointerOperand(), Indexes, + Store->getPointerOperand()->getName() + ".index", + Store), Store); + SmallVector EVIndexes; + EVIndexes.push_back(Index); + Value *Field = CopyDebug(ExtractValueInst::Create(Store->getValueOperand(), + EVIndexes, "", Store), + Store); + StoreInst *NewStore = CopyDebug(new StoreInst(Field, GEP, Store), Store); + ProcessLoadOrStoreAttrs(NewStore, Store); + } + Store->eraseFromParent(); +} + +static void SplitUpLoad(LoadInst *Load) { + StructType *STy = cast(Load->getType()); + Value *NewStruct = UndefValue::get(STy); + + // Create a separate load instruction for each struct field. + for (unsigned Index = 0; Index < STy->getNumElements(); ++Index) { + SmallVector Indexes; + Indexes.push_back(ConstantInt::get(Load->getContext(), APInt(32, 0))); + Indexes.push_back(ConstantInt::get(Load->getContext(), APInt(32, Index))); + Value *GEP = CopyDebug( + GetElementPtrInst::Create(Load->getPointerOperand(), Indexes, + Load->getName() + ".index", Load), Load); + LoadInst *NewLoad = new LoadInst(GEP, Load->getName() + ".field", Load); + ProcessLoadOrStoreAttrs(NewLoad, Load); + + // Reconstruct the struct value. + SmallVector EVIndexes; + EVIndexes.push_back(Index); + NewStruct = CopyDebug( + InsertValueInst::Create(NewStruct, NewLoad, EVIndexes, + Load->getName() + ".insert", Load), Load); + } + Load->replaceAllUsesWith(NewStruct); + Load->eraseFromParent(); +} + +static void ExpandExtractValue(ExtractValueInst *EV) { + // Search for the insertvalue instruction that inserts the struct + // field referenced by this extractvalue instruction. 
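+  // Illustrative sketch with invented names: given
+  //
+  //   %s1 = insertvalue %struct.S undef, i32 %x, 0
+  //   %s2 = insertvalue %struct.S %s1, i32 %y, 1
+  //   %f = extractvalue %struct.S %s2, 0
+  //
+  // the loop below walks back from %s2 to %s1 and replaces %f with %x.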
+ Value *StructVal = EV->getAggregateOperand(); + Value *ResultField = NULL; + for (;;) { + if (InsertValueInst *IV = dyn_cast(StructVal)) { + if (EV->getIndices().equals(IV->getIndices())) { + ResultField = IV->getInsertedValueOperand(); + break; + } + // No match. Try the next struct value in the chain. + StructVal = IV->getAggregateOperand(); + } else if (Constant *C = dyn_cast(StructVal)) { + ResultField = ConstantExpr::getExtractValue(C, EV->getIndices()); + break; + } else { + errs() << "Value: " << *StructVal << "\n"; + report_fatal_error("Unrecognized struct value"); + } + } + + assert(ResultField != NULL); + EV->replaceAllUsesWith(ResultField); + EV->eraseFromParent(); +} + +bool ExpandStructRegs::runOnFunction(Function &Func) { + bool Changed = false; + + // Split up aggregate loads, stores and phi nodes into operations on + // scalar types. This inserts extractvalue and insertvalue + // instructions which we will expand out later. + for (Function::iterator BB = Func.begin(), E = Func.end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (StoreInst *Store = dyn_cast(Inst)) { + if (Store->getValueOperand()->getType()->isStructTy()) { + SplitUpStore(Store); + Changed = true; + } + } else if (LoadInst *Load = dyn_cast(Inst)) { + if (Load->getType()->isStructTy()) { + SplitUpLoad(Load); + Changed = true; + } + } else if (PHINode *Phi = dyn_cast(Inst)) { + if (Phi->getType()->isStructTy()) { + SplitUpPHINode(Phi); + Changed = true; + } + } else if (SelectInst *Select = dyn_cast(Inst)) { + if (Select->getType()->isStructTy()) { + SplitUpSelect(Select); + Changed = true; + } + } + } + } + + // Expand out all the extractvalue instructions. Also collect up + // the insertvalue instructions for later deletion so that we do not + // need to make extra passes across the whole function. + SmallVector ToErase; + for (Function::iterator BB = Func.begin(), E = Func.end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (ExtractValueInst *EV = dyn_cast(Inst)) { + ExpandExtractValue(EV); + Changed = true; + } else if (isa(Inst)) { + ToErase.push_back(Inst); + Changed = true; + } + } + } + // Delete the insertvalue instructions. These can reference each + // other, so we must do dropAllReferences() before doing + // eraseFromParent(), otherwise we will try to erase instructions + // that are still referenced. + for (SmallVectorImpl::iterator I = ToErase.begin(), + E = ToErase.end(); + I != E; ++I) { + (*I)->dropAllReferences(); + } + for (SmallVectorImpl::iterator I = ToErase.begin(), + E = ToErase.end(); + I != E; ++I) { + (*I)->eraseFromParent(); + } + return Changed; +} + +FunctionPass *llvm::createExpandStructRegsPass() { + return new ExpandStructRegs(); +} diff --git a/lib/Transforms/NaCl/ExpandTls.cpp b/lib/Transforms/NaCl/ExpandTls.cpp new file mode 100644 index 000000000000..19837f744876 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandTls.cpp @@ -0,0 +1,334 @@ +//===- ExpandTls.cpp - Convert TLS variables to a concrete layout----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out uses of thread-local (TLS) variables into +// more primitive operations. 
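+//
+// For example, an illustrative sketch of the rewrite described below,
+// with invented names (the struct type and offsets are created by this
+// pass, and INDEX stands for the variable's slot in the template):
+//
+//   %val = load i32* @tls_var
+//
+// becomes roughly
+//
+//   %thread_ptr = call i8* @llvm.nacl.read.tp()
+//   %tls = bitcast i8* %thread_ptr to %tls_struct*
+//   %field = getelementptr %tls_struct* %tls, i32 -1, i32 0, i32 INDEX
+//   %val = load i32* %field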
+// +// A reference to the address of a TLS variable is expanded into code +// which gets the current thread's thread pointer using +// @llvm.nacl.read.tp() and adds a fixed offset. +// +// This pass allocates the offsets (relative to the thread pointer) +// that will be used for TLS variables. It sets up the global +// variables __tls_template_start, __tls_template_end etc. to contain +// a template for initializing TLS variables' values for each thread. +// This is a task normally performed by the linker in ELF systems. +// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + struct VarInfo { + GlobalVariable *TlsVar; + bool IsBss; // Whether variable is in zero-intialized part of template + int TemplateIndex; + }; + + class PassState { + public: + PassState(Module *M): M(M), DL(M), Offset(0), Alignment(1) {} + + Module *M; + DataLayout DL; + uint64_t Offset; + // 'Alignment' is the maximum variable alignment seen so far, in + // bytes. After visiting all TLS variables, this is the overall + // alignment required for the TLS template. + uint32_t Alignment; + }; + + class ExpandTls : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandTls() : ModulePass(ID) { + initializeExpandTlsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandTls::ID = 0; +INITIALIZE_PASS(ExpandTls, "nacl-expand-tls", + "Expand out TLS variables and fix TLS variable layout", + false, false) + +static void setGlobalVariableValue(Module &M, const char *Name, + Constant *Value) { + GlobalVariable *Var = M.getNamedGlobal(Name); + if (!Var) { + // This warning can happen in a program that does not use a libc + // and does not initialize TLS variables. Such a program might be + // linked with "-nostdlib". + errs() << "Warning: Variable " << Name << " not referenced\n"; + } else { + if (Var->hasInitializer()) { + report_fatal_error(std::string("Variable ") + Name + + " already has an initializer"); + } + Var->replaceAllUsesWith(ConstantExpr::getBitCast(Value, Var->getType())); + Var->eraseFromParent(); + } +} + +// Insert alignment padding into the TLS template. 
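+// For example, if the current template offset is 13 and the next
+// variable needs 8-byte alignment, a [3 x i8] zero-filled field is
+// appended and the offset becomes 16.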
+static void padToAlignment(PassState *State, + std::vector *FieldTypes, + std::vector *FieldValues, + unsigned Alignment) { + if ((State->Offset & (Alignment - 1)) != 0) { + unsigned PadSize = Alignment - (State->Offset & (Alignment - 1)); + Type *i8 = Type::getInt8Ty(State->M->getContext()); + Type *PadType = ArrayType::get(i8, PadSize); + FieldTypes->push_back(PadType); + if (FieldValues) + FieldValues->push_back(Constant::getNullValue(PadType)); + State->Offset += PadSize; + } + if (State->Alignment < Alignment) { + State->Alignment = Alignment; + } +} + +static void addVarToTlsTemplate(PassState *State, + std::vector *FieldTypes, + std::vector *FieldValues, + GlobalVariable *TlsVar) { + unsigned Alignment = State->DL.getPreferredAlignment(TlsVar); + padToAlignment(State, FieldTypes, FieldValues, Alignment); + + FieldTypes->push_back(TlsVar->getType()->getElementType()); + if (FieldValues) + FieldValues->push_back(TlsVar->getInitializer()); + State->Offset += + State->DL.getTypeAllocSize(TlsVar->getType()->getElementType()); +} + +static PointerType *buildTlsTemplate(Module &M, std::vector *TlsVars) { + std::vector FieldBssTypes; + std::vector FieldInitTypes; + std::vector FieldInitValues; + PassState State(&M); + + for (Module::global_iterator GV = M.global_begin(); + GV != M.global_end(); + ++GV) { + if (GV->isThreadLocal()) { + if (!GV->hasInitializer()) { + // Since this is a whole-program transformation, "extern" TLS + // variables are not allowed at this point. + report_fatal_error(std::string("TLS variable without an initializer: ") + + GV->getName()); + } + if (!GV->getInitializer()->isNullValue()) { + addVarToTlsTemplate(&State, &FieldInitTypes, + &FieldInitValues, GV); + VarInfo Info; + Info.TlsVar = GV; + Info.IsBss = false; + Info.TemplateIndex = FieldInitTypes.size() - 1; + TlsVars->push_back(Info); + } + } + } + // Handle zero-initialized TLS variables in a second pass, because + // these should follow non-zero-initialized TLS variables. + for (Module::global_iterator GV = M.global_begin(); + GV != M.global_end(); + ++GV) { + if (GV->isThreadLocal() && GV->getInitializer()->isNullValue()) { + addVarToTlsTemplate(&State, &FieldBssTypes, NULL, GV); + VarInfo Info; + Info.TlsVar = GV; + Info.IsBss = true; + Info.TemplateIndex = FieldBssTypes.size() - 1; + TlsVars->push_back(Info); + } + } + // Add final alignment padding so that + // (struct tls_struct *) __nacl_read_tp() - 1 + // gives the correct, aligned start of the TLS variables given the + // x86-style layout we are using. This requires some more bytes to + // be memset() to zero at runtime. This wastage doesn't seem + // important gives that we're not trying to optimize packing by + // reordering to put similarly-aligned variables together. + padToAlignment(&State, &FieldBssTypes, NULL, State.Alignment); + + // We create the TLS template structs as "packed" because we insert + // alignment padding ourselves, and LLVM's implicit insertion of + // padding would interfere with ours. tls_bss_template can start at + // a non-aligned address immediately following the last field in + // tls_init_template. 
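+  // Illustrative layout, assuming one initialized i32 variable, one
+  // zero-initialized i64 variable and an ABI with 8-byte i64 alignment:
+  //
+  //   %tls_init_template = type <{ i32 }>
+  //   %tls_bss_template = type <{ [4 x i8], i64 }>
+  //   %tls_struct = type <{ %tls_init_template, %tls_bss_template }>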
+ StructType *InitTemplateType = + StructType::create(M.getContext(), "tls_init_template"); + InitTemplateType->setBody(FieldInitTypes, /*isPacked=*/true); + StructType *BssTemplateType = + StructType::create(M.getContext(), "tls_bss_template"); + BssTemplateType->setBody(FieldBssTypes, /*isPacked=*/true); + + StructType *TemplateType = StructType::create(M.getContext(), "tls_struct"); + SmallVector TemplateTopFields; + TemplateTopFields.push_back(InitTemplateType); + TemplateTopFields.push_back(BssTemplateType); + TemplateType->setBody(TemplateTopFields, /*isPacked=*/true); + PointerType *TemplatePtrType = PointerType::get(TemplateType, 0); + + // We define the following symbols, which are the same as those + // defined by NaCl's original customized binutils linker scripts: + // __tls_template_start + // __tls_template_tdata_end + // __tls_template_end + // We also define __tls_template_alignment, which was not defined by + // the original linker scripts. + + const char *StartSymbol = "__tls_template_start"; + Constant *TemplateData = ConstantStruct::get(InitTemplateType, + FieldInitValues); + GlobalVariable *TemplateDataVar = + new GlobalVariable(M, InitTemplateType, /*isConstant=*/true, + GlobalValue::InternalLinkage, TemplateData); + setGlobalVariableValue(M, StartSymbol, TemplateDataVar); + TemplateDataVar->setName(StartSymbol); + + Constant *TdataEnd = ConstantExpr::getGetElementPtr( + TemplateDataVar, + ConstantInt::get(M.getContext(), APInt(32, 1))); + setGlobalVariableValue(M, "__tls_template_tdata_end", TdataEnd); + + Constant *TotalEnd = ConstantExpr::getGetElementPtr( + ConstantExpr::getBitCast(TemplateDataVar, TemplatePtrType), + ConstantInt::get(M.getContext(), APInt(32, 1))); + setGlobalVariableValue(M, "__tls_template_end", TotalEnd); + + const char *AlignmentSymbol = "__tls_template_alignment"; + Type *i32 = Type::getInt32Ty(M.getContext()); + GlobalVariable *AlignmentVar = new GlobalVariable( + M, i32, /*isConstant=*/true, + GlobalValue::InternalLinkage, + ConstantInt::get(M.getContext(), APInt(32, State.Alignment))); + setGlobalVariableValue(M, AlignmentSymbol, AlignmentVar); + AlignmentVar->setName(AlignmentSymbol); + + return TemplatePtrType; +} + +static void rewriteTlsVars(Module &M, std::vector *TlsVars, + PointerType *TemplatePtrType) { + // Set up the intrinsic that reads the thread pointer. + Function *ReadTpFunc = Intrinsic::getDeclaration(&M, Intrinsic::nacl_read_tp); + + for (std::vector::iterator VarInfo = TlsVars->begin(); + VarInfo != TlsVars->end(); + ++VarInfo) { + GlobalVariable *Var = VarInfo->TlsVar; + while (!Var->use_empty()) { + Use *U = &Var->use_begin().getUse(); + Instruction *InsertPt = PhiSafeInsertPt(U); + Value *RawThreadPtr = CallInst::Create(ReadTpFunc, "tls_raw", InsertPt); + Value *TypedThreadPtr = new BitCastInst(RawThreadPtr, TemplatePtrType, + "tls_struct", InsertPt); + SmallVector Indexes; + // We use -1 because we use the x86-style TLS layout in which + // the TLS data is stored at addresses below the thread pointer. + // This is largely because a check in nacl_irt_thread_create() + // in irt/irt_thread.c requires the thread pointer to be a + // self-pointer on x86-32. + // TODO(mseaborn): I intend to remove that check because it is + // non-portable. In the mean time, we want PNaCl pexes to work + // in older Chromium releases when translated to nexes. + Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, -1))); + Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, VarInfo->IsBss ? 
1 : 0))); + Indexes.push_back(ConstantInt::get( + M.getContext(), APInt(32, VarInfo->TemplateIndex))); + Value *TlsField = GetElementPtrInst::Create(TypedThreadPtr, Indexes, + "field", InsertPt); + PhiSafeReplaceUses(U, TlsField); + } + VarInfo->TlsVar->eraseFromParent(); + } +} + +// Provide fixed definitions for PNaCl's TLS layout intrinsics. We +// adopt the x86-style layout: ExpandTls will output a program that +// uses the x86-style layout wherever it runs. This overrides any +// architecture-specific definitions of the intrinsics that the LLVM +// backend might provide. +static void defineTlsLayoutIntrinsics(Module &M) { + Type *i32 = Type::getInt32Ty(M.getContext()); + SmallVector ArgTypes; + ArgTypes.push_back(i32); + FunctionType *FuncType = FunctionType::get(i32, ArgTypes, /*isVarArg=*/false); + Function *NewFunc; + BasicBlock *BB; + + // Define the intrinsic as follows: + // uint32_t __nacl_tp_tdb_offset(uint32_t tdb_size) { + // return 0; + // } + // This means the thread pointer points to the TDB. + NewFunc = Function::Create(FuncType, GlobalValue::InternalLinkage, + "nacl_tp_tdb_offset", &M); + BB = BasicBlock::Create(M.getContext(), "entry", NewFunc); + ReturnInst::Create(M.getContext(), + ConstantInt::get(M.getContext(), APInt(32, 0)), BB); + if (Function *Intrinsic = M.getFunction("llvm.nacl.tp.tdb.offset")) { + Intrinsic->replaceAllUsesWith(NewFunc); + Intrinsic->eraseFromParent(); + } + + // Define the intrinsic as follows: + // uint32_t __nacl_tp_tls_offset(uint32_t tls_size) { + // return -tls_size; + // } + // This means the TLS variables are stored below the thread pointer. + NewFunc = Function::Create(FuncType, GlobalValue::InternalLinkage, + "nacl_tp_tls_offset", &M); + BB = BasicBlock::Create(M.getContext(), "entry", NewFunc); + Value *Arg = NewFunc->arg_begin(); + Arg->setName("size"); + Value *Result = BinaryOperator::CreateNeg(Arg, "result", BB); + ReturnInst::Create(M.getContext(), Result, BB); + if (Function *Intrinsic = M.getFunction("llvm.nacl.tp.tls.offset")) { + Intrinsic->replaceAllUsesWith(NewFunc); + Intrinsic->eraseFromParent(); + } +} + +bool ExpandTls::runOnModule(Module &M) { + ModulePass *Pass = createExpandTlsConstantExprPass(); + Pass->runOnModule(M); + delete Pass; + + std::vector TlsVars; + PointerType *TemplatePtrType = buildTlsTemplate(M, &TlsVars); + rewriteTlsVars(M, &TlsVars, TemplatePtrType); + + defineTlsLayoutIntrinsics(M); + + return true; +} + +ModulePass *llvm::createExpandTlsPass() { + return new ExpandTls(); +} diff --git a/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp b/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp new file mode 100644 index 000000000000..33766f42727c --- /dev/null +++ b/lib/Transforms/NaCl/ExpandTlsConstantExpr.cpp @@ -0,0 +1,111 @@ +//===- ExpandTlsConstantExpr.cpp - Convert ConstantExprs to Instructions---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass is a helper used by the ExpandTls pass. +// +// LLVM treats the address of a TLS variable as a ConstantExpr. This +// is arguably a bug because the address of a TLS variable is *not* a +// constant: it varies between threads. 
+// +// See http://llvm.org/bugs/show_bug.cgi?id=14353 +// +// This is also a problem for the ExpandTls pass, which wants to use +// replaceUsesOfWith() to replace each TLS variable with an +// Instruction sequence that calls @llvm.nacl.read.tp(). This doesn't +// work if the TLS variable is used inside other ConstantExprs, +// because ConstantExprs are interned and are not associated with any +// function, whereas each Instruction must be part of a function. +// +// To fix that problem, this pass converts ConstantExprs that +// reference TLS variables into Instructions. +// +// For example, this use of a 'ptrtoint' ConstantExpr: +// +// ret i32 ptrtoint (i32* @tls_var to i32) +// +// is converted into this 'ptrtoint' Instruction: +// +// %expanded = ptrtoint i32* @tls_var to i32 +// ret i32 %expanded +// +//===----------------------------------------------------------------------===// + +#include + +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class ExpandTlsConstantExpr : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandTlsConstantExpr() : ModulePass(ID) { + initializeExpandTlsConstantExprPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandTlsConstantExpr::ID = 0; +INITIALIZE_PASS(ExpandTlsConstantExpr, "nacl-expand-tls-constant-expr", + "Eliminate ConstantExpr references to TLS variables", + false, false) + +// This removes ConstantExpr references to the given Constant. +static void expandConstExpr(Constant *Expr) { + // First, ensure that ConstantExpr references to Expr are converted + // to Instructions so that we can modify them. + for (Value::use_iterator UI = Expr->use_begin(); + UI != Expr->use_end(); + ++UI) { + if (ConstantExpr *CE = dyn_cast(*UI)) { + expandConstExpr(CE); + } + } + Expr->removeDeadConstantUsers(); + + if (ConstantExpr *CE = dyn_cast(Expr)) { + while (!Expr->use_empty()) { + Use *U = &Expr->use_begin().getUse(); + Instruction *NewInst = CE->getAsInstruction(); + NewInst->insertBefore(PhiSafeInsertPt(U)); + NewInst->setName("expanded"); + PhiSafeReplaceUses(U, NewInst); + } + } +} + +bool ExpandTlsConstantExpr::runOnModule(Module &M) { + for (Module::alias_iterator Iter = M.alias_begin(); + Iter != M.alias_end(); ) { + GlobalAlias *GA = Iter++; + if (GA->isThreadDependent()) { + GA->replaceAllUsesWith(GA->getAliasee()); + GA->eraseFromParent(); + } + } + for (Module::global_iterator Global = M.global_begin(); + Global != M.global_end(); + ++Global) { + if (Global->isThreadLocal()) { + expandConstExpr(Global); + } + } + return true; +} + +ModulePass *llvm::createExpandTlsConstantExprPass() { + return new ExpandTlsConstantExpr(); +} diff --git a/lib/Transforms/NaCl/ExpandUtils.cpp b/lib/Transforms/NaCl/ExpandUtils.cpp new file mode 100644 index 000000000000..3a42dd301edf --- /dev/null +++ b/lib/Transforms/NaCl/ExpandUtils.cpp @@ -0,0 +1,57 @@ +//===-- ExpandUtils.cpp - Helper functions for expansion passes -----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +Instruction *llvm::PhiSafeInsertPt(Use *U) { + Instruction *InsertPt = cast(U->getUser()); + if (PHINode *PN = dyn_cast(InsertPt)) { + // We cannot insert instructions before a PHI node, so insert + // before the incoming block's terminator. This could be + // suboptimal if the terminator is a conditional. + InsertPt = PN->getIncomingBlock(*U)->getTerminator(); + } + return InsertPt; +} + +void llvm::PhiSafeReplaceUses(Use *U, Value *NewVal) { + if (PHINode *PN = dyn_cast(U->getUser())) { + // A PHI node can have multiple incoming edges from the same + // block, in which case all these edges must have the same + // incoming value. + BasicBlock *BB = PN->getIncomingBlock(*U); + for (unsigned I = 0; I < PN->getNumIncomingValues(); ++I) { + if (PN->getIncomingBlock(I) == BB) + PN->setIncomingValue(I, NewVal); + } + } else { + U->getUser()->replaceUsesOfWith(U->get(), NewVal); + } +} + +Function *llvm::RecreateFunction(Function *Func, FunctionType *NewType) { + Function *NewFunc = Function::Create(NewType, Func->getLinkage()); + NewFunc->copyAttributesFrom(Func); + Func->getParent()->getFunctionList().insert(Func, NewFunc); + NewFunc->takeName(Func); + NewFunc->getBasicBlockList().splice(NewFunc->begin(), + Func->getBasicBlockList()); + Func->replaceAllUsesWith( + ConstantExpr::getBitCast(NewFunc, + Func->getFunctionType()->getPointerTo())); + return NewFunc; +} diff --git a/lib/Transforms/NaCl/ExpandVarArgs.cpp b/lib/Transforms/NaCl/ExpandVarArgs.cpp new file mode 100644 index 000000000000..18785ca7abf4 --- /dev/null +++ b/lib/Transforms/NaCl/ExpandVarArgs.cpp @@ -0,0 +1,338 @@ +//===- ExpandVarArgs.cpp - Expand out variable argument function calls-----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out all use of variable argument functions. +// +// This pass replaces a varargs function call with a function call in +// which a pointer to the variable arguments is passed explicitly. +// The callee explicitly allocates space for the variable arguments on +// the stack using "alloca". +// +// Alignment: +// +// This pass does not add any alignment padding between the arguments +// that are copied onto the stack. We assume that the only argument +// types that need to be handled are 32-bit and 64-bit -- i32, i64, +// pointers and double: +// +// * We won't see i1, i8, i16 and float as varargs arguments because +// the C standard requires the compiler to promote these to the +// types "int" and "double". +// +// * We won't see va_arg instructions of struct type because Clang +// does not yet support them in PNaCl mode. See +// https://code.google.com/p/nativeclient/issues/detail?id=2381 +// +// If such arguments do appear in the input, this pass will generate +// correct, working code, but this code might be inefficient due to +// using unaligned memory accesses. 
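+//
+// Illustrative sketch of the call rewrite, with invented names (the
+// real pass also emits llvm.lifetime markers and copies each variable
+// argument separately):
+//
+//   %r = call i32 (i8*, ...)* @printf(i8* %fmt, i32 %x)
+//
+// becomes roughly
+//
+//   %buf = alloca <{ i32 }>
+//   %slot = getelementptr <{ i32 }>* %buf, i32 0, i32 0
+//   store i32 %x, i32* %slot
+//   %f = bitcast i32 (i8*, ...)* @printf to i32 (i8*, <{ i32 }>*)*
+//   %r = call i32 %f(i8* %fmt, <{ i32 }>* %buf)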
+// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass because the pass recreates functions in + // order to change their argument lists. + class ExpandVarArgs : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + ExpandVarArgs() : ModulePass(ID) { + initializeExpandVarArgsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char ExpandVarArgs::ID = 0; +INITIALIZE_PASS(ExpandVarArgs, "expand-varargs", + "Expand out variable argument function definitions and calls", + false, false) + +static void ExpandVarArgFunc(Function *Func) { + Type *PtrType = Type::getInt8PtrTy(Func->getContext()); + + FunctionType *FTy = Func->getFunctionType(); + SmallVector Params(FTy->param_begin(), FTy->param_end()); + Params.push_back(PtrType); + FunctionType *NFTy = FunctionType::get(FTy->getReturnType(), Params, false); + Function *NewFunc = RecreateFunction(Func, NFTy); + + // Declare the new argument as "noalias". + NewFunc->setAttributes( + Func->getAttributes().addAttribute( + Func->getContext(), FTy->getNumParams() + 1, Attribute::NoAlias)); + + // Move the arguments across to the new function. + for (Function::arg_iterator Arg = Func->arg_begin(), E = Func->arg_end(), + NewArg = NewFunc->arg_begin(); + Arg != E; ++Arg, ++NewArg) { + Arg->replaceAllUsesWith(NewArg); + NewArg->takeName(Arg); + } + + Func->eraseFromParent(); + + Value *VarArgsArg = --NewFunc->arg_end(); + VarArgsArg->setName("varargs"); + + // Expand out uses of llvm.va_start in this function. + for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); + BB != E; + ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (VAStartInst *VAS = dyn_cast(Inst)) { + Value *Cast = CopyDebug(new BitCastInst(VAS->getArgList(), + PtrType->getPointerTo(), + "arglist", VAS), VAS); + CopyDebug(new StoreInst(VarArgsArg, Cast, VAS), VAS); + VAS->eraseFromParent(); + } + } + } +} + +static void ExpandVAArgInst(VAArgInst *Inst) { + // Read the argument. We assume that no realignment of the pointer + // is required. + Value *ArgList = CopyDebug(new BitCastInst( + Inst->getPointerOperand(), + Inst->getType()->getPointerTo()->getPointerTo(), "arglist", Inst), Inst); + Value *CurrentPtr = CopyDebug(new LoadInst(ArgList, "arglist_current", Inst), + Inst); + Value *Result = CopyDebug(new LoadInst(CurrentPtr, "va_arg", Inst), Inst); + Result->takeName(Inst); + + // Update the va_list to point to the next argument. + SmallVector Indexes; + Indexes.push_back(ConstantInt::get(Inst->getContext(), APInt(32, 1))); + Value *Next = CopyDebug(GetElementPtrInst::Create( + CurrentPtr, Indexes, "arglist_next", Inst), Inst); + CopyDebug(new StoreInst(Next, ArgList, Inst), Inst); + + Inst->replaceAllUsesWith(Result); + Inst->eraseFromParent(); +} + +static void ExpandVACopyInst(VACopyInst *Inst) { + // va_list may have more space reserved, but we only need to + // copy a single pointer. 
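+  // In this expansion a va_list holds only the "next argument" pointer,
+  // so va_copy amounts to the following (illustrative C):
+  //
+  //   *(void **) dest = *(void **) src;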
+ Type *PtrTy = Type::getInt8PtrTy(Inst->getContext())->getPointerTo(); + Value *Src = CopyDebug(new BitCastInst(Inst->getSrc(), PtrTy, "vacopy_src", + Inst), Inst); + Value *Dest = CopyDebug(new BitCastInst(Inst->getDest(), PtrTy, "vacopy_dest", + Inst), Inst); + Value *CurrentPtr = CopyDebug(new LoadInst(Src, "vacopy_currentptr", Inst), + Inst); + CopyDebug(new StoreInst(CurrentPtr, Dest, Inst), Inst); + Inst->eraseFromParent(); +} + +static void LifetimeDecl(Intrinsic::ID id, Value *Ptr, Value *Size, + Instruction *InsertPt) { + Module *M = InsertPt->getParent()->getParent()->getParent(); + Value *Func = Intrinsic::getDeclaration(M, id); + SmallVector Args; + Args.push_back(Size); + Args.push_back(Ptr); + CopyDebug(CallInst::Create(Func, Args, "", InsertPt), InsertPt); +} + +// CopyCall() uses argument overloading so that it can be used by the +// template ExpandVarArgCall(). +static CallInst *CopyCall(CallInst *Original, Value *Callee, + ArrayRef Args) { + return CallInst::Create(Callee, Args, "", Original); +} + +static InvokeInst *CopyCall(InvokeInst *Original, Value *Callee, + ArrayRef Args) { + return InvokeInst::Create(Callee, Original->getNormalDest(), + Original->getUnwindDest(), Args, "", Original); +} + +// ExpandVarArgCall() converts a CallInst or InvokeInst to expand out +// of varargs. It returns whether the module was modified. +template +static bool ExpandVarArgCall(InstType *Call, DataLayout *DL) { + FunctionType *FuncType = cast( + Call->getCalledValue()->getType()->getPointerElementType()); + if (!FuncType->isFunctionVarArg()) + return false; + + LLVMContext *Context = &Call->getContext(); + + SmallVector Attrs; + Attrs.push_back(Call->getAttributes().getFnAttributes()); + Attrs.push_back(Call->getAttributes().getRetAttributes()); + + // Split argument list into fixed and variable arguments. + SmallVector FixedArgs; + SmallVector VarArgs; + SmallVector VarArgsTypes; + for (unsigned I = 0; I < FuncType->getNumParams(); ++I) { + FixedArgs.push_back(Call->getArgOperand(I)); + // AttributeSets use 1-based indexing. + Attrs.push_back(Call->getAttributes().getParamAttributes(I + 1)); + } + for (unsigned I = FuncType->getNumParams(); + I < Call->getNumArgOperands(); ++I) { + Value *ArgVal = Call->getArgOperand(I); + VarArgs.push_back(ArgVal); + if (Call->getAttributes().hasAttribute(I + 1, Attribute::ByVal)) { + // For "byval" arguments we must dereference the pointer. + VarArgsTypes.push_back(ArgVal->getType()->getPointerElementType()); + } else { + VarArgsTypes.push_back(ArgVal->getType()); + } + } + if (VarArgsTypes.size() == 0) { + // Some buggy code (e.g. 176.gcc in Spec2k) uses va_arg on an + // empty argument list, which gives undefined behaviour in C. To + // work around such programs, we create a dummy varargs buffer on + // the stack even though there are no arguments to put in it. + // This allows va_arg to read an undefined value from the stack + // rather than crashing by reading from an uninitialized pointer. + // An alternative would be to pass a null pointer to catch the + // invalid use of va_arg. + VarArgsTypes.push_back(Type::getInt32Ty(*Context)); + } + + // Create struct type for packing variable arguments into. We + // create this as packed for now and assume that no alignment + // padding is desired. + StructType *VarArgsTy = StructType::get(*Context, VarArgsTypes, true); + + // Allocate space for the variable argument buffer. Do this at the + // start of the function so that we don't leak space if the function + // is called in a loop. 
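+  // (An alloca outside the entry block would allocate fresh stack space
+  // on every execution; the entry-block placement plus the
+  // llvm.lifetime markers below let the same frame slot be reused.)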
+ Function *Func = Call->getParent()->getParent(); + Instruction *Buf = new AllocaInst(VarArgsTy, "vararg_buffer"); + Func->getEntryBlock().getInstList().push_front(Buf); + + // Call llvm.lifetime.start/end intrinsics to indicate that Buf is + // only used for the duration of the function call, so that the + // stack space can be reused elsewhere. + Type *I8Ptr = Type::getInt8Ty(*Context)->getPointerTo(); + Instruction *BufPtr = new BitCastInst(Buf, I8Ptr, "vararg_lifetime_bitcast"); + BufPtr->insertAfter(Buf); + Value *BufSize = ConstantInt::get(*Context, + APInt(64, DL->getTypeAllocSize(VarArgsTy))); + LifetimeDecl(Intrinsic::lifetime_start, BufPtr, BufSize, Call); + + // Copy variable arguments into buffer. + int Index = 0; + for (SmallVector::iterator Iter = VarArgs.begin(); + Iter != VarArgs.end(); + ++Iter, ++Index) { + SmallVector Indexes; + Indexes.push_back(ConstantInt::get(*Context, APInt(32, 0))); + Indexes.push_back(ConstantInt::get(*Context, APInt(32, Index))); + Value *Ptr = CopyDebug(GetElementPtrInst::Create( + Buf, Indexes, "vararg_ptr", Call), Call); + if (Call->getAttributes().hasAttribute( + FuncType->getNumParams() + Index + 1, Attribute::ByVal)) { + IRBuilder<> Builder(Call); + Builder.CreateMemCpy( + Ptr, *Iter, + DL->getTypeAllocSize((*Iter)->getType()->getPointerElementType()), + /* Align= */ 1); + } else { + CopyDebug(new StoreInst(*Iter, Ptr, Call), Call); + } + } + + // Cast function to new type to add our extra pointer argument. + SmallVector ArgTypes(FuncType->param_begin(), + FuncType->param_end()); + ArgTypes.push_back(VarArgsTy->getPointerTo()); + FunctionType *NFTy = FunctionType::get(FuncType->getReturnType(), + ArgTypes, false); + Value *CastFunc = + CopyDebug(new BitCastInst(Call->getCalledValue(), NFTy->getPointerTo(), + "vararg_func", Call), Call); + + // Create the converted function call. + FixedArgs.push_back(Buf); + InstType *NewCall = CopyCall(Call, CastFunc, FixedArgs); + CopyDebug(NewCall, Call); + NewCall->setAttributes(AttributeSet::get(Call->getContext(), Attrs)); + NewCall->takeName(Call); + + if (isa(Call)) { + LifetimeDecl(Intrinsic::lifetime_end, BufPtr, BufSize, Call); + } else if (InvokeInst *Invoke = dyn_cast(Call)) { + LifetimeDecl(Intrinsic::lifetime_end, BufPtr, BufSize, + Invoke->getNormalDest()->getFirstInsertionPt()); + LifetimeDecl(Intrinsic::lifetime_end, BufPtr, BufSize, + Invoke->getUnwindDest()->getFirstInsertionPt()); + } + + Call->replaceAllUsesWith(NewCall); + Call->eraseFromParent(); + + return true; +} + +bool ExpandVarArgs::runOnModule(Module &M) { + bool Changed = false; + DataLayout DL(&M); + + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { + Function *Func = Iter++; + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; + ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + if (VAArgInst *VI = dyn_cast(Inst)) { + Changed = true; + ExpandVAArgInst(VI); + } else if (isa(Inst)) { + // va_end() is a no-op in this implementation. 
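+        // Nothing to release here: the caller-side expansion already ends
+        // the packed buffer's lifetime with llvm.lifetime.end, so deleting
+        // the va_end call is sufficient.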
+ Changed = true; + Inst->eraseFromParent(); + } else if (VACopyInst *VAC = dyn_cast(Inst)) { + Changed = true; + ExpandVACopyInst(VAC); + } else if (CallInst *Call = dyn_cast(Inst)) { + Changed |= ExpandVarArgCall(Call, &DL); + } else if (InvokeInst *Call = dyn_cast(Inst)) { + Changed |= ExpandVarArgCall(Call, &DL); + } + } + } + + if (Func->isVarArg()) { + Changed = true; + ExpandVarArgFunc(Func); + } + } + + return Changed; +} + +ModulePass *llvm::createExpandVarArgsPass() { + return new ExpandVarArgs(); +} diff --git a/lib/Transforms/NaCl/FlattenGlobals.cpp b/lib/Transforms/NaCl/FlattenGlobals.cpp new file mode 100644 index 000000000000..966b4e0d0329 --- /dev/null +++ b/lib/Transforms/NaCl/FlattenGlobals.cpp @@ -0,0 +1,300 @@ +//===- FlattenGlobals.cpp - Flatten global variable initializers-----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass converts initializers for global variables into a +// flattened normal form which removes nested struct types and +// simplifies ConstantExprs. +// +// In this normal form, an initializer is either a SimpleElement or a +// CompoundElement. +// +// A SimpleElement is one of the following: +// +// 1) An i8 array literal or zeroinitializer: +// +// [SIZE x i8] c"DATA" +// [SIZE x i8] zeroinitializer +// +// 2) A reference to a GlobalValue (a function or global variable) +// with an optional 32-bit byte offset added to it (the addend): +// +// ptrtoint (TYPE* @GLOBAL to i32) +// add (i32 ptrtoint (TYPE* @GLOBAL to i32), i32 ADDEND) +// +// We use ptrtoint+add rather than bitcast+getelementptr because +// the constructor for getelementptr ConstantExprs performs +// constant folding which introduces more complex getelementptrs, +// and it is hard to check that they follow a normal form. +// +// For completeness, the pass also allows a BlockAddress as well as +// a GlobalValue here, although BlockAddresses are currently not +// allowed in the PNaCl ABI, so this should not be considered part +// of the normal form. +// +// A CompoundElement is a unnamed, packed struct containing only +// SimpleElements. +// +// Limitations: +// +// LLVM IR allows ConstantExprs that calculate the difference between +// two globals' addresses. FlattenGlobals rejects these because Clang +// does not generate these and because ELF does not support such +// relocations in general. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // A FlattenedConstant represents a global variable initializer that + // has been flattened and may be converted into the normal form. + class FlattenedConstant { + LLVMContext *Context; + IntegerType *IntPtrType; + unsigned PtrSize; + + // A flattened global variable initializer is represented as: + // 1) an array of bytes; + unsigned BufSize; + uint8_t *Buf; + uint8_t *BufEnd; + + // 2) an array of relocations. + struct Reloc { + unsigned RelOffset; // Offset at which the relocation is to be applied. 
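+      // The ptrtoint (or add-of-ptrtoint) ConstantExpr whose value is
+      // substituted for the pointer-sized bytes at RelOffset.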
+ Constant *GlobalRef; + }; + typedef SmallVector RelocArray; + RelocArray Relocs; + + void putAtDest(DataLayout *DL, Constant *Value, uint8_t *Dest); + + Constant *dataSlice(unsigned StartPos, unsigned EndPos) { + return ConstantDataArray::get( + *Context, ArrayRef(Buf + StartPos, Buf + EndPos)); + } + + public: + FlattenedConstant(DataLayout *DL, Constant *Value): + Context(&Value->getContext()) { + IntPtrType = DL->getIntPtrType(*Context); + PtrSize = DL->getPointerSize(); + BufSize = DL->getTypeAllocSize(Value->getType()); + Buf = new uint8_t[BufSize]; + BufEnd = Buf + BufSize; + memset(Buf, 0, BufSize); + putAtDest(DL, Value, Buf); + } + + ~FlattenedConstant() { + delete[] Buf; + } + + Constant *getAsNormalFormConstant(); + }; + + class FlattenGlobals : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + FlattenGlobals() : ModulePass(ID) { + initializeFlattenGlobalsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +static void ExpandConstant(DataLayout *DL, Constant *Val, + Constant **ResultGlobal, uint64_t *ResultOffset) { + if (isa(Val) || isa(Val)) { + *ResultGlobal = Val; + *ResultOffset = 0; + } else if (isa(Val)) { + *ResultGlobal = NULL; + *ResultOffset = 0; + } else if (ConstantInt *CI = dyn_cast(Val)) { + *ResultGlobal = NULL; + *ResultOffset = CI->getZExtValue(); + } else if (ConstantExpr *CE = dyn_cast(Val)) { + ExpandConstant(DL, CE->getOperand(0), ResultGlobal, ResultOffset); + if (CE->getOpcode() == Instruction::GetElementPtr) { + SmallVector Indexes(CE->op_begin() + 1, CE->op_end()); + *ResultOffset += DL->getIndexedOffset(CE->getOperand(0)->getType(), + Indexes); + } else if (CE->getOpcode() == Instruction::BitCast || + CE->getOpcode() == Instruction::IntToPtr) { + // Nothing more to do. + } else if (CE->getOpcode() == Instruction::PtrToInt) { + if (Val->getType()->getIntegerBitWidth() < DL->getPointerSizeInBits()) { + errs() << "Not handled: " << *CE << "\n"; + report_fatal_error("FlattenGlobals: a ptrtoint that truncates " + "a pointer is not allowed"); + } + } else { + errs() << "Not handled: " << *CE << "\n"; + report_fatal_error( + std::string("FlattenGlobals: ConstantExpr opcode not handled: ") + + CE->getOpcodeName()); + } + } else { + errs() << "Not handled: " << *Val << "\n"; + report_fatal_error("FlattenGlobals: Constant type not handled for reloc"); + } +} + +void FlattenedConstant::putAtDest(DataLayout *DL, Constant *Val, + uint8_t *Dest) { + uint64_t ValSize = DL->getTypeAllocSize(Val->getType()); + assert(Dest + ValSize <= BufEnd); + if (isa(Val) || + isa(Val) || + isa(Val)) { + // The buffer is already zero-initialized. + } else if (ConstantInt *CI = dyn_cast(Val)) { + memcpy(Dest, CI->getValue().getRawData(), ValSize); + } else if (ConstantFP *CF = dyn_cast(Val)) { + APInt Data = CF->getValueAPF().bitcastToAPInt(); + assert((Data.getBitWidth() + 7) / 8 == ValSize); + assert(Data.getBitWidth() % 8 == 0); + memcpy(Dest, Data.getRawData(), ValSize); + } else if (ConstantDataSequential *CD = + dyn_cast(Val)) { + // Note that getRawDataValues() assumes the host endianness is the same. 
+ StringRef Data = CD->getRawDataValues(); + assert(Data.size() == ValSize); + memcpy(Dest, Data.data(), Data.size()); + } else if (isa(Val) || isa(Val)) { + uint64_t ElementSize = DL->getTypeAllocSize( + Val->getType()->getSequentialElementType()); + for (unsigned I = 0; I < Val->getNumOperands(); ++I) { + putAtDest(DL, cast(Val->getOperand(I)), Dest + ElementSize * I); + } + } else if (ConstantStruct *CS = dyn_cast(Val)) { + const StructLayout *Layout = DL->getStructLayout(CS->getType()); + for (unsigned I = 0; I < CS->getNumOperands(); ++I) { + putAtDest(DL, CS->getOperand(I), Dest + Layout->getElementOffset(I)); + } + } else { + Constant *GV; + uint64_t Offset; + ExpandConstant(DL, Val, &GV, &Offset); + if (GV) { + Constant *NewVal = ConstantExpr::getPtrToInt(GV, IntPtrType); + if (Offset) { + // For simplicity, require addends to be 32-bit. + if ((int64_t) Offset != (int32_t) (uint32_t) Offset) { + errs() << "Not handled: " << *Val << "\n"; + report_fatal_error( + "FlattenGlobals: Offset does not fit into 32 bits"); + } + NewVal = ConstantExpr::getAdd( + NewVal, ConstantInt::get(IntPtrType, Offset, /* isSigned= */ true)); + } + Reloc NewRel = { static_cast(Dest - Buf), NewVal }; + Relocs.push_back(NewRel); + } else { + memcpy(Dest, &Offset, ValSize); + } + } +} + +Constant *FlattenedConstant::getAsNormalFormConstant() { + // Return a single SimpleElement. + if (Relocs.size() == 0) + return dataSlice(0, BufSize); + if (Relocs.size() == 1 && BufSize == PtrSize) { + assert(Relocs[0].RelOffset == 0); + return Relocs[0].GlobalRef; + } + + // Return a CompoundElement. + SmallVector Elements; + unsigned PrevPos = 0; + for (RelocArray::iterator Rel = Relocs.begin(), E = Relocs.end(); + Rel != E; ++Rel) { + if (Rel->RelOffset > PrevPos) + Elements.push_back(dataSlice(PrevPos, Rel->RelOffset)); + Elements.push_back(Rel->GlobalRef); + PrevPos = Rel->RelOffset + PtrSize; + } + if (PrevPos < BufSize) + Elements.push_back(dataSlice(PrevPos, BufSize)); + return ConstantStruct::getAnon(*Context, Elements, true); +} + +char FlattenGlobals::ID = 0; +INITIALIZE_PASS(FlattenGlobals, "flatten-globals", + "Flatten global variable initializers into byte arrays", + false, false) + +bool FlattenGlobals::runOnModule(Module &M) { + bool Modified = false; + DataLayout DL(&M); + Type *I8 = Type::getInt8Ty(M.getContext()); + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ) { + GlobalVariable *Global = I++; + // Variables with "appending" linkage must always be arrays and so + // cannot be normalized, so leave them alone. + if (Global->hasAppendingLinkage()) + continue; + Modified = true; + + Type *GlobalType = Global->getType()->getPointerElementType(); + uint64_t Size = DL.getTypeAllocSize(GlobalType); + Constant *NewInit; + Type *NewType; + if (Global->hasInitializer()) { + if (Global->getInitializer()->isNullValue()) { + // As an optimization, for large BSS variables, avoid + // allocating a buffer that would only be filled with zeroes. 
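+        // For example, a zero-initialized i32 global simply becomes
+        // [4 x i8] zeroinitializer without materializing a host-side buffer.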
+ NewType = ArrayType::get(I8, Size); + NewInit = ConstantAggregateZero::get(NewType); + } else { + FlattenedConstant Buffer(&DL, Global->getInitializer()); + NewInit = Buffer.getAsNormalFormConstant(); + NewType = NewInit->getType(); + } + } else { + NewInit = NULL; + NewType = ArrayType::get(I8, Size); + } + GlobalVariable *NewGlobal = new GlobalVariable( + M, NewType, + Global->isConstant(), + Global->getLinkage(), + NewInit, "", Global, + Global->getThreadLocalMode()); + NewGlobal->copyAttributesFrom(Global); + if (NewGlobal->getAlignment() == 0) + NewGlobal->setAlignment(DL.getPrefTypeAlignment(GlobalType)); + NewGlobal->setExternallyInitialized(Global->isExternallyInitialized()); + NewGlobal->takeName(Global); + if (!Global->use_empty()) + Global->replaceAllUsesWith( + ConstantExpr::getBitCast(NewGlobal, Global->getType())); + Global->eraseFromParent(); + } + return Modified; + +} + +ModulePass *llvm::createFlattenGlobalsPass() { + return new FlattenGlobals(); +} diff --git a/lib/Transforms/NaCl/GlobalCleanup.cpp b/lib/Transforms/NaCl/GlobalCleanup.cpp new file mode 100644 index 000000000000..d489fefc1dcb --- /dev/null +++ b/lib/Transforms/NaCl/GlobalCleanup.cpp @@ -0,0 +1,120 @@ +//===- GlobalCleanup.cpp - Cleanup global symbols post-bitcode-link -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// ===---------------------------------------------------------------------===// +// +// PNaCl executables should have no external symbols or aliases. These passes +// internalize (or otherwise remove/resolve) GlobalValues and resolve all +// GlobalAliases. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class GlobalCleanup : public ModulePass { + public: + static char ID; + GlobalCleanup() : ModulePass(ID) { + initializeGlobalCleanupPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); + }; + + class ResolveAliases : public ModulePass { + public: + static char ID; + ResolveAliases() : ModulePass(ID) { + initializeResolveAliasesPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnModule(Module &M); + }; +} + +char GlobalCleanup::ID = 0; +INITIALIZE_PASS(GlobalCleanup, "nacl-global-cleanup", + "GlobalValue cleanup for PNaCl " + "(assumes all of the binary is linked statically)", + false, false) + +static bool CleanUpLinkage(GlobalValue *GV) { + // TODO(dschuff): handle the rest of the linkage types as necessary without + // running afoul of the IR verifier or breaking the native link + switch (GV->getLinkage()) { + case GlobalValue::ExternalWeakLinkage: { + Constant *NullRef = Constant::getNullValue(GV->getType()); + GV->replaceAllUsesWith(NullRef); + GV->eraseFromParent(); + return true; + } + case GlobalValue::WeakAnyLinkage: { + GV->setLinkage(GlobalValue::InternalLinkage); + return true; + } + default: + // default with fall through to avoid compiler warning + return false; + } + return false; +} + +bool GlobalCleanup::runOnModule(Module &M) { + bool Modified = false; + + if (GlobalVariable *GV = M.getNamedGlobal("llvm.compiler.used")) { + GV->eraseFromParent(); + Modified = true; + } + if (GlobalVariable *GV = M.getNamedGlobal("llvm.used")) { + 
GV->eraseFromParent(); + Modified = true; + } + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ) { + GlobalVariable *GV = I++; + Modified |= CleanUpLinkage(GV); + } + + for (Module::iterator I = M.begin(), E = M.end(); I != E; ) { + Function *F = I++; + Modified |= CleanUpLinkage(F); + } + return Modified; +} + +ModulePass *llvm::createGlobalCleanupPass() { + return new GlobalCleanup(); +} + +char ResolveAliases::ID = 0; +INITIALIZE_PASS(ResolveAliases, "resolve-aliases", + "resolve global variable and function aliases", false, false) + +bool ResolveAliases::runOnModule(Module &M) { + bool Modified = false; + + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + I != E; ) { + GlobalAlias *Alias = I++; + Alias->replaceAllUsesWith(Alias->getAliasee()); + Alias->eraseFromParent(); + Modified = true; + } + return Modified; +} + +ModulePass *llvm::createResolveAliasesPass() { + return new ResolveAliases(); +} diff --git a/lib/Transforms/NaCl/InsertDivideCheck.cpp b/lib/Transforms/NaCl/InsertDivideCheck.cpp new file mode 100644 index 000000000000..d8190a1df1b3 --- /dev/null +++ b/lib/Transforms/NaCl/InsertDivideCheck.cpp @@ -0,0 +1,111 @@ +//===- InsertDivideCheck.cpp - Add divide by zero checks ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass adds a check for divide by zero before every integer DIV or REM. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "add-divide-check" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/Support/CFG.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class InsertDivideCheck : public FunctionPass { + public: + static char ID; + InsertDivideCheck() : FunctionPass(ID) { + initializeInsertDivideCheckPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F); + }; +} + +static BasicBlock *CreateTrapBlock(Function &F, Instruction* Dbg) { + BasicBlock *TrapBlock = BasicBlock::Create(F.getContext(), "divrem.by.zero", + &F); + Value *TrapFn = Intrinsic::getDeclaration(F.getParent(), Intrinsic::trap); + CopyDebug(CallInst::Create(TrapFn, "", TrapBlock), Dbg); + CopyDebug(new UnreachableInst(F.getContext(), TrapBlock), Dbg); + return TrapBlock; +} + +bool InsertDivideCheck::runOnFunction(Function &F) { + SmallPtrSet GuardedDivs; + // If the pass finds a DIV/REM that needs to be checked for zero denominator, + // it will insert a new "trap" block, and split the block that contains the + // DIV/REM into two blocks. The new BasicBlocks are added after the current + // BasicBlock, so that if there is more than one DIV/REM in the same block, + // all are visited. 
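+  // Illustrative result for a single division (value names are assumptions):
+  //     %q = udiv i32 %a, %b
+  // becomes:
+  //     %iszero = icmp eq i32 %b, 0
+  //     br i1 %iszero, label %divrem.by.zero, label %guarded.divrem
+  //   guarded.divrem:
+  //     %q = udiv i32 %a, %b
+  //   divrem.by.zero:
+  //     call void @llvm.trap()
+  //     unreachable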
+ for (Function::iterator I = F.begin(); I != F.end(); I++) { + BasicBlock *BB = I; + + for (BasicBlock::iterator BI = BB->begin(), BE = BB->end(); + BI != BE; BI++) { + BinaryOperator *DivInst = dyn_cast(BI); + if (!DivInst || (GuardedDivs.count(DivInst) != 0)) + continue; + unsigned Opcode = DivInst->getOpcode(); + if (Opcode != Instruction::SDiv && Opcode != Instruction::UDiv && + Opcode != Instruction::SRem && Opcode != Instruction::URem) + continue; + Value *Denominator = DivInst->getOperand(1); + if (!Denominator->getType()->isIntegerTy()) + continue; + if (ConstantInt *DenomConst = dyn_cast(Denominator)) { + // Divides by constants do not need a denominator test. + if (DenomConst->isZero()) { + // For explicit divides by zero, insert a trap before DIV/REM + Value *TrapFn = Intrinsic::getDeclaration(F.getParent(), + Intrinsic::trap); + CopyDebug(CallInst::Create(TrapFn, "", DivInst), DivInst); + } + continue; + } + // Create a trap block. + BasicBlock *TrapBlock = CreateTrapBlock(F, DivInst); + // Move instructions in BB from DivInst to BB's end to a new block. + BasicBlock *Successor = BB->splitBasicBlock(BI, "guarded.divrem"); + // Remove the unconditional branch inserted by splitBasicBlock. + BB->getTerminator()->eraseFromParent(); + // Remember that DivInst was already processed, so that when we process + // inserted blocks later, we do not attempt to again guard it. + GuardedDivs.insert(DivInst); + // Compare the denominator with zero. + Value *Zero = ConstantInt::get(Denominator->getType(), 0); + Value *DenomIsZero = CopyDebug(new ICmpInst(*BB, ICmpInst::ICMP_EQ, Denominator, + Zero, ""), DivInst); + // Put in a condbranch to the trap block. + CopyDebug(BranchInst::Create(TrapBlock, Successor, DenomIsZero, BB), DivInst); + // BI is invalidated when we split. Stop the BasicBlock iterator. + break; + } + } + + return false; +} + +char InsertDivideCheck::ID = 0; +INITIALIZE_PASS(InsertDivideCheck, "insert-divide-check", + "Insert divide by zero checks", false, false) + +FunctionPass *llvm::createInsertDivideCheckPass() { + return new InsertDivideCheck(); +} diff --git a/lib/Transforms/NaCl/LLVMBuild.txt b/lib/Transforms/NaCl/LLVMBuild.txt new file mode 100644 index 000000000000..051a0d30eda5 --- /dev/null +++ b/lib/Transforms/NaCl/LLVMBuild.txt @@ -0,0 +1,23 @@ +;===- ./lib/Transforms/NaCl/LLVMBuild.txt ----------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = NaClTransforms +parent = Transforms +library_name = NaClTransforms +required_libraries = Core Support IPO diff --git a/lib/Transforms/NaCl/Makefile b/lib/Transforms/NaCl/Makefile new file mode 100644 index 000000000000..f297b753d7c9 --- /dev/null +++ b/lib/Transforms/NaCl/Makefile @@ -0,0 +1,15 @@ +##===- lib/Transforms/NaCl/Makefile-------------------------*- Makefile -*-===## +# +# The LLVM Compiler Infrastructure +# +# This file is distributed under the University of Illinois Open Source +# License. See LICENSE.TXT for details. 
+# +##===----------------------------------------------------------------------===## + +LEVEL = ../../.. +LIBRARYNAME = LLVMNaClTransforms +BUILD_ARCHIVE = 1 + +include $(LEVEL)/Makefile.common + diff --git a/lib/Transforms/NaCl/PNaClABISimplify.cpp b/lib/Transforms/NaCl/PNaClABISimplify.cpp new file mode 100644 index 000000000000..d8072540fcf6 --- /dev/null +++ b/lib/Transforms/NaCl/PNaClABISimplify.cpp @@ -0,0 +1,161 @@ +//===-- PNaClABISimplify.cpp - Lists PNaCl ABI simplification passes ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the meta-passes "-pnacl-abi-simplify-preopt" +// and "-pnacl-abi-simplify-postopt". It lists their constituent +// passes. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/NaCl.h" +#include "llvm/PassManager.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Scalar.h" + +using namespace llvm; + +static cl::opt +EnableSjLjEH("enable-pnacl-sjlj-eh", + cl::desc("Enable use of SJLJ-based C++ exception handling " + "as part of the pnacl-abi-simplify passes"), + cl::init(false)); + +void llvm::PNaClABISimplifyAddPreOptPasses(PassManagerBase &PM, const bool BuildingLibrary) { + if (EnableSjLjEH) { + // This comes before ExpandTls because it introduces references to + // a TLS variable, __pnacl_eh_stack. This comes before + // InternalizePass because it assumes various variables (including + // __pnacl_eh_stack) have not been internalized yet. + PM.add(createPNaClSjLjEHPass()); + } else { + // LowerInvoke prevents use of C++ exception handling by removing + // references to BasicBlocks which handle exceptions. + PM.add(createLowerInvokePass()); + // Remove landingpad blocks made unreachable by LowerInvoke. + PM.add(createCFGSimplificationPass()); + } + + if(!BuildingLibrary) { + // Internalize all symbols in the module except _start, which is the only + // symbol a stable PNaCl pexe is allowed to export. + PM.add(createInternalizePass("_start")); + } + + // LowerExpect converts Intrinsic::expect into branch weights, + // which can then be removed after BlockPlacement. + PM.add(createLowerExpectIntrinsicPass()); + // Rewrite unsupported intrinsics to simpler and portable constructs. + PM.add(createRewriteLLVMIntrinsicsPass()); + + // Expand out some uses of struct types. + PM.add(createExpandArithWithOverflowPass()); + + // This small collection of passes is targeted toward Rust generated IR + // solely for the purpose of helping later NaCl transformations handle the + // high number of structures Rust outputs. + PM.add(createPromoteSimpleStructsPass()); + PM.add(createPromoteReturnedStructsPass()); + PM.add(createPromoteStructureArgsPass()); + + // ExpandStructRegs must be run after ExpandArithWithOverflow to + // expand out the insertvalue instructions that + // ExpandArithWithOverflow introduces. + PM.add(createExpandStructRegsPass()); + + PM.add(createExpandVarArgsPass()); + PM.add(createExpandCtorsPass()); + PM.add(createResolveAliasesPass()); + PM.add(createExpandTlsPass()); + if(!BuildingLibrary) { + // GlobalCleanup needs to run after ExpandTls because + // __tls_template_start etc. 
are extern_weak before expansion + PM.add(createGlobalCleanupPass()); + } +} + +void llvm::PNaClABISimplifyAddPostOptPasses(PassManagerBase &PM) { + + PM.add(createRewritePNaClLibraryCallsPass()); + + // We place ExpandByVal after optimization passes because some byval + // arguments can be expanded away by the ArgPromotion pass. Leaving + // in "byval" during optimization also allows some dead stores to be + // eliminated, because "byval" is a stronger constraint than what + // ExpandByVal expands it to. + PM.add(createExpandByValPass()); + + // We place ExpandSmallArguments after optimization passes because + // some optimizations undo its changes. Note that + // ExpandSmallArguments requires that ExpandVarArgs has already been + // run. + PM.add(createExpandSmallArgumentsPass()); + + PM.add(createPromoteI1OpsPass()); + + // Optimization passes and ExpandByVal introduce + // memset/memcpy/memmove intrinsics with a 64-bit size argument. + // This pass converts those arguments to 32-bit. + PM.add(createCanonicalizeMemIntrinsicsPass()); + + // We place StripMetadata after optimization passes because + // optimizations depend on the metadata. + PM.add(createStripMetadataPass()); + + // FlattenGlobals introduces ConstantExpr bitcasts of globals which + // are expanded out later. + PM.add(createFlattenGlobalsPass()); + + // We should not place arbitrary passes after ExpandConstantExpr + // because they might reintroduce ConstantExprs. + PM.add(createExpandConstantExprPass()); + + // PromoteIntegersPass does not handle constexprs and creates GEPs, + // so it goes between those passes. + PM.add(createPromoteIntegersPass()); + + // ExpandGetElementPtr must follow ExpandConstantExpr to expand the + // getelementptr instructions it creates. + PM.add(createExpandGetElementPtrPass()); + + // Rewrite atomic and volatile instructions with intrinsic calls. + PM.add(createRewriteAtomicsPass()); + + // Remove ``asm("":::"memory")``. This must occur after rewriting + // atomics: a ``fence seq_cst`` surrounded by ``asm("":::"memory")`` + // has special meaning and is translated differently. + PM.add(createRemoveAsmMemoryPass()); + + // ReplacePtrsWithInts assumes that getelementptr instructions and + // ConstantExprs have already been expanded out. + PM.add(createReplacePtrsWithIntsPass()); + + // We place StripAttributes after optimization passes because many + // analyses add attributes to reflect their results. + // StripAttributes must come after ExpandByVal and + // ExpandSmallArguments. + PM.add(createStripAttributesPass()); + + // Strip dead prototytes to appease the intrinsic ABI checks. + // ExpandVarArgs leaves around vararg intrinsics, and + // ReplacePtrsWithInts leaves the lifetime.start/end intrinsics. + PM.add(createStripDeadPrototypesPass()); + + // Eliminate simple dead code that the post-opt passes could have + // created. + PM.add(createDeadInstEliminationPass()); + PM.add(createDeadCodeEliminationPass()); + + // Remove superfluous [0 x i8] and some [2 x i8] left over. + PM.add(createReplaceAggregatesWithIntsPass()); + + // Remove additional instructions killed by ReplaceArraysWithInts. 
+ PM.add(createDeadInstEliminationPass()); + +} diff --git a/lib/Transforms/NaCl/PNaClSjLjEH.cpp b/lib/Transforms/NaCl/PNaClSjLjEH.cpp new file mode 100644 index 000000000000..2057ddbc0b1f --- /dev/null +++ b/lib/Transforms/NaCl/PNaClSjLjEH.cpp @@ -0,0 +1,515 @@ +//===- PNaClSjLjEH.cpp - Lower C++ exception handling to use setjmp()------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The PNaClSjLjEH pass is part of an implementation of C++ exception +// handling for PNaCl that uses setjmp() and longjmp() to handle C++ +// exceptions. The pass lowers LLVM "invoke" instructions to use +// setjmp(). +// +// For example, consider the following C++ code fragment: +// +// int catcher_func() { +// try { +// int result = external_func(); +// return result + 100; +// } catch (MyException &exc) { +// return exc.value + 200; +// } +// } +// +// PNaClSjLjEH converts the IR for that function to the following +// pseudo-code: +// +// struct LandingPadResult { +// void *exception_obj; // For passing to __cxa_begin_catch() +// int matched_clause_id; // See ExceptionInfoWriter.cpp +// }; +// +// struct ExceptionFrame { +// union { +// jmp_buf jmpbuf; // Context for jumping to landingpad block +// struct LandingPadResult result; // Data returned to landingpad block +// }; +// struct ExceptionFrame *next; // Next frame in linked list +// int clause_list_id; // Reference to landingpad's exception info +// }; +// +// // Thread-local exception state +// __thread struct ExceptionFrame *__pnacl_eh_stack; +// +// int catcher_func() { +// struct ExceptionFrame frame; +// frame.next = __pnacl_eh_stack; +// frame.clause_list_id = 123; +// __pnacl_eh_stack = &frame; // Add frame to stack +// int result; +// if (!catcher_func_setjmp_caller(external_func, &frame.jmpbuf, &result)) { +// __pnacl_eh_stack = frame.next; // Remove frame from stack +// return result + 100; +// } else { +// // Handle exception. This is a simplification. Real code would +// // call __cxa_begin_catch() to extract the thrown object. +// MyException &exc = *(MyException *) frame.result.exception_obj; +// return exc.value + 200; +// } +// } +// +// // Helper function +// static int catcher_func_setjmp_caller(int (*func)(void), jmp_buf jmpbuf, +// int *result) { +// if (!setjmp(jmpbuf)) { +// *result = func(); +// return 0; +// } +// return 1; +// } +// +// We use a helper function so that setjmp() is not called directly +// from catcher_func(), due to a quirk of how setjmp() and longjmp() +// are specified in C. +// +// func() might modify variables (allocas) that are local to +// catcher_func() (if the variables' addresses are taken). The C +// standard says that these variables' values would become undefined +// after longjmp() returned if setjmp() were called from +// catcher_func(). Specifically, LLVM's GVN pass can optimize away +// stores to allocas between setjmp() and longjmp() (see +// pnacl-sjlj-eh-bug.ll for an example). But this only applies to +// allocas inside the caller of setjmp(), not to allocas inside the +// caller of the caller of setjmp(), so doing the setjmp() call inside +// a helper function that catcher_func() calls avoids the problem. +// +// The pass makes the following changes to IR: +// +// * Convert "invoke" and "landingpad" instructions. +// * Convert "resume" instructions into __pnacl_eh_resume() calls. 
+// * Replace each call to llvm.eh.typeid.for() with an integer +// constant representing the exception type. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" +#include "ExceptionInfoWriter.h" + +using namespace llvm; + +namespace { + // This is a ModulePass so that it can introduce new global variables. + class PNaClSjLjEH : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + PNaClSjLjEH() : ModulePass(ID) { + initializePNaClSjLjEHPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; + + class FuncRewriter { + StructType *ExceptionFrameTy; + ExceptionInfoWriter *ExcInfoWriter; + Function *Func; + + // FrameInitialized indicates whether the following variables have + // been initialized. + bool FrameInitialized; + Function *SetjmpIntrinsic; // setjmp() intrinsic function + Value *EHStackTlsVar; // Possibly bitcasted value of thread-local __pnacl_eh_stack var + Instruction *Frame; // Frame allocated for this function + Instruction *FrameJmpBuf; // Frame's jmp_buf field + Instruction *FrameNextPtr; // Frame's next field + Instruction *FrameExcInfo; // Frame's clause_list_id field + + Function *EHResumeFunc; // __pnacl_eh_resume() function + + // Initialize values that are shared across all "invoke" + // instructions within the function. + void initializeFrame(); + + public: + FuncRewriter(StructType *ExceptionFrameTy, ExceptionInfoWriter *ExcInfoWriter, + Function *Func): + ExceptionFrameTy(ExceptionFrameTy), + ExcInfoWriter(ExcInfoWriter), + Func(Func), + FrameInitialized(false), + SetjmpIntrinsic(NULL), EHStackTlsVar(NULL), + Frame(NULL), FrameJmpBuf(NULL), FrameNextPtr(NULL), FrameExcInfo(NULL), + EHResumeFunc(NULL) {} + + Value *createSetjmpWrappedCall(InvokeInst *Invoke); + void expandInvokeInst(InvokeInst *Invoke); + void expandResumeInst(ResumeInst *Resume); + void expandFunc(); + }; +} + +char PNaClSjLjEH::ID = 0; +INITIALIZE_PASS(PNaClSjLjEH, "pnacl-sjlj-eh", + "Lower C++ exception handling to use setjmp()", + false, false) + +static const int kPNaClJmpBufSize = 1024; +static const int kPNaClJmpBufAlign = 8; + +void FuncRewriter::initializeFrame() { + if (FrameInitialized) + return; + FrameInitialized = true; + Module *M = Func->getParent(); + + SetjmpIntrinsic = Intrinsic::getDeclaration(M, Intrinsic::nacl_setjmp); + + Instruction* InsertPt = Func->getEntryBlock().getFirstNonPHIOrDbgOrLifetime(); + + GlobalVariable *EHStackTlsVarUncast = M->getGlobalVariable("__pnacl_eh_stack"); + PointerType* EhFrameTyPtr = ExceptionFrameTy->getPointerTo(); + if (!EHStackTlsVarUncast) { + EHStackTlsVarUncast = cast(M->getOrInsertGlobal("__pnacl_eh_stack", + EhFrameTyPtr)); + EHStackTlsVarUncast->setThreadLocal(true); + EHStackTlsVarUncast->setLinkage(GlobalValue::LinkOnceAnyLinkage); + EHStackTlsVarUncast->setInitializer(ConstantPointerNull::get(EhFrameTyPtr)); + EHStackTlsVar = EHStackTlsVarUncast; + } else { + EHStackTlsVar = new BitCastInst(EHStackTlsVarUncast, + EhFrameTyPtr->getPointerTo(), + "pnacl_eh_stack", + InsertPt); + } + + // Allocate the new exception frame. This is reused across all + // invoke instructions in the function. 
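+  // The frame laid out below is { [1024 x i8] jmp_buf, ExceptionFrame *next,
+  // i32 clause_list_id }, so the GEPs use indices (0,0,0) for the jmp_buf
+  // bytes, (0,1) for "next" and (0,2) for "clause_list_id".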
+ Type *I32 = Type::getInt32Ty(M->getContext()); + Frame = new AllocaInst(ExceptionFrameTy, ConstantInt::get(I32, 1), + kPNaClJmpBufAlign, "invoke_frame", InsertPt); + + // Calculate addresses of fields in the exception frame. + Value *JmpBufIndexes[] = { ConstantInt::get(I32, 0), + ConstantInt::get(I32, 0), + ConstantInt::get(I32, 0) }; + FrameJmpBuf = GetElementPtrInst::Create(Frame, JmpBufIndexes, + "invoke_jmp_buf", InsertPt); + + Value *NextPtrIndexes[] = { ConstantInt::get(I32, 0), + ConstantInt::get(I32, 1) }; + FrameNextPtr = GetElementPtrInst::Create(Frame, NextPtrIndexes, + "invoke_next", InsertPt); + + Value *ExcInfoIndexes[] = { ConstantInt::get(I32, 0), + ConstantInt::get(I32, 2) }; + FrameExcInfo = GetElementPtrInst::Create(Frame, ExcInfoIndexes, + "exc_info_ptr", InsertPt); +} + +// Creates the helper function that will do the setjmp() call and +// function call for implementing Invoke. Creates the call to the +// helper function. Returns a Value which is zero on the normal +// execution path and non-zero if the landingpad block should be +// entered. +Value *FuncRewriter::createSetjmpWrappedCall(InvokeInst *Invoke) { + Type *I32 = Type::getInt32Ty(Func->getContext()); + + // Allocate space for storing the invoke's result temporarily (so + // that the helper function can return multiple values). We don't + // need to do this if the result is unused, and we can't if its type + // is void. + Instruction *ResultAlloca = NULL; + if (!Invoke->use_empty()) { + ResultAlloca = + new AllocaInst(Invoke->getType(), + "invoke_result_ptr", + Func->getEntryBlock().getFirstNonPHIOrDbgOrLifetime()); + } + + // Create type for the helper function. + SmallVector ArgTypes; + for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I) + ArgTypes.push_back(Invoke->getArgOperand(I)->getType()); + ArgTypes.push_back(Invoke->getCalledValue()->getType()); + ArgTypes.push_back(FrameJmpBuf->getType()); + if (ResultAlloca) + ArgTypes.push_back(Invoke->getType()->getPointerTo()); + FunctionType *FTy = FunctionType::get(I32, ArgTypes, false); + + // Create the helper function. + Function *HelperFunc = Function::Create( + FTy, GlobalValue::InternalLinkage, Func->getName() + "_setjmp_caller"); + Func->getParent()->getFunctionList().insertAfter(Func, HelperFunc); + BasicBlock *EntryBB = BasicBlock::Create(Func->getContext(), "", HelperFunc); + BasicBlock *NormalBB = BasicBlock::Create(Func->getContext(), "normal", + HelperFunc); + BasicBlock *ExceptionBB = BasicBlock::Create(Func->getContext(), "exception", + HelperFunc); + + // Unpack the helper function's arguments. + Function::arg_iterator ArgIter = HelperFunc->arg_begin(); + SmallVector InnerCallArgs; + for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I) { + ArgIter->setName("arg"); + InnerCallArgs.push_back(ArgIter++); + } + Argument *CalleeArg = ArgIter++; + Argument *JmpBufArg = ArgIter++; + CalleeArg->setName("func_ptr"); + JmpBufArg->setName("jmp_buf"); + + // Create setjmp() call. + Value *SetjmpArgs[] = { JmpBufArg }; + CallInst *SetjmpCall = CallInst::Create(SetjmpIntrinsic, SetjmpArgs, + "invoke_sj", EntryBB); + CopyDebug(SetjmpCall, Invoke); + // Setting the "returns_twice" attribute here prevents optimization + // passes from inlining HelperFunc into its caller. + SetjmpCall->setCanReturnTwice(); + // Check setjmp()'s result. 
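+  // setjmp() returns 0 on the initial call and non-zero when re-entered via
+  // longjmp(), so the helper returns 0 on the normal path and 1 on the
+  // exceptional path.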
+ Value *IsZero = CopyDebug(new ICmpInst(*EntryBB, CmpInst::ICMP_EQ, SetjmpCall, + ConstantInt::get(I32, 0), + "invoke_sj_is_zero"), Invoke); + CopyDebug(BranchInst::Create(NormalBB, ExceptionBB, IsZero, EntryBB), Invoke); + // Handle the normal, non-exceptional code path. + CallInst *InnerCall = CallInst::Create(CalleeArg, InnerCallArgs, "", + NormalBB); + CopyDebug(InnerCall, Invoke); + InnerCall->setAttributes(Invoke->getAttributes()); + InnerCall->setCallingConv(Invoke->getCallingConv()); + if (ResultAlloca) { + InnerCall->setName("result"); + Argument *ResultArg = ArgIter++; + ResultArg->setName("result_ptr"); + CopyDebug(new StoreInst(InnerCall, ResultArg, NormalBB), Invoke); + } + ReturnInst::Create(Func->getContext(), ConstantInt::get(I32, 0), NormalBB); + // Handle the exceptional code path. + ReturnInst::Create(Func->getContext(), ConstantInt::get(I32, 1), ExceptionBB); + + // Create the outer call to the helper function. + SmallVector OuterCallArgs; + for (unsigned I = 0, E = Invoke->getNumArgOperands(); I < E; ++I) + OuterCallArgs.push_back(Invoke->getArgOperand(I)); + OuterCallArgs.push_back(Invoke->getCalledValue()); + OuterCallArgs.push_back(FrameJmpBuf); + if (ResultAlloca) + OuterCallArgs.push_back(ResultAlloca); + CallInst *OuterCall = CallInst::Create(HelperFunc, OuterCallArgs, + "invoke_is_exc", Invoke); + CopyDebug(OuterCall, Invoke); + + // Retrieve the function return value stored in the alloca. We only + // need to do this on the non-exceptional path, but we currently do + // it unconditionally because that is simpler. + if (ResultAlloca) { + Value *Result = new LoadInst(ResultAlloca, "", Invoke); + Result->takeName(Invoke); + Invoke->replaceAllUsesWith(Result); + } + return OuterCall; +} + +static void convertInvokeToCall(InvokeInst *Invoke) { + SmallVector CallArgs(Invoke->op_begin(), Invoke->op_end() - 3); + // Insert a normal call instruction. + CallInst *NewCall = CallInst::Create(Invoke->getCalledValue(), + CallArgs, "", Invoke); + CopyDebug(NewCall, Invoke); + NewCall->takeName(Invoke); + NewCall->setCallingConv(Invoke->getCallingConv()); + NewCall->setAttributes(Invoke->getAttributes()); + Invoke->replaceAllUsesWith(NewCall); + + // Insert an unconditional branch to the normal destination. + BranchInst::Create(Invoke->getNormalDest(), Invoke); + // Remove any PHI node entries from the exception destination. + Invoke->getUnwindDest()->removePredecessor(Invoke->getParent()); + Invoke->eraseFromParent(); +} + +void FuncRewriter::expandInvokeInst(InvokeInst *Invoke) { + // Calls to ReturnsTwice functions, i.e. setjmp(), can't be moved + // into a helper function. setjmp() can't throw an exception + // anyway, so convert the invoke to a call. + if (Invoke->hasFnAttr(Attribute::ReturnsTwice)) { + convertInvokeToCall(Invoke); + return; + } + + initializeFrame(); + + LandingPadInst *LP = Invoke->getLandingPadInst(); + Type *I32 = Type::getInt32Ty(Func->getContext()); + Value *ExcInfo = ConstantInt::get( + I32, ExcInfoWriter->getIDForLandingPadClauseList(LP)); + + // Append the new frame to the list. + Value *OldList = CopyDebug( + new LoadInst(EHStackTlsVar, "old_eh_stack", Invoke), Invoke); + CopyDebug(new StoreInst(OldList, FrameNextPtr, Invoke), Invoke); + CopyDebug(new StoreInst(ExcInfo, FrameExcInfo, Invoke), Invoke); + CopyDebug(new StoreInst(Frame, EHStackTlsVar, Invoke), Invoke); + Value *IsException = createSetjmpWrappedCall(Invoke); + // Restore the old frame list. 
We only need to do this on the + // non-exception code path, but we currently do it unconditionally + // because that is simpler. (The PNaCl C++ runtime library restores + // the old frame list on the exceptional path; doing it again here + // redundantly is OK.) + CopyDebug(new StoreInst(OldList, EHStackTlsVar, Invoke), Invoke); + + Value *IsZero = CopyDebug(new ICmpInst(Invoke, CmpInst::ICMP_EQ, IsException, + ConstantInt::get(I32, 0), + "invoke_sj_is_zero"), Invoke); + CopyDebug(BranchInst::Create(Invoke->getNormalDest(), Invoke->getUnwindDest(), + IsZero, Invoke), + Invoke); + + Invoke->eraseFromParent(); +} + +void FuncRewriter::expandResumeInst(ResumeInst *Resume) { + if (!EHResumeFunc) { + EHResumeFunc = Func->getParent()->getFunction("__pnacl_eh_resume"); + if (!EHResumeFunc) { + // FIXME: This doesn't filter catches. Rust code can't catch, so this + // shouldn't be a problem in the short term. + + Module* M = Func->getParent(); + LLVMContext& C = M->getContext(); + EHResumeFunc = + Function::Create(FunctionType::get(Type::getVoidTy(C), + std::vector(1, + Type::getInt8Ty(C)->getPointerTo()), + false), + GlobalValue::InternalLinkage, + "__pnacl_eh_resume"); + M->getFunctionList().insertAfter(Func, EHResumeFunc); + + EHResumeFunc->setDoesNotReturn(); + + BasicBlock* Entry = BasicBlock::Create(C, "entry", EHResumeFunc); + + IntegerType* I32 = Type::getInt32Ty(M->getContext()); + + Value* EHStackTlsVar = M->getGlobalVariable("__pnacl_eh_stack"); + if(EHStackTlsVar->getType() != ExceptionFrameTy->getPointerTo()->getPointerTo()) + EHStackTlsVar = + new BitCastInst(EHStackTlsVar, + ExceptionFrameTy->getPointerTo()->getPointerTo(), + "pnacl_eh_stack"); + + LoadInst* EHStackTlsVarLoad = new LoadInst(EHStackTlsVar, "", Entry); + + Value *JmpBufIndexes[] = { ConstantInt::get(I32, 0), + ConstantInt::get(I32, 0), + ConstantInt::get(I32, 0) }; + Instruction* FrameJmpBufPtr = GetElementPtrInst::Create(EHStackTlsVarLoad, + JmpBufIndexes, + "", + Entry); + + Function* LongJmp = Intrinsic::getDeclaration(M, Intrinsic::nacl_longjmp); + Value* LongJmpArgs[] = { FrameJmpBufPtr, ConstantInt::get(I32, 1) }; + CallInst::Create(LongJmp, LongJmpArgs, "", Entry); + new UnreachableInst(C, Entry); + } + } + + // The "resume" instruction gets passed the landingpad's full result + // (struct LandingPadResult above). Extract the exception_obj field + // to pass to __pnacl_eh_resume(), which doesn't need the + // matched_clause_id field. + unsigned Indexes[] = { 0 }; + Value *ExceptionPtr = + CopyDebug(ExtractValueInst::Create(Resume->getValue(), Indexes, + "resume_exc", Resume), Resume); + + // Cast to the pointer type that __pnacl_eh_resume() expects. + if (EHResumeFunc->getFunctionType()->getFunctionNumParams() != 1) + report_fatal_error("Bad type for __pnacl_eh_resume()"); + Type *ArgType = EHResumeFunc->getFunctionType()->getFunctionParamType(0); + if(ArgType != ExceptionPtr->getType()) + ExceptionPtr = new BitCastInst(ExceptionPtr, ArgType, "resume_cast", Resume); + + Value *Args[] = { ExceptionPtr }; + CopyDebug(CallInst::Create(EHResumeFunc, Args, "", Resume), Resume); + new UnreachableInst(Func->getContext(), Resume); + Resume->eraseFromParent(); +} + +void FuncRewriter::expandFunc() { + Type *I32 = Type::getInt32Ty(Func->getContext()); + + // We need to do two passes: When we process an invoke we need to + // look at its landingpad, so we can't remove the landingpads until + // all the invokes have been processed. 
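+  // Pass 1 (below) rewrites invoke, resume and llvm.eh.typeid.for; pass 2
+  // then replaces each landingpad with a load of the LandingPadResult that
+  // the PNaCl EH runtime is expected to store over the frame's jmp_buf area
+  // before calling longjmp().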
+ for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { + Instruction *Inst = Iter++; + if (InvokeInst *Invoke = dyn_cast(Inst)) { + expandInvokeInst(Invoke); + } else if (ResumeInst *Resume = dyn_cast(Inst)) { + expandResumeInst(Resume); + } else if (IntrinsicInst *Intrinsic = dyn_cast(Inst)) { + if (Intrinsic->getIntrinsicID() == Intrinsic::eh_typeid_for) { + Value *ExcType = Intrinsic->getArgOperand(0); + Value *Val = ConstantInt::get( + I32, ExcInfoWriter->getIDForExceptionType(ExcType)); + Intrinsic->replaceAllUsesWith(Val); + Intrinsic->eraseFromParent(); + } + } + } + } + for (Function::iterator BB = Func->begin(), E = Func->end(); BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); Iter != E; ) { + Instruction *Inst = Iter++; + if (LandingPadInst *LP = dyn_cast(Inst)) { + initializeFrame(); + Value *LPPtr = new BitCastInst( + FrameJmpBuf, LP->getType()->getPointerTo(), "landingpad_ptr", LP); + Value *LPVal = CopyDebug(new LoadInst(LPPtr, "", LP), LP); + LPVal->takeName(LP); + LP->replaceAllUsesWith(LPVal); + LP->eraseFromParent(); + } + } + } +} + +bool PNaClSjLjEH::runOnModule(Module &M) { + Type *JmpBufTy = ArrayType::get(Type::getInt8Ty(M.getContext()), + kPNaClJmpBufSize); + + // Define "struct ExceptionFrame". + StructType *ExceptionFrameTy = StructType::create(M.getContext(), + "ExceptionFrame"); + Type *ExceptionFrameFields[] = { + JmpBufTy, // jmp_buf + ExceptionFrameTy->getPointerTo(), // struct ExceptionFrame *next + Type::getInt32Ty(M.getContext()) // Exception info (clause list ID) + }; + ExceptionFrameTy->setBody(ExceptionFrameFields); + + ExceptionInfoWriter ExcInfoWriter(&M.getContext()); + for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { + FuncRewriter Rewriter(ExceptionFrameTy, &ExcInfoWriter, Func); + Rewriter.expandFunc(); + } + ExcInfoWriter.defineGlobalVariables(&M); + return true; +} + +ModulePass *llvm::createPNaClSjLjEHPass() { + return new PNaClSjLjEH(); +} diff --git a/lib/Transforms/NaCl/PromoteI1Ops.cpp b/lib/Transforms/NaCl/PromoteI1Ops.cpp new file mode 100644 index 000000000000..0c5493dafe6d --- /dev/null +++ b/lib/Transforms/NaCl/PromoteI1Ops.cpp @@ -0,0 +1,171 @@ +//===- PromoteI1Ops.cpp - Promote various operations on the i1 type--------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass expands out various operations on the i1 type so that +// these i1 operations do not need to be supported by the PNaCl +// translator. +// +// This is similar to the PromoteIntegers pass in that it removes uses +// of an unusual-size integer type. The difference is that i1 remains +// a valid type in other operations. i1 can still be used in phi +// nodes, "select" instructions, in "sext" and "zext", and so on. In +// contrast, the integer types that PromoteIntegers removes are not +// allowed in any context by PNaCl's ABI verifier. +// +// This pass expands out the following: +// +// * i1 loads and stores. +// * All i1 comparisons and arithmetic operations, with the exception +// of "and", "or" and "xor", because these are used in practice and +// don't overflow. 
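+// For example, an i1 load
+//   %b = load i1* %p
+// is rewritten (roughly) as:
+//   %p8 = bitcast i1* %p to i8*
+//   %v = load i8* %p8
+//   %b = trunc i8 %v to i1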
+// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class PromoteI1Ops : public BasicBlockPass { + public: + static char ID; // Pass identification, replacement for typeid + PromoteI1Ops() : BasicBlockPass(ID) { + initializePromoteI1OpsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnBasicBlock(BasicBlock &BB); + }; +} + +char PromoteI1Ops::ID = 0; +INITIALIZE_PASS(PromoteI1Ops, "nacl-promote-i1-ops", + "Promote various operations on the i1 type", + false, false) + +static Value *promoteValue(Value *Val, bool SignExt, Instruction *InsertPt) { + Instruction::CastOps CastType = + SignExt ? Instruction::SExt : Instruction::ZExt; + return CopyDebug(CastInst::Create(CastType, Val, + Type::getInt8Ty(Val->getContext()), + Val->getName() + ".expand_i1_val", + InsertPt), InsertPt); +} + +bool PromoteI1Ops::runOnBasicBlock(BasicBlock &BB) { + bool Changed = false; + + Type *I1Ty = Type::getInt1Ty(BB.getContext()); + Type *I8Ty = Type::getInt8Ty(BB.getContext()); + + // Rewrite boolean Switch terminators: + if(isa(BB.getTerminator())) { + SwitchInst* Inst = cast(BB.getTerminator()); + Value* Condition = Inst->getCondition(); + Type* ConditionTy = Condition->getType(); + if(ConditionTy->isIntegerTy(1)) { + ConstantInt* False = cast(ConstantInt::getFalse(ConditionTy)); + ConstantInt* True = cast(ConstantInt::getTrue(ConditionTy)); + SwitchInst::CaseIt FalseCase = Inst->findCaseValue(False); + SwitchInst::CaseIt TrueCase = Inst->findCaseValue(True); + + BasicBlock* FalseBlock = FalseCase.getCaseSuccessor(); + BasicBlock* TrueBlock = TrueCase.getCaseSuccessor(); + + BasicBlock* DefaultDest = Inst->getDefaultDest(); + if(TrueBlock == NULL) { + TrueBlock = DefaultDest; + assert(TrueBlock != NULL); + } else if(FalseBlock == NULL) { + FalseBlock = DefaultDest; + assert(FalseBlock != NULL); + } else if(DefaultDest != NULL && + DefaultDest != TrueBlock && + DefaultDest != FalseBlock){ + // impossible destination + DefaultDest->removePredecessor(Inst->getParent()); + } + + CopyDebug(BranchInst::Create(TrueBlock, + FalseBlock, + Condition, + Inst), + Inst); + Inst->eraseFromParent(); + } + } + + for (BasicBlock::iterator Iter = BB.begin(), E = BB.end(); Iter != E; ) { + Instruction *Inst = Iter++; + if (LoadInst *Load = dyn_cast(Inst)) { + if (Load->getType() == I1Ty) { + Changed = true; + Value *Ptr = CopyDebug( + new BitCastInst( + Load->getPointerOperand(), I8Ty->getPointerTo(), + Load->getPointerOperand()->getName() + ".i8ptr", Load), Load); + LoadInst *NewLoad = new LoadInst( + Ptr, Load->getName() + ".pre_trunc", Load); + CopyDebug(NewLoad, Load); + CopyLoadOrStoreAttrs(NewLoad, Load); + Value *Result = CopyDebug(new TruncInst(NewLoad, I1Ty, "", Load), Load); + Result->takeName(Load); + Load->replaceAllUsesWith(Result); + Load->eraseFromParent(); + } + } else if (StoreInst *Store = dyn_cast(Inst)) { + if (Store->getValueOperand()->getType() == I1Ty) { + Changed = true; + Value *Ptr = CopyDebug( + new BitCastInst( + Store->getPointerOperand(), I8Ty->getPointerTo(), + Store->getPointerOperand()->getName() + ".i8ptr", Store), + Store); + Value *Val = promoteValue(Store->getValueOperand(), false, Store); + StoreInst *NewStore = CopyDebug(new StoreInst(Val, Ptr, Store), Store); + CopyLoadOrStoreAttrs(NewStore, Store); + Store->eraseFromParent(); + } + } else if 
(BinaryOperator *Op = dyn_cast(Inst)) { + if (Op->getType() == I1Ty && + !(Op->getOpcode() == Instruction::And || + Op->getOpcode() == Instruction::Or || + Op->getOpcode() == Instruction::Xor)) { + Value *Arg1 = promoteValue(Op->getOperand(0), false, Op); + Value *Arg2 = promoteValue(Op->getOperand(1), false, Op); + Value *NewOp = CopyDebug( + BinaryOperator::Create( + Op->getOpcode(), Arg1, Arg2, + Op->getName() + ".pre_trunc", Op), Op); + Value *Result = CopyDebug(new TruncInst(NewOp, I1Ty, "", Op), Op); + Result->takeName(Op); + Op->replaceAllUsesWith(Result); + Op->eraseFromParent(); + } + } else if (ICmpInst *Op = dyn_cast(Inst)) { + if (Op->getOperand(0)->getType() == I1Ty) { + Value *Arg1 = promoteValue(Op->getOperand(0), Op->isSigned(), Op); + Value *Arg2 = promoteValue(Op->getOperand(1), Op->isSigned(), Op); + Value *Result = CopyDebug( + new ICmpInst(Op, Op->getPredicate(), Arg1, Arg2, ""), Op); + Result->takeName(Op); + Op->replaceAllUsesWith(Result); + Op->eraseFromParent(); + } + } + } + return Changed; +} + +BasicBlockPass *llvm::createPromoteI1OpsPass() { + return new PromoteI1Ops(); +} diff --git a/lib/Transforms/NaCl/PromoteIntegers.cpp b/lib/Transforms/NaCl/PromoteIntegers.cpp new file mode 100644 index 000000000000..8d3baa4cbda1 --- /dev/null +++ b/lib/Transforms/NaCl/PromoteIntegers.cpp @@ -0,0 +1,654 @@ +//===- PromoteIntegers.cpp - Promote illegal integers for PNaCl ABI -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +// A limited set of transformations to promote illegal-sized int types. +// +//===----------------------------------------------------------------------===// +// +// Legal sizes are currently 1, 8, 16, 32, 64 (and higher, see note below) +// Operations on illegal integers and int pointers are be changed to operate +// on the next-higher legal size. +// It maintains no invariants about the upper bits (above the size of the +// original type); therefore before operations which can be affected by the +// value of these bits (e.g. cmp, select, lshr), the upper bits of the operands +// are cleared. +// +// Limitations: +// 1) It can't change function signatures or global variables +// 2) It won't promote (and can't expand) types larger than i64 +// 3) Doesn't support div operators +// 4) Doesn't handle arrays or structs (or GEPs) with illegal types +// 5) Doesn't handle constant expressions (it also doesn't produce them, so it +// can run after ExpandConstantExpr) +// +//===----------------------------------------------------------------------===// + + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { +class PromoteIntegers : public FunctionPass { + public: + static char ID; + PromoteIntegers() : FunctionPass(ID) { + initializePromoteIntegersPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F); +}; +} + +char PromoteIntegers::ID = 0; +INITIALIZE_PASS(PromoteIntegers, "nacl-promote-ints", + "Promote integer types which are illegal in PNaCl", + false, false) + +// Legal sizes are currently 1, 8, 16, 32, and 64. +// We can't yet expand types above 64 bit, so don't try to touch them for now. 
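+// For example, i24 is widened to i32, i48 to i64, and anything narrower than
+// i8 (other than i1) is widened to i8.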
+// TODO(dschuff): expand >64bit types or disallow >64bit packed bitfields. +// There are currently none in our tests that use the ABI checker. +// See https://code.google.com/p/nativeclient/issues/detail?id=3360 +static bool isLegalSize(unsigned Size) { + if (Size > 64) return true; + return Size == 1 || Size == 8 || Size == 16 || Size == 32 || Size == 64; +} + +static Type *getPromotedIntType(IntegerType *Ty) { + unsigned Width = Ty->getBitWidth(); + assert(Width <= 64 && "Don't know how to legalize >64 bit types yet"); + if (isLegalSize(Width)) + return Ty; + return IntegerType::get(Ty->getContext(), + Width < 8 ? 8 : NextPowerOf2(Width)); +} + +// Return a legal integer or pointer-to-integer type, promoting to a larger +// size if necessary. +static Type *getPromotedType(Type *Ty) { + assert((isa(Ty) || + (isa(Ty) && isa(Ty->getContainedType(0)))) + && "Trying to convert a non-integer type"); + + if (isa(Ty)) + return getPromotedIntType( + cast(Ty->getContainedType(0)))->getPointerTo(); + + return getPromotedIntType(cast(Ty)); +} + +// Return true if Val is an int or pointer-to-int which should be converted. +static bool shouldConvert(Value *Val) { + Type *Ty = Val->getType(); + if (PointerType *Pty = dyn_cast(Ty)) + Ty = Pty->getContainedType(0); + if (IntegerType *ITy = dyn_cast(Ty)) { + if (!isLegalSize(ITy->getBitWidth())) { + return true; + } + } + return false; +} + +// Return a constant which has been promoted to a legal size. +static Value *convertConstant(Constant *C, bool SignExt=false) { + assert(shouldConvert(C)); + if (isa(C)) { + return UndefValue::get(getPromotedType(C->getType())); + } else if (ConstantInt *CInt = dyn_cast(C)) { + return ConstantInt::get( + getPromotedType(C->getType()), + SignExt ? CInt->getSExtValue() : CInt->getZExtValue(), + /*isSigned=*/SignExt); + } else { + errs() << "Value: " << *C << "\n"; + report_fatal_error("Unexpected constant value"); + } +} + +// Holds the state for converting/replacing values. Conversion is done in one +// pass, with each value requiring conversion possibly having two stages. When +// an instruction needs to be replaced (i.e. it has illegal operands or result) +// a new instruction is created, and the pass calls getConverted to get its +// operands. If the original operand has already been converted, the new value +// is returned. Otherwise, a placeholder is created and used in the new +// instruction. After a new instruction is created to replace an illegal one, +// recordConverted is called to register the replacement. All users are updated, +// and if there is a placeholder, its users are also updated. +// recordConverted also queues the old value for deletion. +// This strategy avoids the need for recursion or worklists for conversion. +class ConversionState { + public: + // Return the promoted value for Val. If Val has not yet been converted, + // return a placeholder, which will be converted later. + Value *getConverted(Value *Val) { + if (!shouldConvert(Val)) + return Val; + if (isa(Val)) + report_fatal_error("Can't convert illegal GlobalVariables"); + if (RewrittenMap.count(Val)) + return RewrittenMap[Val]; + Value *P; + // Directly convert constants. + if (Constant *C = dyn_cast(Val)) { + return convertConstant(C, /*SignExt=*/false); + } else { + // No converted value available yet, so create a placeholder. 
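+      // The placeholder is a free-standing Argument of the promoted type; it
+      // is not inserted into any function, and recordConverted later redirects
+      // all of its uses to the real converted value.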
+ P = new Argument(getPromotedType(Val->getType())); + } + RewrittenMap[Val] = P; + Placeholders[Val] = P; + return P; + } + + // Replace the uses of From with To, replace the uses of any + // placeholders for From, and optionally give From's name to To. + // Also mark To for deletion. + void recordConverted(Instruction *From, Value *To, bool TakeName=true) { + ToErase.push_back(From); + if (!shouldConvert(From)) { + // From does not produce an illegal value, update its users in place. + From->replaceAllUsesWith(To); + } else { + // From produces an illegal value, so its users will be replaced. When + // replacements are created they will use values returned by getConverted. + if (Placeholders.count(From)) { + // Users of the placeholder can be updated in place. + Placeholders[From]->replaceAllUsesWith(To); + Placeholders.erase(From); + } + RewrittenMap[From] = To; + } + if (TakeName) { + To->takeName(From); + } + } + + void eraseReplacedInstructions() { + for (SmallVectorImpl::iterator I = ToErase.begin(), + E = ToErase.end(); I != E; ++I) + (*I)->dropAllReferences(); + for (SmallVectorImpl::iterator I = ToErase.begin(), + E = ToErase.end(); I != E; ++I) + (*I)->eraseFromParent(); + } + + private: + // Maps illegal values to their new converted values (or placeholders + // if no new value is available yet) + DenseMap RewrittenMap; + // Maps illegal values with no conversion available yet to their placeholders + DenseMap Placeholders; + // Illegal values which have already been converted, will be erased. + SmallVector ToErase; +}; + +// Split an illegal load into multiple legal loads and return the resulting +// promoted value. The size of the load is assumed to be a multiple of 8. +static Value *splitLoad(LoadInst *Inst, ConversionState &State) { + if (Inst->isVolatile() || Inst->isAtomic()) + report_fatal_error("Can't split volatile/atomic loads"); + if (cast(Inst->getType())->getBitWidth() % 8 != 0) + report_fatal_error("Loads must be a multiple of 8 bits"); + + Value *OrigPtr = State.getConverted(Inst->getPointerOperand()); + // OrigPtr is a placeholder in recursive calls, and so has no name + if (OrigPtr->getName().empty()) + OrigPtr->setName(Inst->getPointerOperand()->getName()); + unsigned Width = cast(Inst->getType())->getBitWidth(); + Type *NewType = getPromotedType(Inst->getType()); + unsigned LoWidth = Width; + + while (!isLegalSize(LoWidth)) LoWidth -= 8; + IntegerType *LoType = IntegerType::get(Inst->getContext(), LoWidth); + IntegerType *HiType = IntegerType::get(Inst->getContext(), Width - LoWidth); + IRBuilder<> IRB(Inst); + + Value *BCLo = CopyDebug(IRB.CreateBitCast(OrigPtr, + LoType->getPointerTo(), + OrigPtr->getName() + ".loty"), + Inst); + Value *LoadLo = CopyDebug(IRB.CreateAlignedLoad(BCLo, + Inst->getAlignment(), + Inst->getName() + ".lo"), + Inst); + Value *LoExt = CopyDebug(IRB.CreateZExt(LoadLo, NewType, LoadLo->getName() + ".ext"), + Inst); + Value *GEPHi = CopyDebug(IRB.CreateConstGEP1_32(BCLo, 1, OrigPtr->getName() + ".hi"), + Inst); + Value *BCHi = CopyDebug(IRB.CreateBitCast(GEPHi, + HiType->getPointerTo(), + OrigPtr->getName() + ".hity"), + Inst); + + Value *LoadHi = CopyDebug(IRB.CreateLoad(BCHi, Inst->getName() + ".hi"), Inst); + if (!isLegalSize(Width - LoWidth)) { + LoadHi = splitLoad(cast(LoadHi), State); + // BCHi was still illegal, and has been replaced with a placeholder in the + // recursive call. Since it is redundant with BCLo in the recursive call, + // just splice it out entirely. 
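+    // (Example: an i56 load is split into an i32 low part and an i24 high
+    // part, and the recursive call then splits the i24 into i16 and i8.)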
+ State.recordConverted(cast(BCHi), GEPHi, /*TakeName=*/false); + } + + Value *HiExt = CopyDebug(IRB.CreateZExt(LoadHi, NewType, LoadHi->getName() + ".ext"), Inst); + Value *HiShift = CopyDebug(IRB.CreateShl(HiExt, LoWidth, HiExt->getName() + ".sh"), Inst); + Value *Result = CopyDebug(IRB.CreateOr(LoExt, HiShift), Inst); + + State.recordConverted(Inst, Result); + + return Result; +} + +static Value *splitStore(StoreInst *Inst, ConversionState &State) { + if (Inst->isVolatile() || Inst->isAtomic()) + report_fatal_error("Can't split volatile/atomic stores"); + if (cast(Inst->getValueOperand()->getType())->getBitWidth() % 8 + != 0) + report_fatal_error("Stores must be a multiple of 8 bits"); + + Value *OrigPtr = State.getConverted(Inst->getPointerOperand()); + // OrigPtr is now a placeholder in recursive calls, and so has no name. + if (OrigPtr->getName().empty()) + OrigPtr->setName(Inst->getPointerOperand()->getName()); + Value *OrigVal = State.getConverted(Inst->getValueOperand()); + unsigned Width = cast( + Inst->getValueOperand()->getType())->getBitWidth(); + unsigned LoWidth = Width; + + while (!isLegalSize(LoWidth)) LoWidth -= 8; + IntegerType *LoType = IntegerType::get(Inst->getContext(), LoWidth); + IntegerType *HiType = IntegerType::get(Inst->getContext(), Width - LoWidth); + IRBuilder<> IRB(Inst); + + Value *BCLo = CopyDebug(IRB.CreateBitCast(OrigPtr, + LoType->getPointerTo(), + OrigPtr->getName() + ".loty"), + Inst); + Value *LoTrunc = CopyDebug(IRB.CreateTrunc(OrigVal, LoType, OrigVal->getName() + ".lo"), + Inst); + CopyDebug(IRB.CreateAlignedStore(LoTrunc, BCLo, Inst->getAlignment()), Inst); + + Value *HiLShr = CopyDebug(IRB.CreateLShr(OrigVal, LoWidth, OrigVal->getName() + ".hi.sh"), + Inst); + Value *GEPHi = CopyDebug(IRB.CreateConstGEP1_32(BCLo, 1, OrigPtr->getName() + ".hi"), Inst); + Value *HiTrunc = CopyDebug(IRB.CreateTrunc(HiLShr, HiType, OrigVal->getName() + ".hi"), Inst); + Value *BCHi = CopyDebug(IRB.CreateBitCast(GEPHi, + HiType->getPointerTo(), + OrigPtr->getName() + ".hity"), + Inst); + + Value *StoreHi = CopyDebug(IRB.CreateStore(HiTrunc, BCHi), Inst); + + if (!isLegalSize(Width - LoWidth)) { + // HiTrunc is still illegal, and is redundant with the truncate in the + // recursive call, so just get rid of it. + State.recordConverted(cast(HiTrunc), HiLShr, + /*TakeName=*/false); + StoreHi = splitStore(cast(StoreHi), State); + // BCHi was still illegal, and has been replaced with a placeholder in the + // recursive call. Since it is redundant with BCLo in the recursive call, + // just splice it out entirely. + State.recordConverted(cast(BCHi), GEPHi, /*TakeName=*/false); + } + State.recordConverted(Inst, StoreHi, /*TakeName=*/false); + return StoreHi; +} + +// Return a converted value with the bits of the operand above the size of the +// original type cleared. +static Value *getClearConverted(Value *Operand, Instruction *InsertPt, + ConversionState &State) { + Type *OrigType = Operand->getType(); + Instruction *OrigInst = dyn_cast(Operand); + Operand = State.getConverted(Operand); + // If the operand is a constant, it will have been created by + // ConversionState.getConverted, which zero-extends by default. 
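+  // Its upper bits are therefore already zero and no masking is needed.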
+ if (isa(Operand)) + return Operand; + Instruction *NewInst = BinaryOperator::Create( + Instruction::And, + Operand, + ConstantInt::get( + getPromotedType(OrigType), + APInt::getLowBitsSet(getPromotedType(OrigType)->getIntegerBitWidth(), + OrigType->getIntegerBitWidth())), + Operand->getName() + ".clear", + InsertPt); + if (OrigInst) + CopyDebug(NewInst, OrigInst); + return NewInst; +} + +// Return a value with the bits of the operand above the size of the original +// type equal to the sign bit of the original operand. The new operand is +// assumed to have been legalized already. +// This is done by shifting the sign bit of the smaller value up to the MSB +// position in the larger size, and then arithmetic-shifting it back down. +static Value *getSignExtend(Value *Operand, Value *OrigOperand, + Instruction *InsertPt) { + // If OrigOperand was a constant, NewOperand will have been created by + // ConversionState.getConverted, which zero-extends by default. But that is + // wrong here, so replace it with a sign-extended constant. + if (Constant *C = dyn_cast(OrigOperand)) + return convertConstant(C, /*SignExt=*/true); + Type *OrigType = OrigOperand->getType(); + ConstantInt *ShiftAmt = ConstantInt::getSigned( + cast(getPromotedType(OrigType)), + getPromotedType(OrigType)->getIntegerBitWidth() - + OrigType->getIntegerBitWidth()); + BinaryOperator *Shl = BinaryOperator::Create( + Instruction::Shl, + Operand, + ShiftAmt, + Operand->getName() + ".getsign", + InsertPt); + if (Instruction *Inst = dyn_cast(OrigOperand)) + CopyDebug(Shl, Inst); + return CopyDebug(BinaryOperator::Create( + Instruction::AShr, + Shl, + ShiftAmt, + Operand->getName() + ".signed", + InsertPt), Shl); +} + +static void convertInstruction(Instruction *Inst, ConversionState &State) { + if (SExtInst *Sext = dyn_cast(Inst)) { + Value *Op = Sext->getOperand(0); + Value *NewInst = NULL; + // If the operand to be extended is illegal, we first need to fill its + // upper bits with its sign bit. + if (shouldConvert(Op)) { + NewInst = getSignExtend(State.getConverted(Op), Op, Sext); + } + // If the converted type of the operand is the same as the converted + // type of the result, we won't actually be changing the type of the + // variable, just its value. + if (getPromotedType(Op->getType()) != + getPromotedType(Sext->getType())) { + NewInst = CopyDebug(new SExtInst( + NewInst ? NewInst : State.getConverted(Op), + getPromotedType(cast(Sext->getType())), + Sext->getName() + ".sext", Sext), Sext); + } + assert(NewInst && "Failed to convert sign extension"); + State.recordConverted(Sext, NewInst); + } else if (ZExtInst *Zext = dyn_cast(Inst)) { + Value *Op = Zext->getOperand(0); + Value *NewInst = NULL; + if (shouldConvert(Op)) { + NewInst = getClearConverted(Op, Zext, State); + } + // If the converted type of the operand is the same as the converted + // type of the result, we won't actually be changing the type of the + // variable, just its value. + if (getPromotedType(Op->getType()) != + getPromotedType(Zext->getType())) { + NewInst = CopyDebug(CastInst::CreateZExtOrBitCast( + NewInst ? NewInst : State.getConverted(Op), + getPromotedType(cast(Zext->getType())), + "", Zext), Zext); + } + assert(NewInst); + State.recordConverted(Zext, NewInst); + } else if (TruncInst *Trunc = dyn_cast(Inst)) { + Value *Op = Trunc->getOperand(0); + Value *NewInst; + // If the converted type of the operand is the same as the converted + // type of the result, we don't actually need to change the type of the + // variable, just its value. 
However, because we don't care about the values + // of the upper bits until they are consumed, truncation can be a no-op. + if (getPromotedType(Op->getType()) != + getPromotedType(Trunc->getType())) { + NewInst = CopyDebug(new TruncInst( + State.getConverted(Op), + getPromotedType(cast(Trunc->getType())), + State.getConverted(Op)->getName() + ".trunc", + Trunc), Trunc); + } else { + NewInst = State.getConverted(Op); + } + State.recordConverted(Trunc, NewInst); + } else if (AllocaInst *Alloc = dyn_cast(Inst)) { + // Don't handle arrays of illegal types, but we could handle an array + // with size specified as an illegal type, as unlikely as that seems. + if (shouldConvert(Alloc) && Alloc->isArrayAllocation()) + report_fatal_error("Can't convert arrays of illegal type"); + AllocaInst *NewInst = new AllocaInst( + getPromotedType(Alloc->getAllocatedType()), + State.getConverted(Alloc->getArraySize()), + "", Alloc); + CopyDebug(NewInst, Alloc); + NewInst->setAlignment(Alloc->getAlignment()); + State.recordConverted(Alloc, NewInst); + } else if (BitCastInst *BCInst = dyn_cast(Inst)) { + // Only handle pointers. Ints can't be casted to/from other ints + Type *DestType = shouldConvert(BCInst) ? + getPromotedType(BCInst->getDestTy()) : BCInst->getDestTy(); + Instruction *NewInst = CopyDebug(new BitCastInst( + State.getConverted(BCInst->getOperand(0)), + DestType, + "", BCInst), BCInst); + State.recordConverted(BCInst, NewInst); + } else if (LoadInst *Load = dyn_cast(Inst)) { + if (shouldConvert(Load)) { + splitLoad(Load, State); + } + } else if (StoreInst *Store = dyn_cast(Inst)) { + if (shouldConvert(Store->getValueOperand())) { + splitStore(Store, State); + } + } else if (isa(Inst)) { + report_fatal_error("can't convert calls with illegal types"); + } else if (BinaryOperator *Binop = dyn_cast(Inst)) { + Value *NewInst = NULL; + switch (Binop->getOpcode()) { + case Instruction::AShr: { + // The AShr operand needs to be sign-extended to the promoted size + // before shifting. Because the sign-extension is implemented with + // with AShr, it can be combined with the original operation. + Value *Op = Binop->getOperand(0); + Value *ShiftAmount = NULL; + APInt SignShiftAmt = APInt( + getPromotedType(Op->getType())->getIntegerBitWidth(), + getPromotedType(Op->getType())->getIntegerBitWidth() - + Op->getType()->getIntegerBitWidth()); + NewInst = CopyDebug(BinaryOperator::Create( + Instruction::Shl, + State.getConverted(Op), + ConstantInt::get(getPromotedType(Op->getType()), SignShiftAmt), + State.getConverted(Op)->getName() + ".getsign", + Binop), Binop); + if (ConstantInt *C = dyn_cast( + State.getConverted(Binop->getOperand(1)))) { + ShiftAmount = ConstantInt::get(getPromotedType(Op->getType()), + SignShiftAmt + C->getValue()); + } else { + // Clear the upper bits of the original shift amount, and add back the + // amount we shifted to get the sign bit. + ShiftAmount = getClearConverted(Binop->getOperand(1), Binop, State); + ShiftAmount = CopyDebug(BinaryOperator::Create( + Instruction::Add, + ShiftAmount, + ConstantInt::get( + getPromotedType(Binop->getOperand(1)->getType()), + SignShiftAmt), + State.getConverted(Op)->getName() + ".shamt", Binop), Binop); + } + NewInst = CopyDebug(BinaryOperator::Create( + Instruction::AShr, + NewInst, + ShiftAmount, + Binop->getName() + ".result", Binop), Binop); + break; + } + + case Instruction::LShr: + case Instruction::Shl: { + // For LShr, clear the upper bits of the operand before shifting them + // down into the valid part of the value. 
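+      // Shl needs no such clearing of its value operand: any garbage shifted
+      // into the upper bits stays above the original width and is cleared
+      // later, before an operation that observes those bits.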
+ Value *Op = Binop->getOpcode() == Instruction::LShr + ? getClearConverted(Binop->getOperand(0), Binop, State) + : State.getConverted(Binop->getOperand(0)); + NewInst = BinaryOperator::Create( + Binop->getOpcode(), Op, + // Clear the upper bits of the shift amount. + getClearConverted(Binop->getOperand(1), Binop, State), + Binop->getName() + ".result", Binop); + break; + } + case Instruction::Add: + case Instruction::Sub: + case Instruction::Mul: + case Instruction::And: + case Instruction::Or: + case Instruction::Xor: + // These operations don't care about the state of the upper bits. + NewInst = CopyDebug(BinaryOperator::Create( + Binop->getOpcode(), + State.getConverted(Binop->getOperand(0)), + State.getConverted(Binop->getOperand(1)), + Binop->getName() + ".result", Binop), Binop); + break; + case Instruction::FAdd: + case Instruction::FSub: + case Instruction::FMul: + case Instruction::UDiv: + case Instruction::SDiv: + case Instruction::FDiv: + case Instruction::URem: + case Instruction::SRem: + case Instruction::FRem: + case Instruction::BinaryOpsEnd: + // We should not see FP operators here. + // We don't handle div. + errs() << *Inst << "\n"; + llvm_unreachable("Cannot handle binary operator"); + break; + } + + if (isa(NewInst)) { + cast(NewInst)->setHasNoUnsignedWrap( + Binop->hasNoUnsignedWrap()); + cast(NewInst)->setHasNoSignedWrap( + Binop->hasNoSignedWrap()); + } + State.recordConverted(Binop, NewInst); + } else if (ICmpInst *Cmp = dyn_cast(Inst)) { + Value *Op0, *Op1; + // For signed compares, operands are sign-extended to their + // promoted type. For unsigned or equality compares, the upper bits are + // cleared. + if (Cmp->isSigned()) { + Op0 = getSignExtend(State.getConverted(Cmp->getOperand(0)), + Cmp->getOperand(0), + Cmp); + Op1 = getSignExtend(State.getConverted(Cmp->getOperand(1)), + Cmp->getOperand(1), + Cmp); + } else { + Op0 = getClearConverted(Cmp->getOperand(0), Cmp, State); + Op1 = getClearConverted(Cmp->getOperand(1), Cmp, State); + } + Instruction *NewInst = CopyDebug(new ICmpInst( + Cmp, Cmp->getPredicate(), Op0, Op1, ""), Cmp); + State.recordConverted(Cmp, NewInst); + } else if (SelectInst *Select = dyn_cast(Inst)) { + Instruction *NewInst = CopyDebug(SelectInst::Create( + Select->getCondition(), + State.getConverted(Select->getTrueValue()), + State.getConverted(Select->getFalseValue()), + "", Select), Select); + State.recordConverted(Select, NewInst); + } else if (PHINode *Phi = dyn_cast(Inst)) { + PHINode *NewPhi = PHINode::Create( + getPromotedType(Phi->getType()), + Phi->getNumIncomingValues(), + "", Phi); + CopyDebug(NewPhi, Phi); + for (unsigned I = 0, E = Phi->getNumIncomingValues(); I < E; ++I) { + NewPhi->addIncoming(State.getConverted(Phi->getIncomingValue(I)), + Phi->getIncomingBlock(I)); + } + State.recordConverted(Phi, NewPhi); + } else if (SwitchInst *Switch = dyn_cast(Inst)) { + Value *Condition = getClearConverted(Switch->getCondition(), Switch, State); + Type* ConditionType = Condition->getType(); + SwitchInst *NewInst = SwitchInst::Create( + Condition, + Switch->getDefaultDest(), + Switch->getNumCases(), + Switch); + CopyDebug(NewInst, Switch); + for (SwitchInst::CaseIt I = Switch->case_begin(), + E = Switch->case_end(); + I != E; ++I) { + // Build a new case from the ranges that map to the successor BB. Each + // range consists of a high and low value which are typed, so the ranges + // must be rebuilt and a new case constructed from them. 
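+      // Illegal case values are widened to the promoted condition type so
+      // they can still be matched against the promoted switch condition.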
+ if(shouldConvert(I.getCaseValue())) { + Type* PromotedType = getPromotedType(I.getCaseValue()->getType()); + const APInt Value = I.getCaseValue()->getValue().sext(ConditionType->getPrimitiveSizeInBits()); + Constant* C = ConstantInt::get(PromotedType, Value); + NewInst->addCase(cast(C), + I.getCaseSuccessor()); + } + else { + NewInst->addCase(I.getCaseValue(), I.getCaseSuccessor()); + } + } + Switch->eraseFromParent(); + } else { + errs() << *Inst<<"\n"; + llvm_unreachable("unhandled instruction"); + } +} + +bool PromoteIntegers::runOnFunction(Function &F) { + // Don't support changing the function arguments. This should not be + // generated by clang. + for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) { + Value *Arg = I; + if (shouldConvert(Arg)) { + errs() << "Function " << F.getName() << ": " << *Arg << "\n"; + llvm_unreachable("Function has illegal integer/pointer argument"); + } + } + + ConversionState State; + bool Modified = false; + for (Function::iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) { + for (BasicBlock::iterator BBI = FI->begin(), BBE = FI->end(); BBI != BBE;) { + Instruction *Inst = BBI++; + // Only attempt to convert an instruction if its result or any of its + // operands are illegal. + bool ShouldConvert = shouldConvert(Inst); + for (User::op_iterator OI = Inst->op_begin(), OE = Inst->op_end(); + OI != OE; ++OI) + ShouldConvert |= shouldConvert(cast(OI)); + + if (ShouldConvert) { + convertInstruction(Inst, State); + Modified = true; + } + } + } + State.eraseReplacedInstructions(); + return Modified; +} + +FunctionPass *llvm::createPromoteIntegersPass() { + return new PromoteIntegers(); +} diff --git a/lib/Transforms/NaCl/PromoteReturnedStructs.cpp b/lib/Transforms/NaCl/PromoteReturnedStructs.cpp new file mode 100644 index 000000000000..cf332715e8a7 --- /dev/null +++ b/lib/Transforms/NaCl/PromoteReturnedStructs.cpp @@ -0,0 +1,544 @@ +//===- PromoteReturnedStructs.cpp - Promote returned structures to sret args==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// ** THIS PASS DESTROYS ABI RULES ** +// Getting this right is tricky; we can't rely on internal linkage specifications to avoid +// changing the ABI, ie Pepper PPB interfaces. Currently, we special case the return +// type {}*, replacing it with i8*. +// +// TODO(diamond): some PPB interface functions return structures; this pass breaks +// that particular ABI detail. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Analysis/NaCl.h" +#include +#include +#include + +using namespace llvm; + +class PromoteReturnedStructs : public ModulePass { +public: + static char ID; // Pass identification, replacement for typeid + PromoteReturnedStructs(); + + typedef std::map::iterator ty_iterator; + std::map m_types; + + typedef std::set::iterator gv_iterator; + std::set m_globals; + + typedef std::map::iterator const_iterator; + std::map m_consts; + + // we can't move the return type of functions like malloc PPP_GetInterface etc. 
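+  // Only Itanium-mangled C++ symbols (names starting with "_ZN") are
+  // rewritten; anything else is treated as part of an external ABI.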
+ bool isProtected(Function* F) { + return !F->getName().startswith("_ZN"); + } + + // Rust uses {}* as void pointers. + bool isVoidPtrTy(Type* Ty) { + return Ty->isPointerTy() && + Ty->getContainedType(0)->isStructTy() && + (cast(Ty->getContainedType(0))->isOpaque() || + cast(Ty->getContainedType(0))->getNumElements() == 0); + } + + void promoteFunction(Function* F); + void promoteGlobalVariable(GlobalVariable* G); + + Type* promoteType(Type* Ty, const bool InRetPos = false); + Constant* promoteConstant(Constant* C); + void promoteOperands(User* U); + Value* promoteOperand(Value* Op); + + void promoteGlobal(GlobalValue* V); + + bool shouldPromote(Type* Ty); + + template + void promoteCallInst(T* Inst); + template + void promoteCallArgs(T* Inst); + + bool runOnModule(Module& M); +}; + +char PromoteReturnedStructs::ID = 0; +INITIALIZE_PASS(PromoteReturnedStructs, "promote-returned-structures", + "Promote returned structures to sret arguments", + false, false) + +// I'mma leave this here; it should get optimized away anyhow. +static size_t ActualPromotedFunctions1; + +PromoteReturnedStructs::PromoteReturnedStructs() +: ModulePass(ID) { + initializePromoteReturnedStructsPass(*PassRegistry::getPassRegistry()); +} + +bool PromoteReturnedStructs::shouldPromote(Type* Ty) { + return Ty != NULL && !Ty->isVoidTy() && + !(isVoidPtrTy(Ty)) && + (Ty->isAggregateType() || + (isa(Ty) && shouldPromote(Ty->getContainedType(0)))); +} +Type* PromoteReturnedStructs::promoteType(Type* Ty, const bool InRetPos) { + if(Ty == NULL) + return NULL; + + ty_iterator i = m_types.find(Ty); + if(i != m_types.end()) { + assert(i->second != NULL && "promoteType"); + return i->second; + } + Type* NewTy = NULL; + + if(isa(Ty)) { + if(InRetPos && isVoidPtrTy(Ty)) { + LLVMContext& C = Ty->getContext(); + NewTy = Type::getInt8Ty(C)->getPointerTo(); + } else { + Type* InnerTy = Ty->getContainedType(0); + Type* NewInnerTy = promoteType(InnerTy); + NewTy = PointerType::get(NewInnerTy, 0); + } + } else if(isa(Ty) && Ty->getStructNumElements() != 0) { + StructType* STy = cast(Ty); + + StructType* NewSTy; + if(STy->hasName()) + NewSTy = StructType::create(Ty->getContext(), STy->getName()); + else + NewSTy = StructType::create(Ty->getContext()); + + NewTy = NewSTy; + m_types[Ty] = NewTy; + m_types[NewTy] = NewTy; + + std::vector Types; + Types.reserve(STy->getNumElements()); + + for(unsigned j = 0; j < STy->getNumElements(); ++j) { + Type* OldTy2 = STy->getElementType(j); + Type* NewTy2 = promoteType(OldTy2); + Types.push_back(NewTy2); + } + NewSTy->setBody(Types, STy->isPacked()); + return NewTy; + } else if(isa(Ty)) { + FunctionType* FTy = cast(Ty); + Type* RetTy = FTy->getReturnType(); + + const bool PromoteRet = shouldPromote(RetTy); + + Type* NewRetTy = promoteType(RetTy, !PromoteRet); + + std::vector Args; + Args.reserve(FTy->getNumParams() + 1); + + if(PromoteRet) + Args.push_back(PointerType::get(NewRetTy, 0)); + + for(unsigned j = 0; j < FTy->getNumParams(); ++j) { + Type* OldTy2 = FTy->getParamType(j); + Type* NewTy2 = promoteType(OldTy2); + Args.push_back(NewTy2); + } + + if(PromoteRet) + NewTy = FunctionType::get(Type::getVoidTy(Ty->getContext()), Args, FTy->isVarArg()); + else + NewTy = FunctionType::get(NewRetTy, Args, FTy->isVarArg()); + } else if(isa(Ty)) { + ArrayType* ATy = cast(Ty); + Type* ElementTy = ATy->getElementType(); + Type* NewElemTy = promoteType(ElementTy); + NewTy = ArrayType::get(NewElemTy, ATy->getNumElements()); + } else { + NewTy = Ty; + } + + assert(NewTy != NULL); + 
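+  // Cache the mapping in both directions so an already-promoted type maps to
+  // itself on later lookups.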
m_types.insert(std::make_pair(Ty, NewTy)); + if(Ty != NewTy) + m_types.insert(std::make_pair(NewTy, NewTy)); + return NewTy; +} +void PromoteReturnedStructs::promoteFunction(Function* F) { + if(F->isIntrinsic()) + return; + + Type* RetTy = F->getReturnType(); + const bool IsProtected = isProtected(F); + const bool ShouldPromote = !IsProtected && shouldPromote(RetTy); + + Argument* RetArg = NULL; + if(ShouldPromote) { + RetTy = promoteType(RetTy); + RetTy = PointerType::get(RetTy, 0); + + // first create the new argument + RetArg = new Argument(RetTy, ""); + F->getArgumentList().insert(F->arg_begin(), RetArg); + + // then shift the existing attrs: + AttributeSet Attrs = F->getAttributes(); + AttributeSet NewAttrs; + for(unsigned i = 0; i < Attrs.getNumSlots(); ++i) { + unsigned Index = Attrs.getSlotIndex(i); + AttrBuilder B(Attrs, Index); + if(Index != AttributeSet::FunctionIndex && Index != AttributeSet::ReturnIndex) { + Index += 1; + } else if(Index == AttributeSet::ReturnIndex) { + AttributeSet RetSet = AttributeFuncs::typeIncompatible(RetTy, + 1); + B.removeAttributes(RetSet, 1); + NewAttrs = NewAttrs.addAttributes(F->getContext(), + 1, + AttributeSet::get(F->getContext(), 1, B)); + + // now the new return attrs: + B = AttrBuilder(Attrs, 0); + RetSet = AttributeFuncs::typeIncompatible(Type::getVoidTy(F->getContext()), + 0); + B.removeAttributes(RetSet, 0); + NewAttrs = NewAttrs.addAttributes(F->getContext(), + 0, + AttributeSet::get(F->getContext(), 0, B)); + continue; + } else { /* function index */ } + NewAttrs = NewAttrs.addAttributes(F->getContext(), + Index, + AttributeSet::get(F->getContext(), Index, B)); + } + Type* OldTy = F->Value::getType(); + Type* NewTy = promoteType(OldTy); + F->mutateType(NewTy); + + F->setAttributes(NewAttrs); + F->addAttribute(1, Attribute::StructRet); + } else { + std::vector Args; + Args.reserve(F->getFunctionType()->getNumParams()); + + for(unsigned j = 0; j < F->getFunctionType()->getNumParams(); ++j) { + Type* OldTy2 = F->getFunctionType()->getParamType(j); + Type* NewTy2 = promoteType(OldTy2); + Args.push_back(NewTy2); + } + + FunctionType* FTy = FunctionType::get(promoteType(F->getReturnType(), true), + Args, F->isVarArg()); + F->mutateType(FTy->getPointerTo()); + } + + const Function::arg_iterator arg_end = F->arg_end(); + for(Function::arg_iterator i = F->arg_begin(); i != arg_end; ++i) { + Type* OldTy = i->getType(); + Type* NewTy = promoteType(OldTy); + i->mutateType(NewTy); + } + + const Function::iterator end = F->end(); + for(Function::iterator i = F->begin(); i != end; ++i) { + BasicBlock::iterator end = i->end(); + for(BasicBlock::iterator j = i->begin(); j != end;) { + if(isa(*j)) { + CallInst* Call = cast(j++); + promoteCallInst(Call); + } else if(isa(*j)) { + InvokeInst* Invoke = cast(j++); + promoteCallInst(Invoke); + } else { + promoteOperands(j); + Type* Ty = j->getType(); + Type* NewTy = promoteType(Ty); + j->mutateType(NewTy); + ++j; + } + } + + TerminatorInst* Terminator = i->getTerminator(); + if(isa(Terminator)) { + LLVMContext& C = F->getContext(); + ReturnInst* Ret = cast(Terminator); + Value* RetVal = Ret->getReturnValue(); + if(ShouldPromote) { + StoreInst* Store = CopyDebug(new StoreInst(RetVal, RetArg, Ret), Terminator); + Store->setAlignment(F->getParamAlignment(1)); + CopyDebug(ReturnInst::Create(C, Ret->getParent()), Terminator); + Ret->dropAllReferences(); + Ret->eraseFromParent(); + } else if(isVoidPtrTy(Terminator->getType())) { + Type* NewTy = promoteType(Terminator->getType(), true); + BitCastInst* BitCast = 
CopyDebug(new BitCastInst(RetVal, NewTy, "", Ret), Ret); + CopyDebug(ReturnInst::Create(C, BitCast, Ret->getParent()), Ret); + Ret->dropAllReferences(); + Ret->eraseFromParent(); + } + } + } +} +void PromoteReturnedStructs::promoteGlobalVariable(GlobalVariable* G) { + Type* OriginalTy = G->getType(); + Type* PromotedTy = promoteType(OriginalTy); + G->mutateType(PromotedTy); + if(G->hasInitializer()) { + Constant* OldC = G->getInitializer(); + Constant* NewC = promoteConstant(OldC); + G->setInitializer(NewC); + } +} +Constant* PromoteReturnedStructs::promoteConstant(Constant* C) { + if(isa(C)) { + GlobalValue* V = cast(C); + promoteGlobal(V); + return C; + } + + std::pair i + = m_consts.insert(std::make_pair(C, (Constant*)NULL)); + // If i.first->second is NULL, we've encountered a recursion. + // See the comment in the first branch. + if(!i.second && i.first->second != NULL) { + return i.first->second; + } + + Constant*& NewC = i.first->second; + if(isa(C) || + isa(C) || + isa(C)) { + std::vector Consts; + Consts.reserve(C->getNumOperands()); + const User::value_op_iterator end = C->value_op_end(); + for(User::value_op_iterator i = C->value_op_begin(); i != end; ++i) { + Constant* OldC2 = cast(*i); + Constant* NewC2 = promoteConstant(OldC2); + + // the promotion of one of the operands caused us to circle back around to this const. + // the only way this can happen is through a global, which means the second time around + // would skip the global causing the recursion, allowing the promotion to finish. + // if all that happens, our reference into the map will reflect the promotion, + // NewC != NULL, and we can just return. + if(NewC != NULL) + return NewC; + + Consts.push_back(NewC2); + } + + Type* Ty = C->getType(); + Type* NewTy = promoteType(Ty); + if(ConstantExpr* CE = dyn_cast(C)) { + NewC = CE->getWithOperands(Consts, NewTy); + } else if(isa(C)) { + StructType* STy = cast(NewTy); + NewC = ConstantStruct::get(STy, Consts); + } else if(isa(C)) { + ArrayType* ATy = cast(NewTy); + NewC = ConstantArray::get(ATy, Consts); + } + } else if(isa(C)) { + NewC = UndefValue::get(promoteType(C->getType())); + } else if(isa(C)) { + Type* OldTy = C->getType(); + PointerType* NewTy = cast(promoteType(OldTy)); + NewC = ConstantPointerNull::get(NewTy); + } else if(isa(C)) { + NewC = ConstantAggregateZero::get(promoteType(C->getType())); + } else if(isa(C)) { + NewC = C; + } else { + assert(!shouldPromote(C->getType())); + NewC = C; + } + + assert(NewC != NULL); + if(C != NewC) + m_consts.insert(std::make_pair(NewC, NewC)).second; + return NewC; +} +void PromoteReturnedStructs::promoteOperands(User* U) { + unsigned pos = 0; + const User::value_op_iterator end = U->value_op_end(); + for(User::value_op_iterator k = U->value_op_begin(); k != end; ++k, ++pos) { + Value* V = *k; + U->setOperand(pos, promoteOperand(V)); + } +} +Value* PromoteReturnedStructs::promoteOperand(Value* V) { + if(isa(V)) { + Constant* C = cast(V); + Constant* NewC = promoteConstant(C); + return NewC; + } else + return V; +} +void PromoteReturnedStructs::promoteGlobal(GlobalValue* V) { + if(m_globals.insert(V).second) { + if(isa(V)) { + Function* F = cast(V); + promoteFunction(F); + ++ActualPromotedFunctions1; + } else if(isa(V)) { + GlobalVariable* G = cast(V); + promoteGlobalVariable(G); + } + } +} + +template +void PromoteReturnedStructs::promoteCallInst(T* Inst) { + Value* Called = Inst->getCalledValue(); + if(isa(Called)) { + Function* F = cast(Called); + if(F->isIntrinsic()) + return; + else if(isProtected(F) || 
isVoidPtrTy(Inst->getType())) { + promoteCallArgs(Inst); + return; + } + } + + if(isa(Called)) { + GlobalValue* G = cast(Called); + promoteGlobal(G); + } + FunctionType* FTy = cast(Called->getType()->getContainedType(0)); + Type* InstTy = Inst->getType(); + if(shouldPromote(Inst->getType())) { + Type* AllocaTy = promoteType(InstTy); + AllocaInst* Alloca = CopyDebug(new AllocaInst(AllocaTy, NULL, "", Inst), Inst); + std::vector Args; + Args.reserve(FTy->getNumParams() + 1); + Args.push_back(Alloca); + for(unsigned i = 0; i < Inst->getNumArgOperands(); ++i) { + Value* V = Inst->getArgOperand(i); + if(isa(V)) { + Constant* C = cast(V); + V = promoteConstant(C); + } + Args.push_back(V); + } + Value* BaseCall = NULL; + // Note we don't set the calling convention; + // PNaCl just overrides it anyway. + if(isa(Inst)) { + CallInst* OldCall = cast(Inst); + CallInst* Call = CopyDebug(CallInst::Create(Called, Args, "", Inst), Inst); + if(OldCall->canReturnTwice()) + Call->setCanReturnTwice(); + if(OldCall->cannotDuplicate()) + Call->setCannotDuplicate(); + BaseCall = Call; + } + else if(isa(Inst)) { + // should optimize to a no-op. + InvokeInst* Inst = cast(Inst); + BaseCall = CopyDebug(InvokeInst::Create(Called, + Inst->getNormalDest(), + Inst->getUnwindDest(), + Args, + "", + Inst), + Inst); + } + T* Call = cast(BaseCall); + if(Inst->doesNotThrow()) + Call->setDoesNotThrow(); + if(Inst->isNoInline()) + Call->setIsNoInline(); + if(Inst->doesNotAccessMemory()) + Call->setDoesNotAccessMemory(); + if(Inst->doesNotReturn()) + Call->setDoesNotReturn(); + if(Inst->onlyReadsMemory()) + Call->setOnlyReadsMemory(); + + LoadInst* Ret; + if(Call->getNextNode() != NULL) + Ret = new LoadInst(Alloca, + "", + Call->getNextNode()); + else + Ret = new LoadInst(Alloca, + "", + Call->getParent()); + + CopyDebug(Ret, Call); + + if(isa(Called)) + Ret->setAlignment(cast(Called)->getParamAlignment(1)); + Inst->mutateType(AllocaTy); + Inst->replaceAllUsesWith(Ret); + Inst->dropAllReferences(); + Inst->eraseFromParent(); + } else { + promoteCallArgs(Inst); + } +} +template +void PromoteReturnedStructs::promoteCallArgs(T* Inst) { + unsigned end = Inst->getNumArgOperands(); + for(unsigned k = 0; k < end; ++k) { + Value* V = Inst->getArgOperand(k); + Value* NewV = promoteOperand(V); + Inst->setArgOperand(k, NewV); + } + Type* NewTy = promoteType(Inst->getType(), true); + Inst->mutateType(NewTy); +} +bool PromoteReturnedStructs::runOnModule(Module& M) { + // I'mma leave this here; it should get optimized away anyhow. 
+ size_t Promoted = 0; + { + const Module::iterator end = M.end(); + for(Module::iterator i = M.begin(); i != end; ++i) { + promoteGlobal(i); + Promoted++; + } + } + { + const Module::global_iterator end = M.global_end(); + for(Module::global_iterator i = M.global_begin(); i != end; ++i) { + promoteGlobal(i); + } + } + // remove dangling consts: + { + const const_iterator end = m_consts.end(); + for(const_iterator i = m_consts.begin(); i != end; ++i) { + (*i).second->removeDeadConstantUsers(); + } + } + { + const gv_iterator end = m_globals.end(); + for(gv_iterator i = m_globals.begin(); i != end; ++i) { + (*i)->removeDeadConstantUsers(); + } + } + + m_globals.clear(); + m_types.clear(); + m_consts.clear(); + return true; +} + +ModulePass *llvm::createPromoteReturnedStructsPass() { + return new PromoteReturnedStructs(); +} diff --git a/lib/Transforms/NaCl/PromoteSimpleStructs.cpp b/lib/Transforms/NaCl/PromoteSimpleStructs.cpp new file mode 100644 index 000000000000..9581ed722aae --- /dev/null +++ b/lib/Transforms/NaCl/PromoteSimpleStructs.cpp @@ -0,0 +1,995 @@ +//===- PromoteSimpleStructs.cpp - Expand out structs with a single element-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +//===----------------------------------------------------------------------===// + + +#include "llvm/Pass.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include +#include + +using namespace llvm; + +struct PromoteSimpleStructs : public ModulePass { + static char ID; + PromoteSimpleStructs() : ModulePass(ID) { + initializePromoteSimpleStructsPass(*PassRegistry::getPassRegistry()); + } + + struct ConversionState { + typedef std::set::iterator iterator; + std::set m_replacements; + PromoteSimpleStructs* m_p; + + Function* m_f; + + void convertOperands(User* From); + + void recordConverted(Instruction* I); + void eraseConverted(Instruction* I); + size_t convertedSize(); + + Value* get(Value* From, Type** OldTy = NULL); + + void convertBlock(BasicBlock* Bb); + Value* convertInstruction(Instruction* Inst); + Value* convertGEPInstruction(GetElementPtrInst* Inst, + Type* OriginalTy, + Type* PromotedTy, + Value* PointerOp, + Type* PointerOpOriginalTy); + Value* convertEVOrIVInstruction(Instruction* Inst, + Type* OriginalTy, + Type* PromotedTy, + Value* AggOp, + Type* AggOpOriginalTy); + template void convertCall(T* Call); + void possiblyConvertUsers(Instruction* Inst, Value* Replacement, Type* OriginalTy); + + ConversionState() {} + ConversionState(PromoteSimpleStructs* P, Function* F) + : m_p(P), m_f(F) { + const Function::arg_iterator end = F->arg_end(); + for(Function::arg_iterator i = F->arg_begin(); i != end; ++i) { + Type* OriginalTy = i->getType(); + Type* PromotedTy = m_p->getPromotedType(OriginalTy); + m_p->mutateAndReplace(i, i, OriginalTy, PromotedTy); + } + } + ~ConversionState() { + const PromoteSimpleStructs::origin_ty_iterator end = m_p->m_original_types.end(); + for(PromoteSimpleStructs::origin_ty_iterator i = m_p->m_original_types.begin(); + i != end;) { + if(isa(i->first) || isa(i->first)) + m_p->m_original_types.erase(i++); + 
else + ++i; + } + } + }; + + Module* m_module; + typedef std::set::iterator iterator; + typedef std::map::iterator ty_iterator; + typedef std::map::iterator origin_ty_iterator; + typedef std::map::iterator const_iterator; + + std::set m_promoted; + std::map m_promoted_types; + std::map m_promoted_consts; + std::map m_original_types; + std::stack m_delayed; + +#ifndef NDEBUG + void debug_print_all_original_types(); +#endif + + GlobalValue* getPromoted(GlobalValue* F); + Type* getPromotedType(Type* T); + Type* getPromotedTypeImpl(Type* T); + static bool shouldPromote(Type* T) { + std::set chain; + return shouldPromote(T, chain); + } + static bool shouldPromote(Type* T, std::set& Chain); + static bool shouldPromote(Function* F); + inline static bool isShallowPromotable(Type* T) { + return (isa(T) && cast(T)->getNumElements() == 1) || + (isa(T) && cast(T)->getNumElements() == 1) || + (isa(T) && isShallowPromotable(T->getContainedType(0))); + } + inline Constant* getPromotedConstant(Use* U) { + return getPromotedConstant(cast(U)); + } + Constant* getPromotedConstant(Constant* C); + + Type* getOriginalType(Value* V); + void recordOriginalType(Value* NewV, Type* OldTy); + void eraseOriginalType(Value* V) { + origin_ty_iterator i = m_original_types.find(V); + assert(i != m_original_types.end()); + m_original_types.erase(i); + } + void mutateAndReplace(Value* OldV, Value* NewV, Type* OldT, Type* NewT); + + void promoteGlobal(GlobalVariable* G); + Function* promoteFunction(Function& F, const bool PrototypeOnly = false); + + bool isAggregateType(Type* T); + + bool runOnModule(Module& M); +}; +char PromoteSimpleStructs::ID = 0; +INITIALIZE_PASS(PromoteSimpleStructs, + "promote-simple-structs", + "Promote out structs with a single element", + false, + false) + +template +const std::string ToStr(const T &V) { + std::string S; + raw_string_ostream OS(S); + OS << const_cast(V); + return OS.str(); +} + +#ifndef NDEBUG +// for use in gdb +void debug_printf(Value* V) { + std::cout << ToStr(*V) << std::endl; +} +void debug_printf(Type* T) { + std::cout << ToStr(*T) << std::endl; +} + +void debug_collect_all_subtypes(Type* T, std::set& Collection); +void debug_printf_all_subtypes(Type* T) { + std::set C; + debug_collect_all_subtypes(T, C); + const std::set::iterator end = C.end(); + for(std::set::iterator i = C.begin(); i != end; ++i) { + debug_printf(*i); + } +} +void debug_collect_all_subtypes(Type* T, std::set& Collection) { + if(Collection.count(T) > 0) + return; + else if(isa(T)) { + Collection.insert(T); + } + + for(size_t i = 0; i < T->getNumContainedTypes(); ++i) { + debug_collect_all_subtypes(T->getContainedType(i), Collection); + } +} + +void PromoteSimpleStructs::debug_print_all_original_types() { + const origin_ty_iterator end = m_original_types.end(); + for(origin_ty_iterator i = m_original_types.begin(); i != end; ++i) { + std::cout << "Value : " << ToStr(*i->first) << std::endl; + std::cout << "Original type: " << ToStr(*i->second) << std::endl; + } +} +#endif +size_t PromoteSimpleStructs::ConversionState::convertedSize() { + return m_replacements.size(); +} +void PromoteSimpleStructs::ConversionState::eraseConverted(Instruction* I) { + iterator i = m_replacements.find(I); +#ifndef NDEBUG + if(i == m_replacements.end()) { + errs() << "Value: " << ToStr(*I) << "\n"; + assert(i != m_replacements.end() && "Value not converted!"); + llvm_unreachable("Value not converted!"); + } +#endif + m_replacements.erase(i); +} +void PromoteSimpleStructs::recordOriginalType(Value* NewV, Type* OldTy) { + 
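+  // Remember the pre-promotion type of NewV; an existing entry with a
+  // different type means the value was promoted twice, which is a bug.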
std::pair R = m_original_types.insert(std::make_pair(NewV, OldTy)); + if(!R.second && R.first->second != OldTy) { + errs() << "New value : " << ToStr(*NewV) << "\n"; + errs() << "Original type: " << ToStr(*R.first->second) << "\n"; + errs() << "Old type : " << ToStr(*OldTy) << "\n"; + assert(0 && "Value already promoted!"); + llvm_unreachable("Value already promoted!"); + } +} +void PromoteSimpleStructs::mutateAndReplace(Value* OldV, Value* NewV, Type* OldT, Type* NewT) { + recordOriginalType(NewV, OldT); + + if(OldT != NewT) + OldV->mutateType(NewT); + if(OldV != NewV) { + OldV->replaceAllUsesWith(NewV); + if(isa(OldV)) + cast(OldV)->eraseFromParent(); + } +} + +GlobalValue* PromoteSimpleStructs::getPromoted(GlobalValue* F) { + if(!m_promoted.count(F)) { + if(isa(F)) { + promoteFunction(*cast(F), true); + } else if(isa(F)) { + promoteGlobal(cast(F)); + } + } + return F; +} +Type* PromoteSimpleStructs::getPromotedType(Type* T) { + const ty_iterator i = m_promoted_types.find(T); + if(i == m_promoted_types.end()) { + Type* NewT = getPromotedTypeImpl(T); + m_promoted_types.insert(std::make_pair(T, NewT)); + m_promoted_types.insert(std::make_pair(NewT, NewT)); + return NewT; + } + else + return i->second; +} +Type* PromoteSimpleStructs::getOriginalType(Value* V) { + if(isa(V) && !isa(V)) + return V->getType(); + + origin_ty_iterator i = m_original_types.find(V); + if(i == m_original_types.end()) { + errs() << "Value: " << ToStr(*V) << "\n"; +#ifndef NDEBUG + assert(0 && "Couldn't find the original type!"); +#endif + llvm_unreachable("Couldn't find the original type!"); + } else + return i->second; +} + +Type* PromoteSimpleStructs::getPromotedTypeImpl(Type* T) { + if(!shouldPromote(T)) + return T; + + if(FunctionType* FT = dyn_cast(T)) { + Type* RetT; + RetT = getPromotedType(FT->getReturnType()); + + std::vector ArgTs; + ArgTs.reserve(FT->getNumParams()); + + const FunctionType::param_iterator i_end = FT->param_end(); + for(FunctionType::param_iterator i = FT->param_begin(); i != i_end; ++i) { + ArgTs.push_back(getPromotedType(*i)); + } + + return FunctionType::get(RetT, ArgTs, FT->isVarArg()); + } else if(PointerType* PT = dyn_cast(T)) { + Type* InnerInnerTy = PT->getElementType(); + if(shouldPromote(InnerInnerTy)) + return PointerType::get(getPromotedType(InnerInnerTy), 0); + else + return T; + } else if(StructType* ST = dyn_cast(T)) { + if(ST->getNumElements() == 0) + return T; + if(ST->getNumElements() == 1) + return getPromotedType(ST->getElementType(0)); + + StructType* Struct; + if(ST->hasName()) + Struct = StructType::create(T->getContext(), ST->getName()); + else + Struct = StructType::create(T->getContext()); + // This is a requisite for recursive structures. 
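+    // Publish the still-empty replacement struct before promoting the element
+    // types, so recursive references resolve to it instead of looping; the
+    // provisional entry is erased again once the body has been set.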
+ m_promoted_types[T] = Struct; + + std::vector ArgTs; + ArgTs.reserve(ST->getNumElements()); + + const StructType::element_iterator end = ST->element_end(); + for(StructType::element_iterator i = ST->element_begin(); i != end; ++i) { + ArgTs.push_back(getPromotedType(*i)); + } + Struct->setBody(ArgTs); + + m_promoted_types.erase(T); + + return Struct; + } else if(ArrayType* AT = dyn_cast(T)) { + if(AT->getNumElements() == 1) + return getPromotedType(AT->getElementType()); + + if(shouldPromote(AT->getElementType())) + return ArrayType::get(getPromotedType(AT->getElementType()), AT->getNumElements()); + else + return T; + } else { + return T; + } +} + +struct PromotionChainJanitor { + std::set& chain; + Type* t; + PromotionChainJanitor(Type* T, std::set& Chain) + : chain(Chain) + , t(T) { + assert(chain.count(T) == 0); + chain.insert(T); + } + ~PromotionChainJanitor() { + assert(chain.count(t) != 0); + chain.erase(t); + } +}; + +bool PromoteSimpleStructs::shouldPromote(Type* T, std::set& Chain) { + assert(T != NULL); + if(Chain.count(T) > 0) + return false; + + PromotionChainJanitor cleanup(T, Chain); + + if(isa(T)) { + FunctionType* FT = cast(T); + if(shouldPromote(FT->getReturnType(), Chain)) + return true; + + const FunctionType::param_iterator end = FT->param_end(); + for(FunctionType::param_iterator i = FT->param_begin(); i != end; ++i) { + if(shouldPromote(*i, Chain)) + return true; + } + + return false; + } else if(isa(T)) { + StructType* ST = cast(T); + if(ST->getNumElements() == 1) + return true; + + const StructType::element_iterator end = ST->element_end(); + for(StructType::element_iterator i = ST->element_begin(); i != end; ++i) { + // short cut for recursive structures + if(shouldPromote(*i, Chain)) + return true; + } + return false; + } + + return (isa(T) && shouldPromote(cast(T)->getElementType(), Chain)) || + (isa(T) && (cast(T)->getNumElements() == 1 || + shouldPromote(T->getContainedType(0), Chain))); +} +bool PromoteSimpleStructs::shouldPromote(Function* F) { + return F && shouldPromote(F->getFunctionType()); +} +bool PromoteSimpleStructs::isAggregateType(Type* T) { + return T && (isa(T) || isa(T)); +} + +void PromoteSimpleStructs::ConversionState::recordConverted(Instruction* I) { + if(!I) { + errs() << __FUNCTION__ << ":\n"; + assert(0); + llvm_unreachable("I is NULL"); + } + const bool result = m_replacements.insert(I).second; + assert(result && "Instruction already patched!"); + (void)result; +} +Value* PromoteSimpleStructs::ConversionState::get(Value* From, Type** OldTy) { + if(isa(From)) { + if(OldTy != NULL) + *OldTy = m_p->getOriginalType(From); + return From; + } else if(isa(From)) { + Instruction* Inst = cast(From); + if(m_replacements.count(Inst)) { + if(OldTy != NULL) + *OldTy = m_p->getOriginalType(From); + return From; + } else { + if(OldTy != NULL) + *OldTy = Inst->getType(); + return convertInstruction(Inst); + } + } else if(isa(From)) { + Value* Promoted = m_p->getPromoted(cast(From)); + if(OldTy != NULL) + *OldTy = m_p->getOriginalType(From); + return Promoted; + } else if(isa(From)) { + if(OldTy != NULL) + *OldTy = From->getType(); + return m_p->getPromotedConstant(cast(From)); + } else if(isa(From) || isa(From) || isa(From)) { + if(OldTy != NULL) + *OldTy = From->getType(); + return From; + } + + assert(0 && "Unhandled case!"); + llvm_unreachable("Unhandled case!"); +} + +void PromoteSimpleStructs::ConversionState::convertOperands(User* From) { + unsigned j = 0; + const User::op_iterator end = From->op_end(); + for(User::op_iterator i = 
From->op_begin(); i != end; ++i, ++j) { + Value* R = get(*i); + // sometimes constant prop will short circuit an expression + // possibly yielding a global var; hence check the old operand + // for global var-ness. + if(isa(R) && !isa(*i)) + From->setOperand(j, R); + } +} + +void PromoteSimpleStructs::ConversionState::convertBlock(BasicBlock* Bb) { + const BasicBlock::iterator j_end = Bb->end(); + for(BasicBlock::iterator j = Bb->begin(); j != j_end;) { + Instruction* Inst = cast(&*(j++)); + if(&*j == NULL) { + // restart if our iterator was invalidated. + // this is Okay because m_replacements will still hold + // the state on whats converted and whats not, + // though the inevitable waste is a bit unfortunate. + j = Bb->begin(); + continue; + } + + if(!m_replacements.count(Inst)) + convertInstruction(Inst); + } // basicblock +} +void PromoteSimpleStructs::ConversionState::possiblyConvertUsers(Instruction* Inst, + Value* Replacement, + Type* OriginalTy) { + const Value::use_iterator end = Inst->use_end(); + for(Value::use_iterator i = Inst->use_begin(); i != end;) { + if(!isa(*i)) { + ++i; + continue; + } else if(m_replacements.count(cast(*i))) { + ++i; + continue; + } else if(isa(*i)) { + GetElementPtrInst* GEP = cast(*i++); + if(GEP->getPointerOperand() != Inst) + continue; + + Type* GEPOriginalTy = GEP->getType(); + Type* GEPPromotedTy = m_p->getPromotedType(GEPOriginalTy); + + recordConverted(GEP); + convertGEPInstruction(GEP, GEPOriginalTy, GEPPromotedTy, Replacement, OriginalTy); + eraseConverted(GEP); + } else if(isa(*i)) { + ExtractValueInst* EV = cast(*i++); + if(EV->getAggregateOperand() != Inst) + continue; + + Type* EVOriginalTy = EV->getType(); + Type* EVPromotedTy = m_p->getPromotedType(EVOriginalTy); + + Value* Converted; + recordConverted(EV); + Converted = convertEVOrIVInstruction(EV, + EVOriginalTy, + EVPromotedTy, + Replacement, + OriginalTy); + if(Converted != EV) + eraseConverted(EV); + + } else if(isa(*i)) { + InsertValueInst* IV = cast(*i++); + if(IV->getAggregateOperand() != Inst) + continue; + + Type* IVOriginalTy = IV->getType(); + Type* IVPromotedTy = m_p->getPromotedType(IVOriginalTy); + + Value* Converted; + + recordConverted(IV); + Converted = convertEVOrIVInstruction(IV, + IVOriginalTy, + IVPromotedTy, + Replacement, + OriginalTy); + if(Converted != IV) + eraseConverted(IV); + } else + ++i; + } +} +Value* PromoteSimpleStructs::ConversionState::convertEVOrIVInstruction(Instruction* I, + Type* OriginalTy, + Type* PromotedTy, + Value* AggOp, + Type* AggOpOriginalTy) { + ArrayRef Indices; + + if(isa(I)) + Indices = cast(I)->getIndices(); + else if(isa(I)) + Indices = cast(I)->getIndices(); + + std::vector NewIndices; + std::vector OldIndices; + OldIndices.reserve(Indices.size()); + NewIndices.reserve(Indices.size()); + + const size_t end = Indices.size(); + for(size_t i = 0; i < end; ++i) { + unsigned Idx = Indices[i]; + Type* Ty = ExtractValueInst::getIndexedType(AggOpOriginalTy, OldIndices); + if(!isShallowPromotable(Ty)) + NewIndices.push_back(Idx); + OldIndices.push_back(Idx); + } + Value* Converted; + if(NewIndices.size() == 0) { + if(isa(I)) + Converted = AggOp; + else if(isa(I)) + Converted = get(cast(I)->getInsertedValueOperand()); + + m_p->recordOriginalType(I, OriginalTy); + I->mutateType(PromotedTy); + possiblyConvertUsers(I, Converted, OriginalTy); + m_p->eraseOriginalType(I); + I->replaceAllUsesWith(Converted); + I->eraseFromParent(); + } else if(NewIndices.size() == OldIndices.size()) { + if(isa(I)) + 
I->setOperand(ExtractValueInst::getAggregateOperandIndex(), AggOp); + else if(isa(I)) { + I->setOperand(InsertValueInst::getAggregateOperandIndex(), AggOp); + I->setOperand(InsertValueInst::getInsertedValueOperandIndex(), + get(cast(I)->getInsertedValueOperand())); + } + Converted = I; + m_p->mutateAndReplace(I, I, OriginalTy, PromotedTy); + } else { + Instruction* NewI; + if(isa(I)) + NewI = CopyDebug(ExtractValueInst::Create(AggOp, + NewIndices, + "", + I), + I); + else if(isa(I)) + NewI = + CopyDebug(InsertValueInst::Create(AggOp, + get(cast(I)->getInsertedValueOperand()), + NewIndices, + "", + I), + I); + + recordConverted(NewI); + Converted = NewI; + m_p->mutateAndReplace(I, NewI, OriginalTy, PromotedTy); + } + return Converted; +} +Value* PromoteSimpleStructs::ConversionState::convertGEPInstruction(GetElementPtrInst* Inst, + Type* OriginalTy, + Type* PromotedTy, + Value* PointerOp, + Type* PointerOpOriginalTy) { + std::vector OldIndices; + std::vector NewIndices; + OldIndices.reserve(Inst->getNumIndices()); + NewIndices.reserve(Inst->getNumIndices()); + + bool SkipNext = false; + + const User::op_iterator end = Inst->idx_end(); + for(User::op_iterator i = Inst->idx_begin(); i != end; ++i) { + assert(isa(*i) && "Woa. Internal error."); + + if(!SkipNext) { + NewIndices.push_back(get(*i)); + } else { + SkipNext = false; + } + + OldIndices.push_back(cast(*i)); + + Type* T = GetElementPtrInst::getIndexedType(PointerOpOriginalTy, + OldIndices); + if(isShallowPromotable(T)) { + SkipNext = true; + } + } + Value* Converted; + if(NewIndices.size() != 0) { + GetElementPtrInst* GEPI = GetElementPtrInst::Create(PointerOp, + NewIndices, + Inst->getName(), + Inst); + GEPI->setIsInBounds(Inst->isInBounds()); + CopyDebug(Inst, GEPI); + Converted = GEPI; + recordConverted(GEPI); + m_p->mutateAndReplace(Inst, GEPI, OriginalTy, PromotedTy); + } else { + assert(0 && "Invalid GEPi"); + errs() << "GEPi: " << ToStr(*Inst) << "\n"; + report_fatal_error("Invalid GEPi"); + /*Inst->mutateType(PointerOp->getType()); + possiblyConvertUsers(Inst, PointerOp, Inst->getType()); + Inst->replaceAllUsesWith(PointerOp); + Inst->eraseFromParent(); + Converted = PointerOp;*/ + } + return Converted; +} +template +void PromoteSimpleStructs::ConversionState::convertCall(T* Call) { + const unsigned end = Call->getNumArgOperands(); + for(unsigned i = 0; i < end; ++i) { + Value* V = Call->getArgOperand(i); + Value* NewV = get(V); + Call->setArgOperand(i, NewV); + } +} +Value* PromoteSimpleStructs::ConversionState::convertInstruction(Instruction* I) { + recordConverted(I); + Value* Converted = NULL; + Type* OriginalType = I->getType(); + Type* PromotedType = m_p->getPromotedType(OriginalType); + + if(isa(I)) { + GetElementPtrInst* Inst = cast(I); + Value* PointerOp = Inst->getPointerOperand(); + Type* PointerOpOriginalTy; + PointerOp = get(PointerOp, &PointerOpOriginalTy); + Converted = convertGEPInstruction(Inst, + OriginalType, + PromotedType, + PointerOp, + PointerOpOriginalTy); + } else if(isa(I) || isa(I)) { + Value* AggOp; + if(isa(I)) + AggOp = cast(I)->getAggregateOperand(); + else if(isa(I)) + AggOp = cast(I)->getAggregateOperand(); + + Type* AggOpOriginalTy; + AggOp = get(AggOp, &AggOpOriginalTy); + + Converted = convertEVOrIVInstruction(I, + OriginalType, + PromotedType, + AggOp, + AggOpOriginalTy); + } else if(isa(I)) { + PHINode* Phi = cast(I); + m_p->mutateAndReplace(I, I, OriginalType, PromotedType); + + for(size_t l = 0; l < Phi->getNumIncomingValues(); ++l) { + Value* NewIn = get(Phi->getIncomingValue(l)); + 
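+      // get() converts the incoming value on demand, so inputs defined later
+      // (e.g. across a back-edge) need no separate worklist.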
Phi->setIncomingValue(l, NewIn); + } + Converted = Phi; + } else if(isa(I) || isa(I)) { + m_p->mutateAndReplace(I, I, OriginalType, PromotedType); + if(isa(I)) { + CallInst* Call = cast(I); + convertCall(Call); + } else if(isa(I)) { + InvokeInst* Invoke = cast(I); + convertCall(Invoke); + } + Converted = I; + } else { + m_p->mutateAndReplace(I, I, OriginalType, PromotedType); + convertOperands(I); + Converted = I; + } + + assert(Converted != NULL); + if(Converted != I) + eraseConverted(I); + return Converted; +} +Constant* PromoteSimpleStructs::getPromotedConstant(Constant* C) { + User::op_iterator i; + Type* OriginalT; + Type* NewT; + + std::pair ci = + m_promoted_consts.insert(std::make_pair(C, (Constant*)NULL)); + if(!ci.second) { + assert(ci.first->second != NULL && "Should not be null"); + return ci.first->second; + } + Constant*& NewC = ci.first->second; + + OriginalT = C->getType(); + if(!shouldPromote(C->getType())) { + // constant expression still need their operands patched + if(!isa(C) && + !isa(C) && + !isa(C) && + !isa(C)) { + // we still need to record the 'original' type: + recordOriginalType(C, C->getType()); + NewC = C; + return C; + } else + NewT = OriginalT; + } else + NewT = getPromotedType(OriginalT); + + if(isa(C)) { + NewC = getPromoted(cast(C)); + } else if(isa(C)) { // fast path for this common const + NewC = UndefValue::get(NewT); + } else if(isa(C)) { + ConstantExpr* CE = cast(C); + unsigned Code = CE->getOpcode(); + + i = C->op_begin(); + + Constant* Agg = cast(i++); + Type* AggOriginalTy; + const bool IsGlobal = isa(Agg); + if(!IsGlobal) + AggOriginalTy = Agg->getType(); + Agg = getPromotedConstant(Agg); + if(IsGlobal) + AggOriginalTy = getOriginalType(Agg); + + if(Code == Instruction::GetElementPtr) { + assert(isa(AggOriginalTy) && "Original type isn't a pointer!"); + + bool SkipNext = false; + + std::vector NewIndices; + { + std::vector OldIndices; + OldIndices.reserve(C->getNumOperands()); + NewIndices.reserve(C->getNumOperands()); + + const User::op_iterator end = C->op_end(); + for(; i != end; ++i) { + Constant* C2 = cast(*i); + Constant* C3 = getPromotedConstant(C2); + + OldIndices.push_back(C2); + + if(!SkipNext) + NewIndices.push_back(C3); + else + SkipNext = false; + + Type* T2 = GetElementPtrInst::getIndexedType(AggOriginalTy, OldIndices); + if(isShallowPromotable(T2)) + SkipNext = true; + } + } + if(NewIndices.size() != 0) + NewC = ConstantExpr::getGetElementPtr(Agg, NewIndices); + else + NewC = Agg; + } else if(CE->hasIndices()) { + bool SkipNext = false; + + Constant* Inserted = getPromotedConstant(cast(i++)); + + std::vector NewIndices; + { + std::vector OldIndices; + OldIndices.reserve(C->getNumOperands()); + NewIndices.reserve(C->getNumOperands()); + + const ArrayRef Indices = CE->getIndices(); + const size_t end = Indices.size(); + for(size_t i = 0; i < end; ++i) { + unsigned Idx = Indices[i]; + + OldIndices.push_back(Idx); + + if(!SkipNext) + NewIndices.push_back(Idx); + else + SkipNext = false; + + Type* T2 = ExtractValueInst::getIndexedType(AggOriginalTy, OldIndices); + if(isShallowPromotable(T2)) + SkipNext = true; + } + } + if(Code == Instruction::ExtractValue) + NewC = ConstantExpr::getExtractValue(Agg, NewIndices); + else if(Code == Instruction::InsertValue) + NewC = ConstantExpr::getInsertValue(Agg, + Inserted, + NewIndices); + } else { + std::vector Consts; + Consts.reserve(C->getNumOperands()); + const User::op_iterator end = C->op_end(); + for(i = C->op_begin(); i != end; ++i) { + Constant* C2 = cast(*i); + 
Consts.push_back(getPromotedConstant(C2)); + } + NewC = CE->getWithOperands(Consts, NewT); + } + } else if(isShallowPromotable(C->getType())) { + if(isa(C) || isa(C)) + NewC = getPromotedConstant(C->getAggregateElement((unsigned)0)); + else if(isa(C)) { + if(isAggregateType(NewT) || isa(NewT)) { + NewC = C; + } else if(isa(NewT)) { + NewC = ConstantPointerNull::get(cast(NewT)); + } else if(NewT->isIntegerTy()) { + IntegerType* IntTy = cast(NewT); + NewC = ConstantInt::get(IntTy, 0, IntTy->getSignBit()); + } else if(NewT->isFloatingPointTy()) { + NewC = ConstantFP::get(NewT, 0.0); + } else { + errs() << "Constant: " << ToStr(*C) << "\n"; + assert(0 && "Unhandled else"); + llvm_unreachable("Unhandled else"); + } + } else if(isa(C)) { + NewC = ConstantPointerNull::get(cast(NewT)); + } else if(isa(C)) + NewC = cast(C)->getElementAsConstant(0); + else + NewC = getPromotedConstant(cast(C->getOperand(0))); + } else if(isa(C)) { + if(cast(C)->getNumElements() == 1) + NewC = cast(C)->getElementAsConstant(0); + else + NewC = C; + } else if(isa(C) || + isa(C)) { + std::vector Consts; + Consts.reserve(C->getNumOperands()); + const User::op_iterator end = C->op_end(); + for(i = C->op_begin(); i != end; ++i) { + Constant* C2 = cast(*i); + Consts.push_back(getPromotedConstant(C2)); + } + if(isa(C)) + NewC = ConstantStruct::get(cast(NewT), Consts); + else if(isa(C)) + NewC = ConstantArray::get(cast(NewT), Consts); + } else if(isa(C)) { + BlockAddress* Addr = cast(C); + + // make sure the function type is promoted + getPromoted(Addr->getFunction()); + NewC = C; + } else if(isa(C)) { + NewC = ConstantPointerNull::get(cast(NewT)); + } else { + NewC = C; + } + + assert(NewC != NULL && "NewC is NULL"); + m_promoted_consts.insert(std::make_pair(NewC, NewC)); + return NewC; +} + + +void PromoteSimpleStructs::promoteGlobal(GlobalVariable* G) { + std::pair result = m_promoted.insert(G); + if(!result.second) + return; + + Type* OriginalTy = G->getType(); + Type* NewTy = getPromotedType(OriginalTy); + mutateAndReplace(G, G, OriginalTy, NewTy); + if(G->hasInitializer()) { + Constant* Old = G->getInitializer(); + Constant* Initer = getPromotedConstant(Old); + G->setInitializer(Initer); + } + + // sometimes we can't reach all the uses of a GV. + // don't ask me how such a Constant would ever be unreachable + // but asserts are thrown later. 
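The GEP, extractvalue and insertvalue paths above, as well as the constant path just before this, all filter indices the same way: an index is dropped whenever it would step into a wrapper that promotion collapses. A small standalone sketch of that filtering, assuming, as the surrounding code suggests, that "shallowly promotable" means an aggregate wrapping exactly one element; the names here are illustrative only, not part of the pass.

#include <cstddef>
#include <vector>

struct Ty {                        // toy stand-in for llvm::Type
  std::vector<Ty*> Elems;          // empty means "scalar"
  bool isSingleElementWrapper() const { return Elems.size() == 1; }
};

// Keep only the indices that still mean something once every single-element
// wrapper has been collapsed into its element.
std::vector<unsigned> filterIndices(const Ty *Agg,
                                    const std::vector<unsigned> &Old) {
  std::vector<unsigned> New;
  const Ty *Cur = Agg;
  for (std::size_t I = 0; I < Old.size(); ++I) {
    if (!Cur->isSingleElementWrapper())   // this step survives promotion
      New.push_back(Old[I]);
    Cur = Cur->Elems[Old[I]];             // descend, like getIndexedType
  }
  return New;
}

// For a type shaped like { { i32, i32 } } the extractvalue indices {0, 1}
// become just {1}: the outer one-element struct disappears.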
+ const Value::use_iterator end = G->use_end(); + for(Value::use_iterator i = G->use_begin(); i != end; ++i) { + if(isa(*i)) + m_delayed.push(cast(*i)); + } +} + +static size_t ConvertedFunctions; + +Function* PromoteSimpleStructs::promoteFunction(Function& F, const bool PrototypeOnly) { + if(F.isIntrinsic()) { + m_promoted.insert(&F); + return &F; + } + + std::pair result = m_promoted.insert(&F); + Type* NewTy; + Function* NewF; + if(result.second) { + Type* OriginalTy = F.getType(); + NewTy = getPromotedType(OriginalTy); + mutateAndReplace(&F, &F, OriginalTy, NewTy); + NewF = &F; + } else { + NewF = &F; + NewTy = NewF->getType(); + } + + if(PrototypeOnly) + return NewF; + + ConversionState State(this, &F); + + const Function::iterator i_end = F.end(); + for(Function::iterator i = F.begin(); i != i_end; ++i) { + State.convertBlock(&*i); + } // function + + ConvertedFunctions++; + + return NewF; +} + +bool PromoteSimpleStructs::runOnModule(Module& M) { + m_module = &M; + + const Module::iterator i_end = M.end(); + for(Module::iterator i = M.begin(); i != i_end; ++i) { + promoteFunction(*i, false); + } + const Module::global_iterator j_end = M.global_end(); + for(Module::global_iterator j = M.global_begin(); j != j_end; ++j) { + promoteGlobal(j); + } + + // remove dangling consts: + { + const const_iterator end = m_promoted_consts.end(); + for(const_iterator i = m_promoted_consts.begin(); i != end; ++i) { + (*i).second->removeDeadConstantUsers(); + } + } + { + const iterator end = m_promoted.end(); + for(iterator i = m_promoted.begin(); i != end; ++i) { + (*i)->removeDeadConstantUsers(); + } + } + + m_original_types.clear(); + m_promoted_types.clear(); + m_promoted.clear(); + m_promoted_consts.clear(); + while(!m_delayed.empty()) + m_delayed.pop(); + return true; +} + +ModulePass *llvm::createPromoteSimpleStructsPass() { + return new PromoteSimpleStructs(); +} diff --git a/lib/Transforms/NaCl/PromoteValueStructureArgs.cpp b/lib/Transforms/NaCl/PromoteValueStructureArgs.cpp new file mode 100644 index 000000000000..53dc8c33bf22 --- /dev/null +++ b/lib/Transforms/NaCl/PromoteValueStructureArgs.cpp @@ -0,0 +1,392 @@ +//===- PromoteStructureArguments.cpp - Promote structure values to byval --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Analysis/NaCl.h" +#include +#include + +using namespace llvm; + +class PromoteStructureArgs : + public ModulePass { +public: + static char ID; + PromoteStructureArgs() + : ModulePass(ID) { + initializePromoteStructureArgsPass(*PassRegistry::getPassRegistry()); + } + typedef std::map::iterator ty_iterator; + std::map m_types; + + typedef std::set::iterator gv_iterator; + std::set m_globals; + + typedef std::map::iterator const_iterator; + std::map m_consts; + + void promoteFunction(Function* F); + void promoteGlobalVariable(GlobalVariable* G); + + Type* promoteType(Type* Ty); + Constant* promoteConstant(Constant* C); + void promoteOperands(User* U); + Value* promoteOperand(Value* V); + + void promoteGlobal(GlobalValue* V); + + bool shouldPromoteParam(Type* Ty); + + template + void promoteCallInst(T* Inst, Function* ParentF); + + bool runOnModule(Module& M); +}; + +char PromoteStructureArgs::ID = 0; +INITIALIZE_PASS(PromoteStructureArgs, "promote-structure-arguments", + "Promote by value structure arguments to use byval", + false, false) + +size_t ActualPromotedFunctions2; + +ModulePass* llvm::createPromoteStructureArgsPass() { + return new PromoteStructureArgs(); +} + +bool PromoteStructureArgs::shouldPromoteParam(Type* Ty) { + return isa(Ty) || isa(Ty); +} + +Type* PromoteStructureArgs::promoteType(Type* Ty) { + if(Ty == NULL) + return NULL; + + ty_iterator i = m_types.find(Ty); + if(i != m_types.end()) { + assert(i->second != NULL && "promoteType"); + return i->second; + } + Type* NewTy = NULL; + + if(isa(Ty)) { + Type* InnerTy = Ty->getContainedType(0); + Type* NewInnerTy = promoteType(InnerTy); + NewTy = PointerType::get(NewInnerTy, 0); + } else if(isa(Ty) && Ty->getStructNumElements() != 0) { + StructType* STy = cast(Ty); + + StructType* NewSTy; + if(STy->hasName()) + NewSTy = StructType::create(Ty->getContext(), STy->getName()); + else + NewSTy = StructType::create(Ty->getContext()); + + NewTy = NewSTy; + m_types[Ty] = NewTy; + m_types[NewTy] = NewTy; + + std::vector Types; + Types.reserve(STy->getNumElements()); + + for(unsigned j = 0; j < STy->getNumElements(); ++j) { + Type* OldTy2 = STy->getElementType(j); + Type* NewTy2 = promoteType(OldTy2); + Types.push_back(NewTy2); + } + NewSTy->setBody(Types, STy->isPacked()); + return NewTy; + } else if(isa(Ty)) { + FunctionType* FTy = cast(Ty); + Type* RetTy = FTy->getReturnType(); + Type* NewRetTy = promoteType(RetTy); + + std::vector Args; + Args.reserve(FTy->getNumParams()); + + for(unsigned j = 0; j < FTy->getNumParams(); ++j) { + Type* OldTy2 = FTy->getParamType(j); + Type* NewTy2 = promoteType(OldTy2); + if(shouldPromoteParam(NewTy2)) + Args.push_back(PointerType::get(NewTy2, 0)); + else + Args.push_back(NewTy2); + } + + NewTy = FunctionType::get(NewRetTy, Args, FTy->isVarArg()); + } else if(isa(Ty)) { + ArrayType* ATy = cast(Ty); + Type* ElementTy = ATy->getElementType(); + Type* NewElemTy = promoteType(ElementTy); + NewTy = ArrayType::get(NewElemTy, ATy->getNumElements()); + } else { + NewTy = Ty; + } + + assert(NewTy != NULL); + m_types.insert(std::make_pair(Ty, NewTy)); + if(Ty != NewTy) + 
m_types.insert(std::make_pair(NewTy, NewTy)); + return NewTy; +} +void PromoteStructureArgs::promoteFunction(Function* F) { + if(F->isIntrinsic()) + return; + FunctionType* FTy = F->getFunctionType(); + FunctionType* NewFTy = cast(promoteType(FTy)); + Function::arg_iterator arg_i = F->arg_begin(); + for(unsigned i = 0; i < FTy->getNumParams(); ++i, ++arg_i) { + Type* Ty = FTy->getParamType(i); + Type* NewTy = NewFTy->getParamType(i); + + Argument* Arg = arg_i; + Arg->mutateType(NewTy); + + if(!shouldPromoteParam(Ty)) + continue; + + F->addAttribute(i + 1, Attribute::ByVal); + if(F->size() == 0) + continue; + + LoadInst* Load = new LoadInst(Arg, "", F->getEntryBlock().getFirstNonPHI()); + + // sigh. + + // for the asserts in Value::replaceAllUsesWith() + Arg->mutateType(Load->getType()); + Arg->replaceAllUsesWith(Load); + Arg->mutateType(NewTy); + // now reset Load's pointer operand to Arg + Load->setOperand(0, Arg); + } + + Type* OldTy = F->Value::getType(); + Type* NewTy = promoteType(OldTy); + F->mutateType(NewTy); + + const Function::iterator end = F->end(); + for(Function::iterator i = F->begin(); i != end; ++i) { + BasicBlock::iterator end = i->end(); + for(BasicBlock::iterator j = i->begin(); j != end;) { + Type* Ty = j->getType(); + if(isa(*j)) { + CallInst* Call = cast(j++); + promoteCallInst(Call, F); + } else if(isa(*j)) { + InvokeInst* Invoke = cast(j++); + promoteCallInst(Invoke, F); + } else { + promoteOperands(j); + Type* NewTy = promoteType(Ty); + j->mutateType(NewTy); + ++j; + } + } + } +} +void PromoteStructureArgs::promoteGlobalVariable(GlobalVariable* G) { + Type* OriginalTy = G->getType(); + Type* PromotedTy = promoteType(OriginalTy); + G->mutateType(PromotedTy); + if(G->hasInitializer()) { + Constant* OldC = G->getInitializer(); + Constant* NewC = promoteConstant(OldC); + G->setInitializer(NewC); + } +} +Constant* PromoteStructureArgs::promoteConstant(Constant* C) { + if(isa(C)) { + GlobalValue* V = cast(C); + promoteGlobal(V); + return C; + } + + std::pair i + = m_consts.insert(std::make_pair(C, (Constant*)NULL)); + // If i.first->second is NULL, we've encountered a recursion. + // See the comment in the first branch. + if(!i.second && i.first->second != NULL) { + return i.first->second; + } + + Constant*& NewC = i.first->second; + if(isa(C) || + isa(C) || + isa(C)) { + std::vector Consts; + Consts.reserve(C->getNumOperands()); + const User::value_op_iterator end = C->value_op_end(); + for(User::value_op_iterator i = C->value_op_begin(); i != end; ++i) { + Constant* OldC2 = cast(*i); + Constant* NewC2 = promoteConstant(OldC2); + + // the promotion of one of the operands caused us to circle back around to this const. + // the only way this can happen is through a global, which means the second time around + // would skip the global causing the recursion, allowing the promotion to finish. + // if all that happens, our reference into the map will reflect the promotion, + // NewC won't be NULL, and we can just return. 
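Both promoteType (for recursive struct types) and promoteConstant lean on the same idiom: reserve the map slot for a key before recursing into its operands, so that re-entering the same key through a cycle can be detected and an already-finished result can be reused. A minimal standalone sketch of the idiom, with purely illustrative names:

#include <map>
#include <utility>

struct Node { Node *Next; };       // something that may refer back to itself

typedef std::map<const Node *, const Node *> MemoMap;
static MemoMap Memo;

const Node *rewrite(const Node *N) {
  // Reserve the slot first. A pre-existing non-null entry is a finished
  // result; a pre-existing null entry means we came back around a cycle.
  std::pair<MemoMap::iterator, bool> Slot =
      Memo.insert(std::make_pair(N, (const Node *)0));
  if (!Slot.second && Slot.first->second != 0)
    return Slot.first->second;     // memoized result
  if (!Slot.second)
    return N;                      // cycle: let the outer call finish the job

  const Node *NewN = N;            // ... build the real replacement here ...
  Slot.first->second = NewN;       // publish it through the reserved slot
  return NewN;
}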
+ if(NewC != NULL) + return NewC; + + Consts.push_back(NewC2); + } + + Type* Ty = C->getType(); + Type* NewTy = promoteType(Ty); + if(ConstantExpr* CE = dyn_cast(C)) { + NewC = CE->getWithOperands(Consts, NewTy); + } else if(isa(C)) { + StructType* STy = cast(NewTy); + NewC = ConstantStruct::get(STy, Consts); + } else if(isa(C)) { + ArrayType* ATy = cast(NewTy); + NewC = ConstantArray::get(ATy, Consts); + } + } else if(isa(C)) { + NewC = UndefValue::get(promoteType(C->getType())); + } else if(isa(C)) { + Type* OldTy = C->getType(); + PointerType* NewTy = cast(promoteType(OldTy)); + NewC = ConstantPointerNull::get(NewTy); + } else if(isa(C)) { + NewC = ConstantAggregateZero::get(promoteType(C->getType())); + } else { + NewC = C; + } + + assert(NewC != NULL); + if(C != NewC) { + const bool NewCInserted = m_consts.insert(std::make_pair(NewC, NewC)).second; + assert(NewCInserted); + (void)NewCInserted; + } + return NewC; +} +void PromoteStructureArgs::promoteOperands(User* U) { + unsigned pos = 0; + const User::value_op_iterator end = U->value_op_end(); + for(User::value_op_iterator k = U->value_op_begin(); k != end; ++k, ++pos) { + Value* NewV = promoteOperand(*k); + U->setOperand(pos, NewV); + } +} +Value* PromoteStructureArgs::promoteOperand(Value* V) { + if(isa(V)) { + Constant* C = cast(V); + Constant* NewC = promoteConstant(C); + return NewC; + } else + return V; +} +void PromoteStructureArgs::promoteGlobal(GlobalValue* V) { + if(m_globals.insert(V).second) { + if(isa(V)) { + Function* F = cast(V); + promoteFunction(F); + ++ActualPromotedFunctions2; + } else if(isa(V)) { + GlobalVariable* G = cast(V); + promoteGlobalVariable(G); + } + } +} + +template +void PromoteStructureArgs::promoteCallInst(T* Inst, Function* ParentF) { + Value* Called = Inst->getCalledValue(); + if(isa(Called) && cast(Called)->isIntrinsic()) + return; + + if(isa(Called)) { + GlobalValue* G = cast(Called); + promoteGlobal(G); + } + + const unsigned end = Inst->getNumArgOperands(); + for(unsigned i = 0; i != end; ++i) { + Value* V = Inst->getArgOperand(i); + Value* NewOp = promoteOperand(V); + Type* Ty = V->getType(); + if(!shouldPromoteParam(Ty)) { + Inst->setArgOperand(i, NewOp); + continue; + } + + if(isa(V)) { + LoadInst* Load = cast(V); + if(Load->isUnordered()) { + Value* VPtr = Load->getPointerOperand(); + Value* NewVPtr = promoteOperand(VPtr); + Inst->setArgOperand(i, NewVPtr); + continue; + } + } + + AllocaInst* Alloca = new AllocaInst(promoteType(Ty), + NULL, + "", + ParentF->getEntryBlock().getFirstNonPHI()); + CopyDebug(new StoreInst(NewOp, Alloca, Inst), Inst); + Inst->setArgOperand(i, Alloca); + } + + Type* Ty = Inst->getType(); + Type* NewTy = promoteType(Ty); + Inst->mutateType(NewTy); +} + +bool PromoteStructureArgs::runOnModule(Module& M) { + size_t Promoted = 0; + { + const Module::iterator end = M.end(); + for(Module::iterator i = M.begin(); i != end; ++i) { + promoteGlobal(i); + ++Promoted; + } + } + + Promoted = 0; + { + const Module::global_iterator end = M.global_end(); + for(Module::global_iterator i = M.global_begin(); i != end; ++i) { + promoteGlobal(i); + ++Promoted; + } + } + + // remove dangling consts: + { + const const_iterator end = m_consts.end(); + for(const_iterator i = m_consts.begin(); i != end; ++i) { + (*i).second->removeDeadConstantUsers(); + } + } + { + const gv_iterator end = m_globals.end(); + for(gv_iterator i = m_globals.begin(); i != end; ++i) { + (*i)->removeDeadConstantUsers(); + } + } + + m_globals.clear(); + m_types.clear(); + m_consts.clear(); + return true; +} diff 
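Taken together, PromoteStructureArgs turns every struct- or array-typed parameter into a byval pointer: promoteFunction adds the attribute and reloads the argument in the callee, while promoteCallInst materializes a caller-side copy through an alloca and a store. A rough source-level analogue of the rewrite (hypothetical names; the pass itself works on IR, not on C++ source):

struct Pair { int First; int Second; };

// Before: the aggregate travels by value in the signature.
int sumByValue(Pair P) { return P.First + P.Second; }

// After: the callee receives a pointer with byval-like semantics (it may treat
// the pointee as its own private copy), and every call site passes the address
// of a temporary copy, mirroring the alloca + store the pass inserts.
int sumByPointer(const Pair *P) { return P->First + P->Second; }

int caller() {
  Pair P = { 1, 2 };
  int Before = sumByValue(P);      // original call
  Pair Tmp = P;                    // the caller-side copy (alloca + store)
  int After = sumByPointer(&Tmp);  // promoted call
  return Before == After;          // always 1: behaviour is preserved
}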
--git a/lib/Transforms/NaCl/RemoveAsmMemory.cpp b/lib/Transforms/NaCl/RemoveAsmMemory.cpp new file mode 100644 index 000000000000..5295fe5ff8a6 --- /dev/null +++ b/lib/Transforms/NaCl/RemoveAsmMemory.cpp @@ -0,0 +1,84 @@ +//===- RemoveAsmMemory.cpp - Remove ``asm("":::"memory")`` ----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass removes all instances of ``asm("":::"memory")``. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/InstVisitor.h" +#include "llvm/Pass.h" +#include + +using namespace llvm; + +namespace { +class RemoveAsmMemory : public FunctionPass { +public: + static char ID; // Pass identification, replacement for typeid + RemoveAsmMemory() : FunctionPass(ID) { + initializeRemoveAsmMemoryPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnFunction(Function &F); +}; + +class AsmDirectivesVisitor : public InstVisitor { +public: + AsmDirectivesVisitor(Function &F) + : F(F), C(F.getParent()->getContext()), ModifiedFunction(false) {} + ~AsmDirectivesVisitor() {} + bool modifiedFunction() const { return ModifiedFunction; } + + /// Only Call Instructions are ever inline assembly directives. + void visitCallInst(CallInst &CI); + +private: + Function &F; + LLVMContext &C; + bool ModifiedFunction; + + AsmDirectivesVisitor() LLVM_DELETED_FUNCTION; + AsmDirectivesVisitor(const AsmDirectivesVisitor &) LLVM_DELETED_FUNCTION; + AsmDirectivesVisitor &operator=( + const AsmDirectivesVisitor &) LLVM_DELETED_FUNCTION; +}; +} + +char RemoveAsmMemory::ID = 0; +INITIALIZE_PASS(RemoveAsmMemory, "remove-asm-memory", + "remove all instances of ``asm(\"\":::\"memory\")``", false, + false) + +bool RemoveAsmMemory::runOnFunction(Function &F) { + AsmDirectivesVisitor AV(F); + AV.visit(F); + return AV.modifiedFunction(); +} + +void AsmDirectivesVisitor::visitCallInst(CallInst &CI) { + if (!CI.isInlineAsm() || + !cast(CI.getCalledValue())->isAsmMemory()) + return; + + // In NaCl ``asm("":::"memory")`` always comes in pairs, straddling a + // sequentially consistent fence. Other passes rewrite this fence to + // an equivalent stable NaCl intrinsic, meaning that this assembly can + // be removed. + CI.eraseFromParent(); + ModifiedFunction = true; +} + +namespace llvm { +FunctionPass *createRemoveAsmMemoryPass() { return new RemoveAsmMemory(); } +} diff --git a/lib/Transforms/NaCl/ReplaceAggregatesWithInts.cpp b/lib/Transforms/NaCl/ReplaceAggregatesWithInts.cpp new file mode 100644 index 000000000000..e120ec3a99f3 --- /dev/null +++ b/lib/Transforms/NaCl/ReplaceAggregatesWithInts.cpp @@ -0,0 +1,183 @@ +//===- ReplaceArraysWithInts.cpp - Replace remaining aggregate types with ints// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Replaces remaining BitCastInst|IntToPtrInst -> LoadInst +// -> (ValOperand) StoreInst combos. 
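Concretely, the combination above is a whole small aggregate that is only ever loaded and then stored again; such a move is equivalent to moving a single integer of the same width (the getReplacedTy helper further down computes that width and refuses anything over 64 bits). A standalone sketch of the equivalence, not tied to the LLVM API:

#include <cstring>
#include <stdint.h>

struct Two { uint32_t A; uint32_t B; };     // 64 bits, no padding

void copyAsAggregate(const Two *Src, Two *Dst) {
  *Dst = *Src;                              // aggregate load, aggregate store
}

void copyAsInteger(const Two *Src, Two *Dst) {
  uint64_t Word;                            // the i64 the pass substitutes
  std::memcpy(&Word, Src, sizeof(Word));    // the i64 load
  std::memcpy(Dst, &Word, sizeof(Word));    // the i64 store
}

// Both functions move the same bytes; the pass performs the analogous
// substitution on IR load/store pairs reached through bitcast or inttoptr.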
+// This pass /should/ be safe because previous passes have reduced element +// access to pointer offsets, so all that remains is the movement of +// whole aggregate values. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Attributes.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Analysis/NaCl.h" +#include + + +using namespace llvm; + +template +const std::string ToStr(const T &V) { + std::string S; + raw_string_ostream OS(S); + OS << const_cast(V); + return OS.str(); +} + +class ReplaceAggregatesWithInts : + public FunctionPass { +public: + static char ID; + ReplaceAggregatesWithInts() : FunctionPass(ID), m_converted(0) { + initializeReplaceAggregatesWithIntsPass(*PassRegistry::getPassRegistry()); + } + + virtual void getAnalysisUsage(AnalysisUsage &Info) const { + Info.addRequired(); + } + + size_t m_converted; + + Type* getReplacedTy(Type* Ty) { + LLVMContext& C = Ty->getContext(); + unsigned Width; + if(isa(Ty)) { + ArrayType* ATy = cast(Ty); + Type* ElemTy = ATy->getElementType(); + if(!ElemTy->isIntegerTy()) { + errs() << "Type: " << ToStr(*ATy) << "\n"; + assert(0 && "Unsupported replacement!"); + report_fatal_error("Unsupported replacement!"); + } + + if(ATy->getNumElements() == 0) + return NULL; + else if(ATy->getNumElements() == 1) + return ElemTy; + else { + unsigned Bits = ElemTy->getIntegerBitWidth(); + Width = Bits * ATy->getNumElements(); + } + } else if(isa(Ty)) { + const DataLayout DL(getAnalysis()); + Width = DL.getTypeSizeInBits(Ty); + } else { + errs() << "Type: " << ToStr(*Ty) << "\n"; + assert(0 && "This shouldn't be reached!"); + report_fatal_error("This shouldn't be reached!"); + } + + if(Width > 64) { + errs() << "Type: " << ToStr(*Ty) << "\n"; + assert(0 && "Replacement would be too wide!"); + report_fatal_error("Replacement would be too wide!"); + } else if(Width == 0) + return NULL; + + return Type::getIntNTy(C, Width); + } + + virtual bool runOnFunction(Function& F) { + std::set ToErase; + + bool Changed = false; + + const Function::iterator End = F.end(); + for(Function::iterator I = F.begin(); I != End; ++I) { + BasicBlock* BB = I; + const BasicBlock::iterator End = BB->end(); + for(BasicBlock::iterator J = BB->begin(); J != End; ++J) { + Instruction* Inst = J; + + if(isa(Inst) || isa(Inst)) { + CastInst* Cast = cast(Inst); + Type* Ty = Cast->getType(); + if(!isa(Ty)) + continue; + + Type* ContainedTy = Ty->getContainedType(0); + if(!(isa(ContainedTy) || isa(ContainedTy))) + continue; + + Type* NewTy = getReplacedTy(ContainedTy); + if(NewTy == NULL) { + const Value::use_iterator End = Cast->use_end(); + for(Value::use_iterator K = Cast->use_begin(); K != End;) { + if(!(isa(*K) || isa(*K))) { + errs() << "Inst: " << ToStr(*Cast) << "\n"; + errs() << "Use : " << ToStr(**K) << "\n"; + assert(0 && "Unknown use!"); + report_fatal_error("Unknown use!"); + } + + Instruction* KInst = cast(*K++); + if(isa(KInst)) { + const Value::use_iterator End = KInst->use_end(); + for(Value::use_iterator L = KInst->use_begin(); L != End; ++L) { + if(!isa(*L)) { + errs() << "Inst: " << ToStr(*KInst) << "\n"; + errs() << "Use: " << ToStr(**L) << "\n"; + assert(0 && "Non-StoreInst use!"); + report_fatal_error("Non-StoreInst use!"); + } + + ToErase.insert(cast(*L)); + } + } + ToErase.insert(KInst); + } + ToErase.insert(Cast); + Changed = 
true; + } else { + // mutate load types. + const Value::use_iterator End = Cast->use_end(); + for(Value::use_iterator K = Cast->use_begin(); K != End; ++K) { + if(isa(*K)) { + assert(K->getType() == ContainedTy); + K->mutateType(NewTy); + Changed = true; + } + } + Cast->mutateType(PointerType::get(NewTy, 0)); + } + + } // BinOp + } // BasicBlock::iterator + } // Function::iterator + + const std::set::iterator End2 = ToErase.end(); + for(std::set::iterator I = ToErase.begin(); I != End2; ++I) { + (*I)->dropAllReferences(); + } + for(std::set::iterator I = ToErase.begin(); I != End2; ++I) { + (*I)->eraseFromParent(); + } + + ++m_converted; + return Changed; + } +}; + +char ReplaceAggregatesWithInts::ID = 0; +INITIALIZE_PASS(ReplaceAggregatesWithInts, + "replace-aggregates-with-ints", + "Replace remaining aggregates with a single integer", + false, + false) + +FunctionPass *llvm::createReplaceAggregatesWithIntsPass() { + return new ReplaceAggregatesWithInts(); +} diff --git a/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp b/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp new file mode 100644 index 000000000000..0338384cbf8b --- /dev/null +++ b/lib/Transforms/NaCl/ReplacePtrsWithInts.cpp @@ -0,0 +1,646 @@ +//===- ReplacePtrsWithInts.cpp - Convert pointer values to integer values--===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass strips out aggregate pointer types and replaces them with +// the integer type iPTR, which is i32 for PNaCl (though this pass +// will allow iPTR to be i64 if the DataLayout specifies 64-bit +// pointers). +// +// The pass converts IR to the following normal form: +// +// All inttoptr and ptrtoint instructions use the same integer size +// (iPTR), so they do not implicitly truncate or zero-extend. +// +// alloca always has the result type i8*. +// +// Pointer types only appear in the following instructions: +// * loads and stores: the pointer operand is a NormalizedPtr. +// * function calls: the function operand is a NormalizedPtr. +// * intrinsic calls: any pointer arguments are NormalizedPtrs. +// * alloca +// * bitcast and inttoptr: only used as part of a NormalizedPtr. +// * ptrtoint: the operand is an InherentPtr. +// +// Where an InherentPtr is defined as a pointer value that is: +// * an alloca; +// * a GlobalValue (a function or global variable); or +// * an intrinsic call. +// +// And a NormalizedPtr is defined as a pointer value that is: +// * an inttoptr instruction; +// * an InherentPtr; or +// * a bitcast of an InherentPtr. +// +// This pass currently strips out lifetime markers (that is, calls to +// the llvm.lifetime.start/end intrinsics) and invariant markers +// (calls to llvm.invariant.start/end). +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass because the pass must recreate functions in + // order to change their argument and return types. 
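To make the normal form above concrete, the sketch below (standalone, written against the LLVM 3.x-era API this patch builds on, and not part of the pass) constructs a function that is already in converted form: its parameter is iPTR, and the only pointer value is a NormalizedPtr produced by an inttoptr immediately before the load that needs it.

// Builds the equivalent of:
//   define internal i32 @deref(i32 %p) {
//   entry:
//     %p.asptr = inttoptr i32 %p to i32*
//     %v = load i32* %p.asptr
//     ret i32 %v
//   }
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include <vector>

using namespace llvm;

Function *buildNormalizedDeref(Module &M) {
  LLVMContext &C = M.getContext();
  Type *IntPtrTy = Type::getInt32Ty(C);           // iPTR for PNaCl
  std::vector<Type *> Params(1, IntPtrTy);
  FunctionType *FTy = FunctionType::get(IntPtrTy, Params, false);
  Function *F = Function::Create(FTy, Function::InternalLinkage, "deref", &M);
  BasicBlock *BB = BasicBlock::Create(C, "entry", F);
  IRBuilder<> IRB(BB);
  Value *P = &*F->arg_begin();                    // the iPTR argument
  Value *Ptr = IRB.CreateIntToPtr(P, IntPtrTy->getPointerTo(), "p.asptr");
  Value *V = IRB.CreateLoad(Ptr, "v");
  IRB.CreateRet(V);
  return F;
}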
+ struct ReplacePtrsWithInts : public ModulePass { + static char ID; // Pass identification, replacement for typeid + ReplacePtrsWithInts() : ModulePass(ID) { + initializeReplacePtrsWithIntsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; + + // FunctionConverter stores the state for mapping old instructions + // (of pointer type) to converted instructions (of integer type) + // within a function, and provides methods for doing the conversion. + class FunctionConverter { + // Int type that pointer types are to be replaced with, typically i32. + Type *IntPtrType; + + struct RewrittenVal { + RewrittenVal(): Placeholder(NULL), NewIntVal(NULL) {} + Value *Placeholder; + Value *NewIntVal; + }; + // Maps from old values (of pointer type) to converted values (of + // IntPtrType type). + DenseMap RewriteMap; + + public: + FunctionConverter(Type *IntPtrType) : IntPtrType(IntPtrType) {} + + // Returns the normalized version of the given type, converting + // pointer types to IntPtrType. + Type *convertType(Type *Ty); + // Returns the normalized version of the given function type by + // normalizing the function's argument types. + FunctionType *convertFuncType(FunctionType *FTy); + + // Records that 'To' is the normalized version of 'From'. If 'To' + // is not of pointer type, no type conversion is required, so this + // can take the short cut of replacing 'To' with 'From'. + void recordConverted(Value *From, Value *To); + void recordConvertedAndErase(Instruction *From, Value *To); + + // Returns Val with no-op casts (those that convert between + // IntPtrType and pointer types) stripped off. + Value *stripNoopCasts(Value *Val); + + // Returns the normalized version of the given value. + // + // If the conversion of Val has been deferred, this returns a + // placeholder object, which will later be replaceAllUsesWith'd to + // the final value. Since replaceAllUsesWith does not work on + // references by metadata nodes, this can be bypassed using + // BypassPlaceholder to get the real converted value, assuming it + // is available. + Value *convert(Value *Val, bool BypassPlaceholder = false); + // Returns the NormalizedPtr form of the given pointer value. + // Inserts conversion instructions at InsertPt. + Value *convertBackToPtr(Value *Val, Instruction *InsertPt); + // Returns the NormalizedPtr form of the given function pointer. + // Inserts conversion instructions at InsertPt. + Value *convertFunctionPtr(Value *Callee, Instruction *InsertPt); + // Converts an instruction without recreating it, by wrapping its + // operands and result. + void convertInPlace(Instruction *Inst); + + void eraseReplacedInstructions(); + + // List of instructions whose deletion has been deferred. 
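The RewriteMap/Placeholder machinery described above is a deferred-rewrite scheme: a user that needs a value before its converted form exists is handed a placeholder, and every placeholder is redirected to the real value in one sweep once the whole function has been visited (the replaceAllUsesWith step in eraseReplacedInstructions). A standalone model of the scheme, with purely illustrative names:

#include <cassert>
#include <map>

struct Val { Val *ReplacedBy; Val() : ReplacedBy(0) {} };

struct Deferred {
  Val *Placeholder;
  Val *Final;
  Deferred() : Placeholder(0), Final(0) {}
};

static std::map<Val *, Deferred> Rewrites;

Val *convertedFormOf(Val *Old) {           // analogue of convert()
  Deferred &D = Rewrites[Old];
  if (D.Final)
    return D.Final;                        // already converted
  if (!D.Placeholder)
    D.Placeholder = new Val();             // hand out a stand-in for now
  return D.Placeholder;
}

void recordConverted(Val *Old, Val *New) { // analogue of recordConverted()
  Rewrites[Old].Final = New;
}

void resolvePlaceholders() {     // analogue of eraseReplacedInstructions()
  for (std::map<Val *, Deferred>::iterator I = Rewrites.begin(),
                                           E = Rewrites.end(); I != E; ++I)
    if (I->second.Placeholder) {
      assert(I->second.Final && "a deferred value was never converted");
      I->second.Placeholder->ReplacedBy = I->second.Final;  // the RAUW step
    }
}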
+ SmallVector ToErase; + }; +} + +Type *FunctionConverter::convertType(Type *Ty) { + if (Ty->isPointerTy()) + return IntPtrType; + else if (Ty->isVectorTy() && Ty->getContainedType(0)->isPointerTy()) + return VectorType::get(IntPtrType, cast(Ty)->getNumElements()); + return Ty; +} + +FunctionType *FunctionConverter::convertFuncType(FunctionType *FTy) { + SmallVector ArgTypes; + for (FunctionType::param_iterator ArgTy = FTy->param_begin(), + E = FTy->param_end(); ArgTy != E; ++ArgTy) { + ArgTypes.push_back(convertType(*ArgTy)); + } + return FunctionType::get(convertType(FTy->getReturnType()), ArgTypes, + FTy->isVarArg()); +} + +static bool ShouldConvert(Value* V) { + return V->getType()->isPointerTy() || + (V->getType()->isVectorTy() && + cast(V->getType())->getElementType()->isPointerTy()); +} + +void FunctionConverter::recordConverted(Value *From, Value *To) { + if (!ShouldConvert(From)) { + From->replaceAllUsesWith(To); + return; + } + RewrittenVal *RV = &RewriteMap[From]; + assert(!RV->NewIntVal); + RV->NewIntVal = To; +} + +void FunctionConverter::recordConvertedAndErase(Instruction *From, Value *To) { + recordConverted(From, To); + // There may still be references to this value, so defer deleting it. + ToErase.push_back(From); +} + +Value *FunctionConverter::stripNoopCasts(Value *Val) { + SmallPtrSet Visited; + for (;;) { + if (!Visited.insert(Val)) { + // It is possible to get a circular reference in unreachable + // basic blocks. Handle this case for completeness. + return UndefValue::get(Val->getType()); + } + if (CastInst *Cast = dyn_cast(Val)) { + Value *Src = Cast->getOperand(0); + if ((isa(Cast) && Cast->getType()->isPointerTy()) || + (isa(Cast) && Cast->getType() == IntPtrType) || + (isa(Cast) && Src->getType() == IntPtrType)) { + Val = Src; + continue; + } + } + return Val; + } +} + +Value *FunctionConverter::convert(Value *Val, bool BypassPlaceholder) { + Val = stripNoopCasts(Val); + if (!Val->getType()->isPointerTy() && + (!Val->getType()->isVectorTy() || + !cast(Val->getType())->getElementType()->isPointerTy())) + return Val; + if (Constant *C = dyn_cast(Val)) + return ConstantExpr::getPtrToInt(C, IntPtrType); + RewrittenVal *RV = &RewriteMap[Val]; + if (BypassPlaceholder) { + assert(RV->NewIntVal); + return RV->NewIntVal; + } + if (!RV->Placeholder) + RV->Placeholder = new Argument(convertType(Val->getType())); + return RV->Placeholder; +} + +Value *FunctionConverter::convertBackToPtr(Value *Val, Instruction *InsertPt) { + Type *NewTy = + convertType(Val->getType()->getPointerElementType())->getPointerTo(); + return CopyDebug(new IntToPtrInst(convert(Val), NewTy, "", InsertPt), InsertPt); +} + +Value *FunctionConverter::convertFunctionPtr(Value *Callee, + Instruction *InsertPt) { + FunctionType *FuncType = cast( + Callee->getType()->getPointerElementType()); + return CopyDebug(new IntToPtrInst(convert(Callee), + convertFuncType(FuncType)->getPointerTo(), + "", InsertPt), InsertPt); +} + +static bool ShouldLeaveAlone(Value *V) { + if (Function *F = dyn_cast(V)) + return F->isIntrinsic(); + if (isa(V)) + return true; + return false; +} + +void FunctionConverter::convertInPlace(Instruction *Inst) { + // Convert operands. + for (unsigned I = 0; I < Inst->getNumOperands(); ++I) { + Value *Arg = Inst->getOperand(I); + if (ShouldConvert(Arg) && !ShouldLeaveAlone(Arg)) { + Value *Conv = convert(Arg); + Inst->setOperand(I, CopyDebug(new IntToPtrInst(Conv, Arg->getType(), "", Inst), Inst)); + } + } + // Convert result. 
+ if (ShouldConvert(Inst)) { + Instruction *Cast = new PtrToIntInst( + Inst, convertType(Inst->getType()), Inst->getName() + ".asint"); + CopyDebug(Cast, Inst); + Cast->insertAfter(Inst); + recordConverted(Inst, Cast); + } +} + +void FunctionConverter::eraseReplacedInstructions() { + bool Error = false; + for (DenseMap::iterator I = RewriteMap.begin(), + E = RewriteMap.end(); I != E; ++I) { + if (I->second.Placeholder) { + if (I->second.NewIntVal) { + I->second.Placeholder->replaceAllUsesWith(I->second.NewIntVal); + } else { + errs() << "Not converted: " << *I->first << "\n"; + Error = true; + } + } + } + if (Error) + report_fatal_error("Case not handled in ReplacePtrsWithInts"); + + // Delete the placeholders in a separate pass. This means that if + // one placeholder is accidentally rewritten to another, we will get + // a useful error message rather than accessing a dangling pointer. + for (DenseMap::iterator I = RewriteMap.begin(), + E = RewriteMap.end(); I != E; ++I) { + delete I->second.Placeholder; + } + + // We must do dropAllReferences() before doing eraseFromParent(), + // otherwise we will try to erase instructions that are still + // referenced. + for (SmallVectorImpl::iterator I = ToErase.begin(), + E = ToErase.end(); + I != E; ++I) { + (*I)->dropAllReferences(); + } + for (SmallVectorImpl::iterator I = ToErase.begin(), + E = ToErase.end(); + I != E; ++I) { + (*I)->eraseFromParent(); + } +} + +static void ConvertMetadataOperand(FunctionConverter *FC, + IntrinsicInst *Call, int Index) { + MDNode *MD = cast(Call->getArgOperand(Index)); + if (MD->getNumOperands() != 1) + return; + Value *MDArg = MD->getOperand(0); + if (MDArg && (isa(MDArg) || isa(MDArg))) { + MDArg = FC->convert(MDArg, /* BypassPlaceholder= */ true); + if (PtrToIntInst *Cast = dyn_cast(MDArg)) { + // Unwrapping this is necessary for llvm.dbg.declare to work. + MDArg = Cast->getPointerOperand(); + } + SmallVector Args; + Args.push_back(MDArg); + Call->setArgOperand(Index, MDNode::get(Call->getContext(), Args)); + } +} + +// Remove attributes that only apply to pointer arguments. Returns +// the updated AttributeSet. +static AttributeSet RemovePointerAttrs(LLVMContext &Context, + AttributeSet Attrs) { + SmallVector AttrList; + for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { + unsigned Index = Attrs.getSlotIndex(Slot); + AttrBuilder AB; + for (AttributeSet::iterator Attr = Attrs.begin(Slot), E = Attrs.end(Slot); + Attr != E; ++Attr) { + if (!Attr->isEnumAttribute()) { + continue; + } + switch (Attr->getKindAsEnum()) { + // ByVal and StructRet should already have been removed by the + // ExpandByVal pass. + case Attribute::ByVal: + case Attribute::StructRet: + case Attribute::Nest: + Attrs.dump(); + report_fatal_error("ReplacePtrsWithInts cannot handle " + "byval, sret or nest attrs"); + break; + // Strip NoCapture and NoAlias because they are only allowed + // on arguments of pointer type, and we are removing the + // pointer types. 
+ case Attribute::NoCapture: + case Attribute::NoAlias: + case Attribute::ReadOnly: + break; + default: + AB.addAttribute(*Attr); + } + } + AttrList.push_back(AttributeSet::get(Context, Index, AB)); + } + return AttributeSet::get(Context, AttrList); +} + +static void ConvertInstruction(DataLayout *DL, Type *IntPtrType, + FunctionConverter *FC, Instruction *Inst) { + if (ReturnInst *Ret = dyn_cast(Inst)) { + Value *Result = Ret->getReturnValue(); + if (Result) + Result = FC->convert(Result); + CopyDebug(ReturnInst::Create(Ret->getContext(), Result, Ret), Inst); + Ret->eraseFromParent(); + } else if (PHINode *Phi = dyn_cast(Inst)) { + PHINode *Phi2 = PHINode::Create(FC->convertType(Phi->getType()), + Phi->getNumIncomingValues(), + "", Phi); + CopyDebug(Phi2, Phi); + for (unsigned I = 0; I < Phi->getNumIncomingValues(); ++I) { + Phi2->addIncoming(FC->convert(Phi->getIncomingValue(I)), + Phi->getIncomingBlock(I)); + } + Phi2->takeName(Phi); + FC->recordConvertedAndErase(Phi, Phi2); + } else if (SelectInst *Op = dyn_cast(Inst)) { + Instruction *Op2 = SelectInst::Create(Op->getCondition(), + FC->convert(Op->getTrueValue()), + FC->convert(Op->getFalseValue()), + "", Op); + CopyDebug(Op2, Op); + Op2->takeName(Op); + FC->recordConvertedAndErase(Op, Op2); + } else if (isa(Inst) || isa(Inst)) { + Value *Arg = FC->convert(Inst->getOperand(0)); + Type *ResultTy = FC->convertType(Inst->getType()); + unsigned ArgSize = DL->getTypeSizeInBits(Arg->getType()); + unsigned ResultSize = DL->getTypeSizeInBits(ResultTy); + Value *Result; + // We avoid using IRBuilder's CreateZExtOrTrunc() here because it + // constant-folds ptrtoint ConstantExprs. This leads to creating + // ptrtoints of non-IntPtrType type, which is not what we want, + // because we want truncation/extension to be done explicitly by + // separate instructions. + if (ArgSize == ResultSize) { + Result = Arg; + } else { + Instruction::CastOps CastType = + ArgSize > ResultSize ? Instruction::Trunc : Instruction::ZExt; + Result = CopyDebug(CastInst::Create(CastType, Arg, ResultTy, "", Inst), + Inst); + } + if (Result != Arg) + Result->takeName(Inst); + FC->recordConvertedAndErase(Inst, Result); + } else if (isa(Inst)) { + if (Inst->getType()->isPointerTy()) { + FC->ToErase.push_back(Inst); + } + } else if (ICmpInst *Cmp = dyn_cast(Inst)) { + Value *Cmp2 = CopyDebug(new ICmpInst(Inst, Cmp->getPredicate(), + FC->convert(Cmp->getOperand(0)), + FC->convert(Cmp->getOperand(1)), ""), + Inst); + Cmp2->takeName(Cmp); + Cmp->replaceAllUsesWith(Cmp2); + Cmp->eraseFromParent(); + } else if (LoadInst *Load = dyn_cast(Inst)) { + Value *Ptr = FC->convertBackToPtr(Load->getPointerOperand(), Inst); + LoadInst *Result = new LoadInst(Ptr, "", Inst); + Result->takeName(Inst); + CopyDebug(Result, Inst); + CopyLoadOrStoreAttrs(Result, Load); + FC->recordConvertedAndErase(Inst, Result); + } else if (StoreInst *Store = dyn_cast(Inst)) { + Value *Ptr = FC->convertBackToPtr(Store->getPointerOperand(), Inst); + StoreInst *Result = new StoreInst(FC->convert(Store->getValueOperand()), + Ptr, Inst); + CopyDebug(Result, Inst); + CopyLoadOrStoreAttrs(Result, Store); + Inst->eraseFromParent(); + } else if (CallInst *Call = dyn_cast(Inst)) { + if (IntrinsicInst *ICall = dyn_cast(Inst)) { + if (ICall->getIntrinsicID() == Intrinsic::lifetime_start || + ICall->getIntrinsicID() == Intrinsic::lifetime_end || + ICall->getIntrinsicID() == Intrinsic::invariant_start || + ICall->getIntrinsicID() == Intrinsic::invariant_end) { + // Remove alloca lifetime markers for now. 
This is because + // the GVN pass can introduce lifetime markers taking PHI + // nodes as arguments. If ReplacePtrsWithInts converts the + // PHI node to int type, we will render those lifetime markers + // ineffective. But dropping a subset of lifetime markers is + // not safe in general. So, until LLVM better defines the + // semantics of lifetime markers, we drop them all. See: + // https://code.google.com/p/nativeclient/issues/detail?id=3443 + // We do the same for invariant.start/end because they work in + // a similar way. + + FC->ToErase.push_back(Inst); + + } else { + FC->convertInPlace(Inst); + } + } else if (isa(Call->getCalledValue())) { + FC->convertInPlace(Inst); + } else { + SmallVector Args; + for (unsigned I = 0; I < Call->getNumArgOperands(); ++I) + Args.push_back(FC->convert(Call->getArgOperand(I))); + CallInst *NewCall = CallInst::Create( + FC->convertFunctionPtr(Call->getCalledValue(), Call), + Args, "", Inst); + CopyDebug(NewCall, Call); + NewCall->setAttributes(RemovePointerAttrs(Call->getContext(), + Call->getAttributes())); + NewCall->setCallingConv(Call->getCallingConv()); + NewCall->takeName(Call); + FC->recordConvertedAndErase(Call, NewCall); + } + } else if (InvokeInst *Call = dyn_cast(Inst)) { + SmallVector Args; + for (unsigned I = 0; I < Call->getNumArgOperands(); ++I) + Args.push_back(FC->convert(Call->getArgOperand(I))); + InvokeInst *NewCall = InvokeInst::Create( + FC->convertFunctionPtr(Call->getCalledValue(), Call), + Call->getNormalDest(), + Call->getUnwindDest(), + Args, "", Inst); + CopyDebug(NewCall, Call); + NewCall->setAttributes(RemovePointerAttrs(Call->getContext(), + Call->getAttributes())); + NewCall->setCallingConv(Call->getCallingConv()); + NewCall->takeName(Call); + FC->recordConvertedAndErase(Call, NewCall); + } else if (AllocaInst *Alloca = dyn_cast(Inst)) { + Type *ElementTy = Inst->getType()->getPointerElementType(); + Constant *ElementSize = ConstantInt::get(IntPtrType, + DL->getTypeAllocSize(ElementTy)); + // Expand out alloca's built-in multiplication. + Value *MulSize; + if (ConstantInt *C = dyn_cast(Alloca->getArraySize())) { + MulSize = ConstantExpr::getMul(ElementSize, C); + } else { + MulSize = CopyDebug(BinaryOperator::Create(Instruction::Mul, + ElementSize, + Alloca->getArraySize(), + Alloca->getName() + ".alloca_mul", + Alloca), + Inst); + } + unsigned Alignment = Alloca->getAlignment(); + if (Alignment == 0) + Alignment = DL->getPrefTypeAlignment(ElementTy); + Value *Tmp = CopyDebug(new AllocaInst(Type::getInt8Ty(Inst->getContext()), + MulSize, Alignment, "", Inst), + Inst); + Tmp->takeName(Alloca); + Value *Alloca2 = CopyDebug(new PtrToIntInst(Tmp, IntPtrType, + Tmp->getName() + ".asint", + Inst), + Inst); + FC->recordConvertedAndErase(Alloca, Alloca2); + } else if (// Handle these instructions as a convenience to allow + // the pass to be used in more situations, even though we + // don't expect them in PNaCl's stable ABI. + isa(Inst) || + isa(Inst) || + isa(Inst) || + isa(Inst) || + isa(Inst) || + isa(Inst) || + isa(Inst) || + // These atomics only operate on integer pointers, not + // other pointers, so we don't need to recreate the + // instruction. 
+ isa(Inst) || + isa(Inst)) { + FC->convertInPlace(Inst); + } +} + +// Convert ptrtoint+inttoptr to a bitcast because it's shorter and +// because some intrinsics work on bitcasts but not on +// ptrtoint+inttoptr, in particular: +// * llvm.lifetime.start/end (although we strip these out) +// * llvm.eh.typeid.for +static void SimplifyCasts(Instruction *Inst, Type *IntPtrType) { + if (IntToPtrInst *Cast1 = dyn_cast(Inst)) { + if (PtrToIntInst *Cast2 = dyn_cast(Cast1->getOperand(0))) { + assert(Cast2->getType() == IntPtrType); + Value *V = Cast2->getPointerOperand(); + if (V->getType() != Cast1->getType()) + V = CopyDebug(new BitCastInst(V, Cast1->getType(), V->getName() + ".bc", Cast1), Inst); + Cast1->replaceAllUsesWith(V); + if (Cast1->use_empty()) + Cast1->eraseFromParent(); + if (Cast2->use_empty()) + Cast2->eraseFromParent(); + } + } +} + +static void CleanUpFunction(Function *Func, Type *IntPtrType) { + // Remove the ptrtoint/bitcast ConstantExprs we introduced for + // referencing globals. + FunctionPass *Pass = createExpandConstantExprPass(); + Pass->runOnFunction(*Func); + delete Pass; + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + SimplifyCasts(Iter++, IntPtrType); + } + } + // Cleanup pass. + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + Instruction *Inst = Iter++; + // Add names to inttoptrs to make the output more readable. The + // placeholder values get in the way of doing this earlier when + // the inttoptrs are created. + if (isa(Inst)) + Inst->setName(Inst->getOperand(0)->getName() + ".asptr"); + // Remove ptrtoints that were introduced for allocas but not used. + if (isa(Inst) && Inst->use_empty()) + Inst->eraseFromParent(); + } + } +} + +char ReplacePtrsWithInts::ID = 0; +INITIALIZE_PASS(ReplacePtrsWithInts, "replace-ptrs-with-ints", + "Convert pointer values to integer values", + false, false) + +bool ReplacePtrsWithInts::runOnModule(Module &M) { + DataLayout DL(&M); + Type *IntPtrType = DL.getIntPtrType(M.getContext()); + + for (Module::iterator Iter = M.begin(), E = M.end(); Iter != E; ) { + Function *OldFunc = Iter++; + // Intrinsics' types must be left alone. + if (OldFunc->isIntrinsic()) + continue; + + FunctionConverter FC(IntPtrType); + FunctionType *NFTy = FC.convertFuncType(OldFunc->getFunctionType()); + OldFunc->setAttributes(RemovePointerAttrs(M.getContext(), + OldFunc->getAttributes())); + Function *NewFunc = RecreateFunction(OldFunc, NFTy); + + // Move the arguments across to the new function. + for (Function::arg_iterator Arg = OldFunc->arg_begin(), + E = OldFunc->arg_end(), NewArg = NewFunc->arg_begin(); + Arg != E; ++Arg, ++NewArg) { + FC.recordConverted(Arg, NewArg); + NewArg->takeName(Arg); + } + + // Convert the function body. + for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Iter = BB->begin(), E = BB->end(); + Iter != E; ) { + ConvertInstruction(&DL, IntPtrType, &FC, Iter++); + } + } + // Now that all the replacement instructions have been created, we + // can update the debug intrinsic calls. 
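Of the instructions recreated above, alloca gets the most involved rewrite: an alloca of N elements of type T becomes an i8 alloca of N times the allocation size of T (with the multiplication emitted as a real instruction when N is not constant), and its result is exposed to the rest of the function through ptrtoint. A standalone sketch of just the arithmetic, with illustrative names and numbers:

#include <stdint.h>

struct AllocaRequest {
  uint64_t ElementAllocSize;   // DataLayout::getTypeAllocSize(ElementTy)
  uint64_t ArraySize;          // the alloca's array-size operand
  unsigned Alignment;          // 0 means "none given"
};

uint64_t expandedByteCount(const AllocaRequest &R) {
  return R.ElementAllocSize * R.ArraySize;   // the multiplication made explicit
}

unsigned expandedAlignment(const AllocaRequest &R, unsigned PreferredAlign) {
  return R.Alignment ? R.Alignment : PreferredAlign;  // fall back as above
}

// "alloca i32, i32 10" with no alignment therefore becomes an i8 alloca of
// 40 bytes at i32's preferred alignment, followed by a ptrtoint to iPTR.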
+ for (Function::iterator BB = NewFunc->begin(), E = NewFunc->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Inst = BB->begin(), E = BB->end(); + Inst != E; ++Inst) { + if (IntrinsicInst *Call = dyn_cast(Inst)) { + if (Call->getIntrinsicID() == Intrinsic::dbg_declare) { + ConvertMetadataOperand(&FC, Call, 0); + } + } + } + } + FC.eraseReplacedInstructions(); + OldFunc->eraseFromParent(); + } + // Now that all functions have their normalized types, we can remove + // various casts. + for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { + CleanUpFunction(Func, IntPtrType); + // Delete the now-unused bitcast ConstantExprs that we created so + // that they don't interfere with StripDeadPrototypes. + Func->removeDeadConstantUsers(); + } + return true; +} + +ModulePass *llvm::createReplacePtrsWithIntsPass() { + return new ReplacePtrsWithInts(); +} diff --git a/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp new file mode 100644 index 000000000000..a6274002ad9b --- /dev/null +++ b/lib/Transforms/NaCl/ResolvePNaClIntrinsics.cpp @@ -0,0 +1,595 @@ +//===- ResolvePNaClIntrinsics.cpp - Resolve calls to PNaCl intrinsics ----====// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass resolves calls to PNaCl stable bitcode intrinsics. It is +// normally run in the PNaCl translator. +// +// Running AddPNaClExternalDeclsPass is a precondition for running this +// pass. They are separate because one is a ModulePass and the other is +// a FunctionPass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/NaClAtomicIntrinsics.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Transforms/NaCl.h" +#if defined(__pnacl__) +#include "native_client/src/untrusted/nacl/pnacl.h" +#endif + +using namespace llvm; + +namespace { +class ResolvePNaClIntrinsics : public FunctionPass { +public: + ResolvePNaClIntrinsics() : FunctionPass(ID) { + initializeResolvePNaClIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + + static char ID; + virtual bool runOnFunction(Function &F); + + /// Interface specifying how intrinsic calls should be resolved. Each + /// intrinsic call handled by the implementor will be visited by the + /// doResolve method. + class CallResolver { + public: + /// Called once per \p Call to the intrinsic in the module. + /// Returns true if the Function was changed. + bool resolve(IntrinsicInst *Call) { + // To be a well-behaving FunctionPass, don't touch uses in other + // functions. These will be handled when the pass manager gets to + // those functions. 
+ if (Call->getParent()->getParent() == &F) + return doResolve(Call); + return false; + } + Function *getDeclaration() const { return doGetDeclaration(); } + std::string getName() { return Intrinsic::getName(IntrinsicID); } + + protected: + Function &F; + Module *M; + Intrinsic::ID IntrinsicID; + + CallResolver(Function &F, Intrinsic::ID IntrinsicID) + : F(F), M(F.getParent()), IntrinsicID(IntrinsicID) {} + virtual ~CallResolver() {} + + /// The following pure virtual methods must be defined by + /// implementors, and will be called once per intrinsic call. + virtual Function *doGetDeclaration() const = 0; + /// Returns true if the Function was changed. + virtual bool doResolve(IntrinsicInst *Call) = 0; + + private: + CallResolver(const CallResolver &) LLVM_DELETED_FUNCTION; + CallResolver &operator=(const CallResolver &) LLVM_DELETED_FUNCTION; + }; + +private: + /// Visit all calls matching the \p Resolver's declaration, and invoke + /// the CallResolver methods on each of them. + bool visitCalls(CallResolver &Resolver); +}; + +/// Rewrite intrinsic calls to another function. +class IntrinsicCallToFunctionCall : + public ResolvePNaClIntrinsics::CallResolver { +public: + IntrinsicCallToFunctionCall(Function &F, Intrinsic::ID IntrinsicID, + const char *TargetFunctionName, + ArrayRef Tys = None) + : CallResolver(F, IntrinsicID), + TargetFunction(M->getFunction(TargetFunctionName)), Tys(Tys) { + // Expect to find the target function for this intrinsic already + // declared, even if it is never used. + if (!TargetFunction) + report_fatal_error(std::string( + "Expected to find external declaration of ") + TargetFunctionName); + } + virtual ~IntrinsicCallToFunctionCall() {} + +private: + Function *TargetFunction; + ArrayRef Tys; + + virtual Function *doGetDeclaration() const { + return Intrinsic::getDeclaration(M, IntrinsicID, Tys); + } + + virtual bool doResolve(IntrinsicInst *Call) { + Call->setCalledFunction(TargetFunction); + return true; + } + + IntrinsicCallToFunctionCall(const IntrinsicCallToFunctionCall &) + LLVM_DELETED_FUNCTION; + IntrinsicCallToFunctionCall &operator=(const IntrinsicCallToFunctionCall &) + LLVM_DELETED_FUNCTION; +}; + +/// Rewrite intrinsic calls to a constant whose value is determined by a +/// functor. This functor is called once per Call, and returns a +/// Constant that should replace the Call. +template +class ConstantCallResolver : public ResolvePNaClIntrinsics::CallResolver { +public: + ConstantCallResolver(Function &F, Intrinsic::ID IntrinsicID, Callable Functor, + ArrayRef Tys = None) + : CallResolver(F, IntrinsicID), Functor(Functor) {} + virtual ~ConstantCallResolver() {} + +private: + Callable Functor; + ArrayRef Tys; + + virtual Function *doGetDeclaration() const { + return Intrinsic::getDeclaration(M, IntrinsicID, Tys); + } + + virtual bool doResolve(IntrinsicInst *Call) { + Constant *C = Functor(Call); + Call->replaceAllUsesWith(C); + Call->eraseFromParent(); + return true; + } + + ConstantCallResolver(const ConstantCallResolver &) LLVM_DELETED_FUNCTION; + ConstantCallResolver &operator=(const ConstantCallResolver &) + LLVM_DELETED_FUNCTION; +}; + +/// Resolve __nacl_atomic_is_lock_free to true/false at translation +/// time. PNaCl's currently supported platforms all support lock-free +/// atomics at byte sizes {1,2,4,8} except for MIPS arch that supports +/// lock-free atomics at byte sizes {1,2,4}, and the alignment of the +/// pointer is always expected to be natural (as guaranteed by C11 and +/// C++11). 
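A standalone sketch of the decision this functor folds into a constant, with illustrative names (the real code keys off __builtin_nacl_target_arch() or host preprocessor macros, as the definition below does):

#include <stdint.h>

enum Arch { X86_32, X86_64, ARM_32, MIPS_32 };

bool isLockFree(Arch A, uint64_t ByteSize) {
  uint64_t MaxLockFreeByteSize = (A == MIPS_32) ? 4 : 8;
  return ByteSize <= MaxLockFreeByteSize;
}

// isLockFree(X86_32, 8) is true while isLockFree(MIPS_32, 8) is false; every
// other supported size (1, 2 and 4 bytes) is lock-free on all four targets.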
PNaCl's Module-level ABI verification checks that the byte +/// size is constant and in {1,2,4,8}. +struct IsLockFreeToConstant { + Constant *operator()(CallInst *Call) { + uint64_t MaxLockFreeByteSize = 8; + const APInt &ByteSize = + cast(Call->getOperand(0))->getUniqueInteger(); + +# if defined(__pnacl__) + switch (__builtin_nacl_target_arch()) { + case PnaclTargetArchitectureX86_32: + case PnaclTargetArchitectureX86_64: + case PnaclTargetArchitectureARM_32: + break; + case PnaclTargetArchitectureMips_32: + MaxLockFreeByteSize = 4; + break; + default: + return false; + } +# elif defined(__i386__) || defined(__x86_64__) || defined(__arm__) + // Continue. +# elif defined(__mips__) + MaxLockFreeByteSize = 4; +# else +# error "Unknown architecture" +# endif + + bool IsLockFree = ByteSize.ule(MaxLockFreeByteSize); + Constant *C = ConstantInt::get(Call->getType(), IsLockFree); + return C; + } +}; + +/// Rewrite atomic intrinsics to LLVM IR instructions. +class AtomicCallResolver : public ResolvePNaClIntrinsics::CallResolver { +public: + AtomicCallResolver(Function &F, + const NaCl::AtomicIntrinsics::AtomicIntrinsic *I) + : CallResolver(F, I->ID), I(I) {} + virtual ~AtomicCallResolver() {} + +private: + const NaCl::AtomicIntrinsics::AtomicIntrinsic *I; + + virtual Function *doGetDeclaration() const { return I->getDeclaration(M); } + + virtual bool doResolve(IntrinsicInst *Call) { + // Assume the @llvm.nacl.atomic.* intrinsics follow the PNaCl ABI: + // this should have been checked by the verifier. + bool isVolatile = false; + SynchronizationScope SS = CrossThread; + Instruction *I; + + switch (Call->getIntrinsicID()) { + default: + llvm_unreachable("unknown atomic intrinsic"); + case Intrinsic::nacl_atomic_load: + I = new LoadInst(Call->getArgOperand(0), "", isVolatile, + alignmentFromPointer(Call->getArgOperand(0)), + thawMemoryOrder(Call->getArgOperand(1)), SS, Call); + break; + case Intrinsic::nacl_atomic_store: + I = new StoreInst(Call->getArgOperand(0), Call->getArgOperand(1), + isVolatile, + alignmentFromPointer(Call->getArgOperand(1)), + thawMemoryOrder(Call->getArgOperand(2)), SS, Call); + break; + case Intrinsic::nacl_atomic_rmw: + if (needsX8632HackFor16BitAtomics(cast( + Call->getArgOperand(1)->getType())->getElementType())) { + // TODO(jfb) Remove this hack. See below. + atomic16BitX8632Hack(Call, false, Call->getArgOperand(1), + Call->getArgOperand(2), Call->getArgOperand(0), + NULL); + return true; + } + I = new AtomicRMWInst(thawRMWOperation(Call->getArgOperand(0)), + Call->getArgOperand(1), Call->getArgOperand(2), + thawMemoryOrder(Call->getArgOperand(3)), SS, Call); + break; + case Intrinsic::nacl_atomic_cmpxchg: + if (needsX8632HackFor16BitAtomics(cast( + Call->getArgOperand(0)->getType())->getElementType())) { + // TODO(jfb) Remove this hack. See below. + atomic16BitX8632Hack(Call, true, Call->getArgOperand(0), + Call->getArgOperand(2), NULL, + Call->getArgOperand(1)); + return true; + } + // TODO LLVM currently doesn't support specifying separate memory + // orders for compare exchange's success and failure cases: + // LLVM IR implicitly drops the Release part of the specified + // memory order on failure. It is therefore correct to map + // the success memory order onto the LLVM IR and ignore the + // failure one. 
+      I = new AtomicCmpXchgInst(Call->getArgOperand(0), Call->getArgOperand(1),
+                                Call->getArgOperand(2),
+                                thawMemoryOrder(Call->getArgOperand(3)), SS,
+                                Call);
+      break;
+    case Intrinsic::nacl_atomic_fence:
+      I = new FenceInst(M->getContext(),
+                        thawMemoryOrder(Call->getArgOperand(0)), SS, Call);
+      break;
+    case Intrinsic::nacl_atomic_fence_all: {
+      FunctionType *FTy =
+          FunctionType::get(Type::getVoidTy(M->getContext()), false);
+      std::string AsmString; // Empty.
+      std::string Constraints("~{memory}");
+      bool HasSideEffect = true;
+      CallInst *Asm = CallInst::Create(
+          InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect), "", Call);
+      CopyDebug(Asm, Call);
+      I = new FenceInst(M->getContext(), SequentiallyConsistent, SS, Asm);
+      Asm = CallInst::Create(
+          InlineAsm::get(FTy, AsmString, Constraints, HasSideEffect), "", I);
+      CopyDebug(Asm, Call);
+    } break;
+    }
+    I->setName(Call->getName());
+    CopyDebug(I, Call);
+    Call->replaceAllUsesWith(I);
+    Call->eraseFromParent();
+
+    return true;
+  }
+
+  unsigned alignmentFromPointer(const Value *Ptr) const {
+    const PointerType *PtrType = cast<PointerType>(Ptr->getType());
+    unsigned BitWidth = PtrType->getElementType()->getIntegerBitWidth();
+    return BitWidth / 8;
+  }
+
+  AtomicOrdering thawMemoryOrder(const Value *MemoryOrder) const {
+    NaCl::MemoryOrder MO = (NaCl::MemoryOrder)
+        cast<Constant>(MemoryOrder)->getUniqueInteger().getLimitedValue();
+    switch (MO) {
+    // Only valid values should pass validation.
+    default: llvm_unreachable("unknown memory order");
+    case NaCl::MemoryOrderRelaxed: return Monotonic;
+    // TODO Consume is unspecified by LLVM's internal IR.
+    case NaCl::MemoryOrderConsume: return SequentiallyConsistent;
+    case NaCl::MemoryOrderAcquire: return Acquire;
+    case NaCl::MemoryOrderRelease: return Release;
+    case NaCl::MemoryOrderAcquireRelease: return AcquireRelease;
+    case NaCl::MemoryOrderSequentiallyConsistent: return SequentiallyConsistent;
+    }
+  }
+
+  AtomicRMWInst::BinOp thawRMWOperation(const Value *Operation) const {
+    NaCl::AtomicRMWOperation Op = (NaCl::AtomicRMWOperation)
+        cast<Constant>(Operation)->getUniqueInteger().getLimitedValue();
+    switch (Op) {
+    // Only valid values should pass validation.
+    default: llvm_unreachable("unknown atomic RMW operation");
+    case NaCl::AtomicAdd: return AtomicRMWInst::Add;
+    case NaCl::AtomicSub: return AtomicRMWInst::Sub;
+    case NaCl::AtomicOr: return AtomicRMWInst::Or;
+    case NaCl::AtomicAnd: return AtomicRMWInst::And;
+    case NaCl::AtomicXor: return AtomicRMWInst::Xor;
+    case NaCl::AtomicExchange: return AtomicRMWInst::Xchg;
+    }
+  }
+
+  // TODO(jfb) Remove the following hacks once NaCl's x86-32 validator
+  // supports 16-bit atomic intrinsics. See:
+  // https://code.google.com/p/nativeclient/issues/detail?id=3579
+  // https://code.google.com/p/nativeclient/issues/detail?id=2981
+  // ===========================================================================
+  bool needsX8632HackFor16BitAtomics(Type *OverloadedType) const {
+    return Triple(M->getTargetTriple()).getArch() == Triple::x86 &&
+           OverloadedType == Type::getInt16Ty(M->getContext());
+  }
+
+  /// Expand the 16-bit Intrinsic into an equivalent 32-bit
+  /// compare-exchange loop.
+  void atomic16BitX8632Hack(IntrinsicInst *Call, bool IsCmpXChg,
+                            Value *Ptr16, Value *RHS, Value *RMWOp,
+                            Value *CmpXChgOldVal) const {
+    assert((IsCmpXChg ?
CmpXChgOldVal : RMWOp) && + "cmpxchg expects an old value, whereas RMW expects an operation"); + Type *I16 = Type::getInt16Ty(M->getContext()); + Type *I32 = Type::getInt32Ty(M->getContext()); + Type *I32Ptr = Type::getInt32PtrTy(M->getContext()); + + // Precede this with a compiler fence. + FunctionType *FTy = + FunctionType::get(Type::getVoidTy(M->getContext()), false); + std::string AsmString; // Empty. + std::string Constraints("~{memory}"); + bool HasSideEffect = true; + CopyDebug(CallInst::Create(InlineAsm::get(FTy, + AsmString, + Constraints, + HasSideEffect), + "", + Call), + Call); + + BasicBlock *CurrentBB = Call->getParent(); + IRBuilder<> IRB(CurrentBB, Call); + BasicBlock *Aligned32BB = + BasicBlock::Create(IRB.getContext(), "atomic16aligned32", + CurrentBB->getParent()); + BasicBlock *Aligned16BB = + BasicBlock::Create(IRB.getContext(), "atomic16aligned16", + CurrentBB->getParent()); + + // Setup. + // Align the 16-bit pointer to 32-bits, and figure out if the 16-bit + // operation is to be carried on the top or bottom half of the + // 32-bit aligned value. + Value *IPtr = CopyDebug(IRB.CreatePtrToInt(Ptr16, I32, "uintptr"), Call); + Value *IPtrAlign = CopyDebug(IRB.CreateAnd(IPtr, IRB.getInt32(~3u), "aligneduintptr"), Call); + Value *Aligned32 = CopyDebug(IRB.CreateAnd(IPtr, IRB.getInt32(3u), "aligned32"), Call); + Value *Ptr32 = CopyDebug(IRB.CreateIntToPtr(IPtrAlign, I32Ptr, "ptr32"), Call); + Value *IsAligned32 = CopyDebug(IRB.CreateICmpEQ(Aligned32, IRB.getInt32(0), + "isaligned32"), Call); + CopyDebug(IRB.CreateCondBr(IsAligned32, Aligned32BB, Aligned16BB), Call); + + // Create a diamond after the setup. The rest of the basic block + // that the Call was in is separated into the successor block. + BasicBlock *Successor = + CurrentBB->splitBasicBlock(IRB.GetInsertPoint(), "atomic16successor"); + // Remove the extra unconditional branch that the split added. + CurrentBB->getTerminator()->eraseFromParent(); + + // Aligned 32 block. + // The 16-bit value was aligned to 32-bits: + // - Atomically load the full 32-bit value. + // - Get the 16-bit value from its bottom. + // - Perform the 16-bit operation. + // - Truncate and merge the result back with the top half of the + // loaded value. + // - Try to compare-exchange this new 32-bit result. This will + // succeed if the value at the 32-bit location is still what was + // just loaded. If not, try the entire thing again. + // - Return the 16-bit value before the operation was performed. 
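+    //
+    // As a rough sketch, for an RMW "add" (value names match the IRBuilder
+    // calls below; %rhs stands for the 16-bit RMW operand):
+    //   atomic16aligned32:
+    //     %loaded = load atomic i32* %ptr32 seq_cst, align 4
+    //     %truncval = trunc i32 %loaded to i16
+    //     %res = add i16 %truncval, %rhs
+    //     %mergeres = zext i16 %res to i32
+    //     %maskedloaded = and i32 %loaded, 0xFFFF0000
+    //     %finalres = or i32 %mergeres, %maskedloaded
+    //     %oldval = cmpxchg i32* %ptr32, i32 %loaded, i32 %finalres seq_cst
+    //     %success = icmp eq i32 %oldval, %loaded
+    //     br i1 %success, label %atomic16successor, label %atomic16aligned32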
+ Value *Ret32; + { + IRB.SetInsertPoint(Aligned32BB); + LoadInst *Loaded = IRB.CreateAlignedLoad(Ptr32, 4, "loaded"); + CopyDebug(Loaded, Call); + Loaded->setAtomic(SequentiallyConsistent); + Value *TruncVal = CopyDebug(IRB.CreateTrunc(Loaded, I16, "truncval"), Call); + Ret32 = TruncVal; + Value *Res; + if (IsCmpXChg) { + Res = RHS; + } else { + switch (thawRMWOperation(RMWOp)) { + default: llvm_unreachable("unknown atomic RMW operation"); + case AtomicRMWInst::Add: + Res = IRB.CreateAdd(TruncVal, RHS, "res"); break; + case AtomicRMWInst::Sub: + Res = IRB.CreateSub(TruncVal, RHS, "res"); break; + case AtomicRMWInst::Or: + Res = IRB.CreateOr(TruncVal, RHS, "res"); break; + case AtomicRMWInst::And: + Res = IRB.CreateAnd(TruncVal, RHS, "res"); break; + case AtomicRMWInst::Xor: + Res = IRB.CreateXor(TruncVal, RHS, "res"); break; + case AtomicRMWInst::Xchg: + Res = RHS; break; + } + + CopyDebug(Res, Call); + } + Value *MergeRes = CopyDebug(IRB.CreateZExt(Res, I32, "mergeres"), Call); + Value *MaskedLoaded = CopyDebug(IRB.CreateAnd(Loaded, + IRB.getInt32(0xFFFF0000u), + "maskedloaded"), + Call); + Value *FinalRes = CopyDebug(IRB.CreateOr(MergeRes, MaskedLoaded, "finalres"), + Call); + Value *Expected = IsCmpXChg ? + CopyDebug(IRB.CreateOr(MaskedLoaded, + CopyDebug(IRB.CreateZExt(CmpXChgOldVal, I32, "zext"), Call), + "expected"), Call) : + Loaded; + Value *OldVal = CopyDebug(IRB.CreateAtomicCmpXchg(Ptr32, + Expected, + FinalRes, + SequentiallyConsistent), + Call); + OldVal->setName("oldval"); + // Test that the entire 32-bit value didn't change during the operation. + Value *Success = CopyDebug(IRB.CreateICmpEQ(OldVal, Loaded, "success"), Call); + CopyDebug(IRB.CreateCondBr(Success, Successor, Aligned32BB), Call); + } + + // Aligned 16 block. + // Similar to the above aligned 32 block, but the 16-bit value is in + // the top half of the 32-bit value. It needs to be shifted down, + // and shifted back up before being merged in. + Value *Ret16; + { + IRB.SetInsertPoint(Aligned16BB); + LoadInst *Loaded = IRB.CreateAlignedLoad(Ptr32, 4, "loaded"); + CopyDebug(Loaded, Call); + Loaded->setAtomic(SequentiallyConsistent); + Value *ShVal = CopyDebug(IRB.CreateTrunc(IRB.CreateLShr(Loaded, + 16, + "lshr"), + I16, + "shval"), + Call); + Ret16 = ShVal; + Value *Res; + if (IsCmpXChg) { + Res = RHS; + } else { + switch (thawRMWOperation(RMWOp)) { + default: llvm_unreachable("unknown atomic RMW operation"); + case AtomicRMWInst::Add: + Res = IRB.CreateAdd(ShVal, RHS, "res"); break; + case AtomicRMWInst::Sub: + Res = IRB.CreateSub(ShVal, RHS, "res"); break; + case AtomicRMWInst::Or: + Res = IRB.CreateOr(ShVal, RHS, "res"); break; + case AtomicRMWInst::And: + Res = IRB.CreateAnd(ShVal, RHS, "res"); break; + case AtomicRMWInst::Xor: + Res = IRB.CreateXor(ShVal, RHS, "res"); break; + case AtomicRMWInst::Xchg: + Res = RHS; break; + } + + CopyDebug(Res, Call); + } + Value *MergeRes = CopyDebug(IRB.CreateShl(IRB.CreateZExt(Res, + I32, + "zext"), + 16, + "mergeres"), + Call); + Value *MaskedLoaded = CopyDebug(IRB.CreateAnd(Loaded, IRB.getInt32(0xFFFF), + "maskedloaded"), Call); + Value *FinalRes = CopyDebug(IRB.CreateOr(MergeRes, MaskedLoaded, "finalres"), Call); + Value *Expected = IsCmpXChg ? 
+ CopyDebug(IRB.CreateOr(MaskedLoaded, IRB.CreateShl( + IRB.CreateZExt(CmpXChgOldVal, I32, "zext"), 16, "shl"), + "expected"), Call) : + Loaded; + Value *OldVal = CopyDebug(IRB.CreateAtomicCmpXchg(Ptr32, Expected, FinalRes, + SequentiallyConsistent), Call); + OldVal->setName("oldval"); + // Test that the entire 32-bit value didn't change during the operation. + Value *Success = CopyDebug(IRB.CreateICmpEQ(OldVal, Loaded, "success"), Call); + CopyDebug(IRB.CreateCondBr(Success, Successor, Aligned16BB), Call); + } + + // Merge the value, and remove the original intrinsic Call. + IRB.SetInsertPoint(Successor->getFirstInsertionPt()); + PHINode *PHI = CopyDebug(IRB.CreatePHI(I16, 2), Call); + PHI->addIncoming(Ret32, Aligned32BB); + PHI->addIncoming(Ret16, Aligned16BB); + Call->replaceAllUsesWith(PHI); + Call->eraseFromParent(); + + // Finish everything with another compiler fence. + CopyDebug(CallInst::Create(InlineAsm::get( + FTy, AsmString, Constraints, HasSideEffect), "", + Successor->getFirstInsertionPt()), Call); + } + // =========================================================================== + // End hacks. + + AtomicCallResolver(const AtomicCallResolver &); + AtomicCallResolver &operator=(const AtomicCallResolver &); +}; +} + +bool ResolvePNaClIntrinsics::visitCalls( + ResolvePNaClIntrinsics::CallResolver &Resolver) { + bool Changed = false; + Function *IntrinsicFunction = Resolver.getDeclaration(); + if (!IntrinsicFunction) + return false; + + for (Value::use_iterator UI = IntrinsicFunction->use_begin(), + UE = IntrinsicFunction->use_end(); + UI != UE;) { + // At this point, the only uses of the intrinsic can be calls, since + // we assume this pass runs on bitcode that passed ABI verification. + IntrinsicInst *Call = dyn_cast(*UI++); + if (!Call) + report_fatal_error("Expected use of intrinsic to be a call: " + + Resolver.getName()); + + Changed |= Resolver.resolve(Call); + } + + return Changed; +} + +bool ResolvePNaClIntrinsics::runOnFunction(Function &F) { + LLVMContext &C = F.getParent()->getContext(); + bool Changed = false; + + IntrinsicCallToFunctionCall SetJmpResolver(F, Intrinsic::nacl_setjmp, + "setjmp"); + IntrinsicCallToFunctionCall LongJmpResolver(F, Intrinsic::nacl_longjmp, + "longjmp"); + Changed |= visitCalls(SetJmpResolver); + Changed |= visitCalls(LongJmpResolver); + + NaCl::AtomicIntrinsics AI(C); + NaCl::AtomicIntrinsics::View V = AI.allIntrinsicsAndOverloads(); + for (NaCl::AtomicIntrinsics::View::iterator I = V.begin(), E = V.end(); + I != E; ++I) { + AtomicCallResolver AtomicResolver(F, I); + Changed |= visitCalls(AtomicResolver); + } + + ConstantCallResolver IsLockFreeResolver( + F, Intrinsic::nacl_atomic_is_lock_free, IsLockFreeToConstant()); + Changed |= visitCalls(IsLockFreeResolver); + + return Changed; +} + +char ResolvePNaClIntrinsics::ID = 0; +INITIALIZE_PASS(ResolvePNaClIntrinsics, "resolve-pnacl-intrinsics", + "Resolve PNaCl intrinsic calls", false, false) + +FunctionPass *llvm::createResolvePNaClIntrinsicsPass() { + return new ResolvePNaClIntrinsics(); +} diff --git a/lib/Transforms/NaCl/RewriteAtomics.cpp b/lib/Transforms/NaCl/RewriteAtomics.cpp new file mode 100644 index 000000000000..9e98d8b6f39e --- /dev/null +++ b/lib/Transforms/NaCl/RewriteAtomics.cpp @@ -0,0 +1,452 @@ +//===- RewriteAtomics.cpp - Stabilize instructions used for concurrency ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This pass encodes atomics, volatiles and fences using NaCl intrinsics +// instead of LLVM's regular IR instructions. +// +// All of the above are transformed into one of the +// @llvm.nacl.atomic.* intrinsics. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/NaClAtomicIntrinsics.h" +#include "llvm/InstVisitor.h" +#include "llvm/Pass.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include + +using namespace llvm; + +template +std::string ToStr(const T &V); + +namespace { + class RewriteAtomics : + public ModulePass, + public InstVisitor { +public: + static char ID; // Pass identification, replacement for typeid + RewriteAtomics() + : ModulePass(ID) + , M(NULL) + , TD(NULL) + , C(NULL) + , ModifiedModule(false) + , AI(NULL) { + // This is a module pass because it may have to introduce + // intrinsic declarations into the module and modify a global function. + initializeRewriteAtomicsPass(*PassRegistry::getPassRegistry()); + } + Module* M; + const DataLayout* TD; + LLVMContext* C; + bool ModifiedModule; + NaCl::AtomicIntrinsics* AI; + + // NAnd atomics require a bit more surgery than what InstVisitor can cope with. + typedef std::vector::iterator delayed_iterator; + std::vector m_delayed; + + virtual bool runOnModule(Module &M); + virtual void getAnalysisUsage(AnalysisUsage &Info) const { + Info.addRequired(); + } + + inline bool modifiedModule() const { return ModifiedModule; } + + void visitLoadInst(LoadInst &I); + void visitStoreInst(StoreInst &I); + void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I); + void visitAtomicRMWInst(AtomicRMWInst &I); + void visitFenceInst(FenceInst &I); + +private: + + void rewriteRMWNandInst(AtomicRMWInst& I); + + /// Create an integer constant holding a NaCl::MemoryOrder that can be + /// passed as an argument to one of the @llvm.nacl.atomic.* + /// intrinsics. This function may strengthen the ordering initially + /// specified by the instruction \p I for stability purpose. + template + ConstantInt *freezeMemoryOrder(const Instruction &I) const; + + /// Sanity-check that instruction \p I which has pointer and value + /// parameters have matching sizes \p BitSize for the type-pointed-to + /// and the value's type \p T. + void checkSizeMatchesType(const Instruction &I, unsigned BitSize, + const Type *T) const; + + /// Verify that loads and stores are at least naturally aligned. Use + /// byte alignment because converting to bits could truncate the + /// value. + void checkAlignment(const Instruction &I, unsigned ByteAlignment, + unsigned ByteSize) const; + + /// Create a cast before Instruction \p I from \p Src to \p Dst with \p Name. 
+ inline CastInst *createCast(Instruction &I, + Value *Src, + Type *Dst, + Twine Name) const { + return createCast(I, Src, Dst, Name, &I); + } + template + CastInst *createCast(Instruction &I, + Value *Src, + Type *Dst, + Twine Name, + CastInsertion* CastWhere) const; + + /// Helper function which rewrites a single instruction \p I to a + /// particular intrinsic \p ID with overloaded type \p OverloadedType, + /// and argument list \p Args. Will perform a bitcast to the proper \p + /// DstType, if different from \p OverloadedType. + void replaceInstructionWithIntrinsicCall(Instruction &I, Intrinsic::ID ID, + Type *DstType, Type *OverloadedType, + ArrayRef Args); + + /// Most atomics instructions deal with at least one pointer, this + /// struct automates some of this and has generic sanity checks. + template struct PointerHelper { + Value *P; + Type *OriginalPET; + Type *PET; + unsigned BitSize; + template + PointerHelper(const RewriteAtomics* const AV, + Instruction &I, + CastInsertion* CastWhere) + : P(I.getPointerOperand()) { + if (I.getPointerAddressSpace() != 0) + report_fatal_error("unhandled pointer address space " + + Twine(I.getPointerAddressSpace()) + " for atomic: " + + ToStr(I)); + assert(P->getType()->isPointerTy() && "expected a pointer"); + PET = OriginalPET = P->getType()->getPointerElementType(); + BitSize = AV->TD->getTypeSizeInBits(OriginalPET); + if (!OriginalPET->isIntegerTy()) { + // The pointer wasn't to an integer type. We define atomics in + // terms of integers, so bitcast the pointer to an integer of + // the proper width. + Type *IntNPtr = Type::getIntNPtrTy(*AV->C, BitSize); + P = AV->createCast(I, P, IntNPtr, P->getName() + ".cast"); + PET = P->getType()->getPointerElementType(); + } + AV->checkSizeMatchesType(I, BitSize, PET); + } + }; +}; +} + +template std::string ToStr(const T &V) { + std::string S; + raw_string_ostream OS(S); + OS << const_cast(V); + return OS.str(); +} +char RewriteAtomics::ID = 0; +INITIALIZE_PASS(RewriteAtomics, "nacl-rewrite-atomics", + "rewrite atomics, volatiles and fences into stable " + "@llvm.nacl.atomics.* intrinsics", + false, false) + +bool RewriteAtomics::runOnModule(Module &M) { + const DataLayout DL(getAnalysis()); + this->TD = &DL; + this->M = &M; + this->C = &M.getContext(); + NaCl::AtomicIntrinsics AI(*C); + this->AI = &AI; + + visit(M); + + const delayed_iterator end = m_delayed.end(); + for(delayed_iterator i = m_delayed.begin(); i != end; ++i) { + rewriteRMWNandInst(**i); + } + m_delayed.clear(); + + this->TD = NULL; + this->M = NULL; + this->C = NULL; + this->AI = NULL; + return modifiedModule(); +} + +void RewriteAtomics::rewriteRMWNandInst(AtomicRMWInst& I) { + ModifiedModule = true; + // this excerpt from PointerHelper is here because the initial + // atomic load needs to be placed in the same block as the + // pointer operand. + if (I.getPointerAddressSpace() != 0) { + report_fatal_error("unhandled pointer address space " + + Twine(I.getPointerAddressSpace()) + " for atomic: " + + ToStr(I)); + } + Function* ThisFun = I.getParent()->getParent(); + + BasicBlock* ThisBlock = I.getParent(); + BasicBlock* InitialBlock = isa(I.getPointerOperand()) ? 
+ cast(I.getPointerOperand())->getParent() : &ThisFun->getEntryBlock(); + + PointerHelper PH(this, I, InitialBlock); + + Function* LoadF = AI->find(Intrinsic::nacl_atomic_load, PH.PET)->getDeclaration(M); + Value* LoadCallArgs[] = {PH.P, freezeMemoryOrder(I)}; + CallInst* LoadCall = CopyDebug(CallInst::Create(LoadF, LoadCallArgs, "", &I), &I); + + BasicBlock* CmpXchgLoop = SplitBlock(ThisBlock, &I, this); + PHINode* Loop = CopyDebug(PHINode::Create(PH.PET, 2, "", CmpXchgLoop->begin()), &I); + Loop->addIncoming(LoadCall, ThisBlock); + + BinaryOperator* NotOp = CopyDebug(BinaryOperator::CreateNot(LoadCall, "", &I), &I); + BinaryOperator* AndOp = CopyDebug(BinaryOperator::Create(Instruction::And, + NotOp, + I.getValOperand(), + "", + &I), &I); + Function* CmpXchgF = AI->find(Intrinsic::nacl_atomic_cmpxchg, PH.PET)->getDeclaration(M); + Value* CmpXchgArgs[] = {PH.P, Loop, AndOp, + freezeMemoryOrder(I), + freezeMemoryOrder(I)}; + CallInst* CmpXchg = CopyDebug(CallInst::Create(CmpXchgF, CmpXchgArgs, "", &I), &I); + ICmpInst* Cmp = CopyDebug(new ICmpInst(&I, CmpInst::ICMP_EQ, CmpXchg, AndOp), &I); + BasicBlock* Rest = SplitBlock(CmpXchgLoop, &I, this); + BranchInst* LoopBranch = cast(CmpXchgLoop->getTerminator()); + LoopBranch->dropAllReferences(); + LoopBranch->eraseFromParent(); + LoopBranch = CopyDebug(BranchInst::Create(Rest, CmpXchgLoop, Cmp, CmpXchgLoop), &I); + Loop->addIncoming(CmpXchg, CmpXchgLoop); + + PHINode* PhiRest = CopyDebug(PHINode::Create(PH.PET, 1, "", Rest->begin()), &I); + PhiRest->addIncoming(CmpXchg, CmpXchgLoop); + + Instruction* Res; + if(PH.PET != PH.OriginalPET) { + Res = CopyDebug(createCast(I, PhiRest, PH.OriginalPET, I.getName() + ".cast"), &I); + } else + Res = PhiRest; + + I.replaceAllUsesWith(Res); + I.eraseFromParent(); +} + +template +ConstantInt *RewriteAtomics::freezeMemoryOrder(const Instruction &I) const { + NaCl::MemoryOrder AO = NaCl::MemoryOrderInvalid; + + // TODO Volatile load/store are promoted to sequentially consistent + // for now. We could do something weaker. + if (const LoadInst *L = dyn_cast(&I)) { + if (L->isVolatile()) + AO = NaCl::MemoryOrderSequentiallyConsistent; + } else if (const StoreInst *S = dyn_cast(&I)) { + if (S->isVolatile()) + AO = NaCl::MemoryOrderSequentiallyConsistent; + } + + if (AO == NaCl::MemoryOrderInvalid) { + switch (I.getOrdering()) { + default: + case NotAtomic: llvm_unreachable("unexpected memory order"); + // Monotonic is a strict superset of Unordered. Both can therefore + // map to Relaxed ordering, which is in the C11/C++11 standard. + case Unordered: AO = NaCl::MemoryOrderRelaxed; break; + case Monotonic: AO = NaCl::MemoryOrderRelaxed; break; + // TODO Consume is currently unspecified by LLVM's internal IR. + case Acquire: AO = NaCl::MemoryOrderAcquire; break; + case Release: AO = NaCl::MemoryOrderRelease; break; + case AcquireRelease: AO = NaCl::MemoryOrderAcquireRelease; break; + case SequentiallyConsistent: + AO = NaCl::MemoryOrderSequentiallyConsistent; break; + } + } + + // TODO For now only sequential consistency is allowed. 
+  AO = NaCl::MemoryOrderSequentiallyConsistent;
+
+  return ConstantInt::get(Type::getInt32Ty(*C), AO);
+}
+
+void RewriteAtomics::checkSizeMatchesType(const Instruction &I, unsigned BitSize,
+                                          const Type *T) const {
+  Type *IntType = Type::getIntNTy(*C, BitSize);
+  if (IntType && T == IntType)
+    return;
+  report_fatal_error("unsupported atomic type " + ToStr(*T) + " of size " +
+                     Twine(BitSize) + " bits in: " + ToStr(I));
+}
+
+void RewriteAtomics::checkAlignment(const Instruction &I, unsigned ByteAlignment,
+                                    unsigned ByteSize) const {
+  if (ByteAlignment < ByteSize)
+    report_fatal_error("atomic load/store must be at least naturally aligned, "
+                       "got " + Twine(ByteAlignment) + " bytes, expected at "
+                       "least " + Twine(ByteSize) + " bytes, in: " + ToStr(I));
+}
+
+template <class CastInsertion>
+CastInst *RewriteAtomics::createCast(Instruction &I, Value *Src, Type *Dst,
+                                     Twine Name, CastInsertion* CastWhere) const {
+  Type *SrcT = Src->getType();
+  Instruction::CastOps Op = SrcT->isIntegerTy() && Dst->isPointerTy()
+                                ? Instruction::IntToPtr
+                                : SrcT->isPointerTy() && Dst->isIntegerTy()
+                                      ? Instruction::PtrToInt
+                                      : Instruction::BitCast;
+  if (!CastInst::castIsValid(Op, Src, Dst))
+    report_fatal_error("cannot emit atomic instruction while converting type " +
+                       ToStr(*SrcT) + " to " + ToStr(*Dst) + " for " + Name +
+                       " in " + ToStr(I));
+  return CastInst::Create(Op, Src, Dst, Name, CastWhere);
+}
+
+void RewriteAtomics::replaceInstructionWithIntrinsicCall(
+    Instruction &I, Intrinsic::ID ID, Type *DstType, Type *OverloadedType,
+    ArrayRef<Value *> Args) {
+  std::string Name(I.getName());
+  Function *F = AI->find(ID, OverloadedType)->getDeclaration(M);
+  CallInst *Call = CopyDebug(CallInst::Create(F, Args, "", &I), &I);
+  Instruction *Res = Call;
+  if (!Call->getType()->isVoidTy() && DstType != OverloadedType) {
+    // The call returns a value which needs to be cast to a non-integer.
+    Res = CopyDebug(createCast(I, Call, DstType, Name + ".cast"), &I);
+  }
+  I.replaceAllUsesWith(Res);
+  I.eraseFromParent();
+  Call->setName(Name);
+  ModifiedModule = true;
+}
+
+/// %res = load {atomic|volatile} T* %ptr memory_order, align sizeof(T)
+/// becomes:
+/// %res = call T @llvm.nacl.atomic.load.i<size>(%ptr, memory_order)
+void RewriteAtomics::visitLoadInst(LoadInst &I) {
+  if (I.isSimple())
+    return;
+  PointerHelper<LoadInst> PH(this, I, &I);
+  checkAlignment(I, I.getAlignment(), PH.BitSize / CHAR_BIT);
+  Value *Args[] = { PH.P, freezeMemoryOrder(I) };
+  replaceInstructionWithIntrinsicCall(I, Intrinsic::nacl_atomic_load,
+                                      PH.OriginalPET, PH.PET, Args);
+}
+
+/// store {atomic|volatile} T %val, T* %ptr memory_order, align sizeof(T)
+/// becomes:
+/// call void @llvm.nacl.atomic.store.i<size>(%val, %ptr, memory_order)
+void RewriteAtomics::visitStoreInst(StoreInst &I) {
+  if (I.isSimple())
+    return;
+  PointerHelper<StoreInst> PH(this, I, &I);
+  checkAlignment(I, I.getAlignment(), PH.BitSize / CHAR_BIT);
+  Value *V = I.getValueOperand();
+  if (!V->getType()->isIntegerTy()) {
+    // The store isn't of an integer type. We define atomics in terms of
+    // integers, so bitcast the value to store to an integer of the
+    // proper width.
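+    //
+    // For example (a sketch; value names illustrative): a volatile store of a
+    // float becomes roughly
+    //   %val.cast = bitcast float %val to i32
+    //   call void @llvm.nacl.atomic.store.i32(i32 %val.cast, i32* %ptr.cast,
+    //                                         i32 <seq_cst>)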
+ CastInst *Cast = CopyDebug(createCast(I, V, Type::getIntNTy(*C, PH.BitSize), + V->getName() + ".cast"), &I); + V = Cast; + } + checkSizeMatchesType(I, PH.BitSize, V->getType()); + Value *Args[] = { V, PH.P, freezeMemoryOrder(I) }; + replaceInstructionWithIntrinsicCall(I, Intrinsic::nacl_atomic_store, + PH.OriginalPET, PH.PET, Args); +} + +/// %res = atomicrmw OP T* %ptr, T %val memory_order +/// becomes: +/// %res = call T @llvm.nacl.atomic.rmw.i(OP, %ptr, %val, memory_order) +void RewriteAtomics::visitAtomicRMWInst(AtomicRMWInst &I) { + NaCl::AtomicRMWOperation Op; + switch (I.getOperation()) { + default: report_fatal_error("unsupported atomicrmw operation: " + ToStr(I)); + case AtomicRMWInst::Add: Op = NaCl::AtomicAdd; break; + case AtomicRMWInst::Sub: Op = NaCl::AtomicSub; break; + case AtomicRMWInst::And: Op = NaCl::AtomicAnd; break; + case AtomicRMWInst::Or: Op = NaCl::AtomicOr; break; + case AtomicRMWInst::Xor: Op = NaCl::AtomicXor; break; + case AtomicRMWInst::Xchg: Op = NaCl::AtomicExchange; break; + case AtomicRMWInst::Nand: m_delayed.push_back(&I); return; + } + + PointerHelper PH(this, I, &I); + checkSizeMatchesType(I, PH.BitSize, I.getValOperand()->getType()); + Value *Args[] = { ConstantInt::get(Type::getInt32Ty(*C), Op), PH.P, + I.getValOperand(), freezeMemoryOrder(I) }; + replaceInstructionWithIntrinsicCall(I, Intrinsic::nacl_atomic_rmw, + PH.OriginalPET, PH.PET, Args); +} + +/// %res = cmpxchg T* %ptr, T %old, T %new memory_order +/// becomes: +/// %res = call T @llvm.nacl.atomic.cmpxchg.i( +/// %object, %expected, %desired, memory_order_success, +/// memory_order_failure) +void RewriteAtomics::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) { + PointerHelper PH(this, I, &I); + checkSizeMatchesType(I, PH.BitSize, I.getCompareOperand()->getType()); + checkSizeMatchesType(I, PH.BitSize, I.getNewValOperand()->getType()); + // TODO LLVM currently doesn't support specifying separate memory + // orders for compare exchange's success and failure cases: LLVM + // IR implicitly drops the Release part of the specified memory + // order on failure. + Value *Args[] = { PH.P, I.getCompareOperand(), I.getNewValOperand(), + freezeMemoryOrder(I), freezeMemoryOrder(I) }; + replaceInstructionWithIntrinsicCall(I, Intrinsic::nacl_atomic_cmpxchg, + PH.OriginalPET, PH.PET, Args); +} + +/// fence memory_order +/// becomes: +/// call void @llvm.nacl.atomic.fence(memory_order) +/// and +/// call void asm sideeffect "", "~{memory}"() +/// fence seq_cst +/// call void asm sideeffect "", "~{memory}"() +/// becomes: +/// call void asm sideeffect "", "~{memory}"() +/// call void @llvm.nacl.atomic.fence.all() +/// call void asm sideeffect "", "~{memory}"() +/// Note that the assembly gets eliminated by the -remove-asm-memory pass. +void RewriteAtomics::visitFenceInst(FenceInst &I) { + Type *T = Type::getInt32Ty(*C); // Fences aren't overloaded on type. + BasicBlock::InstListType &IL(I.getParent()->getInstList()); + bool isFirst = IL.empty() || &*I.getParent()->getInstList().begin() == &I; + bool isLast = IL.empty() || &*I.getParent()->getInstList().rbegin() == &I; + CallInst *PrevC = isFirst ? 0 : dyn_cast(I.getPrevNode()); + CallInst *NextC = isLast ? 
0 : dyn_cast(I.getNextNode()); + + if ((PrevC && PrevC->isInlineAsm() && + cast(PrevC->getCalledValue())->isAsmMemory()) && + (NextC && NextC->isInlineAsm() && + cast(NextC->getCalledValue())->isAsmMemory()) && + I.getOrdering() == SequentiallyConsistent) { + replaceInstructionWithIntrinsicCall(I, Intrinsic::nacl_atomic_fence_all, T, + T, ArrayRef()); + } else { + Value *Args[] = { freezeMemoryOrder(I) }; + replaceInstructionWithIntrinsicCall(I, Intrinsic::nacl_atomic_fence, T, T, + Args); + } +} + +ModulePass *llvm::createRewriteAtomicsPass() { return new RewriteAtomics(); } diff --git a/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp b/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp new file mode 100644 index 000000000000..7e8415220756 --- /dev/null +++ b/lib/Transforms/NaCl/RewriteLLVMIntrinsics.cpp @@ -0,0 +1,145 @@ +//===- RewriteLLVMIntrinsics.cpp - Rewrite LLVM intrinsics to other values ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces calls to LLVM intrinsics that are *not* part of the +// PNaCl stable bitcode ABI into simpler values. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Constants.h" +#include "llvm/Pass.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/NaCl.h" +#include + +using namespace llvm; + +namespace { +class RewriteLLVMIntrinsics : public ModulePass { +public: + static char ID; + RewriteLLVMIntrinsics() : ModulePass(ID) { + // This is a module pass because this makes it easier to access uses + // of global intrinsic functions. + initializeRewriteLLVMIntrinsicsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + + /// Rewrite an intrinsic to something different. + class IntrinsicRewriter { + public: + Function *function() const { return F; } + /// Called once per \p Call of the Intrinsic Function. + void rewriteCall(CallInst *Call) { doRewriteCall(Call); } + + protected: + IntrinsicRewriter(Module &M, Intrinsic::ID IntrinsicID) + : F(Intrinsic::getDeclaration(&M, IntrinsicID)) {} + virtual ~IntrinsicRewriter() {} + /// This pure virtual method must be defined by implementors, and + /// will be called by rewriteCall. + virtual void doRewriteCall(CallInst *Call) = 0; + + Function *F; + + private: + IntrinsicRewriter() LLVM_DELETED_FUNCTION; + IntrinsicRewriter(const IntrinsicRewriter &) LLVM_DELETED_FUNCTION; + IntrinsicRewriter &operator=( + const IntrinsicRewriter &) LLVM_DELETED_FUNCTION; + }; + +private: + /// Visit all uses of a Function, rewrite it using the \p Rewriter, + /// and then delete the Call. Later delete the Function from the + /// Module. Returns true if the Module was changed. + bool visitUses(IntrinsicRewriter &Rewriter); +}; + +/// Rewrite a Call to nothing. +class ToNothing : public RewriteLLVMIntrinsics::IntrinsicRewriter { +public: + ToNothing(Module &M, Intrinsic::ID IntrinsicID) + : IntrinsicRewriter(M, IntrinsicID) {} + virtual ~ToNothing() {} + +protected: + virtual void doRewriteCall(CallInst *Call) { + // Nothing to do: the visit does the deletion. + } +}; + +/// Rewrite a Call to a ConstantInt of the same type. 
+class ToConstantInt : public RewriteLLVMIntrinsics::IntrinsicRewriter { +public: + ToConstantInt(Module &M, Intrinsic::ID IntrinsicID, uint64_t Value) + : IntrinsicRewriter(M, IntrinsicID), Value(Value), + RetType(function()->getFunctionType()->getReturnType()) {} + virtual ~ToConstantInt() {} + +protected: + virtual void doRewriteCall(CallInst *Call) { + Constant *C = ConstantInt::get(RetType, Value); + Call->replaceAllUsesWith(C); + } + +private: + uint64_t Value; + Type *RetType; +}; +} + +char RewriteLLVMIntrinsics::ID = 0; +INITIALIZE_PASS(RewriteLLVMIntrinsics, "rewrite-llvm-intrinsic-calls", + "Rewrite LLVM intrinsic calls to simpler expressions", false, + false) + +bool RewriteLLVMIntrinsics::runOnModule(Module &M) { + // Replace all uses of the @llvm.flt.rounds intrinsic with the constant + // "1" (round-to-nearest). Until we add a second intrinsic like + // @llvm.set.flt.round it is impossible to have a rounding mode that is + // not the initial rounding mode (round-to-nearest). We can remove + // this rewrite after adding a set() intrinsic. + ToConstantInt FltRoundsRewriter(M, Intrinsic::flt_rounds, 1); + + // Remove all @llvm.prefetch intrinsics. + ToNothing PrefetchRewriter(M, Intrinsic::prefetch); + + return visitUses(FltRoundsRewriter) | visitUses(PrefetchRewriter); +} + +bool RewriteLLVMIntrinsics::visitUses(IntrinsicRewriter &Rewriter) { + bool Changed = false; + Function *F = Rewriter.function(); + for (Value::use_iterator UI = F->use_begin(), UE = F->use_end(); UI != UE;) { + Value *Use = *UI++; + if (CallInst *Call = dyn_cast(Use)) { + Rewriter.rewriteCall(Call); + Call->eraseFromParent(); + Changed = true; + } else { + // Intrinsics we care about currently don't need to handle this case. + std::string S; + raw_string_ostream OS(S); + OS << "Taking the address of this intrinsic is invalid: " << *Use; + report_fatal_error(OS.str()); + } + } + F->eraseFromParent(); + return Changed; +} + +ModulePass *llvm::createRewriteLLVMIntrinsicsPass() { + return new RewriteLLVMIntrinsics(); +} diff --git a/lib/Transforms/NaCl/RewritePNaClLibraryCalls.cpp b/lib/Transforms/NaCl/RewritePNaClLibraryCalls.cpp new file mode 100644 index 000000000000..fda24e457144 --- /dev/null +++ b/lib/Transforms/NaCl/RewritePNaClLibraryCalls.cpp @@ -0,0 +1,549 @@ +//===- RewritePNaClLibraryCalls.cpp - PNaCl library calls to intrinsics ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces calls to known library functions with calls to intrinsics +// that are part of the PNaCl stable bitcode ABI. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Constants.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" +#include + +using namespace llvm; + +namespace { + class RewritePNaClLibraryCalls : public ModulePass { + public: + static char ID; + RewritePNaClLibraryCalls() : + ModulePass(ID), TheModule(NULL), Context(NULL), SetjmpIntrinsic(NULL), + LongjmpIntrinsic(NULL), MemcpyIntrinsic(NULL), + MemmoveIntrinsic(NULL), MemsetIntrinsic(NULL) { + // This is a module pass because it may have to introduce + // intrinsic declarations into the module and modify globals. 
+ initializeRewritePNaClLibraryCallsPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + private: + typedef void (RewritePNaClLibraryCalls::*RewriteCallFunc)(CallInst *); + typedef void (RewritePNaClLibraryCalls::*PopulateWrapperFunc)(Function *); + + /// Handles a certain pattern of library function -> intrinsic rewrites. + /// Currently all library functions this pass knows how to rewrite fall into + /// this pattern. + /// RewriteLibraryCall performs the rewrite for a single library function + /// and is customized by its arguments. + /// + /// \p LibraryFunctionName Name of the library function to look for. + /// \p CorrectFunctionType is the correct type of this library function. + /// \p CallRewriter Method that rewrites the library function call into an + /// intrinsic call. + /// \p OnlyCallsAllowed Only calls to this library function are allowed. + /// \p WrapperPopulator called to populate the body of the library function + /// with a wrapped intrinsic call. + bool RewriteLibraryCall( + const char *LibraryFunctionName, + FunctionType *CorrectFunctionType, + RewriteCallFunc CallRewriter, + bool OnlyCallsAllowed, + PopulateWrapperFunc WrapperPopulator); + + /// Two function types are compatible if they have compatible return types + /// and the same number of compatible parameters. Return types and + /// parameters are compatible if they are exactly the same type or both are + /// pointer types. + static bool compatibleFunctionTypes(FunctionType *FTy1, FunctionType *FTy2); + static bool compatibleParamOrRetTypes(Type *Ty1, Type *Ty2); + + void rewriteSetjmpCall(CallInst *Call); + void rewriteLongjmpCall(CallInst *Call); + void rewriteMemcpyCall(CallInst *Call); + void rewriteMemmoveCall(CallInst *Call); + void rewriteMemsetCall(CallInst *Call); + + void populateSetjmpWrapper(Function *SetjmpFunc); + void populateLongjmpWrapper(Function *LongjmpFunc); + void populateMemcpyWrapper(Function *MemcpyFunc); + void populateMemmoveWrapper(Function *MemmoveFunc); + void populateMemsetWrapper(Function *MemsetFunc); + + /// Generic implementation of populating a wrapper function. + /// Initially, the function exists in the module as a declaration with + /// unnamed arguments. This method is called with a NULL-terminated list + /// of argument names that get assigned in the generated IR for + /// readability. + void populateWrapperCommon( + Function *Func, + StringRef FuncName, + RewriteCallFunc CallRewriter, + bool CallCannotReturn, + ...); + + /// Find and cache known intrinsics. + Function *findSetjmpIntrinsic(); + Function *findLongjmpIntrinsic(); + Function *findMemcpyIntrinsic(); + Function *findMemmoveIntrinsic(); + Function *findMemsetIntrinsic(); + + /// Cached data that remains the same throughout a module run. + Module *TheModule; + LLVMContext *Context; + + /// These are cached but computed lazily. 
+ Function *SetjmpIntrinsic; + Function *LongjmpIntrinsic; + Function *MemcpyIntrinsic; + Function *MemmoveIntrinsic; + Function *MemsetIntrinsic; + }; +} + +char RewritePNaClLibraryCalls::ID = 0; +INITIALIZE_PASS(RewritePNaClLibraryCalls, "rewrite-pnacl-library-calls", + "Rewrite PNaCl library calls to stable intrinsics", + false, false) + +bool RewritePNaClLibraryCalls::RewriteLibraryCall( + const char *LibraryFunctionName, + FunctionType *CorrectFunctionType, + RewriteCallFunc CallRewriter, + bool OnlyCallsAllowed, + PopulateWrapperFunc WrapperPopulator) { + bool Changed = false; + + Function *LibFunc = TheModule->getFunction(LibraryFunctionName); + + // Iterate over all uses of this function, if it exists in the module with + // external linkage. If it exists but the linkage is not external, this may + // come from code that defines its own private function with the same name + // and doesn't actually include the standard libc header declaring it. + // In such a case we leave the code as it is. + // + // Another case we need to handle here is this function having the wrong + // prototype (incompatible with the C library function prototype, and hence + // incompatible with the intrinsic). In general, this is undefined behavior, + // but we can't fail compilation because some workflows rely on it + // compiling correctly (for example, autoconf). The solution is: + // When the declared type of the function in the module is not correct, we + // re-create the function with the correct prototype and replace all calls + // to this new function (casted to the old function type). Effectively this + // delays the undefined behavior until run-time. + if (LibFunc && LibFunc->hasExternalLinkage()) { + if (!compatibleFunctionTypes(LibFunc->getFunctionType(), + CorrectFunctionType)) { + // Use the RecreateFunction utility to create a new function with the + // correct prototype. RecreateFunction also RAUWs the function with + // proper bitcasts. + // + // One interesting case that may arise is when the original module had + // calls to both a correct and an incorrect version of the library + // function. Depending on the linking order, either version could be + // selected as the global declaration in the module, so even valid calls + // could end up being bitcast-ed from the incorrect to the correct + // function type. The RecreateFunction call below will eliminate such + // bitcasts (because the new type matches the call type), but dead + // constant expressions may be left behind. + // These are cleaned up with removeDeadConstantUsers. + Function *NewFunc = RecreateFunction(LibFunc, CorrectFunctionType); + LibFunc->eraseFromParent(); + NewFunc->setLinkage(Function::InternalLinkage); + Changed = true; + NewFunc->removeDeadConstantUsers(); + LibFunc = NewFunc; + } + + // Handle all uses that are calls. These are simply replaced with + // equivalent intrinsic calls. + for (Value::use_iterator UI = LibFunc->use_begin(), + UE = LibFunc->use_end(); UI != UE;) { + Value *Use = *UI++; + // use_iterator will also provide call instructions in which the used + // value is an argument, and not the value being called. Make sure we + // rewrite only actual calls to LibFunc here. + if (CallInst *Call = dyn_cast(Use)) { + if (Call->getCalledValue() == LibFunc) { + (this->*(CallRewriter))(Call); + Changed = true; + } + } + } + + if (LibFunc->use_empty()) { + LibFunc->eraseFromParent(); + } else if (OnlyCallsAllowed) { + // If additional uses remain, these aren't calls. 
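+      // (for example, the address of the library function was taken), which
+      // cannot be rewritten to an intrinsic call, so fail hard.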
+ report_fatal_error(Twine("Taking the address of ") + + LibraryFunctionName + " is invalid"); + } else { + // If non-call uses remain and allowed for this function, populate it + // with a wrapper. + (this->*(WrapperPopulator))(LibFunc); + LibFunc->setLinkage(Function::InternalLinkage); + Changed = true; + } + } + + return Changed; +} + +bool RewritePNaClLibraryCalls::runOnModule(Module &M) { + TheModule = &M; + Context = &TheModule->getContext(); + bool Changed = false; + + Type *Int8PtrTy = Type::getInt8PtrTy(*Context); + Type *Int64PtrTy = Type::getInt64PtrTy(*Context); + Type *Int32Ty = Type::getInt32Ty(*Context); + Type *VoidTy = Type::getVoidTy(*Context); + + Type *SetjmpParams[] = { Int64PtrTy }; + FunctionType *SetjmpFunctionType = FunctionType::get(Int32Ty, SetjmpParams, + false); + Changed |= RewriteLibraryCall( + "setjmp", + SetjmpFunctionType, + &RewritePNaClLibraryCalls::rewriteSetjmpCall, + true, + &RewritePNaClLibraryCalls::populateSetjmpWrapper); + + Type *LongjmpParams[] = { Int64PtrTy, Int32Ty }; + FunctionType *LongjmpFunctionType = FunctionType::get(VoidTy, LongjmpParams, + false); + Changed |= RewriteLibraryCall( + "longjmp", + LongjmpFunctionType, + &RewritePNaClLibraryCalls::rewriteLongjmpCall, + false, + &RewritePNaClLibraryCalls::populateLongjmpWrapper); + + Type *MemsetParams[] = { Int8PtrTy, Int32Ty, Int32Ty }; + FunctionType *MemsetFunctionType = FunctionType::get(Int8PtrTy, MemsetParams, + false); + Changed |= RewriteLibraryCall( + "memset", + MemsetFunctionType, + &RewritePNaClLibraryCalls::rewriteMemsetCall, + false, + &RewritePNaClLibraryCalls::populateMemsetWrapper); + + Type *MemcpyParams[] = { Int8PtrTy, Int8PtrTy, Int32Ty }; + FunctionType *MemcpyFunctionType = FunctionType::get(Int8PtrTy, MemcpyParams, + false); + Changed |= RewriteLibraryCall( + "memcpy", + MemcpyFunctionType, + &RewritePNaClLibraryCalls::rewriteMemcpyCall, + false, + &RewritePNaClLibraryCalls::populateMemcpyWrapper); + + Type *MemmoveParams[] = { Int8PtrTy, Int8PtrTy, Int32Ty }; + FunctionType *MemmoveFunctionType = FunctionType::get(Int8PtrTy, + MemmoveParams, + false); + Changed |= RewriteLibraryCall( + "memmove", + MemmoveFunctionType, + &RewritePNaClLibraryCalls::rewriteMemmoveCall, + false, + &RewritePNaClLibraryCalls::populateMemmoveWrapper); + + return Changed; +} + +bool RewritePNaClLibraryCalls::compatibleFunctionTypes(FunctionType *FTy1, + FunctionType *FTy2) { + if (FTy1->getNumParams() != FTy2->getNumParams()) { + return false; + } + + if (!compatibleParamOrRetTypes(FTy1->getReturnType(), + FTy2->getReturnType())) { + return false; + } + + for (unsigned I = 0, End = FTy1->getNumParams(); I != End; ++I) { + if (!compatibleParamOrRetTypes(FTy1->getParamType(I), + FTy2->getParamType(I))) { + return false; + } + } + + return true; +} + +bool RewritePNaClLibraryCalls::compatibleParamOrRetTypes(Type *Ty1, + Type *Ty2) { + return (Ty1 == Ty2 || (Ty1->isPointerTy() && Ty2->isPointerTy())); +} + +void RewritePNaClLibraryCalls::rewriteSetjmpCall(CallInst *Call) { + // Find the intrinsic function. + Function *NaClSetjmpFunc = findSetjmpIntrinsic(); + // Cast the jmp_buf argument to the type NaClSetjmpCall expects. + Type *PtrTy = NaClSetjmpFunc->getFunctionType()->getParamType(0); + BitCastInst *JmpBufCast = CopyDebug(new BitCastInst(Call->getArgOperand(0), + PtrTy, + "jmp_buf_i8", + Call), + Call); + + // Emit the updated call. 
+  Value *Args[] = { JmpBufCast };
+  CallInst *NaClSetjmpCall = CopyDebug(CallInst::Create(NaClSetjmpFunc, Args, "", Call), Call);
+  NaClSetjmpCall->takeName(Call);
+
+  // Replace the original call.
+  Call->replaceAllUsesWith(NaClSetjmpCall);
+  Call->eraseFromParent();
+}
+
+void RewritePNaClLibraryCalls::rewriteLongjmpCall(CallInst *Call) {
+  // Find the intrinsic function.
+  Function *NaClLongjmpFunc = findLongjmpIntrinsic();
+  // Cast the jmp_buf argument to the type NaClLongjmpCall expects.
+  Type *PtrTy = NaClLongjmpFunc->getFunctionType()->getParamType(0);
+  BitCastInst *JmpBufCast = CopyDebug(new BitCastInst(Call->getArgOperand(0),
+                                                      PtrTy,
+                                                      "jmp_buf_i8",
+                                                      Call),
+                                      Call);
+
+  // Emit the call.
+  Value *Args[] = { JmpBufCast, Call->getArgOperand(1) };
+  CopyDebug(CallInst::Create(NaClLongjmpFunc, Args, "", Call), Call);
+  // No takeName here since longjmp is a void call that does not get assigned
+  // to a value.
+
+  // Remove the original call. There's no need for RAUW because longjmp
+  // returns void.
+  Call->eraseFromParent();
+}
+
+void RewritePNaClLibraryCalls::rewriteMemcpyCall(CallInst *Call) {
+  Function *MemcpyIntrinsic = findMemcpyIntrinsic();
+  // dest, src, len, align, isvolatile
+  Value *Args[] = { Call->getArgOperand(0),
+                    Call->getArgOperand(1),
+                    Call->getArgOperand(2),
+                    ConstantInt::get(Type::getInt32Ty(*Context), 1),
+                    ConstantInt::get(Type::getInt1Ty(*Context), 0) };
+  CopyDebug(CallInst::Create(MemcpyIntrinsic,
+                             Args,
+                             "",
+                             Call),
+            Call);
+
+  // libc memcpy returns the destination pointer, but the LLVM intrinsic
+  // doesn't; if the return value has actual uses, just replace them with the
+  // dest argument itself.
+  Call->replaceAllUsesWith(Call->getArgOperand(0));
+  Call->eraseFromParent();
+}
+
+void RewritePNaClLibraryCalls::rewriteMemmoveCall(CallInst *Call) {
+  Function *MemmoveIntrinsic = findMemmoveIntrinsic();
+  // dest, src, len, align, isvolatile
+  Value *Args[] = { Call->getArgOperand(0),
+                    Call->getArgOperand(1),
+                    Call->getArgOperand(2),
+                    ConstantInt::get(Type::getInt32Ty(*Context), 1),
+                    ConstantInt::get(Type::getInt1Ty(*Context), 0) };
+  CopyDebug(CallInst::Create(MemmoveIntrinsic,
+                             Args,
+                             "",
+                             Call),
+            Call);
+
+  // libc memmove returns the destination pointer, but the LLVM intrinsic
+  // doesn't; if the return value has actual uses, just replace them with the
+  // dest argument itself.
+  Call->replaceAllUsesWith(Call->getArgOperand(0));
+  Call->eraseFromParent();
+}
+
+void RewritePNaClLibraryCalls::rewriteMemsetCall(CallInst *Call) {
+  Function *MemsetIntrinsic = findMemsetIntrinsic();
+  // libc memset has 'int c' for the filler byte, but the LLVM intrinsic uses
+  // an i8; truncation is required.
+  TruncInst *ByteTrunc = CopyDebug(new TruncInst(Call->getArgOperand(1),
+                                                 Type::getInt8Ty(*Context),
+                                                 "trunc_byte",
+                                                 Call),
+                                   Call);
+
+  // dest, val, len, align, isvolatile
+  Value *Args[] = { Call->getArgOperand(0),
+                    ByteTrunc,
+                    Call->getArgOperand(2),
+                    ConstantInt::get(Type::getInt32Ty(*Context), 1),
+                    ConstantInt::get(Type::getInt1Ty(*Context), 0) };
+  CopyDebug(CallInst::Create(MemsetIntrinsic,
+                             Args,
+                             "",
+                             Call),
+            Call);
+
+  // libc memset returns the destination pointer, but the LLVM intrinsic
+  // doesn't; if the return value has actual uses, just replace them with the
+  // dest argument itself.
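+  //
+  // As a rough sketch, with the i32-based prototype declared in runOnModule
+  // (names illustrative):
+  //   %r = call i8* @memset(i8* %dest, i32 %c, i32 %n)
+  // becomes
+  //   %trunc_byte = trunc i32 %c to i8
+  //   call void @llvm.memset.p0i8.i32(i8* %dest, i8 %trunc_byte, i32 %n,
+  //                                   i32 1, i1 false)
+  // and any users of %r are pointed at %dest instead.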
+ Call->replaceAllUsesWith(Call->getArgOperand(0)); + Call->eraseFromParent(); +} + +void RewritePNaClLibraryCalls::populateWrapperCommon( + Function *Func, + StringRef FuncName, + RewriteCallFunc CallRewriter, + bool CallCannotReturn, + ...) { + if (!Func->isDeclaration()) { + report_fatal_error(Twine("Expected ") + FuncName + + " to be declared, not defined"); + } + + // Populate the function body with code. + BasicBlock *BB = BasicBlock::Create(*Context, "entry", Func); + + // Collect and name the function arguments. + Function::arg_iterator FuncArgs = Func->arg_begin(); + SmallVector Args; + va_list ap; + va_start(ap, CallCannotReturn); + while (true) { + // Iterate over the varargs until a terminated NULL is encountered. + const char *ArgName = va_arg(ap, const char *); + if (!ArgName) + break; + Value *Arg = FuncArgs++; + Arg->setName(ArgName); + Args.push_back(Arg); + } + va_end(ap); + + // Emit a call to self, and then call CallRewriter to rewrite it to the + // intrinsic. This is done in order to keep the call rewriting logic in a + // single place. + CallInst *SelfCall = CallInst::Create(Func, Args, "", BB); + + if (CallCannotReturn) { + new UnreachableInst(*Context, BB); + } else if (Func->getReturnType()->isVoidTy()) { + ReturnInst::Create(*Context, BB); + } else { + ReturnInst::Create(*Context, SelfCall, BB); + } + + (this->*(CallRewriter))(SelfCall); +} + +void RewritePNaClLibraryCalls::populateSetjmpWrapper(Function *SetjmpFunc) { + populateWrapperCommon( + /* Func */ SetjmpFunc, + /* FuncName */ "setjmp", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteSetjmpCall, + /* CallCannotReturn */ false, + /* ... */ "env", NULL); +} + +void RewritePNaClLibraryCalls::populateLongjmpWrapper(Function *LongjmpFunc) { + populateWrapperCommon( + /* Func */ LongjmpFunc, + /* FuncName */ "longjmp", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteLongjmpCall, + /* CallCannotReturn */ true, + /* ... */ "env", "val", NULL); +} + +void RewritePNaClLibraryCalls::populateMemcpyWrapper(Function *MemcpyFunc) { + populateWrapperCommon( + /* Func */ MemcpyFunc, + /* FuncName */ "memcpy", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteMemcpyCall, + /* CallCannotReturn */ false, + /* ... */ "dest", "src", "len", NULL); +} + +void RewritePNaClLibraryCalls::populateMemmoveWrapper(Function *MemmoveFunc) { + populateWrapperCommon( + /* Func */ MemmoveFunc, + /* FuncName */ "memmove", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteMemmoveCall, + /* CallCannotReturn */ false, + /* ... */ "dest", "src", "len", NULL); +} + +void RewritePNaClLibraryCalls::populateMemsetWrapper(Function *MemsetFunc) { + populateWrapperCommon( + /* Func */ MemsetFunc, + /* FuncName */ "memset", + /* CallRewriter */ &RewritePNaClLibraryCalls::rewriteMemsetCall, + /* CallCannotReturn */ false, + /* ... 
*/ "dest", "val", "len", NULL); +} + +Function *RewritePNaClLibraryCalls::findSetjmpIntrinsic() { + if (!SetjmpIntrinsic) { + SetjmpIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::nacl_setjmp); + } + return SetjmpIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findLongjmpIntrinsic() { + if (!LongjmpIntrinsic) { + LongjmpIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::nacl_longjmp); + } + return LongjmpIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findMemcpyIntrinsic() { + if (!MemcpyIntrinsic) { + Type *Tys[] = { Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt32Ty(*Context) }; + MemcpyIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::memcpy, Tys); + } + return MemcpyIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findMemmoveIntrinsic() { + if (!MemmoveIntrinsic) { + Type *Tys[] = { Type::getInt8PtrTy(*Context), + Type::getInt8PtrTy(*Context), + Type::getInt32Ty(*Context) }; + MemmoveIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::memmove, Tys); + } + return MemmoveIntrinsic; +} + +Function *RewritePNaClLibraryCalls::findMemsetIntrinsic() { + if (!MemsetIntrinsic) { + Type *Tys[] = { Type::getInt8PtrTy(*Context), Type::getInt32Ty(*Context) }; + MemsetIntrinsic = Intrinsic::getDeclaration( + TheModule, Intrinsic::memset, Tys); + } + return MemsetIntrinsic; +} + +ModulePass *llvm::createRewritePNaClLibraryCallsPass() { + return new RewritePNaClLibraryCalls(); +} diff --git a/lib/Transforms/NaCl/StripAttributes.cpp b/lib/Transforms/NaCl/StripAttributes.cpp new file mode 100644 index 000000000000..16a42b258aa9 --- /dev/null +++ b/lib/Transforms/NaCl/StripAttributes.cpp @@ -0,0 +1,243 @@ +//===- StripAttributes.cpp - Remove attributes not supported by PNaCl------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass strips out attributes that are not supported by PNaCl's +// stable ABI. Currently, this strips out: +// +// * Function and argument attributes from functions and function +// calls. +// * Calling conventions from functions and function calls. +// * The "align" attribute on functions. +// * The alignment argument of memcpy/memmove/memset intrinsic calls. +// * The "unnamed_addr" attribute on functions and global variables. +// * The distinction between "internal" and "private" linkage. +// * "protected" and "internal" visibility of functions and globals. +// * The arithmetic attributes "nsw", "nuw" and "exact". +// * It reduces the set of possible "align" attributes on memory +// accesses. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + // This is a ModulePass so that it can modify attributes of global + // variables. 
+ class StripAttributes : public ModulePass { + public: + static char ID; // Pass identification, replacement for typeid + StripAttributes() : ModulePass(ID) { + initializeStripAttributesPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + }; +} + +char StripAttributes::ID = 0; +INITIALIZE_PASS(StripAttributes, "nacl-strip-attributes", + "Strip out attributes that are not part of PNaCl's ABI", + false, false) + +// Most attributes are just hints which can safely be removed. A few +// attributes can break programs if removed, so check all attributes +// before removing them, in case LLVM adds new attributes. +static void CheckAttributes(AttributeSet Attrs) { + for (unsigned Slot = 0; Slot < Attrs.getNumSlots(); ++Slot) { + for (AttributeSet::iterator Attr = Attrs.begin(Slot), E = Attrs.end(Slot); + Attr != E; ++Attr) { + if (!Attr->isEnumAttribute()) { + continue; + } + switch (Attr->getKindAsEnum()) { + // The following attributes can affect calling conventions. + // Rather than complaining, we just strip these out. + // ExpandSmallArguments should have rendered SExt/ZExt + // meaningless since the function arguments will be at least + // 32-bit. + case Attribute::InReg: + case Attribute::SExt: + case Attribute::ZExt: + // These attributes influence ABI decisions that should not be + // visible to PNaCl pexes. + case Attribute::NonLazyBind: // Only relevant to dynamic linking. + case Attribute::NoRedZone: + case Attribute::StackAlignment: + + // The following attributes are just hints, which can be + // safely removed. + case Attribute::AlwaysInline: + case Attribute::InlineHint: + case Attribute::MinSize: + case Attribute::NoAlias: + case Attribute::NoBuiltin: + case Attribute::NoCapture: + case Attribute::NoDuplicate: + case Attribute::NoImplicitFloat: + case Attribute::NoInline: + case Attribute::NoReturn: + case Attribute::OptimizeForSize: + case Attribute::ReadNone: + case Attribute::ReadOnly: + case Attribute::Cold: + + // PNaCl does not support -fstack-protector in the translator. + case Attribute::StackProtect: + case Attribute::StackProtectReq: + case Attribute::StackProtectStrong: + // PNaCl does not support ASan in the translator. + case Attribute::SanitizeAddress: + case Attribute::SanitizeThread: + case Attribute::SanitizeMemory: + + // The Language References cites setjmp() as an example of a + // function which returns twice, and says ReturnsTwice is + // necessary to disable optimizations such as tail calls. + // However, in the PNaCl ABI, setjmp() is an intrinsic, and + // user-defined functions are not allowed to return twice. + case Attribute::ReturnsTwice: + + // NoUnwind is not a hint if it causes unwind info to be + // omitted, since this will prevent C++ exceptions from + // propagating. In the future, when PNaCl supports zero-cost + // C++ exception handling using unwind info, we might allow + // NoUnwind and UWTable. Alternatively, we might continue to + // disallow them, and just generate unwind info for all + // functions. 
+ case Attribute::NoUnwind: + case Attribute::UWTable: + break; + + case Attribute::ByVal: + case Attribute::StructRet: + case Attribute::Alignment: + Attrs.dump(); + report_fatal_error( + "Attribute should already have been removed by ExpandByVal"); + + case Attribute::Naked: + case Attribute::Nest: + Attrs.dump(); + report_fatal_error("Unsupported attribute"); + + default: + Attrs.dump(); + report_fatal_error("Unrecognized attribute"); + } + } + } +} + +void stripGlobalValueAttrs(GlobalValue *GV) { + // In case source code uses __attribute__((visibility("hidden"))) or + // __attribute__((visibility("protected"))), strip these attributes. + GV->setVisibility(GlobalValue::DefaultVisibility); + + GV->setUnnamedAddr(false); + + // Convert "private" linkage to "internal" to reduce the number of + // linkage types that need to be represented in PNaCl's wire format. + // + // We convert "private" to "internal" rather than vice versa because + // "private" symbols are omitted from the nexe's symbol table, which + // would get in the way of debugging when an unstripped pexe is + // translated offline. + if (GV->getLinkage() == GlobalValue::PrivateLinkage) + GV->setLinkage(GlobalValue::InternalLinkage); +} + +static unsigned normalizeAlignment(DataLayout *DL, unsigned Alignment, + Type *Ty, bool IsAtomic) { + unsigned MaxAllowed = 1; + if (Ty->isDoubleTy() || Ty->isFloatTy() || IsAtomic) + MaxAllowed = DL->getTypeAllocSize(Ty); + // If the alignment is set to 0, this means "use the default + // alignment for the target", which we fill in explicitly. + if (Alignment == 0 || Alignment >= MaxAllowed) + return MaxAllowed; + return 1; +} + +void stripFunctionAttrs(DataLayout *DL, Function *Func) { + CheckAttributes(Func->getAttributes()); + Func->setAttributes(AttributeSet()); + Func->setCallingConv(CallingConv::C); + Func->setAlignment(0); + + for (Function::iterator BB = Func->begin(), E = Func->end(); + BB != E; ++BB) { + for (BasicBlock::iterator Inst = BB->begin(), E = BB->end(); + Inst != E; ++Inst) { + CallSite Call(Inst); + if (Call) { + CheckAttributes(Call.getAttributes()); + Call.setAttributes(AttributeSet()); + Call.setCallingConv(CallingConv::C); + + // Set memcpy(), memmove() and memset() to use pessimistic + // alignment assumptions. + if (MemIntrinsic *MemOp = dyn_cast<MemIntrinsic>(Inst)) { + Type *AlignTy = MemOp->getAlignmentCst()->getType(); + MemOp->setAlignment(ConstantInt::get(AlignTy, 1)); + } + } else if (OverflowingBinaryOperator *Op = + dyn_cast<OverflowingBinaryOperator>(Inst)) { + cast<BinaryOperator>(Op)->setHasNoUnsignedWrap(false); + cast<BinaryOperator>(Op)->setHasNoSignedWrap(false); + } else if (PossiblyExactOperator *Op = + dyn_cast<PossiblyExactOperator>(Inst)) { + cast<BinaryOperator>(Op)->setIsExact(false); + } else if (LoadInst *Load = dyn_cast<LoadInst>(Inst)) { + Load->setAlignment(normalizeAlignment( + DL, Load->getAlignment(), + Load->getType(), + Load->isAtomic())); + } else if (StoreInst *Store = dyn_cast<StoreInst>(Inst)) { + Store->setAlignment(normalizeAlignment( + DL, Store->getAlignment(), + Store->getValueOperand()->getType(), + Store->isAtomic())); + } + } + } +} + +bool StripAttributes::runOnModule(Module &M) { + DataLayout DL(&M); + for (Module::iterator Func = M.begin(), E = M.end(); Func != E; ++Func) { + // Avoid stripping attributes from intrinsics because the + // constructor for Functions just adds them back again. It would + // be confusing if the attributes were sometimes present on + // intrinsics and sometimes not.
+ // I'm disabling this check for the time being, until I have Rust building + // pexes directly instead of just passing translated IR to pnacl-clang. + if (true || !Func->isIntrinsic()) { + stripGlobalValueAttrs(Func); + stripFunctionAttrs(&DL, Func); + } + } + for (Module::global_iterator GV = M.global_begin(), E = M.global_end(); + GV != E; ++GV) { + stripGlobalValueAttrs(GV); + } + return true; +} + +ModulePass *llvm::createStripAttributesPass() { + return new StripAttributes(); +} diff --git a/lib/Transforms/NaCl/StripMetadata.cpp b/lib/Transforms/NaCl/StripMetadata.cpp new file mode 100644 index 000000000000..41052e82845c --- /dev/null +++ b/lib/Transforms/NaCl/StripMetadata.cpp @@ -0,0 +1,97 @@ +//===- StripMetadata.cpp - Strip non-stable non-debug metadata ------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The StripMetadata transformation strips instruction attachment +// metadata, such as !tbaa and !prof metadata. It also strips named +// metadata that is not whitelisted below. +// +// It does not strip debug metadata. Debug metadata is used by debug +// intrinsic functions and calls to those intrinsic functions. Use the +// -strip-debug or -strip pass to strip that instead. +// +// The goal of this pass is to reduce bitcode ABI surface area. +// We don't know yet which kind of metadata is considered stable. +//===----------------------------------------------------------------------===// + +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/Transforms/NaCl.h" + +using namespace llvm; + +namespace { + class StripMetadata : public ModulePass { + public: + static char ID; + explicit StripMetadata() : ModulePass(ID) { + initializeStripMetadataPass(*PassRegistry::getPassRegistry()); + } + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + } + }; +} + +char StripMetadata::ID = 0; +INITIALIZE_PASS(StripMetadata, "strip-metadata", + "Strip all non-stable non-debug metadata from a module.", + false, false) + +ModulePass *llvm::createStripMetadataPass() { + return new StripMetadata(); +} + +static bool IsWhitelistedMetadata(const NamedMDNode *node) { + // Leave debug metadata to the -strip-debug pass. + return node->getName().startswith("llvm.dbg."); +} + +static bool DoStripMetadata(Module &M) { + bool Changed = false; + + for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI) { + for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE; ++FI) { + for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE; + ++BI) { + if (!BI->hasMetadataOtherThanDebugLoc()) + continue; // Nothing to do. + + SmallVector<std::pair<unsigned, MDNode *>, 8> InstMeta; + // Let the debug metadata be stripped by the -strip-debug pass. + BI->getAllMetadata(InstMeta); + for (size_t i = 0; i < InstMeta.size(); ++i) { + if (InstMeta[i].first == LLVMContext::MD_dbg) + continue; + BI->setMetadata(InstMeta[i].first, NULL); + Changed = true; + } + } + } + } + + // Strip unsupported named metadata.
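+ // Collect the nodes first and erase them in a second loop: erasing a + // node while still iterating over the module's named-metadata list + // would invalidate the iterator.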
+ SmallVector<NamedMDNode *, 8> ToErase; + for (Module::NamedMDListType::iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); I != E; ++I) { + if (!IsWhitelistedMetadata(I)) + ToErase.push_back(I); + } + for (size_t i = 0; i < ToErase.size(); ++i) + M.eraseNamedMetadata(ToErase[i]); + + return Changed; +} + +bool StripMetadata::runOnModule(Module &M) { + return DoStripMetadata(M); +} diff --git a/test/NaCl/Bitcode/alloca-operand.ll b/test/NaCl/Bitcode/alloca-operand.ll new file mode 100644 index 000000000000..fcabead6c7a7 --- /dev/null +++ b/test/NaCl/Bitcode/alloca-operand.ll @@ -0,0 +1,29 @@ +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s + +; Test that alloca's size operand is represented with a relative value +; ID, the same as other instructions' operands. + +define external void @_start(i32 %arg) { +; CHECK: + + %size = mul i32 %arg, 4 +; CHECK-NEXT: +} diff --git a/test/NaCl/Bitcode/bcanalyzer-width.ll b/test/NaCl/Bitcode/bcanalyzer-width.ll new file mode 100644 index 000000000000..e9c7c289cc19 --- /dev/null +++ b/test/NaCl/Bitcode/bcanalyzer-width.ll @@ -0,0 +1,23 @@ +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s -check-prefix=BC +; RUN: llvm-as < %s | pnacl-freeze \ +; RUN: | pnacl-bcanalyzer -dump-records -operands-per-line=2 \ +; RUN: | FileCheck %s -check-prefix=BC2 +; RUN: llvm-as < %s | pnacl-freeze \ +; RUN: | pnacl-bcanalyzer -dump-records -operands-per-line=8 \ +; RUN: | FileCheck %s -check-prefix=BC8 + +; Test that the command-line option -operands-per-line works as expected. + +@bytes = internal global [10 x i8] c"abcdefghij" + +; BC: + +; BC2: + +; BC8: diff --git a/test/NaCl/Bitcode/bccompress.ll b/test/NaCl/Bitcode/bccompress.ll new file mode 100644 index 000000000000..8f7f92b229a3 --- /dev/null +++ b/test/NaCl/Bitcode/bccompress.ll @@ -0,0 +1,1029 @@ +; Simple test to see if pnacl-bccompress maintains bitcode. + +; Test 1: Show that we generate the same disassembled code. +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bccompress \ +; RUN: | pnacl-thaw | llvm-dis - | FileCheck %s +; ModuleID = 'bccompress.thaw' + +; Test 2: Show that both the precompressed and the compressed versions +; of the bitcode contain the same records.
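+; Both pipelines below are checked against the same DUMP prefix, so any +; record that pnacl-bccompress adds, drops or reorders shows up as a +; FileCheck mismatch.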
+; RUN: llvm-as < %s | pnacl-freeze \ +; RUN: | pnacl-bcanalyzer -operands-per-line=6 -dump-records \ +; RUN: | FileCheck %s --check-prefix DUMP +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bccompress \ +; RUN: | pnacl-bcanalyzer -operands-per-line=6 -dump-records \ +; RUN: | FileCheck %s --check-prefix DUMP + +@bytes7 = internal global [7 x i8] c"abcdefg" +; CHECK: @bytes7 = internal global [7 x i8] c"abcdefg" + +@ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32) +; CHECK-NEXT: @ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32) + +@ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32) +; CHECK-NEXT: @ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32) + +@compound = internal global <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }> +; CHECK-NEXT: @compound = internal global <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }> + +@ptr = internal global i32 ptrtoint ([7 x i8]* @bytes7 to i32) +; CHECK-NEXT: @ptr = internal global i32 ptrtoint ([7 x i8]* @bytes7 to i32) + +@addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1) +; CHECK-NEXT: @addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1) + +@addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1) +; CHECK-NEXT: @addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1) + +@addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes7 to i32), i32 1) +; CHECK-NEXT: @addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes7 to i32), i32 1) + +@addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes7 to i32), i32 7) +; CHECK-NEXT: @addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes7 to i32), i32 7) + +@addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes7 to i32), i32 9) +; CHECK-NEXT: @addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes7 to i32), i32 9) + +@addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1) +; CHECK-NEXT: @addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1) + +@addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4) +; CHECK-NEXT: @addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4) + +@ptr_to_func_align = internal global i32 ptrtoint (void ()* @func to i32), align 8 +; CHECK-NEXT: @ptr_to_func_align = internal global i32 ptrtoint (void ()* @func to i32), align 8 + +@char = internal constant [1 x i8] c"0" +; CHECK-NEXT: @char = internal constant [1 x i8] c"0" + +@short = internal constant [2 x i8] zeroinitializer +; CHECK-NEXT: @short = internal constant [2 x i8] zeroinitializer + +@bytes = internal global [4 x i8] c"abcd" +; CHECK-NEXT: @bytes = internal global [4 x i8] c"abcd" + +declare i32 @bar(i32) +; CHECK-LABEL: declare i32 @bar(i32) + +define void @func() { + ret void +} + +; CHECK-LABEL: define void @func() { +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @AllocCastSimple() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + %3 = bitcast [4 x i8]* @bytes to i32* + store i32 %2, i32* %3, align 1 + ret void +} + +; CHECK-LABEL: define void @AllocCastSimple() { +; CHECK-NEXT: %1 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %2 = ptrtoint i8* %1 to i32 +; CHECK-NEXT: %3 = bitcast [4 x i8]* 
@bytes to i32* +; CHECK-NEXT: store i32 %2, i32* %3, align 1 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @AllocCastSimpleReversed() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + %3 = bitcast [4 x i8]* @bytes to i32* + store i32 %2, i32* %3, align 1 + ret void +} + +; CHECK-LABEL: define void @AllocCastSimpleReversed() { +; CHECK-NEXT: %1 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %2 = ptrtoint i8* %1 to i32 +; CHECK-NEXT: %3 = bitcast [4 x i8]* @bytes to i32* +; CHECK-NEXT: store i32 %2, i32* %3, align 1 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @AllocCastDelete() { + %1 = alloca i8, i32 4, align 8 + %2 = alloca i8, i32 4, align 8 + ret void +} + +; CHECK-LABEL: define void @AllocCastDelete() { +; CHECK-NEXT: %1 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %2 = alloca i8, i32 4, align 8 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @AllocCastOpt() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + %3 = bitcast [4 x i8]* @bytes to i32* + store i32 %2, i32* %3, align 1 + store i32 %2, i32* %3, align 1 + ret void +} + +; CHECK-LABEL: define void @AllocCastOpt() { +; CHECK-NEXT: %1 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %2 = ptrtoint i8* %1 to i32 +; CHECK-NEXT: %3 = bitcast [4 x i8]* @bytes to i32* +; CHECK-NEXT: store i32 %2, i32* %3, align 1 +; CHECK-NEXT: store i32 %2, i32* %3, align 1 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @AllocBitcast(i32) { + %2 = alloca i8, i32 4, align 8 + %3 = add i32 %0, 1 + %4 = ptrtoint i8* %2 to i32 + %5 = bitcast [4 x i8]* @bytes to i32* + store i32 %4, i32* %5, align 1 + ret void +} + +; CHECK-LABEL: define void @AllocBitcast(i32) { +; CHECK-NEXT: %2 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %3 = add i32 %0, 1 +; CHECK-NEXT: %4 = ptrtoint i8* %2 to i32 +; CHECK-NEXT: %5 = bitcast [4 x i8]* @bytes to i32* +; CHECK-NEXT: store i32 %4, i32* %5, align 1 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @StoreGlobal() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint [4 x i8]* @bytes to i32 + %3 = bitcast i8* %1 to i32* + store i32 %2, i32* %3, align 1 + ret void +} + +; CHECK-LABEL: define void @StoreGlobal() { +; CHECK-NEXT: %1 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32 +; CHECK-NEXT: %3 = bitcast i8* %1 to i32* +; CHECK-NEXT: store i32 %2, i32* %3, align 1 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @StoreGlobalCastsReversed() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint [4 x i8]* @bytes to i32 + %3 = bitcast i8* %1 to i32* + store i32 %2, i32* %3, align 1 + ret void +} + +; CHECK-LABEL: define void @StoreGlobalCastsReversed() { +; CHECK-NEXT: %1 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32 +; CHECK-NEXT: %3 = bitcast i8* %1 to i32* +; CHECK-NEXT: store i32 %2, i32* %3, align 1 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define i32 @StoreGlobalCastPtr2Int() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint [4 x i8]* @bytes to i32 + %3 = bitcast i8* %1 to i32* + store i32 %2, i32* %3, align 1 + ret i32 0 +} + +; CHECK-LABEL: define i32 @StoreGlobalCastPtr2Int() { +; CHECK-NEXT: %1 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32 +; CHECK-NEXT: %3 = bitcast i8* %1 to i32* +; CHECK-NEXT: store i32 %2, i32* %3, align 1 +; CHECK-NEXT: ret i32 0 +; CHECK-NEXT: } + +define void @CastAddAlloca() { + %1 = alloca i8, i32 4, align 8 + %2 = add i32 1, 2 + %3 = ptrtoint i8* %1 to i32 + %4 = add i32 %3, 2 + %5 = add i32 1, %3 + %6 = add i32 %3, %3 + ret 
void +} + +; CHECK-LABEL: define void @CastAddAlloca() { +; CHECK-NEXT: %1 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %2 = add i32 1, 2 +; CHECK-NEXT: %3 = ptrtoint i8* %1 to i32 +; CHECK-NEXT: %4 = add i32 %3, 2 +; CHECK-NEXT: %5 = add i32 1, %3 +; CHECK-NEXT: %6 = add i32 %3, %3 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @CastAddGlobal() { + %1 = add i32 1, 2 + %2 = ptrtoint [4 x i8]* @bytes to i32 + %3 = add i32 %2, 2 + %4 = add i32 1, %2 + %5 = add i32 %2, %2 + ret void +} + +; CHECK-LABEL: define void @CastAddGlobal() { +; CHECK-NEXT: %1 = add i32 1, 2 +; CHECK-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32 +; CHECK-NEXT: %3 = add i32 %2, 2 +; CHECK-NEXT: %4 = add i32 1, %2 +; CHECK-NEXT: %5 = add i32 %2, %2 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @CastBinop() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + %3 = ptrtoint [4 x i8]* @bytes to i32 + %4 = sub i32 %2, %3 + %5 = mul i32 %2, %3 + %6 = udiv i32 %2, %3 + %7 = urem i32 %2, %3 + %8 = srem i32 %2, %3 + %9 = shl i32 %2, %3 + %10 = lshr i32 %2, %3 + %11 = ashr i32 %2, %3 + %12 = and i32 %2, %3 + %13 = or i32 %2, %3 + %14 = xor i32 %2, %3 + ret void +} + +; CHECK-LABEL: define void @CastBinop() { +; CHECK-NEXT: %1 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %2 = ptrtoint i8* %1 to i32 +; CHECK-NEXT: %3 = ptrtoint [4 x i8]* @bytes to i32 +; CHECK-NEXT: %4 = sub i32 %2, %3 +; CHECK-NEXT: %5 = mul i32 %2, %3 +; CHECK-NEXT: %6 = udiv i32 %2, %3 +; CHECK-NEXT: %7 = urem i32 %2, %3 +; CHECK-NEXT: %8 = srem i32 %2, %3 +; CHECK-NEXT: %9 = shl i32 %2, %3 +; CHECK-NEXT: %10 = lshr i32 %2, %3 +; CHECK-NEXT: %11 = ashr i32 %2, %3 +; CHECK-NEXT: %12 = and i32 %2, %3 +; CHECK-NEXT: %13 = or i32 %2, %3 +; CHECK-NEXT: %14 = xor i32 %2, %3 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @TestSavedPtrToInt() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + %3 = add i32 %2, 0 + %4 = call i32 @bar(i32 %2) + ret void +} + +; CHECK-LABEL: define void @TestSavedPtrToInt() { +; CHECK-NEXT: %1 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %2 = ptrtoint i8* %1 to i32 +; CHECK-NEXT: %3 = add i32 %2, 0 +; CHECK-NEXT: %4 = call i32 @bar(i32 %2) +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @CastSelect() { + %1 = alloca i8, i32 4, align 8 + %2 = select i1 true, i32 1, i32 2 + %3 = ptrtoint i8* %1 to i32 + %4 = select i1 true, i32 %3, i32 2 + %5 = ptrtoint [4 x i8]* @bytes to i32 + %6 = select i1 true, i32 1, i32 %5 + %7 = select i1 true, i32 %3, i32 %5 + %8 = select i1 true, i32 %5, i32 %3 + ret void +} + +; CHECK-LABEL: define void @CastSelect() { +; CHECK-NEXT: %1 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %2 = select i1 true, i32 1, i32 2 +; CHECK-NEXT: %3 = ptrtoint i8* %1 to i32 +; CHECK-NEXT: %4 = select i1 true, i32 %3, i32 2 +; CHECK-NEXT: %5 = ptrtoint [4 x i8]* @bytes to i32 +; CHECK-NEXT: %6 = select i1 true, i32 1, i32 %5 +; CHECK-NEXT: %7 = select i1 true, i32 %3, i32 %5 +; CHECK-NEXT: %8 = select i1 true, i32 %5, i32 %3 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @PhiBackwardRefs(i1) { + %2 = alloca i8, i32 4, align 8 + %3 = alloca i8, i32 4, align 8 + br i1 %0, label %true, label %false + +true: ; preds = %1 + %4 = bitcast i8* %2 to i32* + %5 = load i32* %4 + %6 = ptrtoint i8* %3 to i32 + br label %merge + +false: ; preds = %1 + %7 = bitcast i8* %2 to i32* + %8 = load i32* %7 + %9 = ptrtoint i8* %3 to i32 + br label %merge + +merge: ; preds = %false, %true + %10 = phi i32 [ %6, %true ], [ %9, %false ] + %11 = phi i32 [ %5, %true ], [ %8, %false ] + ret void +} 
+ +; CHECK-LABEL: define void @PhiBackwardRefs(i1) { +; CHECK-NEXT: %2 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %3 = alloca i8, i32 4, align 8 +; CHECK-NEXT: br i1 %0, label %true, label %false +; CHECK-LABEL: true: ; preds = %1 +; CHECK-NEXT: %4 = bitcast i8* %2 to i32* +; CHECK-NEXT: %5 = load i32* %4 +; CHECK-NEXT: %6 = ptrtoint i8* %3 to i32 +; CHECK-NEXT: br label %merge +; CHECK-LABEL: false: ; preds = %1 +; CHECK-NEXT: %7 = bitcast i8* %2 to i32* +; CHECK-NEXT: %8 = load i32* %7 +; CHECK-NEXT: %9 = ptrtoint i8* %3 to i32 +; CHECK-NEXT: br label %merge +; CHECK-LABEL: merge: ; preds = %false, %true +; CHECK-NEXT: %10 = phi i32 [ %6, %true ], [ %9, %false ] +; CHECK-NEXT: %11 = phi i32 [ %5, %true ], [ %8, %false ] +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @PhiForwardRefs(i1) { + br label %start + +merge: ; preds = %false, %true + %2 = phi i32 [ %11, %true ], [ %11, %false ] + %3 = phi i32 [ %5, %true ], [ %7, %false ] + ret void + +true: ; preds = %start + %4 = inttoptr i32 %9 to i32* + %5 = load i32* %4 + br label %merge + +false: ; preds = %start + %6 = inttoptr i32 %9 to i32* + %7 = load i32* %6 + br label %merge + +start: ; preds = %1 + %8 = alloca i8, i32 4, align 8 + %9 = ptrtoint i8* %8 to i32 + %10 = alloca i8, i32 4, align 8 + %11 = ptrtoint i8* %10 to i32 + br i1 %0, label %true, label %false +} + +; CHECK-LABEL: define void @PhiForwardRefs(i1) { +; CHECK-NEXT: br label %start +; CHECK-LABEL: merge: ; preds = %false, %true +; CHECK-NEXT: %2 = phi i32 [ %11, %true ], [ %11, %false ] +; CHECK-NEXT: %3 = phi i32 [ %5, %true ], [ %7, %false ] +; CHECK-NEXT: ret void +; CHECK-LABEL: true: ; preds = %start +; CHECK-NEXT: %4 = inttoptr i32 %9 to i32* +; CHECK-NEXT: %5 = load i32* %4 +; CHECK-NEXT: br label %merge +; CHECK-LABEL: false: ; preds = %start +; CHECK-NEXT: %6 = inttoptr i32 %9 to i32* +; CHECK-NEXT: %7 = load i32* %6 +; CHECK-NEXT: br label %merge +; CHECK-LABEL: start: ; preds = %1 +; CHECK-NEXT: %8 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %9 = ptrtoint i8* %8 to i32 +; CHECK-NEXT: %10 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %11 = ptrtoint i8* %10 to i32 +; CHECK-NEXT: br i1 %0, label %true, label %false +; CHECK-NEXT: } + +define void @PhiMergeCast(i1) { + %2 = alloca i8, i32 4, align 8 + %3 = alloca i8, i32 4, align 8 + br i1 %0, label %true, label %false + +true: ; preds = %1 + %4 = bitcast i8* %2 to i32* + %5 = load i32* %4 + %6 = ptrtoint i8* %3 to i32 + %7 = add i32 %5, %6 + br label %merge + +false: ; preds = %1 + %8 = bitcast i8* %2 to i32* + %9 = load i32* %8 + %10 = ptrtoint i8* %3 to i32 + br label %merge + +merge: ; preds = %false, %true + %11 = phi i32 [ %6, %true ], [ %10, %false ] + %12 = phi i32 [ %5, %true ], [ %9, %false ] + ret void +} + +; CHECK-LABEL: define void @PhiMergeCast(i1) { +; CHECK-NEXT: %2 = alloca i8, i32 4, align 8 +; CHECK-NEXT: %3 = alloca i8, i32 4, align 8 +; CHECK-NEXT: br i1 %0, label %true, label %false +; CHECK-LABEL: true: ; preds = %1 +; CHECK-NEXT: %4 = bitcast i8* %2 to i32* +; CHECK-NEXT: %5 = load i32* %4 +; CHECK-NEXT: %6 = ptrtoint i8* %3 to i32 +; CHECK-NEXT: %7 = add i32 %5, %6 +; CHECK-NEXT: br label %merge +; CHECK-LABEL: false: ; preds = %1 +; CHECK-NEXT: %8 = bitcast i8* %2 to i32* +; CHECK-NEXT: %9 = load i32* %8 +; CHECK-NEXT: %10 = ptrtoint i8* %3 to i32 +; CHECK-NEXT: br label %merge +; CHECK-LABEL: merge: ; preds = %false, %true +; CHECK-NEXT: %11 = phi i32 [ %6, %true ], [ %10, %false ] +; CHECK-NEXT: %12 = phi i32 [ %5, %true ], [ %9, %false ] +; CHECK-NEXT: ret void +; CHECK-NEXT: } + 
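+; LongReachingCasts repeats the same pointer casts of %2 and @bytes in four +; separate blocks reached through different branch paths.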
+define void @LongReachingCasts(i1) { + %2 = alloca i8, i32 4, align 8 + br i1 %0, label %Split1, label %Split2 + +Split1: ; preds = %1 + br i1 %0, label %b1, label %b2 + +Split2: ; preds = %1 + br i1 %0, label %b3, label %b4 + +b1: ; preds = %Split1 + %3 = ptrtoint i8* %2 to i32 + %4 = bitcast [4 x i8]* @bytes to i32* + store i32 %3, i32* %4, align 1 + store i32 %3, i32* %4, align 1 + ret void + +b2: ; preds = %Split1 + %5 = ptrtoint i8* %2 to i32 + %6 = bitcast [4 x i8]* @bytes to i32* + store i32 %5, i32* %6, align 1 + store i32 %5, i32* %6, align 1 + ret void + +b3: ; preds = %Split2 + %7 = ptrtoint i8* %2 to i32 + %8 = bitcast [4 x i8]* @bytes to i32* + store i32 %7, i32* %8, align 1 + store i32 %7, i32* %8, align 1 + ret void + +b4: ; preds = %Split2 + %9 = ptrtoint i8* %2 to i32 + %10 = bitcast [4 x i8]* @bytes to i32* + store i32 %9, i32* %10, align 1 + store i32 %9, i32* %10, align 1 + ret void +} + +; CHECK-LABEL: define void @LongReachingCasts(i1) { +; CHECK-NEXT: %2 = alloca i8, i32 4, align 8 +; CHECK-NEXT: br i1 %0, label %Split1, label %Split2 +; CHECK-LABEL: Split1: ; preds = %1 +; CHECK-NEXT: br i1 %0, label %b1, label %b2 +; CHECK-LABEL: Split2: ; preds = %1 +; CHECK-NEXT: br i1 %0, label %b3, label %b4 +; CHECK-LABEL: b1: ; preds = %Split1 +; CHECK-NEXT: %3 = ptrtoint i8* %2 to i32 +; CHECK-NEXT: %4 = bitcast [4 x i8]* @bytes to i32* +; CHECK-NEXT: store i32 %3, i32* %4, align 1 +; CHECK-NEXT: store i32 %3, i32* %4, align 1 +; CHECK-NEXT: ret void +; CHECK-LABEL: b2: ; preds = %Split1 +; CHECK-NEXT: %5 = ptrtoint i8* %2 to i32 +; CHECK-NEXT: %6 = bitcast [4 x i8]* @bytes to i32* +; CHECK-NEXT: store i32 %5, i32* %6, align 1 +; CHECK-NEXT: store i32 %5, i32* %6, align 1 +; CHECK-NEXT: ret void +; CHECK-LABEL: b3: ; preds = %Split2 +; CHECK-NEXT: %7 = ptrtoint i8* %2 to i32 +; CHECK-NEXT: %8 = bitcast [4 x i8]* @bytes to i32* +; CHECK-NEXT: store i32 %7, i32* %8, align 1 +; CHECK-NEXT: store i32 %7, i32* %8, align 1 +; CHECK-NEXT: ret void +; CHECK-LABEL: b4: ; preds = %Split2 +; CHECK-NEXT: %9 = ptrtoint i8* %2 to i32 +; CHECK-NEXT: %10 = bitcast [4 x i8]* @bytes to i32* +; CHECK-NEXT: store i32 %9, i32* %10, align 1 +; CHECK-NEXT: store i32 %9, i32* %10, align 1 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +define void @SwitchVariable(i32) { + switch i32 %0, label %l1 [ + i32 1, label %l2 + i32 2, label %l2 + i32 4, label %l3 + i32 5, label %l3 + ] + ; No predecessors! + br label %end + +l1: ; preds = %1 + br label %end + +l2: ; preds = %1, %1 + br label %end + +l3: ; preds = %1, %1 + br label %end + +end: ; preds = %l3, %l2, %l1, %2 + ret void +} + +; CHECK-LABEL: define void @SwitchVariable(i32) { +; CHECK-NEXT: switch i32 %0, label %l1 [ +; CHECK-NEXT: i32 1, label %l2 +; CHECK-NEXT: i32 2, label %l2 +; CHECK-NEXT: i32 4, label %l3 +; CHECK-NEXT: i32 5, label %l3 +; CHECK-NEXT: ] +; CHECK-NEXT: ; No predecessors! 
+; CHECK-NEXT: br label %end +; CHECK-LABEL: l1: ; preds = %1 +; CHECK-NEXT: br label %end +; CHECK-LABEL: l2: ; preds = %1, %1 +; CHECK-NEXT: br label %end +; CHECK-LABEL: l3: ; preds = %1, %1 +; CHECK-NEXT: br label %end +; CHECK-LABEL: end: ; preds = %l3, %l2, %l1, %2 +; CHECK-NEXT: ret void +; CHECK-NEXT: } + +; DUMP: PNaCl Version: 2 + +; DUMP: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; 
DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: +; DUMP-NEXT: + + diff --git a/test/NaCl/Bitcode/bitcast-elide.ll b/test/NaCl/Bitcode/bitcast-elide.ll new file mode 100644 index 000000000000..626e94cf8a3e --- /dev/null +++ b/test/NaCl/Bitcode/bitcast-elide.ll @@ -0,0 +1,178 @@ +; Test how we handle eliding (pointer) bitcast instructions. + +; RUN: llvm-as < %s | pnacl-freeze \ +; RUN: | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s -check-prefix=PF2 + +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw \ +; RUN: | llvm-dis - | FileCheck %s -check-prefix=TD2 + +; ------------------------------------------------------ + +@bytes = internal global [4 x i8] c"abcd" + +; ------------------------------------------------------ + +; Test that we elide the simple case of global. +define void @SimpleLoad() { + %1 = bitcast [4 x i8]* @bytes to i32* + %2 = load i32* %1, align 4 + ret void +} + +; TD2-LABEL: define void @SimpleLoad() { +; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: %2 = load i32* %1, align 4 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Test that we elide the simple case of an alloca. 
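+; (Here the bitcast source is an alloca rather than a global; the cast should +; again be omitted from the emitted records and reintroduced by pnacl-thaw, +; as the TD2 lines below expect.)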
+define void @SimpleLoadAlloca() { + %1 = alloca i8, i32 4, align 4 + %2 = bitcast i8* %1 to i32* + %3 = load i32* %2, align 4 + ret void +} + +; TD2-LABEL: define void @SimpleLoadAlloca() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 4 +; TD2-NEXT: %2 = bitcast i8* %1 to i32* +; TD2-NEXT: %3 = load i32* %2, align 4 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Test that we can handle multiple bitcasts. +define i32 @TwoLoads(i32 %i) { + %1 = bitcast [4 x i8]* @bytes to i32* + %2 = load i32* %1, align 4 + %3 = bitcast [4 x i8]* @bytes to i32* + %4 = load i32* %3, align 4 + %5 = add i32 %2, %4 + ret i32 %5 +} + +; TD2-LABEL: define i32 @TwoLoads(i32 %i) { +; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: %2 = load i32* %1, align 4 +; TD2-NEXT: %3 = load i32* %1, align 4 +; TD2-NEXT: %4 = add i32 %2, %3 +; TD2-NEXT: ret i32 %4 +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ + +; Test how we handle bitcasts if optimized in the input file. This +; case tests within a single block. +define i32 @TwoLoadOptOneBlock(i32 %i) { + %1 = bitcast [4 x i8]* @bytes to i32* + %2 = load i32* %1, align 4 + %3 = load i32* %1, align 4 + %4 = add i32 %2, %3 + ret i32 %4 +} + +; TD2-LABEL: define i32 @TwoLoadOptOneBlock(i32 %i) { +; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: %2 = load i32* %1, align 4 +; TD2-NEXT: %3 = load i32* %1, align 4 +; TD2-NEXT: %4 = add i32 %2, %3 +; TD2-NEXT: ret i32 %4 +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ + +; Test how we handle bitcasts if optimized in the input file. This +; case tests accross blocks. +define i32 @TwoLoadOptTwoBlocks(i32 %i) { + %1 = bitcast [4 x i8]* @bytes to i32* + %2 = load i32* %1, align 4 + %3 = load i32* %1, align 4 + %4 = add i32 %2, %3 + br label %BB + +BB: + %5 = load i32* %1, align 4 + %6 = load i32* %1, align 4 + %7 = add i32 %5, %6 + ret i32 %4 +} + +; TD2-LABEL: define i32 @TwoLoadOptTwoBlocks(i32 %i) { +; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: %2 = load i32* %1, align 4 +; TD2-NEXT: %3 = load i32* %1, align 4 +; TD2-NEXT: %4 = add i32 %2, %3 +; TD2-NEXT: br label %BB +; TD2-LABEL: BB: +; TD2-NEXT: %5 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: %6 = load i32* %5, align 4 +; TD2-NEXT: %7 = load i32* %5, align 4 +; TD2-NEXT: %8 = add i32 %6, %7 +; TD2-NEXT: ret i32 %4 +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ + +; Test that we elide the simple case of bitcast for a store. 
+define void @SimpleStore(i32 %i) { + %1 = bitcast [4 x i8]* @bytes to i32* + store i32 %i, i32* %1, align 4 + ret void +} + +; TD2-LABEL: define void @SimpleStore(i32 %i) { +; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: store i32 %i, i32* %1, align 4 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: diff --git a/test/NaCl/Bitcode/call-elide.ll b/test/NaCl/Bitcode/call-elide.ll new file mode 100644 index 000000000000..2f7d18d0ec2f --- /dev/null +++ b/test/NaCl/Bitcode/call-elide.ll @@ -0,0 +1,218 @@ +; Test how we handle eliding pointers in call instructions. + +; RUN: llvm-as < %s | pnacl-freeze \ +; RUN: | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s -check-prefix=PF2 + +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw \ +; RUN: | llvm-dis - | FileCheck %s -check-prefix=TD2 + +; ------------------------------------------------------ +; Define some global functions/variables to be used in testing. + + +@bytes = internal global [4 x i8] c"abcd" +declare void @foo(i32 %i) +declare i32 @llvm.nacl.setjmp(i8* %i) + +; ------------------------------------------------------ +; Test how we handle a direct call. + +define void @DirectCall() { + call void @foo(i32 0) + ret void +} + +; TD2: define void @DirectCall() { +; TD2-NEXT: call void @foo(i32 0) +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ +; Test how we handle a direct call with a normalized inttoptr argument. +; Pointer arguments are only allowed for intrinsic calls. + +define void @DirectCallIntToPtrArg(i32 %i) { + %1 = inttoptr i32 %i to i8* + %2 = call i32 @llvm.nacl.setjmp(i8* %1) + ret void +} + +; TD2: define void @DirectCallIntToPtrArg(i32 %i) { +; TD2-NEXT: %1 = inttoptr i32 %i to i8* +; TD2-NEXT: %2 = call i32 @llvm.nacl.setjmp(i8* %1) +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ +; Test how we handle a direct call with a normalized ptroint argument. +; Pointer arguments are only allowed for intrinsic calls. + +define void @DirectCallPtrToIntArg() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + call void @foo(i32 %2) + ret void +} + +; TD2: define void @DirectCallPtrToIntArg() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = ptrtoint i8* %1 to i32 +; TD2-NEXT: call void @foo(i32 %2) +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ +; Test how we handle a direct call with a normalized bitcast argument. + +define void @DirectCallBitcastArg(i32 %i) { + %1 = bitcast [4 x i8]* @bytes to i8* + %2 = call i32 @llvm.nacl.setjmp(i8* %1) + ret void +} + +; TD2: define void @DirectCallBitcastArg(i32 %i) { +; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i8* +; TD2-NEXT: %2 = call i32 @llvm.nacl.setjmp(i8* %1) +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ +; Test how we handle a direct call with a pointer to scalar conversion. 
+ +define void @DirectCallScalarArg() { + %1 = ptrtoint [4 x i8]* @bytes to i32 + call void @foo(i32 %1) + ret void +} + +; TD2: define void @DirectCallScalarArg() { +; TD2-NEXT: %1 = ptrtoint [4 x i8]* @bytes to i32 +; TD2-NEXT: call void @foo(i32 %1) +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ +; Test how we handle an indirect call. + +define void @IndirectCall(i32 %i) { + %1 = inttoptr i32 %i to void (i32)* + call void %1(i32 %i) + ret void +} + +; TD2: define void @IndirectCall(i32 %i) { +; TD2-NEXT: %1 = inttoptr i32 %i to void (i32)* +; TD2-NEXT: call void %1(i32 %i) +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ +; Test how we handle an indirect call with a normalized ptrtoint argument. + +define void @IndirectCallPtrToIntArg(i32 %i) { + %1 = alloca i8, i32 4, align 8 + %2 = inttoptr i32 %i to void (i32)* + %3 = ptrtoint i8* %1 to i32 + call void %2(i32 %3) + ret void +} + +; TD2: define void @IndirectCallPtrToIntArg(i32 %i) { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = ptrtoint i8* %1 to i32 +; TD2-NEXT: %3 = inttoptr i32 %i to void (i32)* +; TD2-NEXT: call void %3(i32 %2) +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ +; Test how we handle an indirect call with a pointer to scalar conversion. + +define void @IndirectCallScalarArg(i32 %i) { + %1 = inttoptr i32 %i to void (i32)* + %2 = ptrtoint [4 x i8]* @bytes to i32 + call void %1(i32 %2) + ret void +} + +; TD2: define void @IndirectCallScalarArg(i32 %i) { +; TD2-NEXT: %1 = ptrtoint [4 x i8]* @bytes to i32 +; TD2-NEXT: %2 = inttoptr i32 %i to void (i32)* +; TD2-NEXT: call void %2(i32 %1) +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ +; Test how we handle intrinsics that can return (inherent) pointers, and +; return statements that expect scalar values. + +declare i8* @llvm.nacl.read.tp() + +define i32 @ReturnPtrIntrinsic() { + %1 = call i8* @llvm.nacl.read.tp() + %2 = ptrtoint i8* %1 to i32 + ret i32 %2 +} + +; TD2: define i32 @ReturnPtrIntrinsic() { +; TD2-NEXT: %1 = call i8* @llvm.nacl.read.tp() +; TD2-NEXT: %2 = ptrtoint i8* %1 to i32 +; TD2-NEXT: ret i32 %2 +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: diff --git a/test/NaCl/Bitcode/cmp.ll b/test/NaCl/Bitcode/cmp.ll new file mode 100644 index 000000000000..ac24c72704d2 --- /dev/null +++ b/test/NaCl/Bitcode/cmp.ll @@ -0,0 +1,72 @@ +; Tests that all comparison conditions survive through PNaCl bitcode files. 
+ +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw | llvm-dis - \ +; RUN: | FileCheck %s + +define void @IntCompare() { + %1 = icmp eq i32 0, 1 + %2 = icmp ne i32 0, 1 + %3 = icmp ugt i32 0, 1 + %4 = icmp uge i32 0, 1 + %5 = icmp ult i32 0, 1 + %6 = icmp ule i32 0, 1 + %7 = icmp sgt i32 0, 1 + %8 = icmp sge i32 0, 1 + %9 = icmp slt i32 0, 1 + %10 = icmp sle i32 0, 1 + ret void +} + +; CHECK: define void @IntCompare() { +; CHECK: %1 = icmp eq i32 0, 1 +; CHECK: %2 = icmp ne i32 0, 1 +; CHECK: %3 = icmp ugt i32 0, 1 +; CHECK: %4 = icmp uge i32 0, 1 +; CHECK: %5 = icmp ult i32 0, 1 +; CHECK: %6 = icmp ule i32 0, 1 +; CHECK: %7 = icmp sgt i32 0, 1 +; CHECK: %8 = icmp sge i32 0, 1 +; CHECK: %9 = icmp slt i32 0, 1 +; CHECK: %10 = icmp sle i32 0, 1 +; CHECK: ret void +; CHECK: } + +define void @FloatCompare() { + %1 = fcmp false float 0.000000e+00, 1.000000e+00 + %2 = fcmp oeq float 0.000000e+00, 1.000000e+00 + %3 = fcmp ogt float 0.000000e+00, 1.000000e+00 + %4 = fcmp oge float 0.000000e+00, 1.000000e+00 + %5 = fcmp olt float 0.000000e+00, 1.000000e+00 + %6 = fcmp ole float 0.000000e+00, 1.000000e+00 + %7 = fcmp one float 0.000000e+00, 1.000000e+00 + %8 = fcmp ord float 0.000000e+00, 1.000000e+00 + %9 = fcmp ueq float 0.000000e+00, 1.000000e+00 + %10 = fcmp ugt float 0.000000e+00, 1.000000e+00 + %11 = fcmp uge float 0.000000e+00, 1.000000e+00 + %12 = fcmp ult float 0.000000e+00, 1.000000e+00 + %13 = fcmp ule float 0.000000e+00, 1.000000e+00 + %14 = fcmp une float 0.000000e+00, 1.000000e+00 + %15 = fcmp uno float 0.000000e+00, 1.000000e+00 + %16 = fcmp true float 0.000000e+00, 1.000000e+00 + ret void +} + +; CHECK: define void @FloatCompare() { +; CHECK: %1 = fcmp false float 0.000000e+00, 1.000000e+00 +; CHECK: %2 = fcmp oeq float 0.000000e+00, 1.000000e+00 +; CHECK: %3 = fcmp ogt float 0.000000e+00, 1.000000e+00 +; CHECK: %4 = fcmp oge float 0.000000e+00, 1.000000e+00 +; CHECK: %5 = fcmp olt float 0.000000e+00, 1.000000e+00 +; CHECK: %6 = fcmp ole float 0.000000e+00, 1.000000e+00 +; CHECK: %7 = fcmp one float 0.000000e+00, 1.000000e+00 +; CHECK: %8 = fcmp ord float 0.000000e+00, 1.000000e+00 +; CHECK: %9 = fcmp ueq float 0.000000e+00, 1.000000e+00 +; CHECK: %10 = fcmp ugt float 0.000000e+00, 1.000000e+00 +; CHECK: %11 = fcmp uge float 0.000000e+00, 1.000000e+00 +; CHECK: %12 = fcmp ult float 0.000000e+00, 1.000000e+00 +; CHECK: %13 = fcmp ule float 0.000000e+00, 1.000000e+00 +; CHECK: %14 = fcmp une float 0.000000e+00, 1.000000e+00 +; CHECK: %15 = fcmp uno float 0.000000e+00, 1.000000e+00 +; CHECK: %16 = fcmp true float 0.000000e+00, 1.000000e+00 +; CHECK: ret void +; CHECK: } diff --git a/test/NaCl/Bitcode/fast.ll b/test/NaCl/Bitcode/fast.ll new file mode 100644 index 000000000000..2a89ca3b4d89 --- /dev/null +++ b/test/NaCl/Bitcode/fast.ll @@ -0,0 +1,110 @@ +; Tests that we don't write the fast (floating point) attributes into +; PNaCl bitcode files (i.e. flags fast, nnan, ninf, nsz, and arcp). + +; Test 1: Show that flags are removed. +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw | llvm-dis - \ +; RUN: | FileCheck %s + +; Test 2: Show that the bitcode files do not contain flags (i.e. +; the corresponding BINOP records only have 3 values, not 4). 
+; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s -check-prefix=RECORD + +define void @foo() { + ; Show that we handle all flags for fadd + %1 = fadd fast double 1.000000e+00, 2.000000e+00 + %2 = fadd nnan double 3.000000e+00, 4.000000e+00 + %3 = fadd ninf double 5.000000e+00, 6.000000e+00 + %4 = fadd nsz double 7.000000e+00, 8.000000e+00 + %5 = fadd arcp double 9.000000e+00, 10.000000e+00 + +; CHECK: %1 = fadd double 1.000000e+00, 2.000000e+00 +; CHECK: %2 = fadd double 3.000000e+00, 4.000000e+00 +; CHECK: %3 = fadd double 5.000000e+00, 6.000000e+00 +; CHECK: %4 = fadd double 7.000000e+00, 8.000000e+00 +; CHECK: %5 = fadd double 9.000000e+00, 1.000000e+01 + +; RECORD: +; RECORD: +; RECORD: +; RECORD: +; RECORD: + + ; Show that we handle all flags for fsub + %6 = fsub fast double 1.000000e+00, 2.000000e+00 + %7 = fsub nnan double 3.000000e+00, 4.000000e+00 + %8 = fsub ninf double 5.000000e+00, 6.000000e+00 + %9 = fsub nsz double 7.000000e+00, 8.000000e+00 + %10 = fsub arcp double 9.000000e+00, 10.000000e+00 + +; CHECK: %6 = fsub double 1.000000e+00, 2.000000e+00 +; CHECK: %7 = fsub double 3.000000e+00, 4.000000e+00 +; CHECK: %8 = fsub double 5.000000e+00, 6.000000e+00 +; CHECK: %9 = fsub double 7.000000e+00, 8.000000e+00 +; CHECK: %10 = fsub double 9.000000e+00, 1.000000e+01 + +; RECORD: +; RECORD: +; RECORD: +; RECORD: +; RECORD: + + ; Show that we can handle all flags for fmul + %11 = fmul fast double 1.000000e+00, 2.000000e+00 + %12 = fmul nnan double 3.000000e+00, 4.000000e+00 + %13 = fmul ninf double 5.000000e+00, 6.000000e+00 + %14 = fmul nsz double 7.000000e+00, 8.000000e+00 + %15 = fmul arcp double 9.000000e+00, 10.000000e+00 + +; CHECK: %11 = fmul double 1.000000e+00, 2.000000e+00 +; CHECK: %12 = fmul double 3.000000e+00, 4.000000e+00 +; CHECK: %13 = fmul double 5.000000e+00, 6.000000e+00 +; CHECK: %14 = fmul double 7.000000e+00, 8.000000e+00 +; CHECK: %15 = fmul double 9.000000e+00, 1.000000e+01 + +; RECORD: +; RECORD: +; RECORD: +; RECORD: +; RECORD: + + ; Show that we can handle all flags for fdiv + %16 = fdiv fast double 1.000000e+00, 2.000000e+00 + %17 = fdiv nnan double 3.000000e+00, 4.000000e+00 + %18 = fdiv ninf double 5.000000e+00, 6.000000e+00 + %19 = fdiv nsz double 7.000000e+00, 8.000000e+00 + %20 = fdiv arcp double 9.000000e+00, 10.000000e+00 + +; CHECK: %16 = fdiv double 1.000000e+00, 2.000000e+00 +; CHECK: %17 = fdiv double 3.000000e+00, 4.000000e+00 +; CHECK: %18 = fdiv double 5.000000e+00, 6.000000e+00 +; CHECK: %19 = fdiv double 7.000000e+00, 8.000000e+00 +; CHECK: %20 = fdiv double 9.000000e+00, 1.000000e+01 + +; RECORD: +; RECORD: +; RECORD: +; RECORD: +; RECORD: + + ; Show that we can handle all flags for frem. 
+ %21 = frem fast double 1.000000e+00, 2.000000e+00 + %22 = frem nnan double 3.000000e+00, 4.000000e+00 + %23 = frem ninf double 5.000000e+00, 6.000000e+00 + %24 = frem nsz double 7.000000e+00, 8.000000e+00 + %25 = frem arcp double 9.000000e+00, 10.000000e+00 + +; CHECK: %21 = frem double 1.000000e+00, 2.000000e+00 +; CHECK: %22 = frem double 3.000000e+00, 4.000000e+00 +; CHECK: %23 = frem double 5.000000e+00, 6.000000e+00 +; CHECK: %24 = frem double 7.000000e+00, 8.000000e+00 +; CHECK: %25 = frem double 9.000000e+00, 1.000000e+01 + +; RECORD: +; RECORD: +; RECORD: +; RECORD: +; RECORD: + + ret void +} diff --git a/test/NaCl/Bitcode/flags.ll b/test/NaCl/Bitcode/flags.ll new file mode 100644 index 000000000000..0b510e2a48ca --- /dev/null +++ b/test/NaCl/Bitcode/flags.ll @@ -0,0 +1,17 @@ +; Test that we no longer support the "nuw", "nsw", or the "exact" attributes on +; binary operators in PNaCl bitcode files, since the PNaClABI doesn't allow +; these attributes. + +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw | llvm-dis - \ +; RUN: | FileCheck %s + +define void @WrapFlags(i32, i32) { + %3 = add nuw i32 %0, %1 + %4 = add nsw i32 %0, %1 + %5 = udiv exact i32 %0, %1 + ret void +} + +; CHECK: %3 = add i32 %0, %1 +; CHECK: %4 = add i32 %0, %1 +; CHECK: %5 = udiv i32 %0, %1 diff --git a/test/NaCl/Bitcode/forward-ref-decl.ll b/test/NaCl/Bitcode/forward-ref-decl.ll new file mode 100644 index 000000000000..ac02c850d72e --- /dev/null +++ b/test/NaCl/Bitcode/forward-ref-decl.ll @@ -0,0 +1,59 @@ +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s + +; Test that FORWARDTYPEREF declarations are emitted in the correct +; places. These are emitted for forward value references inside +; functions. + +define external void @_start(i32 %arg) { +; CHECK: + +bb1: + %forward1 = add i32 %arg, 100 + %forward2 = add i32 %arg, 200 + %forward3 = add i32 %arg, 300 + %forward4 = add i32 %arg, 400 + %forward5 = add i32 %arg, 500 + br label %bb2 +} diff --git a/test/NaCl/Bitcode/forward-ref-pointer-intrinsic.ll b/test/NaCl/Bitcode/forward-ref-pointer-intrinsic.ll new file mode 100644 index 000000000000..0ddfdd4b51ec --- /dev/null +++ b/test/NaCl/Bitcode/forward-ref-pointer-intrinsic.ll @@ -0,0 +1,31 @@ +; Test forward reference of a pointer-typed intrinsic result. + +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw \ +; RUN: | llvm-dis - | FileCheck %s -check-prefix=TD2 + +declare i8* @llvm.nacl.read.tp() + +define i32 @forward_ref() { + br label %block1 + +block2: + %1 = load i8* %3 + %2 = ptrtoint i8* %3 to i32 + ret i32 %2 + +block1: + %3 = call i8* @llvm.nacl.read.tp() + br label %block2 +} + +; TD2: define i32 @forward_ref() { +; TD2-NEXT: br label %block1 +; TD2: block2: +; TD2-NEXT: %1 = inttoptr i32 %4 to i8* +; TD2-NEXT: %2 = load i8* %1 +; TD2-NEXT: ret i32 %4 +; TD2: block1: +; TD2-NEXT: %3 = call i8* @llvm.nacl.read.tp() +; TD2-NEXT: %4 = ptrtoint i8* %3 to i32 +; TD2-NEXT: br label %block2 +; TD2-NEXT: } diff --git a/test/NaCl/Bitcode/globalvars.ll b/test/NaCl/Bitcode/globalvars.ll new file mode 100644 index 000000000000..3bd9890aa828 --- /dev/null +++ b/test/NaCl/Bitcode/globalvars.ll @@ -0,0 +1,106 @@ +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw | llvm-dis - | FileCheck %s +; RUN: llvm-as < %s | pnacl-freeze | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s -check-prefix=BC + +; Test that we generate appropriate bitcode values for global variables. + +; Make sure that no struct/array/pointer types are generated by the +; global variables. 
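+; (The initializers themselves are encoded as data and relocation records in +; the globals block, so no pointer, array or struct entries are needed in the +; type table.)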
+; BC: +; BC-NEXT: + +; Make sure that the function declaration for function func (below) +; appears before the global variables block. +; BC: + +; Make sure we begin the globals block after function declarations. +; BC-NEXT: + +@bytes = internal global [7 x i8] c"abcdefg" +; CHECK: @bytes = internal global [7 x i8] c"abcdefg" +; BC-NEXT: +; BC-NEXT: + + +@ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32) +; CHECK: @ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32) +; BC-NEXT: +; BC-NEXT: + +@ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32) +; CHECK: @ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32) +; BC-NEXT: +; BC-NEXT: + +@compound = internal global <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }> +; CHECK: @compound = internal global <{ [3 x i8], i32 }> <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }> +; BC-NEXT: +; BC-NEXT: +; BC-NEXT: +; BC-NEXT: + +@ptr = internal global i32 ptrtoint ([7 x i8]* @bytes to i32) +; CHECK: @ptr = internal global i32 ptrtoint ([7 x i8]* @bytes to i32) +; BC-NEXT: +; BC-NEXT: + +@addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1) +; CHECK: @addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1) +; BC-NEXT: +; BC-NEXT: + +@addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1) +; CHECK: @addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1) +; BC-NEXT: +; BC-NEXT: + +@addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1) +; CHECK: @addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1) +; BC-NEXT: +; BC-NEXT: + +@addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7) +; CHECK: @addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7) +; BC-NEXT: +; BC-NEXT: + +@addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9) +; CHECK: @addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9) +; BC-NEXT: +; BC-NEXT: + +@addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1) +; CHECK: @addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1) +; BC-NEXT: +; BC-NEXT: + +@addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4) +; CHECK: @addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4) +; BC-NEXT: +; BC-NEXT: + +@ptr_to_func_align = internal global i32 ptrtoint (void ()* @func to i32), align 8 +; CHECK: @ptr_to_func_align = internal global i32 ptrtoint (void ()* @func to i32), align 8 +; BC-NEXT: +; BC-NEXT: + +@char = internal constant [1 x i8] c"0" +; CHECK: @char = internal constant [1 x i8] c"0" +; BC-NEXT: +; BC-NEXT: + +@short = internal constant [2 x i8] zeroinitializer +; CHECK: @short = internal constant [2 x i8] zeroinitializer +; BC-NEXT: +; BC-NEXT: + +; BC-NEXT: + +define void @func() { + ret void +} + diff --git a/test/NaCl/Bitcode/implicit-datalayout.ll b/test/NaCl/Bitcode/implicit-datalayout.ll new file mode 100644 index 000000000000..5a957dffec2a --- /dev/null +++ b/test/NaCl/Bitcode/implicit-datalayout.ll @@ -0,0 +1,9 @@ +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw - | llvm-dis - | FileCheck %s + +; The "datalayout" field is considered to be implicit in the 
pexe. It +; is not stored in the pexe; the reader adds it implicitly. +; +; The most important parts of the datalayout for PNaCl are the pointer +; size and the endianness ("e" for little endian). + +; CHECK: target datalayout = "e{{.*}}p:32:32:32{{.*}}" diff --git a/test/NaCl/Bitcode/intrinsic-pointer-args.ll b/test/NaCl/Bitcode/intrinsic-pointer-args.ll new file mode 100644 index 000000000000..ca779fddc4c4 --- /dev/null +++ b/test/NaCl/Bitcode/intrinsic-pointer-args.ll @@ -0,0 +1,55 @@ +; Test that intrinsic declarations are read back correctly. + +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw \ +; RUN: | llvm-dis - | FileCheck %s -check-prefix=TD + +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) + +declare i8* @llvm.nacl.read.tp() +declare void @llvm.nacl.longjmp(i8*, i32) +declare void @llvm.nacl.setjmp(i8*) + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) + +declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) +declare i64 @llvm.nacl.atomic.load.i64(i64*, i32) + +declare i32 @llvm.nacl.atomic.cmpxchg.i32(i32*, i32, i32, i32, i32) +declare i64 @llvm.nacl.atomic.cmpxchg.i64(i64*, i64, i64, i32, i32) + +declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) +declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32) + +declare i32 @llvm.nacl.atomic.rmw.i32(i32, i32*, i32, i32) +declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32) + +declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*) + + +; TD: declare i8* @llvm.stacksave() +; TD: declare void @llvm.stackrestore(i8*) + +; TD: declare i8* @llvm.nacl.read.tp() +; TD: declare void @llvm.nacl.longjmp(i8*, i32) +; TD: declare void @llvm.nacl.setjmp(i8*) + +; TD: declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) +; TD: declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) +; TD: declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) + +; TD: declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) +; TD: declare i64 @llvm.nacl.atomic.load.i64(i64*, i32) + +; TD: declare i32 @llvm.nacl.atomic.cmpxchg.i32(i32*, i32, i32, i32, i32) +; TD: declare i64 @llvm.nacl.atomic.cmpxchg.i64(i64*, i64, i64, i32, i32) + +; TD: declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) +; TD: declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32) + +; TD: declare i32 @llvm.nacl.atomic.rmw.i32(i32, i32*, i32, i32) +; TD: declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32) + +; TD: declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*) diff --git a/test/NaCl/Bitcode/inttoptr-elide.ll b/test/NaCl/Bitcode/inttoptr-elide.ll new file mode 100644 index 000000000000..9d683cf20251 --- /dev/null +++ b/test/NaCl/Bitcode/inttoptr-elide.ll @@ -0,0 +1,150 @@ +; Test how we handle eliding inttoptr instructions. + +; RUN: llvm-as < %s | pnacl-freeze \ +; RUN: | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s -check-prefix=PF2 + +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw \ +; RUN: | llvm-dis - | FileCheck %s -check-prefix=TD2 + +; ------------------------------------------------------ + +; Test that we elide the simple case of inttoptr of a load. 
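+; (The inttoptr feeding the load is not emitted as a separate record; +; pnacl-thaw reconstructs it from the load's pointer operand, as the TD2 +; checks below expect.)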
+define void @SimpleLoad(i32 %i) { + %1 = inttoptr i32 %i to i32* + %2 = load i32* %1, align 4 + ret void +} + +; TD2: define void @SimpleLoad(i32 %i) { +; TD2-NEXT: %1 = inttoptr i32 %i to i32* +; TD2-NEXT: %2 = load i32* %1, align 4 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ + +; Test that we can handle multiple inttoptr of loads. +define i32 @TwoLoads(i32 %i) { + %1 = inttoptr i32 %i to i32* + %2 = load i32* %1, align 4 + %3 = inttoptr i32 %i to i32* + %4 = load i32* %3, align 4 + %5 = add i32 %2, %4 + ret i32 %5 +} + +; TD2: define i32 @TwoLoads(i32 %i) { +; TD2-NEXT: %1 = inttoptr i32 %i to i32* +; TD2-NEXT: %2 = load i32* %1, align 4 +; TD2-NEXT: %3 = load i32* %1, align 4 +; TD2-NEXT: %4 = add i32 %2, %3 +; TD2-NEXT: ret i32 %4 +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ + +; Test how we handle inttoptrs, if optimized in the input file. This +; case tests within a single block. +define i32 @TwoLoadOptOneBlock(i32 %i) { + %1 = inttoptr i32 %i to i32* + %2 = load i32* %1, align 4 + %3 = load i32* %1, align 4 + %4 = add i32 %2, %3 + ret i32 %4 +} + +; TD2: define i32 @TwoLoadOptOneBlock(i32 %i) { +; TD2-NEXT: %1 = inttoptr i32 %i to i32* +; TD2-NEXT: %2 = load i32* %1, align 4 +; TD2-NEXT: %3 = load i32* %1, align 4 +; TD2-NEXT: %4 = add i32 %2, %3 +; TD2-NEXT: ret i32 %4 +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ + +; Test how we handle inttoptrs if optimized in the input file. This +; case tests accross blocks. +define i32 @TwoLoadOptTwoBlocks(i32 %i) { + %1 = inttoptr i32 %i to i32* + %2 = load i32* %1, align 4 + %3 = load i32* %1, align 4 + %4 = add i32 %2, %3 + br label %BB + +BB: + %5 = load i32* %1, align 4 + %6 = load i32* %1, align 4 + %7 = add i32 %5, %6 + ret i32 %7 +} + +; TD2: define i32 @TwoLoadOptTwoBlocks(i32 %i) { +; TD2-NEXT: %1 = inttoptr i32 %i to i32* +; TD2-NEXT: %2 = load i32* %1, align 4 +; TD2-NEXT: %3 = load i32* %1, align 4 +; TD2-NEXT: %4 = add i32 %2, %3 +; TD2-NEXT: br label %BB +; TD2: BB: +; TD2-NEXT: %5 = inttoptr i32 %i to i32* +; TD2-NEXT: %6 = load i32* %5, align 4 +; TD2-NEXT: %7 = load i32* %5, align 4 +; TD2-NEXT: %8 = add i32 %6, %7 +; TD2-NEXT: ret i32 %8 +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ + +; Test that we elide the simple case of inttoptr for a store. +define void @SimpleStore(i32 %i) { + %1 = inttoptr i32 %i to i32* + store i32 %i, i32* %1, align 4 + ret void +} + +; TD2: define void @SimpleStore(i32 %i) { +; TD2-NEXT: %1 = inttoptr i32 %i to i32* +; TD2-NEXT: store i32 %i, i32* %1, align 4 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: diff --git a/test/NaCl/Bitcode/inttoptr-of-ptrtoint-elide.ll b/test/NaCl/Bitcode/inttoptr-of-ptrtoint-elide.ll new file mode 100644 index 000000000000..1d88ca5cd96c --- /dev/null +++ b/test/NaCl/Bitcode/inttoptr-of-ptrtoint-elide.ll @@ -0,0 +1,32 @@ +; Test that the writer elides an inttoptr of a ptrtoint. 
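+; A ptrtoint followed immediately by an inttoptr of its result only changes
+; the pointer type, so after the freeze/thaw round trip the pair is expected
+; to come back as a single equivalent bitcast, e.g. (sketch):
+;   %1 = ptrtoint [4 x i8]* @bytes to i32
+;   %2 = inttoptr i32 %1 to i8*
+; thaws to roughly:
+;   %1 = bitcast [4 x i8]* @bytes to i8*
+; as the TD2 checks below verify.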
+ +; RUN: llvm-as < %s | pnacl-freeze \ +; RUN: | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s -check-prefix=PF2 + +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw \ +; RUN: | llvm-dis - | FileCheck %s -check-prefix=TD2 + + +@bytes = internal global [4 x i8] c"abcd" + +define void @inttoptr_of_ptrtoint() { + ; These two instructions are usually replaced with an equivalent + ; bitcast, but either sequence is allowed by the PNaCl ABI verifier. + %1 = ptrtoint [4 x i8]* @bytes to i32 + %2 = inttoptr i32 %1 to i8* + load i8* %2 + ret void +} + +; TD2: define void @inttoptr_of_ptrtoint() { +; TD2-NEXT: %1 = bitcast [4 x i8]* @bytes to i8* +; TD2-NEXT: %2 = load i8* %1 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: diff --git a/test/NaCl/Bitcode/lit.local.cfg b/test/NaCl/Bitcode/lit.local.cfg new file mode 100644 index 000000000000..19eebc0ac7ac --- /dev/null +++ b/test/NaCl/Bitcode/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.ll', '.c', '.cpp'] diff --git a/test/NaCl/Bitcode/no-structs.ll b/test/NaCl/Bitcode/no-structs.ll new file mode 100644 index 000000000000..289cc119fb6d --- /dev/null +++ b/test/NaCl/Bitcode/no-structs.ll @@ -0,0 +1,32 @@ +; Tests that even though global variables can define structured types, +; they types are not put into the bitcode file. + +; RUN: llvm-as < %s | pnacl-freeze \ +; RUN: | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s -check-prefix=PF2 + +declare void @func() + +@compound = internal global <{ [4 x i8], i32 }> + <{ [4 x i8] c"home", i32 ptrtoint (void ()* @func to i32) }> + +define void @CheckBitcastGlobal() { + %1 = bitcast <{ [4 x i8], i32}>* @compound to i32* + %2 = load i32* %1, align 4 + ret void +} + +define void @CheckPtrToIntGlobal() { + %1 = ptrtoint <{ [4 x i8], i32 }>* @compound to i32 + %2 = add i32 %1, 0 + ret void +} + +; Note that it doesn't define a struct type. + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: diff --git a/test/NaCl/Bitcode/null.ll b/test/NaCl/Bitcode/null.ll new file mode 100644 index 000000000000..866c2017bfad --- /dev/null +++ b/test/NaCl/Bitcode/null.ll @@ -0,0 +1,82 @@ +; Test that we no longer generate NULL for numeric constants. 
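+; ("NULL" here presumably refers to the generic null/zero constant record in
+; the bitcode constants block; the PF record dump below is expected to show
+; ordinary integer and float constant records for the zero operands instead.)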
+ +; RUN: llvm-as < %s | pnacl-freeze \ +; RUN: | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s -check-prefix=PF + +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw \ +; RUN: | llvm-dis - | FileCheck %s -check-prefix=TD + +; ------------------------------------------------------ + +define void @TestIntegers() { + %1 = and i1 true, false + %2 = add i8 1, 0 + %3 = add i16 1, 0 + %4 = add i32 1, 0 + %5 = add i64 1, 0 + ret void +} + +; TD: define void @TestIntegers() { +; TD-NEXT: %1 = and i1 true, false +; TD-NEXT: %2 = add i8 1, 0 +; TD-NEXT: %3 = add i16 1, 0 +; TD-NEXT: %4 = add i32 1, 0 +; TD-NEXT: %5 = add i64 1, 0 +; TD-NEXT: ret void +; TD-NEXT: } + +; PF: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: + +define void @TestFloats() { + %1 = fadd float 1.0, 0.0 + %2 = fadd double 1.0, 0.0 + ret void +} + +; TD: define void @TestFloats() { +; TD-NEXT: %1 = fadd float 1.000000e+00, 0.000000e+00 +; TD-NEXT: %2 = fadd double 1.000000e+00, 0.000000e+00 +; TD-NEXT: ret void +; TD-NEXT: } + +; PF: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: +; PF-NEXT: diff --git a/test/NaCl/Bitcode/ptrtoint-elide.ll b/test/NaCl/Bitcode/ptrtoint-elide.ll new file mode 100644 index 000000000000..bf9ac25bcd56 --- /dev/null +++ b/test/NaCl/Bitcode/ptrtoint-elide.ll @@ -0,0 +1,810 @@ +; Test how we handle eliding ptrtoint instructions. + +; RUN: llvm-as < %s | pnacl-freeze \ +; RUN: | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s -check-prefix=PF2 + +; RUN: llvm-as < %s | pnacl-freeze | pnacl-thaw \ +; RUN: | llvm-dis - | FileCheck %s -check-prefix=TD2 + +; ------------------------------------------------------ + +declare i32 @bar(i32) + +@bytes = internal global [4 x i8] c"abcd" + +; ------------------------------------------------------ + +; Show simple case where we use ptrtoint +define void @AllocCastSimple() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + %3 = bitcast [4 x i8]* @bytes to i32* + store i32 %2, i32* %3, align 1 + ret void +} + +; TD2: define void @AllocCastSimple() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = ptrtoint i8* %1 to i32 +; TD2-NEXT: %3 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: store i32 %2, i32* %3, align 1 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Same as above, but with the cast order changed. Shows +; that we always inject casts back in a fixed order. Hence, +; the casts will be reversed. +define void @AllocCastSimpleReversed() { + %1 = alloca i8, i32 4, align 8 + %2 = bitcast [4 x i8]* @bytes to i32* + %3 = ptrtoint i8* %1 to i32 + store i32 %3, i32* %2, align 1 + ret void +} + +; TD2: define void @AllocCastSimpleReversed() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = ptrtoint i8* %1 to i32 +; TD2-NEXT: %3 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: store i32 %2, i32* %3, align 1 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show case where we delete ptrtoint because they aren't used. 
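+; Neither ptrtoint result below has a use, so after the freeze/thaw round
+; trip only the two allocas are expected to survive (see the TD2 checks).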
+define void @AllocCastDelete() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + %3 = alloca i8, i32 4, align 8 + %4 = ptrtoint i8* %3 to i32 + ret void +} + +; TD2: define void @AllocCastDelete() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = alloca i8, i32 4, align 8 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show case where we have optimized the ptrtoint (and bitcast) into a +; single instruction, and will only be inserted before the first use +; in the block. +define void @AllocCastOpt() { + %1 = alloca i8, i32 4, align 8 + %2 = bitcast [4 x i8]* @bytes to i32* + %3 = ptrtoint i8* %1 to i32 + store i32 %3, i32* %2, align 1 + store i32 %3, i32* %2, align 1 + ret void +} + +; TD2: define void @AllocCastOpt() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = ptrtoint i8* %1 to i32 +; TD2-NEXT: %3 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: store i32 %2, i32* %3, align 1 +; TD2-NEXT: store i32 %2, i32* %3, align 1 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show case where ptrtoint (and bitcast) for store are not immediately +; before the store, the casts will be moved to the store. +define void @AllocCastMove(i32) { + %2 = alloca i8, i32 4, align 8 + %3 = bitcast [4 x i8]* @bytes to i32* + %4 = ptrtoint i8* %2 to i32 + %5 = add i32 %0, 1 + store i32 %4, i32* %3, align 1 + ret void +} + +; TD2: define void @AllocCastMove(i32) { +; TD2-NEXT: %2 = alloca i8, i32 4, align 8 +; TD2-NEXT: %3 = add i32 %0, 1 +; TD2-NEXT: %4 = ptrtoint i8* %2 to i32 +; TD2-NEXT: %5 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: store i32 %4, i32* %5, align 1 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show case where ptrtoint on global variable is merged in a store, and +; order is kept. +define void @StoreGlobal() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint [4 x i8]* @bytes to i32 + %3 = bitcast i8* %1 to i32* + store i32 %2, i32* %3, align 1 + ret void +} + +; TD2: define void @StoreGlobal() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32 +; TD2-NEXT: %3 = bitcast i8* %1 to i32* +; TD2-NEXT: store i32 %2, i32* %3, align 1 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Same as above, but with cast order reversed. +define void @StoreGlobalCastsReversed() { + %1 = alloca i8, i32 4, align 8 + %2 = bitcast i8* %1 to i32* + %3 = ptrtoint [4 x i8]* @bytes to i32 + store i32 %3, i32* %2, align 1 + ret void +} + +; TD2: define void @StoreGlobalCastsReversed() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32 +; TD2-NEXT: %3 = bitcast i8* %1 to i32* +; TD2-NEXT: store i32 %2, i32* %3, align 1 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show that we will move the ptrtoint of a global to the use. 
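+; Although the ptrtoint of @bytes is written first here, it is expected to be
+; re-materialized next to the store that uses it, so the alloca comes first
+; in the TD2 output below.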
+define i32 @StoreGlobalMovePtr2Int() { + %1 = ptrtoint [4 x i8]* @bytes to i32 + %2 = alloca i8, i32 4, align 8 + %3 = bitcast i8* %2 to i32* + store i32 %1, i32* %3, align 1 + ret i32 0 +} + +; TD2: define i32 @StoreGlobalMovePtr2Int() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32 +; TD2-NEXT: %3 = bitcast i8* %1 to i32* +; TD2-NEXT: store i32 %2, i32* %3, align 1 +; TD2-NEXT: ret i32 0 +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show that we handle add instructions with pointer casts. +define void @CastAddAlloca() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + + ; Simple add. + %3 = add i32 1, 2 + + ; Cast first. + %4 = add i32 %2, 2 + + ; Cast second. + %5 = add i32 1, %2 + + ; Cast both. + %6 = add i32 %2, %2 + + ret void +} + +; TD2: define void @CastAddAlloca() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = add i32 1, 2 +; TD2-NEXT: %3 = ptrtoint i8* %1 to i32 +; TD2-NEXT: %4 = add i32 %3, 2 +; TD2-NEXT: %5 = add i32 1, %3 +; TD2-NEXT: %6 = add i32 %3, %3 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show that we handle add instructions with pointer casts. +define void @CastAddGlobal() { + %1 = ptrtoint [4 x i8]* @bytes to i32 + + ; Simple Add. + %2 = add i32 1, 2 + + ; Cast first. + %3 = add i32 %1, 2 + + ; Cast Second. + %4 = add i32 1, %1 + + ; Cast both. + %5 = add i32 %1, %1 + ret void +} + +; TD2: define void @CastAddGlobal() { +; TD2-NEXT: %1 = add i32 1, 2 +; TD2-NEXT: %2 = ptrtoint [4 x i8]* @bytes to i32 +; TD2-NEXT: %3 = add i32 %2, 2 +; TD2-NEXT: %4 = add i32 1, %2 +; TD2-NEXT: %5 = add i32 %2, %2 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show that we can handle pointer conversions for other scalar binary operators. +define void @CastBinop() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + %3 = ptrtoint [4 x i8]* @bytes to i32 + %4 = sub i32 %2, %3 + %5 = mul i32 %2, %3 + %6 = udiv i32 %2, %3 + %7 = urem i32 %2, %3 + %8 = srem i32 %2, %3 + %9 = shl i32 %2, %3 + %10 = lshr i32 %2, %3 + %11 = ashr i32 %2, %3 + %12 = and i32 %2, %3 + %13 = or i32 %2, %3 + %14 = xor i32 %2, %3 + ret void +} + +; TD2: define void @CastBinop() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = ptrtoint i8* %1 to i32 +; TD2-NEXT: %3 = ptrtoint [4 x i8]* @bytes to i32 +; TD2-NEXT: %4 = sub i32 %2, %3 +; TD2-NEXT: %5 = mul i32 %2, %3 +; TD2-NEXT: %6 = udiv i32 %2, %3 +; TD2-NEXT: %7 = urem i32 %2, %3 +; TD2-NEXT: %8 = srem i32 %2, %3 +; TD2-NEXT: %9 = shl i32 %2, %3 +; TD2-NEXT: %10 = lshr i32 %2, %3 +; TD2-NEXT: %11 = ashr i32 %2, %3 +; TD2-NEXT: %12 = and i32 %2, %3 +; TD2-NEXT: %13 = or i32 %2, %3 +; TD2-NEXT: %14 = xor i32 %2, %3 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show that we handle (non-special) bitcasts by converting pointer +; casts to integer. 
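+; Each conversion below appears twice: once with a plain constant operand and
+; once with the pointer-derived value %2. After the round trip a single
+; ptrtoint is expected to be inserted just before its first use (the trunc),
+; as the TD2 checks show.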
+define void @TestCasts() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + + %3 = trunc i32 257 to i8 + %4 = trunc i32 %2 to i8 + + %5 = zext i32 257 to i64 + %6 = zext i32 %2 to i64 + + %7 = sext i32 -1 to i64 + %8 = sext i32 %2 to i64 + + %9 = uitofp i32 1 to float + %10 = uitofp i32 %2 to float + + %11 = sitofp i32 -1 to float + %12 = sitofp i32 %2 to float + ret void +} + +; TD2: define void @TestCasts() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = trunc i32 257 to i8 +; TD2-NEXT: %3 = ptrtoint i8* %1 to i32 +; TD2-NEXT: %4 = trunc i32 %3 to i8 +; TD2-NEXT: %5 = zext i32 257 to i64 +; TD2-NEXT: %6 = zext i32 %3 to i64 +; TD2-NEXT: %7 = sext i32 -1 to i64 +; TD2-NEXT: %8 = sext i32 %3 to i64 +; TD2-NEXT: %9 = uitofp i32 1 to float +; TD2-NEXT: %10 = uitofp i32 %3 to float +; TD2-NEXT: %11 = sitofp i32 -1 to float +; TD2-NEXT: %12 = sitofp i32 %3 to float +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show that we elide a ptrtoint cast for a call. +define void @TestSavedPtrToInt() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + %3 = add i32 %2, 0 + %4 = call i32 @bar(i32 %2) + ret void +} + +; TD2: define void @TestSavedPtrToInt() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = ptrtoint i8* %1 to i32 +; TD2-NEXT: %3 = add i32 %2, 0 +; TD2-NEXT: %4 = call i32 @bar(i32 %2) +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show that we can handle pointer conversions for icmp. +define void @CastIcmp() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + %3 = ptrtoint [4 x i8]* @bytes to i32 + %4 = icmp eq i32 1, 2 + %5 = icmp eq i32 %2, 2 + %6 = icmp eq i32 1, %3 + %7 = icmp eq i32 %2, %3 + %8 = icmp eq i32 %3, %2 + ret void +} + +; TD2: define void @CastIcmp() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = icmp eq i32 1, 2 +; TD2-NEXT: %3 = ptrtoint i8* %1 to i32 +; TD2-NEXT: %4 = icmp eq i32 %3, 2 +; TD2-NEXT: %5 = ptrtoint [4 x i8]* @bytes to i32 +; TD2-NEXT: %6 = icmp eq i32 1, %5 +; TD2-NEXT: %7 = icmp eq i32 %3, %5 +; TD2-NEXT: %8 = icmp eq i32 %5, %3 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show that we can handle pointer conversions for Select. 
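+; Same pattern as CastIcmp above, but for select: the ptrtoint casts are
+; expected to reappear just before the first select that needs them.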
+define void @CastSelect() { + %1 = alloca i8, i32 4, align 8 + %2 = ptrtoint i8* %1 to i32 + %3 = ptrtoint [4 x i8]* @bytes to i32 + %4 = select i1 true, i32 1, i32 2 + %5 = select i1 true, i32 %2, i32 2 + %6 = select i1 true, i32 1, i32 %3 + %7 = select i1 true, i32 %2, i32 %3 + %8 = select i1 true, i32 %3, i32 %2 + ret void +} + +; TD2: define void @CastSelect() { +; TD2-NEXT: %1 = alloca i8, i32 4, align 8 +; TD2-NEXT: %2 = select i1 true, i32 1, i32 2 +; TD2-NEXT: %3 = ptrtoint i8* %1 to i32 +; TD2-NEXT: %4 = select i1 true, i32 %3, i32 2 +; TD2-NEXT: %5 = ptrtoint [4 x i8]* @bytes to i32 +; TD2-NEXT: %6 = select i1 true, i32 1, i32 %5 +; TD2-NEXT: %7 = select i1 true, i32 %3, i32 %5 +; TD2-NEXT: %8 = select i1 true, i32 %5, i32 %3 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: + +; ------------------------------------------------------ + +; Show that if a phi node refers to a pointer cast, we add +; them at the end of the incoming block. +define void @PhiBackwardRefs(i1) { + %2 = alloca i8, i32 4, align 8 + %3 = bitcast i8* %2 to i32* + %4 = alloca i8, i32 4, align 8 + %5 = ptrtoint i8* %4 to i32 + br i1 %0, label %true, label %false + +true: + %6 = load i32* %3 + br label %merge + +false: + %7 = load i32* %3 + br label %merge + +merge: + %8 = phi i32 [%5, %true], [%5, %false] + %9 = phi i32 [%6, %true], [%7, %false] + ret void +} + +; TD2: define void @PhiBackwardRefs(i1) { +; TD2-NEXT: %2 = alloca i8, i32 4, align 8 +; TD2-NEXT: %3 = alloca i8, i32 4, align 8 +; TD2-NEXT: br i1 %0, label %true, label %false +; TD2: true: +; TD2-NEXT: %4 = bitcast i8* %2 to i32* +; TD2-NEXT: %5 = load i32* %4 +; TD2-NEXT: %6 = ptrtoint i8* %3 to i32 +; TD2-NEXT: br label %merge +; TD2: false: +; TD2-NEXT: %7 = bitcast i8* %2 to i32* +; TD2-NEXT: %8 = load i32* %7 +; TD2-NEXT: %9 = ptrtoint i8* %3 to i32 +; TD2-NEXT: br label %merge +; TD2: merge: +; TD2-NEXT: %10 = phi i32 [ %6, %true ], [ %9, %false ] +; TD2-NEXT: %11 = phi i32 [ %5, %true ], [ %8, %false ] +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ + +; Like PhiBackwardRefs except the phi nodes forward reference +; instructions instead of backwards references. 
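+; Because the loads in %true and %false forward-reference a cast that is only
+; defined later in %start, the thawed code is expected to rebuild the pointer
+; with an inttoptr in each of those blocks (see the TD2 checks below).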
+define void @PhiForwardRefs(i1) { + br label %start + +merge: + %2 = phi i32 [%9, %true], [%9, %false] + %3 = phi i32 [%4, %true], [%5, %false] + ret void + +true: + %4 = load i32* %7 + br label %merge + +false: + %5 = load i32* %7 + br label %merge + +start: + %6 = alloca i8, i32 4, align 8 + %7 = bitcast i8* %6 to i32* + %8 = alloca i8, i32 4, align 8 + %9 = ptrtoint i8* %8 to i32 + br i1 %0, label %true, label %false +} + +; TD2: define void @PhiForwardRefs(i1) { +; TD2-NEXT: br label %start +; TD2: merge +; TD2-NEXT: %2 = phi i32 [ %11, %true ], [ %11, %false ] +; TD2-NEXT: %3 = phi i32 [ %5, %true ], [ %7, %false ] +; TD2-NEXT: ret void +; TD2: true: +; TD2-NEXT: %4 = inttoptr i32 %9 to i32* +; TD2-NEXT: %5 = load i32* %4 +; TD2-NEXT: br label %merge +; TD2: false: +; TD2-NEXT: %6 = inttoptr i32 %9 to i32* +; TD2-NEXT: %7 = load i32* %6 +; TD2-NEXT: br label %merge +; TD2: start: +; TD2-NEXT: %8 = alloca i8, i32 4, align 8 +; TD2-NEXT: %9 = ptrtoint i8* %8 to i32 +; TD2-NEXT: %10 = alloca i8, i32 4, align 8 +; TD2-NEXT: %11 = ptrtoint i8* %10 to i32 +; TD2-NEXT: br i1 %0, label %true, label %false +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ + +; Show that if a phi node incoming block already has a pointer cast, +; we use it instead of adding one at the end of the block. In this +; example, we reuse instruction %7 in block true for phi node %10. +define void @PhiMergeCast(i1) { + %2 = alloca i8, i32 4, align 8 + %3 = bitcast i8* %2 to i32* + %4 = alloca i8, i32 4, align 8 + %5 = ptrtoint i8* %4 to i32 + br i1 %0, label %true, label %false + +true: + %6 = load i32* %3 + %7 = ptrtoint i8* %4 to i32 + %8 = add i32 %6, %7 + br label %merge + +false: + %9 = load i32* %3 + br label %merge + +merge: + %10 = phi i32 [%5, %true], [%5, %false] + %11 = phi i32 [%6, %true], [%9, %false] + ret void +} + +; TD2: define void @PhiMergeCast(i1) { +; TD2-NEXT: %2 = alloca i8, i32 4, align 8 +; TD2-NEXT: %3 = alloca i8, i32 4, align 8 +; TD2-NEXT: br i1 %0, label %true, label %false +; TD2: true: +; TD2-NEXT: %4 = bitcast i8* %2 to i32* +; TD2-NEXT: %5 = load i32* %4 +; TD2-NEXT: %6 = ptrtoint i8* %3 to i32 +; TD2-NEXT: %7 = add i32 %5, %6 +; TD2-NEXT: br label %merge +; TD2: false: +; TD2-NEXT: %8 = bitcast i8* %2 to i32* +; TD2-NEXT: %9 = load i32* %8 +; TD2-NEXT: %10 = ptrtoint i8* %3 to i32 +; TD2-NEXT: br label %merge +; TD2: merge: +; TD2-NEXT: %11 = phi i32 [ %6, %true ], [ %10, %false ] +; TD2-NEXT: %12 = phi i32 [ %5, %true ], [ %9, %false ] +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: + +; ------------------------------------------------------ + +; Show that we must introduce a cast reference for each +; reachable block, but one is sufficient. 
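+; Each of the blocks b1..b4 below is expected to get its own ptrtoint/bitcast
+; pair when thawed, but within a block the two stores share that single pair
+; (see the TD2 checks).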
+define void @LongReachingCasts(i1) { + %2 = alloca i8, i32 4, align 8 + %3 = ptrtoint i8* %2 to i32 + %4 = bitcast [4 x i8]* @bytes to i32* + br i1 %0, label %Split1, label %Split2 + +Split1: + br i1 %0, label %b1, label %b2 + +Split2: + br i1 %0, label %b3, label %b4 + +b1: + store i32 %3, i32* %4, align 1 + store i32 %3, i32* %4, align 1 + ret void + +b2: + store i32 %3, i32* %4, align 1 + store i32 %3, i32* %4, align 1 + ret void + +b3: + store i32 %3, i32* %4, align 1 + store i32 %3, i32* %4, align 1 + ret void + +b4: + store i32 %3, i32* %4, align 1 + store i32 %3, i32* %4, align 1 + ret void +} + +; TD2: define void @LongReachingCasts(i1) { +; TD2-NEXT: %2 = alloca i8, i32 4, align 8 +; TD2-NEXT: br i1 %0, label %Split1, label %Split2 +; TD2: Split1: +; TD2-NEXT: br i1 %0, label %b1, label %b2 +; TD2: Split2: +; TD2-NEXT: br i1 %0, label %b3, label %b4 +; TD2: b1: +; TD2-NEXT: %3 = ptrtoint i8* %2 to i32 +; TD2-NEXT: %4 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: store i32 %3, i32* %4, align 1 +; TD2-NEXT: store i32 %3, i32* %4, align 1 +; TD2-NEXT: ret void +; TD2: b2: +; TD2-NEXT: %5 = ptrtoint i8* %2 to i32 +; TD2-NEXT: %6 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: store i32 %5, i32* %6, align 1 +; TD2-NEXT: store i32 %5, i32* %6, align 1 +; TD2-NEXT: ret void +; TD2: b3: +; TD2-NEXT: %7 = ptrtoint i8* %2 to i32 +; TD2-NEXT: %8 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: store i32 %7, i32* %8, align 1 +; TD2-NEXT: store i32 %7, i32* %8, align 1 +; TD2-NEXT: ret void +; TD2: b4: +; TD2-NEXT: %9 = ptrtoint i8* %2 to i32 +; TD2-NEXT: %10 = bitcast [4 x i8]* @bytes to i32* +; TD2-NEXT: store i32 %9, i32* %10, align 1 +; TD2-NEXT: store i32 %9, i32* %10, align 1 +; TD2-NEXT: ret void +; TD2-NEXT: } + +; PF2: +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2: diff --git a/test/NaCl/Bitcode/switch.ll b/test/NaCl/Bitcode/switch.ll new file mode 100644 index 000000000000..3d5ac91281cc --- /dev/null +++ b/test/NaCl/Bitcode/switch.ll @@ -0,0 +1,51 @@ +; Test that we no longer put VECTOR/ARRAY type entries, associated with +; switch instructions, into the bitcode file. + +; RUN: llvm-as < %s | pnacl-freeze \ +; RUN: | pnacl-bcanalyzer -dump-records \ +; RUN: | FileCheck %s -check-prefix=PF2 + +; Test case where we switch on a variable. +define void @SwitchVariable(i32) { + switch i32 %0, label %l1 [ + i32 1, label %l2 + i32 2, label %l2 + i32 4, label %l3 + i32 5, label %l3 + ] + br label %end +l1: + br label %end +l2: + br label %end +l3: + br label %end +end: + ret void +} + +; Test case where we switch on a constant. +define void @SwitchConstant(i32) { + switch i32 3, label %l1 [ + i32 1, label %l2 + i32 2, label %l2 + i32 4, label %l3 + i32 5, label %l3 + ] + br label %end +l1: + br label %end +l2: + br label %end +l3: + br label %end +end: + ret void +} + +; PF2: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: +; PF2-NEXT: diff --git a/test/NaCl/PNaClABI/abi-addrspace.ll b/test/NaCl/PNaClABI/abi-addrspace.ll new file mode 100644 index 000000000000..6826a2d937b0 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-addrspace.ll @@ -0,0 +1,17 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +; This test checks that the "addrspace" pointer attribute is rejected +; by the PNaCl ABI verifier. The only allowed address space value is +; 0 (the default). 
+ +@var = addrspace(1) global [4 x i8] c"xxxx" +; CHECK: Variable var has addrspace attribute (disallowed) + +define void @func() { + inttoptr i32 0 to i32 addrspace(2)* +; CHECK: disallowed: bad result type: {{.*}} inttoptr {{.*}} addrspace + ret void +} + +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-aliases.ll b/test/NaCl/PNaClABI/abi-aliases.ll new file mode 100644 index 000000000000..187bb4569a18 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-aliases.ll @@ -0,0 +1,8 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +@aliased_var = internal global [1 x i8] c"x" +; CHECK-NOT: disallowed + +@alias1 = alias [1 x i8]* @aliased_var +; CHECK: Variable alias1 is an alias (disallowed) diff --git a/test/NaCl/PNaClABI/abi-alignment.ll b/test/NaCl/PNaClABI/abi-alignment.ll new file mode 100644 index 000000000000..b484d234bd17 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-alignment.ll @@ -0,0 +1,98 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +; Test the "align" attributes that are allowed on load and store +; instructions. + + +declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1) +declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1) + + +define internal void @allowed_cases(i32 %ptr, float %f, double %d) { + %ptr.i32 = inttoptr i32 %ptr to i32* + load i32* %ptr.i32, align 1 + store i32 123, i32* %ptr.i32, align 1 + + %ptr.float = inttoptr i32 %ptr to float* + load float* %ptr.float, align 1 + load float* %ptr.float, align 4 + store float %f, float* %ptr.float, align 1 + store float %f, float* %ptr.float, align 4 + + %ptr.double = inttoptr i32 %ptr to double* + load double* %ptr.double, align 1 + load double* %ptr.double, align 8 + store double %d, double* %ptr.double, align 1 + store double %d, double* %ptr.double, align 8 + + ; memcpy() et el take an alignment parameter, which is allowed to be 1. + %ptr.p = inttoptr i32 %ptr to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 1, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 1, i1 false) + call void @llvm.memset.p0i8.i32(i8* %ptr.p, i8 99, + i32 10, i32 1, i1 false) + + ret void +} +; CHECK-NOT: disallowed + + +define internal void @rejected_cases(i32 %ptr, float %f, double %d, i32 %align) { + %ptr.i32 = inttoptr i32 %ptr to i32* + load i32* %ptr.i32, align 4 + store i32 123, i32* %ptr.i32, align 4 +; CHECK: disallowed: bad alignment: {{.*}} load i32{{.*}} align 4 +; CHECK-NEXT: disallowed: bad alignment: store i32{{.*}} align 4 + + ; Unusual, not-very-useful alignments are rejected. 
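+; (For comparison, allowed_cases above accepts align 1 for i32, align 1 or 4
+; for float, align 1 or 8 for double, and an alignment argument of 1 for the
+; mem* intrinsics.)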
+ %ptr.float = inttoptr i32 %ptr to float* + load float* %ptr.float, align 2 + load float* %ptr.float, align 8 + store float %f, float* %ptr.float, align 2 + store float %f, float* %ptr.float, align 8 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load float{{.*}} align 2 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load float{{.*}} align 8 +; CHECK-NEXT: disallowed: bad alignment: store float{{.*}} align 2 +; CHECK-NEXT: disallowed: bad alignment: store float{{.*}} align 8 + + %ptr.double = inttoptr i32 %ptr to double* + load double* %ptr.double, align 2 + load double* %ptr.double, align 4 + store double %d, double* %ptr.double, align 2 + store double %d, double* %ptr.double, align 4 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load double{{.*}} align 2 +; CHECK-NEXT: disallowed: bad alignment: {{.*}} load double{{.*}} align 4 +; CHECK-NEXT: disallowed: bad alignment: store double{{.*}} align 2 +; CHECK-NEXT: disallowed: bad alignment: store double{{.*}} align 4 + + ; Non-pessimistic alignments for memcpy() et al are rejected. + %ptr.p = inttoptr i32 %ptr to i8* + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 4, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 4, i1 false) + call void @llvm.memset.p0i8.i32(i8* %ptr.p, i8 99, + i32 10, i32 4, i1 false) +; CHECK-NEXT: bad alignment: call void @llvm.memcpy +; CHECK-NEXT: bad alignment: call void @llvm.memmove +; CHECK-NEXT: bad alignment: call void @llvm.memset + + ; Check that the verifier does not crash if the alignment argument + ; is not a constant. + call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 %align, i1 false) + call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr.p, i8* %ptr.p, + i32 10, i32 %align, i1 false) + call void @llvm.memset.p0i8.i32(i8* %ptr.p, i8 99, + i32 10, i32 %align, i1 false) +; CHECK-NEXT: bad alignment: call void @llvm.memcpy +; CHECK-NEXT: bad alignment: call void @llvm.memmove +; CHECK-NEXT: bad alignment: call void @llvm.memset + + ret void +} +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-arithmetic-attributes.ll b/test/NaCl/PNaClABI/abi-arithmetic-attributes.ll new file mode 100644 index 000000000000..c7fdfe5cf08a --- /dev/null +++ b/test/NaCl/PNaClABI/abi-arithmetic-attributes.ll @@ -0,0 +1,36 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +; This tests that the arithmetic attributes "nuw" and "nsw" ("no +; unsigned wrap" and "no signed wrap") and "exact" are disallowed by +; the PNaCl ABI verifier. 
+ +define internal void @allowed_cases() { + %add = add i32 1, 2 + %shl = shl i32 3, 4 + %udiv = udiv i32 4, 2 + %lshr = lshr i32 2, 1 + %ashr = ashr i32 2, 1 + ret void +} +; CHECK-NOT: disallowed + + +define internal void @rejected_cases() { + %add = add nsw i32 1, 2 +; CHECK: disallowed: has "nsw" attribute: %add + %shl1 = shl nuw i32 3, 4 +; CHECK-NEXT: disallowed: has "nuw" attribute: %shl1 + %sub = sub nsw nuw i32 5, 6 +; CHECK-NEXT: disallowed: has "nuw" attribute: %sub + + %lshr = lshr exact i32 2, 1 +; CHECK-NEXT: disallowed: has "exact" attribute: %lshr + %ashr = ashr exact i32 2, 1 +; CHECK-NEXT: disallowed: has "exact" attribute: %ashr + %udiv = udiv exact i32 4, 2 +; CHECK-NEXT: disallowed: has "exact" attribute: %udiv + + ret void +} +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-bad-intrinsic.ll b/test/NaCl/PNaClABI/abi-bad-intrinsic.ll new file mode 100644 index 000000000000..8566f17b728b --- /dev/null +++ b/test/NaCl/PNaClABI/abi-bad-intrinsic.ll @@ -0,0 +1,11 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +; This intrinsic is declared with the wrong type, using i32* arguments +; instead of i8*. Check that the ABI verifier rejects this. This +; must be tested in a separate .ll file from the correct intrinsic +; declarations. + +declare void @llvm.memcpy.p0i8.p0i8.i32(i32* %dest, i32* %src, + i32 %len, i32 %align, i1 %isvolatile) +; CHECK: Function llvm.memcpy.p0i8.p0i8.i32 is a disallowed LLVM intrinsic diff --git a/test/NaCl/PNaClABI/abi-blockaddress.ll b/test/NaCl/PNaClABI/abi-blockaddress.ll new file mode 100644 index 000000000000..8d066925ef3f --- /dev/null +++ b/test/NaCl/PNaClABI/abi-blockaddress.ll @@ -0,0 +1,12 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +define void @func_with_block() { + br label %some_block +some_block: + ret void +} +; CHECK-NOT: disallowed + +@blockaddr = global i8* blockaddress(@func_with_block, %some_block) +; CHECK: Global variable blockaddr has non-flattened initializer (disallowed): i8* blockaddress(@func_with_block, %some_block) diff --git a/test/NaCl/PNaClABI/abi-call-attributes.ll b/test/NaCl/PNaClABI/abi-call-attributes.ll new file mode 100644 index 000000000000..3ed96d26b34a --- /dev/null +++ b/test/NaCl/PNaClABI/abi-call-attributes.ll @@ -0,0 +1,21 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +define void @func(i32 %arg) { + ret void +} + +define void @calls() { + call void @func(i32 1) noreturn nounwind +; CHECK: disallowed: bad call attributes: call void @func(i32 1) # + + call void @func(i32 inreg 1) +; CHECK-NEXT: disallowed: bad call attributes: call void @func(i32 inreg 1) + + call fastcc void @func(i32 1) +; CHECK-NEXT: disallowed: bad calling convention: call fastcc void @func(i32 1) + + ret void +} + +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-debug-info.ll b/test/NaCl/PNaClABI/abi-debug-info.ll new file mode 100644 index 000000000000..bbb940a74b73 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-debug-info.ll @@ -0,0 +1,53 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; RUN: pnacl-abicheck -pnaclabi-allow-debug-metadata < %s | \ +; RUN: FileCheck %s --check-prefix=DBG +; XFAIL: * +; DBG-NOT: disallowed + + +declare void @llvm.dbg.declare(metadata, metadata) +declare void @llvm.dbg.value(metadata, i64, metadata) + +; CHECK: Function llvm.dbg.declare is a disallowed LLVM intrinsic +; CHECK: Function llvm.dbg.value is a disallowed LLVM intrinsic + +; We need to force LLParser to leave a call to llvm.dbg.value && llvm.dbg.declare +; This was taken from 
2010-05-03-OriginDIE.ll +%struct.anon = type { i64, i32, i32, i32, [1 x i32] } +%struct.gpm_t = type { i32, i8*, [16 x i8], i32, i64, i64, i64, i64, i64, i64, i32, i16, i16, [8 x %struct.gpmr_t] } +%struct.gpmr_t = type { [48 x i8], [48 x i8], [16 x i8], i64, i64, i64, i64, i16 } +%struct.gpt_t = type { [8 x i8], i32, i32, i32, i32, i64, i64, i64, i64, [16 x i8], %struct.anon } +define fastcc void @gpt2gpm(%struct.gpm_t* %gpm, %struct.gpt_t* %gpt) nounwind optsize ssp { +entry: + %data_addr.i18 = alloca i64, align 8 ; [#uses=1] + %data_addr.i17 = alloca i64, align 8 ; [#uses=2] + %data_addr.i16 = alloca i64, align 8 ; [#uses=0] + %data_addr.i15 = alloca i32, align 4 ; [#uses=0] + %data_addr.i = alloca i64, align 8 ; [#uses=0] + %0 = getelementptr inbounds %struct.gpm_t* %gpm, i32 0, i32 2, i32 0 ; [#uses=1] + %1 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 9, i32 0 ; [#uses=1] + call void @uuid_LtoB(i8* %0, i8* %1) nounwind, !dbg !0 + %a9 = load volatile i64* %data_addr.i18, align 8 ; [#uses=1] + %a10 = call i64 @llvm.bswap.i64(i64 %a9) nounwind ; [#uses=1] + %a11 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 8, !dbg !7 ; [#uses=1] + %a12 = load i64* %a11, align 4, !dbg !7 ; [#uses=1] + call void @llvm.dbg.declare(metadata !{i64* %data_addr.i17}, metadata !8) nounwind, !dbg !14 + store i64 %a12, i64* %data_addr.i17, align 8 + call void @llvm.dbg.value(metadata !6, i64 0, metadata !15) nounwind + call void @llvm.dbg.value(metadata !18, i64 0, metadata !19) nounwind + call void @llvm.dbg.declare(metadata !6, metadata !23) nounwind + call void @llvm.dbg.value(metadata !{i64* %data_addr.i17}, i64 0, metadata !34) nounwind + %a13 = load volatile i64* %data_addr.i17, align 8 ; [#uses=1] + %a14 = call i64 @llvm.bswap.i64(i64 %a13) nounwind ; [#uses=2] + %a15 = add i64 %a10, %a14, !dbg !7 ; [#uses=1] + %a16 = sub i64 %a15, %a14 ; [#uses=1] + %a17 = getelementptr inbounds %struct.gpm_t* %gpm, i32 0, i32 5, !dbg !7 ; [#uses=1] + store i64 %a16, i64* %a17, align 4, !dbg !7 + ret void, !dbg !7 +} + +; FileCheck gives an error if its input file is empty, so ensure that +; the output of pnacl-abicheck is non-empty by generating at least one +; error. 
+declare void @bad_func(ppc_fp128 %bad_arg) +; DBG: Function bad_func has disallowed type: void (ppc_fp128) diff --git a/test/NaCl/PNaClABI/abi-externals-whitelist.ll b/test/NaCl/PNaClABI/abi-externals-whitelist.ll new file mode 100644 index 000000000000..d798fc0fe15e --- /dev/null +++ b/test/NaCl/PNaClABI/abi-externals-whitelist.ll @@ -0,0 +1,25 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +; Make sure that external symbols are properly rejected or accepted + +define void @foo() { + ret void +} + +; CHECK: foo is not a valid external symbol (disallowed) + +define external void @main() { + ret void +} +; CHECK: main is not a valid external symbol (disallowed) + +define external void @_start() { + ret void +} +; _start is whitelisted +; CHECK-NOT: _start is not a valid external symbol (disallowed) + +; Intrinsics can be external too +declare void @llvm.trap() + diff --git a/test/NaCl/PNaClABI/abi-flattened-globals.ll b/test/NaCl/PNaClABI/abi-flattened-globals.ll new file mode 100644 index 000000000000..ec93d2289241 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-flattened-globals.ll @@ -0,0 +1,69 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +; Allowed cases + +@bytes = internal global [7 x i8] c"abcdefg" + +@ptr_to_ptr = internal global i32 ptrtoint (i32* @ptr to i32) +@ptr_to_func = internal global i32 ptrtoint (void ()* @func to i32) + +@compound = internal global <{ [3 x i8], i32 }> + <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }> + +@ptr = internal global i32 ptrtoint ([7 x i8]* @bytes to i32) + +@addend_ptr = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 1) +@addend_negative = internal global i32 add (i32 ptrtoint (i32* @ptr to i32), i32 -1) + +@addend_array1 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 1) +@addend_array2 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 7) +@addend_array3 = internal global i32 add (i32 ptrtoint ([7 x i8]* @bytes to i32), i32 9) + +@addend_struct1 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 1) +@addend_struct2 = internal global i32 add (i32 ptrtoint (<{ [3 x i8], i32 }>* @compound to i32), i32 4) + +; CHECK-NOT: disallowed + + +; Disallowed cases + +@bad_external = external global [1 x i8] +; CHECK: Global variable bad_external has no initializer (disallowed) + +@bad_int = internal global i32 0 +; CHECK: Global variable bad_int has non-flattened initializer (disallowed): i32 0 + +@bad_size = internal global i64 ptrtoint ([7 x i8]* @bytes to i64) +; CHECK: Global variable bad_size has non-flattened initializer + +; "null" is not allowed. +@bad_ptr = internal global i8* null +; CHECK: Global variable bad_ptr has non-flattened initializer + +@bad_ptr2 = internal global i64 ptrtoint (i8* null to i64) +; CHECK: Global variable bad_ptr2 has non-flattened initializer + +@bad_sub = internal global i32 sub (i32 ptrtoint (i32* @ptr to i32), i32 1) +; CHECK: Global variable bad_sub has non-flattened initializer + +; i16 not allowed here. +@bad_compound = internal global <{ i32, i16 }> + <{ i32 ptrtoint (void ()* @func to i32), i16 0 }> +; CHECK: Global variable bad_compound has non-flattened initializer + +; The struct type must be packed. +@non_packed_struct = internal global { [3 x i8], i32 } + { [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) } +; CHECK: Global variable non_packed_struct has non-flattened initializer + +; The struct type must be anonymous. 
+%struct = type <{ [3 x i8], i32 }> +@named_struct = internal global %struct + <{ [3 x i8] c"foo", i32 ptrtoint (void ()* @func to i32) }> +; CHECK: Global variable named_struct has non-flattened initializer + + +define internal void @func() { + ret void +} diff --git a/test/NaCl/PNaClABI/abi-i1-operations.ll b/test/NaCl/PNaClABI/abi-i1-operations.ll new file mode 100644 index 000000000000..7796635b7454 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-i1-operations.ll @@ -0,0 +1,67 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +; Most arithmetic operations are not very useful on i1, so use of i1 +; is restricted to a subset of operations. + + +; i1 is allowed on these bitwise operations because: +; * These operations never overflow. +; * They do get generated in practice for combining conditions. +define internal void @allowed_cases() { + %and = and i1 0, 0 + %or = or i1 0, 0 + %xor = xor i1 0, 0 + ret void +} +; CHECK-NOT: disallowed + + +define internal void @rejected_cases(i32 %ptr) { + ; Loads and stores of i1 are disallowed. This is done by rejecting + ; i1* as a pointer type. + %ptr.p = inttoptr i32 %ptr to i1* +; CHECK: disallowed: bad result type: %ptr.p = inttoptr + load i1* %ptr.p, align 1 +; CHECK-NEXT: disallowed: bad pointer: {{.*}} load i1* + + ; i1 arithmetic is of dubious usefulness, so it is rejected. + add i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} add + sub i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} sub + mul i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} mul + udiv i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} udiv + sdiv i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} sdiv + urem i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} urem + srem i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} srem + shl i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} shl + lshr i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} lshr + ashr i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} ashr + + ; The same applies to i1 comparisons. + icmp eq i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} icmp eq + icmp ult i1 0, 0 +; CHECK-NEXT: disallowed: arithmetic on i1: {{.*}} icmp ult + + ; There should be no implicit zero-extension in alloca. + alloca i8, i1 1 +; CHECK-NEXT: disallowed: alloca array size is not i32 + + ; Switch on i1 is not useful. "br" should be used instead. + switch i1 0, label %next [i1 0, label %next] +; CHECK-NEXT: disallowed: switch on i1 +next: + + ret void +} +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-metadata.ll b/test/NaCl/PNaClABI/abi-metadata.ll new file mode 100644 index 000000000000..1b0346c58535 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-metadata.ll @@ -0,0 +1,19 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; RUN: pnacl-abicheck -pnaclabi-allow-debug-metadata < %s | FileCheck %s --check-prefix=DEBUG +; XFAIL: * + +; Metadata is not part of the PNaCl's stable ABI, so normally the ABI +; checker rejects metadata entirely. However, for debugging support, +; pre-finalized pexes may contain metadata. When checking a +; pre-finalized pexe, the ABI checker does not check the types in the +; metadata. 
+ +; DEBUG-NOT: Named metadata node llvm.dbg.cu is disallowed +; CHECK: Named metadata node llvm.dbg.cu is disallowed +!llvm.dbg.cu = !{!0} +!0 = metadata !{ half 0.0} + +; CHECK: Named metadata node madeup is disallowed +; DEBUG: Named metadata node madeup is disallowed +!madeup = !{!1} +!1 = metadata !{ half 1.0} diff --git a/test/NaCl/PNaClABI/abi-small-arguments.ll b/test/NaCl/PNaClABI/abi-small-arguments.ll new file mode 100644 index 000000000000..422d4c8ba81c --- /dev/null +++ b/test/NaCl/PNaClABI/abi-small-arguments.ll @@ -0,0 +1,53 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +define void @arg_i1(i1 %bad) { + ret void +} +; CHECK: Function arg_i1 has disallowed type: + +define void @arg_i16(i32 %allowed, i16 %bad) { + ret void +} +; CHECK: Function arg_i16 has disallowed type: + +define i1 @return_i1() { + ret i1 0 +} +; CHECK: Function return_i1 has disallowed type: + +define i8 @return_i8() { + ret i8 0 +} +; CHECK: Function return_i8 has disallowed type: + + +define void @bad_direct_calls() { + call void @arg_i1(i1 0) +; CHECK: bad function callee operand: call void @arg_i1 + + call void @arg_i16(i32 0, i16 0) +; CHECK-NEXT: bad function callee operand: call void @arg_i16 + + %result1 = call i1 @return_i1() +; CHECK-NEXT: bad function callee operand: {{.*}} call i1 @return_i1 + + %result2 = call i8 @return_i8() +; CHECK-NEXT: bad function callee operand: {{.*}} call i8 @return_i8 + + ret void +} + +define void @bad_indirect_calls(i32 %ptr) { + %func1 = inttoptr i32 %ptr to void (i8)* +; CHECK: bad result type: %func1 + call void %func1(i8 0) +; CHECK: bad function callee operand: {{.*}} %func1 + + %func2 = inttoptr i32 %ptr to i16 ()* +; CHECK: bad result type: %func2 + %result3 = call i16 %func2() +; CHECK: bad function callee operand: {{.*}} %func2 + + ret void +} diff --git a/test/NaCl/PNaClABI/abi-stripped-pointers.ll b/test/NaCl/PNaClABI/abi-stripped-pointers.ll new file mode 100644 index 000000000000..e3a5677c0dd0 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-stripped-pointers.ll @@ -0,0 +1,133 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +; This test checks that the PNaCl ABI verifier enforces the normal +; form introduced by the ReplacePtrsWithInts pass. + + +@var = global [4 x i8] c"xxxx" +@ptr = global i32 ptrtoint ([4 x i8]* @var to i32) + +declare i8* @llvm.nacl.read.tp() + + +define internal void @pointer_arg(i8* %arg) { + ret void +} +; CHECK: Function pointer_arg has disallowed type + +define internal i8* @pointer_return() { + unreachable +} +; CHECK-NEXT: Function pointer_return has disallowed type + +define internal void @func() { + ret void +} + +define internal void @func_with_arg(i32 %arg) { + ret void +} + + +define internal void @allowed_cases(i32 %arg) { + inttoptr i32 123 to i8* + + ptrtoint [4 x i8]* @var to i32 + + %alloc = alloca i8 + ptrtoint i8* %alloc to i32 + load i8* %alloc, align 1 + + ; These instructions may use a NormalizedPtr, which may be a global. + load i32* @ptr, align 1 + store i32 123, i32* @ptr, align 1 + + ; A NormalizedPtr may be a bitcast. + %ptr_bitcast = bitcast [4 x i8]* @var to i32* + load i32* %ptr_bitcast, align 1 + + ; A NormalizedPtr may be an inttoptr. + %ptr_from_int = inttoptr i32 123 to i32* + load i32* %ptr_from_int, align 1 + + ; Check direct and indirect function calls. + %func_as_int = ptrtoint void ()* @func to i32 + %func_ptr = inttoptr i32 %func_as_int to void ()* + call void %func_ptr() + call void @func() + call void @func_with_arg(i32 123) + + ; Intrinsic calls may return pointers. 
+ %thread_ptr = call i8* @llvm.nacl.read.tp() + ptrtoint i8* %thread_ptr to i32 + + ; Bitcasts between non-pointers are not restricted + bitcast i64 0 to double + bitcast i32 0 to float + + ; ConstantInts and Arguments are allowed as operands. + add i32 %arg, 123 + + ret void +} +; CHECK-NOT: disallowed + + +define internal void @bad_cases() { +entry: + ptrtoint [4 x i8]* @var to i16 +; CHECK: Function bad_cases disallowed: non-i32 ptrtoint + + inttoptr i16 123 to i8* +; CHECK-NEXT: non-i32 inttoptr + + %a = alloca i32 +; CHECK-NEXT: non-i8 alloca: %a + %a2 = alloca [4 x i8] +; CHECK-NEXT: non-i8 alloca: %a2 + + store i32 0, i32* null, align 1 +; CHECK-NEXT: bad pointer + + store i32 0, i32* undef, align 1 +; CHECK-NEXT: bad pointer + + %bc = bitcast i32* @ptr to i31* +; CHECK-NEXT: bad result type + store i31 0, i31* %bc, align 1 +; CHECK-NEXT: bad pointer + + ; Only one level of bitcasts is allowed. + %b = bitcast i32* %a to i8* + %c = bitcast i8* %b to i16* +; CHECK-NEXT: operand not InherentPtr + + br label %block +block: + %phi1 = phi i8* [ undef, %entry ] +; CHECK-NEXT: bad operand: %phi1 + %phi2 = phi i32* [ undef, %entry ] +; CHECK-NEXT: bad operand: %phi2 + + icmp eq i32* @ptr, @ptr +; CHECK-NEXT: bad operand: {{.*}} icmp + icmp eq void ()* @func, @func +; CHECK-NEXT: bad operand: {{.*}} icmp + icmp eq i31 0, 0 +; CHECK-NEXT: bad operand: {{.*}} icmp + + call void null() +; CHECK-NEXT: bad function callee operand + + call void @func_with_arg(i32 ptrtoint (i32* @ptr to i32)) +; CHECK-NEXT: bad operand + + ; Taking the address of an intrinsic is not allowed. + ptrtoint i8* ()* @llvm.nacl.read.tp to i32 +; CHECK-NEXT: operand not InherentPtr + + ret void +} + +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-switch.ll b/test/NaCl/PNaClABI/abi-switch.ll new file mode 100644 index 000000000000..911756850203 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-switch.ll @@ -0,0 +1,32 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +@var = internal global [4 x i8] c"xxxx" + + +; CHECK-NOT: disallowed + +define internal void @bad_cases() { + ; ConstantExprs should be rejected here. + switch i32 ptrtoint ([4 x i8]* @var to i32), label %next [i32 0, label %next] +; CHECK: disallowed: bad switch condition +next: + + ; Bad integer type. + switch i32 0, label %next [i99 0, label %next] +; CHECK: bad switch case + + ; Bad integer type. + switch i32 0, label %next [i32 0, label %next + i99 1, label %next] +; CHECK: bad switch case + + ; Note that the reader only allows ConstantInts in the label list. + ; We don't need to check the following, because the reader rejects + ; it: + ; switch i32 0, label %next [i32 ptrtoint (i32* @ptr to i32), label %next] + + ret void +} + +; CHECK-NOT: disallowed diff --git a/test/NaCl/PNaClABI/abi-varargs.ll b/test/NaCl/PNaClABI/abi-varargs.ll new file mode 100644 index 000000000000..aa721fb8316a --- /dev/null +++ b/test/NaCl/PNaClABI/abi-varargs.ll @@ -0,0 +1,14 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +define void @varargs_func(i32 %arg, ...) { + ret void +} +; CHECK: Function varargs_func has disallowed type: void (i32, ...) 
+ +define void @call_varargs_func(i32 %ptr) { + %ptr2 = inttoptr i32 %ptr to void (i32, ...)* + call void (i32, ...)* %ptr2(i32 123) + ret void +} +; CHECK: Function call_varargs_func disallowed: bad function callee operand: call void (i32, ...)* diff --git a/test/NaCl/PNaClABI/abi-visibility.ll b/test/NaCl/PNaClABI/abi-visibility.ll new file mode 100644 index 000000000000..aabe5f79b771 --- /dev/null +++ b/test/NaCl/PNaClABI/abi-visibility.ll @@ -0,0 +1,16 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +; Disallow the visibility attributes set by +; __attribute__((visibility("hidden"))) and +; __attribute__((visibility("protected"))). + +define internal hidden void @visibility_hidden() { + ret void +} +; CHECK: Function visibility_hidden has disallowed visibility: hidden + +define internal protected void @visibility_protected() { + ret void +} +; CHECK-NEXT: Function visibility_protected has disallowed visibility: protected diff --git a/test/NaCl/PNaClABI/global-attributes.ll b/test/NaCl/PNaClABI/global-attributes.ll new file mode 100644 index 000000000000..408b4ce25cba --- /dev/null +++ b/test/NaCl/PNaClABI/global-attributes.ll @@ -0,0 +1,63 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +; Global variable attributes + +; CHECK: Variable var_with_section has disallowed "section" attribute +@var_with_section = internal global [1 x i8] zeroinitializer, section ".some_section" + +; PNaCl programs can depend on data alignments in general, so we allow +; "align" on global variables. +; CHECK-NOT: var_with_alignment +@var_with_alignment = internal global [4 x i8] zeroinitializer, align 8 + +; TLS variables must be expanded out by ExpandTls. +; CHECK-NEXT: Variable tls_var has disallowed "thread_local" attribute +@tls_var = internal thread_local global [4 x i8] zeroinitializer + +; CHECK-NEXT: Variable var_with_unnamed_addr has disallowed "unnamed_addr" attribute +@var_with_unnamed_addr = internal unnamed_addr constant [1 x i8] c"x" + +; CHECK-NEXT: Variable var_ext_init has disallowed "externally_initialized" attribute +@var_ext_init = internal externally_initialized global [1 x i8] c"x" + + +; Function attributes + +; CHECK-NEXT: Function func_with_attrs has disallowed attributes: noreturn nounwind +define internal void @func_with_attrs() noreturn nounwind { + ret void +} + +; CHECK-NEXT: Function func_with_arg_attrs has disallowed attributes: inreg zeroext +define internal void @func_with_arg_attrs(i32 inreg zeroext) { + ret void +} + +; CHECK-NEXT: Function func_with_callingconv has disallowed calling convention: 8 +define internal fastcc void @func_with_callingconv() { + ret void +} + +; CHECK-NEXT: Function func_with_section has disallowed "section" attribute +define internal void @func_with_section() section ".some_section" { + ret void +} + +; CHECK-NEXT: Function func_with_alignment has disallowed "align" attribute +define internal void @func_with_alignment() align 1 { + ret void +} + +; CHECK-NEXT: Function func_with_gc has disallowed "gc" attribute +define internal void @func_with_gc() gc "my_gc_func" { + ret void +} + +; CHECK-NEXT: Function func_with_unnamed_addr has disallowed "unnamed_addr" attribute +define internal void @func_with_unnamed_addr() unnamed_addr { + ret void +} + +; CHECK-NOT: disallowed +; If another check is added, there should be a check-not in between each check diff --git a/test/NaCl/PNaClABI/instcombine.ll b/test/NaCl/PNaClABI/instcombine.ll new file mode 100644 index 000000000000..6844d2025d2e --- /dev/null +++ 
b/test/NaCl/PNaClABI/instcombine.ll @@ -0,0 +1,29 @@ +; Disabled for the time being b/c PromoteIntegers will remove any odd sized ints +; and b/c we don't currently support translation for PNaCl so there are no platform +; idiosyncrasies to account for. +; R;UN: opt < %s -instcombine -S | FileCheck %s +; RUN: true +; Test that instcombine does not introduce non-power-of-two integers into +; the module + +target datalayout = "p:32:32:32" +target triple = "le32-unknown-nacl" + +; This test is a counterpart to icmp_shl16 in +; test/Transforms/InstCombine/icmp.ll, which should still pass. +; CHECK-LABEL: @icmp_shl31 +; CHECK-NOT: i31 +define i1 @icmp_shl31(i32 %x) { + %shl = shl i32 %x, 1 + %cmp = icmp slt i32 %shl, 36 + ret i1 %cmp +} + +; Check that we don't introduce i4, which is a power of 2 but still not allowed. +; CHECK-LABEL: @icmp_shl4 +; CHECK-NOT: i4 +define i1 @icmp_shl4(i32 %x) { + %shl = shl i32 %x, 28 + %cmp = icmp slt i32 %shl, 1073741824 + ret i1 %cmp +} diff --git a/test/NaCl/PNaClABI/instructions.ll b/test/NaCl/PNaClABI/instructions.ll new file mode 100644 index 000000000000..640d8c711373 --- /dev/null +++ b/test/NaCl/PNaClABI/instructions.ll @@ -0,0 +1,182 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * +; Test instruction opcodes allowed by PNaCl ABI + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" +target triple = "le32-unknown-nacl" + +define internal void @terminators() { +; Terminator instructions +terminators: + ret void + br i1 0, label %next2, label %next +next: + switch i32 1, label %next2 [i32 0, label %next] +next2: + unreachable +; CHECK-NOT: disallowed +; CHECK: Function terminators disallowed: bad instruction opcode: indirectbr + indirectbr i8* undef, [label %next, label %next2] +} + +define internal void @binops() { +; Binary operations + %a1 = add i32 0, 0 + %a2 = sub i32 0, 0 + %a3 = fsub float 0.0, 0.0 + %a4 = mul i32 0, 0 + %a5 = fmul float 0.0, 0.0 + %a6 = udiv i32 0, 1 + %a7 = sdiv i32 0, 1 + %a8 = fdiv float 0.0, 1.0 + %a9 = urem i32 0, 1 + %a10 = srem i32 0, 1 + %a11 = frem float 0.0, 1.0 +; Bitwise binary operations + %a12 = shl i32 1, 1 + %a13 = lshr i32 1, 1 + %a14 = ashr i32 1, 1 + %a15 = and i32 1, 1 + %a16 = or i32 1, 1 + %a17 = xor i32 1, 1 + ret void +} + +define internal void @vectors() { +; CHECK-NOT: disallowed + +; CHECK: disallowed: bad instruction opcode: {{.*}} extractelement + %a1 = extractelement <2 x i32> , i32 0 + +; CHECK: disallowed: bad instruction opcode: {{.*}} shufflevector + %a2 = shufflevector <2 x i32> undef, <2 x i32> undef, <2 x i32> undef + +; CHECK: disallowed: bad instruction opcode: {{.*}} insertelement + %a3 = insertelement <2 x i32> undef, i32 1, i32 0 + + ret void +} + +define internal void @aggregates() { +; CHECK-NOT: disallowed + +; Aggregate operations + %a1 = extractvalue { i32, i32 } { i32 0, i32 0 }, 0 +; CHECK: disallowed: bad instruction opcode: {{.*}} extractvalue + + %a2 = insertvalue {i32, float} undef, i32 1, 0 +; CHECK-NEXT: disallowed: bad instruction opcode: {{.*}} insertvalue + + ret void +} + +define internal void @memory() { +; Memory operations + %a1 = alloca i8, i32 4 + %ptr = inttoptr i32 0 to i32* + %a2 = load i32* %ptr, align 1 + store i32 undef, i32* %ptr, align 1 +; CHECK-NOT: disallowed +; CHECK: disallowed: bad instruction opcode: {{.*}} getelementptr + %a3 = getelementptr { i32, i32}* undef + ret void +} + +define internal void @atomic() { + %a1 = alloca i8, i32 4 + %ptr = inttoptr i32 0 to i32* + ; CHECK: 
disallowed: atomic load: {{.*}} load atomic + %a2 = load atomic i32* %ptr seq_cst, align 4 +; CHECK: disallowed: volatile load: {{.*}} load volatile + %a3 = load volatile i32* %ptr, align 4 +; CHECK: disallowed: atomic store: store atomic + store atomic i32 undef, i32* %ptr seq_cst, align 4 +; CHECK: disallowed: volatile store: store volatile + store volatile i32 undef, i32* %ptr, align 4 +; CHECK: disallowed: bad instruction opcode: fence + fence acq_rel +; CHECK: disallowed: bad instruction opcode: {{.*}} cmpxchg + %a4 = cmpxchg i32* %ptr, i32 undef, i32 undef acq_rel +; CHECK: disallowed: bad instruction opcode: {{.*}} atomicrmw + %a5 = atomicrmw add i32* %ptr, i32 1 acquire + ret void +} + +define internal void @conversion() { +; Conversion operations + %a1 = trunc i32 undef to i8 + %a2 = zext i8 undef to i32 + %a3 = sext i8 undef to i32 + %a4 = fptrunc double undef to float + %a5 = fpext float undef to double + %a6 = fptoui double undef to i64 + %a7 = fptosi double undef to i64 + %a8 = uitofp i64 undef to double + %a9 = sitofp i64 undef to double + ret void +} + +define internal void @other() { +entry: + %a1 = icmp eq i32 undef, undef + %a2 = fcmp oeq float undef, undef + %a2b= fcmp ueq float undef, undef + br i1 undef, label %foo, label %bar +foo: +; phi predecessor labels have to match to appease module verifier + %a3 = phi i32 [0, %entry], [0, %foo] + %a4 = select i1 true, i8 undef, i8 undef + call void @conversion() + br i1 undef, label %foo, label %bar +bar: + ret void +} + +define internal void @throwing_func() { + ret void +} +define internal void @personality_func() { + ret void +} + +define internal void @invoke_func() { + invoke void @throwing_func() to label %ok unwind label %onerror +; CHECK-NOT: disallowed +; CHECK: disallowed: bad instruction opcode: invoke +ok: + ret void +onerror: + %lp = landingpad i32 + personality i8* bitcast (void ()* @personality_func to i8*) + catch i32* null +; CHECK: disallowed: bad instruction opcode: {{.*}} landingpad + resume i32 %lp +; CHECK: disallowed: bad instruction opcode: resume +} + +define internal i32 @va_arg(i32 %va_list_as_int) { + %va_list = inttoptr i32 %va_list_as_int to i8* + %val = va_arg i8* %va_list, i32 + ret i32 %val +} +; CHECK-NOT: disallowed +; CHECK: disallowed: bad instruction opcode: {{.*}} va_arg + +@global_var = internal global [4 x i8] zeroinitializer + +define internal void @constantexpr() { + ptrtoint i8* getelementptr ([4 x i8]* @global_var, i32 1, i32 0) to i32 + ret void +} +; CHECK-NOT: disallowed +; CHECK: disallowed: operand not InherentPtr: %1 = ptrtoint i8* getelementptr + +define internal void @inline_asm() { + call void asm "foo", ""() + ret void +} +; CHECK-NOT: disallowed +; CHECK: disallowed: inline assembly: call void asm "foo", ""() + +; CHECK-NOT: disallowed +; If another check is added, there should be a check-not in between each check diff --git a/test/NaCl/PNaClABI/intrinsics.ll b/test/NaCl/PNaClABI/intrinsics.ll new file mode 100644 index 000000000000..8ffb39dacbe6 --- /dev/null +++ b/test/NaCl/PNaClABI/intrinsics.ll @@ -0,0 +1,189 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; RUN: pnacl-abicheck -pnaclabi-allow-debug-metadata < %s | \ +; RUN: FileCheck %s --check-prefix=DBG +; RUN: pnacl-abicheck -pnaclabi-allow-dev-intrinsics < %s | \ +; RUN: FileCheck %s --check-prefix=DEV +; XFAIL: * + +; Test that only white-listed intrinsics are allowed. + +; =================================== +; Some "Dev" intrinsics which are disallowed by default. 
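+; These are accepted only when pnacl-abicheck is run with the
+; -pnaclabi-allow-dev-intrinsics flag (exercised by the DEV FileCheck prefix
+; above), so the default prefix expects an error here while DEV-NOT expects
+; none.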
+ +; CHECK: Function llvm.nacl.target.arch is a disallowed LLVM intrinsic +; DEV-NOT: Function llvm.nacl.target.arch is a disallowed LLVM intrinsic +declare i32 @llvm.nacl.target.arch() + +; We need to force LLParser to leave a call to llvm.dbg.value && llvm.dbg.declare +; This was taken from 2010-05-03-OriginDIE.ll +%struct.anon = type { i64, i32, i32, i32, [1 x i32] } +%struct.gpm_t = type { i32, i8*, [16 x i8], i32, i64, i64, i64, i64, i64, i64, i32, i16, i16, [8 x %struct.gpmr_t] } +%struct.gpmr_t = type { [48 x i8], [48 x i8], [16 x i8], i64, i64, i64, i64, i16 } +%struct.gpt_t = type { [8 x i8], i32, i32, i32, i32, i64, i64, i64, i64, [16 x i8], %struct.anon } +define fastcc void @gpt2gpm(%struct.gpm_t* %gpm, %struct.gpt_t* %gpt) nounwind optsize ssp { +entry: + %data_addr.i18 = alloca i64, align 8 ; [#uses=1] + %data_addr.i17 = alloca i64, align 8 ; [#uses=2] + %data_addr.i16 = alloca i64, align 8 ; [#uses=0] + %data_addr.i15 = alloca i32, align 4 ; [#uses=0] + %data_addr.i = alloca i64, align 8 ; [#uses=0] + %0 = getelementptr inbounds %struct.gpm_t* %gpm, i32 0, i32 2, i32 0 ; [#uses=1] + %1 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 9, i32 0 ; [#uses=1] + call void @uuid_LtoB(i8* %0, i8* %1) nounwind, !dbg !0 + %a9 = load volatile i64* %data_addr.i18, align 8 ; [#uses=1] + %a10 = call i64 @llvm.bswap.i64(i64 %a9) nounwind ; [#uses=1] + %a11 = getelementptr inbounds %struct.gpt_t* %gpt, i32 0, i32 8, !dbg !7 ; [#uses=1] + %a12 = load i64* %a11, align 4, !dbg !7 ; [#uses=1] + call void @llvm.dbg.declare(metadata !{i64* %data_addr.i17}, metadata !8) nounwind, !dbg !14 + store i64 %a12, i64* %data_addr.i17, align 8 + call void @llvm.dbg.value(metadata !6, i64 0, metadata !15) nounwind + call void @llvm.dbg.value(metadata !18, i64 0, metadata !19) nounwind + call void @llvm.dbg.declare(metadata !6, metadata !23) nounwind + call void @llvm.dbg.value(metadata !{i64* %data_addr.i17}, i64 0, metadata !34) nounwind + %a13 = load volatile i64* %data_addr.i17, align 8 ; [#uses=1] + %a14 = call i64 @llvm.bswap.i64(i64 %a13) nounwind ; [#uses=2] + %a15 = add i64 %a10, %a14, !dbg !7 ; [#uses=1] + %a16 = sub i64 %a15, %a14 ; [#uses=1] + %a17 = getelementptr inbounds %struct.gpm_t* %gpm, i32 0, i32 5, !dbg !7 ; [#uses=1] + store i64 %a16, i64* %a17, align 4, !dbg !7 + ret void, !dbg !7 +} + +; =================================== +; Debug info intrinsics, which are disallowed by default. +; It seems the IR parser adapts dbg.value && dbg.declare into metadata +; and promptly removes their respective declarations from the module. +; CHECK: Function llvm.dbg.value is a disallowed LLVM intrinsic +; DBG-NOT: Function llvm.dbg.value is a disallowed LLVM intrinsic +declare void @llvm.dbg.value(metadata, i64, metadata) +; CHECK: Function llvm.dbg.declare is a disallowed LLVM intrinsic +; DBG-NOT: Function llvm.dbg.declare is a disallowed LLVM intrinsic +declare void @llvm.dbg.declare(metadata, metadata) + + +; =================================== +; Always allowed intrinsics. 
+ +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, + i32 %len, i32 %align, i1 %isvolatile) +declare void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, + i32 %len, i32 %align, i1 %isvolatile) +declare void @llvm.memset.p0i8.i32(i8* %dest, i8 %val, + i32 %len, i32 %align, i1 %isvolatile) + +declare i8* @llvm.nacl.read.tp() + +declare i8 @llvm.nacl.atomic.load.i8(i8*, i32) +declare i16 @llvm.nacl.atomic.load.i16(i16*, i32) +declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) +declare i64 @llvm.nacl.atomic.load.i64(i64*, i32) +declare void @llvm.nacl.atomic.store.i8(i8, i8*, i32) +declare void @llvm.nacl.atomic.store.i16(i16, i16*, i32) +declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) +declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32) +declare i8 @llvm.nacl.atomic.rmw.i8(i32, i8*, i8, i32) +declare i16 @llvm.nacl.atomic.rmw.i16(i32, i16*, i16, i32) +declare i32 @llvm.nacl.atomic.rmw.i32(i32, i32*, i32, i32) +declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32) +declare i8 @llvm.nacl.atomic.cmpxchg.i8(i8*, i8, i8, i32, i32) +declare i16 @llvm.nacl.atomic.cmpxchg.i16(i16*, i16, i16, i32, i32) +declare i32 @llvm.nacl.atomic.cmpxchg.i32(i32*, i32, i32, i32, i32) +declare i64 @llvm.nacl.atomic.cmpxchg.i64(i64*, i64, i64, i32, i32) +declare void @llvm.nacl.atomic.fence(i32) +declare void @llvm.nacl.atomic.fence.all() +declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*) + +declare i16 @llvm.bswap.i16(i16) +declare i32 @llvm.bswap.i32(i32) +declare i64 @llvm.bswap.i64(i64) + +declare i32 @llvm.cttz.i32(i32, i1) +declare i64 @llvm.cttz.i64(i64, i1) + +declare i32 @llvm.ctlz.i32(i32, i1) +declare i64 @llvm.ctlz.i64(i64, i1) + +declare i32 @llvm.ctpop.i32(i32) +declare i64 @llvm.ctpop.i64(i64) + +declare void @llvm.trap() + +declare float @llvm.sqrt.f32(float) +declare double @llvm.sqrt.f64(double) + +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) + +declare void @llvm.nacl.longjmp(i8*, i32) +declare i32 @llvm.nacl.setjmp(i8*) + +; CHECK-NOT: disallowed + +; =================================== +; Always disallowed intrinsics. 
+ +; CHECK: Function llvm.adjust.trampoline is a disallowed LLVM intrinsic +; DBG: Function llvm.adjust.trampoline is a disallowed LLVM intrinsic +; DEV: Function llvm.adjust.trampoline is a disallowed LLVM intrinsic +declare i8* @llvm.adjust.trampoline(i8*) + +; CHECK: Function llvm.init.trampoline is a disallowed LLVM intrinsic +; DBG: Function llvm.init.trampoline is a disallowed LLVM intrinsic +; DEV: Function llvm.init.trampoline is a disallowed LLVM intrinsic +declare void @llvm.init.trampoline(i8*, i8*, i8*) + +; CHECK: Function llvm.x86.aesni.aeskeygenassist is a disallowed LLVM intrinsic +; DBG: Function llvm.x86.aesni.aeskeygenassist is a disallowed LLVM intrinsic +; DEV: Function llvm.x86.aesni.aeskeygenassist is a disallowed LLVM intrinsic +declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) + +; CHECK: Function llvm.va_copy is a disallowed LLVM intrinsic +; DBG: Function llvm.va_copy is a disallowed LLVM intrinsic +; DEV: Function llvm.va_copy is a disallowed LLVM intrinsic +declare void @llvm.va_copy(i8*, i8*) + +; CHECK: Function llvm.bswap.i1 is a disallowed LLVM intrinsic +declare i1 @llvm.bswap.i1(i1) + +; CHECK: Function llvm.bswap.i8 is a disallowed LLVM intrinsic +declare i8 @llvm.bswap.i8(i8) + +; CHECK: Function llvm.ctlz.i16 is a disallowed LLVM intrinsic +declare i16 @llvm.ctlz.i16(i16, i1) + +; CHECK: Function llvm.cttz.i16 is a disallowed LLVM intrinsic +declare i16 @llvm.cttz.i16(i16, i1) + +; CHECK: Function llvm.ctpop.i16 is a disallowed LLVM intrinsic +declare i16 @llvm.ctpop.i16(i16) + +; CHECK: Function llvm.lifetime.start is a disallowed LLVM intrinsic +declare void @llvm.lifetime.start(i64, i8* nocapture) + +; CHECK: Function llvm.lifetime.end is a disallowed LLVM intrinsic +declare void @llvm.lifetime.end(i64, i8* nocapture) + +; CHECK: Function llvm.frameaddress is a disallowed LLVM intrinsic +declare i8* @llvm.frameaddress(i32 %level) + +; CHECK: Function llvm.returnaddress is a disallowed LLVM intrinsic +declare i8* @llvm.returnaddress(i32 %level) + +; CHECK: Function llvm.sqrt.fp128 is a disallowed LLVM intrinsic +declare fp128 @llvm.sqrt.fp128(fp128) + +; The variants with 64-bit %len arguments are disallowed. +; CHECK: Function llvm.memcpy.p0i8.p0i8.i64 is a disallowed LLVM intrinsic +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, + i64 %len, i32 %align, i1 %isvolatile) +; CHECK: Function llvm.memmove.p0i8.p0i8.i64 is a disallowed LLVM intrinsic +declare void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, + i64 %len, i32 %align, i1 %isvolatile) +; CHECK: Function llvm.memset.p0i8.i64 is a disallowed LLVM intrinsic +declare void @llvm.memset.p0i8.i64(i8* %dest, i8 %val, + i64 %len, i32 %align, i1 %isvolatile) + +; Test that the ABI checker checks the full function name. 
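+; llvm.memset.foo shares the "llvm.memset." prefix with the whitelisted
+; llvm.memset.p0i8.i32 above, so a checker that compared only a name prefix
+; would wrongly accept it; the full intrinsic name must be matched.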
+; CHECK: Function llvm.memset.foo is a disallowed LLVM intrinsic +declare void @llvm.memset.foo(i8* %dest, i8 %val, + i64 %len, i32 %align, i1 %isvolatile) diff --git a/test/NaCl/PNaClABI/linkagetypes.ll b/test/NaCl/PNaClABI/linkagetypes.ll new file mode 100644 index 000000000000..686bda7cf0e2 --- /dev/null +++ b/test/NaCl/PNaClABI/linkagetypes.ll @@ -0,0 +1,83 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * +; Test linkage types allowed by PNaCl ABI + +target datalayout = "e-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-p:32:32:32-v128:32:32" +target triple = "le32-unknown-nacl" + + +@gv_internal = internal global [1 x i8] c"x" +; CHECK-NOT: disallowed + +@gv_private = private global [1 x i8] c"x" +; CHECK: Variable gv_private has disallowed linkage type: private +@gv_linker_private = linker_private global [1 x i8] c"x" +; CHECK: Variable gv_linker_private has disallowed linkage type: linker_private +@gv_linker_private_weak = linker_private_weak global [1 x i8] c"x" +; CHECK: gv_linker_private_weak has disallowed linkage type: linker_private_weak +@gv_linkonce = linkonce global [1 x i8] c"x" +; CHECK: gv_linkonce has disallowed linkage type: linkonce +@gv_linkonce_odr = linkonce_odr global [1 x i8] c"x" +; CHECK: gv_linkonce_odr has disallowed linkage type: linkonce_odr +@gv_weak = weak global [1 x i8] c"x" +; CHECK: gv_weak has disallowed linkage type: weak +@gv_weak_odr = weak_odr global [1 x i8] c"x" +; CHECK: gv_weak_odr has disallowed linkage type: weak_odr +@gv_common = common global [1 x i8] c"x" +; CHECK: gv_common has disallowed linkage type: common +@gv_appending = appending global [1 x i8] zeroinitializer +; CHECK: gv_appending has disallowed linkage type: appending +@gv_dllimport = dllimport global [1 x i8] +; CHECK: gv_dllimport has disallowed linkage type: dllimport +@gv_dllexport = dllexport global [1 x i8] c"x" +; CHECK: gv_dllexport has disallowed linkage type: dllexport +@gv_extern_weak = extern_weak global [1 x i8] +; CHECK: gv_extern_weak has disallowed linkage type: extern_weak +@gv_avilable_externally = available_externally global [1 x i8] c"x" +; CHECK: gv_avilable_externally has disallowed linkage type: available_externally + + +; CHECK-NOT: disallowed +; CHECK-NOT: internal_func +; internal linkage is allowed, and should not appear in error output. 
+define internal void @internal_func() { + ret void +} + +; CHECK: Function private_func has disallowed linkage type: private +define private void @private_func() { + ret void +} +; CHECK: Function external_func is declared but not defined (disallowed) +declare external void @external_func() +; CHECK: linkonce_func has disallowed linkage type: linkonce +define linkonce void @linkonce_func() { + ret void +} +; CHECK-NEXT: linkonce_odr_func has disallowed linkage type: linkonce_odr +define linkonce_odr void @linkonce_odr_func() { + ret void +} +; CHECK-NEXT: weak_func has disallowed linkage type: weak +define weak void @weak_func() { + ret void +} +; CHECK-NEXT: weak_odr_func has disallowed linkage type: weak_odr +define weak_odr void @weak_odr_func() { + ret void +} +; CHECK-NEXT: dllimport_func is declared but not defined (disallowed) +; CHECK-NEXT: dllimport_func has disallowed linkage type: dllimport +declare dllimport void @dllimport_func() +; CHECK-NEXT: dllexport_func has disallowed linkage type: dllexport +define dllexport void @dllexport_func() { + ret void +} +; CHECK-NEXT: Function extern_weak_func is declared but not defined (disallowed) +; CHECK-NEXT: Function extern_weak_func has disallowed linkage type: extern_weak +declare extern_weak void @extern_weak_func() + +; CHECK-NEXT: Function avail_ext_func has disallowed linkage type: available_externally +define available_externally void @avail_ext_func() { + ret void +} diff --git a/test/NaCl/PNaClABI/lit.local.cfg b/test/NaCl/PNaClABI/lit.local.cfg new file mode 100644 index 000000000000..c6106e4746f2 --- /dev/null +++ b/test/NaCl/PNaClABI/lit.local.cfg @@ -0,0 +1 @@ +config.suffixes = ['.ll'] diff --git a/test/NaCl/PNaClABI/module-asm.ll b/test/NaCl/PNaClABI/module-asm.ll new file mode 100644 index 000000000000..9cde268a1559 --- /dev/null +++ b/test/NaCl/PNaClABI/module-asm.ll @@ -0,0 +1,5 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * + +module asm "foo" +; CHECK: Module contains disallowed top-level inline assembly diff --git a/test/NaCl/PNaClABI/types-function.ll b/test/NaCl/PNaClABI/types-function.ll new file mode 100644 index 000000000000..bee3763e61a8 --- /dev/null +++ b/test/NaCl/PNaClABI/types-function.ll @@ -0,0 +1,41 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * +; Test type-checking in function bodies. This test is not intended to verify +; all the rules about the various types, but instead to make sure that types +; stashed in various places in function bodies are caught. + +@a2 = private global i17 zeroinitializer + +; CHECK: Function func has disallowed type: void (i15) +declare void @func(i15 %arg) + +!llvm.foo = !{!0} +!0 = metadata !{ half 0.0} + +define void @types() { +; CHECK: bad result type: {{.*}} fptrunc + %h1 = fptrunc double undef to half + +; CHECK: bad operand: {{.*}} bitcast half + %h2 = bitcast half 0.0 to i16 + +; see below... + %h3 = fadd double 0.0, fpext (half 0.0 to double) + +; CHECK: bad pointer: store + store i32 0, i32* bitcast (i17* @a2 to i32*), align 1 + +; CHECK: bad function callee operand: call void @func(i15 1) + call void @func(i15 1) + +; CHECK: Function types has disallowed instruction metadata: !foo + ret void, !foo !0 +} +; CHECK-NOT: disallowed + + +; TODO: +; the bitcode reader seems to expand some operations inline +; (e.g. 
fpext, sext, uitofp) such that doing something like +; %h3 = fadd double 0.0, fpext (half 0.0 to double) +; means the verifier pass will never see the fpext or its operands diff --git a/test/NaCl/PNaClABI/types.ll b/test/NaCl/PNaClABI/types.ll new file mode 100644 index 000000000000..68f8bc6e510a --- /dev/null +++ b/test/NaCl/PNaClABI/types.ll @@ -0,0 +1,137 @@ +; RUN: pnacl-abicheck < %s | FileCheck %s +; XFAIL: * +; Test types allowed by PNaCl ABI + + +; CHECK: Function badReturn has disallowed type: half* () +define internal half* @badReturn() { + unreachable +} + +; CHECK: Function badArgType1 has disallowed type: void (half, i32) +define internal void @badArgType1(half %a, i32 %b) { + ret void +} +; CHECK: Function badArgType2 has disallowed type: void (i32, half) +define internal void @badArgType2(i32 %a, half %b) { + ret void +} + + +define internal void @func() { +entry: + br label %block +block: + + ; We test for allowed/disallowed types via phi nodes. This gives us + ; a uniform way to test any type. + + ; Allowed types + + phi i1 [ undef, %entry ] + phi i8 [ undef, %entry ] + phi i16 [ undef, %entry ] + phi i32 [ undef, %entry ] + phi i64 [ undef, %entry ] + phi float [ undef, %entry ] + phi double [ undef, %entry ] +; CHECK-NOT: disallowed + + + ; Disallowed integer types + + phi i4 [ undef, %entry ] +; CHECK: Function func disallowed: bad operand: {{.*}} i4 + + phi i33 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} i33 + + phi i128 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} i128 + + + ; Disallowed floating point types + + phi half [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} half + + phi x86_fp80 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} x86_fp80 + + phi fp128 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} fp128 + + phi ppc_fp128 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} ppc_fp128 + + phi x86_mmx [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} x86_mmx + + + ; Derived types are disallowed too + + phi i32* [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} i32* + + phi [1 x i32] [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} [1 x i32] + + phi { i32, float } [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} { i32, float } + + phi void (i32)* [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} void (i32)* + + phi <{ i8, i32 }> [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} <{ i8, i32 }> + + ; Vector types are disallowed + phi <2 x i32> [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} <2 x i32> + + ret void +} + + +; Named types. With the current implementation, named types are legal +; until they are actually attempted to be used. Might want to fix that. +%struct.s1 = type { half, float} +%struct.s2 = type { i32, i32} + +define internal void @func2() { +entry: + br label %block +block: + + phi %struct.s1 [ undef, %entry ] +; CHECK: disallowed: bad operand: {{.*}} %struct.s1 + + phi %struct.s2 [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} %struct.s2 + + ret void +} + + +; Circularities: here to make sure the verifier doesn't crash or assert. + +; This oddity is perfectly legal according to the IR and ABI verifiers. +; Might want to fix that. (good luck initializing one of these, though.) 
+%struct.snake = type { i32, %struct.tail } +%struct.tail = type { %struct.snake, i32 } + +%struct.linked = type { i32, %struct.linked * } + +define internal void @func3() { +entry: + br label %block +block: + + phi %struct.snake [ undef, %entry ] +; CHECK: disallowed: bad operand: {{.*}} %struct.snake + + phi %struct.linked [ undef, %entry ] +; CHECK-NEXT: disallowed: bad operand: {{.*}} %struct.linked + + ret void +} diff --git a/test/Transforms/InstCombine/overflow.ll b/test/Transforms/InstCombine/overflow.ll index 3eddc80a7048..0f24a806ad55 100644 --- a/test/Transforms/InstCombine/overflow.ll +++ b/test/Transforms/InstCombine/overflow.ll @@ -1,6 +1,11 @@ ; RUN: opt -S -instcombine < %s | FileCheck %s ; +; PNaCl does not support the with.overflow intrinsics in its stable +; ABI, so these optimizations are disabled whilst targeting PNaCl. +; RUN: opt -S -instcombine -mtriple=le32-unknown-nacl < %s | FileCheck %s -check-prefix=PNACL +; PNACL-NOT: with.overflow + declare void @throwAnExceptionOrWhatever() ; CHECK-LABEL: @test1( diff --git a/test/Transforms/NaCl/add-pnacl-external-decls.ll b/test/Transforms/NaCl/add-pnacl-external-decls.ll new file mode 100644 index 000000000000..1f525a9268cd --- /dev/null +++ b/test/Transforms/NaCl/add-pnacl-external-decls.ll @@ -0,0 +1,6 @@ +; RUN: opt < %s -add-pnacl-external-decls -S | FileCheck %s + +declare void @foobar(i32) + +; CHECK: declare i32 @setjmp(i8*) +; CHECK: declare void @longjmp(i8*, i32) diff --git a/test/Transforms/NaCl/atomics.ll b/test/Transforms/NaCl/atomics.ll new file mode 100644 index 000000000000..96b2b7d6e168 --- /dev/null +++ b/test/Transforms/NaCl/atomics.ll @@ -0,0 +1,504 @@ +; RUN: opt -nacl-rewrite-atomics -remove-asm-memory -S < %s | FileCheck %s + +; Each of these tests validates that the corresponding legacy GCC-style +; builtins are properly rewritten to NaCl atomic builtins. Only the +; GCC-style builtins that have corresponding primitives in C11/C++11 and +; which emit different code are tested. These legacy GCC-builtins only +; support sequential-consistency. +; +; test_* tests the corresponding __sync_* builtin. See: +; http://gcc.gnu.org/onlinedocs/gcc-4.8.1/gcc/_005f_005fsync-Builtins.html +; +; There are also tests which validate that volatile loads/stores get +; rewritten into NaCl atomic builtins. The memory ordering for volatile +; loads/stores is not validated: it could technically be constrained to +; sequential consistency, or left as relaxed. +; +; Alignment is also expected to be at least natural alignment. 
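+; A note on the constants appearing in the CHECK lines below (inferred from
+; these tests rather than from a separate spec): the first i32 argument of
+; the llvm.nacl.atomic.rmw.* calls selects the operation (add=1, sub=2, or=3,
+; and=4, xor=5, exchange=6), and the trailing i32 6 passed to all of the
+; atomic intrinsics is the sequentially-consistent memory order.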
+ +target datalayout = "p:32:32:32" + +; CHECK: @test_fetch_and_add_i8 +define zeroext i8 @test_fetch_and_add_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 1, i8* %ptr, i8 %value, i32 6) + ; CHECK-NEXT: ret i8 %res + %res = atomicrmw add i8* %ptr, i8 %value seq_cst + ret i8 %res +} + +; CHECK: @test_fetch_and_add_i16 +define zeroext i16 @test_fetch_and_add_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i16* %ptr, i16 %value, i32 6) + ; CHECK-NEXT: ret i16 %res + %res = atomicrmw add i16* %ptr, i16 %value seq_cst + ret i16 %res +} + +; CHECK: @test_fetch_and_add_i32 +define i32 @test_fetch_and_add_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %value, i32 6) + ; CHECK-NEXT: ret i32 %res + %res = atomicrmw add i32* %ptr, i32 %value seq_cst + ret i32 %res +} + +; CHECK: @test_fetch_and_add_i64 +define i64 @test_fetch_and_add_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 1, i64* %ptr, i64 %value, i32 6) + ; CHECK-NEXT: ret i64 %res + %res = atomicrmw add i64* %ptr, i64 %value seq_cst + ret i64 %res +} + +; CHECK: @test_fetch_and_sub_i8 +define zeroext i8 @test_fetch_and_sub_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 2, i8* %ptr, i8 %value, i32 6) + ; CHECK-NEXT: ret i8 %res + %res = atomicrmw sub i8* %ptr, i8 %value seq_cst + ret i8 %res +} + +; CHECK: @test_fetch_and_sub_i16 +define zeroext i16 @test_fetch_and_sub_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 2, i16* %ptr, i16 %value, i32 6) + ; CHECK-NEXT: ret i16 %res + %res = atomicrmw sub i16* %ptr, i16 %value seq_cst + ret i16 %res +} + +; CHECK: @test_fetch_and_sub_i32 +define i32 @test_fetch_and_sub_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %value, i32 6) + ; CHECK-NEXT: ret i32 %res + %res = atomicrmw sub i32* %ptr, i32 %value seq_cst + ret i32 %res +} + +; CHECK: @test_fetch_and_sub_i64 +define i64 @test_fetch_and_sub_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 2, i64* %ptr, i64 %value, i32 6) + ; CHECK-NEXT: ret i64 %res + %res = atomicrmw sub i64* %ptr, i64 %value seq_cst + ret i64 %res +} + +; CHECK: @test_fetch_and_or_i8 +define zeroext i8 @test_fetch_and_or_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 3, i8* %ptr, i8 %value, i32 6) + ; CHECK-NEXT: ret i8 %res + %res = atomicrmw or i8* %ptr, i8 %value seq_cst + ret i8 %res +} + +; CHECK: @test_fetch_and_or_i16 +define zeroext i16 @test_fetch_and_or_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %value, i32 6) + ; CHECK-NEXT: ret i16 %res + %res = atomicrmw or i16* %ptr, i16 %value seq_cst + ret i16 %res +} + +; CHECK: @test_fetch_and_or_i32 +define i32 @test_fetch_and_or_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %value, i32 6) + ; CHECK-NEXT: ret i32 %res + %res = atomicrmw or i32* %ptr, i32 %value seq_cst + ret i32 %res +} + +; CHECK: @test_fetch_and_or_i64 +define i64 @test_fetch_and_or_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 3, i64* %ptr, i64 %value, i32 6) + ; CHECK-NEXT: ret i64 %res + %res = atomicrmw or i64* %ptr, i64 
%value seq_cst + ret i64 %res +} + +; CHECK: @test_fetch_and_and_i8 +define zeroext i8 @test_fetch_and_and_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 4, i8* %ptr, i8 %value, i32 6) + ; CHECK-NEXT: ret i8 %res + %res = atomicrmw and i8* %ptr, i8 %value seq_cst + ret i8 %res +} + +; CHECK: @test_fetch_and_and_i16 +define zeroext i16 @test_fetch_and_and_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 4, i16* %ptr, i16 %value, i32 6) + ; CHECK-NEXT: ret i16 %res + %res = atomicrmw and i16* %ptr, i16 %value seq_cst + ret i16 %res +} + +; CHECK: @test_fetch_and_and_i32 +define i32 @test_fetch_and_and_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %value, i32 6) + ; CHECK-NEXT: ret i32 %res + %res = atomicrmw and i32* %ptr, i32 %value seq_cst + ret i32 %res +} + +; CHECK: @test_fetch_and_and_i64 +define i64 @test_fetch_and_and_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 4, i64* %ptr, i64 %value, i32 6) + ; CHECK-NEXT: ret i64 %res + %res = atomicrmw and i64* %ptr, i64 %value seq_cst + ret i64 %res +} + +; CHECK: @test_fetch_and_xor_i8 +define zeroext i8 @test_fetch_and_xor_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 5, i8* %ptr, i8 %value, i32 6) + ; CHECK-NEXT: ret i8 %res + %res = atomicrmw xor i8* %ptr, i8 %value seq_cst + ret i8 %res +} + +; CHECK: @test_fetch_and_xor_i16 +define zeroext i16 @test_fetch_and_xor_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 5, i16* %ptr, i16 %value, i32 6) + ; CHECK-NEXT: ret i16 %res + %res = atomicrmw xor i16* %ptr, i16 %value seq_cst + ret i16 %res +} + +; CHECK: @test_fetch_and_xor_i32 +define i32 @test_fetch_and_xor_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %value, i32 6) + ; CHECK-NEXT: ret i32 %res + %res = atomicrmw xor i32* %ptr, i32 %value seq_cst + ret i32 %res +} + +; CHECK: @test_fetch_and_xor_i64 +define i64 @test_fetch_and_xor_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 5, i64* %ptr, i64 %value, i32 6) + ; CHECK-NEXT: ret i64 %res + %res = atomicrmw xor i64* %ptr, i64 %value seq_cst + ret i64 %res +} + +; CHECK: @test_val_compare_and_swap_i8 +define zeroext i8 @test_val_compare_and_swap_i8(i8* %ptr, i8 zeroext %oldval, i8 zeroext %newval) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.cmpxchg.i8(i8* %ptr, i8 %oldval, i8 %newval, i32 6, i32 6) + ; CHECK-NEXT: ret i8 %res + %res = cmpxchg i8* %ptr, i8 %oldval, i8 %newval seq_cst + ret i8 %res +} + +; CHECK: @test_val_compare_and_swap_i16 +define zeroext i16 @test_val_compare_and_swap_i16(i16* %ptr, i16 zeroext %oldval, i16 zeroext %newval) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %oldval, i16 %newval, i32 6, i32 6) + ; CHECK-NEXT: ret i16 %res + %res = cmpxchg i16* %ptr, i16 %oldval, i16 %newval seq_cst + ret i16 %res +} + +; CHECK: @test_val_compare_and_swap_i32 +define i32 @test_val_compare_and_swap_i32(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 6) + ; CHECK-NEXT: ret i32 %res + %res = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst + ret i32 %res +} + +; CHECK: @test_val_compare_and_swap_i64 +define i64 
@test_val_compare_and_swap_i64(i64* %ptr, i64 %oldval, i64 %newval) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.cmpxchg.i64(i64* %ptr, i64 %oldval, i64 %newval, i32 6, i32 6) + ; CHECK-NEXT: ret i64 %res + %res = cmpxchg i64* %ptr, i64 %oldval, i64 %newval seq_cst + ret i64 %res +} + +; This pattern gets emitted by C11/C++11 atomic thread fences. +; +; CHECK: @test_c11_fence +define void @test_c11_fence() { + ; CHECK-NEXT: call void @llvm.nacl.atomic.fence(i32 6) + ; CHECK-NEXT: ret void + fence seq_cst + ret void +} + +; This pattern gets emitted for ``__sync_synchronize`` and +; ``asm("":::"memory")`` when Clang is configured for NaCl. +; +; CHECK: @test_synchronize +define void @test_synchronize() { + ; CHECK-NEXT: call void @llvm.nacl.atomic.fence.all() + ; CHECK-NEXT: ret void + call void asm sideeffect "", "~{memory}"() + fence seq_cst + call void asm sideeffect "", "~{memory}"() + ret void +} + +; Make sure the above pattern is respected and not partially matched. +; +; CHECK: @test_synchronize_bad1 +define void @test_synchronize_bad1() { + ; CHECK-NOT: call void @llvm.nacl.atomic.fence.all() + call void asm sideeffect "", "~{memory}"() + fence seq_cst + ret void +} + +; CHECK: @test_synchronize_bad2 +define void @test_synchronize_bad2() { + ; CHECK-NOT: call void @llvm.nacl.atomic.fence.all() + fence seq_cst + call void asm sideeffect "", "~{memory}"() + ret void +} + +; CHECK: @test_lock_test_and_set_i8 +define zeroext i8 @test_lock_test_and_set_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.rmw.i8(i32 6, i8* %ptr, i8 %value, i32 6) + ; CHECK-NEXT: ret i8 %res + %res = atomicrmw xchg i8* %ptr, i8 %value seq_cst + ret i8 %res +} + +; CHECK: @test_lock_release_i8 +define void @test_lock_release_i8(i8* %ptr) { + ; Note that the 'release' was changed to a 'seq_cst'. + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i8(i8 0, i8* %ptr, i32 6) + ; CHECK-NEXT: ret void + store atomic i8 0, i8* %ptr release, align 1 + ret void +} + +; CHECK: @test_lock_test_and_set_i16 +define zeroext i16 @test_lock_test_and_set_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.rmw.i16(i32 6, i16* %ptr, i16 %value, i32 6) + ; CHECK-NEXT: ret i16 %res + %res = atomicrmw xchg i16* %ptr, i16 %value seq_cst + ret i16 %res +} + +; CHECK: @test_lock_release_i16 +define void @test_lock_release_i16(i16* %ptr) { + ; Note that the 'release' was changed to a 'seq_cst'. + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i16(i16 0, i16* %ptr, i32 6) + ; CHECK-NEXT: ret void + store atomic i16 0, i16* %ptr release, align 2 + ret void +} + +; CHECK: @test_lock_test_and_set_i32 +define i32 @test_lock_test_and_set_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %value, i32 6) + ; CHECK-NEXT: ret i32 %res + %res = atomicrmw xchg i32* %ptr, i32 %value seq_cst + ret i32 %res +} + +; CHECK: @test_lock_release_i32 +define void @test_lock_release_i32(i32* %ptr) { + ; Note that the 'release' was changed to a 'seq_cst'. 
+ ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 0, i32* %ptr, i32 6) + ; CHECK-NEXT: ret void + store atomic i32 0, i32* %ptr release, align 4 + ret void +} + +; CHECK: @test_lock_test_and_set_i64 +define i64 @test_lock_test_and_set_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.rmw.i64(i32 6, i64* %ptr, i64 %value, i32 6) + ; CHECK-NEXT: ret i64 %res + %res = atomicrmw xchg i64* %ptr, i64 %value seq_cst + ret i64 %res +} + +; CHECK: @test_lock_release_i64 +define void @test_lock_release_i64(i64* %ptr) { + ; Note that the 'release' was changed to a 'seq_cst'. + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i64(i64 0, i64* %ptr, i32 6) + ; CHECK-NEXT: ret void + store atomic i64 0, i64* %ptr release, align 8 + ret void +} + +; CHECK: @test_volatile_load_i8 +define zeroext i8 @test_volatile_load_i8(i8* %ptr) { + ; CHECK-NEXT: %res = call i8 @llvm.nacl.atomic.load.i8(i8* %ptr, i32 6) + ; CHECK-NEXT: ret i8 %res + %res = load volatile i8* %ptr, align 1 + ret i8 %res +} + +; CHECK: @test_volatile_store_i8 +define void @test_volatile_store_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i8(i8 %value, i8* %ptr, i32 6) + ; CHECK-NEXT: ret void + store volatile i8 %value, i8* %ptr, align 1 + ret void +} + +; CHECK: @test_volatile_load_i16 +define zeroext i16 @test_volatile_load_i16(i16* %ptr) { + ; CHECK-NEXT: %res = call i16 @llvm.nacl.atomic.load.i16(i16* %ptr, i32 6) + ; CHECK-NEXT: ret i16 %res + %res = load volatile i16* %ptr, align 2 + ret i16 %res +} + +; CHECK: @test_volatile_store_i16 +define void @test_volatile_store_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i16(i16 %value, i16* %ptr, i32 6) + ; CHECK-NEXT: ret void + store volatile i16 %value, i16* %ptr, align 2 + ret void +} + +; CHECK: @test_volatile_load_i32 +define i32 @test_volatile_load_i32(i32* %ptr) { + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6) + ; CHECK-NEXT: ret i32 %res + %res = load volatile i32* %ptr, align 4 + ret i32 %res +} + +; CHECK: @test_volatile_store_i32 +define void @test_volatile_store_i32(i32* %ptr, i32 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 6) + ; CHECK-NEXT: ret void + store volatile i32 %value, i32* %ptr, align 4 + ret void +} + +; CHECK: @test_volatile_load_i64 +define i64 @test_volatile_load_i64(i64* %ptr) { + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr, i32 6) + ; CHECK-NEXT: ret i64 %res + %res = load volatile i64* %ptr, align 8 + ret i64 %res +} + +; CHECK: @test_volatile_store_i64 +define void @test_volatile_store_i64(i64* %ptr, i64 %value) { + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i64(i64 %value, i64* %ptr, i32 6) + ; CHECK-NEXT: ret void + store volatile i64 %value, i64* %ptr, align 8 + ret void +} + +; CHECK: @test_volatile_load_float +define float @test_volatile_load_float(float* %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast float* %ptr to i32* + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = bitcast i32 %res to float + ; CHECK-NEXT: ret float %res.cast + %res = load volatile float* %ptr, align 4 + ret float %res +} + +; CHECK: @test_volatile_store_float +define void @test_volatile_store_float(float* %ptr, float %value) { + ; CHECK-NEXT: %ptr.cast = bitcast float* %ptr to i32* + ; CHECK-NEXT: %value.cast = bitcast float %value to i32 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value.cast, 
i32* %ptr.cast, i32 6) + ; CHECK-NEXT: ret void + store volatile float %value, float* %ptr, align 4 + ret void +} + +; CHECK: @test_volatile_load_double +define double @test_volatile_load_double(double* %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast double* %ptr to i64* + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = bitcast i64 %res to double + ; CHECK-NEXT: ret double %res.cast + %res = load volatile double* %ptr, align 8 + ret double %res +} + +; CHECK: @test_volatile_store_double +define void @test_volatile_store_double(double* %ptr, double %value) { + ; CHECK-NEXT: %ptr.cast = bitcast double* %ptr to i64* + ; CHECK-NEXT: %value.cast = bitcast double %value to i64 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i64(i64 %value.cast, i64* %ptr.cast, i32 6) + ; CHECK-NEXT: ret void + store volatile double %value, double* %ptr, align 8 + ret void +} + +; CHECK: @test_volatile_load_i32_pointer +define i32* @test_volatile_load_i32_pointer(i32** %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast i32** %ptr to i32* + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = inttoptr i32 %res to i32* + ; CHECK-NEXT: ret i32* %res.cast + %res = load volatile i32** %ptr, align 4 + ret i32* %res +} + +; CHECK: @test_volatile_store_i32_pointer +define void @test_volatile_store_i32_pointer(i32** %ptr, i32* %value) { + ; CHECK-NEXT: %ptr.cast = bitcast i32** %ptr to i32* + ; CHECK-NEXT: %value.cast = ptrtoint i32* %value to i32 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value.cast, i32* %ptr.cast, i32 6) + ; CHECK-NEXT: ret void + store volatile i32* %value, i32** %ptr, align 4 + ret void +} + +; CHECK: @test_volatile_load_double_pointer +define double* @test_volatile_load_double_pointer(double** %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast double** %ptr to i32* + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = inttoptr i32 %res to double* + ; CHECK-NEXT: ret double* %res.cast + %res = load volatile double** %ptr, align 4 + ret double* %res +} + +; CHECK: @test_volatile_store_double_pointer +define void @test_volatile_store_double_pointer(double** %ptr, double* %value) { + ; CHECK-NEXT: %ptr.cast = bitcast double** %ptr to i32* + ; CHECK-NEXT: %value.cast = ptrtoint double* %value to i32 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value.cast, i32* %ptr.cast, i32 6) + ; CHECK-NEXT: ret void + store volatile double* %value, double** %ptr, align 4 + ret void +} + +; CHECK: @test_volatile_load_v4i8 +define <4 x i8> @test_volatile_load_v4i8(<4 x i8>* %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast <4 x i8>* %ptr to i32* + ; CHECK-NEXT: %res = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = bitcast i32 %res to <4 x i8> + ; CHECK-NEXT: ret <4 x i8> %res.cast + %res = load volatile <4 x i8>* %ptr, align 8 + ret <4 x i8> %res +} + +; CHECK: @test_volatile_store_v4i8 +define void @test_volatile_store_v4i8(<4 x i8>* %ptr, <4 x i8> %value) { + ; CHECK-NEXT: %ptr.cast = bitcast <4 x i8>* %ptr to i32* + ; CHECK-NEXT: %value.cast = bitcast <4 x i8> %value to i32 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i32(i32 %value.cast, i32* %ptr.cast, i32 6) + ; CHECK-NEXT: ret void + store volatile <4 x i8> %value, <4 x i8>* %ptr, align 8 + ret void +} + +; CHECK: @test_volatile_load_v4i16 +define <4 x i16> @test_volatile_load_v4i16(<4 x i16>* %ptr) { + ; CHECK-NEXT: %ptr.cast = bitcast <4 x 
i16>* %ptr to i64* + ; CHECK-NEXT: %res = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr.cast, i32 6) + ; CHECK-NEXT: %res.cast = bitcast i64 %res to <4 x i16> + ; CHECK-NEXT: ret <4 x i16> %res.cast + %res = load volatile <4 x i16>* %ptr, align 8 + ret <4 x i16> %res +} + +; CHECK: @test_volatile_store_v4i16 +define void @test_volatile_store_v4i16(<4 x i16>* %ptr, <4 x i16> %value) { + ; CHECK-NEXT: %ptr.cast = bitcast <4 x i16>* %ptr to i64* + ; CHECK-NEXT: %value.cast = bitcast <4 x i16> %value to i64 + ; CHECK-NEXT: call void @llvm.nacl.atomic.store.i64(i64 %value.cast, i64* %ptr.cast, i32 6) + ; CHECK-NEXT: ret void + store volatile <4 x i16> %value, <4 x i16>* %ptr, align 8 + ret void +} diff --git a/test/Transforms/NaCl/canonicalize-mem-intrinsics.ll b/test/Transforms/NaCl/canonicalize-mem-intrinsics.ll new file mode 100644 index 000000000000..9c263fd15e71 --- /dev/null +++ b/test/Transforms/NaCl/canonicalize-mem-intrinsics.ll @@ -0,0 +1,45 @@ +; RUN: opt %s -canonicalize-mem-intrinsics -S | FileCheck %s +; RUN: opt %s -canonicalize-mem-intrinsics -S \ +; RUN: | FileCheck %s -check-prefix=CLEANED + +declare void @llvm.memset.p0i8.i64(i8*, i8, i64, i32, i1) +declare void @llvm.memcpy.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +declare void @llvm.memmove.p0i8.p0i8.i64(i8*, i8*, i64, i32, i1) +; CLEANED-NOT: @llvm.mem{{.*}}i64 + + +define void @memset_caller(i8* %dest, i8 %char, i64 %size) { + call void @llvm.memset.p0i8.i64(i8* %dest, i8 %char, i64 %size, i32 1, i1 0) + ret void +} +; CHECK: define void @memset_caller +; CHECK-NEXT: %mem_len_truncate = trunc i64 %size to i32 +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %char, i32 %mem_len_truncate, i32 1, i1 false) + + +define void @memcpy_caller(i8* %dest, i8* %src, i64 %size) { + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %size, i32 1, i1 0) + ret void +} +; CHECK: define void @memcpy_caller +; CHECK-NEXT: %mem_len_truncate = trunc i64 %size to i32 +; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %mem_len_truncate, i32 1, i1 false) + + +define void @memmove_caller(i8* %dest, i8* %src, i64 %size) { + call void @llvm.memmove.p0i8.p0i8.i64(i8* %dest, i8* %src, i64 %size, i32 1, i1 0) + ret void +} +; CHECK: define void @memmove_caller +; CHECK-NEXT: %mem_len_truncate = trunc i64 %size to i32 +; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %mem_len_truncate, i32 1, i1 false) + + +; Check that constant sizes remain as constants. 
+ +define void @memset_caller_const(i8* %dest, i8 %char) { + call void @llvm.memset.p0i8.i64(i8* %dest, i8 %char, i64 123, i32 1, i1 0) + ret void +} +; CHECK: define void @memset_caller +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %char, i32 123, i32 1, i1 false) diff --git a/test/Transforms/NaCl/expand-arith-with-overflow.ll b/test/Transforms/NaCl/expand-arith-with-overflow.ll new file mode 100644 index 000000000000..445a5301d2fa --- /dev/null +++ b/test/Transforms/NaCl/expand-arith-with-overflow.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -expand-arith-with-overflow -expand-struct-regs -S | FileCheck %s +; RUN: opt < %s -expand-arith-with-overflow -expand-struct-regs -S | \ +; RUN: FileCheck %s -check-prefix=CLEANUP + +target datalayout = "p:32:32:32" +target triple = "le32-unknown-nacl" + +declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32) +declare {i64, i1} @llvm.umul.with.overflow.i64(i64, i64) +declare {i16, i1} @llvm.uadd.with.overflow.i16(i16, i16) + +; CLEANUP-NOT: with.overflow +; CLEANUP-NOT: extractvalue +; CLEANUP-NOT: insertvalue + + +define void @umul32_by_const(i32 %x, i32* %result_val, i1* %result_overflow) { + %pair = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 256) + %val = extractvalue {i32, i1} %pair, 0 + %overflow = extractvalue {i32, i1} %pair, 1 + + store i32 %val, i32* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} + +; The bound is 16777215 == 0xffffff == ((1 << 32) - 1) / 256 +; CHECK-LABEL: define void @umul32_by_const( +; CHECK-NEXT: %pair.arith = mul i32 %x, 256 +; CHECK-NEXT: %pair.overflow = icmp ugt i32 %x, 16777215 +; CHECK-NEXT: store i32 %pair.arith, i32* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +; Check that the pass can expand multiple uses of the same intrinsic. +define void @umul32_by_const2(i32 %x, i32* %result_val, i1* %result_overflow) { + %pair = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %x, i32 65536) + %val = extractvalue {i32, i1} %pair, 0 + ; Check that the pass can expand multiple uses of %pair. + %overflow1 = extractvalue {i32, i1} %pair, 1 + %overflow2 = extractvalue {i32, i1} %pair, 1 + + store i32 %val, i32* %result_val + store i1 %overflow1, i1* %result_overflow + store i1 %overflow2, i1* %result_overflow + ret void +} + +; CHECK-LABEL: define void @umul32_by_const2( +; CHECK-NEXT: %pair.arith = mul i32 %x, 65536 +; CHECK-NEXT: %pair.overflow = icmp ugt i32 %x, 65535 +; CHECK-NEXT: store i32 %pair.arith, i32* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +define void @umul64_by_const(i64 %x, i64* %result_val, i1* %result_overflow) { + ; Multiply by 1 << 55. 
+ %pair = call {i64, i1} @llvm.umul.with.overflow.i64(i64 36028797018963968, i64 %x) + %val = extractvalue {i64, i1} %pair, 0 + %overflow = extractvalue {i64, i1} %pair, 1 + + store i64 %val, i64* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} + +; CHECK-LABEL: define void @umul64_by_const(i64 %x, i64* %result_val, i1* %result_overflow) { +; CHECK-NEXT: %pair.arith = mul i64 %x, 36028797018963968 +; CHECK-NEXT: %pair.overflow = icmp ugt i64 %x, 511 +; CHECK-NEXT: store i64 %pair.arith, i64* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow + + +define void @uadd16_with_const(i16 %x, i16* %result_val, i1* %result_overflow) { + %pair = call {i16, i1} @llvm.uadd.with.overflow.i16(i16 %x, i16 35) + %val = extractvalue {i16, i1} %pair, 0 + %overflow = extractvalue {i16, i1} %pair, 1 + + store i16 %val, i16* %result_val + store i1 %overflow, i1* %result_overflow + ret void +} +; CHECK-LABEL: define void @uadd16_with_const(i16 %x, i16* %result_val, i1* %result_overflow) { +; CHECK-NEXT: %pair.arith = add i16 %x, 35 +; CHECK-NEXT: %pair.overflow = icmp ugt i16 %x, -36 +; CHECK-NEXT: store i16 %pair.arith, i16* %result_val +; CHECK-NEXT: store i1 %pair.overflow, i1* %result_overflow diff --git a/test/Transforms/NaCl/expand-byval.ll b/test/Transforms/NaCl/expand-byval.ll new file mode 100644 index 000000000000..151e36a8255a --- /dev/null +++ b/test/Transforms/NaCl/expand-byval.ll @@ -0,0 +1,123 @@ +; RUN: opt -expand-byval %s -S | FileCheck %s + +target datalayout = "p:32:32:32" + +%MyStruct = type { i32, i8, i32 } +%AlignedStruct = type { double, double } + + +; Removal of "byval" attribute for passing structs arguments by value + +declare void @ext_func(%MyStruct*) + +define void @byval_receiver(%MyStruct* byval align 32 %ptr) { + call void @ext_func(%MyStruct* %ptr) + ret void +} +; Strip the "byval" and "align" attributes. 
+; CHECK: define void @byval_receiver(%MyStruct* noalias %ptr) { +; CHECK-NEXT: call void @ext_func(%MyStruct* %ptr) + + +declare void @ext_byval_func(%MyStruct* byval) +; CHECK: declare void @ext_byval_func(%MyStruct* noalias) + +define void @byval_caller(%MyStruct* %ptr) { + call void @ext_byval_func(%MyStruct* byval %ptr) + ret void +} +; CHECK: define void @byval_caller(%MyStruct* %ptr) { +; CHECK-NEXT: %ptr.byval_copy = alloca %MyStruct, align 4 +; CHECK: call void @llvm.lifetime.start(i64 12, i8* %{{.*}}) +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 12, i32 0, i1 false) +; CHECK-NEXT: call void @ext_byval_func(%MyStruct* noalias %ptr.byval_copy) + + +define void @byval_tail_caller(%MyStruct* %ptr) { + tail call void @ext_byval_func(%MyStruct* byval %ptr) + ret void +} +; CHECK: define void @byval_tail_caller(%MyStruct* %ptr) { +; CHECK: {{^}} call void @ext_byval_func(%MyStruct* noalias %ptr.byval_copy) + + +define void @byval_invoke(%MyStruct* %ptr) { + invoke void @ext_byval_func(%MyStruct* byval align 32 %ptr) + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret void +} +; CHECK: define void @byval_invoke(%MyStruct* %ptr) { +; CHECK: %ptr.byval_copy = alloca %MyStruct, align 32 +; CHECK: call void @llvm.lifetime.start(i64 12, i8* %{{.*}}) +; CHECK: invoke void @ext_byval_func(%MyStruct* noalias %ptr.byval_copy) +; CHECK: cont: +; CHECK: call void @llvm.lifetime.end(i64 12, i8* %{{.*}}) +; CHECK: lpad: +; CHECK: call void @llvm.lifetime.end(i64 12, i8* %{{.*}}) + + +; Check handling of alignment + +; Check that "align" is stripped for declarations too. +declare void @ext_byval_func_align(%MyStruct* byval align 32) +; CHECK: declare void @ext_byval_func_align(%MyStruct* noalias) + +define void @byval_caller_align_via_attr(%MyStruct* %ptr) { + call void @ext_byval_func(%MyStruct* byval align 32 %ptr) + ret void +} +; CHECK: define void @byval_caller_align_via_attr(%MyStruct* %ptr) { +; CHECK-NEXT: %ptr.byval_copy = alloca %MyStruct, align 32 +; The memcpy may assume that %ptr is 32-byte-aligned. +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %2, i8* %3, i64 12, i32 32, i1 false) + +declare void @ext_byval_func_align_via_type(%AlignedStruct* byval) + +; %AlignedStruct contains a double so requires an alignment of 8 bytes. +; Looking at the alignment of %AlignedStruct is a workaround for a bug +; in pnacl-clang: +; https://code.google.com/p/nativeclient/issues/detail?id=3403 +define void @byval_caller_align_via_type(%AlignedStruct* %ptr) { + call void @ext_byval_func_align_via_type(%AlignedStruct* byval %ptr) + ret void +} +; CHECK: define void @byval_caller_align_via_type(%AlignedStruct* %ptr) { +; CHECK-NEXT: %ptr.byval_copy = alloca %AlignedStruct, align 8 +; Don't assume that %ptr is 8-byte-aligned when doing the memcpy. 
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* %{{.*}}, i64 16, i32 0, i1 false) + + +; Removal of "sret" attribute for returning structs by value + +declare void @ext_sret_func(%MyStruct* sret align 32) +; CHECK: declare void @ext_sret_func(%MyStruct*) + +define void @sret_func(%MyStruct* sret align 32 %buf) { + ret void +} +; CHECK: define void @sret_func(%MyStruct* %buf) { + +define void @sret_caller(%MyStruct* %buf) { + call void @ext_sret_func(%MyStruct* sret align 32 %buf) + ret void +} +; CHECK: define void @sret_caller(%MyStruct* %buf) { +; CHECK-NEXT: call void @ext_sret_func(%MyStruct* %buf) + + +; Check that other attributes are preserved + +define void @inreg_attr(%MyStruct* inreg %ptr) { + ret void +} +; CHECK: define void @inreg_attr(%MyStruct* inreg %ptr) { + +declare void @func_attrs() #0 +; CHECK: declare void @func_attrs() #0 + +attributes #0 = { noreturn nounwind } +; CHECK: attributes #0 = { noreturn nounwind } diff --git a/test/Transforms/NaCl/expand-constantexpr.ll b/test/Transforms/NaCl/expand-constantexpr.ll new file mode 100644 index 000000000000..3c5b1192edbd --- /dev/null +++ b/test/Transforms/NaCl/expand-constantexpr.ll @@ -0,0 +1,109 @@ +; RUN: opt < %s -expand-constant-expr -S | FileCheck %s + +@global_var1 = global i32 123 +@global_var2 = global i32 123 + + +define i8* @constantexpr_bitcast() { + ret i8* bitcast (i32* @global_var1 to i8*) +} +; CHECK-LABEL: @constantexpr_bitcast +; CHECK: %expanded = bitcast i32* @global_var1 to i8* +; CHECK: ret i8* %expanded + + +define i32 @constantexpr_nested() { + ret i32 add (i32 ptrtoint (i32* @global_var1 to i32), + i32 ptrtoint (i32* @global_var2 to i32)) +} +; CHECK-LABEL: @constantexpr_nested +; CHECK: %expanded1 = ptrtoint i32* @global_var1 to i32 +; CHECK: %expanded2 = ptrtoint i32* @global_var2 to i32 +; CHECK: %expanded = add i32 %expanded1, %expanded2 +; CHECK: ret i32 %expanded + + +define i32 @constantexpr_nested2() { + ret i32 mul (i32 add (i32 ptrtoint (i32* @global_var1 to i32), + i32 ptrtoint (i32* @global_var2 to i32)), i32 2) +} +; CHECK-LABEL: @constantexpr_nested2 +; CHECK: %expanded2 = ptrtoint i32* @global_var1 to i32 +; CHECK: %expanded3 = ptrtoint i32* @global_var2 to i32 +; CHECK: %expanded1 = add i32 %expanded2, %expanded3 +; CHECK: %expanded = mul i32 %expanded1, 2 +; CHECK: ret i32 %expanded + + +define i32 @constantexpr_phi() { +entry: + br label %label +label: + %result = phi i32 [ ptrtoint (i32* @global_var1 to i32), %entry ] + ret i32 %result +} +; CHECK-LABEL: @constantexpr_phi +; CHECK: entry: +; CHECK: %expanded = ptrtoint i32* @global_var1 to i32 +; CHECK: br label %label +; CHECK: label: +; CHECK: %result = phi i32 [ %expanded, %entry ] + + +; This tests that ExpandConstantExpr correctly handles a PHI node that +; contains the same ConstantExpr twice. +; Using replaceAllUsesWith() is not correct on a PHI node when the +; new instruction has to be added to an incoming block. 
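+; Each occurrence has to be expanded into an instruction inserted into its
+; own incoming block, so the two textually identical incoming values below
+; must become two separate expansions (%expanded in %iftrue and %expanded1
+; in %iffalse, as the CHECK lines verify).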
+define i32 @constantexpr_phi_twice(i1 %arg) { + br i1 %arg, label %iftrue, label %iffalse +iftrue: + br label %exit +iffalse: + br label %exit +exit: + %result = phi i32 [ ptrtoint (i32* @global_var1 to i32), %iftrue ], + [ ptrtoint (i32* @global_var1 to i32), %iffalse ] + ret i32 %result +} +; CHECK-LABEL: @constantexpr_phi_twice +; CHECK: iftrue: +; CHECK: %expanded = ptrtoint i32* @global_var1 to i32 +; CHECK: iffalse: +; CHECK: %expanded1 = ptrtoint i32* @global_var1 to i32 +; CHECK: exit: + + +define i32 @constantexpr_phi_multiple_entry(i1 %arg) { +entry: + br i1 %arg, label %done, label %done +done: + %result = phi i32 [ ptrtoint (i32* @global_var1 to i32), %entry ], + [ ptrtoint (i32* @global_var1 to i32), %entry ] + ret i32 %result +} +; CHECK-LABEL: @constantexpr_phi_multiple_entry +; CHECK: entry: +; CHECK: %expanded = ptrtoint i32* @global_var1 to i32 +; CHECK: br i1 %arg, label %done, label %done +; CHECK: done: +; CHECK: %result = phi i32 [ %expanded, %entry ], [ %expanded, %entry ] + + + +declare void @external_func() +declare void @personality_func() + +define void @test_landingpad() { + invoke void @external_func() to label %ok unwind label %onerror +ok: + ret void +onerror: + %lp = landingpad i32 + personality i8* bitcast (void ()* @personality_func to i8*) + catch i32* null + ret void +} +; landingpad can only accept a ConstantExpr, so this should remain +; unmodified. +; CHECK-LABEL: @test_landingpad +; CHECK: personality i8* bitcast (void ()* @personality_func to i8*) diff --git a/test/Transforms/NaCl/expand-ctors-empty.ll b/test/Transforms/NaCl/expand-ctors-empty.ll new file mode 100644 index 000000000000..f0788a0873e4 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors-empty.ll @@ -0,0 +1,12 @@ +; Currently we do not define __{init,fini}_array_end as named aliases. +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS +; NO_CTORS-NOT: __init_array_end +; NO_CTORS-NOT: __fini_array_end + +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s + +; If llvm.global_ctors is not present, it is treated as if it is an +; empty array, and __{init,fini}_array_start are defined anyway. + +; CHECK: @__init_array_start = internal constant [0 x void ()*] zeroinitializer +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer diff --git a/test/Transforms/NaCl/expand-ctors-emptylist.ll b/test/Transforms/NaCl/expand-ctors-emptylist.ll new file mode 100644 index 000000000000..6ab68852b9d3 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors-emptylist.ll @@ -0,0 +1,13 @@ +; RUN: opt %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS +; NO_CTORS-NOT: __init_array_end +; NO_CTORS-NOT: __fini_array_end +; NO_CTORS-NOT: llvm.global_ctors + +; RUN: opt %s -nacl-expand-ctors -S | FileCheck %s + +; Check that the pass works when the initializer is "[]", which gets +; converted into "undef" by the reader. +@llvm.global_ctors = appending global [0 x { i32, void ()* }] [] + +; CHECK: @__init_array_start = internal constant [0 x void ()*] zeroinitializer +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer diff --git a/test/Transforms/NaCl/expand-ctors-zeroinit.ll b/test/Transforms/NaCl/expand-ctors-zeroinit.ll new file mode 100644 index 000000000000..824b2b23b72d --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors-zeroinit.ll @@ -0,0 +1,17 @@ +; Currently we do not define __{init,fini}_array_end as named aliases. 
+; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS +; NO_CTORS-NOT: __init_array_end +; NO_CTORS-NOT: __fini_array_end + +; We expect this symbol to be removed: +; RUN: opt < %s -nacl-expand-ctors -S | not grep llvm.global_ctors + +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s + +; If llvm.global_ctors is zeroinitializer, it should be treated the +; same as an empty array. + +@llvm.global_ctors = appending global [0 x { i32, void ()* }] zeroinitializer + +; CHECK: @__init_array_start = internal constant [0 x void ()*] zeroinitializer +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer diff --git a/test/Transforms/NaCl/expand-ctors.ll b/test/Transforms/NaCl/expand-ctors.ll new file mode 100644 index 000000000000..250abbc1bf90 --- /dev/null +++ b/test/Transforms/NaCl/expand-ctors.ll @@ -0,0 +1,37 @@ +; We expect these symbol names to be removed: +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s -check-prefix=NO_CTORS +; NO_CTORS-NOT: llvm.global.ctors +; NO_CTORS-NOT: __init_array_end +; NO_CTORS-NOT: __fini_array_end + +; RUN: opt < %s -nacl-expand-ctors -S | FileCheck %s + +@llvm.global_ctors = appending global [3 x { i32, void ()* }] + [{ i32, void ()* } { i32 300, void ()* @init_func_A }, + { i32, void ()* } { i32 100, void ()* @init_func_B }, + { i32, void ()* } { i32 200, void ()* @init_func_C }] + +@__init_array_start = extern_weak global [0 x void ()*] +@__init_array_end = extern_weak global [0 x void ()*] + +; CHECK: @__init_array_start = internal constant [3 x void ()*] [void ()* @init_func_B, void ()* @init_func_C, void ()* @init_func_A] +; CHECK: @__fini_array_start = internal constant [0 x void ()*] zeroinitializer + +define void @init_func_A() { ret void } +define void @init_func_B() { ret void } +define void @init_func_C() { ret void } + +define [0 x void ()*]* @get_array_start() { + ret [0 x void ()*]* @__init_array_start; +} +; CHECK: @get_array_start() +; CHECK: ret {{.*}} @__init_array_start + +define [0 x void ()*]* @get_array_end() { + ret [0 x void ()*]* @__init_array_end; +} + +; @get_array_end() is converted to use a GetElementPtr that returns +; the end of the generated array: +; CHECK: @get_array_end() +; CHECK: ret {{.*}} bitcast ([3 x void ()*]* getelementptr inbounds ([3 x void ()*]* @__init_array_start, i32 1) diff --git a/test/Transforms/NaCl/expand-getelementptr.ll b/test/Transforms/NaCl/expand-getelementptr.ll new file mode 100644 index 000000000000..9f5a4bd8d254 --- /dev/null +++ b/test/Transforms/NaCl/expand-getelementptr.ll @@ -0,0 +1,123 @@ +; RUN: opt < %s -expand-getelementptr -S | FileCheck %s + +target datalayout = "p:32:32:32" + +%MyStruct = type { i8, i32, i8 } +%MyArray = type { [100 x i64] } +%MyArrayOneByte = type { [100 x i8] } + + +; Test indexing struct field +define i8* @test_struct_field(%MyStruct* %ptr) { + %addr = getelementptr %MyStruct* %ptr, i32 0, i32 2 + ret i8* %addr +} +; CHECK: @test_struct_field +; CHECK-NEXT: %gep_int = ptrtoint %MyStruct* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 8 +; CHECK-NEXT: %addr = inttoptr i32 %gep to i8* +; CHECK-NEXT: ret i8* %addr + + +; Test non-constant index into an array +define i64* @test_array_index(%MyArray* %ptr, i32 %index) { + %addr = getelementptr %MyArray* %ptr, i32 0, i32 0, i32 %index + ret i64* %addr +} +; CHECK: @test_array_index +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep_array = mul i32 %index, 8 +; CHECK-NEXT: %gep = add i32 %gep_int, %gep_array +; CHECK-NEXT: %addr = inttoptr 
i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test constant index into an array (as a pointer) +define %MyStruct* @test_ptr_add(%MyStruct* %ptr) { + %addr = getelementptr %MyStruct* %ptr, i32 2 + ret %MyStruct* %addr +} +; CHECK: @test_ptr_add +; CHECK-NEXT: %gep_int = ptrtoint %MyStruct* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 24 +; CHECK-NEXT: %addr = inttoptr i32 %gep to %MyStruct* +; CHECK-NEXT: ret %MyStruct* %addr + + +; Test that additions and multiplications are combined properly +define i64* @test_add_and_index(%MyArray* %ptr, i32 %index) { + %addr = getelementptr %MyArray* %ptr, i32 1, i32 0, i32 %index + ret i64* %addr +} +; CHECK: @test_add_and_index +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 800 +; CHECK-NEXT: %gep_array = mul i32 %index, 8 +; CHECK-NEXT: %gep1 = add i32 %gep, %gep_array +; CHECK-NEXT: %addr = inttoptr i32 %gep1 to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test that we don't multiply by 1 unnecessarily +define i8* @test_add_and_index_one_byte(%MyArrayOneByte* %ptr, i32 %index) { + %addr = getelementptr %MyArrayOneByte* %ptr, i32 1, i32 0, i32 %index + ret i8* %addr +} +; CHECK: @test_add_and_index +; CHECK-NEXT: %gep_int = ptrtoint %MyArrayOneByte* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 100 +; CHECK-NEXT: %gep1 = add i32 %gep, %index +; CHECK-NEXT: %addr = inttoptr i32 %gep1 to i8* +; CHECK-NEXT: ret i8* %addr + + +; Test >32-bit array index +define i64* @test_array_index64(%MyArray* %ptr, i64 %index) { + %addr = getelementptr %MyArray* %ptr, i32 0, i32 0, i64 %index + ret i64* %addr +} +; CHECK: @test_array_index64 +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep_trunc = trunc i64 %index to i32 +; CHECK-NEXT: %gep_array = mul i32 %gep_trunc, 8 +; CHECK-NEXT: %gep = add i32 %gep_int, %gep_array +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test <32-bit array index +define i64* @test_array_index16(%MyArray* %ptr, i16 %index) { + %addr = getelementptr %MyArray* %ptr, i32 0, i32 0, i16 %index + ret i64* %addr +} +; CHECK: @test_array_index16 +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep_sext = sext i16 %index to i32 +; CHECK-NEXT: %gep_array = mul i32 %gep_sext, 8 +; CHECK-NEXT: %gep = add i32 %gep_int, %gep_array +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test >32-bit constant array index +define i64* @test_array_index64_const(%MyArray* %ptr) { + %addr = getelementptr %MyArray* %ptr, i32 0, i32 0, i64 100 + ret i64* %addr +} +; CHECK: @test_array_index64_const +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, 800 +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr + + +; Test <32-bit constant array index -- test sign extension +define i64* @test_array_index16_const(%MyArray* %ptr) { + %addr = getelementptr %MyArray* %ptr, i32 0, i32 0, i16 -100 + ret i64* %addr +} +; CHECK: @test_array_index16_const +; CHECK-NEXT: %gep_int = ptrtoint %MyArray* %ptr to i32 +; CHECK-NEXT: %gep = add i32 %gep_int, -800 +; CHECK-NEXT: %addr = inttoptr i32 %gep to i64* +; CHECK-NEXT: ret i64* %addr diff --git a/test/Transforms/NaCl/expand-small-arguments.ll b/test/Transforms/NaCl/expand-small-arguments.ll new file mode 100644 index 000000000000..48a62d80d738 --- /dev/null +++ b/test/Transforms/NaCl/expand-small-arguments.ll @@ -0,0 +1,97 @@ +; RUN: opt %s -expand-small-arguments -S | 
FileCheck %s + +@var = global i8 0 + + +define void @small_arg(i8 %val) { + store i8 %val, i8* @var + ret void +} +; CHECK: define void @small_arg(i32 %val) { +; CHECK-NEXT: %val.arg_trunc = trunc i32 %val to i8 +; CHECK-NEXT: store i8 %val.arg_trunc, i8* @var + + +define i8 @small_result() { + %val = load i8* @var + ret i8 %val +} +; CHECK: define i32 @small_result() { +; CHECK-NEXT: %val = load i8* @var +; CHECK-NEXT: %val.ret_ext = zext i8 %val to i32 +; CHECK-NEXT: ret i32 %val.ret_ext + +define signext i8 @small_result_signext() { + %val = load i8* @var + ret i8 %val +} +; CHECK: define signext i32 @small_result_signext() { +; CHECK-NEXT: %val = load i8* @var +; CHECK-NEXT: %val.ret_ext = sext i8 %val to i32 +; CHECK-NEXT: ret i32 %val.ret_ext + + +define void @call_small_arg() { + call void @small_arg(i8 100) + ret void +} +; CHECK: define void @call_small_arg() { +; CHECK-NEXT: %arg_ext = zext i8 100 to i32 +; CHECK-NEXT: %.arg_cast = bitcast {{.*}} @small_arg +; CHECK-NEXT: call void %.arg_cast(i32 %arg_ext) + +define void @call_small_arg_signext() { + call void @small_arg(i8 signext 100) + ret void +} +; CHECK: define void @call_small_arg_signext() { +; CHECK-NEXT: %arg_ext = sext i8 100 to i32 +; CHECK-NEXT: %.arg_cast = bitcast {{.*}} @small_arg +; CHECK-NEXT: call void %.arg_cast(i32 signext %arg_ext) + + +define void @call_small_result() { + %r = call i8 @small_result() + store i8 %r, i8* @var + ret void +} +; CHECK: define void @call_small_result() { +; CHECK-NEXT: %r.arg_cast = bitcast {{.*}} @small_result +; CHECK-NEXT: %r = call i32 %r.arg_cast() +; CHECK-NEXT: %r.ret_trunc = trunc i32 %r to i8 +; CHECK-NEXT: store i8 %r.ret_trunc, i8* @var + + +; Check that various attributes are preserved. +define i1 @attributes(i8 %arg) nounwind { + %r = tail call fastcc i1 @attributes(i8 %arg) nounwind + ret i1 %r +} +; CHECK: define i32 @attributes(i32 %arg) [[NOUNWIND:#[0-9]+]] { +; CHECK: tail call fastcc i32 {{.*}} [[NOUNWIND]] + + +; These arguments and results should be left alone. +define i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) { + %r = call i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) + ret i64 %r +} +; CHECK: define i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) { +; CHECK-NEXT: %r = call i64 @larger_arguments(i32 %a, i64 %b, i8* %ptr, double %d) +; CHECK-NEXT: ret i64 %r + + +; Intrinsics must be left alone since the pass cannot change their types. 
+ +declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) +; CHECK: declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) + +define void @intrinsic_call(i8* %ptr) { + call void @llvm.memset.p0i8.i32(i8* %ptr, i8 99, i32 256, i32 1, i1 0) + ret void +} +; CHECK: define void @intrinsic_call +; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %ptr, i8 99, + + +; CHECK: attributes [[NOUNWIND]] = { nounwind } diff --git a/test/Transforms/NaCl/expand-struct-regs.ll b/test/Transforms/NaCl/expand-struct-regs.ll new file mode 100644 index 000000000000..c1c1b3803b79 --- /dev/null +++ b/test/Transforms/NaCl/expand-struct-regs.ll @@ -0,0 +1,126 @@ +; RUN: opt < %s -expand-struct-regs -S | FileCheck %s +; RUN: opt < %s -expand-struct-regs -S | FileCheck %s -check-prefix=CLEANUP + +; These two instructions should not appear in the output: +; CLEANUP-NOT: extractvalue +; CLEANUP-NOT: insertvalue + +%struct = type { i8, i32 } + + +define void @struct_load(%struct* %p, i8* %out0, i32* %out1) { + %val = load %struct* %p + %field0 = extractvalue %struct %val, 0 + %field1 = extractvalue %struct %val, 1 + store i8 %field0, i8* %out0 + store i32 %field1, i32* %out1 + ret void +} +; CHECK-LABEL: define void @struct_load +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct* %p, i32 0, i32 0 +; CHECK-NEXT: %val.field{{.*}} = load i8* %val.index{{.*}}, align 1 +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct* %p, i32 0, i32 1 +; CHECK-NEXT: %val.field{{.*}} = load i32* %val.index{{.*}}, align 1 +; CHECK-NEXT: store i8 %val.field{{.*}}, i8* %out0 +; CHECK-NEXT: store i32 %val.field{{.*}}, i32* %out1 + + +define void @struct_store(%struct* %in_ptr, %struct* %out_ptr) { + %val = load %struct* %in_ptr + store %struct %val, %struct* %out_ptr + ret void +} +; CHECK-LABEL: define void @struct_store +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct* %in_ptr, i32 0, i32 0 +; CHECK-NEXT: %val.field{{.*}} = load i8* %val.index{{.*}}, align 1 +; CHECK-NEXT: %val.index{{.*}} = getelementptr %struct* %in_ptr, i32 0, i32 1 +; CHECK-NEXT: %val.field{{.*}} = load i32* %val.index{{.*}}, align 1 +; CHECK-NEXT: %out_ptr.index{{.*}} = getelementptr %struct* %out_ptr, i32 0, i32 0 +; CHECK-NEXT: store i8 %val.field{{.*}}, i8* %out_ptr.index{{.*}}, align 1 +; CHECK-NEXT: %out_ptr.index{{.*}} = getelementptr %struct* %out_ptr, i32 0, i32 1 +; CHECK-NEXT: store i32 %val.field{{.*}}, i32* %out_ptr.index{{.*}}, align 1 + + +; Ensure that the pass works correctly across basic blocks. 
+define void @across_basic_block(%struct* %in_ptr, %struct* %out_ptr) { + %val = load %struct* %in_ptr + br label %bb +bb: + store %struct %val, %struct* %out_ptr + ret void +} +; CHECK-LABEL: define void @across_basic_block +; CHECK: load +; CHECK: load +; CHECK: bb: +; CHECK: store +; CHECK: store + + +define void @const_struct_store(%struct* %ptr) { + store %struct { i8 99, i32 1234 }, %struct* %ptr + ret void +} +; CHECK-LABEL: define void @const_struct_store +; CHECK: store i8 99 +; CHECK: store i32 1234 + + +define void @struct_phi_node(%struct* %ptr) { +entry: + %val = load %struct* %ptr + br label %bb +bb: + %phi = phi %struct [ %val, %entry ] + ret void +} +; CHECK-LABEL: bb: +; CHECK-NEXT: %phi.index{{.*}} = phi i8 [ %val.field{{.*}}, %entry ] +; CHECK-NEXT: %phi.index{{.*}} = phi i32 [ %val.field{{.*}}, %entry ] + + +define void @struct_phi_node_multiple_entry(i1 %arg, %struct* %ptr) { +entry: + %val = load %struct* %ptr + br i1 %arg, label %bb, label %bb +bb: + %phi = phi %struct [ %val, %entry ], [ %val, %entry ] + ret void +} +; CHECK-LABEL: bb: +; CHECK-NEXT: %phi.index{{.*}} = phi i8 [ %val.field{{.*}}, %entry ], [ %val.field{{.*}}, %entry ] +; CHECK-NEXT: %phi.index{{.*}} = phi i32 [ %val.field{{.*}}, %entry ], [ %val.field{{.*}}, %entry ] + + +define void @struct_select_inst(i1 %cond, %struct* %ptr1, %struct* %ptr2) { + %val1 = load %struct* %ptr1 + %val2 = load %struct* %ptr2 + %select = select i1 %cond, %struct %val1, %struct %val2 + ret void +} +; CHECK-LABEL: define void @struct_select_inst +; CHECK: %select.index{{.*}} = select i1 %cond, i8 %val1.field{{.*}}, i8 %val2.field{{.*}} +; CHECK-NEXT: %select.index{{.*}} = select i1 %cond, i32 %val1.field{{.*}}, i32 %val2.field{{.*}} + + +define void @insert_and_extract(i8* %out0, i32* %out1) { + %temp = insertvalue %struct undef, i8 100, 0 + %sval = insertvalue %struct %temp, i32 200, 1 + %field0 = extractvalue %struct %sval, 0 + %field1 = extractvalue %struct %sval, 1 + store i8 %field0, i8* %out0 + store i32 %field1, i32* %out1 + ret void +} +; CHECK-LABEL: define void @insert_and_extract(i8* %out0, i32* %out1) { +; CHECK-NEXT: store i8 100, i8* %out0 +; CHECK-NEXT: store i32 200, i32* %out1 +; CHECK-NEXT: ret void + + +define i32 @extract_from_constant() { + %ev = extractvalue %struct { i8 99, i32 888 }, 1 + ret i32 %ev +} +; CHECK-LABEL: define i32 @extract_from_constant() { +; CHECK-NEXT: ret i32 888 diff --git a/test/Transforms/NaCl/expand-tls-aligned.ll b/test/Transforms/NaCl/expand-tls-aligned.ll new file mode 100644 index 000000000000..75f03ba306ff --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-aligned.ll @@ -0,0 +1,42 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +target datalayout = "p:32:32:32" + + +@var = global i32 123 + +; Put this first to check that the pass handles BSS variables last. +@bss_tvar_aligned = thread_local global i32 0, align 64 + +@tvar1 = thread_local global i16 234 +; Test a pointer to check we are getting the right pointer size. +@tvar2 = thread_local global i32* @var +@tvar_aligned = thread_local global i8 99, align 32 + + +; CHECK: %tls_init_template = type <{ i16, [2 x i8], i32*, [24 x i8], i8 }> +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> + +; This struct type must be "packed" because the 31 byte padding here +; is followed by an i32. 
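+; (Working the numbers: the init template above is 33 bytes -- 2 + 2 + 4 + 24 + 1 --
+; so the 31 bytes of padding below put the i32 slot for @bss_tvar_aligned,
+; which requests align 64, at offset 64 within the combined %tls_struct.)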
+; CHECK: %tls_bss_template = type <{ [31 x i8], i32, [60 x i8] }> + +; CHECK: @__tls_template_start = internal constant %tls_init_template <{ i16 234, [2 x i8] zeroinitializer, i32* @var, [24 x i8] zeroinitializer, i8 99 }> + +; CHECK: @__tls_template_alignment = internal constant i32 64 + + +; Create references to __tls_template_* to keep these live, otherwise +; the definition of %tls_struct (which we check for above) is removed +; from the output. + +@__tls_template_tdata_end = external global i8 +@__tls_template_end = external global i8 + +define i8* @get_tls_template_tdata_end() { + ret i8* @__tls_template_tdata_end +} + +define i8* @get_tls_template_end() { + ret i8* @__tls_template_end +} diff --git a/test/Transforms/NaCl/expand-tls-bss.ll b/test/Transforms/NaCl/expand-tls-bss.ll new file mode 100644 index 000000000000..02504611f091 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-bss.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + + +@tvar_bss1 = thread_local global i64 0 +@tvar_bss2 = thread_local global i32 0 + + +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> +; CHECK: %tls_bss_template = type <{ i64, i32, [4 x i8] }> + + +define i64* @get_tvar_bss1() { + ret i64* @tvar_bss1 +} +; CHECK: define i64* @get_tvar_bss1() +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 1, i32 0 +; CHECK: ret i64* %field diff --git a/test/Transforms/NaCl/expand-tls-constexpr-alias.ll b/test/Transforms/NaCl/expand-tls-constexpr-alias.ll new file mode 100644 index 000000000000..65daa5eacd4a --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-constexpr-alias.ll @@ -0,0 +1,28 @@ +; RUN: opt < %s -nacl-expand-tls-constant-expr -S | FileCheck %s + +@real_tvar = thread_local global i32 123 +@tvar_alias = alias i32* @real_tvar +@tvar_alias2 = alias i32* getelementptr (i32* @real_tvar, i32 100) + + +define i32* @get_tvar() { + ret i32* @tvar_alias +} +; CHECK: define i32* @get_tvar() +; CHECK: ret i32* @real_tvar + + +define i32* @get_tvar2() { + ret i32* @tvar_alias2 +} +; CHECK: define i32* @get_tvar2() +; CHECK: %expanded = getelementptr i32* @real_tvar, i32 100 +; CHECK: ret i32* %expanded + + +define i32* @get_tvar3() { + ret i32* getelementptr (i32* @tvar_alias2, i32 100) +} +; CHECK: define i32* @get_tvar3() +; CHECK: %expanded = getelementptr i32* @real_tvar, i32 200 +; CHECK: ret i32* %expanded diff --git a/test/Transforms/NaCl/expand-tls-constexpr.ll b/test/Transforms/NaCl/expand-tls-constexpr.ll new file mode 100644 index 000000000000..67b1b493fd16 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-constexpr.ll @@ -0,0 +1,140 @@ +; RUN: opt < %s -nacl-expand-tls-constant-expr -S | FileCheck %s + +@tvar = thread_local global i32 0 + + +define i32 @test_converting_ptrtoint() { + ret i32 ptrtoint (i32* @tvar to i32) +} +; CHECK-LABEL: define i32 @test_converting_ptrtoint() +; CHECK: %expanded = ptrtoint i32* @tvar to i32 +; CHECK: ret i32 %expanded + + +define i32 @test_converting_add() { + ret i32 add (i32 ptrtoint (i32* @tvar to i32), i32 4) +} +; CHECK-LABEL: define i32 @test_converting_add() +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %expanded = add i32 %expanded1, 4 +; CHECK: ret i32 %expanded + + +define i32 @test_converting_multiple_operands() { + ret i32 add (i32 ptrtoint (i32* @tvar to i32), + i32 ptrtoint (i32* @tvar to i32)) +} +; CHECK-LABEL: define i32 @test_converting_multiple_operands() +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %expanded = add i32 %expanded1, %expanded1 +; 
CHECK: ret i32 %expanded + + +define i32 @test_allocating_new_var_name(i32 %expanded) { + %result = add i32 %expanded, ptrtoint (i32* @tvar to i32) + ret i32 %result +} +; CHECK-LABEL: define i32 @test_allocating_new_var_name(i32 %expanded) +; CHECK: %expanded1 = ptrtoint i32* @tvar to i32 +; CHECK: %result = add i32 %expanded, %expanded1 +; CHECK: ret i32 %result + + +define i8* @test_converting_bitcast() { + ret i8* bitcast (i32* @tvar to i8*) +} +; CHECK-LABEL: define i8* @test_converting_bitcast() +; CHECK: %expanded = bitcast i32* @tvar to i8* +; CHECK: ret i8* %expanded + + +define i32* @test_converting_getelementptr() { + ; Use an index >1 to ensure that "inbounds" is not added automatically. + ret i32* getelementptr (i32* @tvar, i32 2) +} +; CHECK-LABEL: define i32* @test_converting_getelementptr() +; CHECK: %expanded = getelementptr i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +; This is identical to @test_converting_getelementptr(). +; We need to check that both copies of getelementptr are fixed. +define i32* @test_converting_getelementptr_copy() { + ret i32* getelementptr (i32* @tvar, i32 2) +} +; CHECK-LABEL: define i32* @test_converting_getelementptr_copy() +; CHECK: %expanded = getelementptr i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +define i32* @test_converting_getelementptr_inbounds() { + ret i32* getelementptr inbounds (i32* @tvar, i32 2) +} +; CHECK-LABEL: define i32* @test_converting_getelementptr_inbounds() +; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 2 +; CHECK: ret i32* %expanded + + +define i32* @test_converting_phi(i1 %cmp) { +entry: + br i1 %cmp, label %return, label %else + +else: + br label %return + +return: + %result = phi i32* [ getelementptr (i32* @tvar, i32 1), %entry ], [ null, %else ] + ret i32* %result +} +; The converted ConstantExprs get pushed back into the PHI node's +; incoming block, which might be suboptimal but works in all cases. +; CHECK-LABEL: define i32* @test_converting_phi(i1 %cmp) +; CHECK-LABEL: entry: +; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 1 +; CHECK-LABEL: else: +; CHECK-LABEL: return: +; CHECK: %result = phi i32* [ %expanded, %entry ], [ null, %else ] + + +@addr1 = global i8* blockaddress(@test_converting_phi_with_indirectbr, %return) +@addr2 = global i8* blockaddress(@test_converting_phi_with_indirectbr, %else) +define i32* @test_converting_phi_with_indirectbr(i8* %addr) { +entry: + indirectbr i8* %addr, [ label %return, label %else ] + +else: + br label %return + +return: + %result = phi i32* [ getelementptr (i32* @tvar, i32 1), %entry ], [ null, %else ] + ret i32* %result +} +; CHECK-LABEL: define i32* @test_converting_phi_with_indirectbr(i8* %addr) +; CHECK-LABEL: entry: +; CHECK: %expanded = getelementptr inbounds i32* @tvar, i32 1 +; CHECK-LABEL: return: +; CHECK: %result = phi i32* [ %expanded, %entry ], [ null, %else ] + + +; This tests that ExpandTlsConstantExpr correctly handles a PHI node +; that contains the same ConstantExpr twice. Using +; replaceAllUsesWith() is not correct on a PHI node when the new +; instruction has to be added to an incoming block. 
+define i32 @test_converting_phi_twice(i1 %arg) { + br i1 %arg, label %iftrue, label %iffalse +iftrue: + br label %exit +iffalse: + br label %exit +exit: + %result = phi i32 [ ptrtoint (i32* @tvar to i32), %iftrue ], + [ ptrtoint (i32* @tvar to i32), %iffalse ] + ret i32 %result +} +; CHECK-LABEL: define i32 @test_converting_phi_twice(i1 %arg) +; CHECK-LABEL: iftrue: +; CHECK: %expanded{{.*}} = ptrtoint i32* @tvar to i32 +; CHECK-LABEL: iffalse: +; CHECK: %expanded{{.*}} = ptrtoint i32* @tvar to i32 +; CHECK-LABEL: exit: +; CHECK: %result = phi i32 [ %expanded1, %iftrue ], [ %expanded, %iffalse ] diff --git a/test/Transforms/NaCl/expand-tls-constexpr2.ll b/test/Transforms/NaCl/expand-tls-constexpr2.ll new file mode 100644 index 000000000000..ca7054961b7f --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-constexpr2.ll @@ -0,0 +1,12 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +@tvar = thread_local global i32 0 + +define i32 @get_tvar() { + ret i32 ptrtoint (i32* @tvar to i32) +} +; CHECK: %tls_raw = call i8* @llvm.nacl.read.tp() +; CHECK: %tls_struct = bitcast i8* %tls_raw to %tls_struct* +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 1, i32 0 +; CHECK: %expanded = ptrtoint i32* %field to i32 +; CHECK: ret i32 %expanded diff --git a/test/Transforms/NaCl/expand-tls-phi.ll b/test/Transforms/NaCl/expand-tls-phi.ll new file mode 100644 index 000000000000..4aa0a7a32cc5 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls-phi.ll @@ -0,0 +1,60 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + + +@tvar = thread_local global i32 123 + +define i32* @get_tvar(i1 %cmp) { +entry: + br i1 %cmp, label %return, label %else + +else: + br label %return + +return: + %result = phi i32* [ @tvar, %entry ], [ null, %else ] + ret i32* %result +} +; The TLS access gets pushed back into the PHI node's incoming block, +; which might be suboptimal but works in all cases. +; CHECK: define i32* @get_tvar(i1 %cmp) { +; CHECK: entry: +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 0 +; CHECK: else: +; CHECK: return: +; CHECK: %result = phi i32* [ %field, %entry ], [ null, %else ] + + +; This tests that ExpandTls correctly handles a PHI node that contains +; the same TLS variable twice. Using replaceAllUsesWith() is not +; correct on a PHI node when the new instruction has to be added to an +; incoming block. +define i32* @tls_phi_twice(i1 %arg) { + br i1 %arg, label %iftrue, label %iffalse +iftrue: + br label %exit +iffalse: + br label %exit +exit: + %result = phi i32* [ @tvar, %iftrue ], [ @tvar, %iffalse ] + ret i32* %result +} +; CHECK: define i32* @tls_phi_twice(i1 %arg) { +; CHECK: iftrue: +; CHECK: %field{{.*}} = getelementptr %tls_struct* %tls_struct{{.*}}, i32 -1, i32 0, i32 0 +; CHECK: iffalse: +; CHECK: %field{{.*}} = getelementptr %tls_struct* %tls_struct{{.*}}, i32 -1, i32 0, i32 0 +; CHECK: exit: +; CHECK: %result = phi i32* [ %field{{.*}}, %iftrue ], [ %field{{.*}}, %iffalse ] + + +; In this corner case, ExpandTls must expand out @tvar only once, +; otherwise it will produce invalid IR. 
+define i32* @tls_phi_multiple_entry(i1 %arg) { +entry: + br i1 %arg, label %done, label %done +done: + %result = phi i32* [ @tvar, %entry ], [ @tvar, %entry ] + ret i32* %result +} +; CHECK: define i32* @tls_phi_multiple_entry(i1 %arg) { +; CHECK: %result = phi i32* [ %field, %entry ], [ %field, %entry ] diff --git a/test/Transforms/NaCl/expand-tls.ll b/test/Transforms/NaCl/expand-tls.ll new file mode 100644 index 000000000000..77d83357e144 --- /dev/null +++ b/test/Transforms/NaCl/expand-tls.ll @@ -0,0 +1,86 @@ +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s + +; All thread-local variables should be removed +; RUN: opt < %s -nacl-expand-tls -S | FileCheck %s -check-prefix=NO_TLS + +; NO_TLS-NOT: thread_local + +@tvar1 = thread_local global i64 123 +@tvar2 = thread_local global i32 456 + + +; CHECK: %tls_init_template = type <{ i64, i32 }> +; CHECK: %tls_struct = type <{ %tls_init_template, %tls_bss_template }> +; CHECK: %tls_bss_template = type <{ [4 x i8] }> + + +; CHECK: @__tls_template_start = internal constant %tls_init_template <{ i64 123, i32 456 }> + +; CHECK: @__tls_template_alignment = internal constant i32 8 + + +define i64* @get_tvar1() { + ret i64* @tvar1 +} +; CHECK-LABEL: define i64* @get_tvar1() +; CHECK: %tls_raw = call i8* @llvm.nacl.read.tp() +; CHECK: %tls_struct = bitcast i8* %tls_raw to %tls_struct* +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 0 +; CHECK: ret i64* %field + + +define i32* @get_tvar2() { + ret i32* @tvar2 +} +; Much the same as for get_tvar1. +; CHECK-LABEL: define i32* @get_tvar2() +; CHECK: %field = getelementptr %tls_struct* %tls_struct, i32 -1, i32 0, i32 1 + + +; Check that we define global variables for TLS templates + +@__tls_template_start = external global i8 +@__tls_template_tdata_end = external global i8 +@__tls_template_end = external global i8 + +define i8* @get_tls_template_start() { + ret i8* @__tls_template_start +} +; CHECK-LABEL: define i8* @get_tls_template_start() +; CHECK: ret i8* bitcast (%tls_init_template* @__tls_template_start to i8*) + +define i8* @get_tls_template_tdata_end() { + ret i8* @__tls_template_tdata_end +} +; CHECK-LABEL: define i8* @get_tls_template_tdata_end() +; CHECK: ret i8* bitcast (%tls_init_template* getelementptr inbounds (%tls_init_template* @__tls_template_start, i32 1) to i8*) + +define i8* @get_tls_template_end() { + ret i8* @__tls_template_end +} +; CHECK-LABEL: define i8* @get_tls_template_end() +; CHECK: ret i8* bitcast (%tls_struct* getelementptr (%tls_struct* bitcast (%tls_init_template* @__tls_template_start to %tls_struct*), i32 1) to i8*) + + +; Check that we expand out the TLS layout intrinsics + +declare i32 @llvm.nacl.tp.tls.offset(i32) +declare i32 @llvm.nacl.tp.tdb.offset(i32) + +define i32 @test_get_tp_tls_offset(i32 %tls_size) { + %offset = call i32 @llvm.nacl.tp.tls.offset(i32 %tls_size) + ret i32 %offset +} +; Uses of the intrinsic are replaced with uses of a regular function. +; CHECK-LABEL: define i32 @test_get_tp_tls_offset +; CHECK: call i32 @nacl_tp_tls_offset +; NO_TLS-NOT: llvm.nacl.tp.tls.offset + +define i32 @test_get_tp_tdb_offset(i32 %tdb_size) { + %offset = call i32 @llvm.nacl.tp.tdb.offset(i32 %tdb_size) + ret i32 %offset +} +; Uses of the intrinsic are replaced with uses of a regular function. 
+; CHECK-LABEL: define i32 @test_get_tp_tdb_offset +; CHECK: call i32 @nacl_tp_tdb_offset +; NO_TLS-NOT: llvm.nacl.tp.tdb.offset diff --git a/test/Transforms/NaCl/expand-varargs-attrs.ll b/test/Transforms/NaCl/expand-varargs-attrs.ll new file mode 100644 index 000000000000..d95a572d6b2c --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs-attrs.ll @@ -0,0 +1,72 @@ +; RUN: opt < %s -expand-varargs -S | FileCheck %s + +declare i32 @varargs_func(i32 %arg, ...) + + +; Check that attributes such as "byval" are preserved on fixed arguments. + +%MyStruct = type { i64, i64 } + +define void @func_with_arg_attrs(%MyStruct* byval, ...) { + ret void +} +; CHECK: define void @func_with_arg_attrs(%MyStruct* byval, i8* noalias %varargs) { + + +declare void @take_struct_arg(%MyStruct* byval %s, ...) + +define void @call_with_arg_attrs(%MyStruct* %s) { + call void (%MyStruct*, ...)* @take_struct_arg(%MyStruct* byval %s) + ret void +} +; CHECK: define void @call_with_arg_attrs(%MyStruct* %s) { +; CHECK: call void %vararg_func(%MyStruct* byval %s, <{ i32 }>* %vararg_buffer) + + +; The "byval" attribute here should be dropped. +define i32 @pass_struct_via_vararg1(%MyStruct* %s) { + %result = call i32 (i32, ...)* @varargs_func(i32 111, %MyStruct* byval %s) + ret i32 %result +} +; CHECK: define i32 @pass_struct_via_vararg1(%MyStruct* %s) { +; CHECK: %result = call i32 %vararg_func(i32 111, <{ %MyStruct }>* %vararg_buffer) + + +; The "byval" attribute here should be dropped. +define i32 @pass_struct_via_vararg2(%MyStruct* %s) { + %result = call i32 (i32, ...)* @varargs_func(i32 111, i32 2, %MyStruct* byval %s) + ret i32 %result +} +; CHECK: define i32 @pass_struct_via_vararg2(%MyStruct* %s) { +; CHECK: %result = call i32 %vararg_func(i32 111, <{ i32, %MyStruct }>* %vararg_buffer) + + +; Check that return attributes such as "signext" are preserved. +define i32 @call_with_return_attr() { + %result = call signext i32 (i32, ...)* @varargs_func(i32 111, i64 222) + ret i32 %result +} +; CHECK: define i32 @call_with_return_attr() { +; CHECK: %result = call signext i32 %vararg_func(i32 111 + + +; Check that the "readonly" function attribute is preserved. +define i32 @call_readonly() { + %result = call i32 (i32, ...)* @varargs_func(i32 111, i64 222) readonly + ret i32 %result +} +; CHECK: define i32 @call_readonly() { +; CHECK: %result = call i32 %vararg_func(i32 111, {{.*}}) #1 + + +; Check that the "tail" attribute gets removed, because the callee +; reads space alloca'd by the caller. +define i32 @tail_call() { + %result = tail call i32 (i32, ...)* @varargs_func(i32 111, i64 222) + ret i32 %result +} +; CHECK: define i32 @tail_call() { +; CHECK: %result = call i32 %vararg_func(i32 111 + + +; CHECK: attributes #1 = { readonly } diff --git a/test/Transforms/NaCl/expand-varargs-struct.ll b/test/Transforms/NaCl/expand-varargs-struct.ll new file mode 100644 index 000000000000..b96b41875c3a --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs-struct.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -expand-varargs -S | FileCheck %s + +declare i32 @varargs_func(i32 %arg, ...) + + +%MyStruct = type { i64, i64 } + +; Test passing a struct by value. 
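+; (%MyStruct is { i64, i64 }, i.e. 16 bytes, so the expanded call below is
+; expected to copy the struct into the vararg buffer with a 16-byte
+; llvm.memcpy.)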
+define i32 @varargs_call_struct(%MyStruct* %ptr) { + %result = call i32 (i32, ...)* @varargs_func(i32 111, i64 222, %MyStruct* byval %ptr) + ret i32 %result +} +; CHECK: define i32 @varargs_call_struct(%MyStruct* %ptr) { +; CHECK: %vararg_ptr1 = getelementptr <{ i64, %MyStruct }>* %vararg_buffer, i32 0, i32 1 +; CHECK: %1 = bitcast %MyStruct* %vararg_ptr1 to i8* +; CHECK: %2 = bitcast %MyStruct* %ptr to i8* +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 16, i32 1, i1 false) diff --git a/test/Transforms/NaCl/expand-varargs.ll b/test/Transforms/NaCl/expand-varargs.ll new file mode 100644 index 000000000000..56e722a9a8c4 --- /dev/null +++ b/test/Transforms/NaCl/expand-varargs.ll @@ -0,0 +1,126 @@ +; RUN: opt < %s -expand-varargs -S | FileCheck %s + +%va_list = type i8* + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) +declare void @llvm.va_copy(i8*, i8*) + +declare i32 @outside_func(i32 %arg, %va_list* %args) + +define i32 @varargs_func(i32 %arg, ...) { + %arglist_alloc = alloca %va_list + %arglist = bitcast %va_list* %arglist_alloc to i8* + + call void @llvm.va_start(i8* %arglist) + %result = call i32 @outside_func(i32 %arg, %va_list* %arglist_alloc) + call void @llvm.va_end(i8* %arglist) + ret i32 %result +} +; CHECK: define i32 @varargs_func(i32 %arg, i8* noalias %varargs) { +; CHECK-NEXT: %arglist_alloc = alloca i8* +; CHECK-NEXT: %arglist = bitcast i8** %arglist_alloc to i8* +; CHECK-NEXT: %arglist1 = bitcast i8* %arglist to i8** +; CHECK-NEXT: store i8* %varargs, i8** %arglist1 +; CHECK-NEXT: %result = call i32 @outside_func(i32 %arg, i8** %arglist_alloc) +; CHECK-NEXT: ret i32 %result + + +define i32 @varargs_call1() { + %result = call i32 (i32, ...)* @varargs_func(i32 111, i64 222, i32 333) + ret i32 %result +} +; CHECK: define i32 @varargs_call1() { +; CHECK-NEXT: %vararg_buffer = alloca <{ i64, i32 }> +; CHECK-NEXT: %vararg_lifetime_bitcast = bitcast <{ i64, i32 }>* %vararg_buffer to i8* +; CHECK-NEXT: call void @llvm.lifetime.start(i64 12, i8* %vararg_lifetime_bitcast) +; CHECK-NEXT: %vararg_ptr = getelementptr <{ i64, i32 }>* %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i64 222, i64* %vararg_ptr +; CHECK-NEXT: %vararg_ptr1 = getelementptr <{ i64, i32 }>* %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 333, i32* %vararg_ptr1 +; CHECK-NEXT: %vararg_func = bitcast i32 (i32, ...)* bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, ...)*) to i32 (i32, <{ i64, i32 }>*)* +; CHECK-NEXT: %result = call i32 %vararg_func(i32 111, <{ i64, i32 }>* %vararg_buffer) +; CHECK-NEXT: call void @llvm.lifetime.end(i64 12, i8* %vararg_lifetime_bitcast) +; CHECK-NEXT: ret i32 %result + + +; Check that the pass works when there are no variable arguments. +define i32 @call_with_zero_varargs() { + %result = call i32 (i32, ...)* @varargs_func(i32 111) + ret i32 %result +} +; CHECK: define i32 @call_with_zero_varargs() { +; We have a dummy i32 field to deal with buggy programs: +; CHECK-NEXT: %vararg_buffer = alloca <{ i32 }> +; CHECK: %vararg_func = bitcast i32 (i32, ...)* bitcast (i32 (i32, i8*)* @varargs_func to i32 (i32, ...)*) to i32 (i32, <{ i32 }>*)* +; CHECK-NEXT: %result = call i32 %vararg_func(i32 111, <{ i32 }>* %vararg_buffer) + + +; Check that "invoke" instructions are expanded out too. 
+define i32 @varargs_invoke() { + %result = invoke i32 (i32, ...)* @varargs_func(i32 111, i64 222) + to label %cont unwind label %lpad +cont: + ret i32 %result +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i32 0 +} +; CHECK: @varargs_invoke +; CHECK: %result = invoke i32 %vararg_func(i32 111, <{ i64 }>* %vararg_buffer) +; CHECK-NEXT: to label %cont unwind label %lpad +; CHECK: cont: +; CHECK-NEXT: call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) +; CHECK: lpad: +; CHECK: call void @llvm.lifetime.end(i64 8, i8* %vararg_lifetime_bitcast) + + +define void @varargs_multiple_calls() { + %call1 = call i32 (i32, ...)* @varargs_func(i32 11, i64 22, i32 33) + %call2 = call i32 (i32, ...)* @varargs_func(i32 44, i64 55, i32 66) + ret void +} +; CHECK: @varargs_multiple_calls() +; The added allocas should appear at the start of the function. +; CHECK: %vararg_buffer{{.*}} = alloca <{ i64, i32 }> +; CHECK: %vararg_buffer{{.*}} = alloca <{ i64, i32 }> +; CHECK: %call1 = call i32 %vararg_func{{.*}}(i32 11, <{ i64, i32 }>* %vararg_buffer{{.*}}) +; CHECK: %call2 = call i32 %vararg_func{{.*}}(i32 44, <{ i64, i32 }>* %vararg_buffer{{.*}}) + + +define i32 @va_arg_i32(i8* %arglist) { + %result = va_arg i8* %arglist, i32 + ret i32 %result +} +; CHECK: define i32 @va_arg_i32(i8* %arglist) { +; CHECK-NEXT: %arglist1 = bitcast i8* %arglist to i32** +; CHECK-NEXT: %arglist_current = load i32** %arglist1 +; CHECK-NEXT: %result = load i32* %arglist_current +; CHECK-NEXT: %arglist_next = getelementptr i32* %arglist_current, i32 1 +; CHECK-NEXT: store i32* %arglist_next, i32** %arglist1 +; CHECK-NEXT: ret i32 %result + + +define i64 @va_arg_i64(i8* %arglist) { + %result = va_arg i8* %arglist, i64 + ret i64 %result +} +; CHECK: define i64 @va_arg_i64(i8* %arglist) { +; CHECK-NEXT: %arglist1 = bitcast i8* %arglist to i64** +; CHECK-NEXT: %arglist_current = load i64** %arglist1 +; CHECK-NEXT: %result = load i64* %arglist_current +; CHECK-NEXT: %arglist_next = getelementptr i64* %arglist_current, i32 1 +; CHECK-NEXT: store i64* %arglist_next, i64** %arglist1 +; CHECK-NEXT: ret i64 %result + + +define void @do_va_copy(i8* %dest, i8* %src) { + call void @llvm.va_copy(i8* %dest, i8* %src) + ret void +} +; CHECK: define void @do_va_copy(i8* %dest, i8* %src) { +; CHECK-NEXT: %vacopy_src = bitcast i8* %src to i8** +; CHECK-NEXT: %vacopy_dest = bitcast i8* %dest to i8** +; CHECK-NEXT: %vacopy_currentptr = load i8** %vacopy_src +; CHECK-NEXT: store i8* %vacopy_currentptr, i8** %vacopy_dest +; CHECK-NEXT: ret void diff --git a/test/Transforms/NaCl/flatten-globals.ll b/test/Transforms/NaCl/flatten-globals.ll new file mode 100644 index 000000000000..2d3d224b52ed --- /dev/null +++ b/test/Transforms/NaCl/flatten-globals.ll @@ -0,0 +1,200 @@ +; RUN: opt -flatten-globals %s -S | FileCheck %s +; RUN: opt -flatten-globals %s -S | FileCheck %s -check-prefix=CLEANED + +target datalayout = "p:32:32:32" + + +; Check simple cases + +@var_i32 = global i32 258 +; CHECK: @var_i32 = global [4 x i8] c"\02\01\00\00" +; CLEANED-NOT: global i32 258 + +@external_var = external global i32 +; CHECK: @external_var = external global [4 x i8] + +@zero_init = global i32 0 +; CHECK: @zero_init = global [4 x i8] zeroinitializer + +@big_zero_init = global [2000 x i8] zeroinitializer +; CHECK: @big_zero_init = global [2000 x i8] zeroinitializer + +@null_ptr = global i32* null +; CHECK: @null_ptr = global [4 x i8] zeroinitializer + +@undef_value = global i32 undef +; CHECK: @undef_value = global [4 x i8] zeroinitializer 
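+; (The flattened initializers in the CHECK lines are little-endian byte
+; arrays: i32 258 above is 0x00000102, which becomes c"\02\01\00\00", and the
+; wider integer cases below follow the same pattern.)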
+
+
+; Check various data types
+
+@var_i1 = global i8 1
+; CHECK: @var_i1 = global [1 x i8] c"\01"
+
+@var_i8 = global i8 65
+; CHECK: @var_i8 = global [1 x i8] c"A"
+
+@var_i16 = global i16 258
+; CHECK: @var_i16 = global [2 x i8] c"\02\01"
+
+@var_i64 = global i64 72623859790382856
+; CHECK: @var_i64 = global [8 x i8] c"\08\07\06\05\04\03\02\01"
+
+@var_i128 = global i128 1339673755198158349044581307228491536
+; CHECK: @var_i128 = global [16 x i8] c"\10\0F\0E\0D\0C\0B\0A\09\08\07\06\05\04\03\02\01"
+
+; Check that padding bits come out as zero.
+@var_i121 = global i121 1339673755198158349044581307228491536
+; CHECK: @var_i121 = global [16 x i8] c"\10\0F\0E\0D\0C\0B\0A\09\08\07\06\05\04\03\02\01"
+
+@var_double = global double 123.456
+; CHECK: @var_double = global [8 x i8] c"w\BE\9F\1A/\DD^@"
+
+@var_float = global float 123.0
+; CHECK: @var_float = global [4 x i8] c"\00\00\F6B"
+
+
+; Check aggregates
+
+@padded_struct = global { i8, i8, i32 } { i8 65, i8 66, i32 258 }
+; CHECK: @padded_struct = global [8 x i8] c"AB\00\00\02\01\00\00"
+
+@packed_struct = global <{ i8, i8, i32 }> <{ i8 67, i8 68, i32 258 }>
+; CHECK: @packed_struct = global [6 x i8] c"CD\02\01\00\00"
+
+@i8_array = global [6 x i8] c"Hello\00"
+; CHECK: @i8_array = global [6 x i8] c"Hello\00"
+
+@i16_array = global [3 x i16] [ i16 1, i16 2, i16 3 ]
+; CHECK: @i16_array = global [6 x i8] c"\01\00\02\00\03\00"
+
+%s = type { i8, i8 }
+@struct_array = global [2 x %s] [%s { i8 1, i8 2 }, %s { i8 3, i8 4 }]
+; CHECK: @struct_array = global [4 x i8] c"\01\02\03\04"
+
+@vector = global <2 x i32> <i32 259, i32 520>
+; CHECK: @vector = global [8 x i8] c"\03\01\00\00\08\02\00\00"
+
+
+; Check that various attributes are preserved
+
+@constant_var = constant i32 259
+; CHECK: @constant_var = constant [4 x i8] c"\03\01\00\00"
+
+@weak_external_var = extern_weak global i32
+; CHECK: @weak_external_var = extern_weak global [4 x i8]
+
+@tls_var = external thread_local global i32
+; CHECK: @tls_var = external thread_local global [4 x i8]
+
+@aligned_var = global i32 260, align 8
+; CHECK: @aligned_var = global [4 x i8] c"\04\01\00\00", align 8
+
+
+; Check alignment handling
+
+@implicit_alignment_i32 = global i32 zeroinitializer
+; CHECK: @implicit_alignment_i32 = global [4 x i8] zeroinitializer, align 4
+
+@implicit_alignment_double = global double zeroinitializer
+; CHECK: @implicit_alignment_double = global [8 x i8] zeroinitializer, align 8
+
+; FlattenGlobals is not allowed to increase the alignment of the
+; variable when an explicit section is specified (although PNaCl does
+; not support this attribute).
+@lower_alignment_section = global i32 0, section "mysection", align 1
+; CHECK: @lower_alignment_section = global [4 x i8] zeroinitializer, section "mysection", align 1
+
+; FlattenGlobals could increase the alignment when no section is
+; specified, but it does not.
+@lower_alignment = global i32 0, align 1
+; CHECK: @lower_alignment = global [4 x i8] zeroinitializer, align 1
+
+
+; Check handling of global references
+
+@var1 = external global i32
+@var2 = external global i8
+
+%ptrs1 = type { i32*, i8*, i32 }
+@ptrs1 = global %ptrs1 { i32* @var1, i8* null, i32 259 }
+; CHECK: @ptrs1 = global <{ i32, [8 x i8] }> <{ i32 ptrtoint ([4 x i8]* @var1 to i32), [8 x i8] c"\00\00\00\00\03\01\00\00" }>
+
+%ptrs2 = type { i32, i32*, i8* }
+@ptrs2 = global %ptrs2 { i32 259, i32* @var1, i8* @var2 }
+; CHECK: @ptrs2 = global <{ [4 x i8], i32, i32 }> <{ [4 x i8] c"\03\01\00\00", i32 ptrtoint ([4 x i8]* @var1 to i32), i32 ptrtoint ([1 x i8]* @var2 to i32) }>
+
+%ptrs3 = type { i32*, [3 x i8], i8* }
+@ptrs3 = global %ptrs3 { i32* @var1, [3 x i8] c"foo", i8* @var2 }
+; CHECK: @ptrs3 = global <{ i32, [4 x i8], i32 }> <{ i32 ptrtoint ([4 x i8]* @var1 to i32), [4 x i8] c"foo\00", i32 ptrtoint ([1 x i8]* @var2 to i32) }>
+
+@ptr = global i32* @var1
+; CHECK: @ptr = global i32 ptrtoint ([4 x i8]* @var1 to i32)
+
+@func_ptr = global i32* ()* @get_address
+; CHECK: @func_ptr = global i32 ptrtoint (i32* ()* @get_address to i32)
+
+@block_addr = global i8* blockaddress(@func_with_block, %label)
+; CHECK: @block_addr = global i32 ptrtoint (i8* blockaddress(@func_with_block, %label) to i32)
+
+@vector_reloc = global <2 x i32*> <i32* @var1, i32* @var1>
+; CHECK: global <{ i32, i32 }> <{ i32 ptrtoint ([4 x i8]* @var1 to i32), i32 ptrtoint ([4 x i8]* @var1 to i32) }>
+
+
+; Global references with addends
+
+@reloc_addend = global i32* getelementptr (%ptrs1* @ptrs1, i32 0, i32 2)
+; CHECK: @reloc_addend = global i32 add (i32 ptrtoint (<{ i32, [8 x i8] }>* @ptrs1 to i32), i32 8)
+
+@negative_addend = global %ptrs1* getelementptr (%ptrs1* @ptrs1, i32 -1)
+; CHECK: @negative_addend = global i32 add (i32 ptrtoint (<{ i32, [8 x i8] }>* @ptrs1 to i32), i32 -12)
+
+@const_ptr = global i32* getelementptr (%ptrs1* null, i32 0, i32 2)
+; CHECK: @const_ptr = global [4 x i8] c"\08\00\00\00"
+
+@int_to_ptr = global i32* inttoptr (i16 260 to i32*)
+; CHECK: @int_to_ptr = global [4 x i8] c"\04\01\00\00"
+
+; Clang allows "(uintptr_t) &var" as a global initializer, so we
+; handle this case.
+@ptr_to_int = global i32 ptrtoint (i8* @var2 to i32)
+; CHECK: @ptr_to_int = global i32 ptrtoint ([1 x i8]* @var2 to i32)
+
+; This is handled via Constant folding.  The getelementptr is
+; converted to an undef when it is created, so the pass does not see a
+; getelementptr here.
+@undef_gep = global i32* getelementptr (%ptrs1* undef, i32 0, i32 2)
+; CHECK: @undef_gep = global [4 x i8] zeroinitializer
+
+; Adding an offset to a function address isn't useful, but check that
+; the pass handles it anyway.
+@func_addend = global i8* getelementptr (
+    i8* bitcast (void ()* @func_with_block to i8*), i32 123)
+; CHECK: @func_addend = global i32 add (i32 ptrtoint (void ()* @func_with_block to i32), i32 123)
+
+; Similarly, adding an offset to a label address isn't useful, but
+; check it anyway.
+@block_addend = global i8* getelementptr (
+    i8* blockaddress(@func_with_block, %label), i32 100)
+; CHECK: @block_addend = global i32 add (i32 ptrtoint (i8* blockaddress(@func_with_block, %label) to i32), i32 100)
+
+
+; Special cases
+
+; Leave vars with "appending" linkage alone.
+@appending = appending global [1 x i32*] [i32* @var1] +; CHECK: @appending = appending global [1 x i32*] [i32* bitcast ([4 x i8]* @var1 to i32*)] + + +define i32* @get_address() { + ret i32* @var_i32 +} +; CHECK: define i32* @get_address() { +; CHECK-NEXT: ret i32* bitcast ([4 x i8]* @var_i32 to i32*) + + +define void @func_with_block() { + br label %label +label: + ret void +} diff --git a/test/Transforms/NaCl/globalcleanup.ll b/test/Transforms/NaCl/globalcleanup.ll new file mode 100644 index 000000000000..5f976e22da6a --- /dev/null +++ b/test/Transforms/NaCl/globalcleanup.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -nacl-global-cleanup -S | FileCheck %s +; RUN: opt < %s -nacl-global-cleanup -S | FileCheck -check-prefix=GV %s + +@llvm.compiler.used = appending global [0 x i8*] zeroinitializer, section "llvm.metadata" +@llvm.used = appending global [0 x i8*] zeroinitializer, section "llvm.metadata" + +; GV-NOT: llvm.used +; GV-NOT: llvm.compiler.used + +@extern_weak_const = extern_weak constant i32 +@extern_weak_gv = extern_weak global i32 + +; GV-NOT: @extern_weak_const +; GV-NOT: @extern_weak_gv + +; CHECK: @weak_gv = internal global +@weak_gv = weak global i32 0 + +; CHECK: define void @_start +define void @_start() { + ret void +} + +define i32* @ewgv() { +; CHECK: %bc = getelementptr i8* null, i32 0 + %bc = getelementptr i8* bitcast (i32* @extern_weak_gv to i8*), i32 0 +; CHECK: ret i32* null + ret i32* @extern_weak_gv +} + +define i32* @ewc() { +; CHECK: %bc = getelementptr i8* null, i32 0 + %bc = getelementptr i8* bitcast (i32* @extern_weak_const to i8*), i32 0 +; CHECK: ret i32* null + ret i32* @extern_weak_gv +} + +; Make sure @weak_gv is actually used. +define i32* @wgv() { +; CHECK: ret i32* @weak_gv + ret i32* @weak_gv +} + +; GV-NOT: @extern_weak_func +declare extern_weak i32 @extern_weak_func() +; CHECK: @ewf +define i32 @ewf() { +; CHECK: %ret = call i32 null() + %ret = call i32 @extern_weak_func() + ret i32 %ret +} + +; CHECK: define internal void @weak_func +define weak void @weak_func() { + ret void +} diff --git a/test/Transforms/NaCl/lit.local.cfg b/test/Transforms/NaCl/lit.local.cfg new file mode 100644 index 000000000000..a43fd3ebdd5a --- /dev/null +++ b/test/Transforms/NaCl/lit.local.cfg @@ -0,0 +1,3 @@ +# -*- Python -*- + +config.suffixes = ['.ll'] diff --git a/test/Transforms/NaCl/pnacl-abi-internalize-symbols.ll b/test/Transforms/NaCl/pnacl-abi-internalize-symbols.ll new file mode 100644 index 000000000000..76cac8a4bc51 --- /dev/null +++ b/test/Transforms/NaCl/pnacl-abi-internalize-symbols.ll @@ -0,0 +1,22 @@ +; RUN: opt %s -pnacl-abi-simplify-preopt -S | FileCheck %s + +; Checks that PNaCl ABI pre-opt simplification correctly internalizes +; symbols except _start. + +target datalayout = "p:32:32:32" + +define void @main() { +; CHECK: define internal void @main + ret void +} + +define external void @foobarbaz() { +; CHECK: define internal void @foobarbaz + ret void +} + +define void @_start() { +; CHECK: define void @_start + ret void +} + diff --git a/test/Transforms/NaCl/pnacl-abi-simplify-postopt.ll b/test/Transforms/NaCl/pnacl-abi-simplify-postopt.ll new file mode 100644 index 000000000000..74b7f8cf9b0e --- /dev/null +++ b/test/Transforms/NaCl/pnacl-abi-simplify-postopt.ll @@ -0,0 +1,23 @@ +; RUN: opt %s -pnacl-abi-simplify-postopt -S | FileCheck %s +; RUN: opt %s -pnacl-abi-simplify-postopt -S \ +; RUN: | FileCheck %s -check-prefix=CLEANUP + +; "-pnacl-abi-simplify-postopt" runs various passes which are tested +; thoroughly in other *.ll files. 
This file is a smoke test to check +; that the passes work together OK. + +target datalayout = "p:32:32:32" + +@var = global i32 256 +; CHECK: @var = global [4 x i8] + +define i16 @read_var() { + %val = load i16* bitcast (i32* @var to i16*) + ret i16 %val +} +; CHECK: = bitcast [4 x i8]* @var +; CHECK-NEXT: load i16* + +; Check that dead prototypes are successfully removed. +declare void @unused_prototype(i8*) +; CLEANUP-NOT: unused_prototype diff --git a/test/Transforms/NaCl/pnacl-abi-simplify-preopt.ll b/test/Transforms/NaCl/pnacl-abi-simplify-preopt.ll new file mode 100644 index 000000000000..487e87aa17df --- /dev/null +++ b/test/Transforms/NaCl/pnacl-abi-simplify-preopt.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -pnacl-abi-simplify-preopt -S | FileCheck %s + +; "-pnacl-abi-simplify-preopt" runs various passes which are tested +; thoroughly in other *.ll files. This file is a smoke test to check +; that "-pnacl-abi-simplify-preopt" runs what it's supposed to run. + +target datalayout = "p:32:32:32" + +declare void @ext_func() + + +define void @invoke_func() { + invoke void @ext_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret void +} +; CHECK-NOT: invoke void @ext_func() +; CHECK-NOT: landingpad + + +define void @varargs_func(...) { + ret void +} +; CHECK-NOT: @varargs_func(...) + + +@llvm.global_ctors = appending global [0 x { i32, void ()* }] zeroinitializer +; CHECK-NOT: @llvm.global_ctors + +@tls_var = thread_local global i32 0 +; CHECK-NOT: thread_local + +@alias = alias i32* @tls_var +; CHECK-NOT: @alias + +@weak_ref = extern_weak global i8* +; CHECK-NOT: extern_weak diff --git a/test/Transforms/NaCl/pnacl-eh-exception-info.ll b/test/Transforms/NaCl/pnacl-eh-exception-info.ll new file mode 100644 index 000000000000..478bc97d3f19 --- /dev/null +++ b/test/Transforms/NaCl/pnacl-eh-exception-info.ll @@ -0,0 +1,127 @@ +; RUN: opt %s -pnacl-sjlj-eh -S | FileCheck %s + +; Example std::type_info objects. +@exc_typeid1 = external global i8 +@exc_typeid2 = external global i8 +@exc_typeid3 = external global i8 + +; This must be declared for "-pnacl-sjlj-eh" to work. +@__pnacl_eh_stack = external thread_local global i8* + +declare i32 @llvm.eh.typeid.for(i8*) + +declare void @external_func() + + +@__pnacl_eh_type_table = external global i8* +@__pnacl_eh_action_table = external global i8* +@__pnacl_eh_filter_table = external global i8* + +; CHECK: %action_table_entry = type { i32, i32 } + +; CHECK: @__pnacl_eh_type_table = internal constant [4 x i8*] [i8* @exc_typeid1, i8* @exc_typeid2, i8* @exc_typeid3, i8* null] + +; CHECK: @__pnacl_eh_action_table = internal constant [7 x %action_table_entry] [%action_table_entry { i32 3, i32 0 }, %action_table_entry { i32 2, i32 1 }, %action_table_entry { i32 1, i32 2 }, %action_table_entry { i32 -1, i32 0 }, %action_table_entry { i32 -2, i32 0 }, %action_table_entry { i32 4, i32 0 }, %action_table_entry zeroinitializer] + +; CHECK: @__pnacl_eh_filter_table = internal constant [5 x i32] [i32 0, i32 2, i32 3, i32 1, i32 0] + + +; Exception type pointers are allocated IDs which specify the index +; into __pnacl_eh_type_table where the type may be found. 
+define void @test_eh_typeid(i32 %arg) { + %id1 = call i32 @llvm.eh.typeid.for(i8* @exc_typeid1) + %id2 = call i32 @llvm.eh.typeid.for(i8* @exc_typeid2) + %id3 = call i32 @llvm.eh.typeid.for(i8* @exc_typeid3) + %cmp1 = icmp eq i32 %arg, %id1 + %cmp2 = icmp eq i32 %arg, %id2 + %cmp3 = icmp eq i32 %arg, %id3 + ret void +} +; CHECK: define void @test_eh_typeid +; CHECK-NEXT: %cmp1 = icmp eq i32 %arg, 1 +; CHECK-NEXT: %cmp2 = icmp eq i32 %arg, 2 +; CHECK-NEXT: %cmp3 = icmp eq i32 %arg, 3 +; CHECK-NEXT: ret void + + +define void @test_single_catch_clause() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + catch i8* @exc_typeid3 + ret void +} +; CHECK: define void @test_single_catch_clause +; CHECK: store i32 1, i32* %exc_info_ptr + + +define void @test_multiple_catch_clauses() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + catch i8* @exc_typeid1 + catch i8* @exc_typeid2 + catch i8* @exc_typeid3 + ret void +} +; CHECK: define void @test_multiple_catch_clauses +; CHECK: store i32 3, i32* %exc_info_ptr + + +define void @test_empty_filter_clause() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + filter [0 x i8*] [] + ret void +} +; CHECK: define void @test_empty_filter_clause +; CHECK: store i32 4, i32* %exc_info_ptr + + +define void @test_filter_clause() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + filter [3 x i8*] [i8* @exc_typeid2, + i8* @exc_typeid3, + i8* @exc_typeid1] + ret void +} +; CHECK: define void @test_filter_clause +; CHECK: store i32 5, i32* %exc_info_ptr + + +; "catch i8* null" means that any C++ exception matches. +define void @test_catch_all_clause() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + catch i8* null + ret void +} +; CHECK: define void @test_catch_all_clause +; CHECK: store i32 6, i32* %exc_info_ptr + + +define void @test_cleanup_clause() { + invoke void @external_func() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null + cleanup + ret void +} +; CHECK: define void @test_cleanup_clause +; CHECK: store i32 7, i32* %exc_info_ptr diff --git a/test/Transforms/NaCl/pnacl-sjlj-eh-bug.ll b/test/Transforms/NaCl/pnacl-sjlj-eh-bug.ll new file mode 100644 index 000000000000..e0796881285c --- /dev/null +++ b/test/Transforms/NaCl/pnacl-sjlj-eh-bug.ll @@ -0,0 +1,81 @@ +; RUN: opt %s -pnacl-sjlj-eh -O2 -S | FileCheck %s + +; datalayout must be specified for GVN to work. +target datalayout = "p:32:32:32" + +; This must be declared for expanding "invoke" and "landingpad" instructions. +@__pnacl_eh_stack = external thread_local global i8* + +declare i1 @might_be_setjmp() +declare void @external_func(i32* %ptr) +declare void @var_is_nonzero() + + +; Test for a bug in which PNaClSjLjEH would transform +; @invoke_optimize_test() such that the call to @var_is_nonzero() +; could get optimized away by a later optimization pass. This +; happened because PNaClSjLjEH generated code similar to +; @branch_optimize_test() below. 
+ +define void @invoke_optimize_test() { + %var = alloca i32 + store i32 0, i32* %var + + invoke void @external_func(i32* %var) + to label %exit unwind label %lpad + +lpad: + landingpad i32 personality i8* null + catch i8* null + %value = load i32* %var + %is_zero = icmp eq i32 %value, 0 + br i1 %is_zero, label %exit, label %do_call + +do_call: + call void @var_is_nonzero() + ret void + +exit: + ret void +} +; CHECK: define void @invoke_optimize_test() +; CHECK: @var_is_nonzero() + + +; In @branch_optimize_test(), the optimizer can optimize away the call +; to @var_is_nonzero(), because it can assume that %var always +; contains 0 on the "iffalse" branch. +; +; The passes "-gvn -instcombine" are enough to do this. +; +; The optimizer can do this regardless of whether @might_be_setjmp() +; is setjmp() or a normal function. It doesn't need to know that +; @might_be_setjmp() might return twice, because storing to %var +; between setjmp() and longjmp() leaves %var pointing to an undefined +; value. + +define void @branch_optimize_test() { + %var = alloca i32 + store i32 0, i32* %var + + %cond = call i1 @might_be_setjmp() returns_twice + br i1 %cond, label %iftrue, label %iffalse + +iftrue: + call void @external_func(i32* %var) + ret void + +iffalse: + %value = load i32* %var + %is_zero = icmp eq i32 %value, 0 + br i1 %is_zero, label %exit, label %do_call + +do_call: + call void @var_is_nonzero() + ret void + +exit: + ret void +} +; CHECK: define void @branch_optimize_test() +; CHECK-NOT: @var_is_nonzero diff --git a/test/Transforms/NaCl/pnacl-sjlj-eh.ll b/test/Transforms/NaCl/pnacl-sjlj-eh.ll new file mode 100644 index 000000000000..566a19a14905 --- /dev/null +++ b/test/Transforms/NaCl/pnacl-sjlj-eh.ll @@ -0,0 +1,173 @@ +; RUN: opt %s -pnacl-sjlj-eh -S | FileCheck %s + +; This must be declared for expanding "invoke" and "landingpad" instructions. +@__pnacl_eh_stack = external thread_local global i8* + +; This must be declared for expanding "resume" instructions. 
+declare void @__pnacl_eh_resume(i32* %exception) + +declare i32 @external_func(i64 %arg) +declare void @external_func_void() +declare i32 @my_setjmp() + + +; CHECK: %ExceptionFrame = type { [1024 x i8], %ExceptionFrame*, i32 } + +define i32 @invoke_test(i64 %arg) { + %result = invoke i32 @external_func(i64 %arg) + to label %cont unwind label %lpad +cont: + ret i32 %result +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i32 999 +} +; CHECK-LABEL: define i32 @invoke_test +; CHECK-NEXT: %invoke_result_ptr = alloca i32 +; CHECK-NEXT: %pnacl_eh_stack = bitcast i8** @__pnacl_eh_stack to %ExceptionFrame** +; CHECK-NEXT: %invoke_frame = alloca %ExceptionFrame, align 8 +; CHECK-NEXT: %invoke_jmp_buf = getelementptr %ExceptionFrame* %invoke_frame, i32 0, i32 0, i32 0 +; CHECK-NEXT: %invoke_next = getelementptr %ExceptionFrame* %invoke_frame, i32 0, i32 1 +; CHECK-NEXT: %exc_info_ptr = getelementptr %ExceptionFrame* %invoke_frame, i32 0, i32 2 +; CHECK-NEXT: %old_eh_stack = load %ExceptionFrame** %pnacl_eh_stack +; CHECK-NEXT: store %ExceptionFrame* %old_eh_stack, %ExceptionFrame** %invoke_next +; CHECK-NEXT: store i32 {{[0-9]+}}, i32* %exc_info_ptr +; CHECK-NEXT: store %ExceptionFrame* %invoke_frame, %ExceptionFrame** %pnacl_eh_stack +; CHECK-NEXT: %invoke_is_exc = call i32 @invoke_test_setjmp_caller(i64 %arg, i32 (i64)* @external_func, i8* %invoke_jmp_buf, i32* %invoke_result_ptr) +; CHECK-NEXT: %result = load i32* %invoke_result_ptr +; CHECK-NEXT: store %ExceptionFrame* %old_eh_stack, %ExceptionFrame** %pnacl_eh_stack +; CHECK-NEXT: %invoke_sj_is_zero = icmp eq i32 %invoke_is_exc, 0 +; CHECK-NEXT: br i1 %invoke_sj_is_zero, label %cont, label %lpad +; CHECK-LABEL: cont: +; CHECK-NEXT: ret i32 %result +; CHECK-LABEL: lpad: +; CHECK-NEXT: %landingpad_ptr = bitcast i8* %invoke_jmp_buf to { i8*, i32 }* +; CHECK-NEXT: %lp = load { i8*, i32 }* %landingpad_ptr +; CHECK-NEXT: ret i32 999 + +; Check definition of helper function: +; CHECK-LABEL: define internal i32 @invoke_test_setjmp_caller(i64 %arg, i32 (i64)* %func_ptr, i8* %jmp_buf, i32* %result_ptr) { +; CHECK-NEXT: %invoke_sj = call i32 @llvm.nacl.setjmp(i8* %jmp_buf) [[RETURNS_TWICE:#[0-9]+]] +; CHECK-NEXT: %invoke_sj_is_zero = icmp eq i32 %invoke_sj, 0 +; CHECK-NEXT: br i1 %invoke_sj_is_zero, label %normal, label %exception +; CHECK-LABEL: normal: +; CHECK-NEXT: %result = call i32 %func_ptr(i64 %arg) +; CHECK-NEXT: store i32 %result, i32* %result_ptr +; CHECK-NEXT: ret i32 0 +; CHECK-LABEL: exception: +; CHECK-NEXT: ret i32 1 + + +; A landingpad block may be used by multiple "invoke" instructions. +define i32 @shared_landingpad(i64 %arg) { + %result1 = invoke i32 @external_func(i64 %arg) + to label %cont1 unwind label %lpad +cont1: + %result2 = invoke i32 @external_func(i64 %arg) + to label %cont2 unwind label %lpad +cont2: + ret i32 %result2 +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i32 999 +} +; CHECK-LABEL: define i32 @shared_landingpad +; CHECK: br i1 %invoke_sj_is_zero{{[0-9]*}}, label %cont1, label %lpad +; CHECK: br i1 %invoke_sj_is_zero{{[0-9]*}}, label %cont2, label %lpad + + +; Check that the pass can handle a landingpad appearing before an invoke. +define i32 @landingpad_before_invoke() { + ret i32 123 + +dead_block: + %lp = landingpad i32 personality i8* null cleanup + ret i32 %lp +} +; CHECK-LABEL: define i32 @landingpad_before_invoke +; CHECK: %lp = load i32* %landingpad_ptr + + +; Test the expansion of the "resume" instruction. 
+define void @test_resume({ i8*, i32 } %arg) { + resume { i8*, i32 } %arg +} +; CHECK-LABEL: define void @test_resume +; CHECK-NEXT: %resume_exc = extractvalue { i8*, i32 } %arg, 0 +; CHECK-NEXT: %resume_cast = bitcast i8* %resume_exc to i32* +; CHECK-NEXT: call void @__pnacl_eh_resume(i32* %resume_cast) +; CHECK-NEXT: unreachable + + +; Check that call attributes are preserved. +define i32 @call_attrs(i64 %arg) { + %result = invoke fastcc i32 @external_func(i64 inreg %arg) noreturn + to label %cont unwind label %lpad +cont: + ret i32 %result +lpad: + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i32 999 +} +; CHECK-LABEL: define i32 @call_attrs +; CHECK: %result = call fastcc i32 %func_ptr(i64 inreg %arg) [[NORETURN:#[0-9]+]] + + +; If the PNaClSjLjEH pass needs to insert any instructions into the +; non-exceptional path, check that PHI nodes are updated correctly. +; (An earlier version needed to do this, but the current version +; doesn't.) +define i32 @invoke_with_phi_nodes(i64 %arg) { +entry: + %result = invoke i32 @external_func(i64 %arg) + to label %cont unwind label %lpad +cont: + %cont_phi = phi i32 [ 100, %entry ] + ret i32 %cont_phi +lpad: + %lpad_phi = phi i32 [ 200, %entry ] + %lp = landingpad { i8*, i32 } personality i8* null cleanup + ret i32 %lpad_phi +} +; CHECK-LABEL: define i32 @invoke_with_phi_nodes +; CHECK-LABEL: cont: +; CHECK-NEXT: %cont_phi = phi i32 [ 100, %entry ] +; CHECK-NEXT: ret i32 %cont_phi +; CHECK-LABEL: lpad: +; CHECK-NEXT: %lpad_phi = phi i32 [ 200, %entry ] +; CHECK: ret i32 %lpad_phi + + +; Test "void" result type from "invoke". This requires special +; handling because void* is not a valid type. +define void @invoke_void_result() { + invoke void @external_func_void() to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null cleanup + ret void +} +; CHECK-LABEL: define void @invoke_void_result() +; "%result_ptr" argument is omitted from the helper function: +; CHECK-LABEL: define internal i32 @invoke_void_result_setjmp_caller(void ()* %func_ptr, i8* %jmp_buf) + + +; A call to setjmp() cannot be moved into a helper function, so test +; that it isn't moved. +define void @invoke_setjmp() { + %x = invoke i32 @my_setjmp() returns_twice to label %cont unwind label %lpad +cont: + ret void +lpad: + landingpad i32 personality i8* null cleanup + ret void +} +; CHECK-LABEL: define void @invoke_setjmp() +; CHECK-NOT: call +; CHECK: %x = call i32 @my_setjmp() [[RETURNS_TWICE]] +; CHECK-NEXT: br label %cont + + +; CHECK: attributes [[RETURNS_TWICE]] = { returns_twice } +; CHECK: attributes [[NORETURN]] = { noreturn } diff --git a/test/Transforms/NaCl/promote-i1-ops.ll b/test/Transforms/NaCl/promote-i1-ops.ll new file mode 100644 index 000000000000..245004b6813d --- /dev/null +++ b/test/Transforms/NaCl/promote-i1-ops.ll @@ -0,0 +1,77 @@ +; RUN: opt %s -nacl-promote-i1-ops -S | FileCheck %s + +; Test that the PromoteI1Ops pass expands out i1 loads/stores and i1 +; comparison and arithmetic operations, with the exception of "and", +; "or" and "xor". + + +; i1 loads and stores are converted to i8 load and stores with +; explicit casts. 
+
+define i1 @load(i1* %ptr) {
+ %val = load i1* %ptr
+ ret i1 %val
+}
+; CHECK: define i1 @load
+; CHECK-NEXT: %ptr.i8ptr = bitcast i1* %ptr to i8*
+; CHECK-NEXT: %val.pre_trunc = load i8* %ptr.i8ptr
+; CHECK-NEXT: %val = trunc i8 %val.pre_trunc to i1
+
+define void @store(i1 %val, i1* %ptr) {
+ store i1 %val, i1* %ptr
+ ret void
+}
+; CHECK: define void @store
+; CHECK-NEXT: %ptr.i8ptr = bitcast i1* %ptr to i8*
+; CHECK-NEXT: %val.expand_i1_val = zext i1 %val to i8
+; CHECK-NEXT: store i8 %val.expand_i1_val, i8* %ptr.i8ptr
+
+
+; i1 arithmetic and comparisons are converted to their i8 equivalents
+; with explicit casts.
+
+define i1 @add(i1 %x, i1 %y) {
+ %result = add i1 %x, %y
+ ret i1 %result
+}
+; CHECK: define i1 @add
+; CHECK-NEXT: %x.expand_i1_val = zext i1 %x to i8
+; CHECK-NEXT: %y.expand_i1_val = zext i1 %y to i8
+; CHECK-NEXT: %result.pre_trunc = add i8 %x.expand_i1_val, %y.expand_i1_val
+; CHECK-NEXT: %result = trunc i8 %result.pre_trunc to i1
+
+define i1 @compare(i1 %x, i1 %y) {
+ %result = icmp slt i1 %x, %y
+ ret i1 %result
+}
+; CHECK: define i1 @compare
+; CHECK-NEXT: %x.expand_i1_val = sext i1 %x to i8
+; CHECK-NEXT: %y.expand_i1_val = sext i1 %y to i8
+; CHECK-NEXT: %result = icmp slt i8 %x.expand_i1_val, %y.expand_i1_val
+
+
+; Non-shift bitwise operations should not be modified.
+define void @bitwise_ops(i1 %x, i1 %y) {
+ %and = and i1 %x, %y
+ %or = or i1 %x, %y
+ %xor = xor i1 %x, %y
+ ret void
+}
+; CHECK: define void @bitwise_ops
+; CHECK-NEXT: %and = and i1 %x, %y
+; CHECK-NEXT: %or = or i1 %x, %y
+; CHECK-NEXT: %xor = xor i1 %x, %y
+
+
+define void @unchanged_cases(i32 %x, i32 %y, i32* %ptr) {
+ %add = add i32 %x, %y
+ %cmp = icmp slt i32 %x, %y
+ %val = load i32* %ptr
+ store i32 %x, i32* %ptr
+ ret void
+}
+; CHECK: define void @unchanged_cases
+; CHECK-NEXT: %add = add i32 %x, %y
+; CHECK-NEXT: %cmp = icmp slt i32 %x, %y
+; CHECK-NEXT: %val = load i32* %ptr
+; CHECK-NEXT: store i32 %x, i32* %ptr
diff --git a/test/Transforms/NaCl/promote-integers.ll b/test/Transforms/NaCl/promote-integers.ll
new file mode 100644
index 000000000000..1067e25fd58d
--- /dev/null
+++ b/test/Transforms/NaCl/promote-integers.ll
@@ -0,0 +1,400 @@
+; RUN: opt < %s -nacl-promote-ints -S | FileCheck %s
+
+declare void @consume_i16(i16 %a)
+
+; CHECK: @sext_to_illegal
+; CHECK-NEXT: %a40 = sext i32 %a to i64
+; (0xFFFFFFFFFF)
+define void @sext_to_illegal(i32 %a) {
+ %a40 = sext i32 %a to i40
+ ret void
+}
+
+; CHECK: @sext_from_illegal
+define void @sext_from_illegal(i8 %a) {
+; CHECK: call void @consume_i16(i16 -2)
+ %c12 = sext i12 -2 to i16
+ call void @consume_i16(i16 %c12)
+; CHECK: %a12 = sext i8 %a to i16
+ %a12 = sext i8 %a to i12
+; CHECK: %a12.getsign = shl i16 %a12, 4
+; CHECK-NEXT: %a16 = ashr i16 %a12.getsign, 4
+ %a16 = sext i12 %a12 to i16
+; CHECK: %a12.getsign1 = shl i16 %a12, 4
+; CHECK-NEXT: %a14 = ashr i16 %a12.getsign1, 4
+; (0x3FFF)
+ %a14 = sext i12 %a12 to i14
+; CHECK-NEXT: %a12.getsign2 = shl i16 %a12, 4
+; CHECK-NEXT: %a12.signed = ashr i16 %a12.getsign2, 4
+; CHECK-NEXT: %a24 = sext i16 %a12.signed to i32
+; (0xFFFFFF)
+ %a24 = sext i12 %a12 to i24
+
+ %a37 = zext i8 %a to i37
+; CHECK: %a37.getsign = shl i64 %a37, 27
+; CHECK-NEXT: %a64 = ashr i64 %a37.getsign, 27
+ %a64 = sext i37 %a37 to i64
+ ret void
+}
+
+; CHECK: @zext_to_illegal
+define void @zext_to_illegal(i32 %a) {
+; CHECK: zext i32 %a to i64
+; CHECK-NOT: and
+ %a40 = zext i32 %a to i40
+ ret void
+}
+
+; CHECK: @zext_from_illegal
+define void @zext_from_illegal(i8 %a) {
+; get some illegal
values to start with + %a24 = zext i8 %a to i24 + %a40 = zext i8 %a to i40 + %a18 = zext i8 %a to i18 + +; CHECK: %a32 = and i32 %a24, 16777215 +; (0xFFFFFF) + %a32 = zext i24 %a24 to i32 + +; CHECK: %b24 = and i32 %a18, 262143 +; (0x3FFFF) + %b24 = zext i18 %a18 to i24 + +; CHECK: %a24.clear = and i32 %a24, 16777215 +; CHECK: %b40 = zext i32 %a24.clear to i64 + %b40 = zext i24 %a24 to i40 + +; CHECK: call void @consume_i16(i16 4094) + %c16 = zext i12 -2 to i16 + call void @consume_i16(i16 %c16) +; CHECK: call void @consume_i16(i16 4094) + %c14 = zext i12 -2 to i14 + %c16.2 = zext i14 %c14 to i16 + call void @consume_i16(i16 %c16.2) + ret void +} + +; CHECK: @trunc_from_illegal +define void @trunc_from_illegal(i8 %a) { + %a24 = zext i8 %a to i24 +; CHECK: %a16 = trunc i32 %a24 to i16 + %a16 = trunc i24 %a24 to i16 + ret void +} + +; CHECK: @trunc_to_illegal +define void @trunc_to_illegal(i8 %a8) { + %a = zext i8 %a8 to i32 +; CHECK-NOT: trunc i32 %a +; CHECK-NOT: and + %a24 = trunc i32 %a to i24 + +; CHECK: %a12 = trunc i32 %a24 to i16 +; CHECK-NOT: and + %a12 = trunc i24 %a24 to i12 + ret void +} + +; CHECK: @icmpsigned +define void @icmpsigned(i32 %a) { + %shl = trunc i32 %a to i24 +; CHECK: %shl.getsign = shl i32 %shl, 8 +; CHECK-NEXT: %shl.signed = ashr i32 %shl.getsign, 8 +; CHECK-NEXT: %cmp = icmp slt i32 %shl.signed, -2 + %cmp = icmp slt i24 %shl, -2 + ret void +} + +%struct.ints = type { i32, i32 } +; CHECK: @bc1 +; CHECK: bc1 = bitcast i32* %a to i64* +; CHECK-NEXT: bc2 = bitcast i64* %bc1 to i32* +; CHECK-NEXT: bc3 = bitcast %struct.ints* null to i64* +; CHECK-NEXT: bc4 = bitcast i64* %bc1 to %struct.ints* +define i32* @bc1(i32* %a) { + %bc1 = bitcast i32* %a to i40* + %bc2 = bitcast i40* %bc1 to i32* + %bc3 = bitcast %struct.ints* null to i40* + %bc4 = bitcast i40* %bc1 to %struct.ints* + ret i32* %bc2 +} + +; CHECK: zext i32 %a to i64 +; CHECK: and i64 %a40, 255 +define void @and1(i32 %a) { + %a40 = zext i32 %a to i40 + %and = and i40 %a40, 255 + ret void +} + +; CHECK: @andi3 +define void @andi3(i8 %a) { + %a3 = trunc i8 %a to i3 +; CHECK: and i8 %a3, 2 + %and = and i3 %a3, 2 + ret void +} + +; CHECK: @ori7 +define void @ori7(i8 %a, i8 %b) { + %a7 = trunc i8 %a to i7 + %b7 = trunc i8 %b to i7 +; CHECK: %or = or i8 %a7, %b7 + %or = or i7 %a7, %b7 + ret void +} + +; CHECK: @add1 +define void @add1(i16 %a) { +; CHECK-NEXT: %a24 = sext i16 %a to i32 + %a24 = sext i16 %a to i24 +; CHECK-NEXT: %sum = add i32 %a24, 16777214 + %sum = add i24 %a24, -2 +; CHECK-NEXT: %sumnsw = add nsw i32 %a24, 16777214 + %sumnsw = add nsw i24 %a24, -2 +; CHECK-NEXT: %sumnuw = add nuw i32 %a24, 16777214 + %sumnuw = add nuw i24 %a24, -2 +; CHECK-NEXT: %sumnw = add nuw nsw i32 %a24, 16777214 + %sumnw = add nuw nsw i24 %a24, -2 + ret void +} + +; CHECK: @mul1 +define void @mul1(i32 %a, i32 %b) { +; CHECK-NEXT: %a33 = sext i32 %a to i64 + %a33 = sext i32 %a to i33 +; CHECK-NEXT: %b33 = sext i32 %b to i64 + %b33 = sext i32 %b to i33 +; CHECK-NEXT: %product = mul i64 %a33, %b33 + %product = mul i33 %a33, %b33 +; CHECK-NEXT: %prodnw = mul nuw nsw i64 %a33, %b33 + %prodnw = mul nuw nsw i33 %a33, %b33 + ret void +} + +; CHECK: @shl1 +define void @shl1(i16 %a) { + %a24 = zext i16 %a to i24 +; CHECK: %ashl = shl i32 %a24, 5 + %ashl = shl i24 %a24, 5 + +; CHECK-NEXT: %ashl2 = shl i32 %a24, 1 + %ashl2 = shl i24 %a24, 4278190081 ;0xFF000001 + + %b24 = zext i16 %a to i24 +; CHECK: %b24.clear = and i32 %b24, 16777215 +; CHECK-NEXT: %bshl = shl i32 %a24, %b24.clear + %bshl = shl i24 %a24, %b24 + ret void +} + +; CHECK: 
@shlnuw +define void @shlnuw(i16 %a) { + %a12 = trunc i16 %a to i12 +; CHECK: %ashl = shl nuw i16 %a12, 5 + %ashl = shl nuw i12 %a12, 5 + ret void +} + +; CHECK: @lshr1 +define void @lshr1(i16 %a) { + %a24 = zext i16 %a to i24 +; CHECK: %a24.clear = and i32 %a24, 16777215 +; CHECK-NEXT: %b = lshr i32 %a24.clear, 20 + %b = lshr i24 %a24, 20 +; CHECK-NEXT: %a24.clear1 = and i32 %a24, 16777215 +; CHECK-NEXT: %c = lshr i32 %a24.clear1, 5 + %c = lshr i24 %a24, 5 + + %b24 = zext i16 %a to i24 + %d = lshr i24 %a24, %b24 +; CHECK: %a24.clear2 = and i32 %a24, 16777215 +; CHECK-NEXT: %b24.clear = and i32 %b24, 16777215 +; CHECK-NEXT: %d = lshr i32 %a24.clear2, %b24.clear + ret void +} + +; CHECK: @ashr1 +define void @ashr1(i16 %a) { + %a24 = sext i16 %a to i24 +; CHECK: %a24.getsign = shl i32 %a24, 8 +; CHECK-NEXT: %b24 = ashr i32 %a24.getsign, 19 + %b24 = ashr i24 %a24, 11 +; CHECK-NEXT: %a24.getsign1 = shl i32 %a24, 8 +; CHECK-NEXT: %b24.clear = and i32 %b24, 16777215 +; CHECK-NEXT: %a24.shamt = add i32 %b24.clear, 8 +; CHECK-NEXT: %c = ashr i32 %a24.getsign1, %a24.shamt + %c = ashr i24 %a24, %b24 + ret void +} + +; CHECK: @phi_icmp +define void @phi_icmp(i32 %a) { +entry: + br label %loop +loop: +; CHECK: %phi40 = phi i64 [ 1099511627774, %entry ], [ %phi40, %loop ] + %phi40 = phi i40 [ -2, %entry ], [ %phi40, %loop ] +; CHECK-NEXT: %phi40.clear = and i64 %phi40, 1099511627775 +; CHECK-NEXT: %b = icmp eq i64 %phi40.clear, 1099511627775 + %b = icmp eq i40 %phi40, -1 +; CHECK-NEXT: br i1 %b, label %loop, label %end + br i1 %b, label %loop, label %end +end: + ret void +} + +; CHECK: @icmp_ult +define void @icmp_ult(i32 %a) { + %a40 = zext i32 %a to i40 +; CHECK: %a40.clear = and i64 %a40, 1099511627775 +; CHECK-NEXT: %b = icmp ult i64 %a40.clear, 1099511627774 + %b = icmp ult i40 %a40, -2 + +; CHECK: %a40.clear1 = and i64 %a40, 1099511627775 +; CHECK-NEXT: %b40.clear = and i64 %b40, 1099511627775 +; CHECK-NEXT: %c = icmp ult i64 %a40.clear1, %b40.clear + %b40 = zext i32 %a to i40 + %c = icmp ult i40 %a40, %b40 + ret void +} + +; CHECK: @select1 +define void @select1(i32 %a) { + %a40 = zext i32 %a to i40 +; CHECK: %s40 = select i1 true, i64 %a40, i64 1099511627775 + %s40 = select i1 true, i40 %a40, i40 -1 + ret void +} + +; CHECK: @alloca40 +; CHECK: %a = alloca i64, align 8 +define void @alloca40() { + %a = alloca i40, align 8 + %b = bitcast i40* %a to i8* + %c = load i8* %b + ret void +} + +; CHECK: @load24 +; CHECK: %bc.loty = bitcast i32* %bc to i16* +; CHECK-NEXT: %load.lo = load i16* %bc.loty +; CHECK-NEXT: %load.lo.ext = zext i16 %load.lo to i32 +; CHECK-NEXT: %bc.hi = getelementptr i16* %bc.loty, i32 1 +; CHECK-NEXT: %bc.hity = bitcast i16* %bc.hi to i8* +; CHECK-NEXT: %load.hi = load i8* %bc.hity +; CHECK-NEXT: %load.hi.ext = zext i8 %load.hi to i32 +; CHECK-NEXT: %load.hi.ext.sh = shl i32 %load.hi.ext, 16 +; CHECK-NEXT: %load = or i32 %load.lo.ext, %load.hi.ext.sh +define void @load24(i8* %a) { + %bc = bitcast i8* %a to i24* + %load = load i24* %bc, align 8 + ret void +} + +; CHECK: @load48 +; CHECK: %bc.loty = bitcast i64* %bc to i32* +; CHECK-NEXT: %load.lo = load i32* %bc.loty +; CHECK-NEXT: %load.lo.ext = zext i32 %load.lo to i64 +; CHECK-NEXT: %bc.hi = getelementptr i32* %bc.loty, i32 1 +; CHECK-NEXT: %bc.hity = bitcast i32* %bc.hi to i16* +; CHECK-NEXT: %load.hi = load i16* %bc.hity +; CHECK-NEXT: %load.hi.ext = zext i16 %load.hi to i64 +; CHECK-NEXT: %load.hi.ext.sh = shl i64 %load.hi.ext, 32 +; CHECK-NEXT: %load = or i64 %load.lo.ext, %load.hi.ext.sh +define void @load48(i32* %a) { 
+ %bc = bitcast i32* %a to i48* + %load = load i48* %bc, align 8 + ret void +} + +; CHECK: %bc = bitcast i32* %a to i64* +; CHECK-NEXT: %bc.loty = bitcast i64* %bc to i32* +; CHECK-NEXT: %load.lo = load i32* %bc.loty +; CHECK-NEXT: %load.lo.ext = zext i32 %load.lo to i64 +; CHECK-NEXT: %bc.hi = getelementptr i32* %bc.loty, i32 1 +; CHECK-NEXT: %bc.hity.loty = bitcast i32* %bc.hi to i16* +; CHECK-NEXT: %load.hi.lo = load i16* %bc.hity.loty +; CHECK-NEXT: %load.hi.lo.ext = zext i16 %load.hi.lo to i32 +; CHECK-NEXT: %bc.hity.hi = getelementptr i16* %bc.hity.loty, i32 1 +; CHECK-NEXT: %bc.hity.hity = bitcast i16* %bc.hity.hi to i8* +; CHECK-NEXT: %load.hi.hi = load i8* %bc.hity.hity +; CHECK-NEXT: %load.hi.hi.ext = zext i8 %load.hi.hi to i32 +; CHECK-NEXT: %load.hi.hi.ext.sh = shl i32 %load.hi.hi.ext, 16 +; CHECK-NEXT: %load.hi = or i32 %load.hi.lo.ext, %load.hi.hi.ext.sh +; CHECK-NEXT: %load.hi.ext = zext i32 %load.hi to i64 +; CHECK-NEXT: %load.hi.ext.sh = shl i64 %load.hi.ext, 32 +; CHECK-NEXT: %load = or i64 %load.lo.ext, %load.hi.ext.sh +define void @load56(i32* %a) { + %bc = bitcast i32* %a to i56* + %load = load i56* %bc + ret void +} + +; CHECK: @store24 +; CHECK: %b24 = zext i8 %b to i32 +; CHECK-NEXT: %bc.loty = bitcast i32* %bc to i16* +; CHECK-NEXT: %b24.lo = trunc i32 %b24 to i16 +; CHECK-NEXT: store i16 %b24.lo, i16* %bc.loty +; CHECK-NEXT: %b24.hi.sh = lshr i32 %b24, 16 +; CHECK-NEXT: %bc.hi = getelementptr i16* %bc.loty, i32 1 +; CHECK-NEXT: %b24.hi = trunc i32 %b24.hi.sh to i8 +; CHECK-NEXT: %bc.hity = bitcast i16* %bc.hi to i8* +; CHECK-NEXT: store i8 %b24.hi, i8* %bc.hity +define void @store24(i8* %a, i8 %b) { + %bc = bitcast i8* %a to i24* + %b24 = zext i8 %b to i24 + store i24 %b24, i24* %bc + ret void +} + +; CHECK: @store56 +; CHECK: %b56 = zext i8 %b to i64 +; CHECK-NEXT: %bc.loty = bitcast i64* %bc to i32* +; CHECK-NEXT: %b56.lo = trunc i64 %b56 to i32 +; CHECK-NEXT: store i32 %b56.lo, i32* %bc.loty +; CHECK-NEXT: %b56.hi.sh = lshr i64 %b56, 32 +; CHECK-NEXT: %bc.hi = getelementptr i32* %bc.loty, i32 1 +; CHECK-NEXT: %bc.hity.loty = bitcast i32* %bc.hi to i16* +; CHECK-NEXT: %b56.hi.sh.lo = trunc i64 %b56.hi.sh to i16 +; CHECK-NEXT: store i16 %b56.hi.sh.lo, i16* %bc.hity.loty +; CHECK-NEXT: %b56.hi.sh.hi.sh = lshr i64 %b56.hi.sh, 16 +; CHECK-NEXT: %bc.hity.hi = getelementptr i16* %bc.hity.loty, i32 1 +; CHECK-NEXT: %b56.hi.sh.hi = trunc i64 %b56.hi.sh.hi.sh to i8 +; CHECK-NEXT: %bc.hity.hity = bitcast i16* %bc.hity.hi to i8* +; CHECK-NEXT: store i8 %b56.hi.sh.hi, i8* %bc.hity.hity +define void @store56(i8* %a, i8 %b) { + %bc = bitcast i8* %a to i56* + %b56 = zext i8 %b to i56 + store i56 %b56, i56* %bc + ret void +} + +; CHECK: @undefoperand +; CHECK-NEXT: %a40 = zext i32 %a to i64 +; CHECK-NEXT: %au = and i64 %a40, undef +define void @undefoperand(i32 %a) { + %a40 = zext i32 %a to i40 + %au = and i40 %a40, undef + ret void +} + +; CHECK: @switch +; CHECK-NEXT: %a24 = zext i16 %a to i32 +; CHECK-NEXT: %a24.clear = and i32 %a24, 16777215 +; CHECK-NEXT: switch i32 %a24.clear, label %end [ +; CHECK-NEXT: i32 0, label %if1 +; CHECK-NEXT: i32 1, label %if2 +define void @switch(i16 %a) { + %a24 = zext i16 %a to i24 + switch i24 %a24, label %end [ + i24 0, label %if1 + i24 1, label %if2 + ] +if1: + ret void +if2: + ret void +end: + ret void +} diff --git a/test/Transforms/NaCl/promote-returned-structs.ll b/test/Transforms/NaCl/promote-returned-structs.ll new file mode 100644 index 000000000000..302a04be7e03 --- /dev/null +++ b/test/Transforms/NaCl/promote-returned-structs.ll 
@@ -0,0 +1,34 @@ +; RUN: opt -S -promote-returned-structures < %s | FileCheck %s + +; I have to admit testing this pass is problematic: type renaming throws a lot off. + +@g1 = global { {}* ()* } { {}* ()* @_ZNf1 } +; CHECK: @g1 = global %0 { i8* ()* @_ZNf1 } +@g2 = global { { i32 } ()* } { { i32 } ()* @_ZNf2 } +; CHECK: @g2 = global %1 { void (%2*)* @_ZNf2 } +@g3 = global { { i32 } ({ i32 }*)* } { { i32 } ({ i32 }*)* @_ZNf3 } +; CHECK: @g3 = global %3 { void (%2*, %2*)* @_ZNf3 } + +; leave {}* alone: +define {}* @_ZNf1() { +; CHECK-LABEL: define i8* @_ZNf1() + ret {}* null +; CHECK: ret i8* null +} + +define { i32 } @_ZNf2() { + ret { i32 } zeroinitializer +} +; CHECK-LABEL: define void @_ZNf2(%2* sret) +; CHECK-NEXT: store %2 zeroinitializer, %2* %0 +; CHECK-NEXT: ret void + +; shift attributes right: +define { i32 } @_ZNf3({ i32 }* byval) { + %a1 = load { i32 }* %0 + ret { i32 } %a1 +} +; CHECK-LABEL: define void @_ZNf3(%2* sret, %2* byval) +; CHECK-NEXT: %a1 = load %2* %1 +; CHECK-NEXT: store %2 %a1, %2* %0 +; CHECK-NEXT: ret void \ No newline at end of file diff --git a/test/Transforms/NaCl/promote-simple-structs.ll b/test/Transforms/NaCl/promote-simple-structs.ll new file mode 100644 index 000000000000..ad1a9a6cd7f0 --- /dev/null +++ b/test/Transforms/NaCl/promote-simple-structs.ll @@ -0,0 +1,317 @@ +; RUN: opt -promote-simple-structs -S %s | FileCheck %s + +%not_promoted = type { i32, i64, i32* } +%nested1 = type { %not_promoted } +%nested2 = type { %nested1 } +%promoted1 = type { i32* } +%promoted2 = type { i64 } +%promoted3 = type { %not_promoted (%promoted1)*, i64 (%promoted2*)* } +%promoted4 = type { %promoted1, %promoted3 } +%linked_list = type { %promoted1, %linked_list* } +; CHECK: %"enum.trie::Child<()>.3" = type { i8, [3 x i8], i32 } +%"enum.trie::Child<()>" = type { i8, [3 x i8], [1 x i32] } +; C;HECK: %"struct.trie::TrieNode<()>.2" = type { i32, [16 x %"enum.trie::Child<()>.3"] } +%"struct.trie::TrieNode<()>" = type { i32, [16 x %"enum.trie::Child<()>"] } +; C;HECK: %tydesc = type { i32, i32, void ({}*, i8*)*, void ({}*, i8*)*, void ({}*, i8*)*, void ({}*, i8*)*, i32, { i8*, i32 } } +%tydesc = type { i32, i32, void ({}*, i8*)*, void ({}*, i8*)*, void ({}*, i8*)*, void ({}*, i8*)*, i32, { i8*, i32 } } +%"enum.option::Option<(*libc::types::common::c95::c_void,~local_data::LocalData:Send,local_data::LoanState)>" = type { i8, [3 x i8], [4 x i32] } +%"enum.libc::types::common::c95::c_void" = type {} + +@g1 = global i32 42, align 4 + +; CHECK-LABEL: define %not_promoted @not_promoted_fun1(%not_promoted* %a1, i32 %a2) +define %not_promoted @not_promoted_fun1(%not_promoted* %a1, i32 %a2) { +; CHECK: %1 = load %not_promoted* %a1 + %1 = load %not_promoted* %a1 +; CHECK: %2 = extractvalue %not_promoted %1, 0 + %2 = extractvalue %not_promoted %1, 0 +; CHECK: %3 = add i32 %2, %a2 + %3 = add i32 %2, %a2 +; CHECK: %4 = insertvalue %not_promoted %1, i32 %3, 0 + %4 = insertvalue %not_promoted %1, i32 %3, 0 +; CHECK: ret %not_promoted %4 + ret %not_promoted %4 +} + +; CHECK-LABEL: define i32* @f1(i32* %a1) +define %promoted1 @f1(i32* %a1) { +; CHECK: ret i32* %a1 + %1 = insertvalue %promoted1 undef, i32* %a1, 0 + ret %promoted1 %1 +} +; CHECK-LABEL: define i32* @f2(i32* %a1) +define i32* @f2(%promoted1 %a1) { + %1 = extractvalue %promoted1 %a1, 0 +; CHECK: ret i32* %a1 + ret i32* %1 +} + +; CHECK-LABEL: define i64 @f3(i64* %a1) +define i64 @f3(%promoted2* %a1) { +; CHECK: %1 = load i64* %a1 + %1 = load %promoted2* %a1 +; CHECK-NOT: %2 = extractvalue %promoted2 %1, 0 + %2 = extractvalue 
%promoted2 %1, 0 +; CHECK: ret i64 %1 + ret i64 %2 +} + +; CHECK-LABEL: define i64 @f4(i64** %a1) +define i64 @f4(%promoted2** %a1) { +; CHECK: %1 = load i64** %a1 + %1 = load %promoted2** %a1 +; CHECK: %2 = load i64* %1 + %2 = load %promoted2* %1 +; CHECK-NOT: %3 = extractvalue %promoted2 %2, 0 + %3 = extractvalue %promoted2 %2, 0 +; CHECK: ret i64 %2 + ret i64 %3 +} + +; CHECK-LABEL: define i32* @f5() +define %promoted1 @f5() { + %1 = insertvalue %promoted1 undef, i32* @g1, 0 +; CHECK: ret i32* @g1 + ret %promoted1 %1 +} +; CHECK-LABEL: define %not_promoted @f6(i32* %a1, i64 %a2) +define %not_promoted @f6(%promoted1 %a1, %promoted2 %a2) { +; CHECK: %1 = call i32* @f2(i32* %a1) + %1 = call i32* @f2(%promoted1 %a1) +; CHECK: %2 = insertvalue %not_promoted undef, i32* %1, 2 + %2 = insertvalue %not_promoted undef, i32* %1, 2 +; CHECK: %3 = alloca i64 + %3 = alloca %promoted2 +; CHECK: store i64 %a2, i64* %3 + store %promoted2 %a2, %promoted2* %3 +; CHECK: %4 = call i64 @f3(i64* %3) + %4 = call i64 @f3(%promoted2* %3) +; CHECK: %5 = insertvalue %not_promoted %2, i64 %4, 1 + %5 = insertvalue %not_promoted %2, i64 %4, 1 +; CHECK: %6 = insertvalue %not_promoted %5, i32 10, 0 + %6 = insertvalue %not_promoted %5, i32 10, 0 +; CHECK: ret %not_promoted %6 + ret %not_promoted %6 +} +; CHECK-LABEL: define %not_promoted @f7(i32* %a1) +define %not_promoted @f7(%promoted1 %a1) { +Entry: +; CHECK: %0 = call i32* @f2(i32* %a1) + %0 = call i32* @f2(%promoted1 %a1) + %1 = load i32* %0 + %2 = icmp eq i32 %1, 0 + br i1 %2, label %Null, label %NotNull + +Null: +; CHECK: %3 = call i32* @f1(i32* @g1) + %3 = call %promoted1 @f1(i32* @g1) +; CHECK: %4 = call %not_promoted @f7(i32* %3) + %4 = call %not_promoted @f7(%promoted1 %3) + br label %Exit + +NotNull: + %5 = phi i32* [ %0, %Entry ] +; CHECK: %6 = call i32* @f1(i32* %5) + %6 = call %promoted1 @f1(i32* %5) +; CHECK-NOT: %7 = insertvalue %promoted1 undef, i32* %5, 0 + %7 = insertvalue %promoted1 undef, i32* %5, 0 +; CHECK-NOT: %8 = insertvalue %promoted2 undef, i64 16, 0 + %8 = insertvalue %promoted2 undef, i64 16, 0 +; CHECK: %7 = call %not_promoted @f6(i32* %5, i64 16) + %9 = call %not_promoted @f6(%promoted1 %7, %promoted2 %8) + br label %Exit + +Exit: + %10 = phi %not_promoted [ %4, %Null ], [ %9, %NotNull ] +; CHECK: %9 = phi i32* [ %3, %Null ], [ %6, %NotNull ] + %11 = phi %promoted1 [ %3, %Null ], [ %6, %NotNull ] +; CHECK: %10 = call i32* @f2(i32* %9) + %12 = call i32* @f2(%promoted1 %11) + %13 = load i32* %12 + %14 = insertvalue %not_promoted %10, i32 %13, 0 + ret %not_promoted %14 +} +; CHECK-LABEL: define %not_promoted @f8(%not_promoted (i32*)* %a1) +define %not_promoted @f8(%not_promoted (%promoted1)* %a1) { + %1 = alloca i32 + store i32 42, i32* %1 +; CHECK: %2 = call i32* @f1(i32* %1) + %2 = call %promoted1 @f1(i32* %1) +; CHECK: %3 = call %not_promoted %a1(i32* %2) + %3 = call %not_promoted %a1(%promoted1 %2) + ret %not_promoted %3 +} + +define %not_promoted @f9() { +; CHECK: %1 = call %not_promoted @f8(%not_promoted (i32*)* @f7) + %1 = call %not_promoted @f8(%not_promoted (%promoted1)* @f7) + ret %not_promoted %1 +} +; CHECK-LABEL: define %promoted3.0 @f10() +define %promoted3 @f10() { +; CHECK: %1 = insertvalue %promoted3.0 undef, %not_promoted (i32*)* @f7, 0 + %1 = insertvalue %promoted3 undef, %not_promoted (%promoted1)* @f7, 0 +; CHECK: %2 = insertvalue %promoted3.0 %1, i64 (i64*)* @f3, 1 + %2 = insertvalue %promoted3 %1, i64 (%promoted2*)* @f3, 1 +; CHECK: ret %promoted3.0 %2 + ret %promoted3 %2 +} +; CHECK-LABEL: define %not_promoted @f11() 
+define %not_promoted @f11() { +; CHECK: %1 = call %promoted3.0 @f10() + %1 = call %promoted3 @f10() +; CHECK: %2 = extractvalue %promoted3.0 %1, 0 + %2 = extractvalue %promoted3 %1, 0 +; CHECK: %3 = extractvalue %promoted3.0 %1, 1 + %3 = extractvalue %promoted3 %1, 1 +; CHECK: %4 = call %not_promoted @f8(%not_promoted (i32*)* %2) + %4 = call %not_promoted @f8(%not_promoted (%promoted1)* %2) +; CHECK: ret %not_promoted %4 + ret %not_promoted %4 +} +define %promoted1 @f12() { + %1 = bitcast %promoted3 ()* @f10 to i32* + %2 = call %promoted1 @f1(i32* %1) + %3 = call %not_promoted @f7(%promoted1 %2) + ret %promoted1 %2 +} +define void @f13() { + %1 = call %promoted1 @f12() + %2 = call i32* @f2(%promoted1 %1) + %3 = bitcast i32* %2 to %promoted3 ()* + %4 = call %promoted3 %3() + ret void +} +define void @f14(%linked_list* %a1, %linked_list* %a2) { + %1 = load %linked_list* %a1 + %2 = insertvalue %linked_list %1, %linked_list* %a2, 1 + store %linked_list %2, %linked_list* %a1 + ret void +} + +; Function Attrs: inlinehint uwtable +define internal void @_ZN4trie8TrieNode3new69h3210031e2fef4109c0163f68203dffa433c4a299f6bad390a11a2f2c49d0df2cJyaj8v0.9.preE(%"struct.trie::TrieNode<()>"* noalias sret, { i32, %tydesc*, i8*, i8*, i8 }*) unnamed_addr #4 { +"function top level": + %2 = getelementptr inbounds %"struct.trie::TrieNode<()>"* %0, i32 0, i32 0 + store i32 0, i32* %2 + %3 = getelementptr inbounds %"struct.trie::TrieNode<()>"* %0, i32 0, i32 1 + %4 = getelementptr inbounds [16 x %"enum.trie::Child<()>"]* %3, i32 0, i32 0 + %5 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 0 + %6 = getelementptr inbounds %"enum.trie::Child<()>"* %5, i32 0, i32 0 + store i8 2, i8* %6 + %7 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 1 + %8 = getelementptr inbounds %"enum.trie::Child<()>"* %7, i32 0, i32 0 + store i8 2, i8* %8 + %9 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 2 + %10 = getelementptr inbounds %"enum.trie::Child<()>"* %9, i32 0, i32 0 + store i8 2, i8* %10 + %11 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 3 + %12 = getelementptr inbounds %"enum.trie::Child<()>"* %11, i32 0, i32 0 + store i8 2, i8* %12 + %13 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 4 + %14 = getelementptr inbounds %"enum.trie::Child<()>"* %13, i32 0, i32 0 + store i8 2, i8* %14 + %15 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 5 + %16 = getelementptr inbounds %"enum.trie::Child<()>"* %15, i32 0, i32 0 + store i8 2, i8* %16 + %17 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 6 + %18 = getelementptr inbounds %"enum.trie::Child<()>"* %17, i32 0, i32 0 + store i8 2, i8* %18 + %19 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 7 + %20 = getelementptr inbounds %"enum.trie::Child<()>"* %19, i32 0, i32 0 + store i8 2, i8* %20 + %21 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 8 + %22 = getelementptr inbounds %"enum.trie::Child<()>"* %21, i32 0, i32 0 + store i8 2, i8* %22 + %23 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 9 + %24 = getelementptr inbounds %"enum.trie::Child<()>"* %23, i32 0, i32 0 + store i8 2, i8* %24 + %25 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 10 + %26 = getelementptr inbounds %"enum.trie::Child<()>"* %25, i32 0, i32 0 + store i8 2, i8* %26 + %27 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 11 + %28 = getelementptr inbounds %"enum.trie::Child<()>"* %27, i32 0, i32 0 + store i8 2, i8* %28 + %29 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 12 + %30 = 
getelementptr inbounds %"enum.trie::Child<()>"* %29, i32 0, i32 0 + store i8 2, i8* %30 + %31 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 13 + %32 = getelementptr inbounds %"enum.trie::Child<()>"* %31, i32 0, i32 0 + store i8 2, i8* %32 + %33 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 14 + %34 = getelementptr inbounds %"enum.trie::Child<()>"* %33, i32 0, i32 0 + store i8 2, i8* %34 + %35 = getelementptr inbounds %"enum.trie::Child<()>"* %4, i32 15 + %36 = getelementptr inbounds %"enum.trie::Child<()>"* %35, i32 0, i32 0 + store i8 2, i8* %36 + ret void +} +; Function Attrs: inlinehint uwtable +define internal %"enum.libc::types::common::c95::c_void"* @_ZN7reflect14MovePtrAdaptor5align4anon7expr_fn6zxa7a9E({ i32, %tydesc*, i8*, i8*, i8 }*, %"enum.libc::types::common::c95::c_void"*) unnamed_addr #4 { +; CHECK-LABEL: @_ZN7reflect14MovePtrAdaptor5align4anon7expr_fn6zxa7a9E +"function top level": + %__arg = alloca %"enum.libc::types::common::c95::c_void"* + %p = alloca %"enum.libc::types::common::c95::c_void"* +; CHECK: %__debuginfo_env_ptr = alloca i32** + %__debuginfo_env_ptr = alloca { i32* }* + %2 = alloca i32 + store %"enum.libc::types::common::c95::c_void"* %1, %"enum.libc::types::common::c95::c_void"** %__arg + %3 = load %"enum.libc::types::common::c95::c_void"** %__arg + store %"enum.libc::types::common::c95::c_void"* %3, %"enum.libc::types::common::c95::c_void"** %p + %4 = bitcast { i32, %tydesc*, i8*, i8*, i8 }* %0 to { i32, %tydesc*, i8*, i8*, { i32* } }* + %5 = getelementptr inbounds { i32, %tydesc*, i8*, i8*, { i32* } }* %4, i32 0, i32 4 + store { i32* }* %5, { i32* }** %__debuginfo_env_ptr + %6 = getelementptr inbounds { i32* }* %5, i32 0, i32 0 + %7 = load i32** %6 + %8 = load %"enum.libc::types::common::c95::c_void"** %p + %9 = ptrtoint %"enum.libc::types::common::c95::c_void"* %8 to i32 + %10 = load i32* %7 + %11 = load i32* %7 + store i32 %11, i32* %2 + %12 = load i32* %2 + %13 = inttoptr i32 %12 to %"enum.libc::types::common::c95::c_void"* + ret %"enum.libc::types::common::c95::c_void"* %13 +} + +define internal void @"_ZN142unboxed_vec$LT$option..Option$LT$$LP$$RP$libc..types..common..c95..c_void$C$$UP$local_data..LocalData.Send$C$local_data..LoanState$RP$$GT$$GT$9glue_drop67hdc2acc74788e8d0863032f96df4ac96f58cf283c1470b3e70ab84f6398ec1bdeaJE"({}*, { i32, i32, [0 x %"enum.option::Option<(*libc::types::common::c95::c_void,~local_data::LocalData:Send,local_data::LoanState)>"] }*) unnamed_addr { +"function top level": + %2 = getelementptr inbounds { i32, i32, [0 x %"enum.option::Option<(*libc::types::common::c95::c_void,~local_data::LocalData:Send,local_data::LoanState)>"] }* %1, i32 0, i32 0 + %3 = load i32* %2 + %4 = getelementptr inbounds { i32, i32, [0 x %"enum.option::Option<(*libc::types::common::c95::c_void,~local_data::LocalData:Send,local_data::LoanState)>"] }* %1, i32 0, i32 2, i32 0 + %5 = bitcast %"enum.option::Option<(*libc::types::common::c95::c_void,~local_data::LocalData:Send,local_data::LoanState)>"* %4 to i8* + %6 = getelementptr inbounds i8* %5, i32 %3 + %7 = bitcast i8* %6 to %"enum.option::Option<(*libc::types::common::c95::c_void,~local_data::LocalData:Send,local_data::LoanState)>"* + br label %iter_vec_loop_header + +iter_vec_loop_header: ; preds = %iter_vec_loop_body, %"function top level" + %8 = phi %"enum.option::Option<(*libc::types::common::c95::c_void,~local_data::LocalData:Send,local_data::LoanState)>"* [ %4, %"function top level" ], [ %10, %iter_vec_loop_body ] + %9 = icmp ult 
%"enum.option::Option<(*libc::types::common::c95::c_void,~local_data::LocalData:Send,local_data::LoanState)>"* %8, %7 + br i1 %9, label %iter_vec_loop_body, label %iter_vec_next + +iter_vec_loop_body: ; preds = %iter_vec_loop_header + %10 = getelementptr inbounds %"enum.option::Option<(*libc::types::common::c95::c_void,~local_data::LocalData:Send,local_data::LoanState)>"* %8, i32 1 + br label %iter_vec_loop_header + +iter_vec_next: ; preds = %iter_vec_loop_header + ret void +} +define internal void @"_ZN51_$x5btrie..Child$LT$$LP$$RP$$GT$$C$$x20..$x2016$x5d9glue_drop67hf75fc21ffe9fcb2cdb87b4fc93776499375d61a8fd9b32988157b4dec027e4e8arE"({}*, [16 x %"enum.trie::Child<()>"]*) unnamed_addr { +"function top level": + %2 = getelementptr inbounds [16 x %"enum.trie::Child<()>"]* %1, i32 0, i32 0 + %3 = bitcast %"enum.trie::Child<()>"* %2 to i8* + %4 = getelementptr inbounds i8* %3, i32 128 + %5 = bitcast i8* %4 to %"enum.trie::Child<()>"* + br label %iter_vec_loop_header + +iter_vec_loop_header: ; preds = %iter_vec_loop_body, %"function top level" + %6 = phi %"enum.trie::Child<()>"* [ %2, %"function top level" ], [ %8, %iter_vec_loop_body ] + %7 = icmp ult %"enum.trie::Child<()>"* %6, %5 + br i1 %7, label %iter_vec_loop_body, label %iter_vec_next + +iter_vec_loop_body: ; preds = %iter_vec_loop_header + %8 = getelementptr inbounds %"enum.trie::Child<()>"* %6, i32 1 + br label %iter_vec_loop_header + +iter_vec_next: ; preds = %iter_vec_loop_header + ret void +} \ No newline at end of file diff --git a/test/Transforms/NaCl/promote-value-structure-args.ll b/test/Transforms/NaCl/promote-value-structure-args.ll new file mode 100644 index 000000000000..612fc175b6b2 --- /dev/null +++ b/test/Transforms/NaCl/promote-value-structure-args.ll @@ -0,0 +1,16 @@ +; RUN: opt -S -promote-structure-arguments < %s | FileCheck %s + +; I have to admit testing this pass is problematic: type renaming throws a lot off. + +%struct1 = type { i32 } +%struct2 = type { %struct1 (%struct1)* } + +@g1 = global %struct2 { %struct1 (%struct1)* @f } +; CHECK: @g1 = global %struct2.1 { %struct1.0 (%struct1.0*)* @f } + +define %struct1 @f(%struct1) { + ret %struct1 %0 +} +; CHECK-LABEL: define %struct1.0 @f(%struct1.0* byval) +; CHECK-NEXT: load %struct1.0* %0 +; CHECK-NEXT: ret %struct1.0 \ No newline at end of file diff --git a/test/Transforms/NaCl/remove-asm-memory.ll b/test/Transforms/NaCl/remove-asm-memory.ll new file mode 100644 index 000000000000..ae799a74597c --- /dev/null +++ b/test/Transforms/NaCl/remove-asm-memory.ll @@ -0,0 +1,88 @@ +; RUN: opt < %s -nacl-rewrite-atomics -remove-asm-memory -S | \ +; RUN: FileCheck %s +; RUN: opt < %s -O3 -nacl-rewrite-atomics -remove-asm-memory -S | \ +; RUN: FileCheck %s +; RUN: opt < %s -O3 -nacl-rewrite-atomics -remove-asm-memory -S | \ +; RUN: FileCheck %s -check-prefix=ELIM +; RUN: opt < %s -nacl-rewrite-atomics -remove-asm-memory -S | \ +; RUN: FileCheck %s -check-prefix=CLEANED + +; ``asm("":::"memory")`` is used as a compiler barrier and the GCC-style +; builtin ``__sync_synchronize`` is intended as a barrier for all memory +; that could be observed by external threads. They both get rewritten +; for NaCl by Clang to a sequentially-consistent fence surrounded by +; ``call void asm sideeffect "", "~{memory}"``. +; +; The test is also run at O3 to make sure that non-volatile and +; non-atomic loads and stores to escaping objects (i.e. loads and stores +; which could be observed by other threads) don't get unexpectedly +; eliminated. 
+ +; CLEANED-NOT: asm + +target datalayout = "p:32:32:32" + +@a = external global i32 +@b = external global i32 + +; Different triples encode ``asm("":::"memory")``'s "touch everything" +; constraints differently. They should get detected and removed. +define void @memory_assembly_encoding_test() { +; CHECK: @memory_assembly_encoding_test() + call void asm sideeffect "", "~{memory}"() + call void asm sideeffect "", "~{memory},~{dirflag},~{fpsr},~{flags}"() + call void asm sideeffect "", "~{foo},~{memory},~{bar}"() + + ret void + ; CHECK-NEXT: ret void +} + +define void @memory_assembly_ordering_test() { +; CHECK: @memory_assembly_ordering_test() + %1 = load i32* @a, align 4 + store i32 %1, i32* @b, align 4 + call void asm sideeffect "", "~{memory}"() + fence seq_cst + call void asm sideeffect "", "~{memory}"() + ; CHECK-NEXT: %1 = load i32* @a, align 4 + ; CHECK-NEXT: store i32 %1, i32* @b, align 4 + ; CHECK-NEXT: call void @llvm.nacl.atomic.fence.all() + + ; Redundant load from the previous location, and store to the same + ; location (making the previous one dead). Shouldn't get eliminated + ; because of the fence. + %2 = load i32* @a, align 4 + store i32 %2, i32* @b, align 4 + call void asm sideeffect "", "~{memory}"() + fence seq_cst + call void asm sideeffect "", "~{memory}"() + ; CHECK-NEXT: %2 = load i32* @a, align 4 + ; CHECK-NEXT: store i32 %2, i32* @b, align 4 + ; CHECK-NEXT: call void @llvm.nacl.atomic.fence.all() + + ; Same here. + %3 = load i32* @a, align 4 + store i32 %3, i32* @b, align 4 + ; CHECK-NEXT: %3 = load i32* @a, align 4 + ; CHECK-NEXT: store i32 %3, i32* @b, align 4 + + ret void + ; CHECK-NEXT: ret void +} + +; Same function as above, but without the barriers. At O3 some loads and +; stores should get eliminated. +define void @memory_ordering_test() { +; ELIM: @memory_ordering_test() + %1 = load i32* @a, align 4 + store i32 %1, i32* @b, align 4 + %2 = load i32* @a, align 4 + store i32 %2, i32* @b, align 4 + %3 = load i32* @a, align 4 + store i32 %3, i32* @b, align 4 + ; ELIM-NEXT: %1 = load i32* @a, align 4 + ; ELIM-NEXT: store i32 %1, i32* @b, align 4 + + ret void + ; ELIM-NEXT: ret void +} diff --git a/test/Transforms/NaCl/replace-arrays-with-ints.ll b/test/Transforms/NaCl/replace-arrays-with-ints.ll new file mode 100644 index 000000000000..fcb51a6a80b3 --- /dev/null +++ b/test/Transforms/NaCl/replace-arrays-with-ints.ll @@ -0,0 +1,304 @@ +; RUN: opt -replace-aggregates-with-ints -S < %s | FileCheck %s + +target datalayout = "p:32:32:32" + +; functions taken from rustc output. References to other functions 'n things were removed. 
+ +define internal void @_ZN5c_str14check_for_null67hbf4e3101dcb4056349d6b8c20609ff49be8c536440089c7d70f397149cc8b8daaz4v0.0E(i32, i32, i32) { +"function top level": + %__arg = alloca i8, i32 4, align 4 + %__arg.asint = ptrtoint i8* %__arg to i32 + %i = alloca i8, i32 4, align 4 + %3 = alloca i8, i32 12, align 8 + %.asint = ptrtoint i8* %3 to i32 + %4 = alloca i8, i32 4, align 4 + %5 = alloca i8, i32 8, align 8 + %.asint2 = ptrtoint i8* %5 to i32 + %__llmatch = alloca i8, i32 4, align 4 + %i2 = alloca i8, i32 4, align 4 + %p = alloca i8, i32 4, align 4 + %6 = alloca i8, i32 2, align 8 + %.asint3 = ptrtoint i8* %6 to i32 + %7 = alloca i8, i32 4, align 4 + %__self = alloca i8, i32 4, align 4 + %__llmatch5 = alloca i8, i32 4, align 4 + %c = alloca i8, align 1 + %8 = alloca i8, i32 2, align 8 + %.asint5 = ptrtoint i8* %8 to i32 + %__arg.bc = bitcast i8* %__arg to i32* + store i32 %2, i32* %__arg.bc, align 1 + %.bc = bitcast i8* %4 to i32* + %9 = load i32* %.bc, align 1 + store i32 %9, i32* %.bc, align 1 + %.bc14 = bitcast i8* %4 to i32* + %10 = load i32* %.bc14, align 1 + %i.bc = bitcast i8* %i to i32* + store i32 %.asint, i32* %i.bc, align 1 + br label %"`loop`" + +next: ; preds = %then, %"`loop`" + ret void + +"`loop`": ; preds = %match_else6, %match_else, %"function top level" + %i.bc15 = bitcast i8* %i to i32* + %11 = load i32* %i.bc15, align 1 + %12 = load i8* %5, align 1 + %cond = icmp eq i8 %12, 0 + br i1 %cond, label %next, label %match_else + +match_else: ; preds = %"`loop`" + %gep = add i32 %.asint2, 4 + %__llmatch.bc = bitcast i8* %__llmatch to i32* + store i32 %gep, i32* %__llmatch.bc, align 1 + %__llmatch.bc16 = bitcast i8* %__llmatch to i32* + %13 = load i32* %__llmatch.bc16, align 1 + %.asptr = inttoptr i32 %13 to i32* + %14 = load i32* %.asptr, align 1 + %i2.bc = bitcast i8* %i2 to i32* + store i32 %14, i32* %i2.bc, align 1 + %i2.bc17 = bitcast i8* %i2 to i32* + %15 = load i32* %i2.bc17, align 1 + %p.bc = bitcast i8* %p to i32* + store i32 undef, i32* %p.bc, align 1 + %p.bc18 = bitcast i8* %p to i32* + %16 = load i32* %p.bc18, align 1 + %17 = load i32* %p.bc18, align 1 + %.asptr25 = inttoptr i32 %17 to i8* + %18 = load i8* %.asptr25, align 1 + %19 = icmp eq i8 %18, 0 + %20 = zext i1 %19 to i8 + %21 = icmp ne i8 %20, 0 + br i1 %21, label %then, label %"`loop`" + +then: ; preds = %match_else + %22 = alloca i8, i32 4, align 4 + %.asint6 = ptrtoint i8* %22 to i32 + %.bc19 = bitcast i8* %22 to i32* + %23 = load i32* %.bc19, align 1 + %.bc20 = bitcast i8* %7 to i32* + store i32 %23, i32* %.bc20, align 1 + %.bc21 = bitcast i8* %7 to i32* + %24 = load i32* %.bc21, align 1 + %__self.bc = bitcast i8* %__self to i32* + store i32 %24, i32* %__self.bc, align 1 + %__self.bc22 = bitcast i8* %__self to i32* + %25 = load i32* %__self.bc22, align 1 + %26 = alloca i8, i32 2, align 8 + %.asint7 = ptrtoint i8* %26 to i32 + %expanded13 = ptrtoint [12 x i8]* null to i32 + %.field = load i8* %26, align 1 + %gep35 = add i32 %.asint7, 1 + %gep35.asptr = inttoptr i32 %gep35 to [0 x i8]* +; CHECK-NOT: %gep35.asptr = inttoptr i32 %gep35 to [0 x i8]* + %.field12 = load [0 x i8]* %gep35.asptr, align 1 +; CHECK-NOT: %.field12 = load [0 x i8]* %gep35.asptr, align 1 + %gep37 = add i32 %.asint7, 1 + %gep37.asptr = inttoptr i32 %gep37 to i8* + %.field15 = load i8* %gep37.asptr, align 1 + store i8 %.field, i8* %6, align 1 + %gep40 = add i32 %.asint3, 1 + %gep40.asptr = inttoptr i32 %gep40 to [0 x i8]* +; CHECK-NOT: %gep40.asptr = inttoptr i32 %gep40 to [0 x i8]* + store [0 x i8] %.field12, [0 x i8]* %gep40.asptr, 
align 1 +; CHECK-NOT: store [0 x i8] %.field12, [0 x i8]* %gep40.asptr, align 1 + %gep42 = add i32 %.asint3, 1 + %gep42.asptr = inttoptr i32 %gep42 to i8* + store i8 %.field15, i8* %gep42.asptr, align 1 + %.field21 = load i8* %6, align 1 + %gep45 = add i32 %.asint3, 1 + %gep45.asptr = inttoptr i32 %gep45 to [0 x i8]* +; CHECK-NOT: %gep45.asptr = inttoptr i32 %gep45 to [0 x i8]* + %.field24 = load [0 x i8]* %gep45.asptr, align 1 +; CHECK-NOT: %.field24 = load [0 x i8]* %gep45.asptr, align 1 + %gep47 = add i32 %.asint3, 1 + %gep47.asptr = inttoptr i32 %gep47 to i8* + %.field27 = load i8* %gep47.asptr, align 1 + store i8 %.field21, i8* %8, align 1 + %gep50 = add i32 %.asint5, 1 + %gep50.asptr = inttoptr i32 %gep50 to [0 x i8]* +; CHECK-NOT: %gep50.asptr = inttoptr i32 %gep50 to [0 x i8]* + store [0 x i8] %.field24, [0 x i8]* %gep50.asptr, align 1 +; CHECK-NOT: store [0 x i8] %.field24, [0 x i8]* %gep50.asptr, align 1 + %gep52 = add i32 %.asint5, 1 + %gep52.asptr = inttoptr i32 %gep52 to i8* + store i8 %.field27, i8* %gep52.asptr, align 1 + %27 = load i8* %8, align 1 + %cond10 = icmp eq i8 %27, 0 + br i1 %cond10, label %next, label %match_else6 + +match_else6: ; preds = %then + %gep55 = add i32 %.asint5, 1 + %__llmatch5.bc = bitcast i8* %__llmatch5 to i32* + store i32 %gep55, i32* %__llmatch5.bc, align 1 + %__llmatch5.bc23 = bitcast i8* %__llmatch5 to i32* + %28 = load i32* %__llmatch5.bc23, align 1 + %.asptr26 = inttoptr i32 %28 to i8* + %29 = load i8* %.asptr26, align 1 + store i8 %29, i8* %c, align 1 + %p.bc24 = bitcast i8* %p to i32* + %30 = load i32* %p.bc24, align 1 + %31 = load i8* %c, align 1 + %.asptr27 = inttoptr i32 %30 to i8* + store i8 %31, i8* %.asptr27, align 1 + br label %"`loop`" +} + +define internal i32 @"_ZN3str6traits23TotalOrd$__extensions__3cmp69h96bba0320007768f6adbf4b6f49860e1e3d6288b2613ce406ea5e56333029e20lBaD4v0.0E"(i32, i32) { +"function top level": + %__make_return_pointer = alloca i8, align 1 + %__self = alloca i8, i32 4, align 4 + %__arg = alloca i8, i32 4, align 4 + %i = alloca i8, i32 4, align 4 + %2 = alloca i8, i32 40, align 8 + %.asint = ptrtoint i8* %2 to i32 + %3 = alloca i8, i32 20, align 8 + %.asint1 = ptrtoint i8* %3 to i32 + %4 = alloca i8, i32 20, align 8 + %.asint2 = ptrtoint i8* %4 to i32 + %5 = alloca i8, i32 3, align 8 + %.asint3 = ptrtoint i8* %5 to i32 + %__llmatch = alloca i8, i32 4, align 4 + %s_b = alloca i8, align 1 + %s_b.asint = ptrtoint i8* %s_b to i32 + %__llmatch2 = alloca i8, i32 4, align 4 + %o_b = alloca i8, align 1 + %o_b.asint = ptrtoint i8* %o_b to i32 + %6 = alloca i8, i32 3, align 8 + %.asint4 = ptrtoint i8* %6 to i32 + %7 = alloca i8, align 1 + %8 = alloca i8, align 1 + %9 = alloca i8, i32 4, align 4 + %10 = alloca i8, i32 4, align 4 + %.asint8 = ptrtoint i8* %10 to i32 + %11 = alloca i8, i32 4, align 4 + %12 = alloca i8, i32 4, align 4 + %.asint10 = ptrtoint i8* %12 to i32 + %__self.bc = bitcast i8* %__self to i32* + store i32 %0, i32* %__self.bc, align 1 + %__arg.bc = bitcast i8* %__arg to i32* + store i32 %1, i32* %__arg.bc, align 1 + %__self.bc19 = bitcast i8* %__self to i32* + %13 = load i32* %__self.bc19, align 1 + %__arg.bc20 = bitcast i8* %__arg to i32* + %14 = load i32* %__arg.bc20, align 1 + %i.bc = bitcast i8* %i to i32* + store i32 %.asint, i32* %i.bc, align 1 + br label %"`loop`" + +return: ; preds = %match_case8, %match_case7, %match_case + %15 = load i8* %__make_return_pointer, align 1 + %.ret_ext = zext i8 %15 to i32 + ret i32 %.ret_ext + +"`loop`": ; preds = %match_else, %"function top level" + %i.bc21 = 
bitcast i8* %i to i32* + %16 = load i32* %i.bc21, align 1 + %17 = alloca i8, i32 3, align 8 + %.asint11 = ptrtoint i8* %17 to i32 + %.field = load i8* %17, align 1 + %gep = add i32 %.asint11, 1 + %gep.asptr = inttoptr i32 %gep to [0 x i8]* +; CHECK-NOT: %gep.asptr = inttoptr i32 %gep to [0 x i8]* + %.field11 = load [0 x i8]* %gep.asptr, align 1 +; CHECK-NOT: %.field11 = load [0 x i8]* %gep.asptr, align 1 + %gep4 = add i32 %.asint11, 1 + %gep4.asptr = inttoptr i32 %gep4 to [2 x i8]* +; CHECK: %gep4.asptr = inttoptr i32 %gep4 to i16* + %.field14 = load [2 x i8]* %gep4.asptr, align 1 +; CHECK %.field14 = load i16* %gep4.asptr, align 1 + store i8 %.field, i8* %5, align 1 + %gep7 = add i32 %.asint3, 1 + %gep7.asptr = inttoptr i32 %gep7 to [0 x i8]* +; CHECK-NOT: %gep7.asptr = inttoptr i32 %gep7 to [0 x i8]* + store [0 x i8] %.field11, [0 x i8]* %gep7.asptr, align 1 +; CHECK-NOT: store [0 x i8] %.field11, [0 x i8]* %gep7.asptr, align 1 + %gep9 = add i32 %.asint3, 1 + %gep9.asptr = inttoptr i32 %gep9 to [2 x i8]* +; CHECK: %gep9.asptr = inttoptr i32 %gep9 to i16* + store [2 x i8] %.field14, [2 x i8]* %gep9.asptr, align 1 +; CHECK: store i16 %.field14, i16* %gep9.asptr, align 1 + %.field20 = load i8* %5, align 1 + %gep12 = add i32 %.asint3, 1 + %gep12.asptr = inttoptr i32 %gep12 to [0 x i8]* +; CHECK-NOT: %gep12.asptr = inttoptr i32 %gep12 to [0 x i8]* + %.field23 = load [0 x i8]* %gep12.asptr, align 1 +; CHECK-NOT: %.field23 = load [0 x i8]* %gep12.asptr, align 1 + %gep14 = add i32 %.asint3, 1 + %gep14.asptr = inttoptr i32 %gep14 to [2 x i8]* +; CHECK: %gep14.asptr = inttoptr i32 %gep14 to i16* + %.field26 = load [2 x i8]* %gep14.asptr, align 1 +; CHECK: %.field26 = load i16* %gep14.asptr, align 1 + store i8 %.field20, i8* %6, align 1 + %gep17 = add i32 %.asint4, 1 + %gep17.asptr = inttoptr i32 %gep17 to [0 x i8]* +; CHECK-NOT: %gep17.asptr = inttoptr i32 %gep17 to [0 x i8]* + store [0 x i8] %.field23, [0 x i8]* %gep17.asptr, align 1 +; CHECK-NOT: store [0 x i8] %.field23, [0 x i8]* %gep17.asptr, align 1 + %gep19 = add i32 %.asint4, 1 + %gep19.asptr = inttoptr i32 %gep19 to [2 x i8]* +; CHECK: %gep19.asptr = inttoptr i32 %gep19 to i16* + store [2 x i8] %.field26, [2 x i8]* %gep19.asptr, align 1 +; CHECK: store i16 %.field26, i16* %gep19.asptr, align 1 + %18 = load i8* %6, align 1 + %cond = icmp eq i8 %18, 0 + br i1 %cond, label %match_case, label %match_else + +match_else: ; preds = %"`loop`" + %gep22 = add i32 %.asint4, 1 + %gep25 = add i32 %gep22, 1 + %__llmatch.bc = bitcast i8* %__llmatch to i32* + store i32 %gep22, i32* %__llmatch.bc, align 1 + %__llmatch2.bc = bitcast i8* %__llmatch2 to i32* + store i32 %gep25, i32* %__llmatch2.bc, align 1 + %__llmatch.bc22 = bitcast i8* %__llmatch to i32* + %19 = load i32* %__llmatch.bc22, align 1 + %.asptr = inttoptr i32 %19 to i8* + %20 = load i8* %.asptr, align 1 + store i8 %20, i8* %s_b, align 1 + %__llmatch2.bc23 = bitcast i8* %__llmatch2 to i32* + %21 = load i32* %__llmatch2.bc23, align 1 + %.asptr31 = inttoptr i32 %21 to i8* + %22 = load i8* %.asptr31, align 1 + store i8 %22, i8* %o_b, align 1 + %.ret_trunc = trunc i32 undef to i8 + store i8 %.ret_trunc, i8* %7, align 1 + %23 = load i8* %7, align 1 + %24 = load i8* %7, align 1 + store i8 %24, i8* %8, align 1 + %25 = load i8* %8, align 1 + switch i8 %25, label %"`loop`" [ + i8 1, label %match_case7 + i8 -1, label %match_case8 + ] + +match_case: ; preds = %"`loop`" + %__self.bc24 = bitcast i8* %__self to i32* + %26 = load i32* %__self.bc24, align 1 + %.bc = bitcast i8* %9 to i32* + store i32 undef, 
i32* %.bc, align 1 + %.bc25 = bitcast i8* %9 to i32* + %27 = load i32* %.bc25, align 1 + %28 = load i32* %.bc25, align 1 + %.bc26 = bitcast i8* %10 to i32* + store i32 %28, i32* %.bc26, align 1 + %__arg.bc27 = bitcast i8* %__arg to i32* + %29 = load i32* %__arg.bc27, align 1 + %.bc28 = bitcast i8* %11 to i32* + store i32 undef, i32* %.bc28, align 1 + %.bc29 = bitcast i8* %11 to i32* + %30 = load i32* %.bc29, align 1 + %31 = load i32* %.bc29, align 1 + %.bc30 = bitcast i8* %12 to i32* + store i32 %31, i32* %.bc30, align 1 + %.ret_trunc32 = trunc i32 undef to i8 + store i8 %.ret_trunc32, i8* %__make_return_pointer, align 1 + br label %return + +match_case7: ; preds = %match_else + store i8 1, i8* %__make_return_pointer, align 1 + br label %return + +match_case8: ; preds = %match_else + store i8 -1, i8* %__make_return_pointer, align 1 + br label %return +} \ No newline at end of file diff --git a/test/Transforms/NaCl/replace-ptrs-with-ints.ll b/test/Transforms/NaCl/replace-ptrs-with-ints.ll new file mode 100644 index 000000000000..d5486208e7e4 --- /dev/null +++ b/test/Transforms/NaCl/replace-ptrs-with-ints.ll @@ -0,0 +1,634 @@ +; RUN: opt < %s -replace-ptrs-with-ints -S | FileCheck %s + +target datalayout = "p:32:32:32" + + +%struct = type { i32, i32 } + +declare %struct* @addr_taken_func(%struct*) + +@addr_of_func = global %struct* (%struct*)* @addr_taken_func +; CHECK: @addr_of_func = global %struct* (%struct*)* bitcast (i32 (i32)* @addr_taken_func to %struct* (%struct*)*) + +@blockaddr = global i8* blockaddress(@indirectbr, %l1) +; CHECK: @blockaddr = global i8* blockaddress(@indirectbr, %l1) + + +define i8* @pointer_arg(i8* %ptr, i64 %non_ptr) { + ret i8* %ptr +} +; CHECK-LABEL: define i32 @pointer_arg(i32 %ptr, i64 %non_ptr) { +; CHECK-NEXT: ret i32 %ptr +; CHECK-NEXT: } + + +declare i8* @declared_func(i8*, i64) +; CHECK-LABEL: declare i32 @declared_func(i32, i64) + + +define void @self_reference_phi(i8* %ptr) { +entry: + br label %loop +loop: + %x = phi i8* [ %x, %loop ], [ %ptr, %entry ] + br label %loop +} +; CHECK-LABEL: define void @self_reference_phi(i32 %ptr) { +; CHECK: %x = phi i32 [ %x, %loop ], [ %ptr, %entry ] + +; Self-referencing bitcasts are possible in unreachable basic blocks. +; It is not very likely that we will encounter this, but we handle it +; for completeness. +define void @self_reference_bitcast(i8** %dest) { + ret void +unreachable_loop: + store i8* %self_ref, i8** %dest + %self_ref = bitcast i8* %self_ref to i8* + store i8* %self_ref, i8** %dest + br label %unreachable_loop +} +; CHECK-LABEL: define void @self_reference_bitcast(i32 %dest) { +; CHECK: store i32 undef, i32* %dest.asptr +; CHECK: store i32 undef, i32* %dest.asptr + +define void @circular_reference_bitcasts(i8** %dest) { + ret void +unreachable_loop: + store i8* %cycle1, i8** %dest + %cycle1 = bitcast i8* %cycle2 to i8* + %cycle2 = bitcast i8* %cycle1 to i8* + br label %unreachable_loop +} +; CHECK-LABEL: define void @circular_reference_bitcasts(i32 %dest) { +; CHECK: store i32 undef, i32* %dest.asptr + +define void @circular_reference_inttoptr(i8** %dest) { + ret void +unreachable_loop: + %ptr = inttoptr i32 %int to i8* + %int = ptrtoint i8* %ptr to i32 + store i8* %ptr, i8** %dest + br label %unreachable_loop +} +; CHECK-LABEL: define void @circular_reference_inttoptr(i32 %dest) { +; CHECK: store i32 undef, i32* %dest.asptr + +define i8* @forwards_reference(%struct** %ptr) { + br label %block1 +block2: + ; Forwards reference to %val. 
+ %cast = bitcast %struct* %val to i8* + br label %block3 +block1: + %val = load %struct** %ptr + br label %block2 +block3: + ; Backwards reference to a forwards reference that has already been + ; resolved. + ret i8* %cast +} +; CHECK-LABEL: define i32 @forwards_reference(i32 %ptr) { +; CHECK-NEXT: br label %block1 +; CHECK-LABEL: block2: +; CHECK-NEXT: br label %block3 +; CHECK-LABEL: block1: +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %val = load i32* %ptr.asptr +; CHECK-NEXT: br label %block2 +; CHECK-LABEL: block3: +; CHECK-NEXT: ret i32 %val + + +define i8* @phi_multiple_entry(i1 %arg, i8* %ptr) { +entry: + br i1 %arg, label %done, label %done +done: + %result = phi i8* [ %ptr, %entry ], [ %ptr, %entry ] + ret i8* %result +} +; CHECK-LABEL: define i32 @phi_multiple_entry(i1 %arg, i32 %ptr) { +; CHECK: %result = phi i32 [ %ptr, %entry ], [ %ptr, %entry ] + + +define i8* @select(i1 %cond, i8* %val1, i8* %val2) { + %r = select i1 %cond, i8* %val1, i8* %val2 + ret i8* %r +} +; CHECK-LABEL: define i32 @select(i1 %cond, i32 %val1, i32 %val2) { +; CHECK-NEXT: %r = select i1 %cond, i32 %val1, i32 %val2 + + +define i32* @ptrtoint_same_size(i32* %ptr) { + %a = ptrtoint i32* %ptr to i32 + %b = add i32 %a, 4 + %c = inttoptr i32 %b to i32* + ret i32* %c +} +; CHECK-LABEL: define i32 @ptrtoint_same_size(i32 %ptr) { +; CHECK-NEXT: %b = add i32 %ptr, 4 +; CHECK-NEXT: ret i32 %b + + +define i32* @ptrtoint_different_size(i32* %ptr) { + %a = ptrtoint i32* %ptr to i64 + %b = add i64 %a, 4 + %c = inttoptr i64 %b to i32* + ret i32* %c +} +; CHECK-LABEL: define i32 @ptrtoint_different_size(i32 %ptr) { +; CHECK-NEXT: %a = zext i32 %ptr to i64 +; CHECK-NEXT: %b = add i64 %a, 4 +; CHECK-NEXT: %c = trunc i64 %b to i32 +; CHECK-NEXT: ret i32 %c + +define i8 @ptrtoint_truncates_var(i32* %ptr) { + %a = ptrtoint i32* %ptr to i8 + ret i8 %a +} +; CHECK-LABEL: define i8 @ptrtoint_truncates_var(i32 %ptr) { +; CHECK-NEXT: %a = trunc i32 %ptr to i8 + +define i8 @ptrtoint_truncates_global() { + %a = ptrtoint i32* @var to i8 + ret i8 %a +} +; CHECK-LABEL: define i8 @ptrtoint_truncates_global() { +; CHECK-NEXT: %expanded = ptrtoint i32* @var to i32 +; CHECK-NEXT: %a = trunc i32 %expanded to i8 + + +define i32* @pointer_bitcast(i64* %ptr) { + %cast = bitcast i64* %ptr to i32* + ret i32* %cast +} +; CHECK-LABEL: define i32 @pointer_bitcast(i32 %ptr) { +; CHECK-NEXT: ret i32 %ptr + +; Same-type non-pointer bitcasts happen to be left alone by this pass. 
+define i32 @no_op_bitcast(i32 %val) { + %val2 = bitcast i32 %val to i32 + ret i32 %val2 +} +; CHECK-LABEL: define i32 @no_op_bitcast(i32 %val) { +; CHECK-NEXT: %val2 = bitcast i32 %val to i32 + +define i64 @kept_bitcast(double %d) { + %i = bitcast double %d to i64 + ret i64 %i +} +; CHECK-LABEL: define i64 @kept_bitcast(double %d) { +; CHECK-NEXT: %i = bitcast double %d to i64 + + +define i32 @constant_pointer_null() { + %val = ptrtoint i32* null to i32 + ret i32 %val +} +; CHECK-LABEL: define i32 @constant_pointer_null() { +; CHECK-NEXT: ret i32 0 + +define i32 @constant_pointer_undef() { + %val = ptrtoint i32* undef to i32 + ret i32 %val +} +; CHECK-LABEL: define i32 @constant_pointer_undef() { +; CHECK-NEXT: ret i32 undef + +define i16* @constant_pointer_null_load() { + %val = load i16** null + ret i16* %val +} +; CHECK-LABEL: define i32 @constant_pointer_null_load() { +; CHECK-NEXT: %.asptr = inttoptr i32 0 to i32* +; CHECK-NEXT: %val = load i32* %.asptr + +define i16* @constant_pointer_undef_load() { + %val = load i16** undef + ret i16* %val +} +; CHECK-LABEL: define i32 @constant_pointer_undef_load() { +; CHECK-NEXT: %.asptr = inttoptr i32 undef to i32* +; CHECK-NEXT: %val = load i32* %.asptr + + +define i8 @load(i8* %ptr) { + %x = load i8* %ptr + ret i8 %x +} +; CHECK-LABEL: define i8 @load(i32 %ptr) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: %x = load i8* %ptr.asptr + +define void @store(i8* %ptr, i8 %val) { + store i8 %val, i8* %ptr + ret void +} +; CHECK-LABEL: define void @store(i32 %ptr, i8 %val) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: store i8 %val, i8* %ptr.asptr + + +define i8* @load_ptr(i8** %ptr) { + %x = load i8** %ptr + ret i8* %x +} +; CHECK-LABEL: define i32 @load_ptr(i32 %ptr) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %x = load i32* %ptr.asptr + +define void @store_ptr(i8** %ptr, i8* %val) { + store i8* %val, i8** %ptr + ret void +} +; CHECK-LABEL: define void @store_ptr(i32 %ptr, i32 %val) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: store i32 %val, i32* %ptr.asptr + + +define i8 @load_attrs(i8* %ptr) { + %x = load atomic volatile i8* %ptr seq_cst, align 128 + ret i8 %x +} +; CHECK-LABEL: define i8 @load_attrs(i32 %ptr) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: %x = load atomic volatile i8* %ptr.asptr seq_cst, align 128 + +define void @store_attrs(i8* %ptr, i8 %val) { + store atomic volatile i8 %val, i8* %ptr singlethread release, align 256 + ret void +} +; CHECK-LABEL: define void @store_attrs(i32 %ptr, i8 %val) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: store atomic volatile i8 %val, i8* %ptr.asptr singlethread release, align 256 + + +define i32 @cmpxchg(i32* %ptr, i32 %a, i32 %b) { + %r = cmpxchg i32* %ptr, i32 %a, i32 %b seq_cst + ret i32 %r +} +; CHECK-LABEL: define i32 @cmpxchg(i32 %ptr, i32 %a, i32 %b) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %r = cmpxchg i32* %ptr.asptr, i32 %a, i32 %b seq_cst + +define i32 @atomicrmw(i32* %ptr, i32 %x) { + %r = atomicrmw add i32* %ptr, i32 %x seq_cst + ret i32 %r +} +; CHECK-LABEL: define i32 @atomicrmw(i32 %ptr, i32 %x) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i32* +; CHECK-NEXT: %r = atomicrmw add i32* %ptr.asptr, i32 %x seq_cst + + +define i8* @indirect_call(i8* (i8*)* %func, i8* %arg) { + %result = call i8* %func(i8* %arg) + ret i8* %result +} +; CHECK-LABEL: define i32 @indirect_call(i32 %func, i32 %arg) { +; 
CHECK-NEXT: %func.asptr = inttoptr i32 %func to i32 (i32)*
+; CHECK-NEXT: %result = call i32 %func.asptr(i32 %arg)
+; CHECK-NEXT: ret i32 %result
+
+
+; Test forwards reference
+define i8* @direct_call1(i8* %arg) {
+ %result = call i8* @direct_call2(i8* %arg)
+ ret i8* %result
+}
+; CHECK-LABEL: define i32 @direct_call1(i32 %arg) {
+; CHECK-NEXT: %result = call i32 @direct_call2(i32 %arg)
+; CHECK-NEXT: ret i32 %result
+
+; Test backwards reference
+define i8* @direct_call2(i8* %arg) {
+ %result = call i8* @direct_call1(i8* %arg)
+ ret i8* %result
+}
+; CHECK-LABEL: define i32 @direct_call2(i32 %arg) {
+; CHECK-NEXT: %result = call i32 @direct_call1(i32 %arg)
+; CHECK-NEXT: ret i32 %result
+
+
+@var = global i32 0
+
+define i32* @get_addr_of_global() {
+ ret i32* @var
+}
+; CHECK-LABEL: define i32 @get_addr_of_global() {
+; CHECK-NEXT: %expanded = ptrtoint i32* @var to i32
+; CHECK-NEXT: ret i32 %expanded
+
+define %struct* (%struct*)* @get_addr_of_func() {
+ ret %struct* (%struct*)* @addr_taken_func
+}
+; CHECK-LABEL: define i32 @get_addr_of_func() {
+; CHECK-NEXT: %expanded = ptrtoint i32 (i32)* @addr_taken_func to i32
+; CHECK-NEXT: ret i32 %expanded
+
+
+define i32 @load_global() {
+ %val = load i32* @var
+ ret i32 %val
+}
+; CHECK-LABEL: define i32 @load_global() {
+; CHECK-NEXT: %val = load i32* @var
+; CHECK-NEXT: ret i32 %val
+
+define i16 @load_global_bitcast() {
+ %ptr = bitcast i32* @var to i16*
+ %val = load i16* %ptr
+ ret i16 %val
+}
+; CHECK-LABEL: define i16 @load_global_bitcast() {
+; CHECK-NEXT: %var.bc = bitcast i32* @var to i16*
+; CHECK-NEXT: %val = load i16* %var.bc
+; CHECK-NEXT: ret i16 %val
+
+
+declare void @receive_alloca(%struct* %ptr)
+
+define void @alloca_fixed() {
+ %buf = alloca %struct, align 128
+ call void @receive_alloca(%struct* %buf)
+ ret void
+}
+; CHECK-LABEL: define void @alloca_fixed() {
+; CHECK-NEXT: %buf = alloca i8, i32 8, align 128
+; CHECK-NEXT: %buf.asint = ptrtoint i8* %buf to i32
+; CHECK-NEXT: call void @receive_alloca(i32 %buf.asint)
+
+; When the size passed to alloca is a constant, it should be a
+; constant in the output too.
+define void @alloca_fixed_array() { + %buf = alloca %struct, i32 100 + call void @receive_alloca(%struct* %buf) + ret void +} +; CHECK-LABEL: define void @alloca_fixed_array() { +; CHECK-NEXT: %buf = alloca i8, i32 800, align 8 +; CHECK-NEXT: %buf.asint = ptrtoint i8* %buf to i32 +; CHECK-NEXT: call void @receive_alloca(i32 %buf.asint) + +define void @alloca_variable(i32 %size) { + %buf = alloca %struct, i32 %size + call void @receive_alloca(%struct* %buf) + ret void +} +; CHECK-LABEL: define void @alloca_variable(i32 %size) { +; CHECK-NEXT: %buf.alloca_mul = mul i32 8, %size +; CHECK-NEXT: %buf = alloca i8, i32 %buf.alloca_mul +; CHECK-NEXT: %buf.asint = ptrtoint i8* %buf to i32 +; CHECK-NEXT: call void @receive_alloca(i32 %buf.asint) + +define void @alloca_alignment_i32() { + %buf = alloca i32 + ret void +} +; CHECK-LABEL: void @alloca_alignment_i32() { +; CHECK-NEXT: alloca i8, i32 4, align 4 + +define void @alloca_alignment_double() { + %buf = alloca double + ret void +} +; CHECK-LABEL: void @alloca_alignment_double() { +; CHECK-NEXT: alloca i8, i32 8, align 8 + +define void @alloca_lower_alignment() { + %buf = alloca i32, align 1 + ret void +} +; CHECK-LABEL: void @alloca_lower_alignment() { +; CHECK-NEXT: alloca i8, i32 4, align 1 + + +; This tests for a bug in which, when processing the store's %buf2 +; operand, ReplacePtrsWithInts accidentally strips off the ptrtoint +; cast that it previously introduced for the 'alloca', causing an +; internal sanity check to fail. +define void @alloca_cast_stripping() { + %buf = alloca i32 + %buf1 = ptrtoint i32* %buf to i32 + %buf2 = inttoptr i32 %buf1 to i32* + store i32 0, i32* %buf2 + ret void +} +; CHECK-LABEL: define void @alloca_cast_stripping() { +; CHECK-NEXT: %buf = alloca i8, i32 4 +; CHECK-NEXT: %buf.bc = bitcast i8* %buf to i32* +; CHECK-NEXT: store i32 0, i32* %buf.bc + + +define i1 @compare(i8* %ptr1, i8* %ptr2) { + %cmp = icmp ult i8* %ptr1, %ptr2 + ret i1 %cmp +} +; CHECK-LABEL: define i1 @compare(i32 %ptr1, i32 %ptr2) { +; CHECK-NEXT: %cmp = icmp ult i32 %ptr1, %ptr2 + + +declare i8* @llvm.some.intrinsic(i8* %ptr) + +define i8* @preserve_intrinsic_type(i8* %ptr) { + %result = call i8* @llvm.some.intrinsic(i8* %ptr) + ret i8* %result +} +; CHECK-LABEL: define i32 @preserve_intrinsic_type(i32 %ptr) { +; CHECK-NEXT: %ptr.asptr = inttoptr i32 %ptr to i8* +; CHECK-NEXT: %result = call i8* @llvm.some.intrinsic(i8* %ptr.asptr) +; CHECK-NEXT: %result.asint = ptrtoint i8* %result to i32 +; CHECK-NEXT: ret i32 %result.asint + + +; Just check that the pass does not crash on inline asm. +define i16* @inline_asm1(i8* %ptr) { + %val = call i16* asm "foo", "=r,r"(i8* %ptr) + ret i16* %val +} + +define i16** @inline_asm2(i8** %ptr) { + %val = call i16** asm "foo", "=r,r"(i8** %ptr) + ret i16** %val +} + + +declare void @llvm.dbg.declare(metadata, metadata) +declare void @llvm.dbg.value(metadata, i64, metadata) + +define void @debug_declare(i32 %val) { + ; We normally expect llvm.dbg.declare to be used on an alloca. + %var = alloca i32 + tail call void @llvm.dbg.declare(metadata !{i32* %var}, metadata !{}) + tail call void @llvm.dbg.declare(metadata !{i32 %val}, metadata !{}) + ret void +} +; CHECK-LABEL: define void @debug_declare(i32 %val) { +; CHECK-NEXT: %var = alloca i8, i32 4, align 4 +; CHECK-NEXT: ret void + +; For now, debugging info for values is lost. replaceAllUsesWith() +; does not work for metadata references -- it converts them to nulls. +; This makes dbg.value too tricky to handle for now. 
+define void @debug_value(i32 %val, i8* %ptr) { + tail call void @llvm.dbg.value(metadata !{i32 %val}, i64 1, metadata !{}) + tail call void @llvm.dbg.value(metadata !{i8* %ptr}, i64 2, metadata !{}) + ret void +} +; CHECK-LABEL: define void @debug_value(i32 %val, i32 %ptr) { +; CHECK-NEXT: ret void + + +declare void @llvm.lifetime.start(i64 %size, i8* %ptr) +declare {}* @llvm.invariant.start(i64 %size, i8* %ptr) +declare void @llvm.invariant.end({}* %desc, i64 %size, i8* %ptr) + +; GVN can introduce the following horrible corner case of a lifetime +; marker referencing a PHI node. But we convert the phi to i32 type, +; and lifetime.start doesn't work on an inttoptr converting an i32 phi +; to a pointer. Because of this, we just strip out all lifetime +; markers. + +define void @alloca_lifetime_via_phi() { +entry: + %buf = alloca i8 + br label %block +block: + %phi = phi i8* [ %buf, %entry ] + call void @llvm.lifetime.start(i64 -1, i8* %phi) + ret void +} +; CHECK-LABEL: define void @alloca_lifetime_via_phi() { +; CHECK: %phi = phi i32 [ %buf.asint, %entry ] +; CHECK-NEXT: ret void + +define void @alloca_lifetime() { + %buf = alloca i8 + call void @llvm.lifetime.start(i64 -1, i8* %buf) + ret void +} +; CHECK-LABEL: define void @alloca_lifetime() { +; CHECK-NEXT: %buf = alloca i8 +; CHECK-NEXT: ret void + +define void @alloca_lifetime_via_bitcast() { + %buf = alloca i32 + %buf_cast = bitcast i32* %buf to i8* + call void @llvm.lifetime.start(i64 -1, i8* %buf_cast) + ret void +} +; CHECK-LABEL: define void @alloca_lifetime_via_bitcast() { +; CHECK-NEXT: %buf = alloca i8, i32 4 +; CHECK-NEXT: ret void + +define void @strip_invariant_markers() { + %buf = alloca i8 + %start = call {}* @llvm.invariant.start(i64 1, i8* %buf) + call void @llvm.invariant.end({}* %start, i64 1, i8* %buf) + ret void +} +; CHECK-LABEL: define void @strip_invariant_markers() { +; CHECK-NEXT: %buf = alloca i8 +; CHECK-NEXT: ret void + + +; "nocapture" and "noalias" only apply to pointers, so must be stripped. +define void @nocapture_attr(i8* nocapture noalias %ptr) { + ret void +} +; CHECK-LABEL: define void @nocapture_attr(i32 %ptr) { + +; "nounwind" should be preserved. +define void @nounwind_func_attr() nounwind { + ret void +} +; CHECK: define void @nounwind_func_attr() [[NOUNWIND:#[0-9]+]] { + +define void @nounwind_call_attr() { + call void @nounwind_func_attr() nounwind + ret void +} +; CHECK-LABEL: define void @nounwind_call_attr() { +; CHECK: call void @nounwind_func_attr() {{.*}}[[NOUNWIND]] + +define fastcc void @fastcc_func() { + ret void +} +; CHECK-LABEL: define fastcc void @fastcc_func() { + +define void @fastcc_call() { + call fastcc void @fastcc_func() + ret void +} +; CHECK-LABEL: define void @fastcc_call() { +; CHECK-NEXT: call fastcc void @fastcc_func() + + +; Just check that the pass does not crash on getelementptr. (The pass +; should not depend unnecessarily on ExpandGetElementPtr having been +; run.) +define i8* @getelementptr(i8* %ptr) { + %gep = getelementptr i8* %ptr, i32 10 + ret i8* %gep +} + +; Just check that the pass does not crash on va_arg. 
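+; (In the full PNaCl pipeline, va_arg is presumably lowered by a separate
+; expansion pass before this one runs; here we only verify that
+; ReplacePtrsWithInts itself tolerates the instruction.)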
+define i32* @va_arg(i8* %valist) { + %r = va_arg i8* %valist, i32* + ret i32* %r +} + + +define void @indirectbr(i8* %addr) { + indirectbr i8* %addr, [ label %l1, label %l2 ] +l1: + ret void +l2: + ret void +} +; CHECK-LABEL: define void @indirectbr(i32 %addr) { +; CHECK-NEXT: %addr.asptr = inttoptr i32 %addr to i8* +; CHECK-NEXT: indirectbr i8* %addr.asptr, [label %l1, label %l2] + + +define i8* @invoke(i8* %val) { + %result = invoke i8* @direct_call1(i8* %val) + to label %cont unwind label %lpad +cont: + ret i8* %result +lpad: + %lp = landingpad { i8*, i32 } personality void (i8*)* @personality_func cleanup + %p = extractvalue { i8*, i32 } %lp, 0 + %s = insertvalue { i8*, i32 } %lp, i8* %val, 0 + ret i8* %p +} +; CHECK-LABEL: define i32 @invoke(i32 %val) { +; CHECK-NEXT: %result = invoke i32 @direct_call1(i32 %val) +; CHECK-NEXT: to label %cont unwind label %lpad +; CHECK: %lp = landingpad { i8*, i32 } personality void (i8*)* bitcast (void (i32)* @personality_func to void (i8*)*) +; CHECK: %p = extractvalue { i8*, i32 } %lp, 0 +; CHECK-NEXT: %p.asint = ptrtoint i8* %p to i32 +; CHECK-NEXT: %val.asptr = inttoptr i32 %val to i8* +; CHECK-NEXT: %s = insertvalue { i8*, i32 } %lp, i8* %val.asptr, 0 +; CHECK-NEXT: ret i32 %p.asint + +define void @personality_func(i8* %arg) { + ret void +} + + +declare i32 @llvm.eh.typeid.for(i8*) + +@typeid = global i32 0 + +; The argument here must be left as a bitcast, otherwise the backend +; rejects it. +define void @typeid_for() { + %bc = bitcast i32* @typeid to i8* + call i32 @llvm.eh.typeid.for(i8* %bc) + ret void +} +; CHECK-LABEL: define void @typeid_for() { +; CHECK-NEXT: %typeid.bc = bitcast i32* @typeid to i8* +; CHECK-NEXT: call i32 @llvm.eh.typeid.for(i8* %typeid.bc) + + +; CHECK: attributes {{.*}}[[NOUNWIND]] = { nounwind } diff --git a/test/Transforms/NaCl/resolve-aliases.ll b/test/Transforms/NaCl/resolve-aliases.ll new file mode 100644 index 000000000000..82ad54d74e95 --- /dev/null +++ b/test/Transforms/NaCl/resolve-aliases.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -resolve-aliases -S | FileCheck %s + +; CHECK-NOT: @alias + +@r1 = internal global i32 zeroinitializer +@a1 = alias i32* @r1 +define i32* @usea1() { +; CHECK: ret i32* @r1 + ret i32* @a1 +} + +@funcalias = alias i32* ()* @usea1 +; CHECK: @usefuncalias +define void @usefuncalias() { +; CHECK: call i32* @usea1 + %1 = call i32* @funcalias() + ret void +} + +@bc1 = global i8* bitcast (i32* @r1 to i8*) +@bcalias = alias i8* bitcast (i32* @r1 to i8*) + +; CHECK: @usebcalias +define i8* @usebcalias() { +; CHECK: ret i8* bitcast (i32* @r1 to i8*) + ret i8* @bcalias +} + + +@fa2 = alias i32* ()* @funcalias +; CHECK: @usefa2 +define void @usefa2() { +; CHECK: call i32* @usea1 + call i32* @fa2() + ret void +} diff --git a/test/Transforms/NaCl/resolve-pnacl-intrinsics-x86-32-16-bit-atomics-hack.ll b/test/Transforms/NaCl/resolve-pnacl-intrinsics-x86-32-16-bit-atomics-hack.ll new file mode 100644 index 000000000000..fc0e7c70f6f7 --- /dev/null +++ b/test/Transforms/NaCl/resolve-pnacl-intrinsics-x86-32-16-bit-atomics-hack.ll @@ -0,0 +1,138 @@ +; RUN: opt < %s -resolve-pnacl-intrinsics -S -mtriple=i386-unknown-nacl | \ +; RUN: FileCheck %s -check-prefix=CLEANED +; RUN: opt < %s -resolve-pnacl-intrinsics -S -mtriple=i386-unknown-nacl | \ +; RUN: FileCheck %s + +; CLEANED-NOT: call {{.*}} @llvm.nacl.atomic + +; Supplement to resolve-pnacl-intrinsics.ll that tests the 16-bit hack +; for x86-32. All of the RMW cases are the same except for one +; operation. 
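+; (Outline of the expansion, as exercised by the CHECK lines below: the
+; i16* is rounded down to a 4-byte-aligned i32*, the containing 32-bit
+; word is loaded atomically, the 16-bit operation is applied to the low
+; or high half depending on the pointer's offset within the word, and the
+; merged word is written back via a cmpxchg loop that retries on failure.)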
+ +; These declarations must be here because the function pass expects +; to find them. In real life they're inserted by the translator +; before the function pass runs. +declare i32 @setjmp(i8*) +declare void @longjmp(i8*, i32) + +declare i16 @llvm.nacl.atomic.rmw.i16(i32, i16*, i16, i32) +declare i16 @llvm.nacl.atomic.cmpxchg.i16(i16*, i16, i16, i32, i32) + +; CHECK: @test_fetch_and_add_i16 +define i16 @test_fetch_and_add_i16(i16* %ptr, i16 %value) { +; CHECK-NEXT: call void asm sideeffect "", "~{memory}"() +; CHECK-NEXT: %uintptr = ptrtoint i16* %ptr to i32 +; CHECK-NEXT: %aligneduintptr = and i32 %uintptr, -4 +; CHECK-NEXT: %aligned32 = and i32 %uintptr, 3 +; CHECK-NEXT: %ptr32 = inttoptr i32 %aligneduintptr to i32* +; CHECK-NEXT: %isaligned32 = icmp eq i32 %aligned32, 0 +; CHECK-NEXT: br i1 %isaligned32, label %atomic16aligned32, label %atomic16aligned16 +; +; CHECK: atomic16successor: +; CHECK-NEXT: %1 = phi i16 [ %truncval, %atomic16aligned32 ], [ %shval, %atomic16aligned16 ] +; CHECK-NEXT: call void asm sideeffect "", "~{memory}"() +; CHECK-NEXT: ret i16 %1 +; +; CHECK: atomic16aligned32: +; CHECK-NEXT: %loaded = load atomic i32* %ptr32 seq_cst, align 4 +; CHECK-NEXT: %truncval = trunc i32 %loaded to i16 +; CHECK-NEXT: %res = add i16 %truncval, %value +; CHECK-NEXT: %mergeres = zext i16 %res to i32 +; CHECK-NEXT: %maskedloaded = and i32 %loaded, -65536 +; CHECK-NEXT: %finalres = or i32 %mergeres, %maskedloaded +; CHECK-NEXT: %oldval = cmpxchg i32* %ptr32, i32 %loaded, i32 %finalres seq_cst +; CHECK-NEXT: %success = icmp eq i32 %oldval, %loaded +; CHECK-NEXT: br i1 %success, label %atomic16successor, label %atomic16aligned32 +; +; CHECK: atomic16aligned16: +; CHECK-NEXT: %loaded1 = load atomic i32* %ptr32 seq_cst, align 4 +; CHECK-NEXT: %lshr = lshr i32 %loaded1, 16 +; CHECK-NEXT: %shval = trunc i32 %lshr to i16 +; CHECK-NEXT: %res2 = add i16 %shval, %value +; CHECK-NEXT: %zext = zext i16 %res2 to i32 +; CHECK-NEXT: %mergeres3 = shl i32 %zext, 16 +; CHECK-NEXT: %maskedloaded4 = and i32 %loaded1, 65535 +; CHECK-NEXT: %finalres5 = or i32 %mergeres3, %maskedloaded4 +; CHECK-NEXT: %oldval6 = cmpxchg i32* %ptr32, i32 %loaded1, i32 %finalres5 seq_cst +; CHECK-NEXT: %success7 = icmp eq i32 %oldval6, %loaded1 +; CHECK-NEXT: br i1 %success7, label %atomic16successor, label %atomic16aligned16 + %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 1, i16* %ptr, i16 %value, i32 6) + ret i16 %1 +} + +; CHECK: @test_fetch_and_sub_i16 +define i16 @test_fetch_and_sub_i16(i16* %ptr, i16 %value) { + ; CHECK: %res = sub i16 %truncval, %value + ; CHECK: %res2 = sub i16 %shval, %value + %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 2, i16* %ptr, i16 %value, i32 6) + ret i16 %1 +} + +; CHECK: @test_fetch_and_or_i16 +define i16 @test_fetch_and_or_i16(i16* %ptr, i16 %value) { + ; CHECK: %res = or i16 %truncval, %value + ; CHECK: %res2 = or i16 %shval, %value + %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 3, i16* %ptr, i16 %value, i32 6) + ret i16 %1 +} + +; CHECK: @test_fetch_and_and_i16 +define i16 @test_fetch_and_and_i16(i16* %ptr, i16 %value) { + ; CHECK: %res = and i16 %truncval, %value + ; CHECK: %res2 = and i16 %shval, %value + %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 4, i16* %ptr, i16 %value, i32 6) + ret i16 %1 +} + +; CHECK: @test_fetch_and_xor_i16 +define i16 @test_fetch_and_xor_i16(i16* %ptr, i16 %value) { + ; CHECK: %res = xor i16 %truncval, %value + ; CHECK: %res2 = xor i16 %shval, %value + %1 = call i16 @llvm.nacl.atomic.rmw.i16(i32 5, i16* %ptr, i16 %value, i32 6) + ret i16 %1 +} + +; CHECK: 
@test_val_compare_and_swap_i16
+define i16 @test_val_compare_and_swap_i16(i16* %ptr, i16 %oldval, i16 %newval) {
+; CHECK-NEXT: call void asm sideeffect "", "~{memory}"()
+; CHECK-NEXT: %uintptr = ptrtoint i16* %ptr to i32
+; CHECK-NEXT: %aligneduintptr = and i32 %uintptr, -4
+; CHECK-NEXT: %aligned32 = and i32 %uintptr, 3
+; CHECK-NEXT: %ptr32 = inttoptr i32 %aligneduintptr to i32*
+; CHECK-NEXT: %isaligned32 = icmp eq i32 %aligned32, 0
+; CHECK-NEXT: br i1 %isaligned32, label %atomic16aligned32, label %atomic16aligned16
+;
+; CHECK: atomic16successor:
+; CHECK-NEXT: %1 = phi i16 [ %truncval, %atomic16aligned32 ], [ %shval, %atomic16aligned16 ]
+; CHECK-NEXT: call void asm sideeffect "", "~{memory}"()
+; CHECK-NEXT: ret i16 %1
+;
+; CHECK: atomic16aligned32:
+; CHECK-NEXT: %loaded = load atomic i32* %ptr32 seq_cst, align 4
+; CHECK-NEXT: %truncval = trunc i32 %loaded to i16
+; CHECK-NEXT: %mergeres = zext i16 %newval to i32
+; CHECK-NEXT: %maskedloaded = and i32 %loaded, -65536
+; CHECK-NEXT: %finalres = or i32 %mergeres, %maskedloaded
+; CHECK-NEXT: %zext = zext i16 %oldval to i32
+; CHECK-NEXT: %expected = or i32 %maskedloaded, %zext
+; CHECK-NEXT: %oldval1 = cmpxchg i32* %ptr32, i32 %expected, i32 %finalres seq_cst
+; CHECK-NEXT: %success = icmp eq i32 %oldval1, %loaded
+; CHECK-NEXT: br i1 %success, label %atomic16successor, label %atomic16aligned32
+;
+; CHECK: atomic16aligned16:
+; CHECK-NEXT: %loaded2 = load atomic i32* %ptr32 seq_cst, align 4
+; CHECK-NEXT: %lshr = lshr i32 %loaded2, 16
+; CHECK-NEXT: %shval = trunc i32 %lshr to i16
+; CHECK-NEXT: %zext3 = zext i16 %newval to i32
+; CHECK-NEXT: %mergeres4 = shl i32 %zext3, 16
+; CHECK-NEXT: %maskedloaded5 = and i32 %loaded2, 65535
+; CHECK-NEXT: %finalres6 = or i32 %mergeres4, %maskedloaded5
+; CHECK-NEXT: %zext7 = zext i16 %oldval to i32
+; CHECK-NEXT: %shl = shl i32 %zext7, 16
+; CHECK-NEXT: %expected8 = or i32 %maskedloaded5, %shl
+; CHECK-NEXT: %oldval9 = cmpxchg i32* %ptr32, i32 %expected8, i32 %finalres6 seq_cst
+; CHECK-NEXT: %success10 = icmp eq i32 %oldval9, %loaded2
+; CHECK-NEXT: br i1 %success10, label %atomic16successor, label %atomic16aligned16
+  %1 = call i16 @llvm.nacl.atomic.cmpxchg.i16(i16* %ptr, i16 %oldval, i16 %newval, i32 6, i32 6)
+  ret i16 %1
+}
diff --git a/test/Transforms/NaCl/resolve-pnacl-intrinsics.ll b/test/Transforms/NaCl/resolve-pnacl-intrinsics.ll
new file mode 100644
index 000000000000..0a297057fe26
--- /dev/null
+++ b/test/Transforms/NaCl/resolve-pnacl-intrinsics.ll
@@ -0,0 +1,213 @@
+; RUN: opt < %s -resolve-pnacl-intrinsics -S | FileCheck %s -check-prefix=CLEANED
+; RUN: opt < %s -resolve-pnacl-intrinsics -S | FileCheck %s
+
+; CLEANED-NOT: call i32 @llvm.nacl.setjmp
+; CLEANED-NOT: call void @llvm.nacl.longjmp
+; CLEANED-NOT: call {{.*}} @llvm.nacl.atomic
+
+declare i32 @llvm.nacl.setjmp(i8*)
+declare void @llvm.nacl.longjmp(i8*, i32)
+
+; Intrinsic name mangling is based on overloaded parameters only,
+; including return type. Note that all pointer parameters are
+; overloaded on type-pointed-to in Intrinsics.td, and are therefore
+; mangled on the type-pointed-to only.
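+; (For example, the first declaration below is named
+; @llvm.nacl.atomic.load.i8: the overloaded type is i8, and the i8*
+; operand is mangled on its pointee type rather than contributing a
+; pointer suffix such as ".p0i8".)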
+declare i8 @llvm.nacl.atomic.load.i8(i8*, i32) +declare i16 @llvm.nacl.atomic.load.i16(i16*, i32) +declare i32 @llvm.nacl.atomic.load.i32(i32*, i32) +declare i64 @llvm.nacl.atomic.load.i64(i64*, i32) +declare void @llvm.nacl.atomic.store.i8(i8, i8*, i32) +declare void @llvm.nacl.atomic.store.i16(i16, i16*, i32) +declare void @llvm.nacl.atomic.store.i32(i32, i32*, i32) +declare void @llvm.nacl.atomic.store.i64(i64, i64*, i32) +declare i8 @llvm.nacl.atomic.rmw.i8(i32, i8*, i8, i32) +declare i16 @llvm.nacl.atomic.rmw.i16(i32, i16*, i16, i32) +declare i32 @llvm.nacl.atomic.rmw.i32(i32, i32*, i32, i32) +declare i64 @llvm.nacl.atomic.rmw.i64(i32, i64*, i64, i32) +declare i8 @llvm.nacl.atomic.cmpxchg.i8(i8*, i8, i8, i32, i32) +declare i16 @llvm.nacl.atomic.cmpxchg.i16(i16*, i16, i16, i32, i32) +declare i32 @llvm.nacl.atomic.cmpxchg.i32(i32*, i32, i32, i32, i32) +declare i64 @llvm.nacl.atomic.cmpxchg.i64(i64*, i64, i64, i32, i32) +declare void @llvm.nacl.atomic.fence(i32) +declare void @llvm.nacl.atomic.fence.all() +declare i1 @llvm.nacl.atomic.is.lock.free(i32, i8*) + +; These declarations must be here because the function pass expects +; to find them. In real life they're inserted by the translator +; before the function pass runs. +declare i32 @setjmp(i8*) +declare void @longjmp(i8*, i32) + +define i32 @call_setjmp(i8* %arg) { + %val = call i32 @llvm.nacl.setjmp(i8* %arg) +; CHECK: %val = call i32 @setjmp(i8* %arg) + ret i32 %val +} + +define void @call_longjmp(i8* %arg, i32 %num) { + call void @llvm.nacl.longjmp(i8* %arg, i32 %num) +; CHECK: call void @longjmp(i8* %arg, i32 %num) + ret void +} + +; atomics. + +; CHECK: @test_fetch_and_add_i32 +define i32 @test_fetch_and_add_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw add i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 1, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; CHECK: @test_fetch_and_sub_i32 +define i32 @test_fetch_and_sub_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw sub i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 2, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; CHECK: @test_fetch_and_or_i32 +define i32 @test_fetch_and_or_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw or i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 3, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; CHECK: @test_fetch_and_and_i32 +define i32 @test_fetch_and_and_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw and i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 4, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; CHECK: @test_fetch_and_xor_i32 +define i32 @test_fetch_and_xor_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw xor i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 5, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; CHECK: @test_val_compare_and_swap_i32 +define i32 @test_val_compare_and_swap_i32(i32* %ptr, i32 %oldval, i32 %newval) { + ; CHECK: %1 = cmpxchg i32* %ptr, i32 %oldval, i32 %newval seq_cst + %1 = call i32 @llvm.nacl.atomic.cmpxchg.i32(i32* %ptr, i32 %oldval, i32 %newval, i32 6, i32 6) + ret i32 %1 +} + +; CHECK: @test_c11_fence +define void @test_c11_fence() { + ; CHECK: fence seq_cst + call void @llvm.nacl.atomic.fence(i32 6) + ret void +} + +; CHECK: @test_synchronize +define void @test_synchronize() { + ; CHECK: call void asm sideeffect "", "~{memory}"() + ; CHECK: fence seq_cst + ; CHECK: call void asm sideeffect "", "~{memory}"() + call void 
@llvm.nacl.atomic.fence.all() + ret void +} + +; CHECK: @test_is_lock_free_1 +define i1 @test_is_lock_free_1(i8* %ptr) { + ; CHECK: ret i1 {{true|false}} + %res = call i1 @llvm.nacl.atomic.is.lock.free(i32 1, i8* %ptr) + ret i1 %res +} + +; CHECK: @test_is_lock_free_2 +define i1 @test_is_lock_free_2(i16* %ptr) { + ; CHECK: ret i1 {{true|false}} + %ptr2 = bitcast i16* %ptr to i8* + %res = call i1 @llvm.nacl.atomic.is.lock.free(i32 2, i8* %ptr2) + ret i1 %res +} + +; CHECK: @test_is_lock_free_4 +define i1 @test_is_lock_free_4(i32* %ptr) { + ; CHECK: ret i1 {{true|false}} + %ptr2 = bitcast i32* %ptr to i8* + %res = call i1 @llvm.nacl.atomic.is.lock.free(i32 4, i8* %ptr2) + ret i1 %res +} + +; CHECK: @test_is_lock_free_8 +define i1 @test_is_lock_free_8(i64* %ptr) { + ; CHECK: ret i1 {{true|false}} + %ptr2 = bitcast i64* %ptr to i8* + %res = call i1 @llvm.nacl.atomic.is.lock.free(i32 8, i8* %ptr2) + ret i1 %res +} + +; CHECK: @test_lock_test_and_set_i32 +define i32 @test_lock_test_and_set_i32(i32* %ptr, i32 %value) { + ; CHECK: %1 = atomicrmw xchg i32* %ptr, i32 %value seq_cst + %1 = call i32 @llvm.nacl.atomic.rmw.i32(i32 6, i32* %ptr, i32 %value, i32 6) + ret i32 %1 +} + +; CHECK: @test_lock_release_i32 +define void @test_lock_release_i32(i32* %ptr) { + ; Note that the 'release' was changed to a 'seq_cst'. + ; CHECK: store atomic i32 0, i32* %ptr seq_cst, align 4 + call void @llvm.nacl.atomic.store.i32(i32 0, i32* %ptr, i32 6) + ret void +} + +; CHECK: @test_atomic_load_i8 +define zeroext i8 @test_atomic_load_i8(i8* %ptr) { + ; CHECK: %1 = load atomic i8* %ptr seq_cst, align 1 + %1 = call i8 @llvm.nacl.atomic.load.i8(i8* %ptr, i32 6) + ret i8 %1 +} + +; CHECK: @test_atomic_store_i8 +define void @test_atomic_store_i8(i8* %ptr, i8 zeroext %value) { + ; CHECK: store atomic i8 %value, i8* %ptr seq_cst, align 1 + call void @llvm.nacl.atomic.store.i8(i8 %value, i8* %ptr, i32 6) + ret void +} + +; CHECK: @test_atomic_load_i16 +define zeroext i16 @test_atomic_load_i16(i16* %ptr) { + ; CHECK: %1 = load atomic i16* %ptr seq_cst, align 2 + %1 = call i16 @llvm.nacl.atomic.load.i16(i16* %ptr, i32 6) + ret i16 %1 +} + +; CHECK: @test_atomic_store_i16 +define void @test_atomic_store_i16(i16* %ptr, i16 zeroext %value) { + ; CHECK: store atomic i16 %value, i16* %ptr seq_cst, align 2 + call void @llvm.nacl.atomic.store.i16(i16 %value, i16* %ptr, i32 6) + ret void +} + +; CHECK: @test_atomic_load_i32 +define i32 @test_atomic_load_i32(i32* %ptr) { + ; CHECK: %1 = load atomic i32* %ptr seq_cst, align 4 + %1 = call i32 @llvm.nacl.atomic.load.i32(i32* %ptr, i32 6) + ret i32 %1 +} + +; CHECK: @test_atomic_store_i32 +define void @test_atomic_store_i32(i32* %ptr, i32 %value) { + ; CHECK: store atomic i32 %value, i32* %ptr seq_cst, align 4 + call void @llvm.nacl.atomic.store.i32(i32 %value, i32* %ptr, i32 6) + ret void +} + +; CHECK: @test_atomic_load_i64 +define i64 @test_atomic_load_i64(i64* %ptr) { + ; CHECK: %1 = load atomic i64* %ptr seq_cst, align 8 + %1 = call i64 @llvm.nacl.atomic.load.i64(i64* %ptr, i32 6) + ret i64 %1 +} + +; CHECK: @test_atomic_store_i64 +define void @test_atomic_store_i64(i64* %ptr, i64 %value) { + ; CHECK: store atomic i64 %value, i64* %ptr seq_cst, align 8 + call void @llvm.nacl.atomic.store.i64(i64 %value, i64* %ptr, i32 6) + ret void +} diff --git a/test/Transforms/NaCl/rewrite-call-with-libfunc-argument.ll b/test/Transforms/NaCl/rewrite-call-with-libfunc-argument.ll new file mode 100644 index 000000000000..56ee2d2c078e --- /dev/null +++ 
b/test/Transforms/NaCl/rewrite-call-with-libfunc-argument.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s + +; See https://code.google.com/p/nativeclient/issues/detail?id=3706 +; Make sure that when @longjmp is used as an argument in a call instruction, +; the rewrite pass does the right thing and doesn't get confused. + +; CHECK: define internal void @longjmp(i64* %env, i32 %val) { + +declare void @longjmp(i64*, i32) + +declare void @somefunc(i32, void (i64*, i32)*, i32) + +define void @foo() { +entry: + call void @somefunc(i32 1, void (i64*, i32)* @longjmp, i32 2) +; CHECK: call void @somefunc(i32 1, void (i64*, i32)* @longjmp, i32 2) + ret void +} diff --git a/test/Transforms/NaCl/rewrite-flt-rounds.ll b/test/Transforms/NaCl/rewrite-flt-rounds.ll new file mode 100644 index 000000000000..cb1a7e4a9924 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-flt-rounds.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s -check-prefix=CLEANED +; Test the @llvm.flt.rounds part of the RewriteLLVMIntrinsics pass + +declare i32 @llvm.flt.rounds() + +; No declaration or definition of llvm.flt.rounds() should remain. +; CLEANED-NOT: @llvm.flt.rounds + +define i32 @call_flt_rounds() { +; CHECK: call_flt_rounds +; CHECK-NEXT: ret i32 1 + %val = call i32 @llvm.flt.rounds() + ret i32 %val +} + +; A more complex example with a number of calls in several BBs. +define i32 @multiple_calls(i64* %arg, i32 %num) { +; CHECK: multiple_calls +entryblock: +; CHECK: entryblock + %v1 = call i32 @llvm.flt.rounds() + br label %block1 +block1: +; CHECK: block1: +; CHECK-NEXT: %v3 = add i32 1, 1 + %v2 = call i32 @llvm.flt.rounds() + %v3 = add i32 %v2, %v1 + br label %exitblock +exitblock: +; CHECK: exitblock: +; CHECK-NEXT: %v4 = add i32 1, %v3 +; CHECK-NEXT: %v6 = add i32 1, %v4 + %v4 = add i32 %v2, %v3 + %v5 = call i32 @llvm.flt.rounds() + %v6 = add i32 %v5, %v4 + ret i32 %v6 +} diff --git a/test/Transforms/NaCl/rewrite-libcalls-wrong-signature.ll b/test/Transforms/NaCl/rewrite-libcalls-wrong-signature.ll new file mode 100644 index 000000000000..3ab64d9dd26e --- /dev/null +++ b/test/Transforms/NaCl/rewrite-libcalls-wrong-signature.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; Check how the pass behaves in the presence of library functions with wrong +; signatures. + +declare i8 @longjmp(i64) + +@flongjmp = global i8 (i64)* @longjmp +; CHECK: @flongjmp = global i8 (i64)* bitcast (void (i64*, i32)* @longjmp to i8 (i64)*) + +; CHECK: define internal void @longjmp(i64* %env, i32 %val) + +declare i8* @memcpy(i32) + +define i8* @call_bad_memcpy(i32 %arg) { + %result = call i8* @memcpy(i32 %arg) + ret i8* %result +} + +; CHECK: define i8* @call_bad_memcpy(i32 %arg) { +; CHECK: %result = call i8* bitcast (i8* (i8*, i8*, i32)* @memcpy to i8* (i32)*)(i32 %arg) + +declare i8 @setjmp() + +; This simulates a case where the original C file had a correct setjmp +; call but due to linking order a wrong declaration made it into the +; IR. In this case, the correct call is bitcasted to the correct type. +; The pass should treat this properly by creating a direct intrinsic +; call instead of going through the wrapper. 
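+; (Illustrative C-level picture of what is being simulated, not part of
+; the test:
+;    int setjmp();                 /* wrong declaration wins in the IR  */
+;    ... if (setjmp(buf)) ...      /* but the call site itself is fine  */
+; The mismatch shows up in IR as the bitcast call below.)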
+define i32 @call_valid_setjmp(i64* %buf) { + %result = call i32 bitcast (i8 ()* @setjmp to i32 (i64*)*)(i64* %buf) + ret i32 %result +} + +; CHECK: define i32 @call_valid_setjmp(i64* %buf) { +; CHECK-NEXT: %jmp_buf_i8 = bitcast i64* %buf to i8* +; CHECK-NEXT: %result = call i32 @llvm.nacl.setjmp(i8* %jmp_buf_i8) +; CHECK-NEXT: ret i32 %result +; CHECK-NEXT: } diff --git a/test/Transforms/NaCl/rewrite-longjmp-no-store.ll b/test/Transforms/NaCl/rewrite-longjmp-no-store.ll new file mode 100644 index 000000000000..134593ad3971 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-longjmp-no-store.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s -check-prefix=CLEANED +; Test that when there are no uses other than calls to longjmp, +; no function body is generated. + +declare void @longjmp(i64*, i32) + +; No declaration or definition of longjmp() should remain. +; CLEANED-NOT: @longjmp + +define void @call_longjmp(i64* %arg, i32 %num) { + call void @longjmp(i64* %arg, i32 %num) +; CHECK: call void @llvm.nacl.longjmp(i8* %jmp_buf_i8, i32 %num) + ret void +} + diff --git a/test/Transforms/NaCl/rewrite-longjmp-noncall-uses.ll b/test/Transforms/NaCl/rewrite-longjmp-noncall-uses.ll new file mode 100644 index 000000000000..ed7818ec9688 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-longjmp-noncall-uses.ll @@ -0,0 +1,21 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; Check that the rewrite pass behaves correctly in the presence +; of various uses of longjmp that are not calls. + +@fp = global void (i64*, i32)* @longjmp, align 8 +; CHECK: @fp = global void (i64*, i32)* @longjmp, align 8 +@arrfp = global [3 x void (i64*, i32)*] [void (i64*, i32)* null, void (i64*, i32)* @longjmp, void (i64*, i32)* null], align 16 +; CHECK: @arrfp = global [3 x void (i64*, i32)*] [void (i64*, i32)* null, void (i64*, i32)* @longjmp, void (i64*, i32)* null], align 16 + +; CHECK: define internal void @longjmp(i64* %env, i32 %val) { + +declare void @longjmp(i64*, i32) + +declare void @somefunc(i8*) + +define void @foo() { +entry: + call void @somefunc(i8* bitcast (void (i64*, i32)* @longjmp to i8*)) +; CHECK: call void @somefunc(i8* bitcast (void (i64*, i32)* @longjmp to i8*)) + ret void +} diff --git a/test/Transforms/NaCl/rewrite-memfuncs-no-store.ll b/test/Transforms/NaCl/rewrite-memfuncs-no-store.ll new file mode 100644 index 000000000000..3b57950c1ca3 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-memfuncs-no-store.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s -check-prefix=CLEANED + +declare i8* @memcpy(i8*, i8*, i32) +declare i8* @memmove(i8*, i8*, i32) +declare i8* @memset(i8*, i32, i32) + +; No declaration or definition of the library functions should remain, since +; the only uses of mem* functions are calls. 
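+; (Contrast with rewrite-memfuncs-noncall-uses.ll further down, where
+; non-call uses force the pass to emit internal wrapper bodies instead of
+; simply deleting the declarations.)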
+; CLEANED-NOT: @memcpy +; CLEANED-NOT: @memmove +; CLEANED-NOT: @memset + +define i8* @call_memcpy(i8* %dest, i8* %src, i32 %len) { + %result = call i8* @memcpy(i8* %dest, i8* %src, i32 %len) + ret i8* %result +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +} + +define i8* @call_memmove(i8* %dest, i8* %src, i32 %len) { + %result = call i8* @memmove(i8* %dest, i8* %src, i32 %len) + ret i8* %result +; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +} + +define i8* @call_memset(i8* %dest, i32 %c, i32 %len) { + %result = call i8* @memset(i8* %dest, i32 %c, i32 %len) + ret i8* %result +; CHECK: %trunc_byte = trunc i32 %c to i8 +; CHECK: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %trunc_byte, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +} + +; CHECK: declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) + +; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) + +; CHECK: declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1) diff --git a/test/Transforms/NaCl/rewrite-memfuncs-noncall-uses.ll b/test/Transforms/NaCl/rewrite-memfuncs-noncall-uses.ll new file mode 100644 index 000000000000..5c6bdfdcb596 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-memfuncs-noncall-uses.ll @@ -0,0 +1,30 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; Check that the rewrite pass behaves correctly in the presence +; of various uses of mem* that are not calls. + +@fpcpy = global i8* (i8*, i8*, i32)* @memcpy +; CHECK: @fpcpy = global i8* (i8*, i8*, i32)* @memcpy +@fpmove = global i8* (i8*, i8*, i32)* @memmove +; CHECK: @fpmove = global i8* (i8*, i8*, i32)* @memmove +@fpset = global i8* (i8*, i32, i32)* @memset +; CHECK: @fpset = global i8* (i8*, i32, i32)* @memset + +; CHECK: define internal i8* @memcpy(i8* %dest, i8* %src, i32 %len) { +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +; CHECK: } + +; CHECK: define internal i8* @memmove(i8* %dest, i8* %src, i32 %len) { +; CHECK: call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +; CHECK: } + +; CHECK: define internal i8* @memset(i8* %dest, i32 %val, i32 %len) { +; CHECK: %trunc_byte = trunc i32 %val to i8 +; CHECK: call void @llvm.memset.p0i8.i32(i8* %dest, i8 %trunc_byte, i32 %len, i32 1, i1 false) +; CHECK: ret i8* %dest +; CHECK: } + +declare i8* @memcpy(i8*, i8*, i32) +declare i8* @memmove(i8*, i8*, i32) +declare i8* @memset(i8*, i32, i32) diff --git a/test/Transforms/NaCl/rewrite-prefetch.ll b/test/Transforms/NaCl/rewrite-prefetch.ll new file mode 100644 index 000000000000..d498c2012ef0 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-prefetch.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-llvm-intrinsic-calls -S | FileCheck %s -check-prefix=CLEANED +; Test the @llvm.prefetch part of the RewriteLLVMIntrinsics pass + +declare void @llvm.prefetch(i8 *%ptr, i32 %rw, i32 %locality, i32 %cache_type) + +; No declaration or definition of llvm.prefetch() should remain. 
+; CLEANED-NOT: @llvm.prefetch + +define void @call_prefetch(i8 *%ptr, i32 %rw, i32 %locality, i32 %cache_type) { +; CHECK: call_prefetch +; CHECK-NEXT: ret void + call void @llvm.prefetch(i8 *%ptr, i32 %rw, i32 %locality, i32 %cache_type) + ret void +} + +; A more complex example with a number of calls in several BBs. +define void @multiple_calls(i8 *%ptr, i32 %rw, i32 %locality, i32 %cache_type) { +; CHECK: multiple_calls +entryblock: +; CHECK: entryblock +; CHECK-NEXT: br + call void @llvm.prefetch(i8 *%ptr, i32 %rw, i32 %locality, i32 %cache_type) + br label %block1 +block1: +; CHECK: block1: +; CHECK-NEXT: br + call void @llvm.prefetch(i8 *%ptr, i32 %rw, i32 %locality, i32 %cache_type) + br label %exitblock +exitblock: +; CHECK: exitblock: +; CHECK-NEXT: ret void + call void @llvm.prefetch(i8 *%ptr, i32 %rw, i32 %locality, i32 %cache_type) + ret void +} diff --git a/test/Transforms/NaCl/rewrite-setjmp-store-error.ll b/test/Transforms/NaCl/rewrite-setjmp-store-error.ll new file mode 100644 index 000000000000..5919711b5dd3 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-setjmp-store-error.ll @@ -0,0 +1,14 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S 2>&1 | FileCheck %s +; XFAIL: * +; Test that the pass enforces not being able to store the address +; of setjmp. + +declare i32 @setjmp(i64*) + +define i32 @takeaddr_setjmp(i64* %arg) { + %fp = alloca i32 (i64*)*, align 8 +; CHECK: Taking the address of setjmp is invalid + store i32 (i64*)* @setjmp, i32 (i64*)** %fp, align 8 + ret i32 7 +} + diff --git a/test/Transforms/NaCl/rewrite-setlongjmp-calls.ll b/test/Transforms/NaCl/rewrite-setlongjmp-calls.ll new file mode 100644 index 000000000000..f34f004d7f39 --- /dev/null +++ b/test/Transforms/NaCl/rewrite-setlongjmp-calls.ll @@ -0,0 +1,76 @@ +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s +; RUN: opt < %s -rewrite-pnacl-library-calls -S | FileCheck %s -check-prefix=CLEANED +; Test the RewritePNaClLibraryCalls pass + +declare i32 @setjmp(i64*) +declare void @longjmp(i64*, i32) + +; No declaration or definition of setjmp() should remain. 
+; CLEANED-NOT: @setjmp
+
+; Since the address of longjmp is being taken here, a body is generated
+; for it, which does a cast and calls an intrinsic
+
+; CHECK: define internal void @longjmp(i64* %env, i32 %val) {
+; CHECK: entry:
+; CHECK: %jmp_buf_i8 = bitcast i64* %env to i8*
+; CHECK: call void @llvm.nacl.longjmp(i8* %jmp_buf_i8, i32 %val)
+; CHECK: unreachable
+; CHECK: }
+
+define i32 @call_setjmp(i64* %arg) {
+; CHECK-NOT: call i32 @setjmp
+; CHECK: %jmp_buf_i8 = bitcast i64* %arg to i8*
+; CHECK-NEXT: %val = call i32 @llvm.nacl.setjmp(i8* %jmp_buf_i8)
+  %val = call i32 @setjmp(i64* %arg)
+  ret i32 %val
+}
+
+define void @call_longjmp(i64* %arg, i32 %num) {
+; CHECK-NOT: call void @longjmp
+; CHECK: %jmp_buf_i8 = bitcast i64* %arg to i8*
+; CHECK-NEXT: call void @llvm.nacl.longjmp(i8* %jmp_buf_i8, i32 %num)
+  call void @longjmp(i64* %arg, i32 %num)
+  ret void
+}
+
+define i32 @takeaddr_longjmp(i64* %arg, i32 %num) {
+  %fp = alloca void (i64*, i32)*, align 8
+; CHECK: store void (i64*, i32)* @longjmp, void (i64*, i32)** %fp
+  store void (i64*, i32)* @longjmp, void (i64*, i32)** %fp, align 8
+  ret i32 7
+}
+
+; A more complex example with a number of calls in several BBs
+define void @multiple_calls(i64* %arg, i32 %num) {
+entryblock:
+; CHECK: entryblock
+; CHECK: bitcast
+; CHECK-NEXT: call void @llvm.nacl.longjmp
+  call void @longjmp(i64* %arg, i32 %num)
+  br label %block1
+block1:
+; CHECK: block1
+; CHECK: bitcast
+; CHECK-NEXT: call void @llvm.nacl.longjmp
+  call void @longjmp(i64* %arg, i32 %num)
+; CHECK: call i32 @llvm.nacl.setjmp
+  %val = call i32 @setjmp(i64* %arg)
+  %num2 = add i32 %val, %num
+; CHECK: bitcast
+; CHECK-NEXT: call void @llvm.nacl.longjmp
+  call void @longjmp(i64* %arg, i32 %num2)
+  br label %exitblock
+exitblock:
+  %num3 = add i32 %num, %num
+  call void @longjmp(i64* %arg, i32 %num3)
+  %num4 = add i32 %num, %num3
+; CHECK: bitcast
+; CHECK-NEXT: call void @llvm.nacl.longjmp
+  call void @longjmp(i64* %arg, i32 %num4)
+  ret void
+}
+
+; CHECK: declare i32 @llvm.nacl.setjmp(i8*)
+; CHECK: declare void @llvm.nacl.longjmp(i8*, i32)
+
diff --git a/test/Transforms/NaCl/strip-attributes.ll b/test/Transforms/NaCl/strip-attributes.ll
new file mode 100644
index 000000000000..b0ef4f4cb68a
--- /dev/null
+++ b/test/Transforms/NaCl/strip-attributes.ll
@@ -0,0 +1,128 @@
+; RUN: opt -S -nacl-strip-attributes < %s | FileCheck %s
+
+
+@var = unnamed_addr global i32 0
+; CHECK: @var = global i32 0
+
+
+define fastcc void @func_attrs(i32 inreg, i32 zeroext)
+    unnamed_addr noreturn nounwind readonly align 8 {
+  ret void
+}
+; CHECK-LABEL: define void @func_attrs(i32, i32) {
+
+define hidden void @hidden_visibility() {
+  ret void
+}
+; CHECK-LABEL: define void @hidden_visibility() {
+
+define protected void @protected_visibility() {
+  ret void
+}
+; CHECK-LABEL: define void @protected_visibility() {
+
+
+define void @call_attrs() {
+  call fastcc void @func_attrs(i32 inreg 10, i32 zeroext 20) noreturn nounwind readonly
+  ret void
+}
+; CHECK-LABEL: define void @call_attrs()
+; CHECK: call void @func_attrs(i32 10, i32 20){{$}}
+
+
+; We currently don't attempt to strip attributes from intrinsic
+; declarations, because the reader automatically inserts attributes
+; based on built-in knowledge of intrinsics, so it is difficult to get
+; rid of them here.
+; Eventually these attributes do have to be removed, though: if they are
+; kept, the IR that Rust passes to pnacl-clang will contain attributes
+; unknown to it.
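+; (Note that the CHECK line below is a substring match, so any attributes
+; the reader re-attaches after the declaration will not cause a failure;
+; the test only requires that the declaration itself is still present.)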
+declare i8* @llvm.nacl.read.tp()
+; CHECK: declare i8* @llvm.nacl.read.tp()
+
+define void @arithmetic_attrs() {
+  %add = add nsw i32 1, 2
+  %shl = shl nuw i32 3, 4
+  %lshr = lshr exact i32 2, 1
+  ret void
+}
+; CHECK-LABEL: define void @arithmetic_attrs() {
+; CHECK-NEXT: %add = add i32 1, 2
+; CHECK-NEXT: %shl = shl i32 3, 4
+; CHECK-NEXT: %lshr = lshr i32 2, 1
+
+
+; Implicit default alignments are changed to explicit alignments.
+define void @default_alignment_attrs(float %f, double %d) {
+  load i8* null
+  load i32* null
+  load float* null
+  load double* null
+
+  store i8 100, i8* null
+  store i32 100, i32* null
+  store float %f, float* null
+  store double %d, double* null
+  ret void
+}
+; CHECK-LABEL: define void @default_alignment_attrs
+; CHECK-NEXT: load i8* null, align 1
+; CHECK-NEXT: load i32* null, align 1
+; CHECK-NEXT: load float* null, align 4
+; CHECK-NEXT: load double* null, align 8
+; CHECK-NEXT: store i8 100, i8* null, align 1
+; CHECK-NEXT: store i32 100, i32* null, align 1
+; CHECK-NEXT: store float %f, float* null, align 4
+; CHECK-NEXT: store double %d, double* null, align 8
+
+define void @reduce_alignment_assumptions() {
+  load i32* null, align 4
+  load float* null, align 2
+  load float* null, align 4
+  load float* null, align 8
+  load double* null, align 2
+  load double* null, align 8
+  load double* null, align 16
+
+  ; Higher alignment assumptions must be retained for atomics.
+  load atomic i32* null seq_cst, align 4
+  load atomic i32* null seq_cst, align 8
+  store atomic i32 100, i32* null seq_cst, align 4
+  store atomic i32 100, i32* null seq_cst, align 8
+  ret void
+}
+; CHECK-LABEL: define void @reduce_alignment_assumptions
+; CHECK-NEXT: load i32* null, align 1
+; CHECK-NEXT: load float* null, align 1
+; CHECK-NEXT: load float* null, align 4
+; CHECK-NEXT: load float* null, align 4
+; CHECK-NEXT: load double* null, align 1
+; CHECK-NEXT: load double* null, align 8
+; CHECK-NEXT: load double* null, align 8
+; CHECK-NEXT: load atomic i32* null seq_cst, align 4
+; CHECK-NEXT: load atomic i32* null seq_cst, align 4
+; CHECK-NEXT: store atomic i32 100, i32* null seq_cst, align 4
+; CHECK-NEXT: store atomic i32 100, i32* null seq_cst, align 4
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+; CHECK-NOT: declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+; CHECK: declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+; CHECK-NOT: declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
+; CHECK: declare void @llvm.memmove.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
+; CHECK-NOT: declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
+; CHECK: declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i32, i1)
+
+define void @reduce_memcpy_alignment_assumptions(i8* %ptr) {
+  call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %ptr,
+                                       i32 20, i32 4, i1 false)
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %ptr, i8* %ptr,
+                                        i32 20, i32 4, i1 false)
+  call void @llvm.memset.p0i8.i32(i8* %ptr, i8 99,
+                                  i32 20, i32 4, i1 false)
+  ret void
+}
+; CHECK-LABEL: define void @reduce_memcpy_alignment_assumptions
+; CHECK-NEXT: call void @llvm.memcpy.{{.*}} i32 20, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memmove.{{.*}} i32 20, i32 1, i1 false)
+; CHECK-NEXT: call void @llvm.memset.{{.*}} i32 20, i32 1, i1 false)
diff --git
a/test/Transforms/NaCl/strip-branchweight-metadata.ll b/test/Transforms/NaCl/strip-branchweight-metadata.ll new file mode 100644 index 000000000000..61d3a6d5af47 --- /dev/null +++ b/test/Transforms/NaCl/strip-branchweight-metadata.ll @@ -0,0 +1,29 @@ +; RUN: opt -S -strip-metadata %s | FileCheck %s + +; Test that !prof metadata is removed from branches +; CHECK: @foo +; CHECK-NOT: !prof +define i32 @foo(i32 %c) { + switch i32 %c, label %3 [ + i32 5, label %4 + i32 0, label %1 + i32 4, label %2 + ], !prof !0 + +;