bytereader.h 14.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
// -*- mode: C++ -*-

// Copyright (c) 2010 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#ifndef COMMON_DWARF_BYTEREADER_H__
#define COMMON_DWARF_BYTEREADER_H__

#include <stdint.h>

#include <string>

#include "common/dwarf/types.h"
#include "common/dwarf/dwarf2enums.h"

namespace dwarf2reader {

// We can't use the obvious name of LITTLE_ENDIAN and BIG_ENDIAN
// because it conflicts with a macro
enum Endianness {
  ENDIANNESS_BIG,
  ENDIANNESS_LITTLE
};

// A ByteReader knows how to read single- and multi-byte values of
// various endiannesses, sizes, and encodings, as used in DWARF
// debugging information and Linux C++ exception handling data.
class ByteReader {
 public:
  // Construct a ByteReader capable of reading one-, two-, four-, and
  // eight-byte values according to ENDIANNESS, absolute machine-sized
  // addresses, DWARF-style "initial length" values, signed and
  // unsigned LEB128 numbers, and Linux C++ exception handling data's
  // encoded pointers.
  explicit ByteReader(enum Endianness endianness);
  virtual ~ByteReader();

  // Read a single byte from BUFFER and return it as an unsigned 8 bit
  // number.
  uint8 ReadOneByte(const uint8_t *buffer) const;

  // Read two bytes from BUFFER and return them as an unsigned 16 bit
  // number, using this ByteReader's endianness.
  uint16 ReadTwoBytes(const uint8_t *buffer) const;

  // Read four bytes from BUFFER and return them as an unsigned 32 bit
  // number, using this ByteReader's endianness. This function returns
  // a uint64 so that it is compatible with ReadAddress and
  // ReadOffset. The number it returns will never be outside the range
  // of an unsigned 32 bit integer.
  uint64 ReadFourBytes(const uint8_t *buffer) const;

  // Read eight bytes from BUFFER and return them as an unsigned 64
  // bit number, using this ByteReader's endianness.
  uint64 ReadEightBytes(const uint8_t *buffer) const;

  // Read an unsigned LEB128 (Little Endian Base 128) number from
  // BUFFER and return it as an unsigned 64 bit integer. Set LEN to
  // the number of bytes read.
  //
  // The unsigned LEB128 representation of an integer N is a variable
  // number of bytes:
  //
  // - If N is between 0 and 0x7f, then its unsigned LEB128
  //   representation is a single byte whose value is N.
  //
  // - Otherwise, its unsigned LEB128 representation is (N & 0x7f) |
  //   0x80, followed by the unsigned LEB128 representation of N /
  //   128, rounded towards negative infinity.
  //
  // In other words, we break VALUE into groups of seven bits, put
  // them in little-endian order, and then write them as eight-bit
  // bytes with the high bit on all but the last.
  uint64 ReadUnsignedLEB128(const uint8_t *buffer, size_t *len) const;

  // Read a signed LEB128 number from BUFFER and return it as an
  // signed 64 bit integer. Set LEN to the number of bytes read.
  //
  // The signed LEB128 representation of an integer N is a variable
  // number of bytes:
  //
  // - If N is between -0x40 and 0x3f, then its signed LEB128
  //   representation is a single byte whose value is N in two's
  //   complement.
  //
  // - Otherwise, its signed LEB128 representation is (N & 0x7f) |
  //   0x80, followed by the signed LEB128 representation of N / 128,
  //   rounded towards negative infinity.
  //
  // In other words, we break VALUE into groups of seven bits, put
  // them in little-endian order, and then write them as eight-bit
  // bytes with the high bit on all but the last.
  int64 ReadSignedLEB128(const uint8_t *buffer, size_t *len) const;

  // Indicate that addresses on this architecture are SIZE bytes long. SIZE
  // must be either 4 or 8. (DWARF allows addresses to be any number of
  // bytes in length from 1 to 255, but we only support 32- and 64-bit
  // addresses at the moment.) You must call this before using the
  // ReadAddress member function.
  //
  // For data in a .debug_info section, or something that .debug_info
  // refers to like line number or macro data, the compilation unit
  // header's address_size field indicates the address size to use. Call
  // frame information doesn't indicate its address size (a shortcoming of
  // the spec); you must supply the appropriate size based on the
  // architecture of the target machine.
  void SetAddressSize(uint8 size);

  // Return the current address size, in bytes. This is either 4,
  // indicating 32-bit addresses, or 8, indicating 64-bit addresses.
  uint8 AddressSize() const { return address_size_; }

  // Read an address from BUFFER and return it as an unsigned 64 bit
  // integer, respecting this ByteReader's endianness and address size. You
  // must call SetAddressSize before calling this function.
  uint64 ReadAddress(const uint8_t *buffer) const;

  // DWARF actually defines two slightly different formats: 32-bit DWARF
  // and 64-bit DWARF. This is *not* related to the size of registers or
  // addresses on the target machine; it refers only to the size of section
  // offsets and data lengths appearing in the DWARF data. One only needs
  // 64-bit DWARF when the debugging data itself is larger than 4GiB.
  // 32-bit DWARF can handle x86_64 or PPC64 code just fine, unless the
  // debugging data itself is very large.
  //
  // DWARF information identifies itself as 32-bit or 64-bit DWARF: each
  // compilation unit and call frame information entry begins with an
  // "initial length" field, which, in addition to giving the length of the
  // data, also indicates the size of section offsets and lengths appearing
  // in that data. The ReadInitialLength member function, below, reads an
  // initial length and sets the ByteReader's offset size as a side effect.
  // Thus, in the normal process of reading DWARF data, the appropriate
  // offset size is set automatically. So, you should only need to call
  // SetOffsetSize if you are using the same ByteReader to jump from the
  // midst of one block of DWARF data into another.

  // Read a DWARF "initial length" field from START, and return it as
  // an unsigned 64 bit integer, respecting this ByteReader's
  // endianness. Set *LEN to the length of the initial length in
  // bytes, either four or twelve. As a side effect, set this
  // ByteReader's offset size to either 4 (if we see a 32-bit DWARF
  // initial length) or 8 (if we see a 64-bit DWARF initial length).
  //
  // A DWARF initial length is either:
  //
  // - a byte count stored as an unsigned 32-bit value less than
  //   0xffffff00, indicating that the data whose length is being
  //   measured uses the 32-bit DWARF format, or
  //
  // - The 32-bit value 0xffffffff, followed by a 64-bit byte count,
  //   indicating that the data whose length is being measured uses
  //   the 64-bit DWARF format.
  uint64 ReadInitialLength(const uint8_t *start, size_t *len);

  // Read an offset from BUFFER and return it as an unsigned 64 bit
  // integer, respecting the ByteReader's endianness. In 32-bit DWARF, the
  // offset is 4 bytes long; in 64-bit DWARF, the offset is eight bytes
  // long. You must call ReadInitialLength or SetOffsetSize before calling
  // this function; see the comments above for details.
  uint64 ReadOffset(const uint8_t *buffer) const;

  // Return the current offset size, in bytes.
  // A return value of 4 indicates that we are reading 32-bit DWARF.
  // A return value of 8 indicates that we are reading 64-bit DWARF.
  uint8 OffsetSize() const { return offset_size_; }

  // Indicate that section offsets and lengths are SIZE bytes long. SIZE
  // must be either 4 (meaning 32-bit DWARF) or 8 (meaning 64-bit DWARF).
  // Usually, you should not call this function yourself; instead, let a
  // call to ReadInitialLength establish the data's offset size
  // automatically.
  void SetOffsetSize(uint8 size);

  // The Linux C++ ABI uses a variant of DWARF call frame information
  // for exception handling. This data is included in the program's
  // address space as the ".eh_frame" section, and intepreted at
  // runtime to walk the stack, find exception handlers, and run
  // cleanup code. The format is mostly the same as DWARF CFI, with
  // some adjustments made to provide the additional
  // exception-handling data, and to make the data easier to work with
  // in memory --- for example, to allow it to be placed in read-only
  // memory even when describing position-independent code.
  //
  // In particular, exception handling data can select a number of
  // different encodings for pointers that appear in the data, as
  // described by the DwarfPointerEncoding enum. There are actually
  // four axes(!) to the encoding:
  //
  // - The pointer size: pointers can be 2, 4, or 8 bytes long, or use
  //   the DWARF LEB128 encoding.
  //
  // - The pointer's signedness: pointers can be signed or unsigned.
  //
  // - The pointer's base address: the data stored in the exception
  //   handling data can be the actual address (that is, an absolute
  //   pointer), or relative to one of a number of different base
  //   addreses --- including that of the encoded pointer itself, for
  //   a form of "pc-relative" addressing.
  //
  // - The pointer may be indirect: it may be the address where the
  //   true pointer is stored. (This is used to refer to things via
  //   global offset table entries, program linkage table entries, or
  //   other tricks used in position-independent code.)
  //
  // There are also two options that fall outside that matrix
  // altogether: the pointer may be omitted, or it may have padding to
  // align it on an appropriate address boundary. (That last option
  // may seem like it should be just another axis, but it is not.)

  // Indicate that the exception handling data is loaded starting at
  // SECTION_BASE, and that the start of its buffer in our own memory
  // is BUFFER_BASE. This allows us to find the address that a given
  // byte in our buffer would have when loaded into the program the
  // data describes. We need this to resolve DW_EH_PE_pcrel pointers.
  void SetCFIDataBase(uint64 section_base, const uint8_t *buffer_base);

  // Indicate that the base address of the program's ".text" section
  // is TEXT_BASE. We need this to resolve DW_EH_PE_textrel pointers.
  void SetTextBase(uint64 text_base);

  // Indicate that the base address for DW_EH_PE_datarel pointers is
  // DATA_BASE. The proper value depends on the ABI; it is usually the
  // address of the global offset table, held in a designated register in
  // position-independent code. You will need to look at the startup code
  // for the target system to be sure. I tried; my eyes bled.
  void SetDataBase(uint64 data_base);

  // Indicate that the base address for the FDE we are processing is
  // FUNCTION_BASE. This is the start address of DW_EH_PE_funcrel
  // pointers. (This encoding does not seem to be used by the GNU
  // toolchain.)
  void SetFunctionBase(uint64 function_base);

  // Indicate that we are no longer processing any FDE, so any use of
  // a DW_EH_PE_funcrel encoding is an error.
  void ClearFunctionBase();

  // Return true if ENCODING is a valid pointer encoding.
  bool ValidEncoding(DwarfPointerEncoding encoding) const;

  // Return true if we have all the information we need to read a
  // pointer that uses ENCODING. This checks that the appropriate
  // SetFooBase function for ENCODING has been called.
  bool UsableEncoding(DwarfPointerEncoding encoding) const;

  // Read an encoded pointer from BUFFER using ENCODING; return the
  // absolute address it represents, and set *LEN to the pointer's
  // length in bytes, including any padding for aligned pointers.
  //
  // This function calls 'abort' if ENCODING is invalid or refers to a
  // base address this reader hasn't been given, so you should check
  // with ValidEncoding and UsableEncoding first if you would rather
  // die in a more helpful way.
  uint64 ReadEncodedPointer(const uint8_t *buffer,
                            DwarfPointerEncoding encoding,
                            size_t *len) const;

  Endianness GetEndianness() const;
 private:

  // Function pointer type for our address and offset readers.
  typedef uint64 (ByteReader::*AddressReader)(const uint8_t *) const;

  // Read an offset from BUFFER and return it as an unsigned 64 bit
  // integer.  DWARF2/3 define offsets as either 4 or 8 bytes,
  // generally depending on the amount of DWARF2/3 info present.
  // This function pointer gets set by SetOffsetSize.
  AddressReader offset_reader_;

  // Read an address from BUFFER and return it as an unsigned 64 bit
  // integer.  DWARF2/3 allow addresses to be any size from 0-255
  // bytes currently.  Internally we support 4 and 8 byte addresses,
  // and will CHECK on anything else.
  // This function pointer gets set by SetAddressSize.
  AddressReader address_reader_;

  Endianness endian_;
  uint8 address_size_;
  uint8 offset_size_;

  // Base addresses for Linux C++ exception handling data's encoded pointers.
  bool have_section_base_, have_text_base_, have_data_base_;
  bool have_function_base_;
  uint64 section_base_, text_base_, data_base_, function_base_;
  const uint8_t *buffer_base_;
};

}  // namespace dwarf2reader

#endif  // COMMON_DWARF_BYTEREADER_H__