macho_reader.h 17 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
// -*- mode: C++ -*-

// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// macho_reader.h: A class for parsing Mach-O files.

#ifndef BREAKPAD_COMMON_MAC_MACHO_READER_H_
#define BREAKPAD_COMMON_MAC_MACHO_READER_H_

#include <mach-o/loader.h>
#include <mach-o/fat.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>

#include <map>
#include <string>
#include <vector>

#include "common/byte_cursor.h"
#include "common/mac/super_fat_arch.h"

namespace google_breakpad {
namespace mach_o {

using std::map;
using std::string;
using std::vector;

// The Mac headers don't specify particular types for these groups of
// constants, but defining them here provides some documentation
// value.  We also give them the same width as the fields in which
// they appear, which makes them a bit easier to use with ByteCursors.
typedef uint32_t Magic;
typedef uint32_t FileType;
typedef uint32_t FileFlags;
typedef uint32_t LoadCommandType;
typedef uint32_t SegmentFlags;
typedef uint32_t SectionFlags;

// A parser for fat binary files, used to store universal binaries.
// When applied to a (non-fat) Mach-O file, this behaves as if the
// file were a fat file containing a single object file.
class FatReader {
 public:

  // A class for reporting errors found while parsing fat binary files. The
  // default definitions of these methods print messages to stderr.
  class Reporter {
   public:
    // Create a reporter that attributes problems to |filename|.
    explicit Reporter(const string &filename) : filename_(filename) { }

    virtual ~Reporter() { }

    // The data does not begin with a fat binary or Mach-O magic number.
    // This is a fatal error.
    virtual void BadHeader();

    // The Mach-O fat binary file ends abruptly, without enough space
    // to contain an object file it claims is present.
    virtual void MisplacedObjectFile();

    // The file ends abruptly: either it is not large enough to hold a
    // complete header, or the header implies that contents are present
    // beyond the actual end of the file.
    virtual void TooShort();

   private:
    // The filename to which the reader should attribute problems.
    string filename_;
  };

  // Create a fat binary file reader that uses |reporter| to report problems.
  explicit FatReader(Reporter *reporter) : reporter_(reporter) { }

  // Read the |size| bytes at |buffer| as a fat binary file. On success,
  // return true; on failure, report the problem to reporter_ and return
  // false.
  //
  // If the data is a plain Mach-O file, rather than a fat binary file,
  // then the reader behaves as if it had found a fat binary file whose
  // single object file is the Mach-O file.
  bool Read(const uint8_t *buffer, size_t size);

  // Return an array of 'SuperFatArch' structures describing the
  // object files present in this fat binary file. Set |size| to the
  // number of elements in the array.
  //
  // Assuming Read returned true, the entries are validated: it is safe to
  // assume that the offsets and sizes in each SuperFatArch refer to subranges
  // of the bytes passed to Read.
  //
  // If there are no object files in this fat binary, then this
  // function can return NULL.
  //
  // The array is owned by this FatReader instance; it will be freed when
  // this FatReader is destroyed.
  //
  // This function returns a C-style array instead of a vector to make it
  // possible to use the result with OS X functions like NXFindBestFatArch,
  // so that the symbol dumper will behave consistently with other OS X
  // utilities that work with fat binaries.
  const SuperFatArch* object_files(size_t *count) const {
    *count = object_files_.size();
    if (object_files_.size() > 0)
      return &object_files_[0];
    return NULL;
  }

 private:
  // We use this to report problems parsing the file's contents. (WEAK)
  Reporter *reporter_;

  // The contents of the fat binary or Mach-O file we're parsing. We do not
  // own the storage it refers to.
  ByteBuffer buffer_;

  // The magic number of this binary, in host byte order.
  Magic magic_;

  // The list of object files in this binary.
  // object_files_.size() == fat_header.nfat_arch
  vector<SuperFatArch> object_files_;
};

// A segment in a Mach-O file. All these fields have been byte-swapped as
// appropriate for use by the executing architecture.
struct Segment {
  // The ByteBuffers below point into the bytes passed to the Reader that
  // created this Segment.

  ByteBuffer section_list;    // This segment's section list.
  ByteBuffer contents;        // This segment's contents.

  // This segment's name.
  string name;

  // The address at which this segment should be loaded in memory. If
  // bits_64 is false, only the bottom 32 bits of this value are valid.
  uint64_t vmaddr;

  // The size of this segment when loaded into memory. This may be larger
  // than contents.Size(), in which case the extra area will be
  // initialized with zeros. If bits_64 is false, only the bottom 32 bits
  // of this value are valid.
  uint64_t vmsize;

  // The maximum and initial VM protection of this segment's contents.
  uint32_t maxprot;
  uint32_t initprot;

  // The number of sections in section_list.
  uint32_t nsects;

  // Flags describing this segment, from SegmentFlags.
  uint32_t flags;

  // True if this is a 64-bit section; false if it is a 32-bit section.
  bool bits_64;
};

// A section in a Mach-O file. All these fields have been byte-swapped as
// appropriate for use by the executing architecture.
struct Section {
  // This section's contents. This points into the bytes passed to the
  // Reader that created this Section.
  ByteBuffer contents;

  // This section's name.
  string section_name;  // section[_64].sectname
  // The name of the segment this section belongs to.
  string segment_name;  // section[_64].segname

  // The address at which this section's contents should be loaded in
  // memory. If bits_64 is false, only the bottom 32 bits of this value
  // are valid.
  uint64_t address;

  // The contents of this section should be loaded into memory at an
  // address which is a multiple of (two raised to this power).
  uint32_t align;

  // Flags from SectionFlags describing the section's contents.
  uint32_t flags;

  // We don't support reading relocations yet.

  // True if this is a 64-bit section; false if it is a 32-bit section.
  bool bits_64;
};

// A map from section names to Sections.
typedef map<string, Section> SectionMap;

// A reader for a Mach-O file.
//
// This does not handle fat binaries; see FatReader above. FatReader
// provides a friendly interface for parsing data that could be either a
// fat binary or a Mach-O file.
class Reader {
 public:

  // A class for reporting errors found while parsing Mach-O files. The
  // default definitions of these member functions print messages to
  // stderr.
  class Reporter {
   public:
    // Create a reporter that attributes problems to |filename|.
    explicit Reporter(const string &filename) : filename_(filename) { }
    virtual ~Reporter() { }

    // Reporter functions for fatal errors return void; the reader will
    // definitely return an error to its caller after calling them

    // The data does not begin with a Mach-O magic number, or the magic
    // number does not match the expected value for the cpu architecture.
    // This is a fatal error.
    virtual void BadHeader();

    // The data contained in a Mach-O fat binary (|cpu_type|, |cpu_subtype|)
    // does not match the expected CPU architecture
    // (|expected_cpu_type|, |expected_cpu_subtype|).
    virtual void CPUTypeMismatch(cpu_type_t cpu_type,
                                 cpu_subtype_t cpu_subtype,
                                 cpu_type_t expected_cpu_type,
                                 cpu_subtype_t expected_cpu_subtype);

    // The file ends abruptly: either it is not large enough to hold a
    // complete header, or the header implies that contents are present
    // beyond the actual end of the file.
    virtual void HeaderTruncated();

    // The file's load command region, as given in the Mach-O header, is
    // too large for the file.
    virtual void LoadCommandRegionTruncated();

    // The file's Mach-O header claims the file contains |claimed| load
    // commands, but the I'th load command, of type |type|, extends beyond
    // the end of the load command region, as given by the Mach-O header.
    // If |type| is zero, the command's type was unreadable.
    virtual void LoadCommandsOverrun(size_t claimed, size_t i,
                                     LoadCommandType type);

    // The contents of the |i|'th load command, of type |type|, extend beyond
    // the size given in the load command's header.
    virtual void LoadCommandTooShort(size_t i, LoadCommandType type);

    // The LC_SEGMENT or LC_SEGMENT_64 load command for the segment named
    // |name| is too short to hold the sections that its header says it does.
    // (This more specific than LoadCommandTooShort.)
    virtual void SectionsMissing(const string &name);

    // The segment named |name| claims that its contents lie beyond the end
    // of the file.
    virtual void MisplacedSegmentData(const string &name);

    // The section named |section| in the segment named |segment| claims that
    // its contents do not lie entirely within the segment.
    virtual void MisplacedSectionData(const string &section,
                                      const string &segment);

    // The LC_SYMTAB command claims that symbol table contents are located
    // beyond the end of the file.
    virtual void MisplacedSymbolTable();

    // An attempt was made to read a Mach-O file of the unsupported
    // CPU architecture |cpu_type|.
    virtual void UnsupportedCPUType(cpu_type_t cpu_type);

   private:
    string filename_;
  };

  // A handler for sections parsed from a segment. The WalkSegmentSections
  // member function accepts an instance of this class, and applies it to
  // each section defined in a given segment.
  class SectionHandler {
   public:
    virtual ~SectionHandler() { }

    // Called to report that the segment's section list contains |section|.
    // This should return true if the iteration should continue, or false
    // if it should stop.
    virtual bool HandleSection(const Section &section) = 0;
  };

  // A handler for the load commands in a Mach-O file.
  class LoadCommandHandler {
   public:
    LoadCommandHandler() { }
    virtual ~LoadCommandHandler() { }

    // When called from WalkLoadCommands, the following handler functions
    // should return true if they wish to continue iterating over the load
    // command list, or false if they wish to stop iterating.
    //
    // When called from LoadCommandIterator::Handle or Reader::Handle,
    // these functions' return values are simply passed through to Handle's
    // caller.
    //
    // The definitions provided by this base class simply return true; the
    // default is to silently ignore sections whose member functions the
    // subclass doesn't override.

    // COMMAND is load command we don't recognize. We provide only the
    // command type and a ByteBuffer enclosing the command's data (If we
    // cannot parse the command type or its size, we call
    // reporter_->IncompleteLoadCommand instead.)
    virtual bool UnknownCommand(LoadCommandType type,
                                const ByteBuffer &contents) {
      return true;
    }

    // The load command is LC_SEGMENT or LC_SEGMENT_64, defining a segment
    // with the properties given in |segment|.
    virtual bool SegmentCommand(const Segment &segment) {
      return true;
    }

    // The load command is LC_SYMTAB. |entries| holds the array of nlist
    // entries, and |names| holds the strings the entries refer to.
    virtual bool SymtabCommand(const ByteBuffer &entries,
                               const ByteBuffer &names) {
      return true;
    }

    // Add handler functions for more load commands here as needed.
  };

  // Create a Mach-O file reader that reports problems to |reporter|.
  explicit Reader(Reporter *reporter)
      : reporter_(reporter) { }

  // Read the given data as a Mach-O file. The reader retains pointers
  // into the data passed, so the data should live as long as the reader
  // does. On success, return true; on failure, return false.
  //
  // At most one of these functions should be invoked once on each Reader
  // instance.
  bool Read(const uint8_t *buffer,
            size_t size,
            cpu_type_t expected_cpu_type,
            cpu_subtype_t expected_cpu_subtype);
  bool Read(const ByteBuffer &buffer,
            cpu_type_t expected_cpu_type,
            cpu_subtype_t expected_cpu_subtype) {
    return Read(buffer.start,
                buffer.Size(),
                expected_cpu_type,
                expected_cpu_subtype);
  }

  // Return this file's characteristics, as found in the Mach-O header.
  cpu_type_t    cpu_type()    const { return cpu_type_; }
  cpu_subtype_t cpu_subtype() const { return cpu_subtype_; }
  FileType      file_type()   const { return file_type_; }
  FileFlags     flags()       const { return flags_; }

  // Return true if this is a 64-bit Mach-O file, false if it is a 32-bit
  // Mach-O file.
  bool bits_64() const { return bits_64_; }

  // Return true if this is a big-endian Mach-O file, false if it is
  // little-endian.
  bool big_endian() const { return big_endian_; }

  // Apply |handler| to each load command in this Mach-O file, stopping when
  // a handler function returns false. If we encounter a malformed load
  // command, report it via reporter_ and return false. Return true if all
  // load commands were parseable and all handlers returned true.
  bool WalkLoadCommands(LoadCommandHandler *handler) const;

  // Set |segment| to describe the segment named |name|, if present. If
  // found, |segment|'s byte buffers refer to a subregion of the bytes
  // passed to Read. If we find the section, return true; otherwise,
  // return false.
  bool FindSegment(const string &name, Segment *segment) const;

  // Apply |handler| to each section defined in |segment|. If |handler| returns
  // false, stop iterating and return false. If all calls to |handler| return
  // true and we reach the end of the section list, return true.
  bool WalkSegmentSections(const Segment &segment, SectionHandler *handler)
    const;

  // Clear |section_map| and then populate it with a map of the sections
  // in |segment|, from section names to Section structures.
  // Each Section's contents refer to bytes in |segment|'s contents.
  // On success, return true; if a problem occurs, report it and return false.
  bool MapSegmentSections(const Segment &segment, SectionMap *section_map)
    const;

 private:
  // Used internally.
  class SegmentFinder;
  class SectionMapper;

  // We use this to report problems parsing the file's contents. (WEAK)
  Reporter *reporter_;

  // The contents of the Mach-O file we're parsing. We do not own the
  // storage it refers to.
  ByteBuffer buffer_;

  // True if this file is big-endian.
  bool big_endian_;

  // True if this file is a 64-bit Mach-O file.
  bool bits_64_;

  // This file's cpu type and subtype.
  cpu_type_t cpu_type_;        // mach_header[_64].cputype
  cpu_subtype_t cpu_subtype_;  // mach_header[_64].cpusubtype

  // This file's type.
  FileType file_type_;         // mach_header[_64].filetype

  // The region of buffer_ occupied by load commands.
  ByteBuffer load_commands_;

  // The number of load commands in load_commands_.
  uint32_t load_command_count_;  // mach_header[_64].ncmds

  // This file's header flags.
  FileFlags flags_;
};

}  // namespace mach_o
}  // namespace google_breakpad

#endif  // BREAKPAD_COMMON_MAC_MACHO_READER_H_