// -*- mode: c++ -*-

// Copyright (c) 2010 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// The DwarfLineToModule class accepts line number information from a
// DWARF parser and adds it to a google_breakpad::Module. The Module
// can write that data out as a Breakpad symbol file.

#ifndef COMMON_LINUX_DWARF_LINE_TO_MODULE_H
#define COMMON_LINUX_DWARF_LINE_TO_MODULE_H

#include <map>
#include <string>
#include <vector>

#include "common/module.h"
#include "common/dwarf/dwarf2reader.h"
#include "common/using_std_string.h"

namespace google_breakpad {

// A class for producing a vector of google_breakpad::Module::Line
// instances from parsed DWARF line number data.  
//
// An instance of this class can be provided as a handler to a
// dwarf2reader::LineInfo DWARF line number information parser. The
// handler accepts source location information from the parser and
// uses it to produce a vector of google_breakpad::Module::Line
// objects, referring to google_breakpad::Module::File objects added
// to a particular google_breakpad::Module.
//
// GNU toolchain omitted sections support:
// ======================================
//
// Given the right options, the GNU toolchain will omit unreferenced
// functions from the final executable. Unfortunately, when it does so, it
// does not remove the associated portions of the DWARF line number
// program; instead, it leaves each DW_LNE_set_address instruction that
// referred to the now-deleted code with an address of zero. Given this
// input, the DWARF line parser will call AddLine with a series of lines
// starting at address zero. For example, here is the output from
// 'readelf -wl' for a program with four functions, the first three of
// which have been omitted:
//
//   Line Number Statements:
//    Extended opcode 2: set Address to 0x0
//    Advance Line by 14 to 15
//    Copy
//    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16
//    Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18
//    Advance PC by 2 to 0xd
//    Extended opcode 1: End of Sequence
// 
//    Extended opcode 2: set Address to 0x0
//    Advance Line by 14 to 15
//    Copy
//    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 16
//    Special opcode 119: advance Address by 8 to 0xb and Line by 2 to 18
//    Advance PC by 2 to 0xd
//    Extended opcode 1: End of Sequence
// 
//    Extended opcode 2: set Address to 0x0
//    Advance Line by 19 to 20
//    Copy
//    Special opcode 48: advance Address by 3 to 0x3 and Line by 1 to 21
//    Special opcode 76: advance Address by 5 to 0x8 and Line by 1 to 22
//    Advance PC by 2 to 0xa
//    Extended opcode 1: End of Sequence
// 
//    Extended opcode 2: set Address to 0x80483a4
//    Advance Line by 23 to 24
//    Copy
//    Special opcode 202: advance Address by 14 to 0x80483b2 and Line by 1 to 25
//    Special opcode 76: advance Address by 5 to 0x80483b7 and Line by 1 to 26
//    Advance PC by 6 to 0x80483bd
//    Extended opcode 1: End of Sequence
//
// Instead of collecting runs of lines describing code that is not there,
// we try to recognize and drop them. Since the linker doesn't explicitly
// distinguish references to dropped sections from genuine references to
// code at address zero, we must use a heuristic. We have chosen:
//
// - If a line starts at address zero, omit it. (On the platforms
//   breakpad targets, it is extremely unlikely that there will be code
//   at address zero.)
//
// - If a line starts immediately after an omitted line, omit it too.
class DwarfLineToModule: public dwarf2reader::LineInfoHandler {
 public:
  // As the DWARF line info parser passes us line records, add source
  // files to MODULE, and add all lines to the end of LINES. LINES
  // need not be empty. If the parser hands us a zero-length line, we
  // omit it. If the parser hands us a line that extends beyond the
  // end of the address space, we clip it. It's up to our client to
  // sort out which lines belong to which functions; we don't add them
  // to any particular function in MODULE ourselves.
  //
  // MODULE and LINES are stored as pointers and remain owned by the
  // caller; COMPILATION_DIR is copied into this object.
  DwarfLineToModule(Module *module, const string& compilation_dir,
                    std::vector<Module::Line> *lines)
      : module_(module),
        compilation_dir_(compilation_dir),
        lines_(lines),
        highest_file_number_(-1),
        omitted_line_end_(0),
        warned_bad_file_number_(false),
        warned_bad_directory_number_(false) { }

  ~DwarfLineToModule() { }

  // Handler entry points through which the DWARF line number parser
  // reports what it has read. They are only declared here; their
  // definitions live outside this header.

  // Report that directory number DIR_NUM is named NAME.
  void DefineDir(const string &name, uint32 dir_num);

  // Report that file number FILE_NUM is named NAME and belongs to
  // directory number DIR_NUM. MOD_TIME and LENGTH are whatever the
  // parser supplies for the file's modification time and size.
  void DefineFile(const string &name, int32 file_num,
                  uint32 dir_num, uint64 mod_time,
                  uint64 length);

  // Report a line record: LENGTH bytes of code starting at ADDRESS
  // come from line LINE_NUM, column COLUMN_NUM, of file number
  // FILE_NUM.
  void AddLine(uint64 address, uint64 length,
               uint32 file_num, uint32 line_num, uint32 column_num);

 private:

  // Directory number -> directory path.
  typedef std::map<uint32, string> DirectoryTable;

  // File number -> Module::File pointer.
  typedef std::map<uint32, Module::File *> FileTable;

  // The module we're contributing debugging info to. Owned by our
  // client.
  Module *module_;

  // The compilation directory for the current compilation unit whose
  // lines are being accumulated.
  string compilation_dir_;

  // The vector of lines we're accumulating. Owned by our client.
  //
  // In a Module, as in a breakpad symbol file, lines belong to
  // specific functions, but DWARF simply assigns lines to addresses;
  // one must infer the line/function relationship using the
  // functions' beginning and ending addresses. So we can't add these
  // to the appropriate function from module_ until we've read the
  // function info as well. Instead, we accumulate lines here, and let
  // whoever constructed this sort it all out.
  std::vector<Module::Line> *lines_;

  // A table mapping directory numbers to paths.
  DirectoryTable directories_;

  // A table mapping file numbers to Module::File pointers.
  FileTable files_;

  // The highest file number we've seen so far, or -1 if we've seen
  // none.  Used for dynamically defined file numbers.
  int32 highest_file_number_;

  // This is the ending address of the last line we omitted, or zero if we
  // didn't omit the previous line. It is zero before we have received any
  // AddLine calls.
  uint64 omitted_line_end_;

  // True if we've warned about:
  bool warned_bad_file_number_; // bad file numbers
  bool warned_bad_directory_number_; // bad directory numbers
};

} // namespace google_breakpad

#endif // COMMON_LINUX_DWARF_LINE_TO_MODULE_H