byte_cursor.h 9.35 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
// -*- mode: c++ -*-

// Copyright (c) 2010, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Original author: Jim Blandy <jimb@mozilla.com> <jimb@red-bean.com>

// byte_cursor.h: Classes for parsing values from a buffer of bytes.
// The ByteCursor class provides a convenient interface for reading
// fixed-size integers of arbitrary endianness, being thorough about
// checking for buffer overruns.

#ifndef COMMON_BYTE_CURSOR_H_
#define COMMON_BYTE_CURSOR_H_

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <string>

#include "common/using_std_string.h"

namespace google_breakpad {

// A buffer holding a series of bytes.
struct ByteBuffer {
  ByteBuffer() : start(0), end(0) { }
  ByteBuffer(const uint8_t *set_start, size_t set_size)
      : start(set_start), end(set_start + set_size) { }
  ~ByteBuffer() { };

  // Equality operators. Useful in unit tests, and when we're using
  // ByteBuffers to refer to regions of a larger buffer.
  bool operator==(const ByteBuffer &that) const {
    return start == that.start && end == that.end;
  }
  bool operator!=(const ByteBuffer &that) const {
    return start != that.start || end != that.end;
  }

  // Not C++ style guide compliant, but this definitely belongs here.
  size_t Size() const {
    assert(start <= end);
    return end - start;
  }

  const uint8_t *start, *end;
};

// A cursor pointing into a ByteBuffer that can parse numbers of various
// widths and representations, strings, and data blocks, advancing through
// the buffer as it goes. All ByteCursor operations check that accesses
// haven't gone beyond the end of the enclosing ByteBuffer.
class ByteCursor {
 public:
  // Create a cursor reading bytes from the start of BUFFER. By default, the
  // cursor reads multi-byte values in little-endian form.
  ByteCursor(const ByteBuffer *buffer, bool big_endian = false)
      : buffer_(buffer), here_(buffer->start), 
        big_endian_(big_endian), complete_(true) { }

  // Accessor and setter for this cursor's endianness flag.
  bool big_endian() const { return big_endian_; }
  void set_big_endian(bool big_endian) { big_endian_ = big_endian; }

  // Accessor and setter for this cursor's current position. The setter
  // returns a reference to this cursor.
  const uint8_t *here() const { return here_; }
  ByteCursor &set_here(const uint8_t *here) {
    assert(buffer_->start <= here && here <= buffer_->end);
    here_ = here;
    return *this;
  }

  // Return the number of bytes available to read at the cursor.
  size_t Available() const { return size_t(buffer_->end - here_); }

  // Return true if this cursor is at the end of its buffer.
  bool AtEnd() const { return Available() == 0; }

  // When used as a boolean value this cursor converts to true if all
  // prior reads have been completed, or false if we ran off the end
  // of the buffer.
  operator bool() const { return complete_; }

  // Read a SIZE-byte integer at this cursor, signed if IS_SIGNED is true,
  // unsigned otherwise, using the cursor's established endianness, and set
  // *RESULT to the number. If we read off the end of our buffer, clear
  // this cursor's complete_ flag, and store a dummy value in *RESULT.
  // Return a reference to this cursor.
  template<typename T>
  ByteCursor &Read(size_t size, bool is_signed, T *result) {
    if (CheckAvailable(size)) {
      T v = 0;
      if (big_endian_) {
        for (size_t i = 0; i < size; i++)
          v = (v << 8) + here_[i];
      } else {
        // This loop condition looks weird, but size_t is unsigned, so
        // decrementing i after it is zero yields the largest size_t value.
        for (size_t i = size - 1; i < size; i--)
          v = (v << 8) + here_[i];
      }
      if (is_signed && size < sizeof(T)) {
        size_t sign_bit = (T)1 << (size * 8 - 1);
        v = (v ^ sign_bit) - sign_bit;
      }
      here_ += size;
      *result = v;
    } else {
      *result = (T) 0xdeadbeef;
    }
    return *this;
  }

  // Read an integer, using the cursor's established endianness and
  // *RESULT's size and signedness, and set *RESULT to the number. If we
  // read off the end of our buffer, clear this cursor's complete_ flag.
  // Return a reference to this cursor.
  template<typename T>
  ByteCursor &operator>>(T &result) {
    bool T_is_signed = (T)-1 < 0;
    return Read(sizeof(T), T_is_signed, &result); 
  }

  // Copy the SIZE bytes at the cursor to BUFFER, and advance this
  // cursor to the end of them. If we read off the end of our buffer,
  // clear this cursor's complete_ flag, and set *POINTER to NULL.
  // Return a reference to this cursor.
  ByteCursor &Read(uint8_t *buffer, size_t size) {
    if (CheckAvailable(size)) {
      memcpy(buffer, here_, size);
      here_ += size;
    }
    return *this;
  }

  // Set STR to a copy of the '\0'-terminated string at the cursor. If the
  // byte buffer does not contain a terminating zero, clear this cursor's
  // complete_ flag, and set STR to the empty string. Return a reference to
  // this cursor.
  ByteCursor &CString(string *str) {
    const uint8_t *end
      = static_cast<const uint8_t *>(memchr(here_, '\0', Available()));
    if (end) {
      str->assign(reinterpret_cast<const char *>(here_), end - here_);
      here_ = end + 1;
    } else {
      str->clear();
      here_ = buffer_->end;
      complete_ = false;
    }
    return *this;
  }

  // Like CString(STR), but extract the string from a fixed-width buffer
  // LIMIT bytes long, which may or may not contain a terminating '\0'
  // byte. Specifically:
  //
  // - If there are not LIMIT bytes available at the cursor, clear the
  //   cursor's complete_ flag and set STR to the empty string.
  //
  // - Otherwise, if the LIMIT bytes at the cursor contain any '\0'
  //   characters, set *STR to a copy of the bytes before the first '\0',
  //   and advance the cursor by LIMIT bytes.
  //   
  // - Otherwise, set *STR to a copy of those LIMIT bytes, and advance the
  //   cursor by LIMIT bytes.
  ByteCursor &CString(string *str, size_t limit) {
    if (CheckAvailable(limit)) {
      const uint8_t *end
        = static_cast<const uint8_t *>(memchr(here_, '\0', limit));
      if (end)
        str->assign(reinterpret_cast<const char *>(here_), end - here_);
      else
        str->assign(reinterpret_cast<const char *>(here_), limit);
      here_ += limit;
    } else {
      str->clear();
    }
    return *this;
  }

  // Set *POINTER to point to the SIZE bytes at the cursor, and advance
  // this cursor to the end of them. If SIZE is omitted, don't move the
  // cursor. If we read off the end of our buffer, clear this cursor's
  // complete_ flag, and set *POINTER to NULL. Return a reference to this
  // cursor.
  ByteCursor &PointTo(const uint8_t **pointer, size_t size = 0) {
    if (CheckAvailable(size)) {
      *pointer = here_;
      here_ += size;
    } else {
      *pointer = NULL;
    }
    return *this;
  }

  // Skip SIZE bytes at the cursor. If doing so would advance us off
  // the end of our buffer, clear this cursor's complete_ flag, and
  // set *POINTER to NULL. Return a reference to this cursor.
  ByteCursor &Skip(size_t size) {
    if (CheckAvailable(size))
      here_ += size;
    return *this;
  }

 private:
  // If there are at least SIZE bytes available to read from the buffer,
  // return true. Otherwise, set here_ to the end of the buffer, set
  // complete_ to false, and return false.
  bool CheckAvailable(size_t size) {
    if (Available() >= size) {
      return true;
    } else {
      here_ = buffer_->end;
      complete_ = false;
      return false;
    }
  }

  // The buffer we're reading bytes from.
  const ByteBuffer *buffer_;

  // The next byte within buffer_ that we'll read.
  const uint8_t *here_;

  // True if we should read numbers in big-endian form; false if we
  // should read in little-endian form.
  bool big_endian_;

  // True if we've been able to read all we've been asked to.
  bool complete_;
};

}  // namespace google_breakpad

#endif  // COMMON_BYTE_CURSOR_H_