0%

encode和decode

Posted on 2021-12-18 Edited on 2024-09-09 In storage

// Excerpt of object_t: a struct holding an object name together with the
// member functions that serialize it (other members are elided below).
struct object_t {
  std::string name;

  ......

  // Serialize this object by appending `name` to bl.
  void encode(ceph::buffer::list &bl) const {
    // Bring the ceph::encode overloads into scope so the unqualified call
    // below can resolve to the std::string overload.
    using ceph::encode;
    encode(name, bl);
  }
  // Restore this object by reading `name` back from the iterator position bl.
  void decode(ceph::buffer::list::const_iterator &bl) {
    // Same trick as in encode(): make the ceph::decode overloads visible.
    using ceph::decode;
    decode(name, bl);
  }
};
// Generate the free-function encode()/decode() overloads for object_t.
WRITE_CLASS_ENCODER(object_t)

*** From: src/include/object.h ***

Ceph中每一种需要存储的资源，在存储前都要先进行encode操作，然后再写入硬盘。读取时同理，从硬盘获取到数据后需要进行decode操作。而每种需要存储的资源如何encode和decode，当然要由资源自己来决定，所以在资源的class或struct中要实现encode和decode方法。

WRITE_CLASS_ENCODER(object_t)干了些啥呢。。。

// see denc.h for ENCODE_DUMP_PATH discussion and definition.
//
// Optional debugging hooks placed around an encode call. They are only
// active when ENCODE_DUMP_PATH is defined; both refer to a variable named
// `bl` that must be in scope where they are expanded.
#ifdef ENCODE_DUMP_PATH
// Record the length of bl before encoding so that ENCODE_DUMP_POST can
// isolate the bytes appended afterwards.
# define ENCODE_DUMP_PRE()                      \
  unsigned pre_off = bl.length()
// Write the bytes appended to bl since ENCODE_DUMP_PRE() to the file
// <ENCODE_DUMP_PATH>/<cl>__<pid>.<counter in hex>.
//
// A static counter `i` is incremented on every pass. The dump is skipped
// (via `break` out of the do/while) whenever the counter has more than two
// bits set, so dumps become progressively sparser as the counter grows.
// If open() fails the dump is silently skipped.
//
// NOTE(review): `i` is incremented a second time inside the snprintf()
// argument list, so a pass that reaches snprintf() advances the counter by
// two. Unclear from here whether that is intended; confirm before changing.
# define ENCODE_DUMP_POST(cl)                                           \
  do {                                                                  \
    static int i = 0;                                                   \
    i++;                                                                \
    int bits = 0;                                                       \
    for (unsigned t = i; t; bits++)                                     \
      t &= t - 1;                                                       \
    if (bits > 2)                                                       \
      break;                                                            \
    char fn[PATH_MAX];                                                  \
    snprintf(fn, sizeof(fn), ENCODE_STRINGIFY(ENCODE_DUMP_PATH) "/%s__%d.%x", #cl, getpid(), i++); \
    int fd = ::open(fn, O_WRONLY|O_TRUNC|O_CREAT|O_CLOEXEC|O_BINARY, 0644);             \
    if (fd >= 0) {                                                      \
      ::ceph::bufferlist sub;                                           \
      sub.substr_of(bl, pre_off, bl.length() - pre_off);                \
      sub.write_fd(fd);                                                 \
      ::close(fd);                                                      \
    }                                                                   \
  } while (0)
#else
// Dumping disabled: both hooks expand to nothing.
# define ENCODE_DUMP_PRE()
# define ENCODE_DUMP_POST(cl)
#endif


// Generates the free-function encode()/decode() overloads for class `cl`.
// Each overload forwards to the member function of the same name on the
// object; encode() is additionally bracketed by the ENCODE_DUMP_* hooks.
// The `features` argument is accepted but not used by the generated code.
// The buffer parameter has to be called `bl` because the dump hooks refer
// to it by that name.
#define WRITE_CLASS_ENCODER(cl)                                         \
  inline void encode(const cl& c, ::ceph::buffer::list &bl,             \
                     uint64_t features=0) {                             \
    ENCODE_DUMP_PRE();                                                  \
    c.encode(bl);                                                       \
    ENCODE_DUMP_POST(cl);                                               \
  }                                                                     \
  inline void decode(cl &c, ::ceph::bufferlist::const_iterator &p) {    \
    c.decode(p);                                                        \
  }

*** From: src/include/encoding.h ***

看了上面的代码应该能了解到WRITE_CLASS_ENCODER(object_t)是对encode和decode函数的重载。这是入口，然后再调用资源自身的encode或decode方法。

那么对于一些基础类型（如：int、string等）是如何encode和decode的呢？

int类型

// int types

#define WRITE_INTTYPE_ENCODER(type, etype)                              \
  inline void encode(type v, ::ceph::bufferlist& bl, uint64_t features=0) { \
    ceph_##etype e;                                                     \
    e = v;                                                              \
    ::ceph::encode_raw(e, bl);                                          \
  }                                                                     \
  inline void decode(type &v, ::ceph::bufferlist::const_iterator& p) {  \
    ceph_##etype e;                                                     \
    ::ceph::decode_raw(e, p);                                           \
    v = e;                                                              \
  }

WRITE_INTTYPE_ENCODER(uint64_t, le64)
WRITE_INTTYPE_ENCODER(int64_t, le64)
WRITE_INTTYPE_ENCODER(uint32_t, le32)
WRITE_INTTYPE_ENCODER(int32_t, le32)
WRITE_INTTYPE_ENCODER(uint16_t, le16)
WRITE_INTTYPE_ENCODER(int16_t, le16)

*** From: src/include/encoding.h ***

int类型的encode和decode又调用了encode_raw和decode_raw。真是一层套一层啊～（俄罗斯套娃嘛）～

// base types

// Append the object representation of `t` (sizeof(t) bytes, copied as-is
// from memory) to the end of `bl`. No conversion of any kind is applied.
template<class T>
inline void encode_raw(const T& t, bufferlist& bl)
{
  const char* const bytes = reinterpret_cast<const char*>(&t);
  bl.append(bytes, sizeof(t));
}
// Copy sizeof(t) bytes from the iterator `p` directly over the storage of
// `t`; the counterpart of encode_raw().
template<class T>
inline void decode_raw(T& t, bufferlist::const_iterator &p)
{
  char* const dest = reinterpret_cast<char*>(&t);
  p.copy(sizeof(t), dest);
}

// Generates encode()/decode() overloads for `type` that forward straight to
// encode_raw()/decode_raw(), i.e. the value's bytes are copied unchanged.
// `features` is accepted but unused.
#define WRITE_RAW_ENCODER(type)                                         \
  inline void encode(const type &v, ::ceph::bufferlist& bl,             \
                     uint64_t features=0) {                             \
    ::ceph::encode_raw(v, bl);                                          \
  }                                                                     \
  inline void decode(type &v, ::ceph::bufferlist::const_iterator& p) {  \
    ::ceph::decode_raw(v, p);                                           \
  }

// Single-byte types and the ceph_le* wrapper types are copied byte-for-byte.
WRITE_RAW_ENCODER(__u8)
// NOTE(review): __s8 is left out when _CHAR_IS_SIGNED is defined, presumably
// because it would then collide with the `char` overload below; confirm.
#ifndef _CHAR_IS_SIGNED
WRITE_RAW_ENCODER(__s8)
#endif
WRITE_RAW_ENCODER(char)
WRITE_RAW_ENCODER(ceph_le64)
WRITE_RAW_ENCODER(ceph_le32)
WRITE_RAW_ENCODER(ceph_le16)

*** From: src/include/encoding.h ***

base比较简单，就是无论int几个字节，都是从低到高一个字节一个字节地写下去，再一个字节一个字节地读出来。。。

float类型

#define WRITE_FLTTYPE_ENCODER(type, itype, etype)                       \
  static_assert(sizeof(type) == sizeof(itype));                         \
  static_assert(std::numeric_limits<type>::is_iec559,                   \
              "floating-point type not using IEEE754 format");          \
  inline void encode(type v, ::ceph::bufferlist& bl, uint64_t features=0) { \
    ceph_##etype e;                                                     \
    e = *reinterpret_cast<itype *>(&v);                                 \
    ::ceph::encode_raw(e, bl);                                          \
  }                                                                     \
  inline void decode(type &v, ::ceph::bufferlist::const_iterator& p) {  \
    ceph_##etype e;                                                     \
    ::ceph::decode_raw(e, p);                                           \
    *reinterpret_cast<itype *>(&v) = e;                                 \
  }

WRITE_FLTTYPE_ENCODER(float, uint32_t, le32)
WRITE_FLTTYPE_ENCODER(double, uint64_t, le64)

*** From: src/include/encoding.h ***

float类型关键在于reinterpret_cast将一个浮点数转换为整数。更多关于reinterpret_cast的内容

string

// string
inline void encode(std::string_view s, bufferlist& bl, uint64_t features=0)
{
  __u32 len = s.length();
  encode(len, bl);
  if (len)
    bl.append(s.data(), len);
}
inline void encode(const std::string& s, bufferlist& bl, uint64_t features=0)
{
  return encode(std::string_view(s), bl, features);
}
inline void decode(std::string& s, bufferlist::const_iterator& p)
{
  __u32 len;
  decode(len, p);
  s.clear();
  p.copy(len, s);
}

// "nohead" variants: only the characters are written, with no length prefix,
// so the reader has to be told the length by other means.

// Append the characters of `s` to `bl` as-is.
inline void encode_nohead(std::string_view s, bufferlist& bl)
{
  const char* const chars = s.data();
  bl.append(chars, s.length());
}
// std::string overload: view the string and defer to the overload above.
inline void encode_nohead(const std::string& s, bufferlist& bl)
{
  const std::string_view view(s);
  encode_nohead(view, bl);
}
// Replace the contents of `s` with `len` bytes taken from `p`; the caller
// supplies `len` because the encoded form carries no length.
inline void decode_nohead(int len, std::string& s, bufferlist::const_iterator& p)
{
  s.clear();  // start from an empty string before copying in
  p.copy(len, s);
}

// const char* (encode only, string compatible)
inline void encode(const char *s, bufferlist& bl)
{
  encode(std::string_view(s, strlen(s)), bl);
}

*** From: src/include/encoding.h ***

string的encode和decode分两种，一种是有“害的”(head)，一种是无“害的”。有“害的”需要先记录string的长度，再记录string的内容；无“害的”直接记录内容，但在decode过程中需要指定长度。总之这个长度总要有个人来记。好鸡肋！

整个的encode和decode的过程用到了一个bufferlist类型，那么这个bufferlist又是个什么结构呢，详细请见ceph中的buffer