单元测试

这里我们使用了 vitest 来作为测试框架,可以开箱即用的使用 TypeScript 来编写测试。创建一个单独的 test 文件夹,然后我们把所有的测试都放在里面。首先是针对 Cell 的测试,我们把它放在 test/cell.test.ts 中,检查 Cell 的二进制格式和各种数据访问是不是都符合我们的预期:

test('PointerCell', () => {
  const cell = PointerCell.create(Buffer.from('a'), 2, 4000);
  expect(cell.type).toBe(CellType.Pointer);
  expect(cell.keySize).toBe(1);
  expect(cell.key.toString()).toBe('a');
  expect(cell.childPageId).toBe(2);
  expect(cell.size).toBe(9);
  expect(cell.offset).toBe(4000);
});

test('KeyValueCell', () => {
  const cell = KeyValueCell.create(Buffer.from('b'), serialize(1), 3000);
  expect(cell.type).toBe(CellType.KeyValue);
  expect(cell.keySize).toBe(1);
  expect(cell.key.toString()).toBe('b');
  expect(cell.valueSize).toBe(9);
  expect(deserialize(cell.value)).toBe(1);
  expect(cell.size).toBe(18);
  expect(cell.offset).toBe(3000);
});
test('PointerCell', () => {
  const cell = PointerCell.create(Buffer.from('a'), 2, 4000);
  expect(cell.type).toBe(CellType.Pointer);
  expect(cell.keySize).toBe(1);
  expect(cell.key.toString()).toBe('a');
  expect(cell.childPageId).toBe(2);
  expect(cell.size).toBe(9);
  expect(cell.offset).toBe(4000);
});

test('KeyValueCell', () => {
  const cell = KeyValueCell.create(Buffer.from('b'), serialize(1), 3000);
  expect(cell.type).toBe(CellType.KeyValue);
  expect(cell.keySize).toBe(1);
  expect(cell.key.toString()).toBe('b');
  expect(cell.valueSize).toBe(9);
  expect(deserialize(cell.value)).toBe(1);
  expect(cell.size).toBe(18);
  expect(cell.offset).toBe(3000);
});

package.json 中添加一个 vitest run 的 script,再执行 npm test,就能看到测试结果了。

接下来是 BTreeNode, 再创建一个 test/btreenode.test.ts 文件,由于查询数据需要先有数据存进去了才行,这里我们先实现一个最简单的插入数据的场景,就是在文件是一个刚创建的新的空文件的时候,此时它还没有任何数据,当执行 set 命令时,需要先通过 Pager 分配一个页,然后再直接把数据写入到这个页中。

export class BTree {
  //...
  public saveNodeToFile(node: BTreeNode) {
    this.pager.writePageById(node.id, node.buffer);
  }

  public insert(key: Buffer, value: Buffer) {
    // 没有根节点的话就先创建一个
    if (!this.root) {
      const [id, buf] = this.pager.allocNewPage();
      this.root = new BTreeNode(id, buf);
      this.pager.setRootPage(id, buf);
    }

    // 先找到 key 应该存放的叶子节点
    const node = this.cursor.findLeafNodeByKey(this.root, key);

    // 再插入到其中
    node.insertKeyValueCell(key, value);
    this.saveNodeToFile(node);
  }
}
export class BTree {
  //...
  public saveNodeToFile(node: BTreeNode) {
    this.pager.writePageById(node.id, node.buffer);
  }

  public insert(key: Buffer, value: Buffer) {
    // 没有根节点的话就先创建一个
    if (!this.root) {
      const [id, buf] = this.pager.allocNewPage();
      this.root = new BTreeNode(id, buf);
      this.pager.setRootPage(id, buf);
    }

    // 先找到 key 应该存放的叶子节点
    const node = this.cursor.findLeafNodeByKey(this.root, key);

    // 再插入到其中
    node.insertKeyValueCell(key, value);
    this.saveNodeToFile(node);
  }
}

来看看 Pager 的 allocNewPage 方法:

// 增加涉及到节点的一些静态属性和方法
export class BTreeNode {
  public static readonly HEADER_SIZE = 8;
  public static readonly DEFAULT_FREE_START = 8;
  public static readonly DEFAULT_CELL_START = PAGE_SIZE;
  public static readonly CELL_AREA_END = PAGE_SIZE;
  public static readonly CELL_POINTER_SIZE = 2;

  // 创建一个合法的节点头部的 buffer
  public static createEmptyHeader(): Buffer {
    const buf = Buffer.alloc(BTreeNode.HEADER_SIZE);
    buf.writeInt8(PageType.EMPTY, 0); // page_type
    buf.writeInt16BE(BTreeNode.DEFAULT_FREE_START, 1); // free_start
    buf.writeInt16BE(BTreeNode.DEFAULT_CELL_START, 3); // cell_area_start
    return buf;
  }

  // ...
}

export class Pager {
  // ...

  // 分配一个新的页,并返回这个页的 id 和 buffer
  // 注意:这个方法会更新文件头部的最大页的 id 并把新分配的页写入到文件中
  public allocNewPage(): [number, Buffer] {
    if (!this.header) {
      throw new Error('file header not initialized');
    }
    const buf = Buffer.alloc(PAGE_SIZE);
    const header = BTreeNode.createEmptyHeader();
    header.copy(buf);
    const id = this.header.maxPageId + 1;
    this.saveHeaderToFile();
    this.writePageById(id, buf);
    return [id, buf];
  }

  // 设置根节点时也需要把根节点的 id 写入到文件的头部中
  public setRootPage(id: number, buf: Buffer) {
    this.header!.rootPageId = id;
    this.saveHeaderToFile();
  }
}
// 增加涉及到节点的一些静态属性和方法
export class BTreeNode {
  public static readonly HEADER_SIZE = 8;
  public static readonly DEFAULT_FREE_START = 8;
  public static readonly DEFAULT_CELL_START = PAGE_SIZE;
  public static readonly CELL_AREA_END = PAGE_SIZE;
  public static readonly CELL_POINTER_SIZE = 2;

  // 创建一个合法的节点头部的 buffer
  public static createEmptyHeader(): Buffer {
    const buf = Buffer.alloc(BTreeNode.HEADER_SIZE);
    buf.writeInt8(PageType.EMPTY, 0); // page_type
    buf.writeInt16BE(BTreeNode.DEFAULT_FREE_START, 1); // free_start
    buf.writeInt16BE(BTreeNode.DEFAULT_CELL_START, 3); // cell_area_start
    return buf;
  }

  // ...
}

export class Pager {
  // ...

  // 分配一个新的页,并返回这个页的 id 和 buffer
  // 注意:这个方法会更新文件头部的最大页的 id 并把新分配的页写入到文件中
  public allocNewPage(): [number, Buffer] {
    if (!this.header) {
      throw new Error('file header not initialized');
    }
    const buf = Buffer.alloc(PAGE_SIZE);
    const header = BTreeNode.createEmptyHeader();
    header.copy(buf);
    const id = this.header.maxPageId + 1;
    this.saveHeaderToFile();
    this.writePageById(id, buf);
    return [id, buf];
  }

  // 设置根节点时也需要把根节点的 id 写入到文件的头部中
  public setRootPage(id: number, buf: Buffer) {
    this.header!.rootPageId = id;
    this.saveHeaderToFile();
  }
}

在创建了一个根节点之后,会通过 findLeafNodeByKey 找到这个根节点,然后我们调用 insertKeyValueCell 方法来插入数据。这里的插入就不需要考虑放不放得下了,直接塞进去就好了:

export class BTreeNode {
  // 修改节点的类型,计算出需要插入的 cell 的大小,然后调用一个 `insertCell` 方法
  public insertKeyValueCell(key: Buffer, value: Buffer) {
    if (this.isEmptyNode()) {
      this.pageType = PageType.LEAF;
    }
    const size = KeyValueCell.calcSize(key.length, value.length);
    const offset = this.cellAreaStart - size;
    const cell = KeyValueCell.create(key, value, offset);
    this.insertCell(cell);
  }

  // 这里主要是为了复用,对于 `KeyValueCell` 和 `PointerCell` 都会调用这个方法
  private insertCell(cell: KeyValueCell | PointerCell) {
    const currentCellOffsets = this.cellOffsets;
    const offset = cell.offset;

    // 首先二分查找到第一个比 key 大的的值,也就是我们打算插入的位置
    const i = findIndexOfFirstGreatorElement(
      currentCellOffsets,
      cell.key,
      (a, b) => Buffer.compare(this.readCellByPointer(a)!.key, b)
    );

    // 然后分别检查一下几种情况
    // 记得同时需要更新 cell 的指针,使他们按 key 的大小保持有序
    // 这里的指针即 cell 的 offset
    if (i === -1) {
      const c = this.readCellByIndex(-1);
      if (c && BTreeNode.isEqualKey(c.key, cell.key)) {
        currentCellOffsets.pop();
      }
      currentCellOffsets.push(offset);
    } else if (i === 0) {
      currentCellOffsets.unshift(offset);
    } else {
      const c = this.readCellByIndex(i - 1);
      if (c && BTreeNode.isEqualKey(c.key, cell.key)) {
        // replace it if it was equal
        currentCellOffsets.splice(i - 1, 1, offset);
      } else {
        // otherwise put it after i - 1 position
        currentCellOffsets.splice(i, 0, offset);
      }
    }

    // cell 本身则按照插入顺序从后往前写入
    cell.buffer.copy(this.buffer, this.cellAreaStart - cell.size);

    // 然后更新节点中相关的元数据
    this.cellOffsets = currentCellOffsets;
    this.freeStart = this.freeStart + BTreeNode.CELL_POINTER_SIZE;
    this.cellAreaStart = offset;
  }
}
export class BTreeNode {
  // 修改节点的类型,计算出需要插入的 cell 的大小,然后调用一个 `insertCell` 方法
  public insertKeyValueCell(key: Buffer, value: Buffer) {
    if (this.isEmptyNode()) {
      this.pageType = PageType.LEAF;
    }
    const size = KeyValueCell.calcSize(key.length, value.length);
    const offset = this.cellAreaStart - size;
    const cell = KeyValueCell.create(key, value, offset);
    this.insertCell(cell);
  }

  // 这里主要是为了复用,对于 `KeyValueCell` 和 `PointerCell` 都会调用这个方法
  private insertCell(cell: KeyValueCell | PointerCell) {
    const currentCellOffsets = this.cellOffsets;
    const offset = cell.offset;

    // 首先二分查找到第一个比 key 大的的值,也就是我们打算插入的位置
    const i = findIndexOfFirstGreatorElement(
      currentCellOffsets,
      cell.key,
      (a, b) => Buffer.compare(this.readCellByPointer(a)!.key, b)
    );

    // 然后分别检查一下几种情况
    // 记得同时需要更新 cell 的指针,使他们按 key 的大小保持有序
    // 这里的指针即 cell 的 offset
    if (i === -1) {
      const c = this.readCellByIndex(-1);
      if (c && BTreeNode.isEqualKey(c.key, cell.key)) {
        currentCellOffsets.pop();
      }
      currentCellOffsets.push(offset);
    } else if (i === 0) {
      currentCellOffsets.unshift(offset);
    } else {
      const c = this.readCellByIndex(i - 1);
      if (c && BTreeNode.isEqualKey(c.key, cell.key)) {
        // replace it if it was equal
        currentCellOffsets.splice(i - 1, 1, offset);
      } else {
        // otherwise put it after i - 1 position
        currentCellOffsets.splice(i, 0, offset);
      }
    }

    // cell 本身则按照插入顺序从后往前写入
    cell.buffer.copy(this.buffer, this.cellAreaStart - cell.size);

    // 然后更新节点中相关的元数据
    this.cellOffsets = currentCellOffsets;
    this.freeStart = this.freeStart + BTreeNode.CELL_POINTER_SIZE;
    this.cellAreaStart = offset;
  }
}

我们可以通过编写测试来验证一下结果,创建一个 test/btree.test.ts

describe('BTree', () => {
  let file = './test/fixture/btree.db';
  let fd: number;
  let pager: Pager;
  let cursor: Cursor;
  let btree: BTree;

  beforeEach(() => {
    fd = fs.openSync(file, 'w+');
    pager = new Pager(fd);
    pager.verifyFileHeader();

    cursor = new Cursor(pager);
    btree = new BTree(pager, cursor);
  });

  afterEach(() => {
    fs.closeSync(fd);
    if (fs.existsSync(file)) {
      fs.unlinkSync(file);
    }
  });

  test('insert item', () => {
    btree.insert(Buffer.from('c'), serialize(3));
    btree.insert(Buffer.from('d'), serialize(4));
    btree.insert(Buffer.from('a'), serialize(1));
    btree.insert(Buffer.from('b'), serialize(2));

    expect(btree.root).toBeTruthy();
    expect(btree.root?.id).toBe(1);

    expect(btree.find(Buffer.from('a'))).toBeTruthy();
    expect(deserialize(btree.find(Buffer.from('a'))!)).toBe(1);
    expect(btree.find(Buffer.from('b'))).toBeTruthy();
    expect(deserialize(btree.find(Buffer.from('b'))!)).toBe(2);
    expect(btree.find(Buffer.from('c'))).toBeTruthy();
    expect(deserialize(btree.find(Buffer.from('c'))!)).toBe(3);
    expect(btree.find(Buffer.from('d'))).toBeTruthy();
    expect(deserialize(btree.find(Buffer.from('d'))!)).toBe(4);
  });
});
describe('BTree', () => {
  let file = './test/fixture/btree.db';
  let fd: number;
  let pager: Pager;
  let cursor: Cursor;
  let btree: BTree;

  beforeEach(() => {
    fd = fs.openSync(file, 'w+');
    pager = new Pager(fd);
    pager.verifyFileHeader();

    cursor = new Cursor(pager);
    btree = new BTree(pager, cursor);
  });

  afterEach(() => {
    fs.closeSync(fd);
    if (fs.existsSync(file)) {
      fs.unlinkSync(file);
    }
  });

  test('insert item', () => {
    btree.insert(Buffer.from('c'), serialize(3));
    btree.insert(Buffer.from('d'), serialize(4));
    btree.insert(Buffer.from('a'), serialize(1));
    btree.insert(Buffer.from('b'), serialize(2));

    expect(btree.root).toBeTruthy();
    expect(btree.root?.id).toBe(1);

    expect(btree.find(Buffer.from('a'))).toBeTruthy();
    expect(deserialize(btree.find(Buffer.from('a'))!)).toBe(1);
    expect(btree.find(Buffer.from('b'))).toBeTruthy();
    expect(deserialize(btree.find(Buffer.from('b'))!)).toBe(2);
    expect(btree.find(Buffer.from('c'))).toBeTruthy();
    expect(deserialize(btree.find(Buffer.from('c'))!)).toBe(3);
    expect(btree.find(Buffer.from('d'))).toBeTruthy();
    expect(deserialize(btree.find(Buffer.from('d'))!)).toBe(4);
  });
});

Voilà,如果没有意外的话,针对只有一个根节点的插入和查找就这样完成了!