Skip to content

Commit 5573ad2

Browse files
committed
merge hash index and overflow file into data.kz
1 parent 2a590a1 commit 5573ad2

34 files changed

+288
-451
lines changed

src/include/storage/db_file_id.h

Lines changed: 0 additions & 28 deletions
This file was deleted.

src/include/storage/index/hash_index.h

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ class OnDiskHashIndex {
6767
template<typename T>
6868
class HashIndex final : public OnDiskHashIndex {
6969
public:
70-
HashIndex(MemoryManager& memoryManager, DBFileIDAndName dbFileIDAndName, FileHandle* fileHandle,
70+
HashIndex(MemoryManager& memoryManager, FileHandle* fileHandle,
7171
OverflowFileHandle* overflowFileHandle, DiskArrayCollection& diskArrays, uint64_t indexPos,
7272
ShadowFile* shadowFile, const HashIndexHeader& indexHeaderForReadTrx,
7373
HashIndexHeader& indexHeaderForWriteTrx);
@@ -278,7 +278,6 @@ class HashIndex final : public OnDiskHashIndex {
278278
const transaction::Transaction* transaction, slot_id_t pSlotId);
279279

280280
private:
281-
DBFileIDAndName dbFileIDAndName;
282281
ShadowFile* shadowFile;
283282
uint64_t headerPageIdx;
284283
FileHandle* fileHandle;
@@ -307,9 +306,9 @@ inline bool HashIndex<common::ku_string_t>::equals(const transaction::Transactio
307306

308307
class PrimaryKeyIndex {
309308
public:
310-
PrimaryKeyIndex(const DBFileIDAndName& dbFileIDAndName, bool readOnly, bool inMemMode,
311-
common::PhysicalTypeID keyDataType, MemoryManager& memoryManager, ShadowFile* shadowFile,
312-
common::VirtualFileSystem* vfs, main::ClientContext* context);
309+
PrimaryKeyIndex(FileHandle* dataFH, bool inMemMode, common::PhysicalTypeID keyDataType,
310+
MemoryManager& memoryManager, ShadowFile* shadowFile, common::page_idx_t firstHeaderPage,
311+
common::page_idx_t overflowHeaderPage);
313312

314313
~PrimaryKeyIndex();
315314

@@ -392,17 +391,20 @@ class PrimaryKeyIndex {
392391

393392
void writeHeaders();
394393

394+
void serialize(common::Serializer& serializer) const;
395+
395396
private:
396397
common::PhysicalTypeID keyDataTypeID;
397398
FileHandle* fileHandle;
398399
std::unique_ptr<OverflowFile> overflowFile;
399400
std::vector<std::unique_ptr<OnDiskHashIndex>> hashIndices;
400401
std::vector<HashIndexHeader> hashIndexHeadersForReadTrx;
401402
std::vector<HashIndexHeader> hashIndexHeadersForWriteTrx;
402-
DBFileIDAndName dbFileIDAndName;
403403
ShadowFile& shadowFile;
404404
// Stores both primary and overflow slots
405405
std::unique_ptr<DiskArrayCollection> hashIndexDiskArrays;
406+
common::page_idx_t firstHeaderPage;
407+
common::page_idx_t overflowHeaderPage;
406408
};
407409

408410
} // namespace storage

src/include/storage/shadow_utils.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ class ShadowUtils {
3636
// Where possible, updatePage/insertNewPage should be used instead
3737
static ShadowPageAndFrame createShadowVersionIfNecessaryAndPinPage(
3838
common::page_idx_t originalPage, bool insertingNewPage, FileHandle& fileHandle,
39-
DBFileID dbFileID, ShadowFile& shadowFile);
39+
ShadowFile& shadowFile);
4040

4141
static std::pair<FileHandle*, common::page_idx_t> getFileHandleAndPhysicalPageIdxToPin(
4242
FileHandle& fileHandle, common::page_idx_t pageIdx, const ShadowFile& shadowFile,
@@ -47,16 +47,16 @@ class ShadowUtils {
4747
const std::function<void(uint8_t*)>& readOp);
4848

4949
static common::page_idx_t insertNewPage(
50-
FileHandle& fileHandle, DBFileID dbFileID, ShadowFile& shadowFile,
50+
FileHandle& fileHandle, ShadowFile& shadowFile,
5151
const std::function<void(uint8_t*)>& insertOp = [](uint8_t*) -> void {
5252
// DO NOTHING.
5353
});
5454

5555
// Note: This function updates a page "transactionally", i.e., creates the WAL version of the
5656
// page if it doesn't exist. For the original page to be updated, the current WRITE trx needs to
5757
// commit and checkpoint.
58-
static void updatePage(FileHandle& fileHandle, DBFileID dbFileID,
59-
common::page_idx_t originalPageIdx, bool isInsertingNewPage, ShadowFile& shadowFile,
58+
static void updatePage(FileHandle& fileHandle, common::page_idx_t originalPageIdx,
59+
bool isInsertingNewPage, ShadowFile& shadowFile,
6060
const std::function<void(uint8_t*)>& updateOp);
6161
};
6262
} // namespace storage

src/include/storage/storage_manager.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,11 @@ class KUZU_API StorageManager {
2929

3030
static void recover(main::ClientContext& clientContext);
3131

32-
void createTable(catalog::CatalogEntry* entry, main::ClientContext* context);
32+
void createTable(catalog::CatalogEntry* entry, const main::ClientContext* context);
3333

3434
void checkpoint(main::ClientContext& clientContext);
3535
void finalizeCheckpoint(main::ClientContext& clientContext);
36-
void rollbackCheckpoint(main::ClientContext& clientContext);
36+
void rollbackCheckpoint(const main::ClientContext& clientContext);
3737

3838
Table* getTable(common::table_id_t tableID) {
3939
std::lock_guard lck{mtx};
@@ -54,9 +54,10 @@ class KUZU_API StorageManager {
5454

5555
void loadTables(const catalog::Catalog& catalog, common::VirtualFileSystem* vfs,
5656
main::ClientContext* context);
57-
void createNodeTable(catalog::NodeTableCatalogEntry* entry, main::ClientContext* context);
57+
void createNodeTable(catalog::NodeTableCatalogEntry* entry);
5858
void createRelTable(catalog::RelTableCatalogEntry* entry);
59-
void createRelTableGroup(catalog::RelGroupCatalogEntry* entry, main::ClientContext* context);
59+
void createRelTableGroup(const catalog::RelGroupCatalogEntry* entry,
60+
const main::ClientContext* context);
6061

6162
void reclaimDroppedTables(const main::ClientContext& clientContext);
6263

src/include/storage/storage_structure/disk_array.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -101,9 +101,9 @@ struct PIPUpdates {
101101
class DiskArrayInternal {
102102
public:
103103
// Used when loading from file
104-
DiskArrayInternal(FileHandle& fileHandle, DBFileID dbFileID,
105-
const DiskArrayHeader& headerForReadTrx, DiskArrayHeader& headerForWriteTrx,
106-
ShadowFile* shadowFile, uint64_t elementSize, bool bypassShadowing = false);
104+
DiskArrayInternal(FileHandle& fileHandle, const DiskArrayHeader& headerForReadTrx,
105+
DiskArrayHeader& headerForWriteTrx, ShadowFile* shadowFile, uint64_t elementSize,
106+
bool bypassShadowing = false);
107107

108108
virtual ~DiskArrayInternal() = default;
109109

@@ -241,7 +241,6 @@ class DiskArrayInternal {
241241
protected:
242242
PageStorageInfo storageInfo;
243243
FileHandle& fileHandle;
244-
DBFileID dbFileID;
245244
const DiskArrayHeader& header;
246245
DiskArrayHeader& headerForWriteTrx;
247246
bool hasTransactionalUpdates;
@@ -267,10 +266,10 @@ class DiskArray {
267266
// If bypassWAL is set, the buffer manager is used to write pages new to this transaction to the
268267
// original file, but does not handle flushing them. BufferManager::flushAllDirtyPagesInFrames
269268
// should be called on this file handle exactly once during prepare commit.
270-
DiskArray(FileHandle& fileHandle, DBFileID dbFileID, const DiskArrayHeader& headerForReadTrx,
269+
DiskArray(FileHandle& fileHandle, const DiskArrayHeader& headerForReadTrx,
271270
DiskArrayHeader& headerForWriteTrx, ShadowFile* shadowFile, bool bypassWAL = false)
272-
: diskArray(fileHandle, dbFileID, headerForReadTrx, headerForWriteTrx, shadowFile,
273-
sizeof(U), bypassWAL) {}
271+
: diskArray(fileHandle, headerForReadTrx, headerForWriteTrx, shadowFile, sizeof(U),
272+
bypassWAL) {}
274273

275274
// Note: This function is to be used only by the WRITE trx.
276275
// The return value is the idx of val in array.

src/include/storage/storage_structure/disk_array_collection.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ class DiskArrayCollection {
2727
static_assert(std::has_unique_object_representations_v<HeaderPage>);
2828

2929
public:
30-
DiskArrayCollection(FileHandle& fileHandle, DBFileID dbFileID, ShadowFile& shadowFile,
30+
DiskArrayCollection(FileHandle& fileHandle, ShadowFile& shadowFile,
3131
common::page_idx_t firstHeaderPage = 0, bool bypassShadowing = false);
3232

3333
void checkpoint();
@@ -52,15 +52,14 @@ class DiskArrayCollection {
5252
->headers[idx % HeaderPage::NUM_HEADERS_PER_PAGE];
5353
auto& writeHeader = headersForWriteTrx[idx / HeaderPage::NUM_HEADERS_PER_PAGE]
5454
->headers[idx % HeaderPage::NUM_HEADERS_PER_PAGE];
55-
return std::make_unique<DiskArray<T>>(fileHandle, dbFileID, readHeader, writeHeader,
56-
&shadowFile, bypassShadowing);
55+
return std::make_unique<DiskArray<T>>(fileHandle, readHeader, writeHeader, &shadowFile,
56+
bypassShadowing);
5757
}
5858

5959
size_t addDiskArray();
6060

6161
private:
6262
FileHandle& fileHandle;
63-
DBFileID dbFileID;
6463
ShadowFile& shadowFile;
6564
bool bypassShadowing;
6665
common::page_idx_t headerPagesOnDisk;

src/include/storage/storage_structure/overflow_file.h

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
#include "storage/storage_utils.h"
1313
#include "storage/wal/shadow_file.h"
1414
#include "storage/wal/wal.h"
15-
#include "transaction/transaction.h"
1615

1716
namespace kuzu {
1817
namespace storage {
@@ -28,7 +27,8 @@ class OverflowFileHandle {
2827
// Moving the handle would invalidate those pointers
2928
OverflowFileHandle(OverflowFileHandle&& other) = delete;
3029

31-
std::string readString(transaction::TransactionType trxType, const common::ku_string_t& str);
30+
std::string readString(transaction::TransactionType trxType,
31+
const common::ku_string_t& str) const;
3232

3333
bool equals(transaction::TransactionType trxType, std::string_view keyToLookup,
3434
const common::ku_string_t& keyInEntry) const;
@@ -81,16 +81,11 @@ class OverflowFile {
8181

8282
public:
8383
// For reading an existing overflow file
84-
OverflowFile(const DBFileIDAndName& dbFileIdAndName, MemoryManager& memoryManager,
85-
ShadowFile* shadowFile, bool readOnly, common::VirtualFileSystem* vfs,
86-
main::ClientContext* context);
84+
OverflowFile(FileHandle* dataFH, MemoryManager& memoryManager, ShadowFile* shadowFile,
85+
common::page_idx_t headerPageIdx);
8786

8887
virtual ~OverflowFile() = default;
8988

90-
// For creating an overflow file from scratch
91-
static void createEmptyFiles(const std::string& fName, common::VirtualFileSystem* vfs,
92-
main::ClientContext* context);
93-
9489
// Handles contain a reference to the overflow file
9590
OverflowFile(OverflowFile&& other) = delete;
9691

@@ -111,13 +106,16 @@ class OverflowFile {
111106
}
112107

113108
protected:
114-
explicit OverflowFile(const DBFileIDAndName& dbFileIdAndName, MemoryManager& memoryManager);
109+
explicit OverflowFile(FileHandle* dataFH, MemoryManager& memoryManager);
115110

116111
common::page_idx_t getNewPageIdx() {
117112
// If this isn't the first call reserving the page header, then the header flag must be set
118113
// prior to this
119-
KU_ASSERT(pageCounter == HEADER_PAGE_IDX || headerChanged);
120-
return pageCounter.fetch_add(1);
114+
if (fileHandle) {
115+
return fileHandle->addNewPage();
116+
} else {
117+
return pageCounter.fetch_add(1);
118+
}
121119
}
122120

123121
private:
@@ -133,18 +131,18 @@ class OverflowFile {
133131
std::vector<std::unique_ptr<OverflowFileHandle>> handles;
134132
StringOverflowFileHeader header;
135133
common::page_idx_t numPagesOnDisk;
136-
DBFileID dbFileID;
137134
FileHandle* fileHandle;
138135
ShadowFile* shadowFile;
139136
MemoryManager& memoryManager;
140137
std::atomic<common::page_idx_t> pageCounter;
141138
std::atomic<bool> headerChanged;
139+
common::page_idx_t headerPageIdx;
142140
};
143141

144142
class InMemOverflowFile final : public OverflowFile {
145143
public:
146-
explicit InMemOverflowFile(const DBFileIDAndName& dbFileIDAndName, MemoryManager& memoryManager)
147-
: OverflowFile{dbFileIDAndName, memoryManager} {}
144+
explicit InMemOverflowFile(MemoryManager& memoryManager)
145+
: OverflowFile{nullptr, memoryManager} {}
148146
};
149147

150148
} // namespace storage

src/include/storage/storage_utils.h

Lines changed: 5 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -11,21 +11,10 @@
1111
#include "main/client_context.h"
1212
#include "main/db_config.h"
1313
#include "main/settings.h"
14-
#include "storage/db_file_id.h"
1514

1615
namespace kuzu {
1716
namespace storage {
1817

19-
class StorageManager;
20-
21-
struct DBFileIDAndName {
22-
DBFileID dbFileID;
23-
std::string fName;
24-
25-
DBFileIDAndName(DBFileID dbFileID, std::string fName)
26-
: dbFileID{dbFileID}, fName{std::move(fName)} {};
27-
};
28-
2918
struct PageCursor {
3019
PageCursor(common::page_idx_t pageIdx, uint32_t posInPage)
3120
: pageIdx{pageIdx}, elemPosInPage{posInPage} {};
@@ -49,10 +38,11 @@ struct PageUtils {
4938
auto numBytesPerNullEntry = common::NullMask::NUM_BITS_PER_NULL_ENTRY >> 3;
5039
auto numNullEntries =
5140
hasNull ?
52-
(uint32_t)ceil((double)common::KUZU_PAGE_SIZE /
53-
(double)(((uint64_t)elementSize
41+
static_cast<uint32_t>(ceil(
42+
static_cast<double>(common::KUZU_PAGE_SIZE) /
43+
static_cast<double>((static_cast<uint64_t>(elementSize)
5444
<< common::NullMask::NUM_BITS_PER_NULL_ENTRY_LOG2) +
55-
numBytesPerNullEntry)) :
45+
numBytesPerNullEntry))) :
5646
0;
5747
return (common::KUZU_PAGE_SIZE - (numNullEntries * numBytesPerNullEntry)) / elementSize;
5848
}
@@ -82,7 +72,7 @@ class StorageUtils {
8272
// TODO: Constrain T1 and T2 to numerics.
8373
template<typename T1, typename T2>
8474
static uint64_t divideAndRoundUpTo(T1 v1, T2 v2) {
85-
return std::ceil((double)v1 / (double)v2);
75+
return std::ceil(static_cast<double>(v1) / static_cast<double>(v2));
8676
}
8777

8878
static std::string getColumnName(const std::string& propertyName, ColumnType type,
@@ -101,10 +91,6 @@ class StorageUtils {
10191
return std::make_pair(nodeGroupIdx, offsetInChunk);
10292
}
10393

104-
static std::string getNodeIndexFName(const common::VirtualFileSystem* vfs,
105-
const std::string& directory, const common::table_id_t& tableID,
106-
common::FileVersionType dbFileType);
107-
10894
static std::string getDataFName(common::VirtualFileSystem* vfs, const std::string& directory) {
10995
return vfs->joinPath(directory, common::StorageConstants::DATA_FILE_NAME);
11096
}
@@ -116,17 +102,6 @@ class StorageUtils {
116102
common::StorageConstants::METADATA_FILE_NAME_FOR_WAL);
117103
}
118104

119-
static DBFileIDAndName getNodeIndexIDAndFName(common::VirtualFileSystem* vfs,
120-
const std::string& directory, common::table_id_t tableID) {
121-
auto fName = getNodeIndexFName(vfs, directory, tableID, common::FileVersionType::ORIGINAL);
122-
return {DBFileID::newPKIndexFileID(tableID), fName};
123-
}
124-
125-
static std::string getOverflowFileName(const std::string& fName) {
126-
return appendSuffixOrInsertBeforeWALSuffix(fName,
127-
common::StorageConstants::OVERFLOW_FILE_SUFFIX);
128-
}
129-
130105
static std::string getCatalogFilePath(common::VirtualFileSystem* vfs,
131106
const std::string& directory, common::FileVersionType dbFileType) {
132107
return vfs->joinPath(directory, dbFileType == common::FileVersionType::ORIGINAL ?

src/include/storage/store/column.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
#include "catalog/catalog.h"
44
#include "common/null_mask.h"
55
#include "common/types/types.h"
6-
#include "storage/db_file_id.h"
76
#include "storage/store/column_chunk_data.h"
87
#include "storage/store/column_reader_writer.h"
98

@@ -137,7 +136,6 @@ class Column {
137136

138137
protected:
139138
std::string name;
140-
DBFileID dbFileID;
141139
common::LogicalType dataType;
142140
FileHandle* dataFH;
143141
MemoryManager* mm;

0 commit comments

Comments
 (0)