Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 15 additions & 10 deletions src/common/column_matrix.cc
Original file line number Diff line number Diff line change
@@ -1,23 +1,23 @@
/**
* Copyright 2017-2023, XGBoost Contributors
* Copyright 2017-2025, XGBoost Contributors
* \brief Utility for fast column-wise access
*/
#include "column_matrix.h"

#include <algorithm> // for transform
#include <cstddef> // for size_t
#include <cstdint> // for uint64_t, uint8_t
#include <limits> // for numeric_limits
#include <type_traits> // for remove_reference_t
#include <vector> // for vector

#include "../data/gradient_index.h" // for GHistIndexMatrix
#include "io.h" // for AlignedResourceReadStream, AlignedFileWriteStream
#include "xgboost/base.h" // for bst_feaature_t
#include "xgboost/span.h" // for Span
#include "../common/ref_resource_view.h" // for MakeFixedVecWithMalloc
#include "../data/gradient_index.h" // for GHistIndexMatrix
#include "io.h" // for AlignedResourceReadStream, AlignedFileWriteStream
#include "xgboost/base.h" // for bst_feaature_t

namespace xgboost::common {
void ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_threshold) {
void ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_threshold,
int n_threads) {
auto const nfeature = gmat.Features();
const size_t nrow = gmat.Size();
// identify type of each column
Expand Down Expand Up @@ -61,18 +61,19 @@ void ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_thres
auto storage_size =
feature_offsets_.back() * static_cast<std::underlying_type_t<BinTypeSize>>(bins_type_size_);

index_ = common::MakeFixedVecWithMalloc(storage_size, std::uint8_t{0});
index_ = common::MakeFixedVecWithMalloc(storage_size, std::uint8_t{0}, n_threads);

if (!all_dense_column) {
row_ind_ = common::MakeFixedVecWithMalloc(feature_offsets_[nfeature], std::size_t{0});
row_ind_ = common::MakeFixedVecWithMalloc(feature_offsets_[nfeature],
std::size_t{0}, n_threads);
}

// store least bin id for each feature
index_base_ = const_cast<uint32_t*>(gmat.cut.Ptrs().data());

any_missing_ = !gmat.IsDense();

missing_ = MissingIndicator{0, false};
missing_ = MissingIndicator{feature_offsets_, type_, any_missing_};
}

// IO procedures for external memory.
Expand All @@ -93,6 +94,9 @@ bool ColumnMatrix::Read(AlignedResourceReadStream* fi, uint32_t const* index_bas
if (!common::ReadVec(fi, &missing_.storage)) {
return false;
}
if (!common::ReadVec(fi, &missing_.feature_offsets_padded)) {
return false;
}
missing_.InitView();

index_base_ = index_base;
Expand All @@ -113,6 +117,7 @@ std::size_t ColumnMatrix::Write(AlignedFileWriteStream* fo) const {
bytes += common::WriteVec(fo, row_ind_);
bytes += common::WriteVec(fo, feature_offsets_);
bytes += common::WriteVec(fo, missing_.storage);
bytes += common::WriteVec(fo, missing_.feature_offsets_padded);

bytes += fo->Write(bins_type_size_);
bytes += fo->Write(any_missing_);
Expand Down
Loading
Loading