Skip to content

Commit 9816284

Browse files
committed
feat(hgraph): add remove support for graph indexes
- Add mark-remove and force-remove paths for HGraph, including batch force-remove handling and entry-point updates
- Preserve remove-mode compatibility and fix graph/label-table move semantics for delete support
- Fix sparse reverse-edge updates and search/remove lock ordering, with regression coverage for remove flows

This enables graph indexes to remove vectors safely while keeping reverse-edge, label-table, and concurrent search behavior consistent.

Signed-off-by: LHT129 <tianlan.lht@antgroup.com>
1 parent d7ae028 commit 9816284

29 files changed

+1339
-67
lines changed

include/vsag/index.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,16 @@ enum class RemoveMode {
6969
* this mode is fast */
7070
MARK_REMOVE = 0,
7171

72-
/** remove the vector from index and repair the index, but not shrink the index,
73-
* this mode is heavy */
74-
REMOVE_AND_REPAIR = 1,
72+
/** remove the vector from index and repair the index, this mode is heavy */
73+
FORCE_REMOVE = 1,
74+
75+
/** backward-compatible alias kept for existing public API users */
76+
REMOVE_AND_REPAIR = FORCE_REMOVE,
77+
78+
/** backward-compatible alias for the mixed-style enumerator introduced earlier */
79+
ForceRemove = FORCE_REMOVE,
7580
};
81+
7682
class Index {
7783
public:
7884
/**

src/algorithm/hgraph.cpp

Lines changed: 161 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,7 @@ HGraph::build_by_odescent(const DatasetPtr& data) {
727727

728728
std::vector<int64_t>
729729
HGraph::Add(const DatasetPtr& data, AddMode mode) {
730+
std::shared_lock force_remove_rlock(this->force_remove_mutex_);
730731
std::vector<int64_t> failed_ids;
731732
auto base_dim = data->GetDim();
732733
if (data_type_ != DataTypes::DATA_TYPE_SPARSE) {
@@ -867,6 +868,7 @@ HGraph::KnnSearch(const DatasetPtr& query,
867868
(1 <= params.ef_search) and (params.ef_search <= ef_search_threshold),
868869
fmt::format("ef_search({}) must in range[1, {}]", params.ef_search, ef_search_threshold));
869870

871+
std::shared_lock force_remove_rlock(this->force_remove_mutex_);
870872
std::shared_lock shared_lock(this->global_mutex_);
871873
// check k
872874
CHECK_ARGUMENT(k > 0, fmt::format("k({}) must be greater than 0", k));
@@ -1125,6 +1127,7 @@ HGraph::RangeSearch(const DatasetPtr& query,
11251127
CHECK_ARGUMENT(limited_size != 0,
11261128
fmt::format("limited_size({}) must not be equal to 0", limited_size));
11271129

1130+
std::shared_lock force_remove_rlock(this->force_remove_mutex_);
11281131
std::shared_lock shared_lock(this->global_mutex_);
11291132

11301133
InnerSearchParam search_param;
@@ -1912,46 +1915,168 @@ HGraph::Remove(const std::vector<int64_t>& ids, RemoveMode mode) {
19121915
delete_count_ += delete_count;
19131916
return delete_count;
19141917
}
1915-
for (const auto& id : ids) {
1916-
InnerIdType inner_id;
1917-
{
1918-
std::shared_lock lock(this->label_lookup_mutex_);
1919-
inner_id = this->label_table_->GetIdByLabel(id);
1920-
}
1921-
if (inner_id == this->entry_point_id_) {
1922-
bool find_new_ep = false;
1923-
while (not route_graphs_.empty()) {
1924-
auto& upper_graph = route_graphs_.back();
1925-
Vector<InnerIdType> neighbors(allocator_);
1926-
upper_graph->GetNeighbors(this->entry_point_id_, neighbors);
1927-
for (const auto& nb_id : neighbors) {
1928-
if (inner_id == nb_id) {
1929-
continue;
1930-
}
1931-
this->entry_point_id_ = nb_id;
1932-
find_new_ep = true;
1933-
break;
1934-
}
1935-
if (find_new_ep) {
1936-
break;
1937-
}
1938-
route_graphs_.pop_back();
1918+
1919+
if (mode == RemoveMode::FORCE_REMOVE) {
1920+
std::unique_lock<std::shared_mutex> wlock(this->force_remove_mutex_);
1921+
for (const auto& id : ids) {
1922+
delete_count += this->force_remove_one(id);
1923+
}
1924+
if (delete_count != 0) {
1925+
this->shrink_to_fit();
1926+
}
1927+
return delete_count;
1928+
}
1929+
1930+
throw VsagException(ErrorType::INVALID_ARGUMENT, "RemoveMode not supported");
1931+
}
1932+
1933+
void
1934+
HGraph::find_new_entry_point() {
1935+
bool find_new_ep = false;
1936+
auto inner_id = this->entry_point_id_;
1937+
while (not route_graphs_.empty()) {
1938+
auto& upper_graph = route_graphs_.back();
1939+
Vector<InnerIdType> neighbors(allocator_);
1940+
upper_graph->GetNeighbors(this->entry_point_id_, neighbors);
1941+
for (const auto& nb_id : neighbors) {
1942+
if (inner_id == nb_id) {
1943+
continue;
19391944
}
1945+
this->entry_point_id_ = nb_id;
1946+
find_new_ep = true;
1947+
break;
19401948
}
1941-
{
1942-
{
1943-
std::scoped_lock<std::shared_mutex> wlock(this->global_mutex_);
1944-
for (int level = static_cast<int>(route_graphs_.size()) - 1; level >= 0; --level) {
1945-
this->route_graphs_[level]->DeleteNeighborsById(inner_id);
1946-
}
1947-
this->bottom_graph_->DeleteNeighborsById(inner_id);
1949+
if (find_new_ep) {
1950+
break;
1951+
}
1952+
route_graphs_.pop_back();
1953+
}
1954+
}
1955+
1956+
void
1957+
HGraph::graph_force_remove_one(const InnerIdType& inner_id,
1958+
const FlattenInterfacePtr& flatten,
1959+
const GraphInterfacePtr& graph) {
1960+
Vector<InnerIdType> forward_neighbors(allocator_);
1961+
graph->GetNeighbors(inner_id, forward_neighbors);
1962+
Vector<InnerIdType> reverse_neighbors(allocator_);
1963+
graph->GetIncomingNeighbors(inner_id, reverse_neighbors);
1964+
if (forward_neighbors.empty() && reverse_neighbors.empty()) {
1965+
return;
1966+
}
1967+
1968+
UnorderedSet<InnerIdType> affected_nodes(allocator_);
1969+
auto current_count = this->total_count_.load();
1970+
for (const auto& n : forward_neighbors) {
1971+
if (n < current_count) {
1972+
affected_nodes.insert(n);
1973+
}
1974+
}
1975+
for (const auto& n : reverse_neighbors) {
1976+
if (n < current_count) {
1977+
affected_nodes.insert(n);
1978+
}
1979+
}
1980+
1981+
auto max_degree = graph->MaximumDegree();
1982+
1983+
for (const auto& neighbor : affected_nodes) {
1984+
LockGuard lock(neighbors_mutex_, neighbor);
1985+
1986+
Vector<InnerIdType> neighbors_of_neighbor(allocator_);
1987+
graph->GetNeighbors(neighbor, neighbors_of_neighbor);
1988+
1989+
UnorderedSet<InnerIdType> candidate_set(allocator_);
1990+
for (const auto& nb : neighbors_of_neighbor) {
1991+
if (nb != inner_id) {
1992+
candidate_set.insert(nb);
19481993
}
1949-
std::scoped_lock label_lock(this->label_lookup_mutex_);
1950-
this->label_table_->MarkRemove(id);
1951-
delete_count++;
19521994
}
1995+
for (const auto& nb : forward_neighbors) {
1996+
if (nb != inner_id && nb != neighbor) {
1997+
candidate_set.insert(nb);
1998+
}
1999+
}
2000+
2001+
Vector<InnerIdType> candidate_list(allocator_);
2002+
auto current_count = this->total_count_.load();
2003+
for (const auto& candidate : candidate_set) {
2004+
if (candidate < current_count) {
2005+
candidate_list.emplace_back(candidate);
2006+
}
2007+
}
2008+
2009+
select_edges_by_heuristic(
2010+
candidate_list, neighbor, max_degree, flatten, allocator_, alpha_);
2011+
2012+
graph->InsertNeighborsById(neighbor, candidate_list);
2013+
}
2014+
2015+
Vector<InnerIdType> empty_neighbor(allocator_);
2016+
graph->InsertNeighborsById(inner_id, empty_neighbor);
2017+
}
2018+
2019+
void
2020+
HGraph::move_id(InnerIdType from, InnerIdType to) {
2021+
basic_flatten_codes_->Move(from, to);
2022+
if (high_precise_codes_) {
2023+
high_precise_codes_->Move(from, to);
2024+
}
2025+
2026+
if (extra_infos_) {
2027+
extra_infos_->Move(from, to);
2028+
}
2029+
2030+
bottom_graph_->Move(from, to);
2031+
for (const auto& route_graph : route_graphs_) {
2032+
route_graph->Move(from, to);
2033+
}
2034+
2035+
label_table_->Move(from, to);
2036+
2037+
if (entry_point_id_ == from) {
2038+
entry_point_id_ = to;
2039+
}
2040+
}
2041+
2042+
uint32_t
2043+
HGraph::force_remove_one(int64_t label) {
2044+
InnerIdType inner_id;
2045+
{
2046+
std::shared_lock lock(this->label_lookup_mutex_);
2047+
inner_id = this->label_table_->GetIdByLabel(label);
2048+
}
2049+
if (inner_id == this->entry_point_id_) {
2050+
this->find_new_entry_point();
2051+
}
2052+
2053+
graph_force_remove_one(inner_id, basic_flatten_codes_, bottom_graph_);
2054+
2055+
for (const auto& route_graph : route_graphs_) {
2056+
graph_force_remove_one(inner_id, basic_flatten_codes_, route_graph);
2057+
}
2058+
InnerIdType swap_id = this->total_count_.load() - 1;
2059+
2060+
if (swap_id != inner_id) {
2061+
this->move_id(swap_id, inner_id);
2062+
}
2063+
this->total_count_--;
2064+
return 1;
2065+
}
2066+
2067+
void
2068+
HGraph::shrink_to_fit() {
2069+
auto total_count = this->total_count_.load();
2070+
2071+
basic_flatten_codes_->ShrinkToFit(total_count);
2072+
if (high_precise_codes_) {
2073+
high_precise_codes_->ShrinkToFit(total_count);
2074+
}
2075+
bottom_graph_->ShrinkToFit(total_count);
2076+
for (const auto& route_graph : route_graphs_) {
2077+
route_graph->ShrinkToFit(total_count);
19532078
}
1954-
return delete_count;
2079+
label_table_->ShrinkToFit(total_count);
19552080
}
19562081

19572082
void
@@ -2173,6 +2298,7 @@ HGraph::SearchWithRequest(const SearchRequest& request) const {
21732298
(1 <= params.ef_search) and (params.ef_search <= ef_search_threshold),
21742299
fmt::format("ef_search({}) must in range[1, {}]", params.ef_search, ef_search_threshold));
21752300

2301+
std::shared_lock force_remove_rlock(this->force_remove_mutex_);
21762302
std::shared_lock shared_lock(this->global_mutex_);
21772303
// check k
21782304
CHECK_ARGUMENT(k > 0, fmt::format("k({}) must be greater than 0", k));

src/algorithm/hgraph.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,23 @@ class HGraph : public InnerIndexInterface {
324324
void
325325
deserialize_basic_info_v0_14(StreamReader& reader);
326326

327+
uint32_t
328+
force_remove_one(int64_t label);
329+
330+
void
331+
find_new_entry_point();
332+
333+
void
334+
graph_force_remove_one(const InnerIdType& inner_id,
335+
const FlattenInterfacePtr& flatten,
336+
const GraphInterfacePtr& graph);
337+
338+
void
339+
move_id(InnerIdType from, InnerIdType to);
340+
341+
void
342+
shrink_to_fit();
343+
327344
private:
328345
void
329346
reorder(const void* query,
@@ -390,6 +407,7 @@ class HGraph : public InnerIndexInterface {
390407
mutable std::shared_mutex global_mutex_;
391408
mutable MutexArrayPtr neighbors_mutex_;
392409
mutable std::shared_mutex add_mutex_;
410+
mutable std::shared_mutex force_remove_mutex_;
393411

394412
std::atomic<InnerIdType> max_capacity_{0};
395413

src/algorithm/hgraph_parameter.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ HGraphParameter::FromJson(const JsonType& json) {
9191
if (graph_param != nullptr) {
9292
hierarchical_graph_param->remove_flag_bit_ = graph_param->remove_flag_bit_;
9393
hierarchical_graph_param->support_delete_ = graph_param->support_remove_;
94+
hierarchical_graph_param->use_reverse_edges_ = graph_param->use_reverse_edges_;
9495
} else {
9596
hierarchical_graph_param->support_delete_ = false;
9697
}

src/datacell/compressed_graph_datacell.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ CompressedGraphDataCell::CompressedGraphDataCell(const CompressedGraphDatacellPa
3434
if (graph_param->support_duplicate_) {
3535
this->InitDuplicateTracker();
3636
}
37+
if (graph_param->use_reverse_edges_) {
38+
throw VsagException(ErrorType::UNSUPPORTED_INDEX_OPERATION,
39+
"CompressedGraphDataCell does not support reverse edges");
40+
}
3741
}
3842

3943
CompressedGraphDataCell::~CompressedGraphDataCell() {

src/datacell/compressed_graph_datacell_parameter.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ class CompressedGraphDatacellParameter : public GraphInterfaceParameter {
3636
if (json.Contains(SUPPORT_DUPLICATE)) {
3737
this->support_duplicate_ = json[SUPPORT_DUPLICATE].GetBool();
3838
}
39+
if (json.Contains(HGRAPH_USE_REVERSE_EDGES_KEY)) {
40+
this->use_reverse_edges_ = json[HGRAPH_USE_REVERSE_EDGES_KEY].GetBool();
41+
}
3942
}
4043

4144
JsonType
@@ -44,6 +47,7 @@ class CompressedGraphDatacellParameter : public GraphInterfaceParameter {
4447
json[GRAPH_PARAM_MAX_DEGREE_KEY].SetInt(this->max_degree_);
4548
json[GRAPH_STORAGE_TYPE_KEY].SetString(GRAPH_STORAGE_TYPE_VALUE_COMPRESSED);
4649
json[SUPPORT_DUPLICATE].SetBool(this->support_duplicate_);
50+
json[HGRAPH_USE_REVERSE_EDGES_KEY].SetBool(this->use_reverse_edges_);
4751
return json;
4852
}
4953

@@ -72,6 +76,14 @@ class CompressedGraphDatacellParameter : public GraphInterfaceParameter {
7276
graph_param->support_duplicate_);
7377
return false;
7478
}
79+
if (use_reverse_edges_ != graph_param->use_reverse_edges_) {
80+
logger::error(
81+
"CompressedGraphDatacellParameter::CheckCompatibility: "
82+
"use_reverse_edges_ mismatch: {} vs {}",
83+
use_reverse_edges_,
84+
graph_param->use_reverse_edges_);
85+
return false;
86+
}
7587
return true;
7688
}
7789
};

src/datacell/extra_info_datacell.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,9 @@ class ExtraInfoDataCell : public ExtraInfoInterface {
8383
int64_t
8484
GetMemoryUsage() const override;
8585

86+
void
87+
Move(InnerIdType from, InnerIdType to) override;
88+
8689
inline void
8790
SetIO(std::shared_ptr<BasicIO<IOTmpl>> io) {
8891
this->io_ = io;
@@ -181,4 +184,17 @@ ExtraInfoDataCell<IOTmpl>::GetMemoryUsage() const {
181184
}
182185
return memory;
183186
}
187+
188+
template <typename IOTmpl>
189+
void
190+
ExtraInfoDataCell<IOTmpl>::Move(InnerIdType from, InnerIdType to) {
191+
bool need_release = false;
192+
const char* extra_info = this->GetExtraInfoById(from, need_release);
193+
this->io_->Write(reinterpret_cast<const uint8_t*>(extra_info),
194+
extra_info_size_,
195+
static_cast<uint64_t>(to) * static_cast<uint64_t>(extra_info_size_));
196+
if (need_release) {
197+
this->io_->Release(reinterpret_cast<const uint8_t*>(extra_info));
198+
}
199+
}
184200
} // namespace vsag

src/datacell/extra_info_interface.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,12 @@ class ExtraInfoInterface {
114114
virtual void
115115
DisableForceInMemory(){};
116116

117+
virtual void
118+
Move(InnerIdType from, InnerIdType to) {
119+
throw VsagException(ErrorType::INTERNAL_ERROR,
120+
"Move not implemented in ExtraInfoInterface");
121+
}
122+
117123
public:
118124
InnerIdType total_count_{0};
119125
InnerIdType max_capacity_{0};

0 commit comments

Comments
 (0)