diff --git a/kg_rag/test/test_spoke_api_empty_response.py b/kg_rag/test/test_spoke_api_empty_response.py new file mode 100644 index 0000000..2d7597e --- /dev/null +++ b/kg_rag/test/test_spoke_api_empty_response.py @@ -0,0 +1,72 @@ +""" +Unit test for issue #42: IndexError when SPOKE API returns empty list for a node. +https://github.com/BaranziniLab/KG_RAG/issues/42 +""" +import unittest +from unittest.mock import patch, MagicMock + + +class TestGetContextUsingSpokeApiEmptyResponse(unittest.TestCase): + + @patch("kg_rag.utility.get_spoke_api_resp") + def test_empty_node_context_does_not_raise(self, mock_api): + """When SPOKE API returns an empty list, function should not raise IndexError.""" + from kg_rag.utility import get_context_using_spoke_api + + # First call returns types endpoint data + types_response = MagicMock() + types_response.json.return_value = { + "nodes": {"Disease": {}, "Protein": {}, "Compound": {}}, + "edges": {"DaG": {}, "CtD": {}}, + } + + # Second call (neighborhood endpoint) returns empty list — the bug scenario + empty_response = MagicMock() + empty_response.json.return_value = [] + + mock_api.side_effect = [types_response, empty_response] + + context, df = get_context_using_spoke_api("neurofibromatosis 2") + + self.assertEqual(context, "") + self.assertTrue(df.empty) + + @patch("kg_rag.utility.get_spoke_api_resp") + def test_non_empty_node_context_includes_identifier(self, mock_api): + """When SPOKE API returns data, identifier info is appended to context.""" + from kg_rag.utility import get_context_using_spoke_api + + types_response = MagicMock() + types_response.json.return_value = { + "nodes": {"Disease": {}, "Gene": {}}, + "edges": {"DaG_association": {}}, + } + + # Edge types in SPOKE use underscores (e.g. "DaG_association") + # Nodes have no underscore in neo4j_type + node_response = MagicMock() + node_response.json.return_value = [ + { + "data": { + "neo4j_type": "Disease", + "id": "D001", + "properties": { + "name": "psoriasis", + "identifier": "DOID:8893", + "source": "DOID", + }, + } + }, + ] + + mock_api.side_effect = [types_response, node_response] + + context, df = get_context_using_spoke_api("psoriasis") + + # Identifier info should be appended when node_context is non-empty + self.assertIn("DOID", context) + self.assertIn("DOID:8893", context) + + +if __name__ == "__main__": + unittest.main() diff --git a/kg_rag/utility.py b/kg_rag/utility.py index 801b04a..b5b890b 100644 --- a/kg_rag/utility.py +++ b/kg_rag/utility.py @@ -125,7 +125,8 @@ def get_context_using_spoke_api(node_value): merge_2.loc[:, "predicate"] = merge_2.edge_type.apply(lambda x:x.split("_")[0]) merge_2.loc[:, "context"] = merge_2.source + " " + merge_2.predicate.str.lower() + " " + merge_2.target + " and Provenance of this association is " + merge_2.provenance + "." context = merge_2.context.str.cat(sep=' ') - context += node_value + " has a " + node_context[0]["data"]["properties"]["source"] + " identifier of " + node_context[0]["data"]["properties"]["identifier"] + " and Provenance of this is from " + node_context[0]["data"]["properties"]["source"] + "." + if node_context: + context += node_value + " has a " + node_context[0]["data"]["properties"]["source"] + " identifier of " + node_context[0]["data"]["properties"]["identifier"] + " and Provenance of this is from " + node_context[0]["data"]["properties"]["source"] + "." return context, merge_2 # if edge_evidence: