WIP implementation of microservice for SolProp

JacksonBurns · JacksonBurns · commit 0db9a9e4fbd1 · 2026-03-19T23:01:01.000-04:00
dHsolv working, others need work - why isn't SolubilityCalculation
importable?
diff --git a/environment.yml b/environment.yml
@@ -4,15 +4,8 @@
 name: rmg_website
 channels:
   - conda-forge
-  - fhvermei
 dependencies:
   - django==4.2
-  - pip
-  - pip:
-      - git+https://github.com/bp-kelley/descriptastorus@2.5.0
   - python>=3.9
   - xlsxwriter
-  - scipy>=1.9,<=1.10 # need 1.9.0 or greater for milp but < 1.11 because of gilbrat/gibrat deprecation for compat. with descriptastorus
-  - fhvermei::chemprop_solvation>=0.0.3
-  - solprop_ml>=1.2 
   - cantera==2.6.0*
diff --git a/microservices/solprop/Dockerfile b/microservices/solprop/Dockerfile
@@ -0,0 +1,22 @@
+# Image for the SolProp microservice: a pixi-managed conda environment that
+# serves server.py with uvicorn on port 8000.
+FROM ghcr.io/prefix-dev/pixi:latest
+# Use apt-get throughout: the `apt` front end does not have a stable CLI for scripts.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    build-essential \
+    rename \
+    wget \
+    unzip \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# NOTE(review): this disables TLS certificate verification for every git fetch
+# in the image, including the descriptastorus install below. Confirm it is
+# still required and remove it if not.
+RUN git config --global http.sslverify false
+WORKDIR /app
+
+# Resolve and install dependencies before copying application code so these
+# slow layers stay cached when only server.py changes.
+RUN pixi init && \
+    pixi project channel add conda-forge && \
+    pixi project channel add fhvermei && \
+    pixi add python=3.9 'solprop_ml=1.2' "chemprop_solvation>=0.0.3" fastapi uvicorn pydantic requests "scipy<1.11"
+RUN pixi add --pypi "descriptastorus @ git+https://github.com/bp-kelley/descriptastorus@2.5.0"
+
+COPY server.py .
+
+EXPOSE 8000
+
+CMD ["pixi", "run", "--manifest-path", "/app/pixi.toml", "uvicorn", "server:app", "--host", "0.0.0.0", "--port", "8000"]
diff --git a/microservices/solprop/README.md b/microservices/solprop/README.md
@@ -0,0 +1,6 @@
+Wraps the solprop package into a callable microservice.
+
+With Docker installed, run `docker build -t solprop_service .` in this directory to build the image.
+Then run `docker run -d -p 8000:8000 --name solprop solprop_service` to start the server on port 8000.
+
+A prebuilt version of this image is also available on the ReactionMechanismGenerator DockerHub.
diff --git a/microservices/solprop/server.py b/microservices/solprop/server.py
@@ -0,0 +1,130 @@
+import sys
+
+# Hide Uvicorn's CLI arguments from solprop's internal argument parser
+sys.argv = [sys.argv[0]]
+
+from fastapi import FastAPI
+from pydantic import BaseModel
+import pandas as pd
+import numpy as np
+from typing import Optional
+
+from chemprop_solvation.solvation_estimator import load_DirectML_Gsolv_estimator, load_DirectML_Hsolv_estimator, load_SoluteML_estimator
+from solvation_predictor.solubility.SolubilityCalculations import SolubilityCalculations
+from solvation_predictor.solubility.SolubilityPredictions import SolubilityPredictions
+from solvation_predictor.solubility.SolubilityData import SolubilityData
+from solvation_predictor.solubility.SolubilityModels import SolubilityModels
+
+# ASGI application object; the Dockerfile starts it as `server:app`.
+app = FastAPI()
+
+# The estimators are loaded at module import, so every request handled by
+# this process reuses the same loaded objects.
+dGsolv_estimator = load_DirectML_Gsolv_estimator()
+dHsolv_estimator = load_DirectML_Hsolv_estimator()
+
+# Model bundle handed to SolubilityPredictions by the calc_solubility_*
+# functions in this module.
+solub_models = SolubilityModels(
+    load_ghsolv=True, load_g=True, load_h=True,
+    reduced_number=False, load_saq=True,
+    load_solute=True, logger=None, verbose=False
+)
+SoluteML_estimator = load_SoluteML_estimator()
+
+# Endpoints should declare their request parameter with this model so that
+# FastAPI parses and validates the JSON body.
+class SolubilityRequest(BaseModel):
+    """JSON request body for the SolProp endpoints.
+
+    Every field defaults to ``None`` (``use_reference`` defaults to
+    ``False``), so a body containing any subset of the fields passes schema
+    validation; an endpoint that needs a particular field must check for
+    ``None`` itself.
+
+    NOTE(review): the thermodynamic fields presumably correspond to the
+    ``hsub298`` / ``cp_gas_298`` / ``cp_solid_298`` arguments of the
+    ``calc_solubility_*`` functions; units are not stated here -- confirm
+    against the solprop documentation.
+    """
+    solvent_smiles: Optional[str] = None
+    solute_smiles: Optional[str] = None
+    temperature: Optional[float] = None
+    reference_solvent: Optional[str] = None
+    reference_solubility: Optional[float] = None
+    hsub298: Optional[float] = None
+    cp_gas_298: Optional[float] = None
+    cp_solid_298: Optional[float] = None
+    use_reference: bool = False
+    
+@app.post("/dGsolv_estimator")
+def _dGsolv_estimator(req):
+    result = dGsolv_estimator.predict([[req["solvent_smiles"], req["solute_smiles"]]])
+    return {
+        "avg_pred": result[0],
+        "epi_unc": result[1],
+        "valid_indices": result[2]
+    }
+
+
+@app.post("/dHsolv_estimator")
+def _dHsolv_estimator(req):
+    result = dHsolv_estimator.predict([[req["solvent_smiles"], req["solute_smiles"]]])
+    return {
+        "avg_pred": result[0],
+        "epi_unc": result[1],
+        "valid_indices": result[2]
+    }
+
+
+@app.post("/SoluteML_estimator")
+def _SoluteML_estimator(req):
+    result = SoluteML_estimator.predict([req["solute_smiles"]])
+    return {
+        "avg_pred": result[0],
+        "epi_unc": result[1],
+        "valid_indices": result[2]
+    }
+
+
+# TODO: convert these into proper pydantic models and fastapi endpoints
+def calc_solubility_no_ref(solvent_smiles=None, solute_smiles=None, temp=None, hsub298=None, cp_gas_298=None,
+                           cp_solid_298=None):
+    """
+    Calculate solubility with no reference solvent and reference solubility
+    """
+    hsubl_298 = np.array([hsub298]) if hsub298 is not None else None
+    Cp_solid = np.array([cp_solid_298]) if cp_solid_298 is not None else None
+    Cp_gas = np.array([cp_gas_298]) if cp_gas_298 is not None else None
+
+    # Create dataframe with solvent and solute data
+    data = {
+        'solvent_smiles': [solvent_smiles],
+        'solute_smiles': [solute_smiles],
+        'temperature': [temp],
+        'reference_solubility': [None],
+        'reference_solvent': [None],
+    }
+    df = pd.DataFrame(data)
+
+    solub_data = SolubilityData(df=df)
+    predictions = SolubilityPredictions(predict_aqueous=True, predict_reference_solvents=False, 
+                                        predict_t_dep=True, predict_solute_parameters=True, 
+                                        data=solub_data, models=solub_models,  verbose=False)
+    calculations = SolubilityCalculations(predictions=predictions, calculate_aqueous=True,
+                                          calculate_reference_solvents=False, calculate_t_dep=True,
+                                          calculate_t_dep_with_t_dep_hdiss=True, verbose=False,
+                                          hsubl_298=hsubl_298, Cp_solid=Cp_solid, Cp_gas=Cp_gas)
+    return calculations
+
+
+def calc_solubility_with_ref(solvent_smiles=None, solute_smiles=None, temp=None, ref_solvent_smiles=None,
+                             ref_solubility298=None, hsub298=None, cp_gas_298=None, cp_solid_298=None):
+    """
+    Calculate solubility with a reference solvent and reference solubility
+    """
+    hsubl_298 = np.array([hsub298]) if hsub298 is not None else None
+    Cp_solid = np.array([cp_solid_298]) if cp_solid_298 is not None else None
+    Cp_gas = np.array([cp_gas_298]) if cp_gas_298 is not None else None
+
+    data = {
+        'solvent_smiles': [solvent_smiles],
+        'solute_smiles': [solute_smiles],
+        'temperature': [temp],
+        'reference_solubility': [ref_solubility298],
+        'reference_solvent': [ref_solvent_smiles],
+    }
+    df = pd.DataFrame(data)
+
+    solub_data = SolubilityData(df=df)
+    predictions = SolubilityPredictions(predict_aqueous=False, predict_reference_solvents=True, 
+                                        predict_t_dep=True, predict_solute_parameters=True, 
+                                        data=solub_data, models=solub_models,  verbose=False)
+    calculations = SolubilityCalculations(predictions=predictions, calculate_aqueous=False,
+                                          calculate_reference_solvents=True, calculate_t_dep=True,
+                                          calculate_t_dep_with_t_dep_hdiss=True, verbose=False,
+                                          hsubl_298=hsubl_298, Cp_solid=Cp_solid, Cp_gas=Cp_gas)
+    return calculations
+
diff --git a/microservices/solprop/test.py b/microservices/solprop/test.py
@@ -0,0 +1,101 @@
+import pytest
+from fastapi.testclient import TestClient
+from unittest.mock import patch
+
+# IMPORTANT: Assuming your server file is named `main.py`
+from main import app
+
+client = TestClient(app)
+
+# ---------------------------------------------------------
+# Mock Data Constants
+# ---------------------------------------------------------
+MOCK_SOLVENT = "CCO"  # Ethanol
+MOCK_SOLUTE = "CC(=O)O" # Acetic Acid
+MOCK_PREDICT_RESULT = (
+    [[ -4.52 ]],  # avg_pred
+    [[ 0.15 ]],   # epi_unc
+    [[ 0 ]]       # valid_indices
+)
+
+# ---------------------------------------------------------
+# Tests
+# ---------------------------------------------------------
+
+@patch('main.dGsolv_estimator')
+def test_dGsolv_estimator_success(mock_estimator):
+    # Setup the mock to return our fake ML data
+    mock_estimator.predict.return_value = MOCK_PREDICT_RESULT
+    
+    payload = {
+        "solvent_smiles": MOCK_SOLVENT,
+        "solute_smiles": MOCK_SOLUTE
+    }
+    
+    response = client.post("/dGsolv_estimator", json=payload)
+    
+    # Verify the HTTP response
+    assert response.status_code == 200
+    
+    # Verify the JSON payload structure matches your endpoint design
+    data = response.json()
+    assert "avg_pred" in data
+    assert "epi_unc" in data
+    assert "valid_indices" in data
+    
+    # Verify the values
+    assert data["avg_pred"] == [[-4.52]]
+    
+    # Verify the underlying ML model was called correctly by the API
+    mock_estimator.predict.assert_called_once_with([[MOCK_SOLVENT, MOCK_SOLUTE]])
+
+
+@patch('main.dHsolv_estimator')
+def test_dHsolv_estimator_success(mock_estimator):
+    mock_estimator.predict.return_value = MOCK_PREDICT_RESULT
+    
+    payload = {
+        "solvent_smiles": MOCK_SOLVENT,
+        "solute_smiles": MOCK_SOLUTE
+    }
+    
+    response = client.post("/dHsolv_estimator", json=payload)
+    
+    assert response.status_code == 200
+    assert response.json() == {
+        "avg_pred": [[-4.52]],
+        "epi_unc": [[0.15]],
+        "valid_indices": [[0]]
+    }
+    mock_estimator.predict.assert_called_once_with([[MOCK_SOLVENT, MOCK_SOLUTE]])
+
+
+@patch('main.SoluteML_estimator')
+def test_SoluteML_estimator_success(mock_estimator):
+    # SoluteML only returns predictions based on the solute
+    mock_estimator.predict.return_value = MOCK_PREDICT_RESULT
+    
+    payload = {
+        "solute_smiles": MOCK_SOLUTE
+    }
+    
+    response = client.post("/SoluteML_estimator", json=payload)
+    
+    assert response.status_code == 200
+    assert response.json() == {
+        "avg_pred": [[-4.52]],
+        "epi_unc": [[0.15]],
+        "valid_indices": [[0]]
+    }
+    mock_estimator.predict.assert_called_once_with([MOCK_SOLUTE])
+
+
+def test_missing_fields_fail():
+    # If using Pydantic, FastAPI should automatically reject bad payloads
+    # This test ensures we get a 422 if we send an empty body
+    response = client.post("/dGsolv_estimator", json={})
+    
+    # If you implement req: SolubilityRequest, this will correctly be 422
+    # If you implement req: dict, it will fail inside the function with a KeyError.
+    # It is highly recommended to use the Pydantic model so this returns 422.
+    assert response.status_code in [422, 500]
diff --git a/rmgweb/database/views.py b/rmgweb/database/views.py
diff --git a/rmgweb/secretsettings.py.example b/rmgweb/secretsettings.py.example