ReactionMechanismGenerator · sevyharris · Apr 15, 2026 · Apr 13, 2026 · Apr 12, 2026 · Apr 12, 2026
diff --git a/rmgpy/data/kinetics/family.py b/rmgpy/data/kinetics/family.py
@@ -4440,8 +4440,9 @@ def extract_source_from_comments(self, reaction):
             [Family_Label, {'template': originalTemplate,
                             'degeneracy': degeneracy,
                             'exact': boolean_exact?,
-                            'rules': a list of (original rate rule entry, weight in average)
-                            'training': a list of (original rate rule entry associated with training entry, original training entry, weight in average)}]
+                            'rules': a list of (original rate rule entry, weight in average),
+                            'training': a list of (original rate rule entry associated with training entry, original training entry, weight in average),
+                            'autogenerated': boolean for whether kinetics come from autogenerated subgraph isomorphic decision tree}]
 
 
         where Exact is a boolean of whether the rate is an exact match, Template is
@@ -4455,6 +4456,7 @@ def extract_source_from_comments(self, reaction):
         rules = None
         training_entries = None
         degeneracy = 1
+        autogenerated = False
 
         training_reaction_pattern = r'Matched reaction\s*(\d+).*in.*training'
         degeneracy_pattern = r'Multiplied by reaction path degeneracy\s*(\d+)'
@@ -4494,6 +4496,7 @@ def extract_source_from_comments(self, reaction):
         autogen_node_matches = re.search(autogen_node_search_pattern, full_comment_string)
         template_matches = re.search(template_pattern, full_comment_string)
         if autogen_node_matches is not None:  # autogenerated trees
+            autogenerated = True
             template_str = autogen_node_matches.group(1).split('Multiplied by reaction path degeneracy')[0].strip()
             template_str = template_str.split('in family')[0].strip()
             tokens = template_str.split()
@@ -4510,7 +4513,7 @@ def extract_source_from_comments(self, reaction):
             raise ValueError(f'Could not find rate rule in comments for reaction {reaction}.')
         rules, training_entries = self.get_sources_for_template(template)
         source_dict = {'template': template, 'degeneracy': degeneracy, 'exact': exact_rule,
-                       'rules': rules, 'training': training_entries}
+                       'rules': rules, 'training': training_entries, 'autogenerated': autogenerated}
 
         # Source of the kinetics is from rate rules
         return False, [self.label, source_dict]

diff --git a/rmgpy/data/thermo.py b/rmgpy/data/thermo.py
@@ -2724,7 +2724,8 @@ def extract_source_from_comments(self, species):
 
         source = {'Library': String_Name_of_Library_Used,
                   'QM': String_of_Method_Used,
-                  'GAV': Dictionary_of_Groups_Used 
+                  'GAV': Dictionary_of_Groups_Used,
+                  'ADS': Dictionary_of_Adsorption_Group_Used,
                   }
 
         The Dictionary_of_Groups_Used looks like 
@@ -2743,14 +2744,54 @@ def extract_source_from_comments(self, species):
             # Store the level of the calculation, which is the 2nd token in the comments
             source['QM'] = tokens[1]
 
+        elif comment.startswith('Gas phase thermo'):
+            # Handle adsorption correction thermo data of the following format:
+            # Library example
+            # Gas phase thermo for C(T) from Thermo library: primaryThermoLibrary.
+            # Adsorption correction: + Thermo group additivity estimation: adsorptionPt111(Cq*)
+
+            # GAV example
+            # Gas phase thermo for [CH]CC from Thermo group additivity estimation: group(Cs-CsCsHH) + group(Cs-CsHHH) + group(Cs-CsHHH) + radical(CCJ2_triplet).
+            # Adsorption correction: + Thermo group additivity estimation: adsorptionPt111(C=*RCR3)
+
+            comment = comment.replace(r'\n', ' ')
+            comment = comment.replace('\n', ' ')
+            assert 'Adsorption correction:' in comment, f'adsorption correction in unrecognized format {comment}'
+
+            # Handle the gas-phase portion first
+            gas_comment = comment.split('Adsorption correction: + ')[0].strip()
+            gas_comment = gas_comment.replace('.', '', -1)  # NOTE: a count of -1 removes every '.' in the gas-phase comment, not only a trailing one
+            gas_comment = gas_comment[gas_comment.find('from ', len('Gas phase thermo for ')) + len('from '):]
+            dummy_gas_phase_species = Species()
+            dummy_gas_phase_species.thermo = NASA()
+            dummy_gas_phase_species.thermo.comment = gas_comment
+            source = self.extract_source_from_comments(dummy_gas_phase_species)
+
+            # This is an adsorption correction
+            # comment is split into two parts: the gas phase, and the surface adsorption correction
+            ads_correction_comment = comment.split('Adsorption correction: +')[-1].strip()
+            dummy_adsorption_correction_species = Species()
+            dummy_adsorption_correction_species.thermo = NASA()
+            dummy_adsorption_correction_species.thermo.comment = ads_correction_comment
+            source['ADS'] = self.extract_source_from_comments(dummy_adsorption_correction_species)['GAV']
+
+            return source
+
         # Check for group additivity contributions to the thermo in this species            
 
-        # The contribution of the groups can be either additive or substracting
+        # The contribution of the groups can be either additive or subtracting
         # after changes to the polycyclic algorithm
 
         comment = comment.replace(' + ', ' +')
         comment = comment.replace(' - ', ' -')
-        tokens = comment.split()
+
+        # Sometimes chemkin splits the comment with newlines
+        # this allows us to get rid of newlines while ensuring separate
+        # groups are still split by spaces
+        comment = comment.replace(')\n+', ') +')
+        comment = comment.replace(')\n-', ') -')
+        comment = comment.replace('\n', '')
+        tokens = comment.split(' ')
 
         groups = {}
         group_types = list(self.groups.keys())

diff --git a/rmgpy/tools/loader.py b/rmgpy/tools/loader.py
@@ -50,6 +50,7 @@ def load_rmg_job(
     use_java=False,
     use_chemkin_names=False,
     check_duplicates=True,
+    surface_path=None,
 ):
     # The argument is an RMG-Py input file
     rmg = load_rmg_py_job(
@@ -59,13 +60,14 @@ def load_rmg_job(
         generate_images,
         use_chemkin_names=use_chemkin_names,
         check_duplicates=check_duplicates,
+        surface_path=surface_path,
     )
 
     return rmg
 
 
 def load_rmg_py_job(input_file, chemkin_file=None, species_dict=None, generate_images=True,
-                    use_chemkin_names=False, check_duplicates=True):
+                    use_chemkin_names=False, check_duplicates=True, surface_path=None):
     """
     Load the results of an RMG-Py job generated from the given `input_file`.
     """
@@ -83,7 +85,8 @@ def load_rmg_py_job(input_file, chemkin_file=None, species_dict=None, generate_i
         species_dict = os.path.join(os.path.dirname(input_file), 'chemkin', 'species_dictionary.txt')
     species_list, reaction_list = load_chemkin_file(chemkin_file, species_dict,
                                                     use_chemkin_names=use_chemkin_names,
-                                                    check_duplicates=check_duplicates)
+                                                    check_duplicates=check_duplicates,
+                                                    surface_path=surface_path)
 
     # Created "observed" versions of all reactive species that are not explicitly
     # identified as  "constant" species