Skip to content

Commit ec286ea

Browse files
committed
Handle thermo comment with newline through group name
Sometimes Chemkin writes a thermo comment with a newline that splits a group name in two, which causes extract_source_from_comments to fail to find the group. This commit removes the newlines and splits the comment on spaces when extracting the groups from the comment. It also adds a test.
1 parent 9a5490a commit ec286ea

File tree

2 files changed

+51
-1
lines changed

2 files changed

+51
-1
lines changed

rmgpy/data/thermo.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2750,7 +2750,14 @@ def extract_source_from_comments(self, species):
27502750

27512751
comment = comment.replace(' + ', ' +')
27522752
comment = comment.replace(' - ', ' -')
2753-
tokens = comment.split()
2753+
2754+
# Sometimes chemkin splits the comment with newlines
2755+
# this allows us to get rid of newlines while ensuring separate
2756+
# groups are still split by spaces
2757+
comment = comment.replace(')\n+', ') +')
2758+
comment = comment.replace(')\n-', ') -')
2759+
comment = comment.replace('\n', '')
2760+
tokens = comment.split(' ')
27542761

27552762
groups = {}
27562763
group_types = list(self.groups.keys())

test/rmgpy/data/thermoTest.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,49 @@ def test_parse_thermo_comments(self):
529529
assert source["GAV"]["ring"][0][1] == -1 # the weight of benzene contribution should be -1
530530
assert source["GAV"]["group"][0][1] == 2 # weight of the group(Cs-CsCsHH) contribution should be 2
531531

532+
# Check extract source from comment with tricky group(labelA)\n+ group(labelB) case (newline instead of space to split)
533+
tricky_newline_plus_sp = Species(smiles="[O]C(O)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)C(F)(F)F")
534+
tricky_newline_plus_sp.thermo = NASA()
535+
tricky_newline_plus_sp.thermo.comment = 'Thermo group additivity estimation: group(O2s-CsH) + group(O2s-CsH) + group(CsCsCsFF) + longDistanceInteraction_noncyclic(Cs(F)2-Cs(F)2) +\nlongDistanceInteraction_noncyclic(Cs(F)2-Cs(F)2) + longDistanceInteraction_noncyclic(CsF2-CsF2-CsF2) +\nlongDistanceInteraction_noncyclic(CsF2-CsF2-CsF2) + group(CsCsCsFF) + longDistanceInteraction_noncyclic(Cs(F)2-Cs(F)2) +\nlongDistanceInteraction_noncyclic(Cs(F)2-Cs(F)2) + longDistanceInteraction_noncyclic(CsF2-CsF2-CsF2) +\nlongDistanceInteraction_noncyclic(CsF2-CsF2-CsF2) + group(CsCsCsFF) + longDistanceInteraction_noncyclic(Cs(F)2-Cs(F)2) +\nlongDistanceInteraction_noncyclic(Cs(F)2-Cs(F)2) + longDistanceInteraction_noncyclic(CsF2-CsF2-CsF2) +\nlongDistanceInteraction_noncyclic(CsF2-CsF2-CsF2) + group(CsCsCsFF) + longDistanceInteraction_noncyclic(Cs(F)2-Cs(F)2) +\nlongDistanceInteraction_noncyclic(Cs(F)2-Cs(F)2) + longDistanceInteraction_noncyclic(CsF2-CsF2-CsF2) + group(CsCsCsFF) +\nlongDistanceInteraction_noncyclic(Cs(F)2-Cs(F)) + longDistanceInteraction_noncyclic(Cs(F)2-Cs(F)2) + longDistanceInteraction_noncyclic(CsF2-CsF2-CsF2)\n+ group(CsCsCsFF) + longDistanceInteraction_noncyclic(Cs(F)2-Cs(F)2) + longDistanceInteraction_noncyclic(CsF2-CsF2-CsF2) + group(CsCFOO) +\ngroup(CsCsFFF) + longDistanceInteraction_noncyclic(Cs(F)3-Cs(F)2) + longDistanceInteraction_noncyclic(Cs(F)3-R-Cs(F)2) + radical(O2sj(Cs-F1sO2sCs))'
536+
source = self.database.extract_source_from_comments(tricky_newline_plus_sp)
537+
assert source['GAV']['group'][0][0].label == "O2s-CsH"
538+
assert source['GAV']['group'][0][1] == 2
539+
assert source['GAV']['group'][1][0].label == "CsCsCsFF"
540+
assert source['GAV']['group'][1][1] == 6
541+
assert source['GAV']['group'][2][0].label == "CsCFOO"
542+
assert source['GAV']['group'][2][1] == 1
543+
assert source['GAV']['group'][3][0].label == "CsCsFFF"
544+
assert source['GAV']['group'][3][1] == 1
545+
assert source['GAV']['longDistanceInteraction_noncyclic'][0][0].label == "Cs(F)2-Cs(F)2"
546+
assert source['GAV']['longDistanceInteraction_noncyclic'][0][1] == 10
547+
assert source['GAV']['longDistanceInteraction_noncyclic'][1][0].label == "CsF2-CsF2-CsF2"
548+
assert source['GAV']['longDistanceInteraction_noncyclic'][1][1] == 9
549+
assert source['GAV']['longDistanceInteraction_noncyclic'][2][0].label == "Cs(F)2-Cs(F)"
550+
assert source['GAV']['longDistanceInteraction_noncyclic'][2][1] == 1
551+
assert source['GAV']['longDistanceInteraction_noncyclic'][3][0].label == "Cs(F)3-Cs(F)2"
552+
assert source['GAV']['longDistanceInteraction_noncyclic'][3][1] == 1
553+
assert source['GAV']['longDistanceInteraction_noncyclic'][4][0].label == "Cs(F)3-R-Cs(F)2"
554+
assert source['GAV']['longDistanceInteraction_noncyclic'][4][1] == 1
555+
assert source['GAV']['radical'][0][0].label == "O2sj(Cs-F1sO2sCs)"
556+
assert source['GAV']['radical'][0][1] == 1
557+
558+
# check extract source from comment with newline through group name
559+
newline_sp = Species(smiles="[O]OC(C#N)(C)C")
560+
newline_sp.thermo = NASA()
561+
newline_sp.thermo.comment = 'Thermo group additivity estimation: group(O2s-OsCs) + group(O2s-OsH) + group(N3t-(Cs)Ct) + group(Cs-(Cds-Cds)CsCsOs) + group(Cs-CsHHH) + group(Cs-\nCsHHH) + group(Ct-N3tCs) + radical(C3COOJ)'
562+
source = self.database.extract_source_from_comments(newline_sp)
563+
assert "GAV" in source
564+
assert source['GAV']['group'][0][0].label == "O2s-OsCs"
565+
assert source['GAV']['group'][1][0].label == "O2s-OsH"
566+
assert source['GAV']['group'][2][0].label == "N3t-(Cs)Ct"
567+
assert source['GAV']['group'][3][0].label == "Cs-(Cds-Cds)CsCsOs"
568+
assert source['GAV']['group'][4][0].label == "Cs-CsHHH"
569+
assert source['GAV']['group'][5][0].label == "Ct-N3tCs"
570+
assert source["GAV"]["radical"][0][0].label == "C3COOJ"
571+
assert source["GAV"]["radical"][0][1] == 1
572+
assert all(source['GAV']['group'][x][1] == 1 for x in [0, 1, 2, 3, 5])
573+
assert source['GAV']['group'][4][1] == 2
574+
532575
def test_species_thermo_generation_hbi_library(self):
533576
"""Test thermo generation for species objects for HBI correction on library value.
534577

0 commit comments

Comments
 (0)