From 8e9c6babdd8f9c8b185412218edd9289295829c8 Mon Sep 17 00:00:00 2001 From: Lester Hedges Date: Mon, 15 Jun 2026 21:33:31 +0100 Subject: [PATCH] Expose max_path and max_ring_size kwargs. --- nodes/playground/prepareFEP.ipynb | 70 +------------------ nodes/playground/prepareFEP.py | 18 +++++ src/BioSimSpace/Align/_merge.py | 38 +++++++++- .../Sandpit/Exscientia/Align/_merge.py | 38 +++++++++- 4 files changed, 91 insertions(+), 73 deletions(-) diff --git a/nodes/playground/prepareFEP.ipynb b/nodes/playground/prepareFEP.ipynb index f9bc375e..d698d974 100644 --- a/nodes/playground/prepareFEP.ipynb +++ b/nodes/playground/prepareFEP.ipynb @@ -118,49 +118,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "node.addInput(\"input1\", BSS.Gateway.FileSet(help=\"A topology and coordinates file\"))\n", - "node.addInput(\"input2\", BSS.Gateway.FileSet(help=\"A topology and coordinates file\"))\n", - "node.addInput(\n", - " \"prematch\",\n", - " BSS.Gateway.String(\n", - " help=\"list of atom indices that are matched between input2 and input1. Syntax is of the format 1-3,4-8,9-11... Ignored if a mapping is provided\",\n", - " default=\"\",\n", - " ),\n", - ")\n", - "node.addInput(\n", - " \"mapping\",\n", - " BSS.Gateway.File(\n", - " help=\"csv file that contains atom indices in input1 mapped ot atom indices in input2\",\n", - " optional=True,\n", - " ),\n", - ")\n", - "node.addInput(\n", - " \"timeout\",\n", - " BSS.Gateway.Time(\n", - " help=\"The timeout for the maximum common substructure search\",\n", - " default=10 * BSS.Units.Time.second,\n", - " ),\n", - ")\n", - "node.addInput(\n", - " \"allow_ring_breaking\",\n", - " BSS.Gateway.Boolean(\n", - " help=\"Whether to allow opening/closing of rings during merge\", default=False\n", - " ),\n", - ")\n", - "node.addInput(\n", - " \"allow_ring_size_change\",\n", - " BSS.Gateway.Boolean(\n", - " help=\"Whether to allow ring size changes during merge\", default=False\n", - " ),\n", - ")\n", - "node.addInput(\n", - " \"output\",\n", - " BSS.Gateway.String(\n", - " help=\"The root name for the files describing the perturbation input1->input2.\"\n", - " ),\n", - ")" - ] + "source": "node.addInput(\"input1\", BSS.Gateway.FileSet(help=\"A topology and coordinates file\"))\nnode.addInput(\"input2\", BSS.Gateway.FileSet(help=\"A topology and coordinates file\"))\nnode.addInput(\n \"prematch\",\n BSS.Gateway.String(\n help=\"list of atom indices that are matched between input2 and input1. Syntax is of the format 1-3,4-8,9-11... Ignored if a mapping is provided\",\n default=\"\",\n ),\n)\nnode.addInput(\n \"mapping\",\n BSS.Gateway.File(\n help=\"csv file that contains atom indices in input1 mapped ot atom indices in input2\",\n optional=True,\n ),\n)\nnode.addInput(\n \"timeout\",\n BSS.Gateway.Time(\n help=\"The timeout for the maximum common substructure search\",\n default=10 * BSS.Units.Time.second,\n ),\n)\nnode.addInput(\n \"allow_ring_breaking\",\n BSS.Gateway.Boolean(\n help=\"Whether to allow opening/closing of rings during merge\", default=False\n ),\n)\nnode.addInput(\n \"allow_ring_size_change\",\n BSS.Gateway.Boolean(\n help=\"Whether to allow ring size changes during merge\", default=False\n ),\n)\nnode.addInput(\n \"max_path\",\n BSS.Gateway.Integer(\n help=\"Maximum path length used when searching for rings. Increase for very large macrocycles.\",\n default=50,\n minimum=1,\n ),\n)\nnode.addInput(\n \"max_ring_size\",\n BSS.Gateway.Integer(\n help=\"Maximum ring size considered when checking for ring size changes.\",\n default=24,\n minimum=1,\n ),\n)\nnode.addInput(\n \"output\",\n BSS.Gateway.String(\n help=\"The root name for the files describing the perturbation input1->input2.\"\n ),\n)" }, { "cell_type": "code", @@ -279,29 +237,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "# Align lig2 to lig1 based on the best mapping (inverted). The molecule is aligned based\n", - "# on a root mean squared displacement fit to find the optimal translation vector\n", - "# (as opposed to merely taking the difference of centroids).\n", - "lig2 = BSS.Align.rmsdAlign(lig2, lig1, inverted_mapping)\n", - "\n", - "# Merge the two ligands based on the mapping.\n", - "merged = BSS.Align.merge(\n", - " lig1,\n", - " lig2,\n", - " mapping,\n", - " allow_ring_breaking=node.getInput(\"allow_ring_breaking\"),\n", - " allow_ring_size_change=node.getInput(\"allow_ring_size_change\"),\n", - ")\n", - "\n", - "# Create a composite system\n", - "system1.removeMolecules(lig1)\n", - "system1.addMolecules(merged)\n", - "\n", - "# Make sure the box vectors are in reduced form.\n", - "system1.reduceBoxVectors()\n", - "system1.rotateBoxVectors()" - ] + "source": "# Align lig2 to lig1 based on the best mapping (inverted). The molecule is aligned based\n# on a root mean squared displacement fit to find the optimal translation vector\n# (as opposed to merely taking the difference of centroids).\nlig2 = BSS.Align.rmsdAlign(lig2, lig1, inverted_mapping)\n\n# Merge the two ligands based on the mapping.\nmerged = BSS.Align.merge(\n lig1,\n lig2,\n mapping,\n allow_ring_breaking=node.getInput(\"allow_ring_breaking\"),\n allow_ring_size_change=node.getInput(\"allow_ring_size_change\"),\n max_path=node.getInput(\"max_path\"),\n max_ring_size=node.getInput(\"max_ring_size\"),\n)\n\n# Create a composite system\nsystem1.removeMolecules(lig1)\nsystem1.addMolecules(merged)\n\n# Make sure the box vectors are in reduced form.\nsystem1.reduceBoxVectors()\nsystem1.rotateBoxVectors()" }, { "cell_type": "code", @@ -421,4 +357,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} +} \ No newline at end of file diff --git a/nodes/playground/prepareFEP.py b/nodes/playground/prepareFEP.py index a98e2189..afd8476e 100644 --- a/nodes/playground/prepareFEP.py +++ b/nodes/playground/prepareFEP.py @@ -127,6 +127,22 @@ def loadMapping(mapping_file): help="Whether to allow ring size changes during merge", default=False ), ) +node.addInput( + "max_path", + BSS.Gateway.Integer( + help="Maximum path length used when searching for rings. Increase for very large macrocycles.", + default=50, + minimum=1, + ), +) +node.addInput( + "max_ring_size", + BSS.Gateway.Integer( + help="Maximum ring size considered when checking for ring size changes.", + default=24, + minimum=1, + ), +) node.addInput( "output", BSS.Gateway.String( @@ -235,6 +251,8 @@ def loadMapping(mapping_file): mapping, allow_ring_breaking=node.getInput("allow_ring_breaking"), allow_ring_size_change=node.getInput("allow_ring_size_change"), + max_path=node.getInput("max_path"), + max_ring_size=node.getInput("max_ring_size"), ) # Create a composite system diff --git a/src/BioSimSpace/Align/_merge.py b/src/BioSimSpace/Align/_merge.py index eeff53d3..24b50649 100644 --- a/src/BioSimSpace/Align/_merge.py +++ b/src/BioSimSpace/Align/_merge.py @@ -36,6 +36,8 @@ def merge( fix_perturbable_zero_sigmas=True, force=False, roi=None, + max_path=50, + max_ring_size=24, property_map0={}, property_map1={}, **kwargs, @@ -75,6 +77,16 @@ def merge( The region of interest to merge. Consists of a list of ROI residue indices. + max_path : int + Maximum path length used when searching for rings. The default of + 50 covers typical macrocycles. Increase if larger rings need to be + detected. + + max_ring_size : int + Maximum ring size considered when checking for ring size changes. + The default of 24 covers most drug-like macrocycles. Rings larger + than this threshold are not subject to ring-size-change detection. + property_map0 : dict A dictionary that maps "properties" in this molecule to their user defined values. This allows the user to refer to properties @@ -136,6 +148,12 @@ def merge( if not isinstance(force, bool): raise TypeError("'force' must be of type 'bool'") + if not isinstance(max_path, int) or max_path < 1: + raise TypeError("'max_path' must be a positive integer") + + if not isinstance(max_ring_size, int) or max_ring_size < 1: + raise TypeError("'max_ring_size' must be a positive integer") + if not isinstance(mapping, dict): raise TypeError("'mapping' must be of type 'dict'.") else: @@ -1202,7 +1220,14 @@ def merge( # Combined ring check — calls find_paths once per connectivity. is_ring_broken, is_ring_size_change = _check_ring( - c0, conn1, idx, idy, idx_map, idy_map + c0, + conn1, + idx, + idy, + idx_map, + idy_map, + max_path=max_path, + max_ring_size=max_ring_size, ) # A ring was broken and it is not allowed. @@ -1268,7 +1293,14 @@ def merge( # Combined ring check — calls find_paths once per connectivity. is_ring_broken, is_ring_size_change = _check_ring( - c1, conn0, idx, idy, idx_map, idy_map + c1, + conn0, + idx, + idy, + idx_map, + idy_map, + max_path=max_path, + max_ring_size=max_ring_size, ) # A ring was broken and it is not allowed. @@ -1494,7 +1526,7 @@ def merge( return mol -def _check_ring(conn0, conn1, idx0, idy0, idx1, idy1, max_path=50, max_ring_size=12): +def _check_ring(conn0, conn1, idx0, idy0, idx1, idy1, max_path=50, max_ring_size=24): """ Internal function to test whether a perturbation opens/closes a ring or changes its size for a given pair of atoms. diff --git a/src/BioSimSpace/Sandpit/Exscientia/Align/_merge.py b/src/BioSimSpace/Sandpit/Exscientia/Align/_merge.py index d5841a72..c4102611 100644 --- a/src/BioSimSpace/Sandpit/Exscientia/Align/_merge.py +++ b/src/BioSimSpace/Sandpit/Exscientia/Align/_merge.py @@ -35,6 +35,8 @@ def merge( allow_ring_size_change=False, force=False, roi=None, + max_path=50, + max_ring_size=24, property_map0={}, property_map1={}, **kwargs, @@ -70,6 +72,16 @@ def merge( roi : list The region of interest to merge. Consist of two lists of atom indices. + max_path : int + Maximum path length used when searching for rings. The default of + 50 covers typical macrocycles. Increase if larger rings need to be + detected. + + max_ring_size : int + Maximum ring size considered when checking for ring size changes. + The default of 24 covers most drug-like macrocycles. Rings larger + than this threshold are not subject to ring-size-change detection. + property_map0 : dict A dictionary that maps "properties" in this molecule to their user defined values. This allows the user to refer to properties @@ -127,6 +139,12 @@ def merge( if not isinstance(force, bool): raise TypeError("'force' must be of type 'bool'") + if not isinstance(max_path, int) or max_path < 1: + raise TypeError("'max_path' must be a positive integer") + + if not isinstance(max_ring_size, int) or max_ring_size < 1: + raise TypeError("'max_ring_size' must be a positive integer") + if not isinstance(mapping, dict): raise TypeError("'mapping' must be of type 'dict'.") else: @@ -1194,7 +1212,14 @@ def merge( # Combined ring check — calls find_paths once per connectivity. is_ring_broken, is_ring_size_change = _check_ring( - c0, conn, idx, idy, idx_map, idy_map + c0, + conn, + idx, + idy, + idx_map, + idy_map, + max_path=max_path, + max_ring_size=max_ring_size, ) # A ring was broken and it is not allowed. @@ -1260,7 +1285,14 @@ def merge( # Combined ring check — calls find_paths once per connectivity. is_ring_broken, is_ring_size_change = _check_ring( - c1, conn, idx, idy, idx_map, idy_map + c1, + conn, + idx, + idy, + idx_map, + idy_map, + max_path=max_path, + max_ring_size=max_ring_size, ) # A ring was broken and it is not allowed. @@ -1360,7 +1392,7 @@ def merge( return mol -def _check_ring(conn0, conn1, idx0, idy0, idx1, idy1, max_path=50, max_ring_size=12): +def _check_ring(conn0, conn1, idx0, idy0, idx1, idy1, max_path=50, max_ring_size=24): """ Internal function to test whether a perturbation opens/closes a ring or changes its size for a given pair of atoms.