小分子 PDB 标准化为 SDF

python 复制代码
import os
from rdkit import Chem
import subprocess


# Directory name for small-molecule PDB inputs.
# NOTE(review): not referenced anywhere in the visible code -- confirm it is still needed.
small_mol_path = "pdbs"

from rdkit.Chem import rdDetermineBonds

def load_pdb_with_adaptive_bonds(pdb_file):
    """Load a PDB file as an RDKit molecule with bond orders assigned.

    Two strategies are tried in order:

    1. Read the PDB with RDKit (connectivity taken from the CONECT records
       only) and let ``rdDetermineBonds.DetermineBondOrders`` assign bond
       orders, trying several total charges until one sanitizes.
    2. If that fails, convert the PDB to MOL2 with the ``obabel`` executable
       and read the MOL2 file back into RDKit.

    Args:
        pdb_file: Path to the input PDB file.

    Returns:
        The resulting RDKit molecule, or ``None`` if both strategies fail.
    """
    # Defined up front so the fallback message below can always reference it,
    # including when the initial RDKit read returns None.
    possible_charges = [0, 1, -1, 2, -2]

    # ==========================================
    # STRATEGY 1: RDKit with Strict CONECT Rules
    # ==========================================
    # IMPORTANT: proximityBonding=False forces RDKit to ONLY use the CONECT block,
    # preventing false bonds from compressed 3D coordinates.
    mol = Chem.MolFromPDBFile(pdb_file, removeHs=False, sanitize=False, proximityBonding=False)

    if mol is None:
        print(f"[{pdb_file}] Warning: Initial RDKit read failed.")
    else:
        mol.UpdatePropertyCache(strict=False)

        for charge in possible_charges:
            try:
                # Work on a copy so a failed attempt does not affect the next one.
                mol_test = Chem.Mol(mol)
                rdDetermineBonds.DetermineBondOrders(mol_test, charge=charge)
                Chem.SanitizeMol(mol_test)
                return mol_test
            except Exception:
                # Deliberate best-effort: any failure for this charge simply
                # means "try the next candidate charge".
                continue

    # ==========================================
    # STRATEGY 2: OpenBabel Fallback (Robust)
    # ==========================================
    # If RDKit's strict 3D geometry checker rejects the strained molecule,
    # fallback to OpenBabel's heuristic bond assigner.
    print(f"[{pdb_file}] RDKit bond determination failed with posssible_charges {possible_charges}. Falling back to OpenBabel...")

    base_name, _ = os.path.splitext(pdb_file)
    temp_mol2 = f"{base_name}_temp_ob.mol2"

    # subprocess does not run a shell, so "~" must be expanded explicitly;
    # otherwise the executable is never found.
    obabel_exe = os.path.expanduser("~/anaconda3/envs/UniMoMo/bin/obabel")

    # Use OpenBabel to convert PDB to MOL2 (which hardcodes the bond orders)
    command = [obabel_exe, pdb_file, "-O", temp_mol2]

    try:
        subprocess.run(command, check=True, capture_output=True)
        # Read the resulting MOL2 file back into RDKit
        fallback_mol = Chem.MolFromMol2File(temp_mol2, removeHs=False, sanitize=True)

        if fallback_mol is not None:
            print(f"[{pdb_file}] Success via OpenBabel Fallback!")
            return fallback_mol

    except subprocess.CalledProcessError as e:
        stderr_text = e.stderr.decode("utf-8", errors="replace") if e.stderr else ""
        print(f"[{pdb_file}] OpenBabel Fallback Error: {stderr_text}")
    except OSError as e:
        # Raised when the obabel executable is missing or cannot be executed.
        print(f"[{pdb_file}] OpenBabel Fallback Error: {e}")
    finally:
        # Clean up the temp file on every exit path, including failures.
        if os.path.exists(temp_mol2):
            os.remove(temp_mol2)

    # If both strategies fail
    print(f"[{pdb_file}] CRITICAL: Could not process molecule via any method.")
    return None
        
def save_mol_to_sdf(mol, output_path):
    """Write a single RDKit molecule to an SDF file.

    Args:
        mol: The RDKit molecule to write. If ``None``, a message is printed
            and no file is created.
        output_path: Destination path of the SDF file.

    Returns:
        None.
    """
    # Ensure the molecule object exists
    if mol is None:
        print("Molecule object is None, cannot save.")
        return

    writer = Chem.SDWriter(output_path)
    try:
        writer.write(mol)
    finally:
        # Close the writer even if write() raises, so the file handle is
        # released and buffered output is flushed.
        writer.close()
python 复制代码
# Example usage: load a ligand PDB, assign bond orders, and write the result
# as SDF. save_mol_to_sdf prints a message and writes nothing if loading
# returned None.
ligand_mol = load_pdb_with_adaptive_bonds("ligand_with_h.pdb")
save_mol_to_sdf(ligand_mol, "refined_ligand.sdf")
相关推荐
voidmort27 分钟前
3. 微调(Fine-tuning)与强化学习(RL)的核心思想
python·深度学习·算法
biter down1 小时前
基于 Pywinauto 的 QQ 音乐 GUI 自动化测试实践
python
人道领域1 小时前
【LeetCode刷题日记】669.修剪二叉搜索树
开发语言·python·算法
EntyIU2 小时前
mineru从安装部署到测试使用完整指南
python·ocr
安替-AnTi3 小时前
厚朴 APK 搜索接口分析
python·apk·解析·taobao
山川湖海3 小时前
AI时代快速学编程语言的陷阱(以Python为例)
大数据·人工智能·python
H Journey3 小时前
Supervisor 进程管理工具介绍
python·supervisor·linux 运维
春日见4 小时前
5分钟入门强化学习之动态规划算法与实现
大数据·人工智能·python·算法·机器学习·计算机视觉
DeniuHe4 小时前
sklearn 中所有交叉验证数据集划分方式完整总结
人工智能·python·sklearn
DeniuHe4 小时前
sklearn中不同交叉验证方法的场景适配
人工智能·python·sklearn