提取网络特征(extract features)

本文仅为记录代码

主入口(`if __name__ == '__main__':`):

Python 代码:
if __name__ == '__main__':
    # Fix the random seed first.
    setup(seed=42)
    # Bind MolVocab to a name so pylint does not report it as unused.
    a = MolVocab
    # Raise the RDKit logger threshold to CRITICAL to silence its output.
    rdkit_logger = RDLogger.logger()
    rdkit_logger.setLevel(RDLogger.CRITICAL)

    # Second module-level binding of MolVocab (the class itself, not an instance).
    mol_vocab = MolVocab

    args = parse_args()
    command = args.parser_name

    if command in ('finetune', 'eval'):
        # Both sub-commands run cross_validate; only the logger name differs
        # ('train' for finetune, 'eval' for eval).
        logger = create_logger(name='train' if command == 'finetune' else 'eval',
                               save_dir=args.save_dir,
                               quiet=False)
        cross_validate(args, logger)
    elif command == 'pretrain':
        logger = create_logger(name='pretrain', save_dir=args.save_dir)
        pretrain_model(args, logger)
    elif command == 'fingerprint':
        # NOTE(review): train_args is not used in this branch; the call is kept
        # in case get_newest_train_args() has side effects -- confirm.
        train_args = get_newest_train_args()
        logger = create_logger(name='fingerprint', save_dir=None, quiet=False)
        fingerprints = generate_fingerprints(args, logger)
        # Store the fingerprints under the key 'fps' in a compressed .npz file.
        np.savez_compressed(args.output_path, fps=fingerprints)
    elif command == 'predict':
        train_args = get_newest_train_args()
        avg_preds, test_smiles = make_predictions(args, train_args)
        write_prediction(avg_preds, test_smiles, args)

generate_fingerprints 函数:

Python 代码:
def generate_fingerprints(args: Namespace, logger: Logger = None) -> List[List[float]]:
    """
    Generate fingerprints for the dataset at ``args.data_path``.

    The data is loaded and wrapped in a MoleculeDataset, the model is restored
    from the first entry of ``args.checkpoint_paths``, and the actual
    generation is delegated to ``do_generate``.

    :param args: Arguments. Reads ``checkpoint_paths``, ``data_path`` and ``cuda``.
    :param logger: Logger to report progress to. When None, a logger named
        'fingerprints' is created.
    :return: Whatever ``do_generate`` returns (a list of fingerprints).
    """
    # Looked up first so that a missing checkpoint fails before any other work.
    first_checkpoint = args.checkpoint_paths[0]
    logger = create_logger('fingerprints', quiet=False) if logger is None else logger

    print('Loading data')
    raw_records = get_data(
        path=args.data_path,
        args=args,
        use_compound_names=False,
        max_data_size=float("inf"),
        skip_invalid_smiles=False,
    )
    dataset = MoleculeDataset(raw_records)

    logger.info(f'Total size = {len(dataset):,}')
    logger.info(f'Generating...')

    # Restore the model from the checkpoint, then run it over the dataset.
    network = load_checkpoint(first_checkpoint, cuda=args.cuda, current_args=args, logger=logger)
    return do_generate(model=network, data=dataset, args=args)

do_generate 函数:

Python 代码:
def do_generate(model: nn.Module,
                data: MoleculeDataset,
                args: Namespace,
                batch_size: int = 32,
                num_workers: int = 4,
                ) -> List[List[float]]:
    """
    Do the fingerprint generation on a dataset using the pre-trained models.

    :param model: A model. It is switched to eval mode and called as
        ``model(batch, features_batch)`` for every batch.
    :param data: A MoleculeDataset.
    :param args: Arguments. Passed to MolCollator. Note that
        ``args.bond_drop_rate`` is set to 0 in place and is not restored.
    :param batch_size: Number of items per DataLoader batch. Defaults to 32,
        the value that was previously hard-coded.
    :param num_workers: Number of DataLoader worker processes. Defaults to 4,
        the value that was previously hard-coded.
    :return: A list of fingerprints, in dataset order (the loader does not shuffle).
    """
    model.eval()
    # Force the bond drop rate to 0 for generation; this mutates the caller's args.
    args.bond_drop_rate = 0
    preds = []

    mol_collator = MolCollator(args=args, shared_dict={})

    mol_loader = DataLoader(data,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=num_workers,
                            collate_fn=mol_collator)
    for item in mol_loader:
        # Only the 2nd and 3rd elements of the collated 5-tuple are used here.
        _, batch, features_batch, _, _ = item
        with torch.no_grad():
            batch_preds = model(batch, features_batch)
            # Move to CPU and append one entry per row of the batch output.
            preds.extend(batch_preds.data.cpu().numpy())
    return preds
相关推荐
翱翔的苍鹰几秒前
一个简单的法律问答机器人实现思路
人工智能·深度学习·语言模型·自然语言处理
njsgcs1 分钟前
我要fork openclaw了 ai自己写skill
人工智能
林深现海4 分钟前
【刘二大人】PyTorch深度学习实践笔记 —— 第三集:梯度下降(凝练版)
pytorch·笔记·深度学习
小W与影刀RPA4 分钟前
【影刀RPA】:智能过滤敏感词,高效输出表格
大数据·人工智能·python·低代码·自动化·rpa·影刀rpa
铁蛋AI编程实战8 分钟前
DeepSeek mHC 架构 + Agent 实战大模型开发指南
人工智能·架构·开源
源于花海16 分钟前
迁移学习简明手册——迁移学习相关研究学者
人工智能·机器学习·迁移学习·研究学者
OPEN-Source19 分钟前
开源工具轻松实现高清视频修复
人工智能·视频处理
EW Frontier19 分钟前
【ISAC+抗干扰+信号识别】5G ISAC+深度学习!破解智能交通“自干扰”难题,V2X通信准确率近100%【附代码】
人工智能·深度学习·5g·调制识别·抗干扰·isac
QUDONG_biubiubiu19 分钟前
DeepSeek推出OCR 2模型!瞄准高难度文档识别
人工智能·深度学习·deepseek·deepseek-ocr 2
szcsun520 分钟前
机器学习(四)--无监督学习
人工智能·学习·机器学习