基于Pytorch和RDKit建立QSAR模型

举报
DrugAI 发表于 2021/07/15 04:51:55 2021/07/15
3.8k+ 0 0
【摘要】 尝试使用pytorch和RDKit构建QSAR模型。环境依赖:pip install pprint;pip install argparse;安装rdkit:conda install -c rdkit rdkit;安装Pytorch:conda install pytorch-cpu -c pytorch。基于Pytorch和RDKit的QSAR模型代码见正文……

尝试使用pytorch和RDKit构建QSAR模型

环境依赖

  1. pip install pprint

  2. pip install argparse

  3. #安装rdkit

  4. conda install -c rdkit rdkit

  5. #安装Pytorch

  6. conda install pytorch-cpu -c pytorch

基于Pytorch和RDKit的QSAR模型代码:

<阅读原文,代码效果更佳>

#!/usr/bin/python3

import pprint

import argparse

import torch

import torch.optim as optim

from torch import nn as nn

import torch.nn.functional as F

from torch.autograd import Variable

from rdkit import Chem

from rdkit.Chem import AllChem

from rdkit.Chem import DataStructs

import numpy as np

#from sklearn import preprocessing

def base_parser():
    """Build the command-line parser for the QSAR training script.

    Returns:
        argparse.ArgumentParser: parser with two positional arguments,
        ``trainset`` and ``testset`` (SDF file paths), and the optional
        integer ``--epochs`` (default 150).
    """
    # NOTE: the first positional argument of ArgumentParser is ``prog``, so
    # this text is shown as the program name in usage output. Kept as-is to
    # preserve the existing command-line appearance.
    parser = argparse.ArgumentParser("This is simple test of pytorch")
    parser.add_argument("trainset", help="sdf for train")
    parser.add_argument("testset", help="sdf for test")
    # type=int is required: without it a value supplied on the command line
    # stays a str and range(args.epochs) raises TypeError.
    parser.add_argument(
        "--epochs",
        type=int,
        default=150,
        help="number of training epochs (default: 150)",
    )
    return parser

# Read the command line, then load both SDF files, dropping any None
# entries produced while iterating the suppliers.
parser = base_parser()
args = parser.parse_args()

traindata = list(
    filter(lambda m: m is not None, Chem.SDMolSupplier(args.trainset))
)
testdata = list(
    filter(lambda m: m is not None, Chem.SDMolSupplier(args.testset))
)

def molsfeaturizer(mols, radius=2, n_bits=2048):
    """Convert molecules into a dense matrix of Morgan fingerprint bits.

    Args:
        mols: iterable of RDKit molecule objects.
        radius: Morgan fingerprint radius (default 2, the value previously
            hard-coded).
        n_bits: length of each fingerprint bit vector (default 2048).

    Returns:
        numpy.ndarray of dtype float64 with one row per molecule and
        ``n_bits`` columns; an empty input yields shape ``(0, n_bits)``.
    """
    fps = []
    for mol in mols:
        fp = AllChem.GetMorganFingerprintAsBitVect(mol, radius, nBits=n_bits)
        # Fresh target array per molecule; ConvertToNumpyArray fills it with
        # the fingerprint bits.
        arr = np.zeros((0,))
        DataStructs.ConvertToNumpyArray(fp, arr)
        fps.append(arr)
    # np.float was removed in NumPy 1.24; np.float64 is the type it aliased.
    # reshape keeps a 2-D result even when no molecules were given.
    return np.array(fps, dtype=np.float64).reshape(-1, n_bits)

# Integer target assigned to each "SOL_classification" label.
classes = dict([("(A) low", 0), ("(B) medium", 1), ("(C) high", 2)])
# Two-class alternative (medium and high merged into one class):
#classes = {"(A) low":0, "(B) medium":1, "(C) high":1}

# Fingerprint feature matrices for the training and test molecules.
trainx, testx = molsfeaturizer(traindata), molsfeaturizer(testdata)

# For PyTorch the targets must be of long (int64) type.
trainy = np.fromiter(
    (classes[mol.GetProp("SOL_classification")] for mol in traindata),
    dtype=np.int64,
)
testy = np.fromiter(
    (classes[mol.GetProp("SOL_classification")] for mol in testdata),
    dtype=np.int64,
)

# Wrap the NumPy feature/label arrays as PyTorch tensors (shares memory
# with the arrays; dtype casts happen later, right before use).
X_train = torch.from_numpy(trainx)
X_test = torch.from_numpy(testx)
Y_train = torch.from_numpy(trainy)
Y_test = torch.from_numpy(testy)

# Sanity-check the shapes of each split. The second line previously printed
# Y_train.size() next to X_test, hiding any test-set size mismatch.
print(X_train.size(), Y_train.size())
print(X_test.size(), Y_test.size())

class QSAR_mlp(nn.Module):
    """Four-layer fully connected classifier over fingerprint vectors.

    Layer widths are ``in_features -> 524 -> 10 -> 10 -> n_classes`` with a
    ReLU after each of the first three layers.

    Args:
        in_features: width of each input vector (default 2048).
        n_classes: number of output classes (default 3).
    """

    def __init__(self, in_features=2048, n_classes=3):
        super(QSAR_mlp, self).__init__()
        self.fc1 = nn.Linear(in_features, 524)
        self.fc2 = nn.Linear(524, 10)
        self.fc3 = nn.Linear(10, 10)
        self.fc4 = nn.Linear(10, n_classes)

    def forward(self, x):
        """Return raw class logits of shape ``(batch, n_classes)``.

        No sigmoid/softmax is applied: ``F.cross_entropy`` expects
        unnormalised logits and applies log-softmax itself, so squashing the
        outputs first would cap the achievable loss and slow learning.
        Taking the arg-max over the logits gives the predicted class.
        """
        # Flatten whatever leading shape arrives into (batch, in_features).
        x = x.view(-1, self.fc1.in_features)
        h1 = F.relu(self.fc1(x))
        h2 = F.relu(self.fc2(h1))
        h3 = F.relu(self.fc3(h2))
        return self.fc4(h3)

# Build the model, train it on the full training set, then evaluate it on
# the test set.
model = QSAR_mlp()
print(model)

losses = []  # per-epoch training loss, as Python floats
optimizer = optim.Adam(model.parameters(), lr=0.005)

# Cast once, outside the loop: the model needs float32 inputs and
# cross_entropy needs int64 targets. Plain tensors are used because the
# Variable wrapper has been a no-op since PyTorch 0.4.
data, target = X_train.float(), Y_train.long()

# int() keeps the loop working even if --epochs arrives as a string.
for epoch in range(int(args.epochs)):
    optimizer.zero_grad()
    y_pred = model(data)
    loss = F.cross_entropy(y_pred, target)
    # The loss is a 0-dim tensor: loss.data[0] raises IndexError on
    # PyTorch >= 0.5, so read the scalar with .item().
    loss_value = loss.item()
    losses.append(loss_value)
    print("Loss: {}".format(loss_value))
    loss.backward()
    optimizer.step()

# Inference only: skip gradient tracking.
with torch.no_grad():
    pred_y = model(X_test.float())

# Index of the highest-scoring class for each test molecule.
predicted = torch.max(pred_y, 1)[1]

for p, t in zip(predicted.tolist(), Y_test.tolist()):
    print("pred:{}, target:{}".format(p, t))

# Fraction of matching predictions, computed on tensors and converted to a
# Python float so the printed value is a plain number.
accuracy = (predicted == Y_test).float().mean().item()
print("Accuracy: {}".format(accuracy))

测试模型

python qsar_pytorch.py solubility.train.sdf solubility.test.sdf


文章来源: drugai.blog.csdn.net,作者:DrugAI,版权归原作者所有,如需转载,请联系作者。

原文链接:drugai.blog.csdn.net/article/details/105683676

【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱: cloudbbs@huaweicloud.com
  • 点赞
  • 收藏
  • 关注作者

作者其他文章

评论(0)

抱歉,系统识别当前为高风险访问,暂不支持该操作

    全部回复

    上滑加载中

    设置昵称

    在此一键设置昵称,即可参与社区互动!

    *长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。

    *长度不超过10个汉字或20个英文字符,设置后3个月内不可修改。