脚本1：批量将文件的第一行替换为文件名
import os

# Directory holding the input files (a ".mol" suffix is stripped from their
# names, so presumably MOL files). Rewritten copies go to "<path>-2".
path = 'POSCAR'


def retitle_mol_files(src_dir, dst_dir):
    """Copy each file in ``src_dir`` to ``dst_dir`` with line 1 set to its name.

    The first line of every copied file is replaced by the file name with a
    trailing ``.mol`` suffix removed; all remaining lines are copied
    unchanged. The input files themselves are not modified.

    Args:
        src_dir: Directory containing the files to read.
        dst_dir: Directory receiving the rewritten copies (created if missing).

    Returns:
        The names of the files that were written, in directory-listing order.
    """
    os.makedirs(dst_dir, exist_ok=True)
    written = []
    # The original listed the builtin ``dir`` here, which raises TypeError.
    for file in os.listdir(src_dir):
        path_mol = os.path.join(src_dir, file)
        if not os.path.isfile(path_mol):
            # Sub-directories cannot be opened as text files; skip them.
            continue
        # Strip only a trailing ".mol" so the rest of the name is untouched.
        title = file[:-len('.mol')] if file.endswith('.mol') else file
        with open(path_mol, 'r') as content:
            lines = content.readlines()
        # Slice assignment also covers an empty input file (no line 0 yet).
        lines[:1] = [title + '\n']
        with open(os.path.join(dst_dir, file), 'w') as content_replace:
            content_replace.writelines(lines)
        written.append(file)
    return written


if __name__ == '__main__':
    retitle_mol_files(path, path + '-2')

# Call RDKit to batch-compute molecular descriptors.
import os
import warnings
import rdkit
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.ML.Descriptors import MoleculeDescriptors

# Directory whose files are read with Chem.MolFromMolFile.
path = 'path_1'


def build_descriptor_table(mol_dir, y_name='_Name'):
    """Compute RDKit descriptors for every readable MOL file in ``mol_dir``.

    Args:
        mol_dir: Directory whose files are parsed with ``Chem.MolFromMolFile``.
        y_name: Molecule property read with ``GetProp`` and stored as the
            first column of the result.

    Returns:
        A ``pandas.DataFrame`` indexed by SMILES string. The first column is
        ``y_name``; the remaining columns are named after the first element
        of each entry in ``Descriptors._descList``.
    """
    mols = []
    for file in os.listdir(mol_dir):
        mol_path = os.path.join(mol_dir, file)
        if not os.path.isfile(mol_path):
            # Only regular files can be parsed; skip sub-directories.
            continue
        mol = Chem.MolFromMolFile(mol_path)
        if mol is None:
            # MolFromMolFile returns None when parsing fails; a None entry
            # would crash the descriptor, SMILES and GetProp steps below.
            warnings.warn('Skipping %r: RDKit could not parse it' % file)
            continue
        mols.append(mol)

    # Define the descriptor list.
    descs = [desc_name[0] for desc_name in Descriptors._descList]

    # Convert the mol objects to SMILES; used as the row index of both frames.
    smiles = [Chem.MolToSmiles(mol) for mol in mols]

    # Compute the descriptors. Passing columns/index to the constructor keeps
    # the frame well-formed even when no molecule could be read.
    desc_calc = MoleculeDescriptors.MolecularDescriptorCalculator(descs)
    descriptors = pd.DataFrame(
        [desc_calc.CalcDescriptors(mol) for mol in mols],
        columns=descs,
        index=smiles,
    )

    y = pd.DataFrame(
        [mol.GetProp(y_name) for mol in mols],
        columns=[y_name],
        index=smiles,
    )

    return pd.concat([y, descriptors], axis=1)


if __name__ == '__main__':
    # Save the data.
    dataset = build_descriptor_table(path)
    dataset.to_csv('minidatabase.csv')

# NOTE(review): the text below was fused onto the end of the script line in
# the original notes and is not part of the script; it is kept verbatim:
# 0:N1:0 0:C2:1 0:C2:2 0:C2:3 0:C2:4 0:H3:5 0:C2:6 0:H3:7 0:C2:8
0:N 1:C 2:C
14 12 12
