"""Build HTML comparison pages of 2D structure diagrams for PubChem entries.

The script reads ``dataset.sdf`` with the cinfony toolkits listed in
``main()``, draws each single-component molecule into the ``sdg/`` directory
and writes two HTML tables, ``sdg.html`` and ``depict.html``, that reference
the images.  ``getfromPubChem`` can be used to (re)create ``dataset.sdf``.

The code targets Python 2 (it relies on ``urllib.urlopen``).
"""
import os
import time
import urllib
import random

##from cinfony import cdk, rdkit, pybel
from cinfony import cdk

# SDF download URL for a single PubChem compound; takes the CID.
_SDF_URL = ("http://pubchem.ncbi.nlm.nih.gov/summary/"
            "summary.cgi?cid=%d&disopt=DisplaySDF")

# PubChem's own depiction of a compound; only used by the disabled
# urlretrieve call in main().
_IMAGE_URL = "http://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?t=l&cid=%s"

# NOTE(review): only the visible heading text of the HTML templates below is
# known ("SDG / PubChem / OASA / RDKit / CDK / CDK", and so on).  The tags and
# the assignment of image files to columns are reconstructed from those
# headings and from the file names passed to draw() in main() -- confirm them
# against the intended page layout.
_SDG_HEADER = """<html>
<body>
<table>
<tr><th>SDG</th><th>PubChem</th><th>OASA</th><th>RDKit</th><th>CDK</th><th>CDK</th></tr>
<tr><th>Depiction</th><th>PubChem</th><th>OASA</th><th>RDKit devel</th><th>ChemBioGrid</th><th>OASA</th></tr>
"""

_DEPICT_HEADER = """<html>
<body>
<table>
<tr><th>Depiction</th><th>PubChem</th><th>RDKit</th><th>RDKit devel</th><th>CDK</th><th>OASA</th></tr>
"""

# Each row template takes (title, i, i, i, i, i): the record title followed
# by the molecule index for each of the five image columns.
_SDG_ROW = """<tr><td>%s</td>
<td><img src="sdg/%d_pubchem.png"/></td>
<td><img src="sdg/%d_pybel_oasa.png"/></td>
<td><img src="sdg/%d_rdk_newdraw.png"/></td>
<td><img src="sdg/%d_cdk_web.jpeg"/></td>
<td><img src="sdg/%d_cdk_oasa.png"/></td>
</tr>
"""

_DEPICT_ROW = """<tr><td>%s</td>
<td><img src="sdg/%d_pubchem.png"/></td>
<td><img src="sdg/%d_pubchem_rdk.png"/></td>
<td><img src="sdg/%d_pubchem_rdkdev.png"/></td>
<td><img src="sdg/%d_pubchem_cdk.png"/></td>
<td><img src="sdg/%d_pubchem_oasa.png"/></td>
</tr>
"""

_HTML_END = "</table>\n</body>\n</html>"


def _write_text(path, text):
    """Write *text* plus a trailing newline to *path* and close the file."""
    with open(path, "w") as handle:
        handle.write(text + "\n")


def getfromPubChem(N = 100, filename = "dataset.sdf"):
    """Download N random single-component PubChem records and save them.

    Random CIDs between 1 and 24000000 are fetched until N records whose
    ``PUBCHEM_COMPONENT_COUNT`` data field equals "1" have been collected.
    The records are written to *filename*, separated by newlines.

    Returns the collected records concatenated without any separator.
    """
    # pybel is imported here because the module-level import of it is
    # disabled; without this the readstring call below raises NameError.
    from cinfony import pybel

    records = []
    while len(records) < N:
        cid = random.randint(1, 24000000)
        response = urllib.urlopen(_SDF_URL % cid)
        try:
            sdfile = response.read().rstrip()
        finally:
            response.close()
        if pybel.readstring("sdf", sdfile).data['PUBCHEM_COMPONENT_COUNT'] != "1":
            continue
        records.append(sdfile)

    _write_text(filename, "\n".join(records))
    # NOTE(review): the file is joined with "\n" but the return value is
    # joined with "" -- kept as is for callers; confirm whether the return
    # value was meant to match the file contents.
    return "".join(records)


def main():
    """Draw every usable molecule in dataset.sdf and write both HTML pages."""
    # getfromPubChem(100)  # enable to (re)build dataset.sdf first
    ## toolkits = [cdk, rdkit, pybel]
    toolkits = [cdk]
    iters = [toolkit.readfile("sdf", "dataset.sdf") for toolkit in toolkits]

    # draw() writes into sdg/, so make sure the directory is there.
    if not os.path.isdir("sdg"):
        os.makedirs("sdg")

    sdg_parts = [_SDG_HEADER]
    depict_parts = [_DEPICT_HEADER]

    ## for i, (cdkmol, rdkmol, pybelmol) in enumerate(zip(*iters)):
    for i, (cdkmol,) in enumerate(zip(*iters)):
        # time.sleep(1)
        if cdkmol.data['PUBCHEM_COMPONENT_COUNT'] != "1":
            continue
        title = cdkmol.data['Title']
        if title == "1373132":  # BKChem error
            continue
        print(title)

        # Drawing calls for the toolkits that are currently disabled:
        # urllib.urlretrieve(_IMAGE_URL % title, "sdg/%d_pubchem.png" % i)
        # pybelmol.draw(filename="sdg/%d_pubchem_oasa.png" % i, show=False, usecoords=True)
        # pybelmol.draw(filename="sdg/%d_pybel_oasa.png" % i, show=False)
        # rdkmol.draw(filename="sdg/%d_pubchem_rdk.png" % i, show=False, usecoords=True)
        # rdkmol.draw(filename="sdg/%d_pubchem_rdkdev.png" % i, show=False, newdraw=True, usecoords=True)
        # rdkmol.draw(filename="sdg/%d_rdk_newdraw.png" % i, show=False, newdraw=True)
        # cdkmol.draw(filename="sdg/%d_cdk_oasa.png" % i, show=False)
        # cdkmol.draw(filename="sdg/%d_cdk_web.jpeg" % i, show=False, web=True)
        cdkmol.draw(filename="sdg/%d_pubchem_cdk.png" % i, show=False, usecoords=True)

        row_values = (title, i, i, i, i, i)
        sdg_parts.append(_SDG_ROW % row_values)
        depict_parts.append(_DEPICT_ROW % row_values)

    sdg_parts.append(_HTML_END)
    depict_parts.append(_HTML_END)

    _write_text("sdg.html", "".join(sdg_parts))
    _write_text("depict.html", "".join(depict_parts))


if __name__ == "__main__":
    main()