import os
import time
import urllib
import random
##from cinfony import cdk, rdkit, pybel
from cinfony import cdk
def getfromPubChem(N=100, filename="dataset.sdf"):
    """Download N random single-component PubChem records into an SD file.

    Random CIDs in the range 1..24000000 are requested one at a time from
    the PubChem summary URL. A record is kept only when its
    PUBCHEM_COMPONENT_COUNT data field equals "1"; everything else is
    skipped and another CID is drawn.

    Parameters:
        N: number of records to collect. With N <= 0 nothing is downloaded.
        filename: path of the SD file to write; it is overwritten.

    Returns:
        The kept records concatenated with no separator between them.
    """
    url = ("http://pubchem.ncbi.nlm.nih.gov/summary/"
           "summary.cgi?cid=%d&disopt=DisplaySDF")
    ans = []
    while len(ans) < N:
        cid = random.randint(1, 24000000)
        # Trailing whitespace is stripped so records can be re-joined below.
        sdfile = urllib.urlopen(url % cid).read().rstrip()
        # cdk is the only toolkit this file imports (the pybel import is
        # commented out), and the main script applies the same check via cdk.
        # NOTE(review): assumes cdk.readstring accepts "sdf" like the other
        # cinfony toolkits - confirm.
        mol = cdk.readstring("sdf", sdfile)
        if mol.data['PUBCHEM_COMPONENT_COUNT'] != "1":
            continue
        ans.append(sdfile)

    # Write to the requested path (previously hard-coded to "dataset.sdf")
    # and close the handle explicitly instead of leaking it.
    out = open(filename, "w")
    try:
        out.write("\n".join(ans) + "\n")
    finally:
        out.close()

    # NOTE(review): the file is newline-joined but the return value is not,
    # so consecutive records run together in the returned string. Kept as-is
    # for backward compatibility - confirm whether callers rely on it.
    return "".join(ans)
if __name__ == "__main__":
    # Script entry point: read dataset.sdf with each enabled toolkit, draw
    # every single-component molecule into sdg/, and write two HTML tables
    # (sdg.html and depict.html) that reference the images by record index.
    N = 100  # only used by the disabled download step below
    # getfromPubChem(N)
    ## toolkits = [cdk, rdkit, pybel]
    toolkits = [cdk]
    iters = [x.readfile("sdf", "dataset.sdf") for x in toolkits]

    # NOTE(review): the HTML markup in the templates below was reconstructed.
    # The header cell texts are the ones that survived in this file; the
    # <img> paths are taken from the filenames used by the draw() /
    # urlretrieve calls in the loop, and the column-to-image mapping is
    # inferred from the header labels - confirm against a known-good output.
    output_sdg = """
<html><body><table border="1">
<tr><th>SDG</th><td>PubChem</td><td>OASA</td>
<td>RDKit</td><td>CDK</td><td>CDK</td></tr>
<tr><th>Depiction</th><td>PubChem</td><td>OASA</td>
<td>RDKit devel</td><td>ChemBioGrid</td><td>OASA</td></tr>
"""
    output_depict = """
<html><body><table border="1">
<tr><th>Depiction</th><td>PubChem</td><td>RDKit</td>
<td>RDKit devel</td><td>CDK</td><td>OASA</td></tr>
"""
    # One table row per molecule: the title followed by five images, which
    # matches the six values supplied when the template is formatted.
    row_sdg = """
<tr><td>%s</td>
<td><img src="sdg/%d_pubchem.png" /></td>
<td><img src="sdg/%d_pybel_oasa.png" /></td>
<td><img src="sdg/%d_rdk_newdraw.png" /></td>
<td><img src="sdg/%d_cdk_web.jpeg" /></td>
<td><img src="sdg/%d_cdk_oasa.png" /></td></tr>
"""
    row_depict = """
<tr><td>%s</td>
<td><img src="sdg/%d_pubchem.png" /></td>
<td><img src="sdg/%d_pubchem_rdk.png" /></td>
<td><img src="sdg/%d_pubchem_rdkdev.png" /></td>
<td><img src="sdg/%d_pubchem_cdk.png" /></td>
<td><img src="sdg/%d_pubchem_oasa.png" /></td></tr>
"""
    url = "http://pubchem.ncbi.nlm.nih.gov/image/imgsrv.fcgi?t=l&cid=%s"

    # draw() below writes into sdg/, so make sure the directory exists.
    if not os.path.isdir("sdg"):
        os.makedirs("sdg")

    ## for i, (cdkmol, rdkmol, pybelmol) in enumerate(zip(*iters)):
    for i, (cdkmol,) in enumerate(zip(*iters)):
        # time.sleep(1)
        # Skip multi-component records.
        if cdkmol.data['PUBCHEM_COMPONENT_COUNT'] != "1":
            continue
        title = cdkmol.data['Title']
        if title == "1373132":  # BKChem error
            continue
        print(title)
        # Disabled image sources, kept because they document where the other
        # images referenced by the tables come from.
        # urllib.urlretrieve(url % title, "sdg/%d_pubchem.png" % i)
        # pybelmol.draw(filename="sdg/%d_pubchem_oasa.png" % i, show=False, usecoords=True)
        # pybelmol.draw(filename="sdg/%d_pybel_oasa.png" % i, show=False)
        # rdkmol.draw(filename="sdg/%d_pubchem_rdk.png" % i, show=False, usecoords=True)
        # rdkmol.draw(filename="sdg/%d_pubchem_rdkdev.png" % i, show=False, newdraw=True, usecoords=True)
        # rdkmol.draw(filename="sdg/%d_rdk_newdraw.png" % i, show=False, newdraw=True)
        # cdkmol.draw(filename="sdg/%d_cdk_oasa.png" % i, show=False)
        # cdkmol.draw(filename="sdg/%d_cdk_web.jpeg" % i, show=False, web=True)
        cdkmol.draw(filename="sdg/%d_pubchem_cdk.png" % i, show=False, usecoords=True)
        output_sdg += row_sdg % (title, i, i, i, i, i)
        output_depict += row_depict % (title, i, i, i, i, i)

    # Closing markup (reconstructed; the original literal was unterminated).
    end = "</table>\n</body></html>"
    output_sdg += end
    output_depict += end

    # Write both pages and close each handle explicitly; the trailing
    # newline matches what the print statement used to append.
    for path, html in (("sdg.html", output_sdg), ("depict.html", output_depict)):
        out = open(path, "w")
        try:
            out.write(html + "\n")
        finally:
            out.close()