1 package uk.ac.ebi.intenz.tools.export;
2
3 import java.io.File;
4 import java.io.FileOutputStream;
5 import java.io.IOException;
6 import java.io.OutputStream;
7 import java.lang.reflect.InvocationTargetException;
8 import java.sql.Connection;
9 import java.sql.SQLException;
10 import java.text.SimpleDateFormat;
11 import java.util.ArrayList;
12 import java.util.Collection;
13 import java.util.Collections;
14 import java.util.HashMap;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Properties;
18
19 import javax.xml.bind.JAXBException;
20 import javax.xml.bind.MarshalException;
21
22 import org.apache.commons.cli.CommandLine;
23 import org.apache.commons.cli.GnuParser;
24 import org.apache.commons.cli.HelpFormatter;
25 import org.apache.commons.cli.OptionBuilder;
26 import org.apache.commons.cli.Options;
27 import org.apache.commons.cli.ParseException;
28 import org.apache.log4j.Logger;
29 import org.xml.sax.SAXException;
30
31 import uk.ac.ebi.biobabel.util.db.OracleDatabaseInstance;
32 import uk.ac.ebi.intenz.biopax.level2.Biopax;
33 import uk.ac.ebi.intenz.domain.constants.Status;
34 import uk.ac.ebi.intenz.domain.enzyme.EnzymeClass;
35 import uk.ac.ebi.intenz.domain.enzyme.EnzymeCommissionNumber;
36 import uk.ac.ebi.intenz.domain.enzyme.EnzymeCommissionNumber.Type;
37 import uk.ac.ebi.intenz.domain.enzyme.EnzymeEntry;
38 import uk.ac.ebi.intenz.domain.enzyme.EnzymeSubSubclass;
39 import uk.ac.ebi.intenz.domain.enzyme.EnzymeSubclass;
40 import uk.ac.ebi.intenz.domain.exceptions.DomainException;
41 import uk.ac.ebi.intenz.mapper.EnzymeClassMapper;
42 import uk.ac.ebi.intenz.mapper.EnzymeEntryMapper;
43 import uk.ac.ebi.intenz.mapper.EnzymeSubSubclassMapper;
44 import uk.ac.ebi.intenz.mapper.EnzymeSubclassMapper;
45 import uk.ac.ebi.intenz.stats.IIntEnzStatistics;
46 import uk.ac.ebi.intenz.stats.db.IntEnzDbStatistics;
47 import uk.ac.ebi.rhea.mapper.MapperException;
48
49 public class ExporterApp {
50
51 private enum Format {
52 INTENZ_XML("intenzXml"),
53 SITEMAP("sitemap"),
54 BIOPAX("biopax"),
55 KEGG_ENZYME("keggEnzyme");
56 private String cliOption;
57 private Format(String cliOption){
58 this.cliOption = cliOption;
59 }
60 }
61
62 public static final Logger LOGGER = Logger.getLogger(ExporterApp.class);
63
64 private Properties spotlights;
65
66 private Connection intenzConnection;
67
68 private IIntEnzStatistics stats;
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105 @SuppressWarnings({ "static-access" })
106 public static void main(String[] args)
107 throws ClassNotFoundException, SQLException, MapperException, IOException, DomainException {
108 Options options = new Options();
109 options.addOption(OptionBuilder.isRequired()
110 .hasArg().withArgName("config")
111 .withDescription("IntEnz database configuration")
112 .create("intenzDb"));
113 options.addOption(OptionBuilder
114 .hasArg().withArgName("file name")
115 .withDescription("[optional] Export IntEnz as BioPAX")
116 .create(Format.BIOPAX.cliOption));
117 options.addOption(OptionBuilder
118 .hasArg().withArgName("dir name")
119 .withDescription("[optional] Export IntEnz as IntEnzXML")
120 .create(Format.INTENZ_XML.cliOption));
121 options.addOption(OptionBuilder
122 .hasArg().withArgName("file name")
123 .withDescription("[optional] Export IntEnz as KEGG enzyme")
124 .create(Format.KEGG_ENZYME.cliOption));
125 options.addOption(OptionBuilder
126 .hasArg().withArgName("file name")
127 .withDescription("[optional] Export IntEnz as sitemap")
128 .create(Format.SITEMAP.cliOption));
129 options.addOption(OptionBuilder
130 .hasArg().withArgName("EC number")
131 .withDescription("[optional] Export only one entry")
132 .create("ec"));
133 CommandLine cl = null;
134 try {
135 cl = new GnuParser().parse(options, args);
136 } catch (ParseException e){
137 new HelpFormatter().printHelp(ExporterApp.class.getName(), options);
138 return;
139 }
140
141 ExporterApp app = new ExporterApp(cl.getOptionValue("intenzDb"));
142 Collection<EnzymeEntry> enzymes =
143 app.getEnzymeList(cl.getOptionValue("ec"));
144 Map<String, Object> descriptions =
145 ExporterApp.getDescriptions(app.intenzConnection);
146 for (EnzymeEntry enzyme : enzymes) {
147 String classEc = String.valueOf(enzyme.getEc().getEc1());
148 String subclassEc = classEc + "." + String.valueOf(enzyme.getEc().getEc2());
149 String subSubclassEc = subclassEc + "." + String.valueOf(enzyme.getEc().getEc3());
150 enzyme.setClassName(((EnzymeClass) descriptions.get(classEc)).getName());
151 enzyme.setSubclassName(((EnzymeSubclass) descriptions.get(subclassEc)).getName());
152 enzyme.setSubSubclassName(((EnzymeSubSubclass) descriptions.get(subSubclassEc)).getName());
153 }
154 LOGGER.info("Intenz exporter - Release " + app.stats.getReleaseNumber());
155 if (cl.hasOption(Format.INTENZ_XML.cliOption)){
156 try {
157 String xmlDir = cl.getOptionValue(Format.INTENZ_XML.cliOption);
158 app.exportXML(enzymes, descriptions, xmlDir);
159 } catch (Exception e) {
160 LOGGER.error(e.getMessage(), e);
161 }
162 }
163 if (cl.hasOption(Format.SITEMAP.cliOption)){
164 try {
165 String sitemapFile = cl.getOptionValue(Format.SITEMAP.cliOption);
166 app.exportSitemap(enzymes, descriptions, sitemapFile);
167 } catch (Exception e) {
168 LOGGER.error(e.getMessage(), e);
169 }
170 }
171 if (cl.hasOption(Format.BIOPAX.cliOption)){
172 try {
173 String biopaxFile = cl.getOptionValue(Format.BIOPAX.cliOption);
174 app.exportBiopax(enzymes, biopaxFile);
175 } catch (Exception e) {
176 LOGGER.error(e.getMessage(), e);
177 }
178 }
179 if (cl.hasOption(Format.KEGG_ENZYME.cliOption)){
180 try {
181 String keggFile = cl.getOptionValue(Format.KEGG_ENZYME.cliOption);
182 app.exportKegg(enzymes, keggFile);
183 } catch (Exception e) {
184 LOGGER.error(e.getMessage(), e);
185 }
186 }
187 }
188
189 protected ExporterApp(String dbConfig)
190 throws SQLException, IOException, DomainException {
191 intenzConnection = OracleDatabaseInstance.getInstance(dbConfig)
192 .getConnection();
193 stats = new IntEnzDbStatistics(intenzConnection);
194 }
195
196 @Override
197 protected void finalize() throws Throwable {
198 if (intenzConnection != null) intenzConnection.close();
199 }
200
201
202
203
204
205
206
207
208
209 protected Collection<EnzymeEntry> getEnzymeList(String ecString)
210 throws SQLException, MapperException, DomainException{
211 Collection<EnzymeEntry> enzymeList = null;
212 EnzymeEntryMapper mapper = new EnzymeEntryMapper();
213 if (ecString != null){
214 EnzymeCommissionNumber ec = EnzymeCommissionNumber.valueOf(ecString);
215 Status status = ec.getType().equals(Type.PRELIMINARY)?
216 Status.PRELIMINARY : Status.APPROVED;
217 enzymeList = Collections.singletonList(
218 mapper.findByEc(ec.getEc1(), ec.getEc2(), ec.getEc3(),
219 ec.getEc4(), status, intenzConnection));
220 } else {
221 LOGGER.info("Retrieving IntEnz entries");
222 enzymeList = mapper.exportAllEntries(intenzConnection);
223 LOGGER.info("Retrieved IntEnz entries");
224 }
225 return enzymeList;
226 }
227
228
229
230
231
232
233
234
235
236
237
238
239 public static Map<String, Object> getDescriptions(Connection con)
240 throws SQLException, DomainException{
241 LOGGER.info("Retrieving IntEnz descriptions");
242 Map<String, Object> descriptions = new HashMap<String, Object>();
243 EnzymeClassMapper classMapper = new EnzymeClassMapper();
244 EnzymeSubclassMapper subclassMapper = new EnzymeSubclassMapper();
245 EnzymeSubSubclassMapper subsubclassMapper = new EnzymeSubSubclassMapper();
246 for (Object o : classMapper.findAll(con)) {
247 EnzymeClass enzymeClass = (EnzymeClass) o;
248 descriptions.put(enzymeClass.getEc().toString(), enzymeClass);
249 }
250 for (Object o : subclassMapper.findAll(con)) {
251 EnzymeSubclass enzymeSubclass = (EnzymeSubclass) o;
252 descriptions.put(enzymeSubclass.getEc().toString(), enzymeSubclass);
253 }
254 for (Object o : subsubclassMapper.findAll(con)) {
255 EnzymeSubSubclass enzymeSubsubclass = (EnzymeSubSubclass) o;
256 descriptions.put(enzymeSubsubclass.getEc().toString(), enzymeSubsubclass);
257 }
258 LOGGER.info("Retrieved IntEnz descriptions");
259 return Collections.unmodifiableMap(descriptions);
260 }
261
262
263
264
265
266
267
268
269 protected void exportXML(Collection<EnzymeEntry> enzymeList,
270 Map<String, Object> descriptions, String toDir) throws Exception {
271 OutputStream os = null;
272 checkWritable(toDir);
273 String releaseDate = new SimpleDateFormat("yyyy-MM-dd")
274 .format(stats.getReleaseDate());
275 LOGGER.info("Intenz exporter - Release " + stats.getReleaseNumber());
276 LOGGER.info("Outputting XML to " + toDir);
277 XmlExporter exporter = new XmlExporter();
278 exporter.setDescriptions(descriptions);
279 for (XmlExporter.Flavour flavour : XmlExporter.Flavour.values()){
280 exporter.setFlavour(flavour);
281 File flavourDir = new File(toDir, flavour.toString());
282 flavourDir.mkdir();
283 LOGGER.info("Single-entry XML start");
284 List<EnzymeEntry> validEntriesList = new ArrayList<EnzymeEntry>();
285
286 for (EnzymeEntry entry : enzymeList) {
287 String classEc = "EC_" + String.valueOf(entry.getEc().getEc1());
288 String subclassEc = classEc + "." + String.valueOf(entry.getEc().getEc2());
289 String subsubclassEc = subclassEc + "." + String.valueOf(entry.getEc().getEc3());
290 String dirTree = classEc + "/" + subclassEc + "/" + subsubclassEc;
291 File subsubclassDir = new File(flavourDir, dirTree);
292 subsubclassDir.mkdirs();
293 File outputFile = new File(subsubclassDir, "EC_" + entry.getEc().toString() + ".xml");
294 try {
295 os = new FileOutputStream(outputFile);
296 exporter.export(entry,
297 String.valueOf(stats.getReleaseNumber()),
298 releaseDate, os);
299 validEntriesList.add(entry);
300 } catch (MarshalException e) {
301 LOGGER.warn(entry.getEc().toString(), e);
302 } finally {
303 if (os != null) os.close();
304 }
305 }
306 LOGGER.info("Single-entry XML end");
307
308 File treeFile = new File(flavourDir, "intenz.xml");
309 try {
310 os = new FileOutputStream(treeFile);
311 LOGGER.info("Whole tree XML start");
312 exporter.export(validEntriesList,
313 String.valueOf(stats.getReleaseNumber()),
314 releaseDate, os);
315 LOGGER.info("Whole tree XML end");
316 } catch (Exception e) {
317 LOGGER.error("Whole tree dump", e);
318 } finally {
319 if (os != null) os.close();
320 }
321 }
322 }
323
324 protected void exportSitemap(Collection<EnzymeEntry> enzymeList,
325 Map<String, Object> descriptions, String sitemapFile)
326 throws IOException, JAXBException, SAXException{
327 final String queryUrl = "http://www.ebi.ac.uk/intenz/query?cmd=SearchEC&q=";
328 final String spotlightUrl = "http://www.ebi.ac.uk/intenz/spotlight.jsp?ec=";
329 File sitemap = new File(sitemapFile);
330 checkWritable(sitemap.getParent());
331 if (!sitemap.exists()) sitemap.createNewFile();
332 OutputStream os = null;
333
334 Collection<String> urls = new ArrayList<String>();
335
336 for (EnzymeEntry entry : enzymeList) {
337 StringBuffer sb = new StringBuffer(queryUrl);
338 String ec = entry.getEc().toString();
339 sb.append(ec);
340 urls.add(sb.toString());
341 }
342
343 spotlights = new Properties();
344 spotlights.load(ExporterApp.class.getClassLoader()
345 .getResourceAsStream("spotlights.properties"));
346 for (Object ec : spotlights.keySet()){
347 StringBuffer spotSb = new StringBuffer(spotlightUrl);
348 spotSb.append((String) ec);
349 urls.add(spotSb.toString());
350 }
351
352 for (String ec : descriptions.keySet()){
353 StringBuffer sb = new StringBuffer(queryUrl);
354 sb.append(ec);
355 urls.add(sb.toString());
356 }
357
358 try {
359 os = new FileOutputStream(sitemap);
360 SitemapExporter exporter = new SitemapExporter();
361 exporter.export(urls, os);
362 } finally {
363 if (os != null) os.close();
364 }
365 }
366
367 protected void exportBiopax(Collection<EnzymeEntry> enzymeList, String biopaxFile)
368 throws IOException, IllegalAccessException, InvocationTargetException{
369 OutputStream os = null;
370 LOGGER.info("Outputting BioPAX to " + biopaxFile);
371 try {
372 File owlFile = new File(biopaxFile);
373 checkWritable(owlFile.getParent());
374 if (!owlFile.exists()) owlFile.createNewFile();
375 os = new FileOutputStream(owlFile);
376 Biopax.write(enzymeList, os);
377 } finally {
378 if (os != null) os.close();
379 }
380 }
381
382 protected void exportKegg(Collection<EnzymeEntry> enzymes, String keggFile)
383 throws Exception {
384 OutputStream os = null;
385 try {
386 File keggEnzymeFile = new File(keggFile);
387 checkWritable(keggEnzymeFile.getParent());
388 if (!keggEnzymeFile.exists()) keggEnzymeFile.createNewFile();
389 os = new FileOutputStream(keggEnzymeFile);
390 KeggExporter exporter = new KeggExporter();
391 exporter.export(enzymes, os);
392 } finally {
393 if (os != null) os.close();
394 }
395
396 }
397
398 private void checkWritable(String toDir) throws IOException{
399 File outputDir = new File(toDir);
400 if (outputDir.exists()){
401 if (!outputDir.canWrite()){
402 String msg = "Cannot write to " + toDir;
403 LOGGER.error(msg);
404 throw new IOException();
405 }
406 } else if (!outputDir.mkdirs()){
407 String msg = "Cannot create output directory " + toDir;
408 LOGGER.error(msg);
409 throw new IOException(msg);
410 }
411 }
412
413 }