1 package uk.ac.ebi.intenz.tools.export;
2
3 import java.io.File;
4 import java.io.FileOutputStream;
5 import java.io.IOException;
6 import java.io.OutputStream;
7 import java.lang.reflect.InvocationTargetException;
8 import java.sql.Connection;
9 import java.sql.SQLException;
10 import java.text.SimpleDateFormat;
11 import java.util.*;
12
13 import javax.xml.bind.JAXBException;
14
15 import org.apache.commons.cli.*;
16 import org.apache.log4j.Logger;
17 import org.xml.sax.SAXException;
18
19 import uk.ac.ebi.biobabel.util.db.OracleDatabaseInstance;
20 import uk.ac.ebi.intenz.biopax.level2.Biopax;
21 import uk.ac.ebi.intenz.domain.constants.Status;
22 import uk.ac.ebi.intenz.domain.enzyme.*;
23 import uk.ac.ebi.intenz.domain.enzyme.EnzymeCommissionNumber.Type;
24 import uk.ac.ebi.intenz.domain.exceptions.DomainException;
25 import uk.ac.ebi.intenz.mapper.EnzymeClassMapper;
26 import uk.ac.ebi.intenz.mapper.EnzymeEntryMapper;
27 import uk.ac.ebi.intenz.mapper.EnzymeSubSubclassMapper;
28 import uk.ac.ebi.intenz.mapper.EnzymeSubclassMapper;
29 import uk.ac.ebi.intenz.stats.IIntEnzStatistics;
30 import uk.ac.ebi.intenz.stats.db.IntEnzDbStatistics;
31 import uk.ac.ebi.rhea.mapper.MapperException;
32
33 public class ExporterApp {
34
35 private enum Format {
36 INTENZ_XML("intenzXml"),
37 SITEMAP("sitemap"),
38 BIOPAX("biopax"),
39 KEGG_ENZYME("keggEnzyme");
40 private String cliOption;
41 private Format(String cliOption){
42 this.cliOption = cliOption;
43 }
44 }
45
46 public static final Logger LOGGER = Logger.getLogger(ExporterApp.class);
47
48 private Properties spotlights;
49
50 private Connection intenzConnection;
51
52 private IIntEnzStatistics stats;
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89 @SuppressWarnings({ "static-access" })
90 public static void main(String[] args)
91 throws ClassNotFoundException, SQLException, MapperException, IOException, DomainException {
92 Options options = new Options();
93 options.addOption(OptionBuilder.isRequired()
94 .hasArg().withArgName("config")
95 .withDescription("IntEnz database configuration")
96 .create("intenzDb"));
97 options.addOption(OptionBuilder
98 .hasArg().withArgName("file name")
99 .withDescription("[optional] Export IntEnz as BioPAX")
100 .create(Format.BIOPAX.cliOption));
101 options.addOption(OptionBuilder
102 .hasArg().withArgName("dir name")
103 .withDescription("[optional] Export IntEnz as IntEnzXML")
104 .create(Format.INTENZ_XML.cliOption));
105 options.addOption(OptionBuilder
106 .hasArg().withArgName("file name")
107 .withDescription("[optional] Export IntEnz as KEGG enzyme")
108 .create(Format.KEGG_ENZYME.cliOption));
109 options.addOption(OptionBuilder
110 .hasArg().withArgName("file name")
111 .withDescription("[optional] Export IntEnz as sitemap")
112 .create(Format.SITEMAP.cliOption));
113 options.addOption(OptionBuilder
114 .hasArg().withArgName("EC number")
115 .withDescription("[optional] Export only one entry")
116 .create("ec"));
117 CommandLine cl = null;
118 try {
119 cl = new GnuParser().parse(options, args);
120 } catch (ParseException e){
121 new HelpFormatter().printHelp(ExporterApp.class.getName(), options);
122 return;
123 }
124
125 ExporterApp app = new ExporterApp(cl.getOptionValue("intenzDb"));
126 List<EnzymeEntry> enzymes =
127 app.getEnzymeList(cl.getOptionValue("ec"));
128 Map<String, Object> descriptions =
129 ExporterApp.getDescriptions(app.intenzConnection);
130 for (EnzymeEntry enzyme : enzymes) {
131 String classEc = String.valueOf(enzyme.getEc().getEc1());
132 String subclassEc = classEc + "." + String.valueOf(enzyme.getEc().getEc2());
133 String subSubclassEc = subclassEc + "." + String.valueOf(enzyme.getEc().getEc3());
134 enzyme.setClassName(((EnzymeClass) descriptions.get(classEc)).getName());
135 enzyme.setSubclassName(((EnzymeSubclass) descriptions.get(subclassEc)).getName());
136 enzyme.setSubSubclassName(((EnzymeSubSubclass) descriptions.get(subSubclassEc)).getName());
137 }
138 LOGGER.info("Intenz exporter - Release " + app.stats.getReleaseNumber());
139 if (cl.hasOption(Format.INTENZ_XML.cliOption)){
140 try {
141 String xmlDir = cl.getOptionValue(Format.INTENZ_XML.cliOption);
142 app.exportXML(enzymes, descriptions, xmlDir);
143 } catch (Exception e) {
144 LOGGER.error(e.getMessage(), e);
145 }
146 }
147 if (cl.hasOption(Format.SITEMAP.cliOption)){
148 try {
149 String sitemapFile = cl.getOptionValue(Format.SITEMAP.cliOption);
150 app.exportSitemap(enzymes, descriptions, sitemapFile);
151 } catch (Exception e) {
152 LOGGER.error(e.getMessage(), e);
153 }
154 }
155 if (cl.hasOption(Format.BIOPAX.cliOption)){
156 try {
157 String biopaxFile = cl.getOptionValue(Format.BIOPAX.cliOption);
158 app.exportBiopax(enzymes, biopaxFile);
159 } catch (Exception e) {
160 LOGGER.error(e.getMessage(), e);
161 }
162 }
163 if (cl.hasOption(Format.KEGG_ENZYME.cliOption)){
164 try {
165 String keggFile = cl.getOptionValue(Format.KEGG_ENZYME.cliOption);
166 app.exportKegg(enzymes, keggFile);
167 } catch (Exception e) {
168 LOGGER.error(e.getMessage(), e);
169 }
170 }
171 }
172
/**
 * Creates an exporter working on the IntEnz database described by the
 * given configuration: obtains a connection from the matching
 * {@link OracleDatabaseInstance} and builds the release statistics on it.
 *
 * @param dbConfig the database configuration handed to
 *      {@link OracleDatabaseInstance#getInstance}.
 */
protected ExporterApp(String dbConfig)
        throws SQLException, IOException, DomainException {
    this.intenzConnection =
            OracleDatabaseInstance.getInstance(dbConfig).getConnection();
    this.stats = new IntEnzDbStatistics(this.intenzConnection);
}
179
180 @Override
181 protected void finalize() throws Throwable {
182 if (intenzConnection != null) intenzConnection.close();
183 }
184
185
186
187
188
189
190
191
192 protected List<EnzymeEntry> getEnzymeList(String ecString)
193 throws SQLException, MapperException, DomainException{
194 List<EnzymeEntry> enzymeList = null;
195 EnzymeEntryMapper mapper = new EnzymeEntryMapper();
196 if (ecString != null){
197 EnzymeCommissionNumber ec = EnzymeCommissionNumber.valueOf(ecString);
198 Status status = ec.getType().equals(Type.PRELIMINARY)?
199 Status.PRELIMINARY : Status.APPROVED;
200 enzymeList = Collections.singletonList(
201 mapper.findByEc(ec.getEc1(), ec.getEc2(), ec.getEc3(),
202 ec.getEc4(), status, intenzConnection));
203 } else {
204 LOGGER.info("Retrieving IntEnz entries");
205 enzymeList = mapper.exportAllEntries(intenzConnection);
206 LOGGER.info("Retrieved IntEnz entries");
207 }
208 return enzymeList;
209 }
210
211
212
213
214
215
216
217
218
219
220
221
222 public static Map<String, Object> getDescriptions(Connection con)
223 throws SQLException, DomainException{
224 LOGGER.info("Retrieving IntEnz descriptions");
225 Map<String, Object> descriptions = new HashMap<String, Object>();
226 EnzymeClassMapper classMapper = new EnzymeClassMapper();
227 EnzymeSubclassMapper subclassMapper = new EnzymeSubclassMapper();
228 EnzymeSubSubclassMapper subsubclassMapper = new EnzymeSubSubclassMapper();
229 for (Object o : classMapper.findAll(con)) {
230 EnzymeClass enzymeClass = (EnzymeClass) o;
231 descriptions.put(enzymeClass.getEc().toString(), enzymeClass);
232 }
233 for (Object o : subclassMapper.findAll(con)) {
234 EnzymeSubclass enzymeSubclass = (EnzymeSubclass) o;
235 descriptions.put(enzymeSubclass.getEc().toString(), enzymeSubclass);
236 }
237 for (Object o : subsubclassMapper.findAll(con)) {
238 EnzymeSubSubclass enzymeSubsubclass = (EnzymeSubSubclass) o;
239 descriptions.put(enzymeSubsubclass.getEc().toString(), enzymeSubsubclass);
240 }
241 LOGGER.info("Retrieved IntEnz descriptions");
242 return Collections.unmodifiableMap(descriptions);
243 }
244
245
246
247
248
249
250
251
252 protected void exportXML(Collection<EnzymeEntry> enzymeList,
253 Map<String, Object> descriptions, String toDir) throws Exception {
254 OutputStream os = null;
255 checkWritable(toDir);
256 LOGGER.info("Intenz exporter - Release " + stats.getReleaseNumber());
257 LOGGER.info("Outputting XML to " + toDir);
258 XmlExporter exporter = new XmlExporter();
259 exporter.setDescriptions(descriptions);
260 exporter.setReleaseDate(new SimpleDateFormat("yyyy-MM-dd")
261 .format(stats.getReleaseDate()));
262 exporter.setReleaseNumber(stats.getReleaseNumber());
263 for (XmlExporter.Flavour flavour : XmlExporter.Flavour.values()){
264 exporter.setFlavour(flavour);
265 File flavourDir = new File(toDir, flavour.toString());
266 flavourDir.mkdir();
267 LOGGER.info("Single-entry XML start");
268 List<EnzymeEntry> validEntriesList = new ArrayList<EnzymeEntry>();
269
270 for (EnzymeEntry entry : enzymeList) {
271 String classEc = "EC_" + String.valueOf(entry.getEc().getEc1());
272 String subclassEc = classEc + "." + String.valueOf(entry.getEc().getEc2());
273 String subsubclassEc = subclassEc + "." + String.valueOf(entry.getEc().getEc3());
274 String dirTree = classEc + "/" + subclassEc + "/" + subsubclassEc;
275 File subsubclassDir = new File(flavourDir, dirTree);
276 subsubclassDir.mkdirs();
277 File outputFile = new File(subsubclassDir, "EC_" + entry.getEc().toString() + ".xml");
278 try {
279 os = new FileOutputStream(outputFile);
280 exporter.export(entry, os);
281 validEntriesList.add(entry);
282 } catch (Exception e) {
283
284 LOGGER.warn(entry.getEc().toString(), e);
285 } finally {
286 if (os != null) os.close();
287 }
288 }
289 LOGGER.info("Single-entry XML end");
290
291 File treeFile = new File(flavourDir, "intenz.xml");
292 try {
293 os = new FileOutputStream(treeFile);
294 LOGGER.info("Whole tree XML start");
295 exporter.export(validEntriesList, os);
296 LOGGER.info("Whole tree XML end");
297 } catch (Exception e) {
298 LOGGER.error("Whole tree dump", e);
299 } finally {
300 if (os != null) os.close();
301 }
302 }
303 }
304
305 protected void exportSitemap(Collection<EnzymeEntry> enzymeList,
306 Map<String, Object> descriptions, String sitemapFile)
307 throws IOException, JAXBException, SAXException{
308 final String queryUrl = "http://www.ebi.ac.uk/intenz/query?cmd=SearchEC&q=";
309 final String spotlightUrl = "http://www.ebi.ac.uk/intenz/spotlight.jsp?ec=";
310 File sitemap = new File(sitemapFile);
311 checkWritable(sitemap.getParent());
312 if (!sitemap.exists()) sitemap.createNewFile();
313 OutputStream os = null;
314
315 Collection<String> urls = new ArrayList<String>();
316
317 for (EnzymeEntry entry : enzymeList) {
318 StringBuffer sb = new StringBuffer(queryUrl);
319 String ec = entry.getEc().toString();
320 sb.append(ec);
321 urls.add(sb.toString());
322 }
323
324 spotlights = new Properties();
325 spotlights.load(ExporterApp.class.getClassLoader()
326 .getResourceAsStream("spotlights.properties"));
327 for (Object ec : spotlights.keySet()){
328 StringBuffer spotSb = new StringBuffer(spotlightUrl);
329 spotSb.append((String) ec);
330 urls.add(spotSb.toString());
331 }
332
333 for (String ec : descriptions.keySet()){
334 StringBuffer sb = new StringBuffer(queryUrl);
335 sb.append(ec);
336 urls.add(sb.toString());
337 }
338
339 try {
340 os = new FileOutputStream(sitemap);
341 SitemapExporter exporter = new SitemapExporter();
342 exporter.export(urls, os);
343 } finally {
344 if (os != null) os.close();
345 }
346 }
347
348 protected void exportBiopax(Collection<EnzymeEntry> enzymeList, String biopaxFile)
349 throws IOException, IllegalAccessException, InvocationTargetException{
350 OutputStream os = null;
351 LOGGER.info("Outputting BioPAX to " + biopaxFile);
352 try {
353 File owlFile = new File(biopaxFile);
354 checkWritable(owlFile.getParent());
355 if (!owlFile.exists()) owlFile.createNewFile();
356 os = new FileOutputStream(owlFile);
357 Biopax.write(enzymeList, os);
358 } finally {
359 if (os != null) os.close();
360 }
361 }
362
363 protected void exportKegg(Collection<EnzymeEntry> enzymes, String keggFile)
364 throws Exception {
365 OutputStream os = null;
366 try {
367 File keggEnzymeFile = new File(keggFile);
368 checkWritable(keggEnzymeFile.getParent());
369 if (!keggEnzymeFile.exists()) keggEnzymeFile.createNewFile();
370 os = new FileOutputStream(keggEnzymeFile);
371 KeggExporter exporter = new KeggExporter();
372 exporter.export(enzymes, os);
373 } finally {
374 if (os != null) os.close();
375 }
376
377 }
378
379 private void checkWritable(String toDir) throws IOException{
380 File outputDir = new File(toDir);
381 if (outputDir.exists()){
382 if (!outputDir.canWrite()){
383 String msg = "Cannot write to " + toDir;
384 LOGGER.error(msg);
385 throw new IOException();
386 }
387 } else if (!outputDir.mkdirs()){
388 String msg = "Cannot create output directory " + toDir;
389 LOGGER.error(msg);
390 throw new IOException(msg);
391 }
392 }
393
394 }