View Javadoc
1   package uk.ac.ebi.intenz.tools.export;
2   
3   import java.io.File;
4   import java.io.FileOutputStream;
5   import java.io.IOException;
6   import java.io.OutputStream;
7   import java.lang.reflect.InvocationTargetException;
8   import java.sql.Connection;
9   import java.sql.SQLException;
10  import java.text.SimpleDateFormat;
11  import java.util.ArrayList;
12  import java.util.Collection;
13  import java.util.Collections;
14  import java.util.HashMap;
15  import java.util.List;
16  import java.util.Map;
17  import java.util.Properties;
18  
19  import javax.xml.bind.JAXBException;
20  import javax.xml.bind.MarshalException;
21  
22  import org.apache.commons.cli.CommandLine;
23  import org.apache.commons.cli.GnuParser;
24  import org.apache.commons.cli.HelpFormatter;
25  import org.apache.commons.cli.OptionBuilder;
26  import org.apache.commons.cli.Options;
27  import org.apache.commons.cli.ParseException;
28  import org.apache.log4j.Logger;
29  import org.xml.sax.SAXException;
30  
31  import uk.ac.ebi.biobabel.util.db.OracleDatabaseInstance;
32  import uk.ac.ebi.intenz.biopax.level2.Biopax;
33  import uk.ac.ebi.intenz.domain.constants.Status;
34  import uk.ac.ebi.intenz.domain.enzyme.EnzymeClass;
35  import uk.ac.ebi.intenz.domain.enzyme.EnzymeCommissionNumber;
36  import uk.ac.ebi.intenz.domain.enzyme.EnzymeCommissionNumber.Type;
37  import uk.ac.ebi.intenz.domain.enzyme.EnzymeEntry;
38  import uk.ac.ebi.intenz.domain.enzyme.EnzymeSubSubclass;
39  import uk.ac.ebi.intenz.domain.enzyme.EnzymeSubclass;
40  import uk.ac.ebi.intenz.domain.exceptions.DomainException;
41  import uk.ac.ebi.intenz.mapper.EnzymeClassMapper;
42  import uk.ac.ebi.intenz.mapper.EnzymeEntryMapper;
43  import uk.ac.ebi.intenz.mapper.EnzymeSubSubclassMapper;
44  import uk.ac.ebi.intenz.mapper.EnzymeSubclassMapper;
45  import uk.ac.ebi.intenz.stats.IIntEnzStatistics;
46  import uk.ac.ebi.intenz.stats.db.IntEnzDbStatistics;
47  import uk.ac.ebi.rhea.mapper.MapperException;
48  
49  public class ExporterApp {
50  
51      private enum Format {
52      	INTENZ_XML("intenzXml"),
53      	SITEMAP("sitemap"),
54      	BIOPAX("biopax"),
55      	KEGG_ENZYME("keggEnzyme");
56      	private String cliOption;
57      	private Format(String cliOption){
58      		this.cliOption = cliOption;
59      	}
60  	}
61      
62      public static final Logger LOGGER = Logger.getLogger(ExporterApp.class);
63      
64  	private Properties spotlights;
65  
66  	private Connection intenzConnection;
67      // Object to retrieve release number and date from:
68      private IIntEnzStatistics stats;
69  
70      /**
71       * Exports IntEnz data in the following formats:
72       * <ul>
73       * 	   <li>XML (both <i>flavours</i> ASCII and XCHARS), using the
74       * 			{@link uk.ac.ebi.intenz.tools.export.XmlExporter XmlExporter}
75       *          class.</li>
76       * 	   <li>Site map XML file (<code>sitemap.xml</code>) to be used in
77       * 		   	{@link http://www.google.com/webmasters/sitemaps Google sitemaps}
78       * 			to make every IntEnz entry available to Google indexing.
79       * 			Other search engines accept this standard too.</li>
80       * 	   <li><a href="http://www.biopax.org">BioPAX</a>, using the biopax
81       *          module.</li>
82       * 	   <li><a href="ftp://ftp.genome.jp/pub/kegg/ligand/ligand.txt">KEGG
83       * 			enzyme</a>.</li>
84       * </ul>
85       * @param args
86       * <ul>
87       * 	<li>-intenzDb &lt;config&gt;: database configuration file for IntEnz.</li>
88       *	<li>[-intenzXml &lt;output dir&gt;]: export as <a
89       * 		href="http://intenz.sourceforge.net/intenz-xml/index.html">IntEnzXML</a>
90       * 		.</li>
91       * 	<li>[-biopax &lt;file name&gt;]: export as one <a
92       * 		href="http://www.biopax.org">BioPAX</a> OWL file.</li>
93       * 	<li>[-sitemap &lt;file name&gt;]: export as a
94  	 * 		<a href="http://www.sitemaps.org">Sitemap</a> XML file.</li>
95       * 	<li>[-keggEnzyme &lt;file name&gt;]: export as a KEGG enzyme file.</li>
96       * 	<li>[-ec &lt;EC number&gt;]: export only the passed EC number.
97       * 				if not set, all of the public entries are exported.</li>
98       * </ul>
99       * @throws DomainException
100      * @throws IOException
101      * @throws SQLException
102      * @throws MapperException
103      * @throws ClassNotFoundException
104      */
105     @SuppressWarnings({ "static-access" })
106     public static void main(String[] args)
107     throws ClassNotFoundException, SQLException, MapperException, IOException, DomainException {
108 		Options options = new Options();
109 		options.addOption(OptionBuilder.isRequired()
110 				.hasArg().withArgName("config")
111 				.withDescription("IntEnz database configuration")
112 				.create("intenzDb"));
113 		options.addOption(OptionBuilder
114 				.hasArg().withArgName("file name")
115 				.withDescription("[optional] Export IntEnz as BioPAX")
116 				.create(Format.BIOPAX.cliOption));
117 		options.addOption(OptionBuilder
118 				.hasArg().withArgName("dir name")
119 				.withDescription("[optional] Export IntEnz as IntEnzXML")
120 				.create(Format.INTENZ_XML.cliOption));
121 		options.addOption(OptionBuilder
122 				.hasArg().withArgName("file name")
123 				.withDescription("[optional] Export IntEnz as KEGG enzyme")
124 				.create(Format.KEGG_ENZYME.cliOption));
125 		options.addOption(OptionBuilder
126 				.hasArg().withArgName("file name")
127 				.withDescription("[optional] Export IntEnz as sitemap")
128 				.create(Format.SITEMAP.cliOption));
129 		options.addOption(OptionBuilder
130 				.hasArg().withArgName("EC number")
131 				.withDescription("[optional] Export only one entry")
132 				.create("ec"));
133 		CommandLine cl = null;
134 		try {
135 			cl = new GnuParser().parse(options, args);
136 		} catch (ParseException e){
137 			new HelpFormatter().printHelp(ExporterApp.class.getName(), options);
138 			return;
139 		}
140 		
141         ExporterApp app = new ExporterApp(cl.getOptionValue("intenzDb"));
142         Collection<EnzymeEntry> enzymes =
143         		app.getEnzymeList(cl.getOptionValue("ec"));
144         Map<String, Object> descriptions =
145         		ExporterApp.getDescriptions(app.intenzConnection);
146         for (EnzymeEntry enzyme : enzymes) {
147         	String classEc = String.valueOf(enzyme.getEc().getEc1());
148         	String subclassEc = classEc + "." + String.valueOf(enzyme.getEc().getEc2());
149         	String subSubclassEc = subclassEc + "." + String.valueOf(enzyme.getEc().getEc3());
150 			enzyme.setClassName(((EnzymeClass) descriptions.get(classEc)).getName());
151 			enzyme.setSubclassName(((EnzymeSubclass) descriptions.get(subclassEc)).getName());
152 			enzyme.setSubSubclassName(((EnzymeSubSubclass) descriptions.get(subSubclassEc)).getName());
153 		}
154         LOGGER.info("Intenz exporter - Release " + app.stats.getReleaseNumber());
155         if (cl.hasOption(Format.INTENZ_XML.cliOption)){
156             try {
157             	String xmlDir = cl.getOptionValue(Format.INTENZ_XML.cliOption);
158                 app.exportXML(enzymes, descriptions, xmlDir);
159             } catch (Exception e) {
160                 LOGGER.error(e.getMessage(), e);
161             }
162         }
163         if (cl.hasOption(Format.SITEMAP.cliOption)){
164             try {
165             	String sitemapFile = cl.getOptionValue(Format.SITEMAP.cliOption);
166                 app.exportSitemap(enzymes, descriptions, sitemapFile);
167             } catch (Exception e) {
168                 LOGGER.error(e.getMessage(), e);
169             }
170         }
171         if (cl.hasOption(Format.BIOPAX.cliOption)){
172             try {
173             	String biopaxFile = cl.getOptionValue(Format.BIOPAX.cliOption);
174                 app.exportBiopax(enzymes, biopaxFile);
175             } catch (Exception e) {
176                 LOGGER.error(e.getMessage(), e);
177             }
178         }
179         if (cl.hasOption(Format.KEGG_ENZYME.cliOption)){
180         	try {
181         		String keggFile = cl.getOptionValue(Format.KEGG_ENZYME.cliOption);
182         		app.exportKegg(enzymes, keggFile);
183             } catch (Exception e) {
184                 LOGGER.error(e.getMessage(), e);
185         	}
186         }
187     }
188 
189     protected ExporterApp(String dbConfig)
190     throws SQLException, IOException, DomainException {
191         intenzConnection = OracleDatabaseInstance.getInstance(dbConfig)
192         		.getConnection();
193         stats = new IntEnzDbStatistics(intenzConnection);
194     }
195 
196     @Override
197 	protected void finalize() throws Throwable {
198     	if (intenzConnection != null) intenzConnection.close();
199 	}
200 
201 	/**
202      * Gets the list of enzymes to be exported.
203      * @param con
204      * @param ec An EC number. If <code>null</code>, every exportable enzyme is
205      * 		included.
206      * @throws SQLException
207      * @throws DomainException
208      */
209     protected Collection<EnzymeEntry> getEnzymeList(String ecString)
210     throws SQLException, MapperException, DomainException{
211         Collection<EnzymeEntry> enzymeList = null;
212         EnzymeEntryMapper mapper = new EnzymeEntryMapper();
213 		if (ecString != null){
214         	EnzymeCommissionNumber ec = EnzymeCommissionNumber.valueOf(ecString);
215         	Status status = ec.getType().equals(Type.PRELIMINARY)?
216         			Status.PRELIMINARY : Status.APPROVED;
217     		enzymeList  = Collections.singletonList(
218     				mapper.findByEc(ec.getEc1(), ec.getEc2(), ec.getEc3(),
219     						ec.getEc4(), status, intenzConnection));
220         } else {
221             LOGGER.info("Retrieving IntEnz entries");
222         	enzymeList = mapper.exportAllEntries(intenzConnection);
223             LOGGER.info("Retrieved IntEnz entries");
224         }
225 		return enzymeList;
226     }
227 
228     /**
229      * Builds a map of EC numbers (as String) to <code>EnzymeClass</code>,
230      * <code>EnzymeSubClass</code> or <code>EnzymeSubSubClass</code> objects
231      * from which to retrieve names and descriptions.
232      * @param con a database connection.
233      * @return a map of EC numbers (as String) to <code>EnzymeClass</code>,
234      * 		<code>EnzymeSubClass</code> or <code>EnzymeSubSubClass</code>
235      * 		objects.
236      * @throws SQLException
237      * @throws DomainException
238      */
239     public static Map<String, Object> getDescriptions(Connection con)
240     throws SQLException, DomainException{
241         LOGGER.info("Retrieving IntEnz descriptions");
242         Map<String, Object> descriptions = new HashMap<String, Object>();
243         EnzymeClassMapper classMapper = new EnzymeClassMapper();
244         EnzymeSubclassMapper subclassMapper = new EnzymeSubclassMapper();
245         EnzymeSubSubclassMapper subsubclassMapper = new EnzymeSubSubclassMapper();
246         for (Object o : classMapper.findAll(con)) {
247             EnzymeClass enzymeClass = (EnzymeClass) o;
248             descriptions.put(enzymeClass.getEc().toString(), enzymeClass);
249         }
250         for (Object o : subclassMapper.findAll(con)) {
251             EnzymeSubclass enzymeSubclass = (EnzymeSubclass) o;
252             descriptions.put(enzymeSubclass.getEc().toString(), enzymeSubclass);
253         }
254         for (Object o : subsubclassMapper.findAll(con)) {
255             EnzymeSubSubclass enzymeSubsubclass = (EnzymeSubSubclass) o;
256             descriptions.put(enzymeSubsubclass.getEc().toString(), enzymeSubsubclass);
257         }
258         LOGGER.info("Retrieved IntEnz descriptions");
259         return Collections.unmodifiableMap(descriptions);
260     }
261 
262     /**
263      * Exports data in XML format.
264      * @param enzymeList 
265      * @param descriptions 
266      * @param toDir destination directory for XML files.
267      * @throws Exception
268      */
269     protected void exportXML(Collection<EnzymeEntry> enzymeList,
270     		Map<String, Object> descriptions, String toDir) throws Exception {
271         OutputStream os = null;
272         checkWritable(toDir);
273         String releaseDate = new SimpleDateFormat("yyyy-MM-dd")
274                 .format(stats.getReleaseDate());
275         LOGGER.info("Intenz exporter - Release " + stats.getReleaseNumber());
276         LOGGER.info("Outputting XML to " + toDir);
277             XmlExporter exporter = new XmlExporter();
278             exporter.setDescriptions(descriptions);
279             for (XmlExporter.Flavour flavour : XmlExporter.Flavour.values()){
280                 exporter.setFlavour(flavour);
281                 File flavourDir = new File(toDir, flavour.toString());
282                 flavourDir.mkdir();
283                 LOGGER.info("Single-entry XML start");
284                 List<EnzymeEntry> validEntriesList = new ArrayList<EnzymeEntry>();
285                 // Export single-entry files:
286                 for (EnzymeEntry entry : enzymeList) {
287                     String classEc = "EC_" + String.valueOf(entry.getEc().getEc1());
288                     String subclassEc = classEc + "." + String.valueOf(entry.getEc().getEc2());
289                     String subsubclassEc = subclassEc + "." + String.valueOf(entry.getEc().getEc3());
290                     String dirTree = classEc + "/" + subclassEc + "/" + subsubclassEc;
291                     File subsubclassDir =  new File(flavourDir, dirTree);
292                     subsubclassDir.mkdirs();
293                     File outputFile = new File(subsubclassDir, "EC_" + entry.getEc().toString() + ".xml");
294                     try {
295                         os = new FileOutputStream(outputFile);
296                         exporter.export(entry,
297                                 String.valueOf(stats.getReleaseNumber()),
298                                 releaseDate, os);
299                         validEntriesList.add(entry);
300                     } catch (MarshalException e) {
301                         LOGGER.warn(entry.getEc().toString(), e);
302                     } finally {
303                         if (os != null) os.close();
304                     }
305                 }
306                 LOGGER.info("Single-entry XML end");
307                 // Export whole tree (only valid entries):
308                 File treeFile = new File(flavourDir, "intenz.xml");
309                 try {
310                     os = new FileOutputStream(treeFile);
311                     LOGGER.info("Whole tree XML start");
312                     exporter.export(validEntriesList,
313                             String.valueOf(stats.getReleaseNumber()),
314                             releaseDate, os);
315                     LOGGER.info("Whole tree XML end");
316                 } catch (Exception e) {
317                     LOGGER.error("Whole tree dump", e);
318                 } finally {
319                     if (os != null) os.close();
320                 }
321             }
322     }
323 
324     protected void exportSitemap(Collection<EnzymeEntry> enzymeList,
325     		Map<String, Object> descriptions, String sitemapFile)
326     throws IOException, JAXBException, SAXException{
327     	final String queryUrl = "http://www.ebi.ac.uk/intenz/query?cmd=SearchEC&q=";
328     	final String spotlightUrl = "http://www.ebi.ac.uk/intenz/spotlight.jsp?ec=";
329     	File sitemap = new File(sitemapFile);
330         checkWritable(sitemap.getParent());
331     	if (!sitemap.exists()) sitemap.createNewFile();
332     	OutputStream os = null;
333     	// Build the list of URLs:
334     	Collection<String> urls = new ArrayList<String>();
335 		// Enzymes:
336     	for (EnzymeEntry entry : enzymeList) {
337     		StringBuffer sb = new StringBuffer(queryUrl);
338 			String ec = entry.getEc().toString();
339     		sb.append(ec);
340     		urls.add(sb.toString());
341 		}
342 		// Spotlights:
343 		spotlights = new Properties();
344         spotlights.load(ExporterApp.class.getClassLoader()
345         		.getResourceAsStream("spotlights.properties"));
346 		for (Object ec : spotlights.keySet()){
347 			StringBuffer spotSb = new StringBuffer(spotlightUrl);
348 			spotSb.append((String) ec);
349 			urls.add(spotSb.toString());
350 		}
351 		// Classes, subclasses and subsubclasses:
352 		for (String ec : descriptions.keySet()){
353 			StringBuffer sb = new StringBuffer(queryUrl);
354 			sb.append(ec);
355     		urls.add(sb.toString());
356 		}
357 		// Build the sitemap:
358     	try {
359         	os = new FileOutputStream(sitemap);
360 			SitemapExporter exporter = new SitemapExporter();
361 			exporter.export(urls, os);
362     	} finally {
363             if (os != null) os.close();
364     	}
365     }
366 
367     protected void exportBiopax(Collection<EnzymeEntry> enzymeList, String biopaxFile)
368     throws IOException, IllegalAccessException, InvocationTargetException{
369         OutputStream os = null;
370         LOGGER.info("Outputting BioPAX to " + biopaxFile);
371         try {
372             File owlFile = new File(biopaxFile);
373             checkWritable(owlFile.getParent());
374             if (!owlFile.exists()) owlFile.createNewFile();
375             os = new FileOutputStream(owlFile);
376             Biopax.write(enzymeList, os);
377         } finally {
378             if (os != null) os.close();
379         }
380     }
381 
382     protected void exportKegg(Collection<EnzymeEntry> enzymes, String keggFile)
383     throws Exception {
384 		OutputStream os = null;
385     	try {
386 			File keggEnzymeFile = new File(keggFile);
387             checkWritable(keggEnzymeFile.getParent());
388 			if (!keggEnzymeFile.exists()) keggEnzymeFile.createNewFile();
389 			os = new FileOutputStream(keggEnzymeFile);
390 			KeggExporter exporter = new KeggExporter();
391 			exporter.export(enzymes, os);
392         } finally {
393             if (os != null) os.close();
394 		}
395 		
396 	}
397 
398 	private void checkWritable(String toDir) throws IOException{
399         File outputDir = new File(toDir);
400         if (outputDir.exists()){
401             if (!outputDir.canWrite()){
402             	String msg = "Cannot write to " + toDir;
403                 LOGGER.error(msg);
404                 throw new IOException();
405             }
406         } else if (!outputDir.mkdirs()){
407         	String msg = "Cannot create output directory " + toDir;
408             LOGGER.error(msg);
409             throw new IOException(msg);
410         }
411     }
412 
413 }