View Javadoc

1   package uk.ac.ebi.intenz.tools.export;
2   
3   import java.io.File;
4   import java.io.FileOutputStream;
5   import java.io.IOException;
6   import java.io.OutputStream;
7   import java.lang.reflect.InvocationTargetException;
8   import java.sql.Connection;
9   import java.sql.SQLException;
10  import java.text.SimpleDateFormat;
11  import java.util.*;
12  
13  import javax.xml.bind.JAXBException;
14  
15  import org.apache.commons.cli.*;
16  import org.apache.log4j.Logger;
17  import org.xml.sax.SAXException;
18  
19  import uk.ac.ebi.biobabel.util.db.OracleDatabaseInstance;
20  import uk.ac.ebi.intenz.biopax.level2.Biopax;
21  import uk.ac.ebi.intenz.domain.constants.Status;
22  import uk.ac.ebi.intenz.domain.enzyme.*;
23  import uk.ac.ebi.intenz.domain.enzyme.EnzymeCommissionNumber.Type;
24  import uk.ac.ebi.intenz.domain.exceptions.DomainException;
25  import uk.ac.ebi.intenz.mapper.EnzymeClassMapper;
26  import uk.ac.ebi.intenz.mapper.EnzymeEntryMapper;
27  import uk.ac.ebi.intenz.mapper.EnzymeSubSubclassMapper;
28  import uk.ac.ebi.intenz.mapper.EnzymeSubclassMapper;
29  import uk.ac.ebi.intenz.stats.IIntEnzStatistics;
30  import uk.ac.ebi.intenz.stats.db.IntEnzDbStatistics;
31  import uk.ac.ebi.rhea.mapper.MapperException;
32  
33  public class ExporterApp {
34  
35      private enum Format {
36      	INTENZ_XML("intenzXml"),
37      	SITEMAP("sitemap"),
38      	BIOPAX("biopax"),
39      	KEGG_ENZYME("keggEnzyme");
40      	private String cliOption;
41      	private Format(String cliOption){
42      		this.cliOption = cliOption;
43      	}
44  	}
45      
46      public static final Logger LOGGER = Logger.getLogger(ExporterApp.class);
47      
48  	private Properties spotlights;
49  
50  	private Connection intenzConnection;
51      // Object to retrieve release number and date from:
52      private IIntEnzStatistics stats;
53  
54      /**
55       * Exports IntEnz data in the following formats:
56       * <ul>
57       * 	   <li>XML (both <i>flavours</i> ASCII and XCHARS), using the
58       * 			{@link uk.ac.ebi.intenz.tools.export.XmlExporter XmlExporter}
59       *          class.</li>
60       * 	   <li>Site map XML file (<code>sitemap.xml</code>) to be used in
61       * 		   	<a href="http://www.google.com/webmasters/sitemaps">Google
62       * 		   	sitemaps</a> to make every IntEnz entry available to Google
63       * 		   	indexing. Other search engines accept this standard too.</li>
64       * 	   <li><a href="http://www.biopax.org">BioPAX</a>, using the biopax
65       *          module.</li>
66       * 	   <li><a href="ftp://ftp.genome.jp/pub/kegg/ligand/ligand.txt">KEGG
67       * 			enzyme</a>.</li>
68       * </ul>
69       * @param args
70       * <ul>
71       * 	<li>-intenzDb &lt;config&gt;: database configuration file for IntEnz.</li>
72       *	<li>[-intenzXml &lt;output dir&gt;]: export as <a
73       * 		href="http://intenz.sourceforge.net/intenz-xml/index.html">IntEnzXML</a>
74       * 		.</li>
75       * 	<li>[-biopax &lt;file name&gt;]: export as one <a
76       * 		href="http://www.biopax.org">BioPAX</a> OWL file.</li>
77       * 	<li>[-sitemap &lt;file name&gt;]: export as a
78  	 * 		<a href="http://www.sitemaps.org">Sitemap</a> XML file.</li>
79       * 	<li>[-keggEnzyme &lt;file name&gt;]: export as a KEGG enzyme file.</li>
80       * 	<li>[-ec &lt;EC number&gt;]: export only the passed EC number.
81       * 				if not set, all of the public entries are exported.</li>
82       * </ul>
83       * @throws DomainException
84       * @throws IOException
85       * @throws SQLException
86       * @throws MapperException
87       * @throws ClassNotFoundException
88       */
89      @SuppressWarnings({ "static-access" })
90      public static void main(String[] args)
91      throws ClassNotFoundException, SQLException, MapperException, IOException, DomainException {
92  		Options options = new Options();
93  		options.addOption(OptionBuilder.isRequired()
94  				.hasArg().withArgName("config")
95  				.withDescription("IntEnz database configuration")
96  				.create("intenzDb"));
97  		options.addOption(OptionBuilder
98  				.hasArg().withArgName("file name")
99  				.withDescription("[optional] Export IntEnz as BioPAX")
100 				.create(Format.BIOPAX.cliOption));
101 		options.addOption(OptionBuilder
102 				.hasArg().withArgName("dir name")
103 				.withDescription("[optional] Export IntEnz as IntEnzXML")
104 				.create(Format.INTENZ_XML.cliOption));
105 		options.addOption(OptionBuilder
106 				.hasArg().withArgName("file name")
107 				.withDescription("[optional] Export IntEnz as KEGG enzyme")
108 				.create(Format.KEGG_ENZYME.cliOption));
109 		options.addOption(OptionBuilder
110 				.hasArg().withArgName("file name")
111 				.withDescription("[optional] Export IntEnz as sitemap")
112 				.create(Format.SITEMAP.cliOption));
113 		options.addOption(OptionBuilder
114 				.hasArg().withArgName("EC number")
115 				.withDescription("[optional] Export only one entry")
116 				.create("ec"));
117 		CommandLine cl = null;
118 		try {
119 			cl = new GnuParser().parse(options, args);
120 		} catch (ParseException e){
121 			new HelpFormatter().printHelp(ExporterApp.class.getName(), options);
122 			return;
123 		}
124 		
125         ExporterApp app = new ExporterApp(cl.getOptionValue("intenzDb"));
126         List<EnzymeEntry> enzymes =
127         		app.getEnzymeList(cl.getOptionValue("ec"));
128         Map<String, Object> descriptions =
129         		ExporterApp.getDescriptions(app.intenzConnection);
130         for (EnzymeEntry enzyme : enzymes) {
131         	String classEc = String.valueOf(enzyme.getEc().getEc1());
132         	String subclassEc = classEc + "." + String.valueOf(enzyme.getEc().getEc2());
133         	String subSubclassEc = subclassEc + "." + String.valueOf(enzyme.getEc().getEc3());
134 			enzyme.setClassName(((EnzymeClass) descriptions.get(classEc)).getName());
135 			enzyme.setSubclassName(((EnzymeSubclass) descriptions.get(subclassEc)).getName());
136 			enzyme.setSubSubclassName(((EnzymeSubSubclass) descriptions.get(subSubclassEc)).getName());
137 		}
138         LOGGER.info("Intenz exporter - Release " + app.stats.getReleaseNumber());
139         if (cl.hasOption(Format.INTENZ_XML.cliOption)){
140             try {
141             	String xmlDir = cl.getOptionValue(Format.INTENZ_XML.cliOption);
142                 app.exportXML(enzymes, descriptions, xmlDir);
143             } catch (Exception e) {
144                 LOGGER.error(e.getMessage(), e);
145             }
146         }
147         if (cl.hasOption(Format.SITEMAP.cliOption)){
148             try {
149             	String sitemapFile = cl.getOptionValue(Format.SITEMAP.cliOption);
150                 app.exportSitemap(enzymes, descriptions, sitemapFile);
151             } catch (Exception e) {
152                 LOGGER.error(e.getMessage(), e);
153             }
154         }
155         if (cl.hasOption(Format.BIOPAX.cliOption)){
156             try {
157             	String biopaxFile = cl.getOptionValue(Format.BIOPAX.cliOption);
158                 app.exportBiopax(enzymes, biopaxFile);
159             } catch (Exception e) {
160                 LOGGER.error(e.getMessage(), e);
161             }
162         }
163         if (cl.hasOption(Format.KEGG_ENZYME.cliOption)){
164         	try {
165         		String keggFile = cl.getOptionValue(Format.KEGG_ENZYME.cliOption);
166         		app.exportKegg(enzymes, keggFile);
167             } catch (Exception e) {
168                 LOGGER.error(e.getMessage(), e);
169         	}
170         }
171     }
172 
173     protected ExporterApp(String dbConfig)
174     throws SQLException, IOException, DomainException {
175         intenzConnection = OracleDatabaseInstance.getInstance(dbConfig)
176         		.getConnection();
177         stats = new IntEnzDbStatistics(intenzConnection);
178     }
179 
180     @Override
181 	protected void finalize() throws Throwable {
182     	if (intenzConnection != null) intenzConnection.close();
183 	}
184 
185 	/**
186      * Gets the list of enzymes to be exported.
187      * @param ecString An EC number. If <code>null</code>, every exportable
188      *      enzyme is included.
189      * @throws SQLException
190      * @throws DomainException
191      */
192     protected List<EnzymeEntry> getEnzymeList(String ecString)
193     throws SQLException, MapperException, DomainException{
194         List<EnzymeEntry> enzymeList = null;
195         EnzymeEntryMapper mapper = new EnzymeEntryMapper();
196 		if (ecString != null){
197         	EnzymeCommissionNumber ec = EnzymeCommissionNumber.valueOf(ecString);
198         	Status status = ec.getType().equals(Type.PRELIMINARY)?
199         			Status.PRELIMINARY : Status.APPROVED;
200     		enzymeList  = Collections.singletonList(
201     				mapper.findByEc(ec.getEc1(), ec.getEc2(), ec.getEc3(),
202     						ec.getEc4(), status, intenzConnection));
203         } else {
204             LOGGER.info("Retrieving IntEnz entries");
205         	enzymeList = mapper.exportAllEntries(intenzConnection);
206             LOGGER.info("Retrieved IntEnz entries");
207         }
208 		return enzymeList;
209     }
210 
211     /**
212      * Builds a map of EC numbers (as String) to <code>EnzymeClass</code>,
213      * <code>EnzymeSubClass</code> or <code>EnzymeSubSubClass</code> objects
214      * from which to retrieve names and descriptions.
215      * @param con a database connection.
216      * @return a map of EC numbers (as String) to <code>EnzymeClass</code>,
217      * 		<code>EnzymeSubClass</code> or <code>EnzymeSubSubClass</code>
218      * 		objects.
219      * @throws SQLException
220      * @throws DomainException
221      */
222     public static Map<String, Object> getDescriptions(Connection con)
223     throws SQLException, DomainException{
224         LOGGER.info("Retrieving IntEnz descriptions");
225         Map<String, Object> descriptions = new HashMap<String, Object>();
226         EnzymeClassMapper classMapper = new EnzymeClassMapper();
227         EnzymeSubclassMapper subclassMapper = new EnzymeSubclassMapper();
228         EnzymeSubSubclassMapper subsubclassMapper = new EnzymeSubSubclassMapper();
229         for (Object o : classMapper.findAll(con)) {
230             EnzymeClass enzymeClass = (EnzymeClass) o;
231             descriptions.put(enzymeClass.getEc().toString(), enzymeClass);
232         }
233         for (Object o : subclassMapper.findAll(con)) {
234             EnzymeSubclass enzymeSubclass = (EnzymeSubclass) o;
235             descriptions.put(enzymeSubclass.getEc().toString(), enzymeSubclass);
236         }
237         for (Object o : subsubclassMapper.findAll(con)) {
238             EnzymeSubSubclass enzymeSubsubclass = (EnzymeSubSubclass) o;
239             descriptions.put(enzymeSubsubclass.getEc().toString(), enzymeSubsubclass);
240         }
241         LOGGER.info("Retrieved IntEnz descriptions");
242         return Collections.unmodifiableMap(descriptions);
243     }
244 
245     /**
246      * Exports data in XML format.
247      * @param enzymeList 
248      * @param descriptions 
249      * @param toDir destination directory for XML files.
250      * @throws Exception
251      */
252     protected void exportXML(Collection<EnzymeEntry> enzymeList,
253     		Map<String, Object> descriptions, String toDir) throws Exception {
254         OutputStream os = null;
255         checkWritable(toDir);
256         LOGGER.info("Intenz exporter - Release " + stats.getReleaseNumber());
257         LOGGER.info("Outputting XML to " + toDir);
258             XmlExporter exporter = new XmlExporter();
259             exporter.setDescriptions(descriptions);
260             exporter.setReleaseDate(new SimpleDateFormat("yyyy-MM-dd")
261                     .format(stats.getReleaseDate()));
262             exporter.setReleaseNumber(stats.getReleaseNumber());
263             for (XmlExporter.Flavour flavour : XmlExporter.Flavour.values()){
264                 exporter.setFlavour(flavour);
265                 File flavourDir = new File(toDir, flavour.toString());
266                 flavourDir.mkdir();
267                 LOGGER.info("Single-entry XML start");
268                 List<EnzymeEntry> validEntriesList = new ArrayList<EnzymeEntry>();
269                 // Export single-entry files:
270                 for (EnzymeEntry entry : enzymeList) {
271                     String classEc = "EC_" + String.valueOf(entry.getEc().getEc1());
272                     String subclassEc = classEc + "." + String.valueOf(entry.getEc().getEc2());
273                     String subsubclassEc = subclassEc + "." + String.valueOf(entry.getEc().getEc3());
274                     String dirTree = classEc + "/" + subclassEc + "/" + subsubclassEc;
275                     File subsubclassDir =  new File(flavourDir, dirTree);
276                     subsubclassDir.mkdirs();
277                     File outputFile = new File(subsubclassDir, "EC_" + entry.getEc().toString() + ".xml");
278                     try {
279                         os = new FileOutputStream(outputFile);
280                         exporter.export(entry, os);
281                         validEntriesList.add(entry);
282                     } catch (Exception e) {
283                         // Continue with any other entries
284                         LOGGER.warn(entry.getEc().toString(), e);
285                     } finally {
286                         if (os != null) os.close();
287                     }
288                 }
289                 LOGGER.info("Single-entry XML end");
290                 // Export whole tree (only valid entries):
291                 File treeFile = new File(flavourDir, "intenz.xml");
292                 try {
293                     os = new FileOutputStream(treeFile);
294                     LOGGER.info("Whole tree XML start");
295                     exporter.export(validEntriesList, os);
296                     LOGGER.info("Whole tree XML end");
297                 } catch (Exception e) {
298                     LOGGER.error("Whole tree dump", e);
299                 } finally {
300                     if (os != null) os.close();
301                 }
302             }
303     }
304 
305     protected void exportSitemap(Collection<EnzymeEntry> enzymeList,
306     		Map<String, Object> descriptions, String sitemapFile)
307     throws IOException, JAXBException, SAXException{
308     	final String queryUrl = "http://www.ebi.ac.uk/intenz/query?cmd=SearchEC&q=";
309     	final String spotlightUrl = "http://www.ebi.ac.uk/intenz/spotlight.jsp?ec=";
310     	File sitemap = new File(sitemapFile);
311         checkWritable(sitemap.getParent());
312     	if (!sitemap.exists()) sitemap.createNewFile();
313     	OutputStream os = null;
314     	// Build the list of URLs:
315     	Collection<String> urls = new ArrayList<String>();
316 		// Enzymes:
317     	for (EnzymeEntry entry : enzymeList) {
318     		StringBuffer sb = new StringBuffer(queryUrl);
319 			String ec = entry.getEc().toString();
320     		sb.append(ec);
321     		urls.add(sb.toString());
322 		}
323 		// Spotlights:
324 		spotlights = new Properties();
325         spotlights.load(ExporterApp.class.getClassLoader()
326         		.getResourceAsStream("spotlights.properties"));
327 		for (Object ec : spotlights.keySet()){
328 			StringBuffer spotSb = new StringBuffer(spotlightUrl);
329 			spotSb.append((String) ec);
330 			urls.add(spotSb.toString());
331 		}
332 		// Classes, subclasses and subsubclasses:
333 		for (String ec : descriptions.keySet()){
334 			StringBuffer sb = new StringBuffer(queryUrl);
335 			sb.append(ec);
336     		urls.add(sb.toString());
337 		}
338 		// Build the sitemap:
339     	try {
340         	os = new FileOutputStream(sitemap);
341 			SitemapExporter exporter = new SitemapExporter();
342 			exporter.export(urls, os);
343     	} finally {
344             if (os != null) os.close();
345     	}
346     }
347 
348     protected void exportBiopax(Collection<EnzymeEntry> enzymeList, String biopaxFile)
349     throws IOException, IllegalAccessException, InvocationTargetException{
350         OutputStream os = null;
351         LOGGER.info("Outputting BioPAX to " + biopaxFile);
352         try {
353             File owlFile = new File(biopaxFile);
354             checkWritable(owlFile.getParent());
355             if (!owlFile.exists()) owlFile.createNewFile();
356             os = new FileOutputStream(owlFile);
357             Biopax.write(enzymeList, os);
358         } finally {
359             if (os != null) os.close();
360         }
361     }
362 
363     protected void exportKegg(Collection<EnzymeEntry> enzymes, String keggFile)
364     throws Exception {
365 		OutputStream os = null;
366     	try {
367 			File keggEnzymeFile = new File(keggFile);
368             checkWritable(keggEnzymeFile.getParent());
369 			if (!keggEnzymeFile.exists()) keggEnzymeFile.createNewFile();
370 			os = new FileOutputStream(keggEnzymeFile);
371 			KeggExporter exporter = new KeggExporter();
372 			exporter.export(enzymes, os);
373         } finally {
374             if (os != null) os.close();
375 		}
376 		
377 	}
378 
379 	private void checkWritable(String toDir) throws IOException{
380         File outputDir = new File(toDir);
381         if (outputDir.exists()){
382             if (!outputDir.canWrite()){
383             	String msg = "Cannot write to " + toDir;
384                 LOGGER.error(msg);
385                 throw new IOException();
386             }
387         } else if (!outputDir.mkdirs()){
388         	String msg = "Cannot create output directory " + toDir;
389             LOGGER.error(msg);
390             throw new IOException(msg);
391         }
392     }
393 
394 }