View Javadoc

1   package uk.ac.ebi.intenz.tools.release;
2   
3   
4   import java.io.IOException;
5   import java.sql.Connection;
6   import java.sql.DriverManager;
7   import java.sql.PreparedStatement;
8   import java.sql.ResultSet;
9   import java.sql.SQLException;
10  import java.sql.Statement;
11  import java.util.ArrayList;
12  import java.util.List;
13  
14  import org.apache.log4j.Logger;
15  
16  import uk.ac.ebi.biobabel.util.db.DatabaseInstance;
17  import uk.ac.ebi.biobabel.util.db.OracleDatabaseInstance;
18  import uk.ac.ebi.intenz.domain.constants.EnzymeViewConstant;
19  import uk.ac.ebi.intenz.domain.enzyme.EnzymeClass;
20  import uk.ac.ebi.intenz.domain.enzyme.EnzymeEntry;
21  import uk.ac.ebi.intenz.domain.enzyme.EnzymeSubSubclass;
22  import uk.ac.ebi.intenz.domain.enzyme.EnzymeSubclass;
23  import uk.ac.ebi.intenz.domain.exceptions.DomainException;
24  import uk.ac.ebi.intenz.mapper.EnzymeClassMapper;
25  import uk.ac.ebi.intenz.mapper.EnzymeEntryMapper;
26  import uk.ac.ebi.intenz.mapper.EnzymeSubSubclassMapper;
27  import uk.ac.ebi.intenz.mapper.EnzymeSubclassMapper;
28  import uk.ac.ebi.intenz.tools.release.helper.EnzymeClassHelper;
29  import uk.ac.ebi.intenz.tools.release.helper.EnzymeEntryHelper;
30  import uk.ac.ebi.intenz.tools.release.helper.EnzymeSubSubclassHelper;
31  import uk.ac.ebi.intenz.tools.release.helper.EnzymeSubclassHelper;
32  import uk.ac.ebi.xchars.SpecialCharacters;
33  
34  /**
35   * This class creates populates the free text search table (intenz_text).
36   *
37   * @author Michael Darsow
38   * @version $Revision: 1.2 $ $Date: 2008/05/27 15:47:24 $
39   */
40  public class IntEnzText {
41  
42  	private static final Logger LOGGER = Logger.getLogger(IntEnzText.class);
43  	
44    /**
45     * SQL statement used for loading the list of all approved enzymes.
46     * <p/>
47     * Because of modified enzymes which still have the same EC number
48     * (and are both approved) only the most up-to-date
49     * version should be loaded. This is done by the sub-statement.
50     */
51    private static String FIND_ALL = new StringBuilder()
52  		  .append("SELECT enzyme_id, ec1, ec2, ec3, ec4,")
53  		  .append(" history, note, status, source, active")
54  		  .append(" FROM enzyme.enzymes")
55    		  .append(" WHERE status IN ('OK','PR','PM') AND source = 'INTENZ'")
56    		  .append(" AND enzyme_id NOT IN")
57    		  .append(" (SELECT before_id FROM enzyme.history_events")
58    		  .append(" WHERE event_class = 'MOD')")
59    		  .append(" ORDER BY ec1, ec2, ec3, ec4")
60    		  .toString();
61  
62      private static String DELETE_ALL = "DELETE FROM intenz_text";
63  
64      private static String INSERT = new StringBuilder()
65      	.append("INSERT INTO intenz_text (enzyme_id, ec, common_name,")
66      	.append(" status, source, text_order, text)")
67      	.append(" VALUES ( ?, ?, ?, ?, ?, ?, ? )")
68      	.toString();
69      
70    /** 
71     * Refreshes the intenz_text table.<br/>
72     * The first and only parameter is the database instance to be updated.
73     * @param args 
74     */
75    public static void main(String[] args) {
76      final SpecialCharacters encoding = SpecialCharacters.getInstance(null);
77  
78        if (args.length == 0){
79            LOGGER.error("IntEnzText needs one parameter");
80            System.exit(1);
81        }
82  
83        String instanceName = args[0];
84        DatabaseInstance instance = null;
85        try {
86            instance = OracleDatabaseInstance.getInstance(instanceName);
87        } catch (IOException e) {
88            LOGGER.error("Missing database configuration for " + instanceName, e);
89            System.exit(2);
90        }
91  
92        if (instance == null){
93            LOGGER.error("Missing database parameter(s)");
94            System.exit(3);
95        }
96  
97  
98      Connection con = null;
99      try {
100       Class.forName(instance.getDriver());
101       String url =  instance.getOracleThinUrl();
102       con = DriverManager.getConnection(url, instance.getUser(), instance.getPassword());
103       con.setAutoCommit(false);
104     } catch (Exception e) {
105       LOGGER.error("Could not open connection to " + instanceName, e);
106       System.exit(4);
107     }
108 
109       Statement deleteAllStatement = null;
110       try {
111           deleteAllStatement = con.createStatement();
112           LOGGER.info("Deleting from intenz_text table... ");
113           deleteAllStatement.execute(DELETE_ALL);
114           LOGGER.info("Deleted!");
115       } catch (SQLException e) {
116           LOGGER.error("Could not clear table intenz_text on " + instanceName, e);
117           try {
118               if (deleteAllStatement != null) deleteAllStatement.close();
119               if (con != null) con.close();
120           } catch (SQLException e2) {
121               e2.printStackTrace();
122           }
123           System.exit(5);
124       }
125 
126     PreparedStatement findAllStatement = null, insertStatement = null,
127     	selectEcStatement = null;
128     ResultSet rs = null;
129     EnzymeEntryMapper enzymeEntryMapper = new EnzymeEntryMapper();
130     EnzymeClassMapper enzymeClassMapper = new EnzymeClassMapper();
131     EnzymeSubclassMapper enzymeSubclassMapper = new EnzymeSubclassMapper();
132     EnzymeSubSubclassMapper enzymeSubSubclassMapper = new EnzymeSubSubclassMapper();
133 
134     // switch this to IntEnz to view all
135 //    final EnzymeViewConstant view = EnzymeViewConstant.IUBMB;
136     final EnzymeViewConstant view = EnzymeViewConstant.INTENZ;
137 
138     LOGGER.info("Starting INTENZ_TEXT indexing...");
139     long timeElapsed = System.currentTimeMillis();
140     try {
141       int countEnzymes = 0;
142       insertStatement = con.prepareStatement(INSERT);
143       selectEcStatement = con.prepareStatement("SELECT ec1 FROM classes");
144 
145       // Enzymes
146       LOGGER.info("   Loading enzymes ... ");
147       findAllStatement = con.prepareStatement(FIND_ALL);
148       rs = findAllStatement.executeQuery();
149 
150       while (rs.next()) {
151         countEnzymes++;
152         Long id = new Long(rs.getLong(1));
153         try {
154           EnzymeEntry enzymeEntry = enzymeEntryMapper.findById(id, con);
155           List textParts = getXmlParts(new StringBuffer(EnzymeEntryHelper.toXML(enzymeEntry, encoding, view, true)));
156 
157           for (int iii = 0; iii < textParts.size(); iii++) {
158             String text = (String) textParts.get(iii);
159             insertStatement.setLong(1, enzymeEntry.getId().longValue());
160             insertStatement.setString(2, enzymeEntry.getEc().toString());
161             String commonName = enzymeEntry.getCommonName(EnzymeViewConstant.INTENZ).getName();
162             if(commonName == null) commonName = enzymeEntry.getCommonName(view).getName();
163             insertStatement.setString(3, commonName);
164             insertStatement.setString(4, enzymeEntry.getStatus().getCode());
165             insertStatement.setString(5, enzymeEntry.getSource().toString());
166             insertStatement.setInt(6, (iii + 1));
167             insertStatement.setString(7, text);
168             insertStatement.executeUpdate();
169             insertStatement.clearParameters();
170           }
171 //          con.commit();
172 
173         } catch (DomainException e) {
174           e.printStackTrace();
175         } catch (Exception e) {
176             e.printStackTrace();
177         }
178       }
179       LOGGER.info("   ... loaded and written (XML) " + countEnzymes + " enzymes.");
180 
181       // Classes
182       LOGGER.info("   Loading classes ...");
183       int idFake = 0; // classes do not have an ID but it is needed for the population.
184       rs = selectEcStatement.executeQuery();
185       while (rs.next()) {
186         idFake++;
187         String ec1 = rs.getString(1);
188         try {
189           EnzymeClass enzymeClass = enzymeClassMapper.find(ec1, con);
190           List textParts = getXmlParts(new StringBuffer(EnzymeClassHelper.toXML(enzymeClass, encoding)));
191 
192           for (int iii = 0; iii < textParts.size(); iii++) {
193             String text = (String) textParts.get(iii);
194             insertStatement.setInt(1, idFake);
195             insertStatement.setString(2, enzymeClass.getEc().toString());
196             insertStatement.setString(3, enzymeClass.getName());
197             insertStatement.setString(4, "OK");
198             insertStatement.setString(5, "IUBMB");
199             insertStatement.setInt(6, (iii + 1));
200             insertStatement.setString(7, text);
201             insertStatement.executeUpdate();
202             insertStatement.clearParameters();
203           }
204 //          con.commit();
205         } catch (DomainException e) {
206           e.printStackTrace();
207         }
208       }
209       LOGGER.info("   ... classes loaded.");
210 
211       // Subclasses
212       LOGGER.info("   Loading subclasses ...");
213       selectEcStatement = con.prepareStatement("SELECT ec1, ec2 FROM subclasses");
214       rs = selectEcStatement.executeQuery();
215       while (rs.next()) {
216         idFake++;
217         String ec1 = rs.getString(1);
218         String ec2 = rs.getString(2);
219         try {
220           EnzymeSubclass enzymeSubclass = enzymeSubclassMapper.find(ec1, ec2, con);
221           List textParts = getXmlParts(new StringBuffer(EnzymeSubclassHelper.toXML(enzymeSubclass, encoding)));
222 
223           for (int iii = 0; iii < textParts.size(); iii++) {
224             String text = (String) textParts.get(iii);
225             insertStatement.setInt(1, idFake);
226             insertStatement.setString(2, enzymeSubclass.getEc().toString());
227             insertStatement.setString(3, enzymeSubclass.getName());
228             insertStatement.setString(4, "OK");
229             insertStatement.setString(5, "IUBMB");
230             insertStatement.setInt(6, (iii + 1));
231             insertStatement.setString(7, text);
232             insertStatement.executeUpdate();
233             insertStatement.clearParameters();
234           }
235 //          con.commit();
236         } catch (DomainException e) {
237           e.printStackTrace();
238         }
239       }
240       LOGGER.info("   ... subclasses loaded.");
241 
242       // SubSubclasses
243       LOGGER.info("   Loading sub-subclasses ...");
244       selectEcStatement = con.prepareStatement("SELECT ec1, ec2, ec3 FROM subsubclasses");
245       rs = selectEcStatement.executeQuery();
246       while (rs.next()) {
247         idFake++;
248         int ec1 = rs.getInt(1);
249         int ec2 = rs.getInt(2);
250         int ec3 = rs.getInt(3);
251         try {
252           EnzymeSubSubclass enzymeSubSubclass = enzymeSubSubclassMapper.find(ec1, ec2, ec3, con);
253           List textParts = getXmlParts(new StringBuffer(EnzymeSubSubclassHelper.toXML(enzymeSubSubclass, encoding)));
254 
255           for (int iii = 0; iii < textParts.size(); iii++) {
256             String text = (String) textParts.get(iii);
257             insertStatement.setInt(1, idFake);
258             insertStatement.setString(2, enzymeSubSubclass.getEc().toString());
259             insertStatement.setString(3, enzymeSubSubclass.getName());
260             insertStatement.setString(4, "OK");
261             insertStatement.setString(5, "IUBMB");
262             insertStatement.setInt(6, (iii + 1));
263             insertStatement.setString(7, text);
264             insertStatement.executeUpdate();
265             insertStatement.clearParameters();
266           }
267 //          con.commit();
268         } catch (Exception e) {
269           e.printStackTrace();
270         }
271       }
272       LOGGER.info("   ... sub-subclasses loaded.");
273 
274       con.commit();
275 
276     } catch (SQLException e) {
277       try {
278         con.rollback();
279       } catch (SQLException e1) {
280         e1.printStackTrace();
281       }
282       e.printStackTrace();
283     } finally {
284       try {
285         if (findAllStatement != null) findAllStatement.close();
286         if (insertStatement != null) insertStatement.close();
287         if (selectEcStatement != null) selectEcStatement.close();
288       } catch (SQLException e) {
289         e.printStackTrace();
290       }
291     }
292 
293     try {
294       con.close();
295     } catch (SQLException e) {
296       e.printStackTrace();
297     }
298 
299     LOGGER.info("... population successfully ended.");
300     if (System.currentTimeMillis() > timeElapsed) {
301       timeElapsed = System.currentTimeMillis() - timeElapsed;
302       LOGGER.info("Time elapsed: " + getElapsedTime(timeElapsed / 1000));
303     }
304   }
305 
306   private static List getXmlParts(StringBuffer xml) {
307     List textParts = new ArrayList();
308 
309     while (xml.length() > 4000) {
310       textParts.add(xml.substring(0, 4000));
311       xml = xml.delete(0, 4000);
312     }
313     textParts.add(xml.toString());
314 
315     return textParts;
316   }
317 
318   private static String getElapsedTime(long seconds) {
319     StringBuffer elapsedTime = new StringBuffer();
320     LOGGER.info("seconds = " + seconds);
321     if (seconds > 3599) {
322       elapsedTime.append(seconds / 3600);
323       elapsedTime.append("h ");
324       seconds %= 3600;
325     } else {
326       if (seconds > 59) {
327         elapsedTime.append(seconds / 60);
328         elapsedTime.append("m ");
329         seconds %= 60;
330       }
331     }
332     elapsedTime.append(seconds);
333     elapsedTime.append("s");
334 
335     return elapsedTime.toString();
336   }
337 }