package javajs.util; import java.io.BufferedReader; import java.util.Hashtable; import java.util.Map; import javajs.api.GenericCifDataParser; import javajs.api.GenericLineReader; // BH 11/21/16 -- adds support for array grouping [...] - used in 2016-format magCIF files /** * * A CIF 1.0 tokenizer class for dealing with quoted strings in CIF files. * * Subclassed by org.jmol.adapters.readers.cif.Cif2DataParser * * Greek letters implemented in Jmol 13.3.9 and only for * titles and space groups. All other mark ups ignored. * *

* regarding the treatment of single quotes vs. primes in * cif file, PMR wrote: *

* * There is a formal grammar for CIF * (see http://www.iucr.org/iucr-top/cif/index.html) * which confirms this. The textual explanation is *

* 14. Matching single or double quote characters (' or ") may * be used to bound a string representing a non-simple data value * provided the string does not extend over more than one line. *

* 15. Because data values are invariably separated from other * tokens in the file by white space, such a quote-delimited * character string may contain instances of the character used * to delimit the string provided they are not followed by white * space. For example, the data item * <pre> * _example 'a dog's life' * </pre> * is legal; the data value is a dog's life. *

* [PMR - the terminating character(s) are quote+whitespace. * That would mean that: * <pre> * _example 'Jones' life' * </pre> * would be an error] *

* The CIF format was developed in the late 1980s under the aegis of the * International Union of Crystallography (I am a consultant to the COMCIFs * committee). It was ratified by the Union and there have been several * workshops. mmCIF is an extension of CIF which includes a relational * structure. The formal publications are: *

* Hall, S. R. (1991). "The STAR File: A New Format for Electronic Data * Transfer and Archiving", J. Chem. Inform. Comp. Sci., 31, 326-333. * Hall, S. R., Allen, F. H. and Brown, I. D. (1991). "The Crystallographic * Information File (CIF): A New Standard Archive File for Crystallography", * Acta Cryst., A47, 655-685. * Hall, S.R. & Spadaccini, N. (1994). "The STAR File: Detailed * Specifications," J. Chem. Info. Comp. Sci., 34, 505-508. *

*/
public class CifDataParser implements GenericCifDataParser {

  /**
   * Reports the CIF format version this parser handles.
   *
   * @return 1 in this class; getAllCifDataType() delivers data values as
   *         objects rather than strings when this value is 2 or greater
   */
  protected int getVersion() {
    return 1;
  }

  /**
   * The maximum number of columns (data keys) passed to the parser or found in the file
   * for a given loop_ or category.subkey listing. Also the fixed length of
   * the columnData array.
   */
  public static final int KEY_MAX = 100;

  /**
   * line source assigned in set(); per the set() documentation it is
   * ignored unless br is null
   */
  private GenericLineReader reader;

  /**
   * buffered source assigned in set(); closed at the end of
   * getAllCifDataType()
   */
  private BufferedReader br;

  /**
   * current line, from buffered reader; reset to the empty string when
   * getAllCifDataType() starts
   */
  protected String line;

  /**
   * working string (buffer)
   */
  protected String str;

  /**
   * pointer to current character on str
   */
  protected int ich;

  /**
   * length of str
   */
  protected int cch;

  /**
   * whether we are processing an unquoted value or key
   */
  protected boolean wasUnquoted;

  /**
   * optional token terminator; in CIF 2.0 could be } or ]
   */
  protected char cterm = '\0';

  /**
   * string to return for CIF data value . and ?; getAllCifDataType() sets
   * this to null while it runs and restores "\0" before returning
   */
  protected String nullString = "\0";

  /**
   * A flag to create and return Java objects, not strings.
   * Used only by Jmol scripting x = getProperty("cifInfo", filename).
   * getAllCifDataType() sets it to (getVersion() >= 2) while it runs and
   * clears it before returning.
   */
  protected boolean asObject;

  /**
   * debugging flag passed from reader; unused
   */
  protected boolean debugging;

  /**
   * private processing fields
   */
  // NOTE(review): strPeeked/ichPeeked presumably hold the token and buffer
  // position saved by peekToken() -- confirm against that method
  private Object strPeeked;
  private int ichPeeked;

  // number of keys in the loop_ being read; assigned in getAllCifLoopData()
  // and returned by getColumnCount()
  protected int columnCount;

  // indexed by column; returned by getColumnName(i)
  protected String[] columnNames;

  // indexed by column; returned by getColumnData(i)
  private Object[] columnData = new Object[KEY_MAX];

  // set to true by getAllCifLoopData() once loop keys have been read
  private boolean isLoop;

  // NOTE(review): usage not visible in this part of the file
  protected boolean haveData;

  /**
   * comments at the top of a file, including #\#CIF_2.0, for example;
   * returned as a string by getFileHeader()
   */
  private SB fileHeader = new SB();

  // NOTE(review): presumably true while the leading comment block is still
  // being collected into fileHeader -- confirm
  private boolean isHeader = true;

  // NOTE(review): usage not visible in this part of the file
  private boolean skipToken;

  /**
   * Set the string value of what is returned for "." and "?"
   *
   * @param nullString null here returns "." and "?"; default is "\0"
   */
  public void setNullValue(String nullString) {
    this.nullString = nullString;
  }

  /**
   * A global, static map that contains field information. The assumption is that
   * if we read a set of fields for, say, atom_site, once in a lifetime, then
   * that should be good forever. Those are static lists. Or should be....
*/ protected static Map htFields = new Hashtable(); //////////////////////////////////////////////////////////////// // special tokenizer class //////////////////////////////////////////////////////////////// public CifDataParser() { // for reflection } @Override public Object getColumnData(int i) { return columnData[i]; } @Override public int getColumnCount() { return columnCount; } @Override public String getColumnName(int i) { return columnNames[i]; } /** * A Chemical Information File data parser. * * set() should be called immediately upon construction. * * Two options; one of reader or br should be null, or reader will be * ignored. Just simpler this way... * * @param reader Anything that can deliver a line of text or null * @param br A standard BufferedReader. * @param debugging * */ @Override public CifDataParser set(GenericLineReader reader, BufferedReader br, boolean debugging) { this.reader = reader; this.br = br; this.debugging = debugging; return this; } /** * * @return commented-out section at the start of a CIF file. * */ @Override public String getFileHeader() { return fileHeader.toString(); } /** * Parses all CIF data for a reader defined in the constructor * into a standard Map structure and close the BufferedReader if * it exists. * * @return Hashtable of models Vector of Hashtable data */ @Override public Map getAllCifData() { return getAllCifDataType(); } public Map getAllCifDataType(String... 
types) { if (types != null) { if (types.length == 0) types = null; else for (int i = 0; i < types.length; i++) types[i] = fixKey(types[i]); } line = ""; String key; Map data = null, data0 = null; Map allData = new Hashtable(); Lst> models = new Lst>(); allData.put("models", models); asObject = (getVersion() >= 2); nullString = null; Lst> saveFrames = new Lst>(); try { while ((key = getNextToken()) != null) { if (key.startsWith("global_") || key.startsWith("data_")) { models.addLast(data0 = data = new Hashtable()); data.put("name", key); continue; } if (key.startsWith("loop_")) { getAllCifLoopData(data, types); continue; } if (key.startsWith("save_")) { if (key.equals("save_")) { int n = saveFrames.size(); if (n == 0) { System.out.println("CIF ERROR ? save_ without corresponding save_xxxx"); data = data0; } else { data = saveFrames.removeItemAt(n - 1); } } else { saveFrames.addLast(data); Map d = data; data = new Hashtable(); d.put(key, data); } continue; } if (key.charAt(0) != '_') { System.out.println("CIF ERROR ? should be an underscore: " + key); } else { Object value = (asObject ? getNextTokenObject() : getNextToken()); if (value == null) { System.out.println("CIF ERROR ? end of file; data missing: " + key); } else { key = fixKey(key); if (types == null || checkKey(types, key)) data.put(key, value); } } } } catch (Exception e) { // ? } asObject = false; try { if (br != null) br.close(); } catch (Exception e) { // ? } nullString = "\0"; return allData; } /** * create our own list of keywords and for each one create a list of data * associated with that keyword. For example, a list of all x coordinates, * then a list of all y coordinates, etc. 
* * @param data * @throws Exception */ @SuppressWarnings("unchecked") private void getAllCifLoopData(Map data, String[] types) throws Exception { String key; Lst keyWords = new Lst(); Object o; boolean skipping = false; while ((o = peekToken()) != null && o instanceof String && ((String) o).charAt(0) == '_') { key = fixKey((String) getTokenPeeked()); keyWords.addLast(key); if (types == null || checkKey(types, key)) data.put(key, new Lst()); else skipping = true; } columnCount = keyWords.size(); if (columnCount == 0) return; isLoop = true; if (skipping) skipLoop(false); else while (getData()) for (int i = 0; i < columnCount; i++) ((Lst