ForceBalance API 1.1
Automated optimization of force fields and empirical potentials
nifty.py
1 """@package forcebalance.nifty Nifty functions, intended to be imported by any module within ForceBalance.
2 
3 Table of Contents:
4 - I/O formatting
5 - Math: Variable manipulation, linear algebra, least squares polynomial fitting
6 - Pickle: Expand Python's own pickle to accommodate writing XML etree objects
7 - Commands for submitting things to the Work Queue
8 - Various file and process management functions
9 - Development stuff (not commonly used)
10 
11 Named after the mighty Sniffy Handy Nifty (King Sniffy)
12 
13 @author Lee-Ping Wang
14 @date 12/2011
15 """
16 
17 from select import select
18 import os, sys, shutil
19 from re import match, sub
20 import numpy as np
21 import itertools
22 from numpy import array, diag, dot, eye, mat, mean, transpose
23 from numpy.linalg import norm, svd
24 import threading
25 import pickle
26 import time
27 import subprocess
28 from subprocess import PIPE, STDOUT
29 from collections import OrderedDict, defaultdict
30 
31 import forcebalance
32 from forcebalance.output import *
33 logger = getLogger(__name__)
34 
35 # import IPython as ip # For debugging
36 
37 ## Boltzmann constant in kJ mol^-1 K^-1
38 kb = 0.0083144100163
39 ## Q-Chem to GMX unit conversion for energy
40 eqcgmx = 2625.5002
41 ## Q-Chem to GMX unit conversion for force
42 fqcgmx = -49621.9
43 ## One bohr equals this many angstroms
44 bohrang = 0.529177249
45 
46 #=========================#
47 # I/O formatting #
48 #=========================#
49 def pvec1d(vec1d, precision=1, loglevel=INFO):
50  """Printout of a 1-D vector.
51 
52  @param[in] vec1d a 1-D vector
53  """
54  v2a = array(vec1d)
55  for i in range(v2a.shape[0]):
56  logger.log(loglevel, "%% .%ie " % precision % v2a[i])
57  logger.log(loglevel, '\n')
58 
59 def pmat2d(mat2d, precision=1, loglevel=INFO):
60  """Printout of a 2-D matrix.
61 
62  @param[in] mat2d a 2-D matrix
63  """
64  m2a = array(mat2d)
65  for i in range(m2a.shape[0]):
66  for j in range(m2a.shape[1]):
67  logger.log(loglevel, "%% .%ie " % precision % m2a[i][j])
68  logger.log(loglevel, '\n')
69 
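# Editorial usage sketch (illustrative, not part of the original file):
# printing a vector and a matrix at two decimal places via the module logger.
def _example_printouts():
    pvec1d([1.0, 2.5, 3.25], precision=2)
    pmat2d(eye(2), precision=2)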
70 def encode(l):
71  return [[len(list(group)),name] for name, group in itertools.groupby(l)]
72 
73 def segments(e):
74  # Takes encoded input.
75  begins = np.array([sum([k[0] for k in e][:j]) for j,i in enumerate(e) if i[1] == 1])
76  lens = np.array([i[0] for i in e if i[1] == 1])
77  return [(i, i+j) for i, j in zip(begins, lens)]
78 
79 def commadash(l):
80  # Formats a list of 0-based indices like [26, 27, 28, 29, 30, 87, 88, 89, 90, 99, 135, 136, 137, 138]
81  # into a 1-based string like '27-31,88-91,100,136-139'
82  L = sorted(l)
83  if len(L) == 0:
84  return "(empty)"
85  L.append(L[-1]+1)
86  LL = [i in L for i in range(L[-1])]
87  return ','.join('%i-%i' % (i[0]+1,i[1]) if (i[1]-1 > i[0]) else '%i' % (i[0]+1) for i in segments(encode(LL)))
88 
89 def uncommadash(s):
90  # Takes a 1-based string like '27-31,88-91,100,136-139'
91  # and turns it into a 0-based list like [26, 27, 28, 29, 30, 87, 88, 89, 90, 99, 135, 136, 137, 138]
92  L = []
93  try:
94  for w in s.split(','):
95  ws = w.split('-')
96  a = int(ws[0])-1
97  if len(ws) == 1:
98  b = int(ws[0])
99  elif len(ws) == 2:
100  b = int(ws[1])
101  else:
102  logger.warning("Dash-separated list cannot exceed length 2\n")
103  raise
104  if a < 0 or b <= 0 or b <= a:
105  if a < 0 or b <= 0:
106  logger.warning("Items in list cannot be zero or negative: %d %d\n" % (a, b))
107  else:
108  logger.warning("Second number cannot be smaller than the first: %d %d\n" % (a, b))
109  raise
110  newL = range(a,b)
111  if any([i in L for i in newL]):
112  logger.warning("Duplicate entries found in list\n")
113  raise
114  L += newL
115  if sorted(L) != L:
116  logger.warning("List is out of order\n")
117  raise
118  except:
119  raise Exception('Invalid string for converting to list of numbers: %s' % s)
120  return L
121 
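# Editorial usage sketch (illustrative, not part of the original file):
# commadash turns a 0-based index list into the 1-based dash/comma string,
# and uncommadash inverts it.
def _example_commadash():
    idx = [26, 27, 28, 29, 30, 99]
    s = commadash(idx)              # '27-31,100'
    assert uncommadash(s) == idx
    return s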
122 #list(itertools.chain(*[range(*(int(w.split('-')[0])-1, int(w.split('-')[1]) if len(w.split('-')) == 2 else int(w.split('-')[0]))) for w in Mao.split(',')]))
123 
124 def printcool(text,sym="#",bold=False,color=2,ansi=None,bottom='-',minwidth=50,center=True):
125  """Cool-looking printout for slick formatting of output.
126 
127  @param[in] text The string that the printout is based upon. This function
128  will print out the string, ANSI-colored and enclosed in the symbol
129  for example:\n
130  <tt> ################# </tt>\n
131  <tt> ### I am cool ### </tt>\n
132  <tt> ################# </tt>
133  @param[in] sym The surrounding symbol\n
134  @param[in] bold Whether to use bold print
135 
136  @param[in] color The ANSI color:\n
137  1 red\n
138  2 green\n
139  3 yellow\n
140  4 blue\n
141  5 magenta\n
142  6 cyan\n
143  7 white
144 
145  @param[in] bottom The symbol for the bottom bar
146 
147  @param[in] minwidth The minimum width for the box, if the text is very short
148  then we insert the appropriate number of padding spaces
149 
150  @return bar The bottom bar is returned for the user to print later, e.g. to mark off a 'section'
151  """
152  def newlen(l):
153  return len(sub("\x1b\[[0-9;]*m","",l))
154  text = text.split('\n')
155  width = max(minwidth,max([newlen(line) for line in text]))
156  bar = ''.join(["=" for i in range(width + 6)])
157  bar = sym + bar + sym
158  #bar = ''.join([sym for i in range(width + 8)])
159  logger.info('\r'+bar + '\n')
160  for line in text:
161  if center:
162  padleft = ' ' * ((width - newlen(line)) / 2)
163  else:
164  padleft = ''
165  padright = ' '* (width - newlen(line) - len(padleft))
166  if ansi != None:
167  ansi = str(ansi)
168  logger.info("%s| \x1b[%sm%s " % (sym, ansi, padleft)+line+" %s\x1b[0m |%s\n" % (padright, sym))
169  elif color != None:
170  if color == 0 and bold:
171  logger.info("%s| \x1b[1m%s " % (sym, padleft) + line + " %s\x1b[0m |%s\n" % (padright, sym))
172  elif color == 0:
173  logger.info("%s| %s " % (sym, padleft)+line+" %s |%s\n" % (padright, sym))
174  else:
175  logger.info("%s| \x1b[%s9%im%s " % (sym, bold and "1;" or "", color, padleft)+line+" %s\x1b[0m |%s\n" % (padright, sym))
176  # if color == 3 or color == 7:
177  # print "%s\x1b[40m\x1b[%s9%im%s" % (''.join([sym for i in range(3)]), bold and "1;" or "", color, padleft),line,"%s\x1b[0m%s" % (padright, ''.join([sym for i in range(3)]))
178  # else:
179  # print "%s\x1b[%s9%im%s" % (''.join([sym for i in range(3)]), bold and "1;" or "", color, padleft),line,"%s\x1b[0m%s" % (padright, ''.join([sym for i in range(3)]))
180  else:
181  warn_press_key("Inappropriate use of printcool")
182  logger.info(bar + '\n')
183  botbar = ''.join([bottom for i in range(width + 8)])
184  return botbar + '\n'
185 
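# Editorial usage sketch (illustrative, not part of the original file):
# printcool returns the bottom bar so the caller can close off the section.
def _example_printcool():
    bar = printcool("Reading force field", color=4)
    logger.info("...section contents...\n")
    logger.info(bar)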
186 def printcool_dictionary(Dict,title="General options",bold=False,color=2,keywidth=25,topwidth=50,center=True,leftpad=0):
187  """See documentation for printcool; this is a nice way to print out keys/values in a dictionary.
188 
189  The keys in the dictionary are sorted before printing out.
190 
191  @param[in] Dict The dictionary to be printed
192  @param[in] title The title of the printout
193  """
194  if Dict == None: return
195  bar = printcool(title,bold=bold,color=color,minwidth=topwidth,center=center)
196  def magic_string(str):
197  # This cryptic command returns a string with the number of characters specified as a variable. :P
198  # Useful for printing nice-looking dictionaries, i guess.
199  #print "\'%%-%is\' %% '%s'" % (keywidth,str.replace("'","\\'").replace('"','\\"'))
200  return eval("\'%%-%is\' %% '%s'" % (keywidth,str.replace("'","\\'").replace('"','\\"')))
201  if isinstance(Dict, OrderedDict):
202  logger.info('\n'.join([' '*leftpad + "%s %s " % (magic_string(str(key)),str(Dict[key])) for key in Dict if Dict[key] != None]))
203  else:
204  logger.info('\n'.join([' '*leftpad + "%s %s " % (magic_string(str(key)),str(Dict[key])) for key in sorted([i for i in Dict]) if Dict[key] != None]))
205  logger.info("\n%s" % bar)
206 
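# Editorial usage sketch (illustrative, not part of the original file);
# the option names below are placeholders.
def _example_printcool_dictionary():
    opts = OrderedDict([("jobtype", "newton"), ("maxstep", 100), ("trust0", 0.1)])
    printcool_dictionary(opts, title="Optimizer options", keywidth=20)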
207 #===============================#
208 #| Math: Variable manipulation |#
209 #===============================#
210 def isint(word):
211  """ONLY matches integers! If you have a decimal point? None shall pass!
212 
213  @param[in] word String (for instance, '123', '153.0', '2.', '-354')
214  @return answer Boolean which specifies whether the string is an integer (only +/- sign followed by digits)
215 
216  """
217  return match('^[-+]?[0-9]+$',word)
218 
219 def isfloat(word):
220  """Matches ANY number; it can be a decimal, scientific notation, what have you
221  CAUTION - this will also match an integer.
223  @param[in] word String (for instance, '123', '153.0', '2.', '-354')
224  @return answer Boolean which specifies whether the string is any number
225 
226  """
227  return match('^[-+]?[0-9]*\.?[0-9]*([eEdD][-+]?[0-9]+)?$',word)
228 
229 def isdecimal(word):
230  """Matches things with a decimal only; see isint and isfloat.
231 
232  @param[in] word String (for instance, '123', '153.0', '2.', '-354')
233  @return answer Boolean which specifies whether the string is a number with a decimal point
234 
235  """
236  return isfloat(word) and not isint(word)
237 
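# Editorial sketch (illustrative, not part of the original file): the three
# matchers are progressively more permissive; note they return re match
# objects (truthy) or None/False rather than strict booleans.
def _example_number_matchers():
    assert isint('-354') and not isint('153.0')
    assert isfloat('2.') and isfloat('1e-4') and isfloat('123')
    assert isdecimal('153.0') and not isdecimal('123')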
238 def floatornan(word):
239  """Returns a big number if we encounter NaN.
240 
241  @param[in] word The string to be converted
242  @return answer The string converted to a float; if not a float, return 1e10
243  @todo I could use suggestions for making this better.
244  """
245  big = 1e10
246  if isfloat(word):
247  return float(word)
248  else:
249  logger.info("Setting %s to % .1e\n" % (word, big))
250  return big
251 
252 def col(vec):
253  """
254  Given any list, array, or matrix, return a 1-column matrix.
255 
256  Input:
257  vec = The input vector that is to be made into a column
258 
259  Output:
260  A column matrix
261  """
262  return mat(array(vec).reshape(-1, 1))
263 
264 def row(vec):
265  """Given any list, array, or matrix, return a 1-row matrix.
266 
267  @param[in] vec The input vector that is to be made into a row
268 
269  @return answer A row matrix
270  """
271  return mat(array(vec).reshape(1, -1))
272 
273 def flat(vec):
274  """Given any list, array, or matrix, return a single-index array.
275 
276  @param[in] vec The data to be flattened
277  @return answer The flattened data
278  """
279  return array(vec).reshape(-1)
280 
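# Editorial sketch (illustrative, not part of the original file): col / row /
# flat convert between flat data and 1-column / 1-row matrices.
def _example_col_row_flat():
    v = [1.0, 2.0, 3.0]
    assert col(v).shape == (3, 1)
    assert row(v).shape == (1, 3)
    assert flat(col(v)).shape == (3,)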
281 #====================================#
282 #| Math: Vectors and linear algebra |#
283 #====================================#
284 def orthogonalize(vec1, vec2):
285  """Given two vectors vec1 and vec2, project out the component of vec1
286  that is along the vec2-direction.
287 
288  @param[in] vec1 The projectee (i.e. output is some modified version of vec1)
289  @param[in] vec2 The projector (component subtracted out from vec1 is parallel to this)
290  @return answer A copy of vec1 but with the vec2-component projected out.
291  """
292  v2u = vec2/norm(vec2)
293  return vec1 - v2u*dot(vec1, v2u)
294 
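# Editorial sketch (illustrative, not part of the original file): after the
# projection the result is orthogonal to the second vector.
def _example_orthogonalize():
    v1 = np.array([1.0, 1.0, 0.0])
    v2 = np.array([1.0, 0.0, 0.0])
    v1p = orthogonalize(v1, v2)     # array([0., 1., 0.])
    assert abs(dot(v1p, v2)) < 1e-12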
295 def invert_svd(X,thresh=1e-12):
296 
297  """
298 
299  Invert a matrix using singular value decomposition.
300  @param[in] X The matrix to be inverted
301  @param[in] thresh The SVD threshold; eigenvalues below this are not inverted but set to zero
302  @return Xt The inverted matrix
303 
304  """
305 
306  u,s,vh = svd(X, full_matrices=0)
307  uh = mat(transpose(u))
308  v = mat(transpose(vh))
309  si = s.copy()
310  for i in range(s.shape[0]):
311  if abs(s[i]) > thresh:
312  si[i] = 1./s[i]
313  else:
314  si[i] = 0.0
315  si = mat(diag(si))
316  Xt = v*si*uh
317  return Xt
318 
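# Editorial sketch (illustrative, not part of the original file): for a
# well-conditioned matrix the SVD inverse agrees with the ordinary inverse;
# singular values below 'thresh' would simply be zeroed out.
def _example_invert_svd():
    A = mat([[2.0, 0.0], [1.0, 3.0]])
    assert np.allclose(A * invert_svd(A), eye(2))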
319 #==============================#
320 #| Linear least squares |#
321 #==============================#
322 def get_least_squares(x, y, w = None, thresh=1e-12):
323  """
324  @code
325  __ __
326  | |
327  | 1 (x0) (x0)^2 (x0)^3 |
328  | 1 (x1) (x1)^2 (x1)^3 |
329  | 1 (x2) (x2)^2 (x2)^3 |
330  | 1 (x3) (x3)^2 (x3)^3 |
331  | 1 (x4) (x4)^2 (x4)^3 |
332  |__ __|
333 
334  @endcode
335 
336  @param[in] X (2-D array) An array of X-values (see above)
337  @param[in] Y (array) An array of Y-values (only used in getting the least squares coefficients)
338  @param[in] w (array) An array of weights, hopefully normalized to one.
339  @param[out] Beta The least-squares coefficients
340  @param[out] Hat The hat matrix that takes linear combinations of data y-values to give fitted y-values (weights)
341  @param[out] yfit The fitted y-values
342  @param[out] MPPI The Moore-Penrose pseudoinverse (multiply by Y to get least-squares coefficients, multiply by dY/dk to get derivatives of least-squares coefficients)
343  """
344  # X is a 'tall' matrix.
345  X = mat(x)
346  Y = col(y)
347  n_x = X.shape[0]
348  n_fit = X.shape[1]
349  if n_fit > n_x:
350  logger.warning("Argh? It seems like this problem is underdetermined!\n")
351  # Build the weight matrix.
352  if w != None:
353  if len(w) != n_x:
354  warn_press_key("The weight array length (%i) must be the same as the number of 'X' data points (%i)!" % (len(w), n_x))
355  w /= mean(w)
356  WH = mat(diag(w**0.5))
357  else:
358  WH = mat(eye(n_x))
359  # Make the Moore-Penrose Pseudoinverse.
360  # if n_fit == n_x:
361  # MPPI = np.linalg.inv(WH*X)
362  # else:
363  # This resembles the formula (X'WX)^-1 X' W^1/2
364  MPPI = np.linalg.pinv(WH*X)
365  Beta = MPPI * WH * Y
366  Hat = WH * X * MPPI
367  yfit = flat(Hat * Y)
368  # Return three things: the least-squares coefficients, the hat matrix (turns y into yfit), and yfit
369  # We could get these all from MPPI, but I might get confused later on, so might as well do it here :P
370  return Beta, Hat, yfit, MPPI
371 
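# Editorial sketch (illustrative, not part of the original file): fitting a
# quadratic y = 1 + 2x + 3x^2 by building the 'tall' design matrix described
# in the docstring above.
def _example_least_squares():
    xdata = np.linspace(-1, 1, 11)
    X = np.array([[1.0, xi, xi**2] for xi in xdata])
    y = 1.0 + 2.0*xdata + 3.0*xdata**2
    Beta, Hat, yfit, MPPI = get_least_squares(X, y)
    assert np.allclose(flat(Beta), [1.0, 2.0, 3.0])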
372 #===========================================#
373 #| John's statisticalInefficiency function |#
374 #===========================================#
375 def statisticalInefficiency(A_n, B_n=None, fast=False, mintime=3, warn=True):
376 
377  """
378  Compute the (cross) statistical inefficiency of (two) timeseries.
379 
380  Notes
381  The same timeseries can be used for both A_n and B_n to get the autocorrelation statistical inefficiency.
382  The fast method described in Ref [1] is used to compute g.
383 
384  References
385  [1] J. D. Chodera, W. C. Swope, J. W. Pitera, C. Seok, and K. A. Dill. Use of the weighted
386  histogram analysis method for the analysis of simulated and parallel tempering simulations.
387  JCTC 3(1):26-41, 2007.
388 
389  Examples
390 
391  Compute statistical inefficiency of timeseries data with known correlation time.
392 
393  >>> import timeseries
394  >>> A_n = timeseries.generateCorrelatedTimeseries(N=100000, tau=5.0)
395  >>> g = statisticalInefficiency(A_n, fast=True)
396 
397  @param[in] A_n (required, numpy array) - A_n[n] is nth value of
398  timeseries A. Length is deduced from vector.
399 
400  @param[in] B_n (optional, numpy array) - B_n[n] is nth value of
401  timeseries B. Length is deduced from vector. If supplied, the
402  cross-correlation of timeseries A and B will be estimated instead of
403  the autocorrelation of timeseries A.
404 
405  @param[in] fast (optional, boolean) - if True, will use faster (but
406  less accurate) method to estimate correlation time, described in
407  Ref. [1] (default: False)
408 
409  @param[in] mintime (optional, int) - minimum amount of correlation
410  function to compute (default: 3) The algorithm terminates after
411  computing the correlation time out to mintime when the correlation
412  function first goes negative. Note that this time may need to be
413  increased if there is a strong initial negative peak in the
414  correlation function.
415 
416  @return g The estimated statistical inefficiency (equal to 1 + 2
417  tau, where tau is the correlation time). We enforce g >= 1.0.
418 
419  """
420 
421  # Create numpy copies of input arguments.
422  A_n = array(A_n)
423  if B_n is not None:
424  B_n = array(B_n)
425  else:
426  B_n = array(A_n)
427  # Get the length of the timeseries.
428  N = A_n.size
429  # Be sure A_n and B_n have the same dimensions.
430  if(A_n.shape != B_n.shape):
431  raise Exception('A_n and B_n must have same dimensions.')
432  # Initialize statistical inefficiency estimate with uncorrelated value.
433  g = 1.0
434  # Compute mean of each timeseries.
435  mu_A = A_n.mean()
436  mu_B = B_n.mean()
437  # Make temporary copies of fluctuation from mean.
438  dA_n = A_n.astype(np.float64) - mu_A
439  dB_n = B_n.astype(np.float64) - mu_B
440  # Compute estimator of covariance of (A,B) using estimator that will ensure C(0) = 1.
441  sigma2_AB = (dA_n * dB_n).mean() # standard estimator to ensure C(0) = 1
442  # Trap the case where this covariance is zero, and we cannot proceed.
443  if(sigma2_AB == 0):
444  if warn:
445  logger.warning('Sample covariance sigma_AB^2 = 0 -- cannot compute statistical inefficiency')
446  return 1.0
447  # Accumulate the integrated correlation time by computing the normalized correlation time at
448  # increasing values of t. Stop accumulating if the correlation function goes negative, since
449  # this is unlikely to occur unless the correlation function has decayed to the point where it
450  # is dominated by noise and indistinguishable from zero.
451  t = 1
452  increment = 1
453  while (t < N-1):
454  # compute normalized fluctuation correlation function at time t
455  C = sum( dA_n[0:(N-t)]*dB_n[t:N] + dB_n[0:(N-t)]*dA_n[t:N] ) / (2.0 * float(N-t) * sigma2_AB)
456  # Terminate if the correlation function has crossed zero and we've computed the correlation
457  # function at least out to 'mintime'.
458  if (C <= 0.0) and (t > mintime):
459  break
460  # Accumulate contribution to the statistical inefficiency.
461  g += 2.0 * C * (1.0 - float(t)/float(N)) * float(increment)
462  # Increment t and the amount by which we increment t.
463  t += increment
464  # Increase the interval if "fast mode" is on.
465  if fast: increment += 1
466  # g must be at least unity
467  if (g < 1.0): g = 1.0
468  # Return the computed statistical inefficiency.
469  return g
470 
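# Editorial sketch (illustrative, not part of the original file): for
# uncorrelated (white-noise) data the statistical inefficiency is close to 1,
# and it grows with the correlation time of the series.
def _example_statistical_inefficiency():
    np.random.seed(0)
    white = np.random.randn(5000)
    return statisticalInefficiency(white)   # approximately 1.0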
471 #==============================#
472 #| XML Pickle stuff |#
473 #==============================#
474 try:
475  from lxml import etree
476 except:
477  logger.warning("lxml module import failed (You can't use OpenMM or XML force fields)\n")
478 ## Pickle uses 'flags' to pickle and unpickle different variable types.
479 ## Here we use the letter 'x' to signify that the variable type is an XML file.
480 XMLFILE='x'
481 
482 class Pickler_LP(pickle.Pickler):
483  """ A subclass of the python Pickler that implements pickling of _ElementTree types. """
484  def __init__(self, file, protocol=None):
485  pickle.Pickler.__init__(self, file, protocol)
486  ## The element tree is saved as a string.
487  def save_etree(self, obj):
488  try:
489  ## Convert the element tree to string.
490  String = etree.tostring(obj)
491  ## The rest is copied from the Pickler class
492  if self.bin:
493  logger.error("self.bin is True, not sure what to do with myself\n")
494  raw_input()
495  else:
496  self.write(XMLFILE + repr(String) + '\n')
497  self.memoize(String)
498  except:
499  warn_once("Cannot save XML files; if using OpenMM install libxml2+libxslt+lxml. Otherwise don't worry.")
500  try:
501  self.dispatch[etree._ElementTree] = save_etree
502  except:
503  warn_once("Cannot save XML files; if using OpenMM install libxml2+libxslt+lxml. Otherwise don't worry.")
504 
505 class Unpickler_LP(pickle.Unpickler):
506  """ A subclass of the python Unpickler that implements unpickling of _ElementTree types. """
507  def __init__(self, file):
508  pickle.Unpickler.__init__(self, file)
509  def load_etree(self):
510  try:
511  ## This stuff is copied from the Unpickler class
512  rep = self.readline()[:-1]
513  for q in "\"'": # double or single quote
514  if rep.startswith(q):
515  if not rep.endswith(q):
516  raise ValueError, "insecure string pickle"
517  rep = rep[len(q):-len(q)]
518  break
519  else:
520  raise ValueError, "insecure string pickle"
521  ## The string is converted to an _ElementTree type before it is finally loaded.
522  self.append(etree.ElementTree(etree.fromstring(rep.decode("string-escape"))))
523  except:
524  warn_once("Cannot load XML files; if using OpenMM install libxml2+libxslt+lxml. Otherwise don't worry.")
525  try:
526  self.dispatch[XMLFILE] = load_etree
527  except:
528  warn_once("Cannot load XML files; if using OpenMM install libxml2+libxslt+lxml. Otherwise don't worry.")
529 
530 def lp_dump(obj, file, protocol=None):
531  """ Use this instead of pickle.dump for pickling anything that contains _ElementTree types. """
532  Pickler_LP(file, protocol).dump(obj)
533 
534 def lp_load(file):
535  """ Use this instead of pickle.load for unpickling anything that contains _ElementTree types. """
536  return Unpickler_LP(file).load()
537 
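# Editorial sketch (illustrative, not part of the original file): lp_dump and
# lp_load are drop-in replacements for pickle.dump / pickle.load when the
# object may contain lxml ElementTrees; the file name is a placeholder.
def _example_lp_pickle(obj, fnm='forcebalance.p'):
    with open(fnm, 'w') as f:
        lp_dump(obj, f)
    with open(fnm) as f:
        return lp_load(f)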
538 #==============================#
539 #| Work Queue stuff |#
540 #==============================#
541 try:
542  import work_queue
543 except:
544  logger.warning("Work Queue library import failed (You can't queue up jobs using Work Queue)\n")
545 
546 # Global variable corresponding to the Work Queue object
547 WORK_QUEUE = None
548 
549 # Global variable containing a mapping from target names to Work Queue task IDs
550 WQIDS = defaultdict(list)
551 
552 def getWorkQueue():
553  global WORK_QUEUE
554  return WORK_QUEUE
556 def getWQIds():
557  global WQIDS
558  return WQIDS
559 
560 def createWorkQueue(wq_port, debug=True):
561  global WORK_QUEUE
562  if debug:
563  work_queue.set_debug_flag('all')
564  WORK_QUEUE = work_queue.WorkQueue(port=wq_port, catalog=True, exclusive=False, shutdown=False)
565  WORK_QUEUE.tasks_failed = 0 # Counter for tasks that fail at the application level
566  WORK_QUEUE.specify_name('forcebalance')
567  #WORK_QUEUE.specify_keepalive_timeout(8640000)
568  WORK_QUEUE.specify_keepalive_interval(8640000)
569 
570 def destroyWorkQueue():
571  # Convenience function to destroy the Work Queue objects.
572  global WORK_QUEUE, WQIDS
573  WORK_QUEUE = None
574  WQIDS = defaultdict(list)
575 
576 def queue_up(wq, command, input_files, output_files, tgt=None, verbose=True):
577  """
578  Submit a job to the Work Queue.
579 
580  @param[in] wq (Work Queue Object)
581  @param[in] command (string) The command to run on the remote worker.
582  @param[in] input_files (list of files) A list of locations of the input files.
583  @param[in] output_files (list of files) A list of locations of the output files.
584  """
585  global WQIDS
586  task = work_queue.Task(command)
587  cwd = os.getcwd()
588  for f in input_files:
589  lf = os.path.join(cwd,f)
590  task.specify_input_file(lf,f,cache=False)
591  for f in output_files:
592  lf = os.path.join(cwd,f)
593  task.specify_output_file(lf,f,cache=False)
594  task.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_FCFS)
595  task.specify_tag(command)
596  taskid = wq.submit(task)
597  if verbose:
598  logger.info("Submitting command '%s' to the Work Queue, taskid %i\n" % (command, taskid))
599  if tgt != None:
600  WQIDS[tgt.name].append(taskid)
601  else:
602  WQIDS["None"].append(taskid)
603 
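# Editorial sketch (illustrative, not part of the original file): a typical
# submit-and-wait cycle, assuming the work_queue module is installed; the
# port number, command, and file names are placeholders.
def _example_queue_up():
    createWorkQueue(9230)
    wq = getWorkQueue()
    queue_up(wq, command='./run_calculation.sh > run.log 2>&1',
             input_files=['run_calculation.sh', 'input.dat'],
             output_files=['run.log', 'output.dat'])
    wq_wait(wq)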
604 def queue_up_src_dest(wq, command, input_files, output_files, tgt=None, verbose=True):
605  """
606  Submit a job to the Work Queue. This function is a bit fancier in that we can explicitly
607  specify where the input files come from, and where the output files go to.
608 
609  @param[in] wq (Work Queue Object)
610  @param[in] command (string) The command to run on the remote worker.
611  @param[in] input_files (list of 2-tuples) A list of local and
612  remote locations of the input files.
613  @param[in] output_files (list of 2-tuples) A list of local and
614  remote locations of the output files.
615  """
616  global WQIDS
617  task = work_queue.Task(command)
618  for f in input_files:
619  # print f[0], f[1]
620  task.specify_input_file(f[0],f[1],cache=False)
621  for f in output_files:
622  # print f[0], f[1]
623  task.specify_output_file(f[0],f[1],cache=False)
624  task.specify_algorithm(work_queue.WORK_QUEUE_SCHEDULE_FCFS)
625  task.specify_tag(command)
626  taskid = wq.submit(task)
627  if verbose:
628  logger.info("Submitting command '%s' to the Work Queue, taskid %i\n" % (command, taskid))
629  if tgt != None:
630  WQIDS[tgt.name].append(taskid)
631  else:
632  WQIDS["None"].append(taskid)
633 
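# Editorial sketch (illustrative, not part of the original file): same idea as
# above, but with explicit (local path, remote name) pairs; the paths are
# placeholders.
def _example_queue_up_src_dest():
    wq = getWorkQueue()
    queue_up_src_dest(wq, command='./npt.py > npt.log 2>&1',
                      input_files=[('targets/liquid/npt.py', 'npt.py')],
                      output_files=[('targets/liquid/npt.log', 'npt.log')])
    wq_wait(wq)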
634 def wq_wait1(wq, wait_time=10, wait_intvl=1, print_time=60, verbose=False):
635  """ This function waits wait_time (default 10) seconds to see if a task in the Work Queue has finished. """
636  global WQIDS
637  if verbose: logger.info('---\n')
638  if wait_intvl >= wait_time:
639  wait_time = wait_intvl
640  numwaits = 1
641  else:
642  numwaits = wait_time / wait_intvl
643  for sec in range(numwaits):
644  task = wq.wait(wait_intvl)
645  if task:
646  exectime = task.cmd_execution_time/1000000
647  if verbose:
648  logger.info('A job has finished!\n')
649  logger.info('Job name = %s command = %s\n' % (task.tag, task.command))
650  logger.info("status = %s\n" % task.status)
651  logger.info("return_status = %s\n" % task.return_status)
652  logger.info("result = %s\n" % task.result)
653  logger.info("host = %s\n" % task.hostname)
654  logger.info("execution time = %s seconds\n" % exectime)
655  logger.info("total_bytes_transferred = %s\n" % task.total_bytes_transferred)
656  if task.result != 0:
657  oldid = task.id
658  oldhost = task.hostname
659  tgtname = "None"
660  for tnm in WQIDS:
661  if task.id in WQIDS[tnm]:
662  tgtname = tnm
663  WQIDS[tnm].remove(task.id)
664  taskid = wq.submit(task)
665  logger.warning("Command '%s' (task %i) failed on host %s (%i seconds), resubmitted: taskid %i\n" % (task.command, oldid, oldhost, exectime, taskid))
666  WQIDS[tgtname].append(taskid)
667  wq.tasks_failed += 1
668  else:
669  if exectime > print_time: # Assume that we're only interested in printing jobs that last longer than a minute.
670  logger.info("Command '%s' (task %i) finished successfully on host %s (%i seconds)\n" % (task.command, task.id, task.hostname, exectime))
671  for tnm in WQIDS:
672  if task.id in WQIDS[tnm]:
673  WQIDS[tnm].remove(task.id)
674  del task
675  try:
676  # Full workers were added with CCTools 4.0.1
677  nbusy = wq.stats.workers_busy + wq.stats.workers_full
678  except:
679  nbusy = wq.stats.workers_busy
680 
681  try:
682  Complete = wq.stats.total_tasks_complete - wq.tasks_failed
683  Total = wq.stats.total_tasks_dispatched - wq.tasks_failed
684  except:
685  logger.warning("wq object has no tasks_failed attribute, please use createWorkQueue() function.\n")
686  Complete = wq.stats.total_tasks_complete
687  Total = wq.stats.total_tasks_dispatched
688 
689  if verbose:
690  logger.info("Workers: %i init, %i ready, %i busy, %i total joined, %i total removed\n" \
691  % (wq.stats.workers_init, wq.stats.workers_ready, nbusy, wq.stats.total_workers_joined, wq.stats.total_workers_removed))
692  logger.info("Tasks: %i running, %i waiting, %i total dispatched, %i total complete\n" \
693  % (wq.stats.tasks_running,wq.stats.tasks_waiting,Total,Complete))
694  logger.info("Data: %i / %i kb sent/received\n" % (wq.stats.total_bytes_sent/1024, wq.stats.total_bytes_received/1024))
695  else:
696  logger.info("\r%s : %i/%i workers busy; %i/%i jobs complete\r" %\
697  (time.ctime(),
698  nbusy, (wq.stats.total_workers_joined - wq.stats.total_workers_removed),
699  Complete, Total))
700  if time.time() - wq_wait1.t0 > 900:
701  wq_wait1.t0 = time.time()
702  logger.info('\n')
703 wq_wait1.t0 = time.time()
704 
705 def wq_wait(wq, wait_time=10, wait_intvl=10, print_time=60, verbose=False):
706  """ This function waits until the work queue is completely empty. """
707  while not wq.empty():
708  wq_wait1(wq, wait_time=wait_time, wait_intvl=wait_intvl, print_time=print_time, verbose=verbose)
709 
710 #=====================================#
711 #| File and process management stuff |#
712 #=====================================#
713 def GoInto(Dir):
714  if os.path.exists(Dir):
715  if os.path.isdir(Dir): pass
716  else: raise Exception("Tried to create directory %s, it exists but isn't a directory" % Dir)
717  else:
718  os.makedirs(Dir)
719  os.chdir(Dir)
720 
721 def allsplit(Dir):
722  # Split a directory into all directories involved.
723  s = os.path.split(os.path.normpath(Dir))
724  if s[1] == '' or s[1] == '.' : return []
725  return allsplit(s[0]) + [s[1]]
726 
727 def Leave(Dir):
728  if os.path.split(os.getcwd())[1] != Dir:
729  raise Exception("Trying to leave directory %s, but we're actually in directory %s (check your code)" % (Dir,os.path.split(os.getcwd())[1]))
730  for i in range(len(allsplit(Dir))):
731  os.chdir('..')
732 
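# Editorial sketch (illustrative, not part of the original file): GoInto and
# Leave bracket work done inside a (possibly newly created) subdirectory; the
# directory name is a placeholder.
def _example_goto_leave():
    GoInto('scratch')
    # ... write temporary files in ./scratch ...
    Leave('scratch')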
733 # Dictionary containing specific error messages for specific missing files or file patterns
734 specific_lst = [(['mdrun','grompp','trjconv','g_energy','g_traj'], "Make sure to install GROMACS and add it to your path (or set the gmxpath option)"),
735  (['force.mdin', 'stage.leap'], "This file is needed for setting up AMBER force matching targets"),
736  (['conf.pdb', 'mono.pdb'], "This file is needed for setting up OpenMM condensed phase property targets"),
737  (['liquid.xyz', 'liquid.key', 'mono.xyz', 'mono.key'], "This file is needed for setting up OpenMM condensed phase property targets"),
738  (['dynamic', 'analyze', 'minimize', 'testgrad', 'vibrate', 'optimize', 'polarize', 'superpose'], "Make sure to install TINKER and add it to your path (or set the tinkerpath option)"),
739  (['runcuda.sh', 'npt.py', 'npt_tinker.py'], "This file belongs in the ForceBalance source directory, not sure why it is missing"),
740  (['input.xyz'], "This file is needed for TINKER molecular property targets"),
741  (['.*key$', '.*xyz$'], "I am guessing this file is probably needed by TINKER"),
742  (['.*gro$', '.*top$', '.*itp$', '.*mdp$', '.*ndx$'], "I am guessing this file is probably needed by GROMACS")
743  ]
744 
745 # Build a dictionary mapping all of the keys in the above lists to their error messages
746 specific_dct = dict(list(itertools.chain(*[[(j,i[1]) for j in i[0]] for i in specific_lst])))
747 
748 def MissingFileInspection(fnm):
749  fnm = os.path.split(fnm)[1]
750  answer = ""
751  for key in specific_dct:
752  if answer == "":
753  answer += "\n"
754  if match(key, fnm):
755  answer += "%s\n" % specific_dct[key]
756  return answer
757 
758 def LinkFile(src, dest):
759  if os.path.exists(src):
760  if os.path.exists(dest):
761  if os.path.islink(dest): pass
762  else: raise Exception("Tried to create symbolic link %s to %s, destination exists but isn't a symbolic link" % (src, dest))
763  else:
764  os.symlink(src, dest)
765  else:
766  raise Exception("Tried to create symbolic link %s to %s, but source file doesn't exist%s" % (src,dest,MissingFileInspection(src)))
767 
768 
769 def CopyFile(src, dest):
770  if os.path.exists(src):
771  if os.path.exists(dest):
772  if os.path.islink(dest):
773  raise Exception("Tried to copy %s to %s, destination exists but it's a symbolic link" % (src, dest))
774  else:
775  shutil.copy2(src, dest)
776  else:
777  raise Exception("Tried to copy %s to %s, but source file doesn't exist%s" % (src,dest,MissingFileInspection(src)))
778 
779 def link_dir_contents(abssrcdir, absdestdir):
780  for fnm in os.listdir(abssrcdir):
781  srcfnm = os.path.join(abssrcdir, fnm)
782  destfnm = os.path.join(absdestdir, fnm)
783  if os.path.isfile(srcfnm):
784  if not os.path.exists(destfnm):
785  #print "Linking %s to %s" % (srcfnm, destfnm)
786  os.symlink(srcfnm, destfnm)
787 
788 def remove_if_exists(fnm):
789  """ Remove the file if it exists (doesn't return an error). """
790  if os.path.exists(fnm):
791  os.remove(fnm)
792 
793 def which(fnm):
794  # Get the location of a file. Works only on UNIX-like file systems.
795  try:
796  return os.path.split(os.popen('which %s 2> /dev/null' % fnm).readlines()[0].strip())[0]
797  except:
798  return ''
799 
800 def _exec(command, print_to_screen = False, outfnm = None, logfnm = None, stdin = "", print_command = True, copy_stderr = False, persist = False, expand_cr=False, **kwargs):
801  """Runs command line using subprocess, optionally returning stdout.
802  Options:
803  command (required) = Name of the command you want to execute
804  outfnm (optional) = Name of the output file name (overwritten if exists)
805  logfnm (optional) = Name of the log file name (appended if exists)
806  stdin (optional) = A string to be passed to stdin, as if it were typed (use newline character to mimic Enter key)
807  print_command = Whether to print the command.
808  copy_stderr = Whether to copy the stderr stream to the stdout stream; useful for GROMACS which prints out everything to stderr (argh.)
809  expand_cr = Whether to expand carriage returns into newlines (useful for GROMACS mdrun).
810  persist = Continue execution even if the command gives a nonzero return code.
811  """
812  # Dictionary of options to be passed to the Popen object.
813  cmd_options={'shell':(type(command) is str), 'stdin':PIPE, 'stdout':PIPE, 'stderr':PIPE, 'universal_newlines':expand_cr}
815  # "write to file" : Function for writing some characters to the log and/or output files.
816  def wtf(out):
817  if logfnm != None:
818  with open(logfnm,'a+') as f:
819  f.write(out)
820  f.flush()
821  if outfnm != None:
822  with open(outfnm,'w+' if wtf.first else 'a+') as f:
823  f.write(out)
824  f.flush()
825  wtf.first = False
826  wtf.first = True
827 
828  # Preserve backwards compatibility; sometimes None gets passed to stdin.
829  if stdin == None: stdin = ""
830 
831  if print_command:
832  logger.info("Executing process: \x1b[92m%-50s\x1b[0m%s%s%s\n" % (' '.join(command) if type(command) is list else command,
833  " Output: %s" % outfnm if outfnm != None else "",
834  " Append: %s" % logfnm if logfnm != None else "",
835  (" Stdin: %s" % stdin.replace('\n','\\n')) if stdin else ""))
836  wtf("Executing process: %s%s\n" % (command, (" Stdin: %s" % stdin.replace('\n','\\n')) if stdin else ""))
837 
838  cmd_options.update(kwargs)
839  p = subprocess.Popen(command, **cmd_options)
840 
841  stdout = ""
842  stderr = ""
843 
844  p.stdin.write(stdin)
845  p.stdin.close()
846 
847  while True:
848  reads = [p.stdout.fileno(), p.stderr.fileno()]
849  ret = select(reads, [], [])
850  for fd in ret[0]:
851  if fd == p.stdout.fileno():
852  read = p.stdout.readline()
853  if print_to_screen: sys.stdout.write(read)
854  stdout += read
855  wtf(read)
856  if fd == p.stderr.fileno():
857  read = p.stderr.readline()
858  if print_to_screen: sys.stderr.write(read)
859  stderr += read
860  if copy_stderr:
861  stdout += read
862  wtf(read)
863  if not read:
864  break
865 
866  p.wait()
867 
868  if p.returncode != 0:
869  if stderr:
870  logger.warning("Received an error message:\n")
871  logger.warning("\n[====] \x1b[91mError Message\x1b[0m [====]\n")
872  logger.warning(stderr)
873  logger.warning("[====] \x1b[91mEnd o'Message\x1b[0m [====]\n")
874  if persist:
875  logger.info("%s gave a return code of %i (it may have crashed) -- carrying on\n" % (command, p.returncode))
876  else:
877  # This code (commented out) would not throw an exception, but instead exit with the returncode of the crashed program.
878  # sys.stderr.write("\x1b[1;94m%s\x1b[0m gave a return code of %i (\x1b[91mit may have crashed\x1b[0m)\n" % (command, p.returncode))
879  # sys.exit(p.returncode)
880  raise Exception("\x1b[1;94m%s\x1b[0m gave a return code of %i (\x1b[91mit may have crashed\x1b[0m)\n" % (command, p.returncode))
881 
882  # Return the output in the form of a list of lines, so we can loop over it using "for line in output".
883  return stdout.split('\n')
884 
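# Editorial sketch (illustrative, not part of the original file): capturing
# the output of a short shell command; the command itself is a placeholder.
def _example_exec():
    lines = _exec("echo hello; echo world", print_command=False)
    assert lines[0] == "hello"
    return lines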
885 def warn_press_key(warning, timeout=10):
886  if type(warning) is str:
887  logger.warning(warning + '\n')
888  elif type(warning) is list:
889  for line in warning:
890  logger.warning(line + '\n')
891  else:
892  logger.warning("You're not supposed to pass me a variable of this type: " + str(type(warning)))
893  if sys.stdin.isatty():
894  logger.warning("\x1b[1;91mPress Enter or wait %i seconds (I assume no responsibility for what happens after this!)\x1b[0m\n" % timeout)
895  try: rlist, wlist, xlist = select([sys.stdin], [], [], timeout)
896  except: pass
897 
898 def warn_once(warning, warnhash = None):
899  """ Prints a warning but will only do so once in a given run. """
900  if warnhash == None:
901  warnhash = warning
902  if warnhash in warn_once.already:
903  return
904  warn_once.already.add(warnhash)
905  if type(warning) is str:
906  logger.info(warning + '\n')
907  elif type(warning) is list:
908  for line in warning:
909  logger.info(line + '\n')
910 warn_once.already = set()
912 #=========================================#
913 #| Development stuff (not commonly used) |#
914 #=========================================#
915 def concurrent_map(func, data):
916  """
917  Similar to the builtin function map(), but spawns a thread for each
918  argument and applies `func` concurrently.
919 
920  Note: unlike map(), we cannot take an iterable argument. `data` should be an
921  indexable sequence.
922  """
923 
924  N = len(data)
925  result = [None] * N
927  # wrapper to dispose the result in the right slot
928  def task_wrapper(i):
929  result[i] = func(data[i])
930 
931  threads = [threading.Thread(target=task_wrapper, args=(i,)) for i in xrange(N)]
932  for t in threads:
933  t.start()
934  for t in threads:
935  t.join()
936 
937  return result
938 
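# Editorial sketch (illustrative, not part of the original file):
# concurrent_map preserves input order even though each call runs in its own
# thread.
def _example_concurrent_map():
    squares = concurrent_map(lambda x: x * x, [1, 2, 3, 4])
    assert squares == [1, 4, 9, 16]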
939 
940 def multiopen(arg):
941  """
942  This function can be given any of several variable types
943  (single file name, file object, or list of lines, or a list of the above)
944  and gives back a list of files:
945 
946  [file1, file2, file3 ... ]
947 
948  each of which can then be iterated over:
949 
950  [[file1_line1, file1_line2 ... ], [file2_line1, file2_line2 ... ]]
951  """
952  if type(arg) == str:
953  # A single file name
954  fins = [open(arg)]
955  elif type(arg) == file:
956  # A file object
957  fins = [[arg]]
958  elif type(arg) == list:
959  if all([type(l) == str for l in arg]):
960  # A list of lines (as in, open(file).readlines()) is expected to end with \n on most of the lines.
961  if any([match("^.*\n$",l) for l in arg]):
962  fins = [[arg]]
963  # In contrast, a list of file names doesn't have \n characters.
964  else:
965  fins = [open(l) for l in arg]
966  elif all([type(l) == file or type(l) == list for l in arg]):
967  fins = arg
968  else:
969  logger.info("What did you give this program as input?\n")
970  logger.info(str(arg) + '\n')
971  exit(1)
972  else:
973  logger.info("What did you give this program as input?\n")
974  logger.info(str(arg) + '\n')
975  exit(1)
976  return fins
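# Editorial sketch (illustrative, not part of the original file): all three
# accepted input styles yield a list of iterable "files"; the file names are
# placeholders.
def _example_multiopen():
    fins1 = multiopen('settings.in')                      # single file name
    fins2 = multiopen(['settings.in', 'extra.in'])        # list of file names
    fins3 = multiopen(open('settings.in').readlines())    # list of lines
    return fins1, fins2, fins3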