Source code for kapteyn.tabarray

#----------------------------------------------------------------------
# FILE:    tabarray.py
# PURPOSE: This module provides a class which allows the user to read,
#          write and manipulate simple table-like structures.
#          It is based on NumPy and the table-reading part has been
#          optimized for speed.  When the flexibility of SciPy's
#          read_array() function is not needed, Tabarray can
#          be considered as an alternative. 
# AUTHOR:  J.P.Terlouw, University of Groningen, The Netherlands
# DATE:    September 29, 2008
# UPDATE:  October 9, 2008
# VERSION: 1.0
#
# (C) University of Groningen
# Kapteyn Astronomical Institute
# Groningen, The Netherlands
#----------------------------------------------------------------------
"""
================
Module tabarray
================

.. sectionauthor:: Hans Terlouw <gipsy@astro.rug.nl>

.. highlight:: python
   :linenothreshold: 1000

Module tabarray provides a class which allows the user to read,
write and manipulate simple table-like structures.
It is based on NumPy and the table-reading part has been
optimized for speed.  When the flexibility of SciPy's
read_array() function is not needed, Tabarray can
be considered as an alternative.

Class tabarray
--------------

.. autoclass:: tabarray(source[, comchar='#!', sepchar=' \\\\t', lines=None, bad=None, segsep=None])
   :members:


Functions
---------

.. autofunction:: readColumns

.. autofunction:: writeColumns
"""

import numpy, string
from collections.abc import Sequence
from kapteyn.ascarray import ascarray

def isSequenceType(obj):
        return isinstance(obj, Sequence)

[docs]class tabarray(numpy.ndarray):

   """
Tabarray is a subclass of NumPy's ndarray.  It provides all of
ndarray's functionality as well as some extra methods and attributes. 

:param source:
   the object from which the tabarray object is constructed.  It can be a
   2-dimensional NumPy array, a list or tuple containing the table columns
   as 1-dimensional NumPy arrays, or a string with the name of a text file
   containing the table.  Only in the latter case the other arguments are
   meaningful.
:param comchar:
   a string with characters which are used to designate comments in the
   input file.  The occurrence of any of these characters on a line causes
   the rest of the line to be ignored.  Empty lines and lines containing
   only a comment are also ignored.
:param sepchar:
   a string containing the
   column separation characters to be used.  Columns are separated by any
   combination of these characters. 
:param lines:
   a two-element tuple or list specifying a range of lines
   to be read.  Line numbers are counted from one and the range is
   inclusive.  So (1,10) specifies the first 10 lines of a file.  Comment
   lines are included in the count.  If any element of the tuple or list is
   zero, this limit is ignored.  So (1,0) specifies the whole file, just
   like the default None. 
:param bad:
   is a number to be substituted for any field which cannot be
   decoded as a number.  The default None causes a ValueError exception to
   be raised in such cases.
:param segsep:
   a string containing segment separation characters. If any of
   these characters is present in a comment block, this comment block
   is taken as the end of the current segment. The default None indicates
   that every comment block will separate segments.
:raises:
   :exc:`IOError`, when the file cannot be opened.

   :exc:`IndexError`, when a line with an inconsistent number of fields is
   encountered in the input file.

   :exc:`ValueError`: when a field cannot be decoded as a number and
   no alternative value was specified.

**Attributes:**

.. attribute:: nrows

   the number of rows

.. attribute:: ncols

   the number of columns

.. attribute:: segments

   a list with slice objects which can be used to address the different
   segments from the table. Segments are parts of the table which
   are separated by comment blocks which meet the conditions specified
   by argument *segsep*. The following example illustrates how a program
   can iterate over all segments::

      from kapteyn.tabarray import tabarray

      coasts = tabarray('world.txt')

      for segment in coasts.segments:
         coast = coasts[segment]


**Methods:**

"""

   def __new__(cls, source, comchar='#!', sepchar=' \t', lines=None,
               bad=None, segsep=None):
      if isinstance(source, numpy.ndarray):
         return source.view(cls)
      elif isinstance(source, tuple) or isinstance(source, list):
         return numpy.column_stack(source).view(tabarray)
      else:
         arrayspec = ascarray(source, comchar, sepchar, lines, bad, segsep)
         array = arrayspec[0].view(cls)
         array.segments = arrayspec[1] 
         return array

   def __init__(self, source, comchar=None, sepchar=None, lines=None,
               bad=None, segsep=None):
      self.nrows, self.ncols = self.shape
      try:
         self.segments
      except:
         self.segments = [slice(0,self.nrows)]

   def __array_finalize__(self, obj):
      try:
         self.nrows, self.ncols = self.shape
      except:
         pass
      try:
         self.segments = [slice(0,self.nrows)]
      except:
         pass

[docs]   def columns(self, cols=None):
      """
:param cols:
   a tuple or list with the numbers (zero-relative) of the columns
   to be extracted.
:returns: a NumPy array.

Extract specified columns from a tabarray and return an array containing
these columns.  Cols is a tuple or list with the column numbers.  As the
first index of the resulting array is the column number, multiple
assignment is possible.  E.g., ``x,y = t.columns((2,3))`` delivers columns 2
and 3 in variables x and y.  Default: return all columns.

"""
      if cols is None:
         return self.T.view(numpy.ndarray)
      else:
         return self.take(cols, 1).T.view(numpy.ndarray)

[docs]   def rows(self, rows=None):
      """
:param rows:
   a tuple or list containing the numbers (zero-relative) of the rows
   to be extracted.
:return: a new tabarray.

This method extracts specified rows from a tabarray and returns a new tabarray. 
Rows is a tuple or list containing the row numbers to be extracted. 
Normal Python indexing applies, so (0, -1) specifies the first and the
last row.  Default: return whole tabarray. 
"""
      if rows is None:
         return self
      else:
         return self.take(rows, 0)

[docs]   def writeto(self, filename, rows=None, cols=None, comment=[], format=[]):
      """
Write the contents of a tabarray to a file.

:param filename:
   the name of the file to be written.
:param rows:
   a tuple or list with a selection of the rows (zero-relative) te be written.
   Default: all rows. 
:param columns:
   a tuple or list with a selection of the columns (zero-relative)
   to be written. Default: all columns. 
:param comment:
   a list with text strings which will be inserted as comments in the
   output file.  These comments will be prefixed by the hash character (#).
:param format:
   a list with format strings for formatting the output, one element per
   column, e.g., ``['%5d', ' %10.7f', ' %g']``. 



"""
      arrout = self.rows(rows)
      if cols is not None:
         arrout = arrout.take(cols, 1)
      f = open(filename, 'w')
      for line in comment:
         f.write('# %s \n' % line)
      columns = list(range(arrout.ncols))
      if not format:
         format = ['%10g ']*arrout.ncols
      for line in range(arrout.nrows):
         outline = ' '
         for column in columns:
            outline += format[column] % arrout[line, column]
         outline = outline.rstrip() + '\n'
         f.write(outline)
      f.close()

[docs]def readColumns(filename, comment='!#', cols='all', sepchar=', \t',
                rows=None, lines=None, bad=0.0,
                rowslice=(None,), colslice=(None,)):
   """
TableIO-compatible function for directly extracting table data from a file.

:param filename:
   a string with the name of a text file containing the table.
:param comment:
   a string with characters which are used to designate comments in the
   input file.  The occurrence of any of these characters on a line causes 
   the rest of the line to be ignored.  Empty lines and lines containing
   only a comment are also ignored. 
:param cols:
   a tuple or list with the column numbers or a scalar with one column number.
:param sepchar:
   a string containing the column separation characters to be used. 
   Columns are separated by any combination of these characters. 
:param rows:
   a tuple or list containing the row numbers to be extracted.
:param lines:
   a two-element tuple or list specifying a range of lines to be read. 
   Line numbers are counted from one and the range is inclusive.  So (1,10)
   specifies the first 10 lines of a file.  Comment lines are included in
   the count.  If any element of the tuple or list is zero, this limit is
   ignored.  So (1,0) specifies the whole file, just like the default None. 
:param bad:
   a number to be substituted for any field which cannot be decoded
   as a number.
:param rowslice:
   a tuple containing a Python slice indicating which rows should be selected.
   If this argument is used in combination with the argument *rows*, the latter
   should be expressed in terms of the *new* row numbers after slicing.
   Example: ``rowslice=(10, None)`` selects all rows, beginning with the
   eleventh (the first row has number 0) and ``rowslice=(10, 13)`` selects
   row numbers 10, 11 and 12.
:param colslice:
   a tuple containing a Python slice indicating which columns should be
   selected.
   If this argument is used in combination with the argument *cols*, the latter
   should be expressed in terms of the *new* column numbers after slicing.
   Selection is analogous to *rowslice*.

"""
   if cols=='all':
      cols = None
   if cols is not None and not isSequenceType(cols):
      cols = [cols]
   return tabarray(filename, comment, sepchar=sepchar, lines=lines, bad=bad
                  )[slice(*rowslice),slice(*colslice)].rows(rows).columns(cols)

[docs]def writeColumns(filename, list, comment=[]):
   """
TableIO-compatible function for directly writing table data to a file.

:param filename:
   the name of the file to be written;
:param list:
   a list containing the columns to be written.
:param comment:
   a list with text strings which will be inserted as comments in the
   output file. These comments will be prefixed by the hash character (#).
"""
   tabarray(list).writeto(filename, comment=comment)

__version__ = '1.3'
__docformat__ = 'restructuredtext'
Source code for kapteyn.tabarray

Table of Contents

Search