__all__ = ['savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt',
'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez',
'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource']
import numpy as np
import format
import sys
import os
import re
import sys
import itertools
import warnings
import weakref
from operator import itemgetter
from cPickle import load as _cload, loads
from _datasource import DataSource
from _compiled_base import packbits, unpackbits
from _iotools import LineSplitter, NameValidator, StringConverter, \
ConverterError, ConverterLockError, ConversionWarning, \
_is_string_like, has_nested_fields, flatten_dtype, \
easy_dtype, _bytes_to_name
from numpy.compat import asbytes, asstr, asbytes_nested, bytes
if sys.version_info[0] >= 3:
from io import BytesIO
else:
from cStringIO import StringIO as BytesIO
_string_like = _is_string_like
def seek_gzip_factory(f):
    """
    Return `f` as a gzip file object that supports ``seek`` and ``tell``.

    The helper class is created inside this factory so that ``gzip`` is
    only imported when it is actually needed.

    Parameters
    ----------
    f : str, gzip.GzipFile or any other object
        A ``str`` is treated as a path and opened.  An existing
        ``gzip.GzipFile`` is re-wrapped around the same underlying file
        object, keeping its name and mode.  Anything else is returned
        unchanged.

    Returns
    -------
    f : object
        The seekable gzip file, or the input itself when it is neither a
        ``str`` nor a ``gzip.GzipFile``.

    """
    import gzip

    class GzipFile(gzip.GzipFile):
        """``gzip.GzipFile`` with an emulated ``seek`` and ``tell``."""

        def seek(self, offset, whence=0):
            """
            Move to `offset` and return the resulting position.

            Only ``whence=0`` (absolute) and ``whence=1`` (relative to the
            current position) are supported; anything else raises IOError.
            """
            # Reject unsupported modes before touching the stream
            if whence not in (0, 1):
                raise IOError("Illegal argument")
            if whence == 1:
                offset = self.offset + offset

            # We can only move forwards by reading, so a backward seek is
            # a rewind followed by a forward seek
            if offset < self.offset:
                self.rewind()

            count = offset - self.offset
            for _ in range(count // 1024):
                self.read(1024)
            self.read(count % 1024)
            # Report the new position, as file-object seek() is expected to
            return self.offset

        def tell(self):
            """Return the current position in the uncompressed stream."""
            return self.offset

    if isinstance(f, str):
        f = GzipFile(f)
    elif isinstance(f, gzip.GzipFile):
        # cast to our GzipFile if its already a gzip.GzipFile
        try:
            name = f.name
        except AttributeError:
            # Backward compatibility for <= 2.5
            name = f.filename
        mode = f.mode

        f = GzipFile(fileobj=f.fileobj, filename=name)
        f.mode = mode

    return f
class BagObj(object):
    """
    BagObj(obj)

    Convert attribute look-ups to getitems on the object passed in.

    Every attribute access on a `BagObj` is forwarded to ``obj[key]``; a
    ``KeyError`` from the wrapped object is re-raised as ``AttributeError``.
    ``dir()`` lists the keys of the wrapped object when it has a ``keys``
    method.

    Parameters
    ----------
    obj : class instance
        Object on which attribute look-up is performed.  Only a weak
        reference to it is kept.

    Examples
    --------
    >>> from numpy.lib.npyio import BagObj as BO
    >>> class BagDemo(object):
    ...     def __getitem__(self, key): # An instance of BagObj(BagDemo)
    ...                                 # will call this method when any
    ...                                 # attribute look-up is required
    ...         result = "Doesn't matter what you want, "
    ...         return result + "you're gonna get this"
    ...
    >>> demo_obj = BagDemo()
    >>> bagobj = BO(demo_obj)
    >>> bagobj.hello_there
    "Doesn't matter what you want, you're gonna get this"
    >>> bagobj.I_can_be_anything
    "Doesn't matter what you want, you're gonna get this"

    """

    def __init__(self, obj):
        # Use weakref to make NpzFile objects collectable by refcount
        self._obj = weakref.proxy(obj)

    def __getattribute__(self, key):
        # object.__getattribute__ is used to reach `_obj` because a plain
        # ``self._obj`` would recurse into this very method
        try:
            return object.__getattribute__(self, '_obj')[key]
        except KeyError:
            raise AttributeError(key)

    def __dir__(self):
        """
        List the keys of the wrapped object.

        Makes ``dir(bagobj)`` (and interactive tab-completion built on it)
        show the names that attribute look-up can resolve.
        """
        wrapped = object.__getattribute__(self, '_obj')
        try:
            return list(wrapped.keys())
        except AttributeError:
            # The wrapped object only promises __getitem__; without a
            # keys() method there is nothing to enumerate
            return []
def zipfile_factory(*args, **kwargs):
    """
    Create a ``zipfile.ZipFile``, enabling Zip64 support when available.

    All positional and keyword arguments are handed to the ``ZipFile``
    constructor.  On Python 2.5 and later ``allowZip64`` is always set to
    True, replacing any value supplied by the caller.
    """
    # Imported here rather than at module level so zipfile is only loaded
    # when an archive is actually opened
    from zipfile import ZipFile

    zip64_available = sys.version_info >= (2, 5)
    if zip64_available:
        kwargs.update(allowZip64=True)
    return ZipFile(*args, **kwargs)
class NpzFile(object):
    """
    NpzFile(fid)

    A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
    format. Archive members whose name ends in ".npy" are listed with the
    extension stripped; any other member is listed under its full name.

    The arrays and file strings are lazily loaded on either
    getitem access using ``obj['key']`` or attribute lookup using
    ``obj.f.key``. Members whose content starts with the ``.npy`` magic
    prefix are returned as arrays, anything else as the raw string read
    from the archive. A list of all files (without ".npy" extensions) can
    be obtained with ``obj.files`` and the ZipFile object itself using
    ``obj.zip``.

    Attributes
    ----------
    files : list of str
        List of all files in the archive, with any ".npy" extension
        removed.
    zip : ZipFile instance
        The ZipFile object initialized with the zipped archive.
        None once the archive has been closed.
    f : BagObj instance
        An object on which attribute lookup can be performed as an
        alternative to getitem access on the `NpzFile` instance itself.
        None once the archive has been closed.

    Parameters
    ----------
    fid : file or str
        The zipped archive to open. This is either a file-like object
        or a string containing the path to the archive.
    own_fid : bool, optional
        Whether NpzFile should close the file handle.
        Requires that `fid` is a file-like object.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> outfile = TemporaryFile()
    >>> x = np.arange(10)
    >>> y = np.sin(x)
    >>> np.savez(outfile, x=x, y=y)
    >>> outfile.seek(0)

    >>> npz = np.load(outfile)
    >>> isinstance(npz, np.lib.npyio.NpzFile)
    True
    >>> npz.files
    ['y', 'x']
    >>> npz['x']  # getitem access
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
    >>> npz.f.x  # attribute lookup
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    """
    # Class-level defaults keep close() -- and with it __del__ and
    # __exit__ -- safe when __init__ raises before these are assigned
    zip = None
    fid = None
    f = None

    def __init__(self, fid, own_fid=False):
        # The zipfile import is postponed (see zipfile_factory) since
        # zipfile depends on gzip, an optional component of the so-called
        # standard library.
        _zip = zipfile_factory(fid)
        self._files = _zip.namelist()
        # Public names: drop a trailing '.npy', keep other names as they are
        self.files = []
        for name in self._files:
            if name.endswith('.npy'):
                self.files.append(name[:-4])
            else:
                self.files.append(name)
        self.zip = _zip
        self.f = BagObj(self)
        # Only remember the handle when we are responsible for closing it
        if own_fid:
            self.fid = fid
        else:
            self.fid = None

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """
        Close the file.

        Safe to call more than once, and on an instance whose
        construction did not complete.
        """
        if self.zip is not None:
            self.zip.close()
            self.zip = None
        if self.fid is not None:
            self.fid.close()
            self.fid = None
        self.f = None  # break reference cycle

    def __del__(self):
        self.close()

    def __getitem__(self, key):
        """
        Return the archive member stored under `key`.

        `key` may be a full member name or a name from ``files`` (i.e.
        without the ".npy" extension).  Raises KeyError when neither
        matches.
        """
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
        if key in self._files:
            member = key
        elif key in self.files:
            member = key + '.npy'
        else:
            raise KeyError("%s is not a file in the archive" % key)

        data = self.zip.read(member)
        if data.startswith(format.MAGIC_PREFIX):
            return format.read_array(BytesIO(data))
        # Not in .npy format: hand back the raw content
        return data

    def __iter__(self):
        return iter(self.files)

    def items(self):
        """
        Return a list of tuples, with each tuple (filename, array in file).

        """
        return [(f, self[f]) for f in self.files]

    def iteritems(self):
        """Generator that returns tuples (filename, array in file)."""
        for f in self.files:
            yield (f, self[f])

    def keys(self):
        """Return the names in ``files`` (".npy" extensions removed)."""
        return self.files

    def iterkeys(self):
        """Return an iterator over the files in the archive."""
        return self.__iter__()

    def __contains__(self, key):
        return self.files.__contains__(key)
def load(file, mmap_mode=None):
"""
Load an array(s) or pickled objects from .npy, .npz, or pickled files.
Parameters
----------
file : file-like object or string
The file to read. It must support ``seek()`` and ``read()`` methods.
If the filename extension is ``.gz``, the file is first decompressed.
mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
If not None, then memory-map the file, using the given mode
(see `numpy.memmap` for a detailed description of the modes).
A memory-mapped array is kept on disk. However, it can be accessed
and sliced like any ndarray. Memory mapping is especially useful for
accessing small fragments of large files without reading the entire
file into memory.
Returns
-------
result : array, tuple, dict, etc.
Data stored in the file. For '.npz' files, the returned instance of
NpzFile class must be closed to avoid leaking file descriptors.
Raises
------
IOError
If the input file does not exist or cannot be read.
See Also
--------
save, savez, loadtxt
memmap : Create a memory-map to an array stored in a file on disk.
Notes
-----
- If the file contains pickle data, then whatever object is stored
in the pickle is returned.
- If the file is a ``.npy`` file, then a single array is returned.
- If the file is a ``.npz`` file, then a dictionary-like object is
returned, containing ``{filename: array}`` key-value pairs, one for
each file in the archive.
- If the file is a ``.npz`` file, the returned value supports the context
manager protocol in a similar fashion to the open function::
with load('foo.npz') as data:
a = data['a']
The underlying file descriptor is closed when exiting the 'with' block.
Examples
--------
Store data to disk, and load it again:
>>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
>>> np.load('/tmp/123.npy')
array([[1, 2, 3],
[4, 5, 6]])
Store compressed data to disk, and load it again:
>>> a=np.arr
...
...
(文件超长,未完全显示,请下载后阅读剩余部分)
...
展开> <收缩