Commit 7612a462 authored by W. Trevor King
Browse files

Fixes to string/byte handling for Python 3 compatibility.

I don't know if encoding information is embedded in the IGOR files or
not.  Currently the stock parser just leaves everything it reads as
bytes.  For compatibility, the igorpy module attempts to convert those
byte strings to Unicode, but it just assumes that the encoding used in
the file matches the locale or default encoding used by your system.
This could be a portability issue.  Until commit

  commit fe7006e3
  Author: W. Trevor King <wking@tremily.us>
  Date:   Sat Jul 21 07:50:09 2012 -0400

    Replace igor.igorpy parsing with translations from igor.packed.load.

the igorpy parser used sys.getfilesystemencoding() to guess the
encoding, but that encoding is actually used to encode file names, not
file contents.  locale.getpreferredencoding() is a better guess, but
it's still just a guess.
parent 4776df19
......@@ -70,10 +70,14 @@ class StaticStringField (_DynamicField):
wave_data[self.name] = d
def _normalize_string(self, d):
if hasattr(d, 'tostring'):
if isinstance(d, bytes):
pass
elif hasattr(d, 'tobytes'):
d = d.tobytes()
elif hasattr(d, 'tostring'): # Python 2 compatibility
d = d.tostring()
else:
d = ''.join(d)
d = b''.join(d)
if self._array_size_field:
start = 0
strings = []
......@@ -449,7 +453,7 @@ class DynamicLabelsField (DynamicStringField):
wave_structure = parents[-1]
wave_data = self._get_structure_data(parents, data, wave_structure)
bin_header = wave_data['bin_header']
d = ''.join(wave_data[self.name])
d = b''.join(wave_data[self.name])
dim_labels = []
start = 0
for size in bin_header[self._size_field]:
......@@ -457,7 +461,7 @@ class DynamicLabelsField (DynamicStringField):
if end > start:
dim_data = d[start:end]
# split null-delimited strings
labels = dim_data.split(chr(0))
labels = dim_data.split(b'\x00')
start = end
else:
labels = []
......@@ -494,10 +498,10 @@ class DynamicStringIndicesDataField (_DynamicField):
for i,offset in enumerate(wave_data['sIndices']):
if offset > start:
chars = wdata[start:offset]
strings.append(''.join(chars))
strings.append(b''.join(chars))
start = offset
elif offset == start:
strings.append('')
strings.append(b'')
else:
raise ValueError((offset, wave_data['sIndices']))
wdata = _numpy.array(strings)
......
......@@ -15,6 +15,7 @@ PTN003.ifn and TN003.ifn.
"""
from __future__ import absolute_import
import io as _io
import locale as _locale
import re as _re
import sys as _sys
......@@ -37,6 +38,7 @@ from .record.variables import VariablesRecord as _VariablesRecord
__version__='0.10'
ENCODING = _locale.getpreferredencoding() or _sys.getdefaultencoding()
PYKEYWORDS = set(('and','as','assert','break','class','continue',
'def','elif','else','except','exec','finally',
'for','global','if','import','in','is','lambda',
......@@ -84,7 +86,7 @@ class Wave(IgorObject):
"""
def __init__(self, record):
d = record.wave['wave']
self.name = d['wave_header']['bname']
self.name = d['wave_header']['bname'].decode(ENCODING)
self.data = d['wData']
self.fs = d['wave_header']['fsValid']
self.fstop = d['wave_header']['topFullScale']
......@@ -100,8 +102,8 @@ class Wave(IgorObject):
sfA = d['wave_header']['sfA']
sfB = d['wave_header']['sfB']
# TODO find example with multiple data units
self.data_units = [d['data_units']]
self.axis_units = [d['dimension_units']]
self.data_units = [d['data_units'].decode(ENCODING)]
self.axis_units = [d['dimension_units'].decode(ENCODING)]
self.data_units.extend(['']*(_MAXDIMS-len(self.data_units)))
self.data_units = tuple(self.data_units)
self.axis_units.extend(['']*(_MAXDIMS-len(self.axis_units)))
......@@ -257,7 +259,8 @@ def _convert(packed_experiment, ignore_unknown=True):
r = None
if isinstance(record, _FolderStartRecord):
path = stack[-1].path+[record.null_terminated_text]
path = stack[-1].path + [
record.null_terminated_text.decode(ENCODING)]
folder = Folder(path)
stack[-1].append(folder)
stack.append(folder)
......
......@@ -42,5 +42,6 @@ class UnusedRecord (Record):
class TextRecord (Record):
def __init__(self, *args, **kwargs):
super(TextRecord, self).__init__(*args, **kwargs)
self.text = str(self.data).replace('\r\n', '\n').replace('\r', '\n')
self.null_terminated_text = self.text.split('\x00', 1)[0]
self.text = bytes(self.data).replace(
b'\r\n', b'\n').replace(b'\r', b'\n')
self.null_terminated_text = self.text.split(b'\x00', 1)[0]
......@@ -4,6 +4,7 @@ r"""Test the igor.igorpy compatibility layer by loading sample files.
>>> from pprint import pprint
>>> import igor.igorpy as igor
>>> igor.ENCODING = 'UTF-8'
Load a packed experiment:
......@@ -74,9 +75,9 @@ Waves:
...
0.00077303, 0.00038651, 0. ]), array([], dtype=float64), array([], dtype=float64), array([], dtype=float64)]
>>> d.W_plrX5.data_units
('', '', '', '')
(u'', '', '', '')
>>> d.W_plrX5.axis_units
('', '', '', '')
(u'', '', '', '')
>>> d.W_plrX5.data # doctest: +ELLIPSIS
array([ 1.83690956e-17, 2.69450769e-02, 7.65399113e-02,
1.44305170e-01, 2.23293692e-01, 3.04783821e-01,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment