[segyio] 235/376: Don't assume time for sample_interval

Wed Sep 20 08:04:38 UTC 2017

This is an automated email from the git hooks/post-receive script.

jokva-guest pushed a commit to branch debian
in repository segyio.

commit 94d455683ab8ddde03f557a7d6976d8eb8b141d1
Author: Jørgen Kvalsvik <jokva at statoil.com>
Date:   Wed Mar 8 15:10:15 2017 +0100

    Don't assume time for sample_interval
    
    The segy_sample_interval function would always assume that the interval
    between sample describes distance-in-time (ms), but this means
    information is lost was in meters (which would be interpreted as
    millimeters). Instead, this function now returns the inferred interval
    without modifications, leaving it up to the caller to determine what the
    interval describes, and typically has more information available.
    
    This means that the sample_indexes function in python.tools now changes
    behaviour and uses microseconds (as per the specification) rather than
    milliseconds. To support the typical use of this function,
    segyio.tools.scale_samples has been added.
---
 lib/include/segyio/segy.h |  2 +-
 lib/src/segy.c            | 51 ++++++++++++++++++++++++++---------------------
 mex/SegySpec.m            |  2 +-
 python/segyio/_segyio.c   | 10 +++++-----
 python/segyio/tools.py    | 32 ++++++++++++++++++++++++++++-
 python/test/tools.py      | 26 +++++++++++++++++++++---
 6 files changed, 89 insertions(+), 34 deletions(-)

diff --git a/lib/include/segyio/segy.h b/lib/include/segyio/segy.h
index 338ca65..766856b 100644
--- a/lib/include/segyio/segy.h
+++ b/lib/include/segyio/segy.h
@@ -53,7 +53,7 @@ int segy_write_binheader( segy_file*, const char* buf );
  * allocates 2 octets for this, so it comfortably sits inside an int
  */
 int segy_samples( const char* binheader );
-int segy_sample_interval( segy_file*, float* dt );
+int segy_sample_interval( segy_file*, float fallback , float* dt );
 /* exception: the int returned is an enum, SEGY_SORTING, not an error code */
 int segy_format( const char* binheader );
 int segy_get_field( const char* traceheader, int field, int32_t* f );
diff --git a/lib/src/segy.c b/lib/src/segy.c
index 6f3363d..8882d61 100644
--- a/lib/src/segy.c
+++ b/lib/src/segy.c
@@ -737,7 +737,7 @@ int segy_traces( segy_file* fp,
     return SEGY_OK;
 }
 
-int segy_sample_interval( segy_file* fp, float* dt ) {
+int segy_sample_interval( segy_file* fp, float fallback, float* dt ) {
 
     char bin_header[ SEGY_BINARY_HEADER_SIZE ];
     char trace_header[ SEGY_TRACE_HEADER_SIZE ];
@@ -756,29 +756,34 @@ int segy_sample_interval( segy_file* fp, float* dt ) {
         return err;
     }
 
-    // microseconds: us
-    int binary_header_dt_us;
-    int trace_header_dt_us;
-
-    segy_get_bfield(bin_header, SEGY_BIN_INTERVAL, &binary_header_dt_us);
-    segy_get_field(trace_header, SEGY_TR_SAMPLE_INTER, &trace_header_dt_us);
-
-    // milliseconds: ms
-    float binary_header_dt_ms = binary_header_dt_us/1000.0;
-    float trace_header_dt_ms = trace_header_dt_us/1000.0;
-
-    if (trace_header_dt_us==0 && binary_header_dt_us==0) {
-        //noop
-    } else if (binary_header_dt_us == 0) {
-        *dt = trace_header_dt_ms;
-    } else if (trace_header_dt_us == 0) {
-        *dt = binary_header_dt_ms;
-    } else if (trace_header_dt_us == binary_header_dt_us) {
-        *dt = trace_header_dt_ms;
-    }
+    int bindt;
+    int trdt;
+
+    segy_get_bfield( bin_header, SEGY_BIN_INTERVAL, &bindt );
+    segy_get_field( trace_header, SEGY_TR_SAMPLE_INTER, &trdt );
+
+    float binary_header_dt = bindt;
+    float trace_header_dt = trdt;
 
-    return 0;
+    /*
+     * 3 cases:
+     * * When the trace header and binary header disagree on a (non-zero)
+     *   sample interval; choose neither and opt for the fallback.
+     * * When both sample intervals are zero: opt for the fallback.
+     * * Otherwise, choose the interval from the non-zero header.
+     */
 
+    *dt = fallback;
+    if( binary_header_dt == 0 && trace_header_dt != 0 )
+        *dt = trace_header_dt;
+
+    if( trace_header_dt == 0 && binary_header_dt != 0 )
+        *dt = binary_header_dt;
+
+    if( trace_header_dt == binary_header_dt && trace_header_dt != 0 )
+        *dt = trace_header_dt;
+
+    return SEGY_OK;
 }
 
 int segy_sample_indices( segy_file* fp,
@@ -787,7 +792,7 @@ int segy_sample_indices( segy_file* fp,
                          int count,
                          float* buf ) {
 
-    int err = segy_sample_interval(fp, &dt);
+    int err = segy_sample_interval(fp, dt, &dt);
     if (err != 0) {
         return err;
     }
diff --git a/mex/SegySpec.m b/mex/SegySpec.m
index a30eec2..25e7b02 100644
--- a/mex/SegySpec.m
+++ b/mex/SegySpec.m
@@ -33,7 +33,7 @@ classdef SegySpec
 
             obj.sample_format = uint32(SegySampleFormat(spec.sample_format));
             obj.trace_sorting_format = TraceSortingFormat(spec.trace_sorting_format);
-            obj.sample_indexes = spec.sample_indexes;
+            obj.sample_indexes = ((spec.sample_indexes - t0) / 1000.0) + t0;
             obj.crossline_indexes = uint32(spec.crossline_indexes);
             obj.inline_indexes = uint32(spec.inline_indexes);
             obj.offset_count = uint32(spec.offset_count);
diff --git a/python/segyio/_segyio.c b/python/segyio/_segyio.c
index db9217b..4d475df 100644
--- a/python/segyio/_segyio.c
+++ b/python/segyio/_segyio.c
@@ -615,17 +615,17 @@ static PyObject *py_get_dt(PyObject *self, PyObject *args) {
     errno = 0;
 
     PyObject *file_capsule = NULL;
-    float dt;
-    PyArg_ParseTuple(args, "Of", &file_capsule, &dt);
+    float fallback;
+    PyArg_ParseTuple(args, "Of", &file_capsule, &fallback);
     segy_file *p_FILE = get_FILE_pointer_from_capsule(file_capsule);
 
     if (PyErr_Occurred()) { return NULL; }
 
-    int error = segy_sample_interval(p_FILE, &dt);
+    float dt;
+    int error = segy_sample_interval(p_FILE, fallback, &dt);
     if (error != 0) { return py_handle_segy_error(error, errno); }
 
-    double ddt = dt;
-    return PyFloat_FromDouble(ddt);
+    return PyFloat_FromDouble( dt );
 }
 
 
diff --git a/python/segyio/tools.py b/python/segyio/tools.py
index d7bd667..f334dba 100644
--- a/python/segyio/tools.py
+++ b/python/segyio/tools.py
@@ -1,6 +1,6 @@
 import segyio
 import numpy as np
-
+import itertools as itr
 
 def dt(segyfile, fallback_dt=4):
     """
@@ -121,3 +121,33 @@ def cube(f):
     smps = len(f.samples)
     dims = (fast, slow, smps) if offs == 1 else (fast, slow, offs, smps)
     return f.trace.raw[:].reshape(dims)
+
+def scale_samples(samples):
+    """ Guess unit and scale of the sample intervals
+
+    :type samples: iterable[int]
+    :rtype: tuple[numpy.ndarray,str]
+
+    Attempt to figure out if the sample values represent depth (in meters) or
+    time (in milliseconds).
+    """
+
+    samples = np.array(samples)
+
+    # this is VERY unlikely, but in the case of a file with only one sample per
+    # trace, assume depth (meter)
+    if len(samples) == 1:
+        return (samples, 'm')
+
+    # heuristic: since the spec suggests microseconds for step between samples,
+    # and 4ms is *very* common for step size, any large difference between two
+    # values probably means that the unit is time. A step size of 100 meters is
+    # quite unlikely, but this threshold is arbitrary and might require tuning
+    # in the future
+    depth = abs(samples[0] - samples[1]) < 100
+
+    if not depth:
+        samples = samples / 1000
+
+    unit = 'm' if depth else 'ms'
+    return (samples, unit)
diff --git a/python/test/tools.py b/python/test/tools.py
index 6db9bd0..5c98588 100644
--- a/python/test/tools.py
+++ b/python/test/tools.py
@@ -38,15 +38,16 @@ class ToolsTest(TestCase):
                 f.bin[BinField.Interval] = dt_us
                 f.header[0][TraceField.TRACE_SAMPLE_INTERVAL] = dt_us
                 f.flush()
-                np.testing.assert_almost_equal(segyio.dt(f), dt_us / 1000)
+                np.testing.assert_almost_equal(segyio.dt(f), dt_us)
 
     def test_sample_indexes(self):
         with segyio.open(self.filename, "r") as f:
             indexes = segyio.sample_indexes(f)
-            self.assertListEqual(indexes, [t * 4.0 for t in range(len(f.samples))])
+            step = 4000.0
+            self.assertListEqual(indexes, [t * step for t in range(len(f.samples))])
 
             indexes = segyio.sample_indexes(f, t0=1.5)
-            self.assertListEqual(indexes, [1.5 + t * 4.0 for t in range(len(f.samples))])
+            self.assertListEqual(indexes, [1.5 + t * step for t in range(len(f.samples))])
 
             indexes = segyio.sample_indexes(f, t0=1.5, dt_override=3.21)
             self.assertListEqual(indexes, [1.5 + t * 3.21 for t in range(len(f.samples))])
@@ -93,3 +94,22 @@ class ToolsTest(TestCase):
             dims = (len(f.ilines), len(f.xlines), len(f.offsets), len(f.samples))
             x = segyio.tools.collect(f.trace[:]).reshape(dims)
             self.assertTrue(np.all(x == segyio.tools.cube(f)))
+
+    def test_scale_samples(self):
+        with segyio.open(self.filename) as f:
+            samples, unit = segyio.tools.scale_samples(f.samples)
+            self.assertEqual('ms', unit)
+            self.assertListEqual([4.0 * i for i in range(len(samples))],
+                                 list(samples))
+
+            meters = [10 * i for i in range(len(samples))]
+            f._samples = np.array(meters)
+            samples, unit = segyio.tools.scale_samples(f.samples)
+            self.assertEqual('m', unit)
+            self.assertListEqual(meters, list(samples))
+
+    def test_scale_samples_pylist(self):
+        sampleslist = [10 * i for i in range(10)]
+        samplesnp = np.array(sampleslist)
+        eq = np.all(samplesnp == segyio.tools.scale_samples(samplesnp)[0])
+        self.assertTrue(eq)

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/segyio.git