[segyio] 209/376: subtr support for start/stop/step

Wed Sep 20 08:04:34 UTC 2017

This is an automated email from the git hooks/post-receive script.

jokva-guest pushed a commit to branch debian
in repository segyio.

commit 0edba852c434f214358a4be04179a51cea12589e
Author: Jørgen Kvalsvik <jokva at statoil.com>
Date:   Wed Feb 22 12:41:51 2017 +0100

    subtr support for start/stop/step
    
    Replaces the first-last interface of read/write_subtr with
    start/stop/step, analoguous to python slices. With a step of 1, the most
    common case, this covers the old chunk read, but the step allows greater
    flexibility and maps nicely to features commonly found in higher-level
    languages.
---
 lib/include/segyio/segy.h |  31 +++++--
 lib/src/segy.c            | 158 ++++++++++++++++++++++++++++++-----
 lib/test/segy.c           | 208 +++++++++++++++++++++++++++++++++++++++++++++-
 python/segyio/_segyio.c   |   2 +
 4 files changed, 367 insertions(+), 32 deletions(-)

diff --git a/lib/include/segyio/segy.h b/lib/include/segyio/segy.h
index 6247ef4..d7982b1 100644
--- a/lib/include/segyio/segy.h
+++ b/lib/include/segyio/segy.h
@@ -150,23 +150,44 @@ int segy_writetrace( segy_file*,
 
 /*
  * read/write sub traces, with the same assumption and requirements as
- * segy_readtrace. fst and lst are *indices*, not byte offsets, so
+ * segy_readtrace. start and stop are *indices*, not byte offsets, so
  * segy_readsubtr(fp, traceno, 10, 12, ...) reads samples 10 through 12, and
  * not bytes 10 through 12.
+ *
+ * start and stop are in the range [start,stop), so start=0, stop=5, step=2
+ * yields [0, 2, 4], whereas stop=4 yields [0, 2]
+ *
+ * When step is negative, the subtrace will be read in reverse. If step is
+ * negative and [0,n) is desired, pass use -1 for stop. Other negative values
+ * are undefined. If the range [n, m) where m is larger than the samples is
+ * considered undefined. Any [n, m) where distance(n,m) > samples is undefined.
+ *
+ * The parameter rangebuf is a pointer to a buffer of at least abs(stop-start)
+ * size. This is largely intended for script-C boundaries. In code paths where
+ * step is not 1 or -1, and mmap is not activated, these functions will
+ * *allocate* a buffer to read data from file in chunks. This is a significant
+ * speedup over multiple fread calls, at the cost of a clunkier interface. This
+ * is a tradeoff, since this function is often called in an inner loop. If
+ * you're fine with these functions allocating and freeing this buffer for you,
+ * rangebuf can be NULL.
  */
 int segy_readsubtr( segy_file*,
                     int traceno,
-                    int fst,
-                    int lst,
+                    int start,
+                    int stop,
+                    int step,
                     float* buf,
+                    float* rangebuf,
                     long trace0,
                     int trace_bsize );
 
 int segy_writesubtr( segy_file*,
                      int traceno,
-                     int fst,
-                     int lst,
+                     int start,
+                     int stop,
+                     int step,
                      const float* buf,
+                     float* rangebuf,
                      long trace0,
                      int trace_bsize );
 
diff --git a/lib/src/segy.c b/lib/src/segy.c
index 83e8838..785e370 100644
--- a/lib/src/segy.c
+++ b/lib/src/segy.c
@@ -1032,7 +1032,7 @@ int segy_lines_count( segy_file* fp,
     return SEGY_OK;
 }
 
-/* 
+/*
  * segy_*line_length is rather pointless as a computation, but serve a purpose
  * as an abstraction as the detail on how exactly a length is defined is usually uninteresting
  */
@@ -1090,52 +1090,119 @@ int segy_crossline_indices( segy_file* fp,
 
 static inline int subtr_seek( segy_file* fp,
                               int traceno,
-                              int fst,
-                              int lst,
+                              int start,
+                              int stop,
                               long trace0,
                               int trace_bsize ) {
     /*
      * Optimistically assume that indices are correct by the time they're given
      * to subtr_seek.
      */
-    assert( lst >= fst && fst >= 0 );
+    int min = start < stop ? start : stop + 1;
     assert( sizeof( float ) == 4 );
-    assert( (lst - fst) * sizeof( float ) <= (size_t)trace_bsize );
+    assert( start >= 0 );
+    assert( stop >= -1 );
+    assert( abs(stop - start) * (int)sizeof( float ) <= trace_bsize );
 
-    // skip the trace header and skip everything before fst.
-    trace0 += (fst * sizeof( float )) + SEGY_TRACE_HEADER_SIZE;
+    // skip the trace header and skip everything before min
+    trace0 += (min * (int)sizeof( float )) + SEGY_TRACE_HEADER_SIZE;
     return segy_seek( fp, traceno, trace0, trace_bsize );
 }
 
+static int slicelength( int start, int stop, int step ) {
+    if( ( step < 0 && stop >= start ) ||
+        ( step > 0 && start >= stop ) ) return 0;
+
+    if( step < 0 ) return (stop - start + 1) / step + 1;
+
+    return (stop - start - 1) / step + 1;
+}
+
+static int reverse( float* arr, int elems ) {
+    float tmp;
+    const int last = elems - 1;
+    for( int i = 0; i < elems / 2; ++i ) {
+        tmp = arr[ i ];
+        arr[ i ] = arr[ last - i ];
+        arr[ last - i ] = tmp;
+    }
+
+    return SEGY_OK;
+}
 
 int segy_readtrace( segy_file* fp,
                     int traceno,
                     float* buf,
                     long trace0,
                     int trace_bsize ) {
-    const int lst = trace_bsize / sizeof( float );
-    return segy_readsubtr( fp, traceno, 0, lst, buf, trace0, trace_bsize );
+    const int stop = trace_bsize / sizeof( float );
+    return segy_readsubtr( fp, traceno, 0, stop, 1, buf, NULL, trace0, trace_bsize );
 }
 
 int segy_readsubtr( segy_file* fp,
                     int traceno,
-                    int fst,
-                    int lst,
+                    int start,
+                    int stop,
+                    int step,
                     float* buf,
+                    float* rangebuf,
                     long trace0,
                     int trace_bsize ) {
 
-    int err = subtr_seek( fp, traceno, fst, lst, trace0, trace_bsize );
+    int err = subtr_seek( fp, traceno, start, stop, trace0, trace_bsize );
     if( err != SEGY_OK ) return err;
 
+    const size_t elems = abs( stop - start );
+
+    // most common case: step == abs(1), reading contiguously
+    if( step == 1 || step == -1 ) {
+
+        if( fp->addr ) {
+            memcpy( buf, fp->cur, sizeof( float ) * elems );
+        } else {
+            const size_t readc = fread( buf, sizeof( float ), elems, fp->fp );
+            if( readc != elems ) return SEGY_FREAD_ERROR;
+        }
+
+        if( step == -1 ) reverse( buf, elems );
+
+        return SEGY_OK;
+    }
+
+    // step != 1, i.e. do strided reads
+    int defstart = start < stop ? 0 : elems - 1;
+    int slicelen = slicelength( start, stop, step );
+
     if( fp->addr ) {
-        memcpy( buf, fp->cur, sizeof( float ) * ( lst - fst ) );
+        float* cur = (float*)fp->cur + defstart;
+        for( ; slicelen > 0; cur += step, ++buf, --slicelen )
+            *buf = *cur;
+
         return SEGY_OK;
     }
 
-    const size_t readc = fread( buf, sizeof( float ), lst - fst, fp->fp );
-    if( readc != lst - fst ) return SEGY_FREAD_ERROR;
+    /*
+     * fread fallback: read the full chunk [start, stop) to avoid multiple
+     * fread calls (which are VERY expensive, measured to about 10x the cost of
+     * a single read when reading every other trace). If rangebuf is NULL, the
+     * caller has not supplied a buffer for us to use (likely if it's a
+     * one-off, and we heap-alloc a buffer. This way the function is safer to
+     * use, but with a significant performance penalty when no buffer is
+     * supplied.
+     */
+    float* tracebuf = rangebuf ? rangebuf : malloc( elems * sizeof( float ) );
+
+    const size_t readc = fread( tracebuf, sizeof( float ), elems, fp->fp );
+    if( readc != elems ) {
+        free( tracebuf );
+        return SEGY_FREAD_ERROR;
+    }
+
+    float* cur = tracebuf + defstart;
+    for( ; slicelen > 0; cur += step, --slicelen, ++buf )
+        *buf = *cur;
 
+    free( tracebuf );
     return SEGY_OK;
 }
 
@@ -1145,28 +1212,73 @@ int segy_writetrace( segy_file* fp,
                      long trace0,
                      int trace_bsize ) {
 
-    const int lst = trace_bsize / sizeof( float );
-    return segy_writesubtr( fp, traceno, 0, lst, buf, trace0, trace_bsize );
+    const int stop = trace_bsize / sizeof( float );
+    return segy_writesubtr( fp, traceno, 0, stop, 1, buf, NULL, trace0, trace_bsize );
 }
 
 int segy_writesubtr( segy_file* fp,
                      int traceno,
-                     int fst,
-                     int lst,
+                     int start,
+                     int stop,
+                     int step,
                      const float* buf,
+                     float* rangebuf,
                      long trace0,
                      int trace_bsize ) {
 
-    int err = subtr_seek( fp, traceno, fst, lst, trace0, trace_bsize );
+    int err = subtr_seek( fp, traceno, start, stop, trace0, trace_bsize );
     if( err != SEGY_OK ) return err;
 
+    const size_t elems = abs( stop - start );
+
+    if( step == 1 ) {
+        /*
+         * most common case: step == 1, writing contiguously
+         * -1 is not covered here as it would require reversing the input buffer
+         * (which is const), which in turn may require a memory allocation. It will
+         * be handled by the stride-aware code path
+         */
+        if( fp->addr ) {
+            memcpy( fp->cur, buf, sizeof( float ) * elems );
+        } else {
+            const size_t writec = fwrite( buf, sizeof( float ), elems, fp->fp );
+            if( writec != elems ) return SEGY_FWRITE_ERROR;
+        }
+
+        return SEGY_OK;
+    }
+
+    // step != 1, i.e. do strided reads
+    int defstart = start < stop ? 0 : elems - 1;
+    int slicelen = slicelength( start, stop, step );
+
     if( fp->addr ) {
-        memcpy( fp->cur, buf, sizeof( float ) * ( lst - fst ) );
+        /* if mmap is on, strided write is trivial and fast */
+        float* cur = (float*)fp->cur + defstart;
+        for( ; slicelen > 0; cur += step, ++buf, --slicelen )
+            *cur = *buf;
+
         return SEGY_OK;
     }
 
-    const size_t writec = fwrite( buf, sizeof( float ), lst - fst, fp->fp );
-    if( writec != lst - fst ) return SEGY_FWRITE_ERROR;
+    const int elemsize = elems * sizeof( float );
+    float* tracebuf = rangebuf ? rangebuf : malloc( elemsize );
+
+    // like in readsubtr, read a larger chunk and then step through that
+    const size_t readc = fread( tracebuf, sizeof( float ), elems, fp->fp );
+    if( readc != elems ) { free( tracebuf ); return SEGY_FREAD_ERROR; }
+    /* rewind, because fread advances the file pointer */
+    err = fseek( fp->fp, -elemsize, SEEK_CUR );
+    if( err != 0 ) { free( tracebuf ); return SEGY_FSEEK_ERROR; }
+
+    float* cur = tracebuf + defstart;
+    for( ; slicelen > 0; cur += step, --slicelen, ++buf )
+        *cur = *buf;
+
+    const size_t writec = fwrite( tracebuf, sizeof( float ), elems, fp->fp );
+    free( tracebuf );
+
+    if( writec != elems ) return SEGY_FWRITE_ERROR;
 
     return SEGY_OK;
 }
diff --git a/lib/test/segy.c b/lib/test/segy.c
index d6a9164..90a4073 100644
--- a/lib/test/segy.c
+++ b/lib/test/segy.c
@@ -108,6 +108,193 @@ static void test_interpret_file() {
     segy_close( fp );
 }
 
+static void test_read_subtr( bool mmap ) {
+    const char *file = "test-data/small.sgy";
+    segy_file* fp = segy_open( file, "rb" );
+
+    const int format = SEGY_IBM_FLOAT_4_BYTE;
+    int trace0 = 3600;
+    int trace_bsize = 50 * 4;
+    int err = 0;
+
+    if( mmap ) segy_mmap( fp );
+
+    float buf[ 5 ];
+    /* read a strided chunk across the middle of all traces */
+    /* should read samples 3, 8, 13, 18 */
+    err = segy_readsubtr( fp,
+                          10,
+                          3,        /* start */
+                          19,       /* stop */
+                          5,        /* step */
+                          buf,
+                          NULL,
+                          trace0,
+                          trace_bsize );
+    assertTrue( err == 0, "Unable to correctly read subtrace" );
+    segy_to_native( format, 4, buf );
+    assertClose( 3.20003, buf[ 0 ], 1e-6 );
+    assertClose( 3.20008, buf[ 1 ], 1e-6 );
+    assertClose( 3.20013, buf[ 2 ], 1e-6 );
+    assertClose( 3.20018, buf[ 3 ], 1e-6 );
+
+    err = segy_readsubtr( fp,
+                          10,
+                          18,       /* start */
+                          2,        /* stop */
+                          -5,       /* step */
+                          buf,
+                          NULL,
+                          trace0,
+                          trace_bsize );
+    assertTrue( err == 0, "Unable to correctly read subtrace" );
+    segy_to_native( format, 4, buf );
+    assertClose( 3.20003, buf[ 3 ], 1e-6 );
+    assertClose( 3.20008, buf[ 2 ], 1e-6 );
+    assertClose( 3.20013, buf[ 1 ], 1e-6 );
+    assertClose( 3.20018, buf[ 0 ], 1e-6 );
+
+    err = segy_readsubtr( fp,
+                          10,
+                          3,        /* start */
+                          -1,       /* stop */
+                          -1,       /* step */
+                          buf,
+                          NULL,
+                          trace0,
+                          trace_bsize );
+    assertTrue( err == 0, "Unable to correctly read subtrace" );
+    segy_to_native( format, 4, buf );
+    assertClose( 3.20000, buf[ 3 ], 1e-6 );
+    assertClose( 3.20001, buf[ 2 ], 1e-6 );
+    assertClose( 3.20002, buf[ 1 ], 1e-6 );
+    assertClose( 3.20003, buf[ 0 ], 1e-6 );
+
+    err = segy_readsubtr( fp,
+                          10,
+                          24,       /* start */
+                          -1,       /* stop */
+                          -5,       /* step */
+                          buf,
+                          NULL,
+                          trace0,
+                          trace_bsize );
+    assertTrue( err == 0, "Unable to correctly read subtrace" );
+    segy_to_native( format, 5, buf );
+    assertClose( 3.20004, buf[ 4 ], 1e-6 );
+    assertClose( 3.20009, buf[ 3 ], 1e-6 );
+    assertClose( 3.20014, buf[ 2 ], 1e-6 );
+    assertClose( 3.20019, buf[ 1 ], 1e-6 );
+    assertClose( 3.20024, buf[ 0 ], 1e-6 );
+
+    segy_close( fp );
+}
+
+static void test_write_subtr( bool mmap ) {
+    const char *file = "test-data/write-subtr.sgy";
+    segy_file* fp = segy_open( file, "w+b" );
+
+    const int format = SEGY_IBM_FLOAT_4_BYTE;
+    int trace0 = 3600;
+    int trace_bsize = 50 * 4;
+    int err = 0;
+
+    if( mmap ) segy_mmap( fp );
+
+    // since write-subtr.segy is an empty file we must pre-write a full trace
+    // to make sure *something* exists, otherwise we'd get a read error.
+    float dummytrace[ 50 ] = { 0 };
+    err = segy_writetrace( fp, 10, dummytrace, trace0, trace_bsize );
+    assertTrue( err == 0, "Error in write-subtr setup." );
+
+    float buf[ 5 ];
+    float out[ 5 ] = { 2.0, 2.1, 2.2, 2.3, 2.4 };
+    segy_from_native( format, 5, out );
+
+    /* read a strided chunk across the middle of all traces */
+    /* should read samples 3, 8, 13, 18 */
+    err = segy_writesubtr( fp,
+                           10,
+                           3,        /* start */
+                           19,       /* stop */
+                           5,        /* step */
+                           out,
+                           NULL,
+                           trace0,
+                           trace_bsize );
+    assertTrue( err == 0, "Unable to correctly write subtrace" );
+    err = segy_readsubtr( fp,
+                          10,
+                          3,        /* start */
+                          19,       /* stop */
+                          5,        /* step */
+                          buf,
+                          NULL,
+                          trace0,
+                          trace_bsize );
+    assertTrue( err == 0, "Unable to correctly read subtrace" );
+    segy_to_native( format, 4, buf );
+    assertClose( 2.0, buf[ 0 ], 1e-6 );
+    assertClose( 2.1, buf[ 1 ], 1e-6 );
+    assertClose( 2.2, buf[ 2 ], 1e-6 );
+    assertClose( 2.3, buf[ 3 ], 1e-6 );
+    err = segy_writesubtr( fp,
+                           10,
+                           18,       /* start */
+                           2,        /* stop */
+                           -5,       /* step */
+                           out,
+                           NULL,
+                           trace0,
+                           trace_bsize );
+    assertTrue( err == 0, "Unable to correctly write subtrace" );
+    err = segy_readsubtr( fp,
+                          10,
+                          18,       /* start */
+                          2,        /* stop */
+                          -5,       /* step */
+                          buf,
+                          NULL,
+                          trace0,
+                          trace_bsize );
+    assertTrue( err == 0, "Unable to correctly read subtrace" );
+    segy_to_native( format, 4, buf );
+    assertClose( 2.3, buf[ 3 ], 1e-6 );
+    assertClose( 2.2, buf[ 2 ], 1e-6 );
+    assertClose( 2.1, buf[ 1 ], 1e-6 );
+    assertClose( 2.0, buf[ 0 ], 1e-6 );
+
+    /* write back-to-front, read front-to-back */
+    err = segy_writesubtr( fp,
+                           10,
+                           24,       /* start */
+                           -1,       /* stop */
+                           -5,       /* step */
+                           out,
+                           NULL,
+                           trace0,
+                           trace_bsize );
+    assertTrue( err == 0, "Unable to correctly write subtrace" );
+    err = segy_readsubtr( fp,
+                          10,
+                          4,        /* start */
+                          25,       /* stop */
+                          5,        /* step */
+                          buf,
+                          NULL,
+                          trace0,
+                          trace_bsize );
+    assertTrue( err == 0, "Unable to correctly read subtrace" );
+    segy_to_native( format, 5, buf );
+    assertClose( 2.0, buf[ 4 ], 1e-6 );
+    assertClose( 2.1, buf[ 3 ], 1e-6 );
+    assertClose( 2.2, buf[ 2 ], 1e-6 );
+    assertClose( 2.3, buf[ 1 ], 1e-6 );
+    assertClose( 2.4, buf[ 0 ], 1e-6 );
+
+    segy_close( fp );
+}
+
 /* Prestack test for when we have an approperiate prestack file
 
 void test_interpret_file_prestack() {
@@ -550,16 +737,16 @@ static void test_file_error_codes() {
     assertTrue( err == SEGY_FSEEK_ERROR, "Could seek in invalid file." );
 
     int l1, l2;
-    err = segy_readsubtr( fp, 0, 2, 0, NULL, 3600, 350 );
+    err = segy_readsubtr( fp, 0, 2, 0, 1, NULL, NULL, 3600, 350 );
     assertTrue( err == SEGY_INVALID_ARGS, "Could pass fst larger than lst" );
 
-    err = segy_readsubtr( fp, 0, -1, 10, NULL, 3600, 350 );
+    err = segy_readsubtr( fp, 0, -1, 10, 1, NULL, NULL, 3600, 350 );
     assertTrue( err == SEGY_INVALID_ARGS, "Could pass negative fst" );
 
-    err = segy_writesubtr( fp, 0, 2, 0, NULL, 3600, 350 );
+    err = segy_writesubtr( fp, 0, 2, 0, 1, NULL, NULL, 3600, 350 );
     assertTrue( err == SEGY_INVALID_ARGS, "Could pass fst larger than lst" );
 
-    err = segy_writesubtr( fp, 0, -1, 10, NULL, 3600, 350 );
+    err = segy_writesubtr( fp, 0, -1, 10, 1, NULL, NULL, 3600, 350 );
     assertTrue( err == SEGY_INVALID_ARGS, "Could pass negative fst" );
 
     err = segy_count_lines( fp, 0, 1, &l1, &l2, 3600, 350 );
@@ -617,6 +804,19 @@ int main() {
     test_text_header();
     puts("test traceh");
     test_trace_header_errors();
+
+    puts("test readsubtr(mmap)");
+    test_read_subtr( true );
+
+    puts("test readsubtr(no-mmap)");
+    test_read_subtr( false );
+
+    /*
+     * this test *creates* a new file, which is currently won't mmap (since we
+     * cannot determine the size, as this does not require any geometry.
+     */
+    puts("test writesubtr(no-mmap)");
+    test_write_subtr( false );
     /*
      * due to its barely-defined behavorial nature, this test is commented out
      * for most runs, as it would trip up the memcheck test
diff --git a/python/segyio/_segyio.c b/python/segyio/_segyio.c
index 347dd9f..8e71778 100644
--- a/python/segyio/_segyio.c
+++ b/python/segyio/_segyio.c
@@ -988,7 +988,9 @@ static PyObject *py_read_depth_slice(PyObject *self, PyObject *args) {
                                trace_no * offsets,
                                depth,
                                depth + 1,
+                               1,
                                buf++,
+                               NULL,
                                trace0, trace_bsize);
     }
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/segyio.git