[segyio] 104/376: Primitive mmap support

Jørgen Kvalsvik jokva-guest at moszumanska.debian.org
Wed Sep 20 08:04:16 UTC 2017


This is an automated email from the git hooks/post-receive script.

jokva-guest pushed a commit to branch debian
in repository segyio.

commit 51401568d8b17019b50424eaed9a4d5d84f8ac11
Author: Jørgen Kvalsvik <jokva at statoil.com>
Date:   Wed Nov 2 15:55:48 2016 +0100

    Primitive mmap support
    
    If the system has the mmap system call available (really any
    POSIX-compliant system), the segy_mmap function will attempt to memory
    map the file in question.
    
    This is considered an experimental, optional feature and the scheme is
    rather naïve - the full file will be mmap'd, not just a view into it,
    meaning it will likely cause forms of system failure for
    way-larger-than-memory files. However, with non-regular access, such as
    reading lines or every nth trace or similar, performance is vastly
    improved.
---
 applications/segyinfo.c    |   8 ++-
 applications/segyinspect.c |  15 +++--
 cmake/check_includes.cmake |   5 +-
 python/segyio/_segyio.c    |   3 +
 src/segyio/segy.c          | 144 +++++++++++++++++++++++++++++++++++++--------
 src/segyio/segy.h          |   5 +-
 6 files changed, 148 insertions(+), 32 deletions(-)

diff --git a/applications/segyinfo.c b/applications/segyinfo.c
index 5e59cd6..47f45f1 100644
--- a/applications/segyinfo.c
+++ b/applications/segyinfo.c
@@ -1,5 +1,6 @@
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <time.h>
 
 #include <segyio/segy.h>
@@ -27,9 +28,9 @@ static inline int maximum( int x, int y ) {
 
 int main(int argc, char* argv[]) {
     
-    if( argc != 2 ) {
+    if( argc < 2 ) {
         puts("Missing argument, expected run signature:");
-        printf("  %s <segy_file>\n", argv[0]);
+        printf("  %s <segy_file> [mmap]\n", argv[0]);
         exit(1);
     }
 
@@ -39,6 +40,9 @@ int main(int argc, char* argv[]) {
         exit( 3 );
     }
 
+    if( argc > 2 && strcmp( argv[ 2 ], "mmap" ) == 0 )
+        segy_mmap( fp );
+
     int err;
     char header[ SEGY_BINARY_HEADER_SIZE ];
     err = segy_binheader( fp, header );
diff --git a/applications/segyinspect.c b/applications/segyinspect.c
index 61750c4..30ca5da 100644
--- a/applications/segyinspect.c
+++ b/applications/segyinspect.c
@@ -1,5 +1,6 @@
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <time.h>
 #include <segyio/segy.h>
 
@@ -37,9 +38,9 @@ static const char* getFastestDirectionName( int sorting ) {
 
 int main(int argc, char* argv[]) {
 
-    if (!(argc == 2 || argc == 4)) {
+    if( argc < 2 ) {
         puts("Missing argument, expected run signature:");
-        printf("  %s <segy_file> [INLINE_BYTE CROSSLINE_BYTE]\n", argv[0]);
+        printf("  %s <segy_file> [mmap] [INLINE_BYTE CROSSLINE_BYTE]\n", argv[0]);
         printf("  Inline and crossline bytes default to: 189 and 193\n");
         exit(1);
     }
@@ -47,9 +48,13 @@ int main(int argc, char* argv[]) {
     int xl_field = CROSSLINE_3D;
     int il_field = INLINE_3D;
 
-    if (argc == 4) {
-        il_field = atoi(argv[2]);
-        xl_field = atoi(argv[3]);
+    bool memory_map = argc > 2 && strcmp( argv[ 2 ], "mmap" ) == 0;
+
+    if( ( memory_map && argc > 4 ) || ( !memory_map && argc > 2 ) ) {
+        int argindex = memory_map ? 2 : 3;
+
+        il_field = atoi(argv[ argindex + 0 ]);
+        xl_field = atoi(argv[ argindex + 1 ]);
     }
 
     clock_t start = clock();
diff --git a/cmake/check_includes.cmake b/cmake/check_includes.cmake
index 9ca9dcd..04d36b5 100644
--- a/cmake/check_includes.cmake
+++ b/cmake/check_includes.cmake
@@ -16,4 +16,7 @@ else()
     message(FATAL_ERROR "Could not find htons.")
 endif()
 
-
+check_include_file("sys/mman.h" HAVE_SYS_MMAN_H)
+if (HAVE_SYS_MMAN_H)
+    add_definitions("-DHAVE_MMAP")
+endif()
diff --git a/python/segyio/_segyio.c b/python/segyio/_segyio.c
index 156f1e9..d0120c4 100644
--- a/python/segyio/_segyio.c
+++ b/python/segyio/_segyio.c
@@ -70,6 +70,9 @@ static PyObject *py_FILE_open(PyObject *self, PyObject *args) {
     if (p_FILE == NULL) {
         return PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
     }
+
+    segy_mmap( p_FILE );
+
     return PyCapsule_New(p_FILE, "segy_file*", (PyCapsule_Destructor) py_FILE_destructor);
 }
 
diff --git a/src/segyio/segy.c b/src/segyio/segy.c
index 4b7bbb2..fa630fd 100644
--- a/src/segyio/segy.c
+++ b/src/segyio/segy.c
@@ -1,3 +1,8 @@
+#ifdef HAVE_MMAP
+  #define _POSIX_SOURCE
+  #include <sys/mman.h>
+#endif //HAVE_MMAP
+
 #ifdef HAVE_NETINET_IN_H
 #include <netinet/in.h>
 #elif HAVE_ARPA_INET_H
@@ -9,6 +14,7 @@
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stdio.h>
 
 #include <segyio/segy.h>
 #include <segyio/util.h>
@@ -294,8 +300,36 @@ static int bfield_size[] = {
     [- HEADER_SIZE + BIN_Unassigned2]           =  0,
 };
 
+/*
+ * Determine the file size in bytes. If this function succeeds, the file
+ * pointer will be reset to wherever it was before this call. If this call
+ * fails for some reason, SEGY_FSEEK_ERROR is returned and the file pointer
+ * location will be determined by the behaviour of fseek.
+ */
+static int file_size( FILE* fp, size_t* size ) {
+    const long prev_pos = ftell( fp );
+
+    int err = fseek( fp, 0, SEEK_END );
+    if( err != 0 ) return SEGY_FSEEK_ERROR;
+
+    const size_t sz = ftell( fp );
+    err = fseek( fp, prev_pos, SEEK_SET );
+    if( err != 0 ) return SEGY_FSEEK_ERROR;
+
+    *size = sz;
+    return SEGY_OK;
+}
+
+/*
+ * addr is NULL if mmap is not found under compilation or if the file is
+ * not requested mmap'd. If so, the fallback code path of FILE* is taken
+ */
 struct segy_file_handle {
+    void* addr;
+    void* cur;
     FILE* fp;
+    size_t fsize;
+    char mode[ 4 ];
 };
 
 segy_file* segy_open( const char* path, const char* mode ) {
@@ -303,7 +337,7 @@ segy_file* segy_open( const char* path, const char* mode ) {
 
     if( !fp ) return NULL;
 
-    segy_file* file = malloc( sizeof( segy_file ) );
+    segy_file* file = calloc( 1, sizeof( segy_file ) );
 
     if( !file ) {
         fclose( fp );
@@ -311,11 +345,51 @@ segy_file* segy_open( const char* path, const char* mode ) {
     }
 
     file->fp = fp;
+    strncpy( file->mode, mode, 3 );
+
     return file;
 }
 
+int segy_mmap( segy_file* fp ) {
+#ifndef HAVE_MMAP
+    return SEGY_MMAP_INVALID;
+#else
+
+    int err = file_size( fp->fp, &fp->fsize );
+
+    if( err != 0 ) return SEGY_FSEEK_ERROR;
+
+    bool rw = strstr( fp->mode, "+" ) || strstr( fp->mode, "w" );
+    const int prot =  rw ? PROT_READ | PROT_WRITE : PROT_READ;
+
+    int fd = fileno( fp->fp );
+    void* addr = mmap( NULL, fp->fsize, prot, MAP_SHARED, fd, 0 );
+
+    if( addr == MAP_FAILED )
+        return SEGY_MMAP_ERROR;
+
+    fp->addr = fp->cur = addr;
+    return SEGY_OK;
+#endif //HAVE_MMAP
+}
+
 int segy_flush( segy_file* fp, bool async ) {
-    return fflush( fp->fp );
+    int syncerr = 0;
+
+#ifdef HAVE_MMAP
+    if( fp->addr ) {
+        int flag = async ? MS_ASYNC : MS_SYNC;
+        syncerr = msync( fp->addr, fp->fsize, flag );
+    }
+#endif //HAVE_MMAP
+
+    if( syncerr != 0 ) return syncerr;
+
+    int flusherr = fflush( fp->fp );
+
+    if( flusherr != 0 ) return SEGY_FWRITE_ERROR;
+
+    return SEGY_OK;
 }
 
 long segy_ftell( segy_file* fp ) {
@@ -323,7 +397,19 @@ long segy_ftell( segy_file* fp ) {
 }
 
 int segy_close( segy_file* fp ) {
-    int err = fclose( fp->fp );
+    int err = segy_flush( fp, false );
+
+#ifdef HAVE_MMAP
+    if( !fp->addr ) goto no_mmap;
+
+    err = munmap( fp->addr, fp->fsize );
+    if( err != 0 )
+        err = SEGY_MMAP_ERROR;
+
+no_mmap:
+#endif //HAVE_MMAP
+
+    fclose( fp->fp );
     free( fp );
     return err;
 }
@@ -471,6 +557,14 @@ int segy_seek( segy_file* fp,
 
     trace_bsize += SEGY_TRACE_HEADER_SIZE;
     const long pos = trace0 + ( (long)trace * (long)trace_bsize );
+
+    if( fp->addr ) {
+        if( (size_t)pos >= fp->fsize ) return SEGY_FSEEK_ERROR;
+
+        fp->cur = (char*)fp->addr + pos;
+        return SEGY_OK;
+    }
+
     const int err = fseek( fp->fp, pos, SEEK_SET );
     if( err != 0 ) return SEGY_FSEEK_ERROR;
     return SEGY_OK;
@@ -485,6 +579,11 @@ int segy_traceheader( segy_file* fp,
     const int err = segy_seek( fp, traceno, trace0, trace_bsize );
     if( err != 0 ) return err;
 
+    if( fp->addr ) {
+        memcpy( buf, fp->cur, SEGY_TRACE_HEADER_SIZE );
+        return SEGY_OK;
+    }
+
     const size_t readc = fread( buf, 1, SEGY_TRACE_HEADER_SIZE, fp->fp );
 
     if( readc != SEGY_TRACE_HEADER_SIZE )
@@ -502,6 +601,11 @@ int segy_write_traceheader( segy_file* fp,
     const int err = segy_seek( fp, traceno, trace0, trace_bsize );
     if( err != 0 ) return err;
 
+    if( fp->addr ) {
+        memcpy( fp->cur, buf, SEGY_TRACE_HEADER_SIZE );
+        return SEGY_OK;
+    }
+
     const size_t writec = fwrite( buf, 1, SEGY_TRACE_HEADER_SIZE, fp->fp );
 
     if( writec != SEGY_TRACE_HEADER_SIZE )
@@ -511,26 +615,6 @@ int segy_write_traceheader( segy_file* fp,
 }
 
 /*
- * Determine the file size in bytes. If this function succeeds, the file
- * pointer will be reset to wherever it was before this call. If this call
- * fails for some reason, the return value is 0 and the file pointer location
- * will be determined by the behaviour of fseek.
- */
-static int file_size( FILE* fp, size_t* size ) {
-    const long prev_pos = ftell( fp );
-
-    int err = fseek( fp, 0, SEEK_END );
-    if( err != 0 ) return SEGY_FSEEK_ERROR;
-
-    const size_t sz = ftell( fp );
-    err = fseek( fp, prev_pos, SEEK_SET );
-    if( err != 0 ) return SEGY_FSEEK_ERROR;
-
-    *size = sz;
-    return SEGY_OK;
-}
-
-/*
  * Return the number of traces in the file. The file pointer won't change after
  * this call unless fseek itself fails.
  *
@@ -879,6 +963,10 @@ int segy_crossline_indices( segy_file* fp,
 
 
 static int skip_traceheader( segy_file* fp ) {
+    if( fp->addr ) {
+        fp->cur = (char*)fp->cur + SEGY_TRACE_HEADER_SIZE;
+        return SEGY_OK;
+    }
     const int err = fseek( fp->fp, SEGY_TRACE_HEADER_SIZE, SEEK_CUR );
     if( err != 0 ) return SEGY_FSEEK_ERROR;
     return SEGY_OK;
@@ -896,6 +984,11 @@ int segy_readtrace( segy_file* fp,
     err = skip_traceheader( fp );
     if( err != 0 ) return err;
 
+    if( fp->addr ) {
+        memcpy( buf, fp->cur, trace_bsize );
+        return SEGY_OK;
+    }
+
     const size_t readc = fread( buf, 1, trace_bsize, fp->fp );
     if( readc != trace_bsize ) return SEGY_FREAD_ERROR;
 
@@ -916,6 +1009,11 @@ int segy_writetrace( segy_file* fp,
     err = skip_traceheader( fp );
     if( err != 0 ) return err;
 
+    if( fp->addr ) {
+        memcpy( fp->cur, buf, trace_bsize );
+        return SEGY_OK;
+    }
+
     const size_t writec = fwrite( buf, 1, trace_bsize, fp->fp );
     if( writec != trace_bsize )
         return SEGY_FWRITE_ERROR;
diff --git a/src/segyio/segy.h b/src/segyio/segy.h
index 9479f62..14b0e82 100644
--- a/src/segyio/segy.h
+++ b/src/segyio/segy.h
@@ -32,6 +32,7 @@ struct segy_file_handle;
 typedef struct segy_file_handle segy_file;
 
 segy_file* segy_open( const char* path, const char* mode );
+int segy_mmap( segy_file* );
 int segy_flush( segy_file*, bool async );
 int segy_close( segy_file* );
 
@@ -385,7 +386,9 @@ typedef enum {
     SEGY_MISSING_LINE_INDEX,
     SEGY_INVALID_OFFSETS,
     SEGY_TRACE_SIZE_MISMATCH,
-    SEGY_INVALID_ARGS
+    SEGY_INVALID_ARGS,
+    SEGY_MMAP_ERROR,
+    SEGY_MMAP_INVALID,
 } SEGY_ERROR;
 
 

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/debian-science/packages/segyio.git



More information about the debian-science-commits mailing list