[iortcw] 57/95: All: Rend2: Add VAO cache for static surfaces

Simon McVittie smcv at debian.org
Fri Sep 8 10:42:13 UTC 2017


This is an automated email from the git hooks/post-receive script.

smcv pushed a commit to tag 1.51
in repository iortcw.

commit 19fb63193f430b31d0cc0d711c60776d631eb735
Author: MAN-AT-ARMS <M4N4T4RMS at gmail.com>
Date:   Sun Apr 30 15:04:37 2017 -0400

    All: Rend2: Add VAO cache for static surfaces
    
    Remove support for draw range elements, multi draw arrays, world VAO creation, and surface merging.
---
 MP/code/rend2/qgl.h           |  10 -
 MP/code/rend2/tr_bsp.c        | 426 ------------------------------------------
 MP/code/rend2/tr_cmds.c       |   4 +-
 MP/code/rend2/tr_extensions.c |  12 --
 MP/code/rend2/tr_init.c       |   4 -
 MP/code/rend2/tr_local.h      |  40 ++--
 MP/code/rend2/tr_model.c      |   6 -
 MP/code/rend2/tr_shade.c      | 116 ++----------
 MP/code/rend2/tr_sky.c        |  18 +-
 MP/code/rend2/tr_surface.c    | 153 ++++++---------
 MP/code/rend2/tr_vbo.c        | 314 +++++++++++++++++++++++++++++++
 MP/code/rend2/tr_world.c      |  48 +----
 MP/code/renderer/qgl.h        |  10 -
 SP/code/rend2/qgl.h           |  10 -
 SP/code/rend2/tr_bsp.c        | 418 -----------------------------------------
 SP/code/rend2/tr_cmds.c       |   4 +-
 SP/code/rend2/tr_extensions.c |  12 --
 SP/code/rend2/tr_init.c       |   4 -
 SP/code/rend2/tr_local.h      |  39 +---
 SP/code/rend2/tr_model.c      |   6 -
 SP/code/rend2/tr_shade.c      | 115 ++----------
 SP/code/rend2/tr_sky.c        |  18 +-
 SP/code/rend2/tr_surface.c    | 153 ++++++---------
 SP/code/rend2/tr_vbo.c        | 314 +++++++++++++++++++++++++++++++
 SP/code/rend2/tr_world.c      |  46 +----
 SP/code/renderer/qgl.h        |  10 -
 26 files changed, 821 insertions(+), 1489 deletions(-)

diff --git a/MP/code/rend2/qgl.h b/MP/code/rend2/qgl.h
index 79eac8f..edec10d 100644
--- a/MP/code/rend2/qgl.h
+++ b/MP/code/rend2/qgl.h
@@ -462,19 +462,11 @@ extern void (APIENTRYP qglPNTrianglesfATI)(GLenum pname, GLfloat param);
 
 // GL function loader, based on https://gist.github.com/rygorous/16796a0c876cf8a5f542caddb55bce8a
 
-// OpenGL 1.2, was GL_EXT_draw_range_elements
-#define QGL_1_2_PROCS \
-	GLE(void, DrawRangeElements, GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) \
-
 // OpenGL 1.3, was GL_ARB_texture_compression
 #define QGL_1_3_PROCS \
 	GLE(void, CompressedTexImage2D, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data) \
 	GLE(void, CompressedTexSubImage2D, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data) \
 
-// OpenGL 1.4, was GL_EXT_multi_draw_arrays
-#define QGL_1_4_PROCS \
-	GLE(void, MultiDrawElements, GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount) \
-
 // OpenGL 1.5, was GL_ARB_vertex_buffer_object and GL_ARB_occlusion_query
 #define QGL_1_5_PROCS \
 	GLE(void, GenQueries, GLsizei n, GLuint *ids) \
@@ -723,9 +715,7 @@ extern void (APIENTRYP qglPNTrianglesfATI)(GLenum pname, GLfloat param);
 	GLE(GLvoid, NamedFramebufferRenderbufferEXT, GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer) \
 
 #define GLE(ret, name, ...) typedef ret APIENTRY name##proc(__VA_ARGS__); extern name##proc * qgl##name;
-QGL_1_2_PROCS;
 QGL_1_3_PROCS;
-QGL_1_4_PROCS;
 QGL_1_5_PROCS;
 QGL_2_0_PROCS;
 QGL_EXT_framebuffer_object_PROCS;
diff --git a/MP/code/rend2/tr_bsp.c b/MP/code/rend2/tr_bsp.c
index f437db3..4f89711 100644
--- a/MP/code/rend2/tr_bsp.c
+++ b/MP/code/rend2/tr_bsp.c
@@ -1913,429 +1913,6 @@ void R_MovePatchSurfacesToHunk( void ) {
 }
 
 /*
-=================
-BSPSurfaceCompare
-compare function for qsort()
-=================
-*/
-static int BSPSurfaceCompare(const void *a, const void *b)
-{
-	msurface_t   *aa, *bb;
-
-	aa = *(msurface_t **) a;
-	bb = *(msurface_t **) b;
-
-	// shader first
-	if(aa->shader->sortedIndex < bb->shader->sortedIndex)
-		return -1;
-
-	else if(aa->shader->sortedIndex > bb->shader->sortedIndex)
-		return 1;
-
-	// by fogIndex
-	if(aa->fogIndex < bb->fogIndex)
-		return -1;
-
-	else if(aa->fogIndex > bb->fogIndex)
-		return 1;
-
-	// by cubemapIndex
-	if(aa->cubemapIndex < bb->cubemapIndex)
-		return -1;
-
-	else if(aa->cubemapIndex > bb->cubemapIndex)
-		return 1;
-
-	// by leaf
-	if (s_worldData.surfacesViewCount[aa - s_worldData.surfaces] < s_worldData.surfacesViewCount[bb - s_worldData.surfaces])
-		return -1;
-
-	else if (s_worldData.surfacesViewCount[aa - s_worldData.surfaces] > s_worldData.surfacesViewCount[bb - s_worldData.surfaces])
-		return 1;
-
-	// by surface number
-	if (aa < bb)
-		return -1;
-
-	else if (aa > bb)
-		return 1;
-
-
-	return 0;
-}
-
-
-static void CopyVert(const srfVert_t * in, srfVert_t * out)
-{
-	VectorCopy(in->xyz,      out->xyz);
-	VectorCopy4(in->tangent, out->tangent);
-	VectorCopy4(in->normal,   out->normal);
-	VectorCopy4(in->lightdir, out->lightdir);
-
-	VectorCopy2(in->st,       out->st);
-	VectorCopy2(in->lightmap, out->lightmap);
-
-	VectorCopy4(in->color,    out->color);
-}
-
-
-/*
-===============
-R_CreateWorldVaos
-===============
-*/
-static void R_CreateWorldVaos(void)
-{
-	int             i, j, k;
-
-	int             numVerts;
-	srfVert_t      *verts;
-
-	int             numIndexes;
-	glIndex_t      *indexes;
-
-    int             numSortedSurfaces, numSurfaces;
-	msurface_t   *surface, **firstSurf, **lastSurf, **currSurf;
-	msurface_t  **surfacesSorted;
-
-	vao_t *vao;
-
-	int maxVboSize = 4 * 1024 * 1024;
-
-	int             startTime, endTime;
-
-	startTime = ri.Milliseconds();
-
-	// mark surfaces with best matching leaf, using overlapping bounds
-	// using surfaceViewCount[] as leaf number, and surfacesDlightBits[] as coverage * 256
-	for (i = 0; i < s_worldData.numWorldSurfaces; i++)
-	{
-		s_worldData.surfacesViewCount[i] = -1;
-	}
-
-	for (i = 0; i < s_worldData.numWorldSurfaces; i++)
-	{
-		s_worldData.surfacesDlightBits[i] = 0;
-	}
-
-	for (i = s_worldData.numDecisionNodes; i < s_worldData.numnodes; i++)
-	{
-		mnode_t *leaf = s_worldData.nodes + i;
-
-		for (j = leaf->firstmarksurface; j < leaf->firstmarksurface + leaf->nummarksurfaces; j++)
-		{
-			int surfaceNum = s_worldData.marksurfaces[j];
-			msurface_t *surface = s_worldData.surfaces + surfaceNum;
-			float coverage = 1.0f;
-			int iCoverage;
-
-			for (k = 0; k < 3; k++)
-			{
-				float left, right;
-
-				if (leaf->mins[k] > surface->cullinfo.bounds[1][k] || surface->cullinfo.bounds[0][k] > leaf->maxs[k])
-				{
-					coverage = 0.0f;
-					break;
-				}
-
-				left  = MAX(leaf->mins[k], surface->cullinfo.bounds[0][k]);
-				right = MIN(leaf->maxs[k], surface->cullinfo.bounds[1][k]);
-
-				// nudge a bit in case this is an axis aligned wall
-				coverage *= right - left + 1.0f/256.0f;
-			}
-
-			iCoverage = coverage * 256;
-
-			if (iCoverage > s_worldData.surfacesDlightBits[surfaceNum])
-			{
-				s_worldData.surfacesDlightBits[surfaceNum] = iCoverage;
-				s_worldData.surfacesViewCount[surfaceNum] = i - s_worldData.numDecisionNodes;
-			}
-		}
-	}
-
-	for (i = 0; i < s_worldData.numWorldSurfaces; i++)
-	{
-		s_worldData.surfacesDlightBits[i] = 0;
-	}
-
-	// count surfaces
-	numSortedSurfaces = 0;
-	for(surface = s_worldData.surfaces; surface < s_worldData.surfaces + s_worldData.numWorldSurfaces; surface++)
-	{
-		srfBspSurface_t *bspSurf;
-		shader_t *shader = surface->shader;
-
-		if (shader->isPortal || shader->isSky || ShaderRequiresCPUDeforms(shader))
-			continue;
-
-		// check for this now so we can use srfBspSurface_t* universally in the rest of the function
-		if (!(*surface->data == SF_FACE || *surface->data == SF_GRID || *surface->data == SF_TRIANGLES))
-			continue;
-
-		bspSurf = (srfBspSurface_t *) surface->data;
-
-		if (!bspSurf->numIndexes || !bspSurf->numVerts)
-			continue;
-
-		numSortedSurfaces++;
-	}
-
-	// presort surfaces
-	surfacesSorted = ri.Z_Malloc(numSortedSurfaces * sizeof(*surfacesSorted));
-
-	j = 0;
-	for(surface = s_worldData.surfaces; surface < s_worldData.surfaces + s_worldData.numWorldSurfaces; surface++)
-	{
-		srfBspSurface_t *bspSurf;
-		shader_t *shader = surface->shader;
-
-		if (shader->isPortal || shader->isSky || ShaderRequiresCPUDeforms(shader))
-			continue;
-
-		// check for this now so we can use srfBspSurface_t* universally in the rest of the function
-		if (!(*surface->data == SF_FACE || *surface->data == SF_GRID || *surface->data == SF_TRIANGLES))
-			continue;
-
-		bspSurf = (srfBspSurface_t *) surface->data;
-
-		if (!bspSurf->numIndexes || !bspSurf->numVerts)
-			continue;
-
-		surfacesSorted[j++] = surface;
-	}
-
-	qsort(surfacesSorted, numSortedSurfaces, sizeof(*surfacesSorted), BSPSurfaceCompare);
-
-	k = 0;
-	for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf)
-	{
-		int currVboSize;
-
-		// Find range of surfaces to place in a VAO by:
-		// - Collecting a number of surfaces which fit under maxVboSize, or
-		// - All the surfaces with a single shader which go over maxVboSize
-		currVboSize = 0;
-		while (currVboSize < maxVboSize && lastSurf < surfacesSorted + numSortedSurfaces)
-		{
-			int addVboSize, currShaderIndex;
-
-			addVboSize = 0;
-			currShaderIndex = (*lastSurf)->shader->sortedIndex;
-
-			for(currSurf = lastSurf; currSurf < surfacesSorted + numSortedSurfaces && (*currSurf)->shader->sortedIndex == currShaderIndex; currSurf++)
-			{
-				srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
-
-				addVboSize += bspSurf->numVerts * sizeof(srfVert_t);
-			}
-
-			if (currVboSize != 0 && addVboSize + currVboSize > maxVboSize)
-				break;
-
-			lastSurf = currSurf;
-
-			currVboSize += addVboSize;
-		}
-
-		// count verts/indexes/surfaces
-		numVerts = 0;
-		numIndexes = 0;
-		numSurfaces = 0;
-		for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
-		{
-			srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
-
-			numVerts += bspSurf->numVerts;
-			numIndexes += bspSurf->numIndexes;
-			numSurfaces++;
-		}
-
-		ri.Printf(PRINT_ALL, "...calculating world VAO %d ( %i verts %i tris )\n", k, numVerts, numIndexes / 3);
-
-		// create arrays
-		verts = ri.Hunk_AllocateTempMemory(numVerts * sizeof(srfVert_t));
-		indexes = ri.Hunk_AllocateTempMemory(numIndexes * sizeof(glIndex_t));
-
-		// set up indices and copy vertices
-		numVerts = 0;
-		numIndexes = 0;
-		for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
-		{
-			srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
-			glIndex_t *surfIndex;
-
-			bspSurf->firstIndex = numIndexes;
-			bspSurf->minIndex = numVerts + bspSurf->indexes[0];
-			bspSurf->maxIndex = numVerts + bspSurf->indexes[0];
-
-			for(i = 0, surfIndex = bspSurf->indexes; i < bspSurf->numIndexes; i++, surfIndex++)
-			{
-				indexes[numIndexes++] = numVerts + *surfIndex;
-				bspSurf->minIndex = MIN(bspSurf->minIndex, numVerts + *surfIndex);
-				bspSurf->maxIndex = MAX(bspSurf->maxIndex, numVerts + *surfIndex);
-			}
-
-			bspSurf->firstVert = numVerts;
-
-			for(i = 0; i < bspSurf->numVerts; i++)
-			{
-				CopyVert(&bspSurf->verts[i], &verts[numVerts++]);
-			}
-		}
-
-		vao = R_CreateVao2(va("staticBspModel%i_VAO", k), numVerts, verts, numIndexes, indexes);
-
-		// point bsp surfaces to VAO
-		for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
-		{
-			srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
-
-			bspSurf->vao = vao;
-		}
-
-		ri.Hunk_FreeTempMemory(indexes);
-		ri.Hunk_FreeTempMemory(verts);
-
-		k++;
-	}
-
-	if (r_mergeLeafSurfaces->integer)
-	{
-		msurface_t *mergedSurf;
-
-		// count merged surfaces
-		int numMergedSurfaces = 0, numUnmergedSurfaces = 0;
-		for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf)
-		{
-			for (lastSurf++ ; lastSurf < surfacesSorted + numSortedSurfaces; lastSurf++)
-			{
-				int lastSurfLeafIndex, firstSurfLeafIndex;
-
-				if ((*lastSurf)->shader         != (*firstSurf)->shader
-				 || (*lastSurf)->fogIndex       != (*firstSurf)->fogIndex
-				 || (*lastSurf)->cubemapIndex   != (*firstSurf)->cubemapIndex)
-					break;
-
-				lastSurfLeafIndex  = s_worldData.surfacesViewCount[*lastSurf  - s_worldData.surfaces];
-				firstSurfLeafIndex = s_worldData.surfacesViewCount[*firstSurf - s_worldData.surfaces];
-
-				if (lastSurfLeafIndex != firstSurfLeafIndex)
-					break;
-			}
-
-			// don't merge single surfaces
-			if (firstSurf + 1 == lastSurf)
-			{
-				numUnmergedSurfaces++;
-				continue;
-			}
-
-			numMergedSurfaces++;
-		}
-
-		// Allocate merged surfaces
-		s_worldData.mergedSurfaces = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfaces) * numMergedSurfaces, h_low);
-		s_worldData.mergedSurfacesViewCount = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesViewCount) * numMergedSurfaces, h_low);
-		s_worldData.mergedSurfacesDlightBits = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesDlightBits) * numMergedSurfaces, h_low);
-		s_worldData.mergedSurfacesPshadowBits = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesPshadowBits) * numMergedSurfaces, h_low);
-		s_worldData.numMergedSurfaces = numMergedSurfaces;
-		
-		// view surfaces are like mark surfaces, except negative ones represent merged surfaces
-		// -1 represents 0, -2 represents 1, and so on
-		s_worldData.viewSurfaces = ri.Hunk_Alloc(sizeof(*s_worldData.viewSurfaces) * s_worldData.nummarksurfaces, h_low);
-
-		// actually merge surfaces
-		mergedSurf = s_worldData.mergedSurfaces;
-		for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf)
-		{
-			srfBspSurface_t *bspSurf, *vaoSurf;
-
-			for ( lastSurf++ ; lastSurf < surfacesSorted + numSortedSurfaces; lastSurf++)
-			{
-				int lastSurfLeafIndex, firstSurfLeafIndex;
-
-				if ((*lastSurf)->shader         != (*firstSurf)->shader
-				 || (*lastSurf)->fogIndex       != (*firstSurf)->fogIndex
-				 || (*lastSurf)->cubemapIndex   != (*firstSurf)->cubemapIndex)
-					break;
-
-				lastSurfLeafIndex  = s_worldData.surfacesViewCount[*lastSurf  - s_worldData.surfaces];
-				firstSurfLeafIndex = s_worldData.surfacesViewCount[*firstSurf - s_worldData.surfaces];
-
-				if (lastSurfLeafIndex != firstSurfLeafIndex)
-					break;
-			}
-
-			// don't merge single surfaces
-			if (firstSurf + 1 == lastSurf)
-				continue;
-
-			bspSurf = (srfBspSurface_t *)(*firstSurf)->data;
-
-			vaoSurf = ri.Hunk_Alloc(sizeof(*vaoSurf), h_low);
-			memset(vaoSurf, 0, sizeof(*vaoSurf));
-			vaoSurf->surfaceType = SF_VAO_MESH;
-
-			vaoSurf->vao = bspSurf->vao;
-
-			vaoSurf->firstIndex = bspSurf->firstIndex;
-			vaoSurf->minIndex = bspSurf->minIndex;
-			vaoSurf->maxIndex = bspSurf->maxIndex;
-
-			ClearBounds(vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]);
-			for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
-			{
-				srfBspSurface_t *currBspSurf = (srfBspSurface_t *)(*currSurf)->data;
-
-				vaoSurf->numVerts   += currBspSurf->numVerts;
-				vaoSurf->numIndexes += currBspSurf->numIndexes;
-				vaoSurf->minIndex = MIN(vaoSurf->minIndex, currBspSurf->minIndex);
-				vaoSurf->maxIndex = MAX(vaoSurf->maxIndex, currBspSurf->maxIndex);
-				AddPointToBounds((*currSurf)->cullinfo.bounds[0], vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]);
-				AddPointToBounds((*currSurf)->cullinfo.bounds[1], vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]);
-			}
-
-			VectorCopy(vaoSurf->cullBounds[0], mergedSurf->cullinfo.bounds[0]);
-			VectorCopy(vaoSurf->cullBounds[1], mergedSurf->cullinfo.bounds[1]);
-
-			mergedSurf->cullinfo.type =  CULLINFO_BOX;
-			mergedSurf->data          =  (surfaceType_t *)vaoSurf;
-			mergedSurf->fogIndex      =  (*firstSurf)->fogIndex;
-			mergedSurf->cubemapIndex  =  (*firstSurf)->cubemapIndex;
-			mergedSurf->shader        =  (*firstSurf)->shader;
-
-			// change surfacesViewCount[] from leaf index to viewSurface index - 1 so we can redirect later
-			// subtracting 2 (viewSurface index - 1) to avoid collision with -1 (no leaf)
-			for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
-				s_worldData.surfacesViewCount[*currSurf - s_worldData.surfaces] = -((int)(mergedSurf - s_worldData.mergedSurfaces)) - 2;
-
-			mergedSurf++;
-		}
-
-		// direct viewSurfaces to merged and unmerged surfaces
-		for (i = 0; i < s_worldData.nummarksurfaces; i++)
-		{
-			int viewSurfaceIndex = s_worldData.surfacesViewCount[s_worldData.marksurfaces[i]] + 1;
-			s_worldData.viewSurfaces[i] = (viewSurfaceIndex < 0) ? viewSurfaceIndex : s_worldData.marksurfaces[i];
-		}
-
-		ri.Printf(PRINT_ALL, "Processed %d mergeable surfaces into %d merged, %d unmerged\n",
-			numSortedSurfaces, numMergedSurfaces, numUnmergedSurfaces);
-	}
-
-	for (i = 0; i < s_worldData.numWorldSurfaces; i++)
-		s_worldData.surfacesViewCount[i] = -1;
-
-	ri.Free(surfacesSorted);
-
-	endTime = ri.Milliseconds();
-	ri.Printf(PRINT_ALL, "world VAOs calculation time = %5.2f seconds\n", (endTime - startTime) / 1000.0);
-}
-
-/*
 ===============
 R_LoadSurfaces
 ===============
@@ -3798,9 +3375,6 @@ void RE_LoadWorldMap( const char *name ) {
 		}
 	}
 
-	// create static VAOS from the world
-	R_CreateWorldVaos();
-
 	s_worldData.dataSize = (byte *)ri.Hunk_Alloc( 0, h_low ) - startMarker;
 
 	// only set tr.world now that we know the entire level has loaded properly
diff --git a/MP/code/rend2/tr_cmds.c b/MP/code/rend2/tr_cmds.c
index e4b0b3f..720b388 100644
--- a/MP/code/rend2/tr_cmds.c
+++ b/MP/code/rend2/tr_cmds.c
@@ -73,8 +73,8 @@ void R_PerformanceCounters( void ) {
 	}
 	else if (r_speeds->integer == 7 )
 	{
-		ri.Printf( PRINT_ALL, "VAO draws: static %i dynamic %i\nMultidraws: %i merged %i\n",
-			backEnd.pc.c_staticVaoDraws, backEnd.pc.c_dynamicVaoDraws, backEnd.pc.c_multidraws, backEnd.pc.c_multidrawsMerged );
+		ri.Printf( PRINT_ALL, "VAO draws: static %i dynamic %i\n",
+			backEnd.pc.c_staticVaoDraws, backEnd.pc.c_dynamicVaoDraws);
 		ri.Printf( PRINT_ALL, "GLSL binds: %i  draws: gen %i light %i fog %i dlight %i\n",
 			backEnd.pc.c_glslShaderBinds, backEnd.pc.c_genericDraws, backEnd.pc.c_lightallDraws, backEnd.pc.c_fogDraws, backEnd.pc.c_dlightDraws);
 	}
diff --git a/MP/code/rend2/tr_extensions.c b/MP/code/rend2/tr_extensions.c
index f0b24b7..37bbe84 100644
--- a/MP/code/rend2/tr_extensions.c
+++ b/MP/code/rend2/tr_extensions.c
@@ -31,9 +31,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include "tr_dsa.h"
 
 #define GLE(ret, name, ...) name##proc * qgl##name;
-QGL_1_2_PROCS;
 QGL_1_3_PROCS;
-QGL_1_4_PROCS;
 QGL_1_5_PROCS;
 QGL_2_0_PROCS;
 QGL_EXT_framebuffer_object_PROCS;
@@ -67,19 +65,9 @@ void GLimp_InitExtraExtensions()
 	// GL function loader, based on https://gist.github.com/rygorous/16796a0c876cf8a5f542caddb55bce8a
 #define GLE(ret, name, ...) qgl##name = (name##proc *) SDL_GL_GetProcAddress("gl" #name);
 
-	// OpenGL 1.2, was GL_EXT_draw_range_elements
-	QGL_1_2_PROCS;
-	glRefConfig.drawRangeElements = !!r_ext_draw_range_elements->integer;
-	ri.Printf(PRINT_ALL, result[glRefConfig.drawRangeElements], "glDrawRangeElements()");
-
 	// OpenGL 1.3, was GL_ARB_texture_compression
 	QGL_1_3_PROCS;
 
-	// OpenGL 1.4, was GL_EXT_multi_draw_arrays
-	QGL_1_4_PROCS;
-	glRefConfig.multiDrawArrays = !!r_ext_multi_draw_arrays->integer;
-	ri.Printf(PRINT_ALL, result[glRefConfig.multiDrawArrays], "glMultiDrawElements()");
-
 	// OpenGL 1.5, was GL_ARB_vertex_buffer_object and GL_ARB_occlusion_query
 	QGL_1_5_PROCS;
 	glRefConfig.occlusionQuery = qtrue;
diff --git a/MP/code/rend2/tr_init.c b/MP/code/rend2/tr_init.c
index 2a3b40b..94b37a2 100644
--- a/MP/code/rend2/tr_init.c
+++ b/MP/code/rend2/tr_init.c
@@ -117,8 +117,6 @@ cvar_t  *r_ati_truform_pointmode;   // linear/cubic
 cvar_t  *r_ati_fsaa_samples;        //DAJ valids are 1, 2, 4
 //----(SA)	end
 
-cvar_t  *r_ext_draw_range_elements;
-cvar_t  *r_ext_multi_draw_arrays;
 cvar_t  *r_ext_framebuffer_object;
 cvar_t  *r_ext_texture_float;
 cvar_t  *r_ext_framebuffer_multisample;
@@ -1302,8 +1300,6 @@ void R_Register( void ) {
 
 	r_ext_texture_env_add = ri.Cvar_Get( "r_ext_texture_env_add", "1", CVAR_ARCHIVE | CVAR_LATCH );
 
-	r_ext_draw_range_elements = ri.Cvar_Get( "r_ext_draw_range_elements", "1", CVAR_ARCHIVE | CVAR_LATCH);
-	r_ext_multi_draw_arrays = ri.Cvar_Get( "r_ext_multi_draw_arrays", "1", CVAR_ARCHIVE | CVAR_LATCH);
 	r_ext_framebuffer_object = ri.Cvar_Get( "r_ext_framebuffer_object", "1", CVAR_ARCHIVE | CVAR_LATCH);
 	r_ext_texture_float = ri.Cvar_Get( "r_ext_texture_float", "1", CVAR_ARCHIVE | CVAR_LATCH);
 	r_ext_framebuffer_multisample = ri.Cvar_Get( "r_ext_framebuffer_multisample", "0", CVAR_ARCHIVE | CVAR_LATCH);
diff --git a/MP/code/rend2/tr_local.h b/MP/code/rend2/tr_local.h
index 18fb33d..f1e0eb1 100644
--- a/MP/code/rend2/tr_local.h
+++ b/MP/code/rend2/tr_local.h
@@ -1041,8 +1041,6 @@ typedef struct srfBspSurface_s
 	// BSP VBO offsets
 	int             firstVert;
 	int             firstIndex;
-	glIndex_t       minIndex;
-	glIndex_t       maxIndex;
 
 	// static render data
 	vao_t          *vao;
@@ -1117,8 +1115,6 @@ typedef struct srfVaoMdvMesh_s
 	// backEnd stats
 	int             numIndexes;
 	int             numVerts;
-	glIndex_t       minIndex;
-	glIndex_t       maxIndex;
 
 	// static render data
 	vao_t          *vao;
@@ -1247,15 +1243,8 @@ typedef struct {
 	int         *surfacesDlightBits;
 	int			*surfacesPshadowBits;
 
-	int			numMergedSurfaces;
-	msurface_t	*mergedSurfaces;
-	int         *mergedSurfacesViewCount;
-	int         *mergedSurfacesDlightBits;
-	int			*mergedSurfacesPshadowBits;
-
 	int nummarksurfaces;
 	int         *marksurfaces;
-	int         *viewSurfaces;
 
 	int numfogs;
 	fog_t       *fogs;
@@ -1506,8 +1495,6 @@ typedef struct {
 
 	qboolean    intelGraphics;
 
-	qboolean    drawRangeElements;
-	qboolean    multiDrawArrays;
 	qboolean	occlusionQuery;
 
 	int glslMajorVersion;
@@ -1545,9 +1532,6 @@ typedef struct {
 	int     c_staticVaoDraws;
 	int     c_dynamicVaoDraws;
 
-	int     c_multidraws;
-	int     c_multidrawsMerged;
-
 	int c_dlightVertexes;
 	int c_dlightIndexes;
 
@@ -1868,8 +1852,6 @@ extern cvar_t   *r_ext_texture_env_add;
 extern cvar_t   *r_ext_texture_filter_anisotropic;  //DAJ from EF
 extern cvar_t	*r_ext_max_anisotropy;
 
-extern  cvar_t  *r_ext_draw_range_elements;
-extern  cvar_t  *r_ext_multi_draw_arrays;
 extern  cvar_t  *r_ext_framebuffer_object;
 extern  cvar_t  *r_ext_texture_float;
 extern  cvar_t  *r_ext_framebuffer_multisample;
@@ -2214,8 +2196,6 @@ typedef struct stageVars
 	vec2_t texcoords[NUM_TEXTURE_BUNDLES][SHADER_MAX_VERTEXES];
 } stageVars_t;
 
-#define MAX_MULTIDRAW_PRIMITIVES	256
-
 typedef struct shaderCommands_s
 {
 	glIndex_t	indexes[SHADER_MAX_INDEXES] QALIGN(16);
@@ -2231,6 +2211,7 @@ typedef struct shaderCommands_s
 	void *attribPointers[ATTR_INDEX_COUNT];
 	vao_t       *vao;
 	qboolean    useInternalVao;
+	qboolean    useCacheVao;
 
 	stageVars_t	svars QALIGN(16);
 
@@ -2248,14 +2229,6 @@ typedef struct shaderCommands_s
 
 	int numIndexes;
 	int numVertexes;
-	glIndex_t   minIndex;
-	glIndex_t   maxIndex;
-
-	int         multiDrawPrimitives;
-	GLsizei     multiDrawNumIndexes[MAX_MULTIDRAW_PRIMITIVES];
-	glIndex_t  *multiDrawFirstIndex[MAX_MULTIDRAW_PRIMITIVES];
-	glIndex_t   multiDrawMinIndex[MAX_MULTIDRAW_PRIMITIVES];
-	glIndex_t   multiDrawMaxIndex[MAX_MULTIDRAW_PRIMITIVES];
 
 	// info extracted from current shader
 	int numPasses;
@@ -2270,7 +2243,7 @@ void RB_EndSurface( void );
 void RB_CheckOverflow( int verts, int indexes );
 #define RB_CHECKOVERFLOW( v,i ) if ( tess.numVertexes + ( v ) >= SHADER_MAX_VERTEXES || tess.numIndexes + ( i ) >= SHADER_MAX_INDEXES ) {RB_CheckOverflow( v,i );}
 
-void R_DrawElementsVao( int numIndexes, glIndex_t firstIndex, glIndex_t minIndex, glIndex_t maxIndex );
+void R_DrawElements( int numIndexes, glIndex_t firstIndex );
 void RB_StageIteratorGeneric( void );
 void RB_StageIteratorSky( void );
 void RB_StageIteratorVertexLitTexture( void );
@@ -2408,6 +2381,15 @@ void            R_VaoList_f(void);
 
 void            RB_UpdateTessVao(unsigned int attribBits);
 
+void VaoCache_Commit(void);
+void VaoCache_Init(void);
+void VaoCache_BindVao(void);
+void VaoCache_CheckAdd(qboolean *endSurface, qboolean *recycleVertexBuffer, qboolean *recycleIndexBuffer, int numVerts, int numIndexes);
+void VaoCache_RecycleVertexBuffer(void);
+void VaoCache_RecycleIndexBuffer(void);
+void VaoCache_InitQueue(void);
+void VaoCache_AddSurface(srfVert_t *verts, int numVerts, glIndex_t *indexes, int numIndexes);
+
 /*
 ============================================================
 
diff --git a/MP/code/rend2/tr_model.c b/MP/code/rend2/tr_model.c
index dac962d..3a93f44 100644
--- a/MP/code/rend2/tr_model.c
+++ b/MP/code/rend2/tr_model.c
@@ -1038,9 +1038,6 @@ static qboolean R_LoadMDC( model_t *mod, int lod, void *buffer, const char *modN
 			vaoSurf->numIndexes = surf->numIndexes;
 			vaoSurf->numVerts = surf->numVerts;
 			
-			vaoSurf->minIndex = 0;
-			vaoSurf->maxIndex = surf->numVerts - 1;
-
 			vaoSurf->vao = R_CreateVao(va("staticMD3Mesh_VAO '%s'", surf->name), data, dataSize, (byte *)surf->indexes, surf->numIndexes * sizeof(*surf->indexes), VAO_USAGE_STATIC);
 
 			vaoSurf->vao->attribs[ATTR_INDEX_POSITION].enabled = 1;
@@ -1521,9 +1518,6 @@ static qboolean R_LoadMD3(model_t * mod, int lod, void *buffer, const char *modN
 			vaoSurf->numIndexes = surf->numIndexes;
 			vaoSurf->numVerts = surf->numVerts;
 			
-			vaoSurf->minIndex = 0;
-			vaoSurf->maxIndex = surf->numVerts - 1;
-
 			vaoSurf->vao = R_CreateVao(va("staticMD3Mesh_VAO '%s'", surf->name), data, dataSize, (byte *)surf->indexes, surf->numIndexes * sizeof(*surf->indexes), VAO_USAGE_STATIC);
 
 			vaoSurf->vao->attribs[ATTR_INDEX_POSITION].enabled = 1;
diff --git a/MP/code/rend2/tr_shade.c b/MP/code/rend2/tr_shade.c
index fab4f16..6ce9c6c 100644
--- a/MP/code/rend2/tr_shade.c
+++ b/MP/code/rend2/tr_shade.c
@@ -47,42 +47,9 @@ R_DrawElements
 ==================
 */
 
-void R_DrawElementsVao( int numIndexes, glIndex_t firstIndex, glIndex_t minIndex, glIndex_t maxIndex )
+void R_DrawElements( int numIndexes, glIndex_t firstIndex)
 {
-	if (glRefConfig.drawRangeElements)
-		qglDrawRangeElements(GL_TRIANGLES, minIndex, maxIndex, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t)));
-	else
-		qglDrawElements(GL_TRIANGLES, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t)));
-	
-}
-
-
-static void R_DrawMultiElementsVao( int multiDrawPrimitives, glIndex_t *multiDrawMinIndex, glIndex_t *multiDrawMaxIndex,
-	GLsizei *multiDrawNumIndexes, glIndex_t **multiDrawFirstIndex)
-{
-	if (glRefConfig.multiDrawArrays && multiDrawPrimitives > 1)
-	{
-		qglMultiDrawElements(GL_TRIANGLES, multiDrawNumIndexes, GL_INDEX_TYPE, (const GLvoid **)multiDrawFirstIndex, multiDrawPrimitives);
-	}
-	else
-	{
-		int i;
-
-		if (glRefConfig.drawRangeElements)
-		{
-			for (i = 0; i < multiDrawPrimitives; i++)
-			{
-				qglDrawRangeElements(GL_TRIANGLES, multiDrawMinIndex[i], multiDrawMaxIndex[i], multiDrawNumIndexes[i], GL_INDEX_TYPE, multiDrawFirstIndex[i]);
-			}
-		}
-		else
-		{
-			for (i = 0; i < multiDrawPrimitives; i++)
-			{
-				qglDrawElements(GL_TRIANGLES, multiDrawNumIndexes[i], GL_INDEX_TYPE, multiDrawFirstIndex[i]);
-			}
-		}
-	}
+	qglDrawElements(GL_TRIANGLES, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t)));
 }
 
 
@@ -163,14 +130,7 @@ static void DrawTris (shaderCommands_t *input) {
 		VectorSet4(color, 1, 1, 1, 1);
 		GLSL_SetUniformVec4(sp, UNIFORM_COLOR, color);
 
-		if (input->multiDrawPrimitives)
-		{
-			R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-		}
-		else
-		{
-			R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
-		}
+		R_DrawElements(input->numIndexes, input->firstIndex);
 	}
 
 	qglDepthRange( 0, 1 );
@@ -188,6 +148,7 @@ static void DrawNormals (shaderCommands_t *input) {
 	//FIXME: implement this
 }
 
+
 /*
 ==============
 RB_BeginSurface
@@ -204,7 +165,6 @@ void RB_BeginSurface( shader_t *shader, int fogNum, int cubemapIndex ) {
 	tess.numIndexes = 0;
 	tess.firstIndex = 0;
 	tess.numVertexes = 0;
-	tess.multiDrawPrimitives = 0;
 	tess.shader = state;
 	tess.fogNum = fogNum;
 	tess.cubemapIndex = cubemapIndex;
@@ -214,6 +174,7 @@ void RB_BeginSurface( shader_t *shader, int fogNum, int cubemapIndex ) {
 	tess.numPasses = state->numUnfoggedPasses;
 	tess.currentStageIteratorFunc = state->optimalStageIteratorFunc;
 	tess.useInternalVao = qtrue;
+	tess.useCacheVao = qfalse;
 
 	tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset;
 	if (tess.shader->clampTime && tess.shaderTime >= tess.shader->clampTime) {
@@ -227,7 +188,6 @@ void RB_BeginSurface( shader_t *shader, int fogNum, int cubemapIndex ) {
 }
 
 
-
 extern float EvalWaveForm( const waveForm_t *wf );
 extern float EvalWaveFormClamped( const waveForm_t *wf );
 
@@ -458,15 +418,7 @@ static void ProjectDlightTexture( void ) {
 			GL_State( GLS_ATEST_GT_0 | GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );
 		}
 
-		if (tess.multiDrawPrimitives)
-		{
-			shaderCommands_t *input = &tess;
-			R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-		}
-		else
-		{
-			R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
-		}
+		R_DrawElements(tess.numIndexes, tess.firstIndex);
 
 		backEnd.pc.c_totalIndexes += tess.numIndexes;
 		backEnd.pc.c_dlightIndexes += tess.numIndexes;
@@ -942,14 +894,7 @@ static void ForwardDlight( void ) {
 		// draw
 		//
 
-		if (input->multiDrawPrimitives)
-		{
-			R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-		}
-		else
-		{
-			R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
-		}
+		R_DrawElements(input->numIndexes, input->firstIndex);
 
 		backEnd.pc.c_totalIndexes += tess.numIndexes;
 		backEnd.pc.c_dlightIndexes += tess.numIndexes;
@@ -1018,20 +963,14 @@ static void ProjectPshadowVBOGLSL( void ) {
 		// draw
 		//
 
-		if (input->multiDrawPrimitives)
-		{
-			R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-		}
-		else
-		{
-			R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
-		}
+		R_DrawElements(input->numIndexes, input->firstIndex);
 
 		backEnd.pc.c_totalIndexes += tess.numIndexes;
 		//backEnd.pc.c_dlightIndexes += tess.numIndexes;
 	}
 }
 
+
 /*
 ===================
 RB_FogPass
@@ -1165,15 +1104,7 @@ static void RB_FogPass( int wolfFog ) {
 		GL_State( GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA );
 	}
 
-	if (tess.multiDrawPrimitives)
-	{
-		shaderCommands_t *input = &tess;
-		R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-	}
-	else
-	{
-		R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
-	}
+	R_DrawElements(tess.numIndexes, tess.firstIndex);
 }
 
 
@@ -1194,6 +1125,7 @@ static unsigned int RB_CalcShaderVertexAttribs( shaderCommands_t *input )
 	return vertexAttribs;
 }
 
+
 static void RB_IterateStagesGeneric( shaderCommands_t *input )
 {
 	int stage;
@@ -1667,14 +1599,7 @@ static void RB_IterateStagesGeneric( shaderCommands_t *input )
 		//
 		// draw
 		//
-		if (input->multiDrawPrimitives)
-		{
-			R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-		}
-		else
-		{
-			R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
-		}
+		R_DrawElements(input->numIndexes, input->firstIndex);
 
 		// allow skipping out to show just lightmaps during development
 		if ( r_lightmap->integer && ( pStage->bundle[0].isLightmap || pStage->bundle[1].isLightmap ) )
@@ -1748,20 +1673,12 @@ static void RB_RenderShadowmap( shaderCommands_t *input )
 			// draw
 			//
 
-			if (input->multiDrawPrimitives)
-			{
-				R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-			}
-			else
-			{
-				R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
-			}
+			R_DrawElements(input->numIndexes, input->firstIndex);
 		}
 	}
 }
 
 
-
 /*
 ** RB_StageIteratorGeneric
 */
@@ -1983,6 +1900,12 @@ void RB_EndSurface( void ) {
 		return;
 	}
 
+	if (tess.useCacheVao)
+	{
+		// upload indexes now
+		VaoCache_Commit();
+	}
+
 	//
 	// update performance counters
 	//
@@ -2009,7 +1932,6 @@ void RB_EndSurface( void ) {
 	tess.numIndexes = 0;
 	tess.numVertexes = 0;
 	tess.firstIndex = 0;
-	tess.multiDrawPrimitives = 0;
 
 	GLimp_LogComment( "----------\n" );
 }
diff --git a/MP/code/rend2/tr_sky.c b/MP/code/rend2/tr_sky.c
index 10dca6c..c4972d0 100644
--- a/MP/code/rend2/tr_sky.c
+++ b/MP/code/rend2/tr_sky.c
@@ -380,8 +380,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
 	int s, t;
 	int firstVertex = tess.numVertexes;
 	//int firstIndex = tess.numIndexes;
-	int minIndex = tess.minIndex;
-	int maxIndex = tess.maxIndex;
 	vec4_t color;
 
 	//tess.numVertexes = 0;
@@ -431,9 +429,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
 		}
 	}
 
-	tess.minIndex = firstVertex;
-	tess.maxIndex = tess.numVertexes;
-
 	// FIXME: A lot of this can probably be removed for speed, and refactored into a more convenient function
 	RB_UpdateTessVao(ATTR_POSITION | ATTR_TEXCOORD);
 /*
@@ -478,8 +473,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
 		GLSL_SetUniformVec4(sp, UNIFORM_DIFFUSETEXOFFTURB, vector);
 	}
 
-	R_DrawElementsVao(tess.numIndexes - tess.firstIndex, tess.firstIndex, tess.minIndex, tess.maxIndex);
-
 	//qglDrawElements(GL_TRIANGLES, tess.numIndexes - tess.firstIndex, GL_INDEX_TYPE, BUFFER_OFFSET(tess.firstIndex * sizeof(glIndex_t)));
 	
 	//R_BindNullVBO();
@@ -488,8 +481,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
 	tess.numIndexes = tess.firstIndex;
 	tess.numVertexes = firstVertex;
 	tess.firstIndex = 0;
-	tess.minIndex = minIndex;
-	tess.maxIndex = maxIndex;
 }
 
 static void DrawSkySideInner( struct image_s *image, const int mins[2], const int maxs[2] )
@@ -497,8 +488,6 @@ static void DrawSkySideInner( struct image_s *image, const int mins[2], const in
 	int s, t;
 	int firstVertex = tess.numVertexes;
 	//int firstIndex = tess.numIndexes;
-	int minIndex = tess.minIndex;
-	int maxIndex = tess.maxIndex;
 	vec4_t color;
 
 	//tess.numVertexes = 0;
@@ -548,9 +537,6 @@ static void DrawSkySideInner( struct image_s *image, const int mins[2], const in
 		}
 	}
 
-	tess.minIndex = firstVertex;
-	tess.maxIndex = tess.numVertexes;
-
 	// FIXME: A lot of this can probably be removed for speed, and refactored into a more convenient function
 	RB_UpdateTessVao(ATTR_POSITION | ATTR_TEXCOORD);
 /*
@@ -595,7 +581,7 @@ static void DrawSkySideInner( struct image_s *image, const int mins[2], const in
 		GLSL_SetUniformVec4(sp, UNIFORM_DIFFUSETEXOFFTURB, vector);
 	}
 
-	R_DrawElementsVao(tess.numIndexes - tess.firstIndex, tess.firstIndex, tess.minIndex, tess.maxIndex);
+	R_DrawElements(tess.numIndexes - tess.firstIndex, tess.firstIndex);
 
 	//qglDrawElements(GL_TRIANGLES, tess.numIndexes - tess.firstIndex, GL_INDEX_TYPE, BUFFER_OFFSET(tess.firstIndex * sizeof(glIndex_t)));
 	
@@ -605,8 +591,6 @@ static void DrawSkySideInner( struct image_s *image, const int mins[2], const in
 	tess.numIndexes = tess.firstIndex;
 	tess.numVertexes = firstVertex;
 	tess.firstIndex = 0;
-	tess.minIndex = minIndex;
-	tess.maxIndex = maxIndex;
 }
 
 static void DrawSkyBox( shader_t *shader ) {
diff --git a/MP/code/rend2/tr_surface.c b/MP/code/rend2/tr_surface.c
index b652745..7b79e27 100644
--- a/MP/code/rend2/tr_surface.c
+++ b/MP/code/rend2/tr_surface.c
@@ -75,7 +75,7 @@ void RB_CheckOverflow( int verts, int indexes ) {
 
 void RB_CheckVao(vao_t *vao)
 {
-	if (vao != glState.currentVao || tess.multiDrawPrimitives >= MAX_MULTIDRAW_PRIMITIVES)
+	if (vao != glState.currentVao)
 	{
 		RB_EndSurface();
 		RB_BeginSurface( tess.shader, tess.fogNum, tess.cubemapIndex );
@@ -213,18 +213,14 @@ void RB_InstantQuad2(vec4_t quadVerts[4], vec2_t texCoords[4])
 	tess.indexes[tess.numIndexes++] = 0;
 	tess.indexes[tess.numIndexes++] = 2;
 	tess.indexes[tess.numIndexes++] = 3;
-	tess.minIndex = 0;
-	tess.maxIndex = 3;
 
 	RB_UpdateTessVao(ATTR_POSITION | ATTR_TEXCOORD);
 
-	R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
+	R_DrawElements(tess.numIndexes, tess.firstIndex);
 
 	tess.numIndexes = 0;
 	tess.numVertexes = 0;
 	tess.firstIndex = 0;
-	tess.minIndex = 0;
-	tess.maxIndex = 0;
 }
 
 void RB_InstantQuad(vec4_t quadVerts[4])
@@ -438,118 +434,88 @@ static void RB_SurfaceVertsAndIndexes( int numVerts, srfVert_t *verts, int numIn
 	tess.numVertexes += numVerts;
 }
 
-static qboolean RB_SurfaceVao(vao_t *vao, int numVerts, int numIndexes, int firstIndex, int minIndex, int maxIndex, int dlightBits, int pshadowBits, qboolean shaderCheck)
+static qboolean RB_SurfaceVaoCached(int numVerts, srfVert_t *verts, int numIndexes, glIndex_t *indexes, int dlightBits, int pshadowBits)
 {
-	int i, mergeForward, mergeBack;
-	GLvoid *firstIndexOffset, *lastIndexOffset;
+	qboolean recycleVertexBuffer = qfalse;
+	qboolean recycleIndexBuffer = qfalse;
+	qboolean endSurface = qfalse;
 
-	if (!vao)
-	{
+	if (!(!ShaderRequiresCPUDeforms(tess.shader) && !tess.shader->isSky && !tess.shader->isPortal))
 		return qfalse;
-	}
 
-	if (shaderCheck && !(!ShaderRequiresCPUDeforms(tess.shader) && !tess.shader->isSky && !tess.shader->isPortal))
-	{
+	if (!numIndexes || !numVerts)
 		return qfalse;
-	}
 
-	RB_CheckVao(vao);
+	VaoCache_BindVao();
 
 	tess.dlightBits |= dlightBits;
 	tess.pshadowBits |= pshadowBits;
 
-	// merge this into any existing multidraw primitives
-	mergeForward = -1;
-	mergeBack = -1;
-	firstIndexOffset = BUFFER_OFFSET(firstIndex * sizeof(glIndex_t));
-	lastIndexOffset  = BUFFER_OFFSET((firstIndex + numIndexes) * sizeof(glIndex_t));
+	VaoCache_CheckAdd(&endSurface, &recycleVertexBuffer, &recycleIndexBuffer, numVerts, numIndexes);
 
-	if (tess.multiDrawPrimitives && r_mergeMultidraws->integer)
+	if (endSurface)
 	{
-		i = 0;
+		RB_EndSurface();
+		RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex);
+	}
 
-		if (r_mergeMultidraws->integer == 1)
-		{
-			// lazy merge, only check the last primitive
-			i = tess.multiDrawPrimitives - 1;
-		}
+	if (recycleVertexBuffer)
+		VaoCache_RecycleVertexBuffer();
 
-		for (; i < tess.multiDrawPrimitives; i++)
-		{
-			if (firstIndexOffset == tess.multiDrawFirstIndex[i] + tess.multiDrawNumIndexes[i])
-			{
-				mergeBack = i;
+	if (recycleIndexBuffer)
+		VaoCache_RecycleIndexBuffer();
 
-				if (mergeForward != -1)
-					break;
-			}
+	if (!tess.numVertexes)
+		VaoCache_InitQueue();
 
-			if (lastIndexOffset == tess.multiDrawFirstIndex[i])
-			{
-				mergeForward = i;
+	VaoCache_AddSurface(verts, numVerts, indexes, numIndexes);
 
-				if (mergeBack != -1)
-					break;
-			}
-		}
-	}
+	tess.numIndexes += numIndexes;
+	tess.numVertexes += numVerts;
+	tess.useInternalVao = qfalse;
+	tess.useCacheVao = qtrue;
 
-	if (mergeBack != -1 && mergeForward == -1)
-	{
-		tess.multiDrawNumIndexes[mergeBack] += numIndexes;
-		tess.multiDrawMinIndex[mergeBack] = MIN(tess.multiDrawMinIndex[mergeBack], minIndex);
-		tess.multiDrawMaxIndex[mergeBack] = MAX(tess.multiDrawMaxIndex[mergeBack], maxIndex);
-		backEnd.pc.c_multidrawsMerged++;
-	}
-	else if (mergeBack == -1 && mergeForward != -1)
+	return qtrue;
+}
+
+
+static qboolean RB_SurfaceVao(vao_t *vao, int numVerts, int numIndexes, int firstIndex, int dlightBits, int pshadowBits, qboolean shaderCheck)
+{
+	if (!vao)
 	{
-		tess.multiDrawNumIndexes[mergeForward] += numIndexes;
-		tess.multiDrawFirstIndex[mergeForward]  = firstIndexOffset;
-		tess.multiDrawMinIndex[mergeForward] = MIN(tess.multiDrawMinIndex[mergeForward], minIndex);
-		tess.multiDrawMaxIndex[mergeForward] = MAX(tess.multiDrawMaxIndex[mergeForward], maxIndex);
-		backEnd.pc.c_multidrawsMerged++;
+		return qfalse;
 	}
-	else if (mergeBack != -1 && mergeForward != -1)
-	{
-		tess.multiDrawNumIndexes[mergeBack] += numIndexes + tess.multiDrawNumIndexes[mergeForward];
-		tess.multiDrawMinIndex[mergeBack] = MIN(tess.multiDrawMinIndex[mergeBack], MIN(tess.multiDrawMinIndex[mergeForward], minIndex));
-		tess.multiDrawMaxIndex[mergeBack] = MAX(tess.multiDrawMaxIndex[mergeBack], MAX(tess.multiDrawMaxIndex[mergeForward], maxIndex));
-		tess.multiDrawPrimitives--;
 
-		if (mergeForward != tess.multiDrawPrimitives)
-		{
-			tess.multiDrawNumIndexes[mergeForward] = tess.multiDrawNumIndexes[tess.multiDrawPrimitives];
-			tess.multiDrawFirstIndex[mergeForward] = tess.multiDrawFirstIndex[tess.multiDrawPrimitives];
-			tess.multiDrawMinIndex[mergeForward] = tess.multiDrawMinIndex[tess.multiDrawPrimitives];
-			tess.multiDrawMaxIndex[mergeForward] = tess.multiDrawMaxIndex[tess.multiDrawPrimitives];
-		}
-		backEnd.pc.c_multidrawsMerged += 2;
-	}
-	else //if (mergeBack == -1 && mergeForward == -1)
+	if (shaderCheck && !(!ShaderRequiresCPUDeforms(tess.shader) && !tess.shader->isSky && !tess.shader->isPortal))
 	{
-		tess.multiDrawNumIndexes[tess.multiDrawPrimitives] = numIndexes;
-		tess.multiDrawFirstIndex[tess.multiDrawPrimitives] = firstIndexOffset;
-		tess.multiDrawMinIndex[tess.multiDrawPrimitives] = minIndex;
-		tess.multiDrawMaxIndex[tess.multiDrawPrimitives] = maxIndex;
-		tess.multiDrawPrimitives++;
+		return qfalse;
 	}
 
-	backEnd.pc.c_multidraws++;
+	RB_CheckVao(vao);
 
-	tess.numIndexes  += numIndexes;
-	tess.numVertexes += numVerts;
+	tess.dlightBits |= dlightBits;
+	tess.pshadowBits |= pshadowBits;
+
+	RB_EndSurface();
+	RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex);
+
+	backEnd.pc.c_staticVaoDraws++;
+
+	tess.numIndexes = numIndexes;
+	tess.numVertexes = numVerts;
 
 	return qtrue;
 }
 
+
 /*
 =============
 RB_SurfaceTriangles
 =============
 */
 static void RB_SurfaceTriangles( srfBspSurface_t *srf ) {
-	if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes,
-				srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) )
+	if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes,
+		srf->indexes, srf->dlightBits, srf->pshadowBits))
 	{
 		return;
 	}
@@ -612,8 +578,6 @@ static void RB_SurfaceBeam( void ) {
 	tess.numVertexes = 0;
 	tess.numIndexes = 0;
 	tess.firstIndex = 0;
-	tess.minIndex = 0;
-	tess.maxIndex = 0;
 
 	for ( i = 0; i <= NUM_BEAM_SEGS; i++ ) {
 		VectorCopy(start_points[ i % NUM_BEAM_SEGS ], tess.xyz[tess.numVertexes++]);
@@ -630,9 +594,6 @@ static void RB_SurfaceBeam( void ) {
 		tess.indexes[tess.numIndexes++] = 1  + (i + 1) * 2;
 	}
 
-	tess.minIndex = 0;
-	tess.maxIndex = tess.numVertexes;
-
 	// FIXME: A lot of this can probably be removed for speed, and refactored into a more convenient function
 	RB_UpdateTessVao(ATTR_POSITION);
 	
@@ -642,13 +603,11 @@ static void RB_SurfaceBeam( void ) {
 
 	GLSL_SetUniformVec4(sp, UNIFORM_COLOR, colorRed);
 
-	R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
+	R_DrawElements(tess.numIndexes, tess.firstIndex);
 
 	tess.numIndexes = 0;
 	tess.numVertexes = 0;
 	tess.firstIndex = 0;
-	tess.minIndex = 0;
-	tess.maxIndex = 0;
 }
 
 //================================================================================
@@ -993,8 +952,8 @@ RB_SurfaceFace
 ==============
 */
 static void RB_SurfaceFace( srfBspSurface_t *srf ) {
-	if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes,
-					srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) )
+	if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes,
+		srf->indexes, srf->dlightBits, srf->pshadowBits))
 	{
 		return;
 	}
@@ -1061,8 +1020,8 @@ static void RB_SurfaceGrid( srfBspSurface_t *srf ) {
 	int     pshadowBits;
 	//int		*vDlightBits;
 
-	if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes,
-					srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) )
+	if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes,
+		srf->indexes, srf->dlightBits, srf->pshadowBits))
 	{
 		return;
 	}
@@ -1314,7 +1273,7 @@ static void RB_SurfaceFlare( srfFlare_t *surf ) {
 static void RB_SurfaceVaoMesh(srfBspSurface_t * srf)
 {
 	RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes, srf->firstIndex,
-			srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qfalse );
+			srf->dlightBits, srf->pshadowBits, qfalse );
 }
 
 void RB_SurfaceVaoMdvMesh(srfVaoMdvMesh_t * surface)
@@ -1344,8 +1303,6 @@ void RB_SurfaceVaoMdvMesh(srfVaoMdvMesh_t * surface)
 
 	tess.numIndexes = surface->numIndexes;
 	tess.numVertexes = surface->numVerts;
-	tess.minIndex = surface->minIndex;
-	tess.maxIndex = surface->maxIndex;
 
 	//mdvModel = surface->mdvModel;
 	//mdvSurface = surface->mdvSurface;
diff --git a/MP/code/rend2/tr_vbo.c b/MP/code/rend2/tr_vbo.c
index 0ac63a4..e4d0ca3 100644
--- a/MP/code/rend2/tr_vbo.c
+++ b/MP/code/rend2/tr_vbo.c
@@ -499,6 +499,8 @@ void R_InitVaos(void)
 
 	R_BindNullVao();
 
+	VaoCache_Init();
+
 	GL_CheckErrors();
 }
 
@@ -650,3 +652,315 @@ void RB_UpdateTessVao(unsigned int attribBits)
 		qglBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, tess.numIndexes * sizeof(tess.indexes[0]), tess.indexes);
 	}
 }
+
+// FIXME: This sets a limit of 65536 verts/262144 indexes per static surface
+// This is higher than the old vq3 limits but is worth noting
+#define VAOCACHE_QUEUE_MAX_SURFACES (1 << 10)
+#define VAOCACHE_QUEUE_MAX_VERTEXES (1 << 16)
+#define VAOCACHE_QUEUE_MAX_INDEXES (VAOCACHE_QUEUE_MAX_VERTEXES * 4)
+
+typedef struct queuedSurface_s // one surface waiting in the commit queue
+{
+	srfVert_t *vertexes; // caller's vertex array; stored by pointer, copied only in VaoCache_Commit
+	int numVerts;
+	glIndex_t *indexes; // caller's index array; its address is also compared during the cache lookup
+	int numIndexes;
+}
+queuedSurface_t;
+
+static struct // commit queue: surfaces added since the last VaoCache_InitQueue
+{
+	queuedSurface_t surfaces[VAOCACHE_QUEUE_MAX_SURFACES];
+	int numSurfaces;
+
+	srfVert_t vertexes[VAOCACHE_QUEUE_MAX_VERTEXES]; // CPU-side scratch that VaoCache_Commit fills before uploading
+	int vertexCommitSize; // bytes of vertex data queued so far
+
+	glIndex_t indexes[VAOCACHE_QUEUE_MAX_INDEXES]; // CPU-side scratch for the rebased indexes
+	int indexCommitSize; // bytes of index data queued so far
+}
+vcq;
+
+#define VAOCACHE_MAX_SURFACES (1 << 16)
+#define VAOCACHE_MAX_BATCHES (1 << 10)
+
+// srfVert_t is 60 bytes
+// assuming each vert is referenced 4 times, need 16 bytes (4 glIndex_t) per vert
+// -> need about 4/15ths the space for indexes as vertexes
+#define VAOCACHE_VERTEX_BUFFER_SIZE (16 * 1024 * 1024)
+#define VAOCACHE_INDEX_BUFFER_SIZE (5 * 1024 * 1024)
+
+typedef struct buffered_s // record of one surface's index set already uploaded to the cache
+{
+	void *data; // source index pointer; VaoCache_Commit compares it by address
+	int size; // size of the index set in bytes
+	int bufferOffset; // byte offset of the set inside the cache's index buffer
+}
+buffered_t;
+
+static struct // persistent cache state, reset by VaoCache_Init and the Recycle functions
+{
+	vao_t *vao;
+	buffered_t surfaceIndexSets[VAOCACHE_MAX_SURFACES]; // per-surface records, stored batch after batch
+	int numSurfaces;
+
+	int batchLengths[VAOCACHE_MAX_BATCHES]; // number of surfaces in each cached batch
+	int numBatches;
+
+	int vertexOffset; // bytes already used in the vertex buffer
+	int indexOffset; // bytes already used in the index buffer
+}
+vc;
+
+void VaoCache_Commit(void) // point tess.firstIndex at the queued batch, uploading it first if it is not cached
+{
+	buffered_t *indexSet;
+	int *batchLength;
+	queuedSurface_t *surf, *end = vcq.surfaces + vcq.numSurfaces;
+
+	R_BindVao(vc.vao);
+
+	// Search for a cached batch made of the same surfaces, in the same order, as the queue
+	// FIXME: Use faster search
+	indexSet = vc.surfaceIndexSets;
+	batchLength = vc.batchLengths;
+	for (; batchLength < vc.batchLengths + vc.numBatches; batchLength++)
+	{
+		if (*batchLength == vcq.numSurfaces)
+		{
+			buffered_t *indexSet2 = indexSet;
+			for (surf = vcq.surfaces; surf < end; surf++, indexSet2++)
+			{
+				if (surf->indexes != indexSet2->data || (surf->numIndexes * sizeof(glIndex_t)) != indexSet2->size)
+					break; // mismatch: only the index pointer and byte size are compared, not the contents
+			}
+
+			if (surf == end)
+				break; // every queued surface matched; indexSet is the batch's first record
+		}
+
+		indexSet += *batchLength; // records are stored batch after batch, so step over this batch
+	}
+
+	// If found, use it (on a miss indexSet has been advanced past every batch's records)
+	if (indexSet < vc.surfaceIndexSets + vc.numSurfaces)
+	{
+		tess.firstIndex = indexSet->bufferOffset / sizeof(glIndex_t); // byte offset -> index position
+		//ri.Printf(PRINT_ALL, "firstIndex %d numIndexes %d as %d\n", tess.firstIndex, tess.numIndexes, batchLength - vc.batchLengths);
+		//ri.Printf(PRINT_ALL, "vc.numSurfaces %d vc.numBatches %d\n", vc.numSurfaces, vc.numBatches);
+	}
+	// If not, rebuffer the batch
+	// FIXME: keep track of the vertexes so we don't have to reupload them every time
+	else
+	{
+		srfVert_t *dstVertex = vcq.vertexes;
+		glIndex_t *dstIndex = vcq.indexes;
+
+		batchLength = vc.batchLengths + vc.numBatches; // append a new batch record
+		*batchLength = vcq.numSurfaces;
+		vc.numBatches++;
+
+		tess.firstIndex = vc.indexOffset / sizeof(glIndex_t);
+		vcq.vertexCommitSize = 0; // both totals are recomputed below while copying
+		vcq.indexCommitSize = 0;
+		for (surf = vcq.surfaces; surf < end; surf++)
+		{
+			glIndex_t *srcIndex = surf->indexes;
+			int vertexesSize = surf->numVerts * sizeof(srfVert_t);
+			int indexesSize = surf->numIndexes * sizeof(glIndex_t);
+			int i, indexOffset = (vc.vertexOffset + vcq.vertexCommitSize) / sizeof(srfVert_t);
+
+			Com_Memcpy(dstVertex, surf->vertexes, vertexesSize);
+			dstVertex += surf->numVerts;
+
+			vcq.vertexCommitSize += vertexesSize;
+
+			indexSet = vc.surfaceIndexSets + vc.numSurfaces; // remember this surface for later lookups
+			indexSet->data = surf->indexes;
+			indexSet->size = indexesSize;
+			indexSet->bufferOffset = vc.indexOffset + vcq.indexCommitSize;
+			vc.numSurfaces++;
+
+			for (i = 0; i < surf->numIndexes; i++)
+				*dstIndex++ = *srcIndex++ + indexOffset; // rebase onto this surface's first vertex in the shared vertex buffer
+
+			vcq.indexCommitSize += indexesSize;
+		}
+
+		//ri.Printf(PRINT_ALL, "committing %d to %d, %d to %d as %d\n", vcq.vertexCommitSize, vc.vertexOffset, vcq.indexCommitSize, vc.indexOffset, batchLength - vc.batchLengths);
+
+		if (vcq.vertexCommitSize)
+		{
+			qglBindBuffer(GL_ARRAY_BUFFER, vc.vao->vertexesVBO);
+			qglBufferSubData(GL_ARRAY_BUFFER, vc.vertexOffset, vcq.vertexCommitSize, vcq.vertexes);
+			vc.vertexOffset += vcq.vertexCommitSize; // advance the write cursor past the uploaded data
+		}
+
+		if (vcq.indexCommitSize)
+		{
+			qglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesIBO);
+			qglBufferSubData(GL_ELEMENT_ARRAY_BUFFER, vc.indexOffset, vcq.indexCommitSize, vcq.indexes);
+			vc.indexOffset += vcq.indexCommitSize;
+		}
+	}
+}
+
+void VaoCache_Init(void) // create the cache VAO, set its attributes from srfVert_t fields, and zero the cache and queue counters
+{
+	srfVert_t vert; // never read; used only as an operand of sizeof for the field sizes
+	int dataSize;
+
+	vc.vao = R_CreateVao("VaoCache", NULL, VAOCACHE_VERTEX_BUFFER_SIZE, NULL, VAOCACHE_INDEX_BUFFER_SIZE, VAO_USAGE_DYNAMIC);
+
+	vc.vao->attribs[ATTR_INDEX_POSITION].enabled       = 1;
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].enabled       = 1;
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].enabled     = 1;
+	vc.vao->attribs[ATTR_INDEX_NORMAL].enabled         = 1;
+	vc.vao->attribs[ATTR_INDEX_TANGENT].enabled        = 1;
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].enabled = 1;
+	vc.vao->attribs[ATTR_INDEX_COLOR].enabled          = 1;
+
+	vc.vao->attribs[ATTR_INDEX_POSITION].count       = 3;
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].count       = 2;
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].count     = 2;
+	vc.vao->attribs[ATTR_INDEX_NORMAL].count         = 4;
+	vc.vao->attribs[ATTR_INDEX_TANGENT].count        = 4;
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].count = 4;
+	vc.vao->attribs[ATTR_INDEX_COLOR].count          = 4;
+
+	vc.vao->attribs[ATTR_INDEX_POSITION].type             = GL_FLOAT;
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].type             = GL_FLOAT;
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].type           = GL_FLOAT;
+	vc.vao->attribs[ATTR_INDEX_NORMAL].type               = GL_SHORT;
+	vc.vao->attribs[ATTR_INDEX_TANGENT].type              = GL_SHORT;
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].type       = GL_SHORT;
+	vc.vao->attribs[ATTR_INDEX_COLOR].type                = GL_UNSIGNED_SHORT;
+
+	vc.vao->attribs[ATTR_INDEX_POSITION].normalized       = GL_FALSE;
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].normalized       = GL_FALSE;
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].normalized     = GL_FALSE;
+	vc.vao->attribs[ATTR_INDEX_NORMAL].normalized         = GL_TRUE;
+	vc.vao->attribs[ATTR_INDEX_TANGENT].normalized        = GL_TRUE;
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].normalized = GL_TRUE;
+	vc.vao->attribs[ATTR_INDEX_COLOR].normalized          = GL_TRUE;
+
+	vc.vao->attribs[ATTR_INDEX_POSITION].offset       = 0;        dataSize  = sizeof(vert.xyz); // offsets accumulate field sizes: assumes srfVert_t declares these fields in this order with no padding - TODO confirm
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].offset       = dataSize; dataSize += sizeof(vert.st);
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].offset     = dataSize; dataSize += sizeof(vert.lightmap);
+	vc.vao->attribs[ATTR_INDEX_NORMAL].offset         = dataSize; dataSize += sizeof(vert.normal);
+	vc.vao->attribs[ATTR_INDEX_TANGENT].offset        = dataSize; dataSize += sizeof(vert.tangent);
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].offset = dataSize; dataSize += sizeof(vert.lightdir);
+	vc.vao->attribs[ATTR_INDEX_COLOR].offset          = dataSize; dataSize += sizeof(vert.color);
+
+	vc.vao->attribs[ATTR_INDEX_POSITION].stride       = dataSize; // dataSize is now the summed size of all seven fields
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].stride       = dataSize;
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].stride     = dataSize;
+	vc.vao->attribs[ATTR_INDEX_NORMAL].stride         = dataSize;
+	vc.vao->attribs[ATTR_INDEX_TANGENT].stride        = dataSize;
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].stride = dataSize;
+	vc.vao->attribs[ATTR_INDEX_COLOR].stride          = dataSize;
+
+	Vao_SetVertexPointers(vc.vao);
+
+	vc.numSurfaces = 0; // start with an empty cache ...
+	vc.numBatches = 0;
+	vc.vertexOffset = 0;
+	vc.indexOffset = 0;
+	vcq.vertexCommitSize = 0; // ... and an empty commit queue
+	vcq.indexCommitSize = 0;
+	vcq.numSurfaces = 0;
+}
+
+void VaoCache_BindVao(void) // bind the cache's VAO through R_BindVao
+{
+	R_BindVao(vc.vao);
+}
+
+void VaoCache_CheckAdd(qboolean *endSurface, qboolean *recycleVertexBuffer, qboolean *recycleIndexBuffer, int numVerts, int numIndexes) // report what must happen before a surface of this size can be queued
+{
+	int vertexesSize = sizeof(srfVert_t) * numVerts; // flags below are only ever set to qtrue; callers initialise them to qfalse
+	int indexesSize = sizeof(glIndex_t) * numIndexes;
+
+	if (vc.vao->vertexesSize < vc.vertexOffset + vcq.vertexCommitSize + vertexesSize)
+	{
+		//ri.Printf(PRINT_ALL, "out of space in vertex cache: %d < %d + %d + %d\n", vc.vao->vertexesSize, vc.vertexOffset, vcq.vertexCommitSize, vertexesSize);
+		*recycleVertexBuffer = qtrue;
+		*recycleIndexBuffer = qtrue; // cached indexes are rebased onto vertex-buffer offsets, so they go stale with the vertexes
+		*endSurface = qtrue;
+	}
+
+	if (vc.vao->indexesSize < vc.indexOffset + vcq.indexCommitSize + indexesSize)
+	{
+		//ri.Printf(PRINT_ALL, "out of space in index cache\n");
+		*recycleIndexBuffer = qtrue;
+		*endSurface = qtrue;
+	}
+
+	if (vc.numSurfaces + vcq.numSurfaces >= VAOCACHE_MAX_SURFACES)
+	{
+		//ri.Printf(PRINT_ALL, "out of surfaces in index cache\n");
+		*recycleIndexBuffer = qtrue;
+		*endSurface = qtrue;
+	}
+
+	if (vc.numBatches >= VAOCACHE_MAX_BATCHES)
+	{
+		//ri.Printf(PRINT_ALL, "out of batches in index cache\n");
+		*recycleIndexBuffer = qtrue;
+		*endSurface = qtrue;
+	}
+
+	if (vcq.numSurfaces >= VAOCACHE_QUEUE_MAX_SURFACES) // the remaining checks concern the CPU-side queue only, so no recycle is requested
+	{
+		//ri.Printf(PRINT_ALL, "out of queued surfaces\n");
+		*endSurface = qtrue;
+	}
+
+	if (VAOCACHE_QUEUE_MAX_VERTEXES * sizeof(srfVert_t) < vcq.vertexCommitSize + vertexesSize)
+	{
+		//ri.Printf(PRINT_ALL, "out of queued vertexes\n");
+		*endSurface = qtrue;
+	}
+
+	if (VAOCACHE_QUEUE_MAX_INDEXES * sizeof(glIndex_t) < vcq.indexCommitSize + indexesSize)
+	{
+		//ri.Printf(PRINT_ALL, "out of queued indexes\n");
+		*endSurface = qtrue;
+	}
+}
+
+void VaoCache_RecycleVertexBuffer(void) // discard all cached vertex data and start filling the buffer from offset 0
+{
+	qglBindBuffer(GL_ARRAY_BUFFER, vc.vao->vertexesVBO);
+	qglBufferData(GL_ARRAY_BUFFER, vc.vao->vertexesSize, NULL, GL_DYNAMIC_DRAW); // re-allocate the store at the same size with no initial data
+	vc.vertexOffset = 0; // the next commit writes from the start of the buffer
+}
+
+void VaoCache_RecycleIndexBuffer(void) // discard all cached index data together with the batch records that describe it
+{
+	qglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesIBO);
+	qglBufferData(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesSize, NULL, GL_DYNAMIC_DRAW); // re-allocate the store at the same size with no initial data
+	vc.indexOffset = 0;
+	vc.numSurfaces = 0; // with no records left, every VaoCache_Commit lookup misses until batches are re-uploaded
+	vc.numBatches = 0;
+}
+
+void VaoCache_InitQueue(void) // empty the commit queue; cached GPU data and batch records are left untouched
+{
+	vcq.vertexCommitSize = 0;
+	vcq.indexCommitSize = 0;
+	vcq.numSurfaces = 0;
+}
+
+void VaoCache_AddSurface(srfVert_t *verts, int numVerts, glIndex_t *indexes, int numIndexes) // append one surface to the commit queue
+{
+	queuedSurface_t *queueEntry = vcq.surfaces + vcq.numSurfaces; // no capacity check here: callers are expected to run VaoCache_CheckAdd first
+	queueEntry->vertexes = verts; // pointers are stored, not copied; the data is read later by VaoCache_Commit
+	queueEntry->numVerts = numVerts;
+	queueEntry->indexes = indexes;
+	queueEntry->numIndexes = numIndexes;
+	vcq.numSurfaces++;
+
+	vcq.vertexCommitSize += sizeof(srfVert_t) * numVerts; // running byte totals, read by VaoCache_CheckAdd
+	vcq.indexCommitSize += sizeof(glIndex_t) * numIndexes;
+}
diff --git a/MP/code/rend2/tr_world.c b/MP/code/rend2/tr_world.c
index 3153f02..43ba9ff 100644
--- a/MP/code/rend2/tr_world.c
+++ b/MP/code/rend2/tr_world.c
@@ -43,7 +43,7 @@ static qboolean	R_CullSurface( msurface_t *surf ) {
 		return qfalse;
 	}
 
-	if ( *surf->data == SF_GRID && r_nocurves->integer ) {
+	if ( r_nocurves->integer && *surf->data == SF_GRID ) {
 		return qtrue;
 	}
 
@@ -627,43 +627,23 @@ static void R_RecursiveWorldNode( mnode_t *node, uint32_t planeBits, uint32_t dl
 			tr.viewParms.visBounds[1][2] = node->maxs[2];
 		}
 
-		// add merged and unmerged surfaces
-		if (tr.world->viewSurfaces && !r_nocurves->integer)
-			view = tr.world->viewSurfaces + node->firstmarksurface;
-		else
-			view = tr.world->marksurfaces + node->firstmarksurface;
+		// add surfaces
+		view = tr.world->marksurfaces + node->firstmarksurface;
 
 		c = node->nummarksurfaces;
 		while (c--) {
 			// just mark it as visible, so we don't jump out of the cache derefencing the surface
 			surf = *view;
-			if (surf < 0)
+			if (tr.world->surfacesViewCount[surf] != tr.viewCount)
 			{
-				if (tr.world->mergedSurfacesViewCount[-surf - 1] != tr.viewCount)
-				{
-					tr.world->mergedSurfacesViewCount[-surf - 1]  = tr.viewCount;
-					tr.world->mergedSurfacesDlightBits[-surf - 1] = dlightBits;
-					tr.world->mergedSurfacesPshadowBits[-surf - 1] = pshadowBits;
-				}
-				else
-				{
-					tr.world->mergedSurfacesDlightBits[-surf - 1] |= dlightBits;
-					tr.world->mergedSurfacesPshadowBits[-surf - 1] |= pshadowBits;
-				}
+				tr.world->surfacesViewCount[surf] = tr.viewCount;
+				tr.world->surfacesDlightBits[surf] = dlightBits;
+				tr.world->surfacesPshadowBits[surf] = pshadowBits;
 			}
 			else
 			{
-				if (tr.world->surfacesViewCount[surf] != tr.viewCount)
-				{
-					tr.world->surfacesViewCount[surf] = tr.viewCount;
-					tr.world->surfacesDlightBits[surf] = dlightBits;
-					tr.world->surfacesPshadowBits[surf] = pshadowBits;
-				}
-				else
-				{
-					tr.world->surfacesDlightBits[surf] |= dlightBits;
-					tr.world->surfacesPshadowBits[surf] |= pshadowBits;
-				}
+				tr.world->surfacesDlightBits[surf] |= dlightBits;
+				tr.world->surfacesPshadowBits[surf] |= pshadowBits;
 			}
 			view++;
 		}
@@ -877,16 +857,6 @@ void R_AddWorldSurfaces( void ) {
 			R_AddWorldSurface( surf, surf->shader, tr.world->surfacesDlightBits[i], tr.world->surfacesPshadowBits[i] );
 			tr.refdef.dlightMask |= tr.world->surfacesDlightBits[i];
 		}
-		for (i = 0; i < tr.world->numMergedSurfaces; i++)
-		{
-			if (tr.world->mergedSurfacesViewCount[i] != tr.viewCount)
-				continue;
-
-			surf = (msurface_t*)tr.world->mergedSurfaces + i;
-
-			R_AddWorldSurface( surf, surf->shader, tr.world->mergedSurfacesDlightBits[i], tr.world->mergedSurfacesPshadowBits[i] );
-			tr.refdef.dlightMask |= tr.world->mergedSurfacesDlightBits[i];
-		}
 
 		tr.refdef.dlightMask = ~tr.refdef.dlightMask;
 	}
diff --git a/MP/code/renderer/qgl.h b/MP/code/renderer/qgl.h
index 6f69d23..1e8fa1c 100644
--- a/MP/code/renderer/qgl.h
+++ b/MP/code/renderer/qgl.h
@@ -521,19 +521,11 @@ extern void (APIENTRYP qglPNTrianglesfATI)(GLenum pname, GLfloat param);
 
 // GL function loader, based on https://gist.github.com/rygorous/16796a0c876cf8a5f542caddb55bce8a
 
-// OpenGL 1.2, was GL_EXT_draw_range_elements
-#define QGL_1_2_PROCS \
-	GLE(void, DrawRangeElements, GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) \
-
 // OpenGL 1.3, was GL_ARB_texture_compression
 #define QGL_1_3_PROCS \
 	GLE(void, CompressedTexImage2D, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data) \
 	GLE(void, CompressedTexSubImage2D, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data) \
 
-// OpenGL 1.4, was GL_EXT_multi_draw_arrays
-#define QGL_1_4_PROCS \
-	GLE(void, MultiDrawElements, GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount) \
-
 // OpenGL 1.5, was GL_ARB_vertex_buffer_object and GL_ARB_occlusion_query
 #define QGL_1_5_PROCS \
 	GLE(void, GenQueries, GLsizei n, GLuint *ids) \
@@ -782,9 +774,7 @@ extern void (APIENTRYP qglPNTrianglesfATI)(GLenum pname, GLfloat param);
 	GLE(GLvoid, NamedFramebufferRenderbufferEXT, GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer) \
 
 #define GLE(ret, name, ...) typedef ret APIENTRY name##proc(__VA_ARGS__); extern name##proc * qgl##name;
-QGL_1_2_PROCS;
 QGL_1_3_PROCS;
-QGL_1_4_PROCS;
 QGL_1_5_PROCS;
 QGL_2_0_PROCS;
 QGL_EXT_framebuffer_object_PROCS;
diff --git a/SP/code/rend2/qgl.h b/SP/code/rend2/qgl.h
index bac919c..bafa29b 100644
--- a/SP/code/rend2/qgl.h
+++ b/SP/code/rend2/qgl.h
@@ -462,19 +462,11 @@ extern void (APIENTRYP qglPNTrianglesfATI)(GLenum pname, GLfloat param);
 
 // GL function loader, based on https://gist.github.com/rygorous/16796a0c876cf8a5f542caddb55bce8a
 
-// OpenGL 1.2, was GL_EXT_draw_range_elements
-#define QGL_1_2_PROCS \
-	GLE(void, DrawRangeElements, GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) \
-
 // OpenGL 1.3, was GL_ARB_texture_compression
 #define QGL_1_3_PROCS \
 	GLE(void, CompressedTexImage2D, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data) \
 	GLE(void, CompressedTexSubImage2D, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data) \
 
-// OpenGL 1.4, was GL_EXT_multi_draw_arrays
-#define QGL_1_4_PROCS \
-	GLE(void, MultiDrawElements, GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount) \
-
 // OpenGL 1.5, was GL_ARB_vertex_buffer_object and GL_ARB_occlusion_query
 #define QGL_1_5_PROCS \
 	GLE(void, GenQueries, GLsizei n, GLuint *ids) \
@@ -723,9 +715,7 @@ extern void (APIENTRYP qglPNTrianglesfATI)(GLenum pname, GLfloat param);
 	GLE(GLvoid, NamedFramebufferRenderbufferEXT, GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer) \
 
 #define GLE(ret, name, ...) typedef ret APIENTRY name##proc(__VA_ARGS__); extern name##proc * qgl##name;
-QGL_1_2_PROCS;
 QGL_1_3_PROCS;
-QGL_1_4_PROCS;
 QGL_1_5_PROCS;
 QGL_2_0_PROCS;
 QGL_EXT_framebuffer_object_PROCS;
diff --git a/SP/code/rend2/tr_bsp.c b/SP/code/rend2/tr_bsp.c
index 3c5266b..81e5872 100644
--- a/SP/code/rend2/tr_bsp.c
+++ b/SP/code/rend2/tr_bsp.c
@@ -1910,421 +1910,6 @@ void R_MovePatchSurfacesToHunk( void ) {
 }
 
 /*
-=================
-BSPSurfaceCompare
-compare function for qsort()
-=================
-*/
-static int BSPSurfaceCompare(const void *a, const void *b)
-{
-	msurface_t   *aa, *bb;
-
-	aa = *(msurface_t **) a;
-	bb = *(msurface_t **) b;
-
-	// shader first
-	if(aa->shader->sortedIndex < bb->shader->sortedIndex)
-		return -1;
-
-	else if(aa->shader->sortedIndex > bb->shader->sortedIndex)
-		return 1;
-
-	// by fogIndex
-	if(aa->fogIndex < bb->fogIndex)
-		return -1;
-
-	else if(aa->fogIndex > bb->fogIndex)
-		return 1;
-
-	// by leaf
-	if (s_worldData.surfacesViewCount[aa - s_worldData.surfaces] < s_worldData.surfacesViewCount[bb - s_worldData.surfaces])
-		return -1;
-
-	else if (s_worldData.surfacesViewCount[aa - s_worldData.surfaces] > s_worldData.surfacesViewCount[bb - s_worldData.surfaces])
-		return 1;
-
-	// by surface number
-	if (aa < bb)
-		return -1;
-
-	else if (aa > bb)
-		return 1;
-
-	return 0;
-}
-
-
-static void CopyVert(const srfVert_t * in, srfVert_t * out)
-{
-	VectorCopy(in->xyz,      out->xyz);
-	VectorCopy4(in->tangent, out->tangent);
-	VectorCopy4(in->normal,   out->normal);
-	VectorCopy4(in->lightdir, out->lightdir);
-
-	VectorCopy2(in->st,       out->st);
-	VectorCopy2(in->lightmap, out->lightmap);
-
-	VectorCopy4(in->color,    out->color);
-}
-
-
-/*
-===============
-R_CreateWorldVaos
-===============
-*/
-static void R_CreateWorldVaos(void)
-{
-	int             i, j, k;
-
-	int             numVerts;
-	srfVert_t      *verts;
-
-	int             numIndexes;
-	glIndex_t      *indexes;
-
-    int             numSortedSurfaces, numSurfaces;
-	msurface_t   *surface, **firstSurf, **lastSurf, **currSurf;
-	msurface_t  **surfacesSorted;
-
-	vao_t *vao;
-
-	int maxVboSize = 4 * 1024 * 1024;
-
-	int             startTime, endTime;
-
-	startTime = ri.Milliseconds();
-
-	// mark surfaces with best matching leaf, using overlapping bounds
-	// using surfaceViewCount[] as leaf number, and surfacesDlightBits[] as coverage * 256
-	for (i = 0; i < s_worldData.numWorldSurfaces; i++)
-	{
-		s_worldData.surfacesViewCount[i] = -1;
-	}
-
-	for (i = 0; i < s_worldData.numWorldSurfaces; i++)
-	{
-		s_worldData.surfacesDlightBits[i] = 0;
-	}
-
-	for (i = s_worldData.numDecisionNodes; i < s_worldData.numnodes; i++)
-	{
-		mnode_t *leaf = s_worldData.nodes + i;
-
-		for (j = leaf->firstmarksurface; j < leaf->firstmarksurface + leaf->nummarksurfaces; j++)
-		{
-			int surfaceNum = s_worldData.marksurfaces[j];
-			msurface_t *surface = s_worldData.surfaces + surfaceNum;
-			float coverage = 1.0f;
-			int iCoverage;
-
-			for (k = 0; k < 3; k++)
-			{
-				float left, right;
-
-				if (leaf->mins[k] > surface->cullinfo.bounds[1][k] || surface->cullinfo.bounds[0][k] > leaf->maxs[k])
-				{
-					coverage = 0.0f;
-					break;
-				}
-
-				left  = MAX(leaf->mins[k], surface->cullinfo.bounds[0][k]);
-				right = MIN(leaf->maxs[k], surface->cullinfo.bounds[1][k]);
-
-				// nudge a bit in case this is an axis aligned wall
-				coverage *= right - left + 1.0f/256.0f;
-			}
-
-			iCoverage = coverage * 256;
-
-			if (iCoverage > s_worldData.surfacesDlightBits[surfaceNum])
-			{
-				s_worldData.surfacesDlightBits[surfaceNum] = iCoverage;
-				s_worldData.surfacesViewCount[surfaceNum] = i - s_worldData.numDecisionNodes;
-			}
-		}
-	}
-
-	for (i = 0; i < s_worldData.numWorldSurfaces; i++)
-	{
-		s_worldData.surfacesDlightBits[i] = 0;
-	}
-
-	// count surfaces
-	numSortedSurfaces = 0;
-	for(surface = s_worldData.surfaces; surface < s_worldData.surfaces + s_worldData.numWorldSurfaces; surface++)
-	{
-		srfBspSurface_t *bspSurf;
-		shader_t *shader = surface->shader;
-
-		if (shader->isPortal || shader->isSky || ShaderRequiresCPUDeforms(shader))
-			continue;
-
-		// check for this now so we can use srfBspSurface_t* universally in the rest of the function
-		if (!(*surface->data == SF_FACE || *surface->data == SF_GRID || *surface->data == SF_TRIANGLES))
-			continue;
-
-		bspSurf = (srfBspSurface_t *) surface->data;
-
-		if (!bspSurf->numIndexes || !bspSurf->numVerts)
-			continue;
-
-		numSortedSurfaces++;
-	}
-
-	// presort surfaces
-	surfacesSorted = ri.Z_Malloc(numSortedSurfaces * sizeof(*surfacesSorted));
-
-	j = 0;
-	for(surface = s_worldData.surfaces; surface < s_worldData.surfaces + s_worldData.numWorldSurfaces; surface++)
-	{
-		srfBspSurface_t *bspSurf;
-		shader_t *shader = surface->shader;
-
-		if (shader->isPortal || shader->isSky || ShaderRequiresCPUDeforms(shader))
-			continue;
-
-		// check for this now so we can use srfBspSurface_t* universally in the rest of the function
-		if (!(*surface->data == SF_FACE || *surface->data == SF_GRID || *surface->data == SF_TRIANGLES))
-			continue;
-
-		bspSurf = (srfBspSurface_t *) surface->data;
-
-		if (!bspSurf->numIndexes || !bspSurf->numVerts)
-			continue;
-
-		surfacesSorted[j++] = surface;
-	}
-
-	qsort(surfacesSorted, numSortedSurfaces, sizeof(*surfacesSorted), BSPSurfaceCompare);
-
-	k = 0;
-	for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf)
-	{
-		int currVboSize;
-
-		// Find range of surfaces to place in a VAO by:
-		// - Collecting a number of surfaces which fit under maxVboSize, or
-		// - All the surfaces with a single shader which go over maxVboSize
-		currVboSize = 0;
-		while (currVboSize < maxVboSize && lastSurf < surfacesSorted + numSortedSurfaces)
-		{
-			int addVboSize, currShaderIndex;
-
-			addVboSize = 0;
-			currShaderIndex = (*lastSurf)->shader->sortedIndex;
-
-			for(currSurf = lastSurf; currSurf < surfacesSorted + numSortedSurfaces && (*currSurf)->shader->sortedIndex == currShaderIndex; currSurf++)
-			{
-				srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
-
-				addVboSize += bspSurf->numVerts * sizeof(srfVert_t);
-			}
-
-			if (currVboSize != 0 && addVboSize + currVboSize > maxVboSize)
-				break;
-
-			lastSurf = currSurf;
-
-			currVboSize += addVboSize;
-		}
-
-		// count verts/indexes/surfaces
-		numVerts = 0;
-		numIndexes = 0;
-		numSurfaces = 0;
-		for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
-		{
-			srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
-
-			numVerts += bspSurf->numVerts;
-			numIndexes += bspSurf->numIndexes;
-			numSurfaces++;
-		}
-
-		ri.Printf(PRINT_ALL, "...calculating world VAO %d ( %i verts %i tris )\n", k, numVerts, numIndexes / 3);
-
-		// create arrays
-		verts = ri.Hunk_AllocateTempMemory(numVerts * sizeof(srfVert_t));
-		indexes = ri.Hunk_AllocateTempMemory(numIndexes * sizeof(glIndex_t));
-
-		// set up indices and copy vertices
-		numVerts = 0;
-		numIndexes = 0;
-		for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
-		{
-			srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
-			glIndex_t *surfIndex;
-
-			bspSurf->firstIndex = numIndexes;
-			bspSurf->minIndex = numVerts + bspSurf->indexes[0];
-			bspSurf->maxIndex = numVerts + bspSurf->indexes[0];
-
-			for(i = 0, surfIndex = bspSurf->indexes; i < bspSurf->numIndexes; i++, surfIndex++)
-			{
-				indexes[numIndexes++] = numVerts + *surfIndex;
-				bspSurf->minIndex = MIN(bspSurf->minIndex, numVerts + *surfIndex);
-				bspSurf->maxIndex = MAX(bspSurf->maxIndex, numVerts + *surfIndex);
-			}
-
-			bspSurf->firstVert = numVerts;
-
-			for(i = 0; i < bspSurf->numVerts; i++)
-			{
-				CopyVert(&bspSurf->verts[i], &verts[numVerts++]);
-			}
-		}
-
-		vao = R_CreateVao2(va("staticBspModel%i_VAO", k), numVerts, verts, numIndexes, indexes);
-
-		// point bsp surfaces to VAO
-		for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
-		{
-			srfBspSurface_t *bspSurf = (srfBspSurface_t *) (*currSurf)->data;
-
-			bspSurf->vao = vao;
-		}
-
-		ri.Hunk_FreeTempMemory(indexes);
-		ri.Hunk_FreeTempMemory(verts);
-
-		k++;
-	}
-
-	if (r_mergeLeafSurfaces->integer)
-	{
-		msurface_t *mergedSurf;
-
-		// count merged surfaces
-		int numMergedSurfaces = 0, numUnmergedSurfaces = 0;
-		for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf)
-		{
-			for (lastSurf++ ; lastSurf < surfacesSorted + numSortedSurfaces; lastSurf++)
-			{
-				int lastSurfLeafIndex, firstSurfLeafIndex;
-
-				if ((*lastSurf)->shader         != (*firstSurf)->shader
-				 || (*lastSurf)->fogIndex       != (*firstSurf)->fogIndex
-				 || (*lastSurf)->cubemapIndex   != (*firstSurf)->cubemapIndex)
-					break;
-
-				lastSurfLeafIndex  = s_worldData.surfacesViewCount[*lastSurf  - s_worldData.surfaces];
-				firstSurfLeafIndex = s_worldData.surfacesViewCount[*firstSurf - s_worldData.surfaces];
-
-				if (lastSurfLeafIndex != firstSurfLeafIndex)
-					break;
-			}
-
-			// don't merge single surfaces
-			if (firstSurf + 1 == lastSurf)
-			{
-				numUnmergedSurfaces++;
-				continue;
-			}
-
-			numMergedSurfaces++;
-		}
-
-		// Allocate merged surfaces
-		s_worldData.mergedSurfaces = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfaces) * numMergedSurfaces, h_low);
-		s_worldData.mergedSurfacesViewCount = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesViewCount) * numMergedSurfaces, h_low);
-		s_worldData.mergedSurfacesDlightBits = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesDlightBits) * numMergedSurfaces, h_low);
-		s_worldData.mergedSurfacesPshadowBits = ri.Hunk_Alloc(sizeof(*s_worldData.mergedSurfacesPshadowBits) * numMergedSurfaces, h_low);
-		s_worldData.numMergedSurfaces = numMergedSurfaces;
-		
-		// view surfaces are like mark surfaces, except negative ones represent merged surfaces
-		// -1 represents 0, -2 represents 1, and so on
-		s_worldData.viewSurfaces = ri.Hunk_Alloc(sizeof(*s_worldData.viewSurfaces) * s_worldData.nummarksurfaces, h_low);
-
-		// actually merge surfaces
-		mergedSurf = s_worldData.mergedSurfaces;
-		for(firstSurf = lastSurf = surfacesSorted; firstSurf < surfacesSorted + numSortedSurfaces; firstSurf = lastSurf)
-		{
-			srfBspSurface_t *bspSurf, *vaoSurf;
-
-			for ( lastSurf++ ; lastSurf < surfacesSorted + numSortedSurfaces; lastSurf++)
-			{
-				int lastSurfLeafIndex, firstSurfLeafIndex;
-
-				if ((*lastSurf)->shader         != (*firstSurf)->shader
-				 || (*lastSurf)->fogIndex       != (*firstSurf)->fogIndex
-				 || (*lastSurf)->cubemapIndex   != (*firstSurf)->cubemapIndex)
-					break;
-
-				lastSurfLeafIndex  = s_worldData.surfacesViewCount[*lastSurf  - s_worldData.surfaces];
-				firstSurfLeafIndex = s_worldData.surfacesViewCount[*firstSurf - s_worldData.surfaces];
-
-				if (lastSurfLeafIndex != firstSurfLeafIndex)
-					break;
-			}
-
-			// don't merge single surfaces
-			if (firstSurf + 1 == lastSurf)
-				continue;
-
-			bspSurf = (srfBspSurface_t *)(*firstSurf)->data;
-
-			vaoSurf = ri.Hunk_Alloc(sizeof(*vaoSurf), h_low);
-			memset(vaoSurf, 0, sizeof(*vaoSurf));
-			vaoSurf->surfaceType = SF_VAO_MESH;
-
-			vaoSurf->vao = bspSurf->vao;
-
-			vaoSurf->firstIndex = bspSurf->firstIndex;
-			vaoSurf->minIndex = bspSurf->minIndex;
-			vaoSurf->maxIndex = bspSurf->maxIndex;
-
-			ClearBounds(vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]);
-			for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
-			{
-				srfBspSurface_t *currBspSurf = (srfBspSurface_t *)(*currSurf)->data;
-
-				vaoSurf->numVerts   += currBspSurf->numVerts;
-				vaoSurf->numIndexes += currBspSurf->numIndexes;
-				vaoSurf->minIndex = MIN(vaoSurf->minIndex, currBspSurf->minIndex);
-				vaoSurf->maxIndex = MAX(vaoSurf->maxIndex, currBspSurf->maxIndex);
-				AddPointToBounds((*currSurf)->cullinfo.bounds[0], vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]);
-				AddPointToBounds((*currSurf)->cullinfo.bounds[1], vaoSurf->cullBounds[0], vaoSurf->cullBounds[1]);
-			}
-
-			VectorCopy(vaoSurf->cullBounds[0], mergedSurf->cullinfo.bounds[0]);
-			VectorCopy(vaoSurf->cullBounds[1], mergedSurf->cullinfo.bounds[1]);
-
-			mergedSurf->cullinfo.type =  CULLINFO_BOX;
-			mergedSurf->data          =  (surfaceType_t *)vaoSurf;
-			mergedSurf->fogIndex      =  (*firstSurf)->fogIndex;
-			mergedSurf->cubemapIndex  =  (*firstSurf)->cubemapIndex;
-			mergedSurf->shader        =  (*firstSurf)->shader;
-
-			// change surfacesViewCount[] from leaf index to viewSurface index - 1 so we can redirect later
-			// subtracting 2 (viewSurface index - 1) to avoid collision with -1 (no leaf)
-			for (currSurf = firstSurf; currSurf < lastSurf; currSurf++)
-				s_worldData.surfacesViewCount[*currSurf - s_worldData.surfaces] = -((int)(mergedSurf - s_worldData.mergedSurfaces)) - 2;
-
-			mergedSurf++;
-		}
-
-		// direct viewSurfaces to merged and unmerged surfaces
-		for (i = 0; i < s_worldData.nummarksurfaces; i++)
-		{
-			int viewSurfaceIndex = s_worldData.surfacesViewCount[s_worldData.marksurfaces[i]] + 1;
-			s_worldData.viewSurfaces[i] = (viewSurfaceIndex < 0) ? viewSurfaceIndex : s_worldData.marksurfaces[i];
-		}
-
-		ri.Printf(PRINT_ALL, "Processed %d mergeable surfaces into %d merged, %d unmerged\n",
-			numSortedSurfaces, numMergedSurfaces, numUnmergedSurfaces);
-	}
-
-	for (i = 0; i < s_worldData.numWorldSurfaces; i++)
-		s_worldData.surfacesViewCount[i] = -1;
-
-	ri.Free(surfacesSorted);
-
-	endTime = ri.Milliseconds();
-	ri.Printf(PRINT_ALL, "world VAOs calculation time = %5.2f seconds\n", (endTime - startTime) / 1000.0);
-}
-
-/*
 ===============
 R_LoadSurfaces
 ===============
@@ -3787,9 +3372,6 @@ void RE_LoadWorldMap( const char *name ) {
 		}
 	}
 
-	// create static VAOS from the world
-	R_CreateWorldVaos();
-
 	s_worldData.dataSize = (byte *)ri.Hunk_Alloc( 0, h_low ) - startMarker;
 
 	// only set tr.world now that we know the entire level has loaded properly
diff --git a/SP/code/rend2/tr_cmds.c b/SP/code/rend2/tr_cmds.c
index 578353b..27d7e12 100644
--- a/SP/code/rend2/tr_cmds.c
+++ b/SP/code/rend2/tr_cmds.c
@@ -73,8 +73,8 @@ void R_PerformanceCounters( void ) {
 	}
 	else if (r_speeds->integer == 7 )
 	{
-		ri.Printf( PRINT_ALL, "VAO draws: static %i dynamic %i\nMultidraws: %i merged %i\n",
-			backEnd.pc.c_staticVaoDraws, backEnd.pc.c_dynamicVaoDraws, backEnd.pc.c_multidraws, backEnd.pc.c_multidrawsMerged );
+		ri.Printf( PRINT_ALL, "VAO draws: static %i dynamic %i\n",
+			backEnd.pc.c_staticVaoDraws, backEnd.pc.c_dynamicVaoDraws);
 		ri.Printf( PRINT_ALL, "GLSL binds: %i  draws: gen %i light %i fog %i dlight %i\n",
 			backEnd.pc.c_glslShaderBinds, backEnd.pc.c_genericDraws, backEnd.pc.c_lightallDraws, backEnd.pc.c_fogDraws, backEnd.pc.c_dlightDraws);
 	}
diff --git a/SP/code/rend2/tr_extensions.c b/SP/code/rend2/tr_extensions.c
index f0b24b7..37bbe84 100644
--- a/SP/code/rend2/tr_extensions.c
+++ b/SP/code/rend2/tr_extensions.c
@@ -31,9 +31,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include "tr_dsa.h"
 
 #define GLE(ret, name, ...) name##proc * qgl##name;
-QGL_1_2_PROCS;
 QGL_1_3_PROCS;
-QGL_1_4_PROCS;
 QGL_1_5_PROCS;
 QGL_2_0_PROCS;
 QGL_EXT_framebuffer_object_PROCS;
@@ -67,19 +65,9 @@ void GLimp_InitExtraExtensions()
 	// GL function loader, based on https://gist.github.com/rygorous/16796a0c876cf8a5f542caddb55bce8a
 #define GLE(ret, name, ...) qgl##name = (name##proc *) SDL_GL_GetProcAddress("gl" #name);
 
-	// OpenGL 1.2, was GL_EXT_draw_range_elements
-	QGL_1_2_PROCS;
-	glRefConfig.drawRangeElements = !!r_ext_draw_range_elements->integer;
-	ri.Printf(PRINT_ALL, result[glRefConfig.drawRangeElements], "glDrawRangeElements()");
-
 	// OpenGL 1.3, was GL_ARB_texture_compression
 	QGL_1_3_PROCS;
 
-	// OpenGL 1.4, was GL_EXT_multi_draw_arrays
-	QGL_1_4_PROCS;
-	glRefConfig.multiDrawArrays = !!r_ext_multi_draw_arrays->integer;
-	ri.Printf(PRINT_ALL, result[glRefConfig.multiDrawArrays], "glMultiDrawElements()");
-
 	// OpenGL 1.5, was GL_ARB_vertex_buffer_object and GL_ARB_occlusion_query
 	QGL_1_5_PROCS;
 	glRefConfig.occlusionQuery = qtrue;
diff --git a/SP/code/rend2/tr_init.c b/SP/code/rend2/tr_init.c
index e9fae70..1812c34 100644
--- a/SP/code/rend2/tr_init.c
+++ b/SP/code/rend2/tr_init.c
@@ -122,8 +122,6 @@ cvar_t  *r_ati_truform_pointmode;   // linear/cubic
 cvar_t  *r_ati_fsaa_samples;        //DAJ valids are 1, 2, 4
 //----(SA)	end
 
-cvar_t  *r_ext_draw_range_elements;
-cvar_t  *r_ext_multi_draw_arrays;
 cvar_t  *r_ext_framebuffer_object;
 cvar_t  *r_ext_texture_float;
 cvar_t  *r_ext_framebuffer_multisample;
@@ -1317,8 +1315,6 @@ void R_Register( void ) {
 
 	r_ext_texture_env_add = ri.Cvar_Get( "r_ext_texture_env_add", "1", CVAR_ARCHIVE | CVAR_LATCH );
 
-	r_ext_draw_range_elements = ri.Cvar_Get( "r_ext_draw_range_elements", "1", CVAR_ARCHIVE | CVAR_LATCH);
-	r_ext_multi_draw_arrays = ri.Cvar_Get( "r_ext_multi_draw_arrays", "1", CVAR_ARCHIVE | CVAR_LATCH);
 	r_ext_framebuffer_object = ri.Cvar_Get( "r_ext_framebuffer_object", "1", CVAR_ARCHIVE | CVAR_LATCH);
 	r_ext_texture_float = ri.Cvar_Get( "r_ext_texture_float", "1", CVAR_ARCHIVE | CVAR_LATCH);
 	r_ext_framebuffer_multisample = ri.Cvar_Get( "r_ext_framebuffer_multisample", "0", CVAR_ARCHIVE | CVAR_LATCH);
diff --git a/SP/code/rend2/tr_local.h b/SP/code/rend2/tr_local.h
index 8646128..62deec7 100644
--- a/SP/code/rend2/tr_local.h
+++ b/SP/code/rend2/tr_local.h
@@ -1047,8 +1047,6 @@ typedef struct srfBspSurface_s
 	// BSP VBO offsets
 	int             firstVert;
 	int             firstIndex;
-	glIndex_t       minIndex;
-	glIndex_t       maxIndex;
 
 	// static render data
 	vao_t          *vao;
@@ -1123,8 +1121,6 @@ typedef struct srfVaoMdvMesh_s
 	// backEnd stats
 	int             numIndexes;
 	int             numVerts;
-	glIndex_t       minIndex;
-	glIndex_t       maxIndex;
 
 	// static render data
 	vao_t          *vao;
@@ -1253,15 +1249,8 @@ typedef struct {
 	int         *surfacesDlightBits;
 	int			*surfacesPshadowBits;
 
-	int			numMergedSurfaces;
-	msurface_t	*mergedSurfaces;
-	int         *mergedSurfacesViewCount;
-	int         *mergedSurfacesDlightBits;
-	int			*mergedSurfacesPshadowBits;
-
 	int nummarksurfaces;
 	int         *marksurfaces;
-	int         *viewSurfaces;
 
 	int numfogs;
 	fog_t       *fogs;
@@ -1519,8 +1508,6 @@ typedef struct {
 
 	qboolean    intelGraphics;
 
-	qboolean    drawRangeElements;
-	qboolean    multiDrawArrays;
 	qboolean	occlusionQuery;
 
 	int glslMajorVersion;
@@ -1558,9 +1545,6 @@ typedef struct {
 	int     c_staticVaoDraws;
 	int     c_dynamicVaoDraws;
 
-	int     c_multidraws;
-	int     c_multidrawsMerged;
-
 	int c_dlightVertexes;
 	int c_dlightIndexes;
 
@@ -1893,8 +1877,6 @@ extern cvar_t   *r_ext_NV_fog_dist;
 extern cvar_t   *r_nv_fogdist_mode;
 //----(SA)	end
 
-extern  cvar_t  *r_ext_draw_range_elements;
-extern  cvar_t  *r_ext_multi_draw_arrays;
 extern  cvar_t  *r_ext_framebuffer_object;
 extern  cvar_t  *r_ext_texture_float;
 extern  cvar_t  *r_ext_framebuffer_multisample;
@@ -2242,8 +2224,6 @@ typedef struct stageVars
 	vec2_t texcoords[NUM_TEXTURE_BUNDLES][SHADER_MAX_VERTEXES];
 } stageVars_t;
 
-#define MAX_MULTIDRAW_PRIMITIVES	256
-
 typedef struct shaderCommands_s
 {
 	glIndex_t	indexes[SHADER_MAX_INDEXES] QALIGN(16);
@@ -2259,6 +2239,7 @@ typedef struct shaderCommands_s
 	void *attribPointers[ATTR_INDEX_COUNT];
 	vao_t       *vao;
 	qboolean    useInternalVao;
+	qboolean    useCacheVao;
 
 	stageVars_t	svars QALIGN(16);
 
@@ -2276,14 +2257,6 @@ typedef struct shaderCommands_s
 
 	int numIndexes;
 	int numVertexes;
-	glIndex_t   minIndex;
-	glIndex_t   maxIndex;
-
-	int         multiDrawPrimitives;
-	GLsizei     multiDrawNumIndexes[MAX_MULTIDRAW_PRIMITIVES];
-	glIndex_t  *multiDrawFirstIndex[MAX_MULTIDRAW_PRIMITIVES];
-	glIndex_t   multiDrawMinIndex[MAX_MULTIDRAW_PRIMITIVES];
-	glIndex_t   multiDrawMaxIndex[MAX_MULTIDRAW_PRIMITIVES];
 
 	qboolean ATI_tess;
 
@@ -2300,7 +2273,7 @@ void RB_EndSurface( void );
 void RB_CheckOverflow( int verts, int indexes );
 #define RB_CHECKOVERFLOW( v,i ) if ( tess.numVertexes + ( v ) >= SHADER_MAX_VERTEXES || tess.numIndexes + ( i ) >= SHADER_MAX_INDEXES ) {RB_CheckOverflow( v,i );}
 
-void R_DrawElementsVao( int numIndexes, glIndex_t firstIndex, glIndex_t minIndex, glIndex_t maxIndex );
+void R_DrawElements( int numIndexes, glIndex_t firstIndex );
 void RB_StageIteratorGeneric( void );
 void RB_StageIteratorSky( void );
 void RB_StageIteratorVertexLitTexture( void );
@@ -2437,6 +2410,14 @@ void            R_VaoList_f(void);
 
 void            RB_UpdateTessVao(unsigned int attribBits);
 
+void VaoCache_Commit(void);
+void VaoCache_Init(void);
+void VaoCache_BindVao(void);
+void VaoCache_CheckAdd(qboolean *endSurface, qboolean *recycleVertexBuffer, qboolean *recycleIndexBuffer, int numVerts, int numIndexes);
+void VaoCache_RecycleVertexBuffer(void);
+void VaoCache_RecycleIndexBuffer(void);
+void VaoCache_InitQueue(void);
+void VaoCache_AddSurface(srfVert_t *verts, int numVerts, glIndex_t *indexes, int numIndexes);
 
 /*
 ============================================================
diff --git a/SP/code/rend2/tr_model.c b/SP/code/rend2/tr_model.c
index 1eacb93..5ba28b9 100644
--- a/SP/code/rend2/tr_model.c
+++ b/SP/code/rend2/tr_model.c
@@ -1040,9 +1040,6 @@ static qboolean R_LoadMDC( model_t *mod, int lod, void *buffer, const char *modN
 			vaoSurf->numIndexes = surf->numIndexes;
 			vaoSurf->numVerts = surf->numVerts;
 			
-			vaoSurf->minIndex = 0;
-			vaoSurf->maxIndex = surf->numVerts - 1;
-
 			vaoSurf->vao = R_CreateVao(va("staticMD3Mesh_VAO '%s'", surf->name), data, dataSize, (byte *)surf->indexes, surf->numIndexes * sizeof(*surf->indexes), VAO_USAGE_STATIC);
 
 			vaoSurf->vao->attribs[ATTR_INDEX_POSITION].enabled = 1;
@@ -1523,9 +1520,6 @@ static qboolean R_LoadMD3(model_t * mod, int lod, void *buffer, const char *modN
 			vaoSurf->numIndexes = surf->numIndexes;
 			vaoSurf->numVerts = surf->numVerts;
 			
-			vaoSurf->minIndex = 0;
-			vaoSurf->maxIndex = surf->numVerts - 1;
-
 			vaoSurf->vao = R_CreateVao(va("staticMD3Mesh_VAO '%s'", surf->name), data, dataSize, (byte *)surf->indexes, surf->numIndexes * sizeof(*surf->indexes), VAO_USAGE_STATIC);
 
 			vaoSurf->vao->attribs[ATTR_INDEX_POSITION].enabled = 1;
diff --git a/SP/code/rend2/tr_shade.c b/SP/code/rend2/tr_shade.c
index 7e3e1b1..9c0675d 100644
--- a/SP/code/rend2/tr_shade.c
+++ b/SP/code/rend2/tr_shade.c
@@ -40,42 +40,9 @@ If you have questions concerning this license or the applicable additional terms
 */
 
 
-void R_DrawElementsVao( int numIndexes, glIndex_t firstIndex, glIndex_t minIndex, glIndex_t maxIndex )
+void R_DrawElements( int numIndexes, glIndex_t firstIndex)
 {
-	if (glRefConfig.drawRangeElements)
-		qglDrawRangeElements(GL_TRIANGLES, minIndex, maxIndex, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t)));
-	else
-		qglDrawElements(GL_TRIANGLES, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t)));
-	
-}
-
-
-static void R_DrawMultiElementsVao( int multiDrawPrimitives, glIndex_t *multiDrawMinIndex, glIndex_t *multiDrawMaxIndex, 
-	GLsizei *multiDrawNumIndexes, glIndex_t **multiDrawFirstIndex)
-{
-	if (glRefConfig.multiDrawArrays && multiDrawPrimitives > 1)
-	{
-		qglMultiDrawElements(GL_TRIANGLES, multiDrawNumIndexes, GL_INDEX_TYPE, (const GLvoid **)multiDrawFirstIndex, multiDrawPrimitives);
-	}
-	else
-	{
-		int i;
-
-		if (glRefConfig.drawRangeElements)
-		{
-			for (i = 0; i < multiDrawPrimitives; i++)
-			{
-				qglDrawRangeElements(GL_TRIANGLES, multiDrawMinIndex[i], multiDrawMaxIndex[i], multiDrawNumIndexes[i], GL_INDEX_TYPE, multiDrawFirstIndex[i]);
-			}
-		}
-		else
-		{
-			for (i = 0; i < multiDrawPrimitives; i++)
-			{
-				qglDrawElements(GL_TRIANGLES, multiDrawNumIndexes[i], GL_INDEX_TYPE, multiDrawFirstIndex[i]);
-			}
-		}
-	}
+	qglDrawElements(GL_TRIANGLES, numIndexes, GL_INDEX_TYPE, BUFFER_OFFSET(firstIndex * sizeof(glIndex_t)));
 }
 
 
@@ -156,14 +123,7 @@ static void DrawTris (shaderCommands_t *input) {
 		VectorSet4(color, 1, 1, 1, 1);
 		GLSL_SetUniformVec4(sp, UNIFORM_COLOR, color);
 
-		if (input->multiDrawPrimitives)
-		{
-			R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-		}
-		else
-		{
-			R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
-		}
+		R_DrawElements(input->numIndexes, input->firstIndex);
 	}
 
 	qglDepthRange( 0, 1 );
@@ -181,6 +141,7 @@ static void DrawNormals (shaderCommands_t *input) {
 	//FIXME: implement this
 }
 
+
 /*
 ==============
 RB_BeginSurface
@@ -198,7 +159,6 @@ void RB_BeginSurface( shader_t *shader, int fogNum, int cubemapIndex ) {
 	tess.numIndexes = 0;
 	tess.firstIndex = 0;
 	tess.numVertexes = 0;
-	tess.multiDrawPrimitives = 0;
 	tess.shader = state;
 	tess.fogNum = fogNum;
 	tess.cubemapIndex = cubemapIndex;
@@ -208,6 +168,7 @@ void RB_BeginSurface( shader_t *shader, int fogNum, int cubemapIndex ) {
 	tess.numPasses = state->numUnfoggedPasses;
 	tess.currentStageIteratorFunc = state->optimalStageIteratorFunc;
 	tess.useInternalVao = qtrue;
+	tess.useCacheVao = qfalse;
 
 	tess.shaderTime = backEnd.refdef.floatTime - tess.shader->timeOffset;
 	if (tess.shader->clampTime && tess.shaderTime >= tess.shader->clampTime) {
@@ -221,7 +182,6 @@ void RB_BeginSurface( shader_t *shader, int fogNum, int cubemapIndex ) {
 }
 
 
-
 extern float EvalWaveForm( const waveForm_t *wf );
 extern float EvalWaveFormClamped( const waveForm_t *wf );
 
@@ -452,15 +412,7 @@ static void ProjectDlightTexture( void ) {
 			GL_State( GLS_ATEST_GT_0 | GLS_SRCBLEND_DST_COLOR | GLS_DSTBLEND_ONE | GLS_DEPTHFUNC_EQUAL );
 		}
 
-		if (tess.multiDrawPrimitives)
-		{
-			shaderCommands_t *input = &tess;
-			R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-		}
-		else
-		{
-			R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
-		}
+		R_DrawElements(tess.numIndexes, tess.firstIndex);
 
 		backEnd.pc.c_totalIndexes += tess.numIndexes;
 		backEnd.pc.c_dlightIndexes += tess.numIndexes;
@@ -936,14 +888,7 @@ static void ForwardDlight( void ) {
 		// draw
 		//
 
-		if (input->multiDrawPrimitives)
-		{
-			R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-		}
-		else
-		{
-			R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
-		}
+		R_DrawElements(input->numIndexes, input->firstIndex);
 
 		backEnd.pc.c_totalIndexes += tess.numIndexes;
 		backEnd.pc.c_dlightIndexes += tess.numIndexes;
@@ -1012,14 +957,7 @@ static void ProjectPshadowVBOGLSL( void ) {
 		// draw
 		//
 
-		if (input->multiDrawPrimitives)
-		{
-			R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-		}
-		else
-		{
-			R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
-		}
+		R_DrawElements(input->numIndexes, input->firstIndex);
 
 		backEnd.pc.c_totalIndexes += tess.numIndexes;
 		//backEnd.pc.c_dlightIndexes += tess.numIndexes;
@@ -1160,15 +1098,7 @@ static void RB_FogPass( int wolfFog ) {
 		GL_State( GLS_SRCBLEND_SRC_ALPHA | GLS_DSTBLEND_ONE_MINUS_SRC_ALPHA );
 	}
 
-	if (tess.multiDrawPrimitives)
-	{
-		shaderCommands_t *input = &tess;
-		R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-	}
-	else
-	{
-		R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
-	}
+	R_DrawElements(tess.numIndexes, tess.firstIndex);
 }
 
 
@@ -1189,6 +1119,7 @@ static unsigned int RB_CalcShaderVertexAttribs( shaderCommands_t *input )
 	return vertexAttribs;
 }
 
+
 static void RB_IterateStagesGeneric( shaderCommands_t *input )
 {
 	int stage;
@@ -1663,14 +1594,7 @@ static void RB_IterateStagesGeneric( shaderCommands_t *input )
 		//
 		// draw
 		//
-		if (input->multiDrawPrimitives)
-		{
-			R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-		}
-		else
-		{
-			R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
-		}
+		R_DrawElements(input->numIndexes, input->firstIndex);
 
 		// allow skipping out to show just lightmaps during development
 		if ( r_lightmap->integer && ( pStage->bundle[0].isLightmap || pStage->bundle[1].isLightmap ) )
@@ -1744,20 +1668,12 @@ static void RB_RenderShadowmap( shaderCommands_t *input )
 			// draw
 			//
 
-			if (input->multiDrawPrimitives)
-			{
-				R_DrawMultiElementsVao(input->multiDrawPrimitives, input->multiDrawMinIndex, input->multiDrawMaxIndex, input->multiDrawNumIndexes, input->multiDrawFirstIndex);
-			}
-			else
-			{
-				R_DrawElementsVao(input->numIndexes, input->firstIndex, input->minIndex, input->maxIndex);
-			}
+			R_DrawElements(input->numIndexes, input->firstIndex);
 		}
 	}
 }
 
 
-
 /*
 ** RB_StageIteratorGeneric
 */
@@ -2009,6 +1925,12 @@ void RB_EndSurface( void ) {
 		}
 	}
 
+	if (tess.useCacheVao)
+	{
+		// upload indexes now
+		VaoCache_Commit();
+	}
+
 	//
 	// update performance counters
 	//
@@ -2036,7 +1958,6 @@ void RB_EndSurface( void ) {
 	tess.numIndexes = 0;
 	tess.numVertexes = 0;
 	tess.firstIndex = 0;
-	tess.multiDrawPrimitives = 0;
 
 	GLimp_LogComment( "----------\n" );
 }
diff --git a/SP/code/rend2/tr_sky.c b/SP/code/rend2/tr_sky.c
index 291258b..648d9aa 100644
--- a/SP/code/rend2/tr_sky.c
+++ b/SP/code/rend2/tr_sky.c
@@ -377,8 +377,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
 	int s, t;
 	int firstVertex = tess.numVertexes;
 	//int firstIndex = tess.numIndexes;
-	int minIndex = tess.minIndex;
-	int maxIndex = tess.maxIndex;
 	vec4_t color;
 
 	//tess.numVertexes = 0;
@@ -428,9 +426,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
 		}
 	}
 
-	tess.minIndex = firstVertex;
-	tess.maxIndex = tess.numVertexes;
-
 	// FIXME: A lot of this can probably be removed for speed, and refactored into a more convenient function
 	RB_UpdateTessVao(ATTR_POSITION | ATTR_TEXCOORD);
 /*
@@ -475,7 +470,7 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
 		GLSL_SetUniformVec4(sp, UNIFORM_DIFFUSETEXOFFTURB, vector);
 	}
 
-	R_DrawElementsVao(tess.numIndexes - tess.firstIndex, tess.firstIndex, tess.minIndex, tess.maxIndex);
+	R_DrawElements(tess.numIndexes - tess.firstIndex, tess.firstIndex);
 
 	//qglDrawElements(GL_TRIANGLES, tess.numIndexes - tess.firstIndex, GL_INDEX_TYPE, BUFFER_OFFSET(tess.firstIndex * sizeof(glIndex_t)));
 	
@@ -485,8 +480,6 @@ static void DrawSkySide( struct image_s *image, const int mins[2], const int max
 	tess.numIndexes = tess.firstIndex;
 	tess.numVertexes = firstVertex;
 	tess.firstIndex = 0;
-	tess.minIndex = minIndex;
-	tess.maxIndex = maxIndex;
 }
 
 static void DrawSkySideInner( struct image_s *image, const int mins[2], const int maxs[2] )
@@ -494,8 +487,6 @@ static void DrawSkySideInner( struct image_s *image, const int mins[2], const in
 	int s, t;
 	int firstVertex = tess.numVertexes;
 	//int firstIndex = tess.numIndexes;
-	int minIndex = tess.minIndex;
-	int maxIndex = tess.maxIndex;
 	vec4_t color;
 
 	//tess.numVertexes = 0;
@@ -545,9 +536,6 @@ static void DrawSkySideInner( struct image_s *image, const int mins[2], const in
 		}
 	}
 
-	tess.minIndex = firstVertex;
-	tess.maxIndex = tess.numVertexes;
-
 	// FIXME: A lot of this can probably be removed for speed, and refactored into a more convenient function
 	RB_UpdateTessVao(ATTR_POSITION | ATTR_TEXCOORD);
 /*
@@ -592,7 +580,7 @@ static void DrawSkySideInner( struct image_s *image, const int mins[2], const in
 		GLSL_SetUniformVec4(sp, UNIFORM_DIFFUSETEXOFFTURB, vector);
 	}
 
-	R_DrawElementsVao(tess.numIndexes - tess.firstIndex, tess.firstIndex, tess.minIndex, tess.maxIndex);
+	R_DrawElements(tess.numIndexes - tess.firstIndex, tess.firstIndex);
 
 	//qglDrawElements(GL_TRIANGLES, tess.numIndexes - tess.firstIndex, GL_INDEX_TYPE, BUFFER_OFFSET(tess.firstIndex * sizeof(glIndex_t)));
 	
@@ -602,8 +590,6 @@ static void DrawSkySideInner( struct image_s *image, const int mins[2], const in
 	tess.numIndexes = tess.firstIndex;
 	tess.numVertexes = firstVertex;
 	tess.firstIndex = 0;
-	tess.minIndex = minIndex;
-	tess.maxIndex = maxIndex;
 }
 
 static void DrawSkyBox( shader_t *shader ) {
diff --git a/SP/code/rend2/tr_surface.c b/SP/code/rend2/tr_surface.c
index e047472..0bb8c77 100644
--- a/SP/code/rend2/tr_surface.c
+++ b/SP/code/rend2/tr_surface.c
@@ -75,7 +75,7 @@ void RB_CheckOverflow( int verts, int indexes ) {
 
 void RB_CheckVao(vao_t *vao)
 {
-	if (vao != glState.currentVao || tess.multiDrawPrimitives >= MAX_MULTIDRAW_PRIMITIVES)
+	if (vao != glState.currentVao)
 	{
 		RB_EndSurface();
 		RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex);
@@ -213,18 +213,14 @@ void RB_InstantQuad2(vec4_t quadVerts[4], vec2_t texCoords[4])
 	tess.indexes[tess.numIndexes++] = 0;
 	tess.indexes[tess.numIndexes++] = 2;
 	tess.indexes[tess.numIndexes++] = 3;
-	tess.minIndex = 0;
-	tess.maxIndex = 3;
 
 	RB_UpdateTessVao(ATTR_POSITION | ATTR_TEXCOORD);
 
-	R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
+	R_DrawElements(tess.numIndexes, tess.firstIndex);
 
 	tess.numIndexes = 0;
 	tess.numVertexes = 0;
 	tess.firstIndex = 0;
-	tess.minIndex = 0;
-	tess.maxIndex = 0;
 }
 
 void RB_InstantQuad(vec4_t quadVerts[4])
@@ -438,118 +434,88 @@ static void RB_SurfaceVertsAndIndexes( int numVerts, srfVert_t *verts, int numIn
 	tess.numVertexes += numVerts;
 }
 
-static qboolean RB_SurfaceVao(vao_t *vao, int numVerts, int numIndexes, int firstIndex, int minIndex, int maxIndex, int dlightBits, int pshadowBits, qboolean shaderCheck)
+static qboolean RB_SurfaceVaoCached(int numVerts, srfVert_t *verts, int numIndexes, glIndex_t *indexes, int dlightBits, int pshadowBits)
 {
-	int i, mergeForward, mergeBack;
-	GLvoid *firstIndexOffset, *lastIndexOffset;
+	qboolean recycleVertexBuffer = qfalse;
+	qboolean recycleIndexBuffer = qfalse;
+	qboolean endSurface = qfalse;
 
-	if (!vao)
-	{
+	if (!(!ShaderRequiresCPUDeforms(tess.shader) && !tess.shader->isSky && !tess.shader->isPortal))
 		return qfalse;
-	}
 
-	if (shaderCheck && !(!ShaderRequiresCPUDeforms(tess.shader) && !tess.shader->isSky && !tess.shader->isPortal))
-	{
+	if (!numIndexes || !numVerts)
 		return qfalse;
-	}
 
-	RB_CheckVao(vao);
+	VaoCache_BindVao();
 
 	tess.dlightBits |= dlightBits;
 	tess.pshadowBits |= pshadowBits;
 
-	// merge this into any existing multidraw primitives
-	mergeForward = -1;
-	mergeBack = -1;
-	firstIndexOffset = BUFFER_OFFSET(firstIndex * sizeof(glIndex_t));
-	lastIndexOffset  = BUFFER_OFFSET((firstIndex + numIndexes) * sizeof(glIndex_t));
+	VaoCache_CheckAdd(&endSurface, &recycleVertexBuffer, &recycleIndexBuffer, numVerts, numIndexes);
 
-	if (tess.multiDrawPrimitives && r_mergeMultidraws->integer)
+	if (endSurface)
 	{
-		i = 0;
+		RB_EndSurface();
+		RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex);
+	}
 
-		if (r_mergeMultidraws->integer == 1)
-		{
-			// lazy merge, only check the last primitive
-			i = tess.multiDrawPrimitives - 1;
-		}
+	if (recycleVertexBuffer)
+		VaoCache_RecycleVertexBuffer();
 
-		for (; i < tess.multiDrawPrimitives; i++)
-		{
-			if (firstIndexOffset == tess.multiDrawFirstIndex[i] + tess.multiDrawNumIndexes[i])
-			{
-				mergeBack = i;
+	if (recycleIndexBuffer)
+		VaoCache_RecycleIndexBuffer();
 
-				if (mergeForward != -1)
-					break;
-			}
+	if (!tess.numVertexes)
+		VaoCache_InitQueue();
 
-			if (lastIndexOffset == tess.multiDrawFirstIndex[i])
-			{
-				mergeForward = i;
+	VaoCache_AddSurface(verts, numVerts, indexes, numIndexes);
 
-				if (mergeBack != -1)
-					break;
-			}
-		}
-	}
+	tess.numIndexes += numIndexes;
+	tess.numVertexes += numVerts;
+	tess.useInternalVao = qfalse;
+	tess.useCacheVao = qtrue;
 
-	if (mergeBack != -1 && mergeForward == -1)
-	{
-		tess.multiDrawNumIndexes[mergeBack] += numIndexes;
-		tess.multiDrawMinIndex[mergeBack] = MIN(tess.multiDrawMinIndex[mergeBack], minIndex);
-		tess.multiDrawMaxIndex[mergeBack] = MAX(tess.multiDrawMaxIndex[mergeBack], maxIndex);
-		backEnd.pc.c_multidrawsMerged++;
-	}
-	else if (mergeBack == -1 && mergeForward != -1)
+	return qtrue;
+}
+
+
+static qboolean RB_SurfaceVao(vao_t *vao, int numVerts, int numIndexes, int firstIndex, int dlightBits, int pshadowBits, qboolean shaderCheck)
+{
+	if (!vao)
 	{
-		tess.multiDrawNumIndexes[mergeForward] += numIndexes;
-		tess.multiDrawFirstIndex[mergeForward]  = firstIndexOffset;
-		tess.multiDrawMinIndex[mergeForward] = MIN(tess.multiDrawMinIndex[mergeForward], minIndex);
-		tess.multiDrawMaxIndex[mergeForward] = MAX(tess.multiDrawMaxIndex[mergeForward], maxIndex);
-		backEnd.pc.c_multidrawsMerged++;
+		return qfalse;
 	}
-	else if (mergeBack != -1 && mergeForward != -1)
-	{
-		tess.multiDrawNumIndexes[mergeBack] += numIndexes + tess.multiDrawNumIndexes[mergeForward];
-		tess.multiDrawMinIndex[mergeBack] = MIN(tess.multiDrawMinIndex[mergeBack], MIN(tess.multiDrawMinIndex[mergeForward], minIndex));
-		tess.multiDrawMaxIndex[mergeBack] = MAX(tess.multiDrawMaxIndex[mergeBack], MAX(tess.multiDrawMaxIndex[mergeForward], maxIndex));
-		tess.multiDrawPrimitives--;
 
-		if (mergeForward != tess.multiDrawPrimitives)
-		{
-			tess.multiDrawNumIndexes[mergeForward] = tess.multiDrawNumIndexes[tess.multiDrawPrimitives];
-			tess.multiDrawFirstIndex[mergeForward] = tess.multiDrawFirstIndex[tess.multiDrawPrimitives];
-			tess.multiDrawMinIndex[mergeForward] = tess.multiDrawMinIndex[tess.multiDrawPrimitives];
-			tess.multiDrawMaxIndex[mergeForward] = tess.multiDrawMaxIndex[tess.multiDrawPrimitives];
-		}
-		backEnd.pc.c_multidrawsMerged += 2;
-	}
-	else //if (mergeBack == -1 && mergeForward == -1)
+	if (shaderCheck && !(!ShaderRequiresCPUDeforms(tess.shader) && !tess.shader->isSky && !tess.shader->isPortal))
 	{
-		tess.multiDrawNumIndexes[tess.multiDrawPrimitives] = numIndexes;
-		tess.multiDrawFirstIndex[tess.multiDrawPrimitives] = firstIndexOffset;
-		tess.multiDrawMinIndex[tess.multiDrawPrimitives] = minIndex;
-		tess.multiDrawMaxIndex[tess.multiDrawPrimitives] = maxIndex;
-		tess.multiDrawPrimitives++;
+		return qfalse;
 	}
 
-	backEnd.pc.c_multidraws++;
+	RB_CheckVao(vao);
 
-	tess.numIndexes  += numIndexes;
-	tess.numVertexes += numVerts;
+	tess.dlightBits |= dlightBits;
+	tess.pshadowBits |= pshadowBits;
+
+	RB_EndSurface();
+	RB_BeginSurface(tess.shader, tess.fogNum, tess.cubemapIndex);
+
+	backEnd.pc.c_staticVaoDraws++;
+
+	tess.numIndexes = numIndexes;
+	tess.numVertexes = numVerts;
 
 	return qtrue;
 }
 
+
 /*
 =============
 RB_SurfaceTriangles
 =============
 */
 static void RB_SurfaceTriangles( srfBspSurface_t *srf ) {
-	if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes,
-				srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) )
+	if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes,
+		srf->indexes, srf->dlightBits, srf->pshadowBits))
 	{
 		return;
 	}
@@ -610,8 +576,6 @@ static void RB_SurfaceBeam( void ) {
 	tess.numVertexes = 0;
 	tess.numIndexes = 0;
 	tess.firstIndex = 0;
-	tess.minIndex = 0;
-	tess.maxIndex = 0;
 
 	for ( i = 0; i <= NUM_BEAM_SEGS; i++ ) {
 		VectorCopy(start_points[ i % NUM_BEAM_SEGS ], tess.xyz[tess.numVertexes++]);
@@ -628,9 +592,6 @@ static void RB_SurfaceBeam( void ) {
 		tess.indexes[tess.numIndexes++] = 1  + (i + 1) * 2;
 	}
 
-	tess.minIndex = 0;
-	tess.maxIndex = tess.numVertexes;
-
 	// FIXME: A lot of this can probably be removed for speed, and refactored into a more convenient function
 	RB_UpdateTessVao(ATTR_POSITION);
 	
@@ -640,13 +601,11 @@ static void RB_SurfaceBeam( void ) {
 
 	GLSL_SetUniformVec4(sp, UNIFORM_COLOR, colorRed);
 
-	R_DrawElementsVao(tess.numIndexes, tess.firstIndex, tess.minIndex, tess.maxIndex);
+	R_DrawElements(tess.numIndexes, tess.firstIndex);
 
 	tess.numIndexes = 0;
 	tess.numVertexes = 0;
 	tess.firstIndex = 0;
-	tess.minIndex = 0;
-	tess.maxIndex = 0;
 }
 
 //================================================================================
@@ -991,8 +950,8 @@ RB_SurfaceFace
 ==============
 */
 static void RB_SurfaceFace( srfBspSurface_t *srf ) {
-	if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes,
-					srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) )
+	if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes,
+		srf->indexes, srf->dlightBits, srf->pshadowBits))
 	{
 		return;
 	}
@@ -1059,8 +1018,8 @@ static void RB_SurfaceGrid( srfBspSurface_t *srf ) {
 	int     pshadowBits;
 	//int		*vDlightBits;
 
-	if( RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes,
-					srf->firstIndex, srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qtrue ) )
+	if (RB_SurfaceVaoCached(srf->numVerts, srf->verts, srf->numIndexes,
+		srf->indexes, srf->dlightBits, srf->pshadowBits))
 	{
 		return;
 	}
@@ -1312,7 +1271,7 @@ static void RB_SurfaceFlare( srfFlare_t *surf ) {
 static void RB_SurfaceVaoMesh(srfBspSurface_t * srf)
 {
 	RB_SurfaceVao (srf->vao, srf->numVerts, srf->numIndexes, srf->firstIndex,
-			srf->minIndex, srf->maxIndex, srf->dlightBits, srf->pshadowBits, qfalse );
+			srf->dlightBits, srf->pshadowBits, qfalse );
 }
 
 void RB_SurfaceVaoMdvMesh(srfVaoMdvMesh_t * surface)
@@ -1342,8 +1301,6 @@ void RB_SurfaceVaoMdvMesh(srfVaoMdvMesh_t * surface)
 
 	tess.numIndexes = surface->numIndexes;
 	tess.numVertexes = surface->numVerts;
-	tess.minIndex = surface->minIndex;
-	tess.maxIndex = surface->maxIndex;
 
 	//mdvModel = surface->mdvModel;
 	//mdvSurface = surface->mdvSurface;
diff --git a/SP/code/rend2/tr_vbo.c b/SP/code/rend2/tr_vbo.c
index 0ac63a4..e4d0ca3 100644
--- a/SP/code/rend2/tr_vbo.c
+++ b/SP/code/rend2/tr_vbo.c
@@ -499,6 +499,8 @@ void R_InitVaos(void)
 
 	R_BindNullVao();
 
+	VaoCache_Init();
+
 	GL_CheckErrors();
 }
 
@@ -650,3 +652,315 @@ void RB_UpdateTessVao(unsigned int attribBits)
 		qglBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, tess.numIndexes * sizeof(tess.indexes[0]), tess.indexes);
 	}
 }
+
+// FIXME: This sets a limit of 65536 verts/262144 indexes per static surface
+// This is higher than the old vq3 limits but is worth noting
+#define VAOCACHE_QUEUE_MAX_SURFACES (1 << 10)
+#define VAOCACHE_QUEUE_MAX_VERTEXES (1 << 16)
+#define VAOCACHE_QUEUE_MAX_INDEXES (VAOCACHE_QUEUE_MAX_VERTEXES * 4)
+
+typedef struct queuedSurface_s // one surface waiting in the commit queue
+{
+	srfVert_t *vertexes; // caller's vertex array; only the pointer is kept until VaoCache_Commit copies it
+	int numVerts;
+	glIndex_t *indexes; // caller's index array; its address is also what the cache lookup compares
+	int numIndexes;
+}
+queuedSurface_t;
+
+static struct // pending batch: filled by VaoCache_AddSurface, consumed by VaoCache_Commit
+{
+	queuedSurface_t surfaces[VAOCACHE_QUEUE_MAX_SURFACES];
+	int numSurfaces;
+
+	srfVert_t vertexes[VAOCACHE_QUEUE_MAX_VERTEXES]; // staging copy that VaoCache_Commit uploads
+	int vertexCommitSize; // in bytes
+
+	glIndex_t indexes[VAOCACHE_QUEUE_MAX_INDEXES]; // staging copy of the rebased indexes
+	int indexCommitSize; // in bytes
+}
+vcq;
+
+#define VAOCACHE_MAX_SURFACES (1 << 16)
+#define VAOCACHE_MAX_BATCHES (1 << 10)
+
+// srfVert_t is 60 bytes
+// assuming each vert is referenced 4 times, need 16 bytes (4 glIndex_t) per vert
+// -> need about 4/15ths the space for indexes as vertexes
+#define VAOCACHE_VERTEX_BUFFER_SIZE (16 * 1024 * 1024)
+#define VAOCACHE_INDEX_BUFFER_SIZE (5 * 1024 * 1024)
+
+typedef struct buffered_s // cache record for one surface's index data
+{
+	void *data; // address of the source index array; compared by identity in VaoCache_Commit
+	int size; // in bytes
+	int bufferOffset; // byte offset of the uploaded copy inside the cache index buffer
+}
+buffered_t;
+
+static struct // the cache itself: one VAO plus a record of what has been uploaded to it
+{
+	vao_t *vao;
+	buffered_t surfaceIndexSets[VAOCACHE_MAX_SURFACES]; // one per cached surface, stored batch after batch
+	int numSurfaces;
+
+	int batchLengths[VAOCACHE_MAX_BATCHES]; // number of surfaces in each cached batch
+	int numBatches;
+
+	int vertexOffset; // bytes already used in the vertex buffer
+	int indexOffset; // bytes already used in the index buffer
+}
+vc;
+
+void VaoCache_Commit(void) // binds the cache VAO and sets tess.firstIndex for the queued batch
+{
+	buffered_t *indexSet;
+	int *batchLength;
+	queuedSurface_t *surf, *end = vcq.surfaces + vcq.numSurfaces;
+
+	R_BindVao(vc.vao);
+
+	// Search for a cached batch that holds exactly the queued surfaces, in queue order
+	// FIXME: Use faster search
+	indexSet = vc.surfaceIndexSets;
+	batchLength = vc.batchLengths;
+	for (; batchLength < vc.batchLengths + vc.numBatches; batchLength++)
+	{
+		if (*batchLength == vcq.numSurfaces)
+		{
+			buffered_t *indexSet2 = indexSet;
+			for (surf = vcq.surfaces; surf < end; surf++, indexSet2++)
+			{
+				if (surf->indexes != indexSet2->data || (surf->numIndexes * sizeof(glIndex_t)) != indexSet2->size) // same source array and byte size?
+					break;
+			}
+
+			if (surf == end) // every queued surface matched this batch
+				break;
+		}
+
+		indexSet += *batchLength; // step to the first index set of the next batch
+	}
+
+	// If found (a match leaves indexSet inside the used entries), reuse its index range
+	if (indexSet < vc.surfaceIndexSets + vc.numSurfaces)
+	{
+		tess.firstIndex = indexSet->bufferOffset / sizeof(glIndex_t);
+		//ri.Printf(PRINT_ALL, "firstIndex %d numIndexes %d as %d\n", tess.firstIndex, tess.numIndexes, batchLength - vc.batchLengths);
+		//ri.Printf(PRINT_ALL, "vc.numSurfaces %d vc.numBatches %d\n", vc.numSurfaces, vc.numBatches);
+	}
+	// If not, append it as a new batch: copy verts, rebase indexes, record index sets, upload
+	// FIXME: keep track of the vertexes so we don't have to reupload them every time
+	else
+	{
+		srfVert_t *dstVertex = vcq.vertexes;
+		glIndex_t *dstIndex = vcq.indexes;
+
+		batchLength = vc.batchLengths + vc.numBatches;
+		*batchLength = vcq.numSurfaces;
+		vc.numBatches++;
+
+		tess.firstIndex = vc.indexOffset / sizeof(glIndex_t); // vc.indexOffset is in bytes
+		vcq.vertexCommitSize = 0; // recounted below while the staging arrays are filled
+		vcq.indexCommitSize = 0;
+		for (surf = vcq.surfaces; surf < end; surf++)
+		{
+			glIndex_t *srcIndex = surf->indexes;
+			int vertexesSize = surf->numVerts * sizeof(srfVert_t);
+			int indexesSize = surf->numIndexes * sizeof(glIndex_t);
+			int i, indexOffset = (vc.vertexOffset + vcq.vertexCommitSize) / sizeof(srfVert_t); // vertex number where this surface's verts will land
+
+			Com_Memcpy(dstVertex, surf->vertexes, vertexesSize);
+			dstVertex += surf->numVerts;
+
+			vcq.vertexCommitSize += vertexesSize;
+
+			indexSet = vc.surfaceIndexSets + vc.numSurfaces;
+			indexSet->data = surf->indexes;
+			indexSet->size = indexesSize;
+			indexSet->bufferOffset = vc.indexOffset + vcq.indexCommitSize;
+			vc.numSurfaces++;
+
+			for (i = 0; i < surf->numIndexes; i++)
+				*dstIndex++ = *srcIndex++ + indexOffset; // rebase surface-local index onto the shared vertex buffer
+
+			vcq.indexCommitSize += indexesSize;
+		}
+
+		//ri.Printf(PRINT_ALL, "committing %d to %d, %d to %d as %d\n", vcq.vertexCommitSize, vc.vertexOffset, vcq.indexCommitSize, vc.indexOffset, batchLength - vc.batchLengths);
+
+		if (vcq.vertexCommitSize)
+		{
+			qglBindBuffer(GL_ARRAY_BUFFER, vc.vao->vertexesVBO);
+			qglBufferSubData(GL_ARRAY_BUFFER, vc.vertexOffset, vcq.vertexCommitSize, vcq.vertexes);
+			vc.vertexOffset += vcq.vertexCommitSize;
+		}
+
+		if (vcq.indexCommitSize)
+		{
+			qglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesIBO);
+			qglBufferSubData(GL_ELEMENT_ARRAY_BUFFER, vc.indexOffset, vcq.indexCommitSize, vcq.indexes);
+			vc.indexOffset += vcq.indexCommitSize;
+		}
+	}
+}
+
+void VaoCache_Init(void) // creates the cache VAO, sets up its attribute layout and clears all bookkeeping
+{
+	srfVert_t vert;
+	int dataSize;
+
+	vc.vao = R_CreateVao("VaoCache", NULL, VAOCACHE_VERTEX_BUFFER_SIZE, NULL, VAOCACHE_INDEX_BUFFER_SIZE, VAO_USAGE_DYNAMIC);
+
+	vc.vao->attribs[ATTR_INDEX_POSITION].enabled       = 1;
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].enabled       = 1;
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].enabled     = 1;
+	vc.vao->attribs[ATTR_INDEX_NORMAL].enabled         = 1;
+	vc.vao->attribs[ATTR_INDEX_TANGENT].enabled        = 1;
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].enabled = 1;
+	vc.vao->attribs[ATTR_INDEX_COLOR].enabled          = 1;
+
+	vc.vao->attribs[ATTR_INDEX_POSITION].count       = 3;
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].count       = 2;
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].count     = 2;
+	vc.vao->attribs[ATTR_INDEX_NORMAL].count         = 4;
+	vc.vao->attribs[ATTR_INDEX_TANGENT].count        = 4;
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].count = 4;
+	vc.vao->attribs[ATTR_INDEX_COLOR].count          = 4;
+
+	vc.vao->attribs[ATTR_INDEX_POSITION].type             = GL_FLOAT;
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].type             = GL_FLOAT;
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].type           = GL_FLOAT;
+	vc.vao->attribs[ATTR_INDEX_NORMAL].type               = GL_SHORT;
+	vc.vao->attribs[ATTR_INDEX_TANGENT].type              = GL_SHORT;
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].type       = GL_SHORT;
+	vc.vao->attribs[ATTR_INDEX_COLOR].type                = GL_UNSIGNED_SHORT;
+
+	vc.vao->attribs[ATTR_INDEX_POSITION].normalized       = GL_FALSE;
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].normalized       = GL_FALSE;
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].normalized     = GL_FALSE;
+	vc.vao->attribs[ATTR_INDEX_NORMAL].normalized         = GL_TRUE;
+	vc.vao->attribs[ATTR_INDEX_TANGENT].normalized        = GL_TRUE;
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].normalized = GL_TRUE;
+	vc.vao->attribs[ATTR_INDEX_COLOR].normalized          = GL_TRUE;
+	// NOTE(review): offsets/stride are running sums of field sizes; assumes srfVert_t holds exactly these fields, in this order, unpadded -- confirm
+	vc.vao->attribs[ATTR_INDEX_POSITION].offset       = 0;        dataSize  = sizeof(vert.xyz);
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].offset       = dataSize; dataSize += sizeof(vert.st);
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].offset     = dataSize; dataSize += sizeof(vert.lightmap);
+	vc.vao->attribs[ATTR_INDEX_NORMAL].offset         = dataSize; dataSize += sizeof(vert.normal);
+	vc.vao->attribs[ATTR_INDEX_TANGENT].offset        = dataSize; dataSize += sizeof(vert.tangent);
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].offset = dataSize; dataSize += sizeof(vert.lightdir);
+	vc.vao->attribs[ATTR_INDEX_COLOR].offset          = dataSize; dataSize += sizeof(vert.color);
+
+	vc.vao->attribs[ATTR_INDEX_POSITION].stride       = dataSize;
+	vc.vao->attribs[ATTR_INDEX_TEXCOORD].stride       = dataSize;
+	vc.vao->attribs[ATTR_INDEX_LIGHTCOORD].stride     = dataSize;
+	vc.vao->attribs[ATTR_INDEX_NORMAL].stride         = dataSize;
+	vc.vao->attribs[ATTR_INDEX_TANGENT].stride        = dataSize;
+	vc.vao->attribs[ATTR_INDEX_LIGHTDIRECTION].stride = dataSize;
+	vc.vao->attribs[ATTR_INDEX_COLOR].stride          = dataSize;
+
+	Vao_SetVertexPointers(vc.vao);
+	// start with nothing cached and nothing queued
+	vc.numSurfaces = 0;
+	vc.numBatches = 0;
+	vc.vertexOffset = 0;
+	vc.indexOffset = 0;
+	vcq.vertexCommitSize = 0;
+	vcq.indexCommitSize = 0;
+	vcq.numSurfaces = 0;
+}
+
+void VaoCache_BindVao(void) // binds the cache VAO (vc.vao) through R_BindVao
+{
+	R_BindVao(vc.vao);
+}
+
+void VaoCache_CheckAdd(qboolean *endSurface, qboolean *recycleVertexBuffer, qboolean *recycleIndexBuffer, int numVerts, int numIndexes) // only ever sets the out-flags to qtrue; never clears them
+{
+	int vertexesSize = sizeof(srfVert_t) * numVerts;
+	int indexesSize = sizeof(glIndex_t) * numIndexes;
+
+	if (vc.vao->vertexesSize < vc.vertexOffset + vcq.vertexCommitSize + vertexesSize)
+	{
+		//ri.Printf(PRINT_ALL, "out of space in vertex cache: %d < %d + %d + %d\n", vc.vao->vertexesSize, vc.vertexOffset, vcq.vertexCommitSize, vertexesSize);
+		*recycleVertexBuffer = qtrue;
+		*recycleIndexBuffer = qtrue; // cached indexes were rebased onto vertex-buffer positions, so they go stale too
+		*endSurface = qtrue;
+	}
+
+	if (vc.vao->indexesSize < vc.indexOffset + vcq.indexCommitSize + indexesSize)
+	{
+		//ri.Printf(PRINT_ALL, "out of space in index cache\n");
+		*recycleIndexBuffer = qtrue;
+		*endSurface = qtrue;
+	}
+
+	if (vc.numSurfaces + vcq.numSurfaces >= VAOCACHE_MAX_SURFACES)
+	{
+		//ri.Printf(PRINT_ALL, "out of surfaces in index cache\n");
+		*recycleIndexBuffer = qtrue; // VaoCache_RecycleIndexBuffer is what resets vc.numSurfaces and vc.numBatches
+		*endSurface = qtrue;
+	}
+
+	if (vc.numBatches >= VAOCACHE_MAX_BATCHES)
+	{
+		//ri.Printf(PRINT_ALL, "out of batches in index cache\n");
+		*recycleIndexBuffer = qtrue;
+		*endSurface = qtrue;
+	}
+
+	if (vcq.numSurfaces >= VAOCACHE_QUEUE_MAX_SURFACES)
+	{
+		//ri.Printf(PRINT_ALL, "out of queued surfaces\n");
+		*endSurface = qtrue; // queue limits only request ending the surface, not a recycle
+	}
+
+	if (VAOCACHE_QUEUE_MAX_VERTEXES * sizeof(srfVert_t) < vcq.vertexCommitSize + vertexesSize)
+	{
+		//ri.Printf(PRINT_ALL, "out of queued vertexes\n");
+		*endSurface = qtrue;
+	}
+
+	if (VAOCACHE_QUEUE_MAX_INDEXES * sizeof(glIndex_t) < vcq.indexCommitSize + indexesSize)
+	{
+		//ri.Printf(PRINT_ALL, "out of queued indexes\n");
+		*endSurface = qtrue;
+	}
+}
+
+void VaoCache_RecycleVertexBuffer(void) // discards the cached vertex data and restarts at offset 0
+{
+	qglBindBuffer(GL_ARRAY_BUFFER, vc.vao->vertexesVBO);
+	qglBufferData(GL_ARRAY_BUFFER, vc.vao->vertexesSize, NULL, GL_DYNAMIC_DRAW); // NULL data: new store of the same size, contents undefined
+	vc.vertexOffset = 0;
+}
+
+void VaoCache_RecycleIndexBuffer(void) // discards the cached index data and forgets every cached batch
+{
+	qglBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesIBO);
+	qglBufferData(GL_ELEMENT_ARRAY_BUFFER, vc.vao->indexesSize, NULL, GL_DYNAMIC_DRAW); // NULL data: new store of the same size, contents undefined
+	vc.indexOffset = 0;
+	vc.numSurfaces = 0; // the lookup tables describe data that no longer exists
+	vc.numBatches = 0;
+}
+
+void VaoCache_InitQueue(void) // empties the pending queue; the cache contents in vc are left untouched
+{
+	vcq.vertexCommitSize = 0;
+	vcq.indexCommitSize = 0;
+	vcq.numSurfaces = 0;
+}
+
+void VaoCache_AddSurface(srfVert_t *verts, int numVerts, glIndex_t *indexes, int numIndexes) // appends one surface to the pending queue
+{
+	queuedSurface_t *queueEntry = vcq.surfaces + vcq.numSurfaces; // no bounds check here; RB_SurfaceVaoCached runs VaoCache_CheckAdd first
+	queueEntry->vertexes = verts; // pointers only; the data is copied later by VaoCache_Commit
+	queueEntry->numVerts = numVerts;
+	queueEntry->indexes = indexes;
+	queueEntry->numIndexes = numIndexes;
+	vcq.numSurfaces++;
+
+	vcq.vertexCommitSize += sizeof(srfVert_t) * numVerts; // running byte totals of the queued data
+	vcq.indexCommitSize += sizeof(glIndex_t) * numIndexes;
+}
diff --git a/SP/code/rend2/tr_world.c b/SP/code/rend2/tr_world.c
index cb60ff1..dc6190a 100644
--- a/SP/code/rend2/tr_world.c
+++ b/SP/code/rend2/tr_world.c
@@ -43,7 +43,7 @@ static qboolean	R_CullSurface( msurface_t *surf ) {
 		return qfalse;
 	}
 
-	if ( *surf->data == SF_GRID && r_nocurves->integer ) {
+	if ( r_nocurves->integer && *surf->data == SF_GRID ) {
 		return qtrue;
 	}
 
@@ -663,43 +663,23 @@ static void R_RecursiveWorldNode( mnode_t *node, uint32_t planeBits, uint32_t dl
 			tr.viewParms.visBounds[1][2] = node->maxs[2];
 		}
 
-		// add merged and unmerged surfaces
-		if (tr.world->viewSurfaces && !r_nocurves->integer)
-			view = tr.world->viewSurfaces + node->firstmarksurface;
-		else
-			view = tr.world->marksurfaces + node->firstmarksurface;
+		// add surfaces
+		view = tr.world->marksurfaces + node->firstmarksurface;
 
 		c = node->nummarksurfaces;
 		while (c--) {
 			// just mark it as visible, so we don't jump out of the cache derefencing the surface
 			surf = *view;
-			if (surf < 0)
+			if (tr.world->surfacesViewCount[surf] != tr.viewCount)
 			{
-				if (tr.world->mergedSurfacesViewCount[-surf - 1] != tr.viewCount)
-				{
-					tr.world->mergedSurfacesViewCount[-surf - 1]  = tr.viewCount;
-					tr.world->mergedSurfacesDlightBits[-surf - 1] = dlightBits;
-					tr.world->mergedSurfacesPshadowBits[-surf - 1] = pshadowBits;
-				}
-				else
-				{
-					tr.world->mergedSurfacesDlightBits[-surf - 1] |= dlightBits;
-					tr.world->mergedSurfacesPshadowBits[-surf - 1] |= pshadowBits;
-				}
+				tr.world->surfacesViewCount[surf] = tr.viewCount;
+				tr.world->surfacesDlightBits[surf] = dlightBits;
+				tr.world->surfacesPshadowBits[surf] = pshadowBits;
 			}
 			else
 			{
-				if (tr.world->surfacesViewCount[surf] != tr.viewCount)
-				{
-					tr.world->surfacesViewCount[surf] = tr.viewCount;
-					tr.world->surfacesDlightBits[surf] = dlightBits;
-					tr.world->surfacesPshadowBits[surf] = pshadowBits;
-				}
-				else
-				{
-					tr.world->surfacesDlightBits[surf] |= dlightBits;
-					tr.world->surfacesPshadowBits[surf] |= pshadowBits;
-				}
+				tr.world->surfacesDlightBits[surf] |= dlightBits;
+				tr.world->surfacesPshadowBits[surf] |= pshadowBits;
 			}
 			view++;
 		}
@@ -910,14 +890,6 @@ void R_AddWorldSurfaces( void ) {
 			R_AddWorldSurface( tr.world->surfaces + i, tr.world->surfacesDlightBits[i], tr.world->surfacesPshadowBits[i] );
 			tr.refdef.dlightMask |= tr.world->surfacesDlightBits[i];
 		}
-		for (i = 0; i < tr.world->numMergedSurfaces; i++)
-		{
-			if (tr.world->mergedSurfacesViewCount[i] != tr.viewCount)
-				continue;
-
-			R_AddWorldSurface( tr.world->mergedSurfaces + i, tr.world->mergedSurfacesDlightBits[i], tr.world->mergedSurfacesPshadowBits[i] );
-			tr.refdef.dlightMask |= tr.world->mergedSurfacesDlightBits[i];
-		}
 
 		tr.refdef.dlightMask = ~tr.refdef.dlightMask;
 	}
diff --git a/SP/code/renderer/qgl.h b/SP/code/renderer/qgl.h
index b271188..d6fe1d7 100644
--- a/SP/code/renderer/qgl.h
+++ b/SP/code/renderer/qgl.h
@@ -521,19 +521,11 @@ extern void (APIENTRYP qglPNTrianglesfATI)(GLenum pname, GLfloat param);
 
 // GL function loader, based on https://gist.github.com/rygorous/16796a0c876cf8a5f542caddb55bce8a
 
-// OpenGL 1.2, was GL_EXT_draw_range_elements
-#define QGL_1_2_PROCS \
-	GLE(void, DrawRangeElements, GLenum mode, GLuint start, GLuint end, GLsizei count, GLenum type, const GLvoid *indices) \
-
 // OpenGL 1.3, was GL_ARB_texture_compression
 #define QGL_1_3_PROCS \
 	GLE(void, CompressedTexImage2D, GLenum target, GLint level, GLenum internalformat, GLsizei width, GLsizei height, GLint border, GLsizei imageSize, const void *data) \
 	GLE(void, CompressedTexSubImage2D, GLenum target, GLint level, GLint xoffset, GLint yoffset, GLsizei width, GLsizei height, GLenum format, GLsizei imageSize, const void *data) \
 
-// OpenGL 1.4, was GL_EXT_multi_draw_arrays
-#define QGL_1_4_PROCS \
-	GLE(void, MultiDrawElements, GLenum mode, const GLsizei *count, GLenum type, const GLvoid* *indices, GLsizei primcount) \
-
 // OpenGL 1.5, was GL_ARB_vertex_buffer_object and GL_ARB_occlusion_query
 #define QGL_1_5_PROCS \
 	GLE(void, GenQueries, GLsizei n, GLuint *ids) \
@@ -782,9 +774,7 @@ extern void (APIENTRYP qglPNTrianglesfATI)(GLenum pname, GLfloat param);
 	GLE(GLvoid, NamedFramebufferRenderbufferEXT, GLuint framebuffer, GLenum attachment, GLenum renderbuffertarget, GLuint renderbuffer) \
 
 #define GLE(ret, name, ...) typedef ret APIENTRY name##proc(__VA_ARGS__); extern name##proc * qgl##name;
-QGL_1_2_PROCS;
 QGL_1_3_PROCS;
-QGL_1_4_PROCS;
 QGL_1_5_PROCS;
 QGL_2_0_PROCS;
 QGL_EXT_framebuffer_object_PROCS;

-- 
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-games/iortcw.git



More information about the Pkg-games-commits mailing list