While investigating a bug involving BX caches in the prelude to Wings of Dawn's release, I decided to take a stab at the BX file generation code in case I accidentally did something to irrversibly change the format. Fearing the possibility of everyone having to rebuild their BX caches, I wanted to make it less painful and looked into optimizing it. Here is the fruit of my Saturday labor:
Index: code/graphics/2d.cpp
===================================================================
--- code/graphics/2d.cpp (revision 11296)
+++ code/graphics/2d.cpp (working copy)
@@ -32,9 +32,8 @@
#include "gamesequence/gamesequence.h" //WMC - for scripting hooks in gr_flip()
#include "io/keycontrol.h" // m!m
#include "debugconsole/console.h"
-#include "debugconsole/console.h"
+#include "io/timer.h"
-
#if defined(SCP_UNIX) && !defined(__APPLE__)
#if ( SDL_VERSION_ATLEAST(1, 2, 7) )
#include "SDL_cpuinfo.h"
@@ -1454,6 +1453,20 @@
return idx;
}
+int poly_list::find_first_vertex_fast(int idx)
+{
+ uint* first_idx = std::lower_bound(sorted_indices, sorted_indices + n_verts, idx, finder(this, NULL, NULL));
+
+ if ( first_idx == sorted_indices + n_verts ) {
+ // if this happens then idx was never in the index list to begin with which is not good
+ mprintf(("Sorted index list missing index %d!", idx));
+ Int3();
+ return idx;
+ }
+
+ return *first_idx;
+}
+
/**
* Given a list (plist) find the index within the indexed list that the vert at position idx within list is at
*/
@@ -1479,7 +1492,18 @@
return -1;
}
+int poly_list::find_index_fast(poly_list *plist, int idx)
+{
+ // searching for an out of bounds index using the finder means we're trying to find the vert and norm we're passing into the finder instance
+ uint* first_idx = std::lower_bound(sorted_indices, sorted_indices + n_verts, n_verts, finder(this, &plist->vert[idx], &plist->norm[idx]));
+ if (first_idx == sorted_indices + n_verts) {
+ return -1;
+ }
+
+ return *first_idx;
+}
+
void poly_list::allocate(int _verts)
{
if (_verts <= currently_allocated)
@@ -1500,6 +1524,11 @@
tsb = NULL;
}
+ if ( sorted_indices != NULL ) {
+ vm_free(sorted_indices);
+ sorted_indices = NULL;
+ }
+
if (_verts) {
vert = (vertex*)vm_malloc(sizeof(vertex) * _verts);
norm = (vec3d*)vm_malloc(sizeof(vec3d) * _verts);
@@ -1507,6 +1536,8 @@
if (Cmdline_normal) {
tsb = (tsb_t*)vm_malloc(sizeof(tsb_t) * _verts);
}
+
+ sorted_indices = (uint*)vm_malloc(sizeof(uint) * _verts);
}
n_verts = 0;
@@ -1529,6 +1560,11 @@
vm_free(tsb);
tsb = NULL;
}
+
+ if (sorted_indices != NULL) {
+ vm_free(sorted_indices);
+ sorted_indices = NULL;
+ }
}
void poly_list::calculate_tangent()
@@ -1626,6 +1662,9 @@
int j, z = 0;
ubyte *nverts_good = NULL;
+ uint t0, t1;
+ t0 = timer_get_milliseconds();
+
// calculate tangent space data (must be done early)
calculate_tangent();
@@ -1640,8 +1679,10 @@
vertex_list.reserve(n_verts);
+ generate_sorted_index_list();
+
for (j = 0; j < n_verts; j++) {
- if (find_first_vertex(j) == j) {
+ if (find_first_vertex_fast(j) == j) {
nverts++;
nverts_good[j] = 1;
vertex_list.push_back(j);
@@ -1648,6 +1689,10 @@
}
}
+ t1 = timer_get_milliseconds();
+
+ mprintf(("Index Buffer created in %d milliseconds\n", t1-t0));
+
// if there is nothig to change then bail
if (n_verts == nverts) {
if (nverts_good != NULL) {
@@ -1682,6 +1727,8 @@
vm_free(nverts_good);
}
+ buffer_list_internal.generate_sorted_index_list();
+
(*this) = buffer_list_internal;
}
@@ -1696,11 +1743,94 @@
memcpy(tsb, other_list.tsb, sizeof(tsb_t) * other_list.n_verts);
}
+ memcpy(sorted_indices, other_list.sorted_indices, sizeof(uint) * other_list.n_verts);
+
n_verts = other_list.n_verts;
return *this;
}
+void poly_list::generate_sorted_index_list()
+{
+ for ( uint j = 0; j < n_verts; ++j) {
+ sorted_indices[j] = j;
+ }
+
+ std::sort(sorted_indices, sorted_indices + n_verts, finder(this));
+}
+
+bool poly_list::finder::operator()(const uint a, const uint b)
+{
+ vertex *vert_a;
+ vertex *vert_b;
+ vec3d *norm_a;
+ vec3d *norm_b;
+
+ Assert(search_list != NULL);
+
+ if ( a == search_list->n_verts ) {
+ Assert(vert_to_find != NULL);
+ Assert(norm_to_find != NULL);
+ Assert(a != b);
+
+ vert_a = vert_to_find;
+ norm_a = norm_to_find;
+ } else {
+ vert_a = &search_list->vert[a];
+ norm_a = &search_list->norm[a];
+ }
+
+ if ( b == search_list->n_verts ) {
+ Assert(vert_to_find != NULL);
+ Assert(norm_to_find != NULL);
+ Assert(a != b);
+
+ vert_b = vert_to_find;
+ norm_b = norm_to_find;
+ } else {
+ vert_b = &search_list->vert[b];
+ norm_b = &search_list->norm[b];
+ }
+
+ if (norm_a->xyz.x != norm_b->xyz.x) {
+ return norm_a->xyz.x < norm_b->xyz.x;
+ }
+
+ if (norm_a->xyz.y != norm_b->xyz.y) {
+ return norm_a->xyz.y < norm_b->xyz.y;
+ }
+
+ if (norm_a->xyz.z != norm_b->xyz.z) {
+ return norm_a->xyz.z < norm_b->xyz.z;
+ }
+
+ if (vert_a->world.xyz.x != vert_b->world.xyz.x) {
+ return vert_a->world.xyz.x < vert_b->world.xyz.x;
+ }
+
+ if (vert_a->world.xyz.y != vert_b->world.xyz.y) {
+ return vert_a->world.xyz.y < vert_b->world.xyz.y;
+ }
+
+ if (vert_a->world.xyz.z != vert_b->world.xyz.z) {
+ return vert_a->world.xyz.z < vert_b->world.xyz.z;
+ }
+
+ if (vert_a->texture_position.u != vert_b->texture_position.u) {
+ return vert_a->texture_position.u < vert_b->texture_position.u;
+ }
+
+ if ( vert_a->texture_position.v != vert_b->texture_position.v ) {
+ return vert_a->texture_position.v < vert_b->texture_position.v;
+ }
+
+ if ( !compare_indices ) {
+ return vert_a->texture_position.v < vert_b->texture_position.v;
+ } else {
+ return a < b;
+ }
+}
+
void gr_shield_icon(coord2d coords[6], int resize_mode)
{
if (gr_screen.mode == GR_STUB) {
Index: code/graphics/2d.h
===================================================================
--- code/graphics/2d.h (revision 11296)
+++ code/graphics/2d.h (working copy)
@@ -100,10 +100,23 @@
* This should be basicly just like it is in the VB
* a list of triangles and their associated normals
*/
-class poly_list
-{
+class poly_list {
+ // helper function struct that let's us sort the indices.
+ // an instance is fed into std::sort and std::lower_bound.
+ // overloaded operator() is used for the comparison function.
+ struct finder {
+ poly_list* search_list;
+ bool compare_indices;
+ vertex* vert_to_find;
+ vec3d* norm_to_find;
+
+ finder(poly_list* _search_list): search_list(_search_list), vert_to_find(NULL), norm_to_find(NULL), compare_indices(true) {}
+ finder(poly_list* _search_list, vertex* _vert, vec3d* _norm): search_list(_search_list), vert_to_find(_vert), norm_to_find(_norm), compare_indices(false) {}
+
+ bool operator()(const uint a, const uint b);
+ };
public:
- poly_list(): n_verts(0), vert(NULL), norm(NULL), tsb(NULL), currently_allocated(0) {}
+ poly_list(): n_verts(0), vert(NULL), norm(NULL), tsb(NULL), currently_allocated(0), sorted_indices(NULL) {}
~poly_list();
poly_list& operator = (poly_list&);
@@ -115,11 +128,15 @@
vec3d *norm;
tsb_t *tsb;
+ uint *sorted_indices;
+
int find_index(poly_list *plist, int idx);
-
+ int find_index_fast(poly_list *plist, int idx);
private:
int currently_allocated;
int find_first_vertex(int idx);
+ int find_first_vertex_fast(int idx);
+ void generate_sorted_index_list();
};
Index: code/model/modelinterp.cpp
===================================================================
--- code/model/modelinterp.cpp (revision 11296)
+++ code/model/modelinterp.cpp (working copy)
@@ -4381,7 +4381,7 @@
new_buffer.assign(j, first_index);
} else {
- first_index = model_list->find_index(&polygon_list[i], j);
+ first_index = model_list->find_index_fast(&polygon_list[i], j);
Assert(first_index != -1);
new_buffer.assign(j, first_index);
Speeds up creation of BX files by a crazy amount. I tried it on the Diaspora Sobek Battlestar which is about a 100 Meg POF file. Takes about 10 seconds in a Release configuration which is pretty damn good; it takes 6 seconds to load the model with the BX file. In Debug, the Sobek takes about 30 seconds to load with a BX file. Loading it without the BX file with this patch only took 40 seconds.
How did i do it? I used std::sort and std::lower_bound with a custom comparison function. Generating index buffers is currently done brute force but this patch is a lot smarter about finding common verts when building an index buffer. I first sort the verts spatially while keeping track of their indices and then I use a lower_bound to perform a binary search.
Try it yourself. Make a build with this patch, delete a BX file in your cache folder and view the model in the tech room or lab. Heck, maybe with this we can get rid of BX files altogether.