Skip to content

Commit 051a56c

Browse files
committed
WARP: Fix very large binaries being prevented from creating WARP files for all of their functions
This was caused by the internal flatbuffer verifier reaching the maximum number of tables. That limit is expected behavior, and the solution is to split the large chunk into smaller ones, effectively creating more flatbuffer "views" of tables. This is fine from a performance perspective because we have already optimized for many small chunks (e.g. we store a lookup table next to each chunk). I left some comments for future improvements, but this should be good for now. Alongside this change, we also improved generation performance by 2x on some pathological binaries.
1 parent 12411d9 commit 051a56c

File tree

1 file changed

+42
-17
lines changed

1 file changed

+42
-17
lines changed

plugins/warp/src/processor.rs

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ use ar::Archive;
1111
use dashmap::DashMap;
1212
use rayon::iter::IntoParallelIterator;
1313
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
14+
use rayon::prelude::ParallelSlice;
1415
use regex::Regex;
1516
use serde_json::{json, Value};
1617
use tempdir::TempDir;
@@ -36,6 +37,11 @@ use crate::cache::cached_type_references;
3637
use crate::convert::platform_to_target;
3738
use crate::{build_function, INCLUDE_TAG_ICON, INCLUDE_TAG_NAME};
3839

40+
/// Ensure we never exceed this many functions per signature chunk.
41+
///
42+
/// This was added to fix running into the max table limit on certain files.
43+
const MAX_FUNCTIONS_PER_CHUNK: usize = 1_000_000;
44+
3945
#[derive(Error, Debug)]
4046
pub enum ProcessingError {
4147
#[error("Failed to open archive: {0}")]
@@ -618,6 +624,15 @@ impl WarpFileProcessor {
618624
}
619625
}
620626

627+
// In the future we may want to do something with a view that has no functions, but for now
628+
// we do not care to create any chunks. By skipping this we can avoid merging of empty chunks,
629+
// which is quick, but it still requires some allocations that we can avoid.
630+
if view.functions().is_empty() {
631+
self.state
632+
.set_file_state(path.clone(), ProcessingFileState::Processed);
633+
return Err(ProcessingError::SkippedFile(path));
634+
}
635+
621636
// Process the view
622637
let warp_file = self.process_view(path, &view);
623638
// Close the view manually, see comment in [`BinaryView`].
@@ -655,6 +670,10 @@ impl WarpFileProcessor {
655670
})
656671
.filter_map(|res| match res {
657672
Ok(result) => Some(Ok(result)),
673+
Err(ProcessingError::SkippedFile(path)) => {
674+
log::debug!("Skipping directory file: {:?}", path);
675+
None
676+
}
658677
Err(ProcessingError::Cancelled) => Some(Err(ProcessingError::Cancelled)),
659678
Err(e) => {
660679
log::error!("Directory file processing error: {:?}", e);
@@ -739,21 +758,23 @@ impl WarpFileProcessor {
739758
let mut chunks = Vec::new();
740759
if self.file_data != FileDataKindField::Types {
741760
let mut signature_chunks = self.create_signature_chunks(view)?;
742-
for (target, signature_chunk) in signature_chunks.drain() {
743-
if signature_chunk.raw_functions().count() != 0 {
744-
let chunk = Chunk::new_with_target(
745-
ChunkKind::Signature(signature_chunk),
746-
self.compression_type.into(),
747-
target,
748-
);
749-
chunks.push(chunk)
761+
for (target, mut target_chunks) in signature_chunks.drain() {
762+
for signature_chunk in target_chunks.drain(..) {
763+
if signature_chunk.raw_functions().next().is_some() {
764+
let chunk = Chunk::new_with_target(
765+
ChunkKind::Signature(signature_chunk),
766+
self.compression_type.into(),
767+
target.clone(),
768+
);
769+
chunks.push(chunk)
770+
}
750771
}
751772
}
752773
}
753774

754775
if self.file_data != FileDataKindField::Signatures {
755776
let type_chunk = self.create_type_chunk(view)?;
756-
if type_chunk.raw_types().count() != 0 {
777+
if type_chunk.raw_types().next().is_some() {
757778
chunks.push(Chunk::new(
758779
ChunkKind::Type(type_chunk),
759780
self.compression_type.into(),
@@ -773,7 +794,7 @@ impl WarpFileProcessor {
773794
pub fn create_signature_chunks(
774795
&self,
775796
view: &BinaryView,
776-
) -> Result<HashMap<Target, SignatureChunk<'static>>, ProcessingError> {
797+
) -> Result<HashMap<Target, Vec<SignatureChunk<'static>>>, ProcessingError> {
777798
let is_function_named = |f: &Guard<BNFunction>| {
778799
self.included_functions == IncludedFunctionsField::All
779800
|| view.symbol_by_address(f.start()).is_some()
@@ -836,15 +857,19 @@ impl WarpFileProcessor {
836857
acc
837858
});
838859

839-
let chunks: Result<HashMap<Target, SignatureChunk<'static>>, ProcessingError> =
860+
// Split into multiple chunks if a target has more than MAX_FUNCTIONS_PER_CHUNK functions.
861+
// We do this because otherwise some chunks may have too many flatbuffer tables for the verifier to handle.
862+
let chunks: Result<HashMap<Target, Vec<SignatureChunk<'static>>>, ProcessingError> =
840863
built_functions
841-
.into_iter()
864+
.into_par_iter()
842865
.map(|(target, functions)| {
843-
Ok((
844-
target,
845-
SignatureChunk::new(&functions)
846-
.ok_or(ProcessingError::ChunkCreationFailed)?,
847-
))
866+
let chunks: Result<Vec<_>, _> = functions
867+
.par_chunks(MAX_FUNCTIONS_PER_CHUNK)
868+
.map(|f| {
869+
SignatureChunk::new(&f).ok_or(ProcessingError::ChunkCreationFailed)
870+
})
871+
.collect();
872+
Ok((target, chunks?))
848873
})
849874
.collect();
850875

0 commit comments

Comments
 (0)