Addison Crump 862de53cf6
Full libfuzzer shimming (for cargo-fuzz libfuzzer alternative and other use cases) (#981)
* squash libfuzzer edits

* fixup: compat with custom mutators

* use tui flag

* add introspection support

* use libfuzzer dep now that we've merged

* force input loading

* some fixes

* begin docs, impl shrink

* make whole-archive conditional and not default

* make more copies of counters maps

* lol, remember to add the observer

* make size edge map observer an observer

* fixup: make def of run driver conditional

* add sanity checks for insertion

* revert silencing of forks

* add experimental tmin support; add default asan flags

* use default options instead of specifying our own

* implement lockless mode

* fix merge

* fixup lockless corpus

* fixup for generalisation

* remove erroneous drop_in_place

* improve error logging in the case of corpus loading failure

* ok, use lock files 😔

* fix tmin

* implement merge (again); fix rare cases with maps being too small

* implement a scheduler for removing excess

* implement a walking strategy for corpus loading for large corpora

* revert filename parameter; rename and remove duplicates

* various cleanup and clippy satisfaction

* fix no_std tests

* clang-format

* expand and satisfy the clippy gods

* fix sanitizer_ifaces bindgen for no_std

* fix wasm fuzzer

* fixup clippy script

* rename and provide a small amount of explanation for sanitizer_interfaces

* fixup: HasLastReportTime

* fix clippy oddities

* restrict clippy checks to linux-only for libafl_libfuzzer_runtime

* name the mutators

* format

* fix clippy warning

* hope docker is fixed

* fix cmin lint

* clippy pass

* more docs

* more clippy

* fix remaining clippy complaints

* fix import

* miri fixes (no constructors executed)

* exclude libafl_libfuzzer from cargo-hack

* fix clippy check for sanitizer_interfaces

* fmt

* fix CI (?)

* deduplicate sancov 8bit for improved perf on ASAN

* merge 8bit coverage regions + comment out insane deduplication

* no erroring out on free hooks

* fixup for non-forking merge

* skip the corpus dir if we use it

* fixup: recent libafl changes and feature flags

* libafl_libfuzzer: use rust-lld for whole-archive feature

* clarify cause of failure

* mark unsafe

* clippy :cursed_cowboy:

* attempt to fix wasm

* spooky unknowable bug 👻

* more clippy lints

* clippy fix for merge

* use the version pin

* add unsafe to ::register

* Serdeany autoreg fix

* make type assert actionable

* miri fixes

---------

Co-authored-by: Dominik Maier <domenukk@gmail.com>
Co-authored-by: Dominik Maier <dmnk@google.com>
Co-authored-by: Mrmaxmeier <Mrmaxmeier@gmail.com>
2023-08-24 13:30:23 +02:00

412 lines
13 KiB
Rust

//! A singlethreaded QEMU fuzzer that can auto-restart.
use core::{cell::RefCell, ptr::addr_of_mut, time::Duration};
#[cfg(unix)]
use std::os::unix::io::{AsRawFd, FromRawFd};
use std::{
env,
fs::{self, File, OpenOptions},
io::{self, Write},
path::PathBuf,
process,
};
use clap::{Arg, Command};
use libafl::{
corpus::{Corpus, InMemoryOnDiskCorpus, OnDiskCorpus},
events::SimpleRestartingEventManager,
executors::{ExitKind, ShadowExecutor, TimeoutExecutor},
feedback_or,
feedbacks::{CrashFeedback, MaxMapFeedback, TimeFeedback},
fuzzer::{Fuzzer, StdFuzzer},
inputs::{BytesInput, HasTargetBytes},
monitors::SimpleMonitor,
mutators::{
scheduled::havoc_mutations, token_mutations::I2SRandReplace, tokens_mutations,
StdMOptMutator, StdScheduledMutator, Tokens,
},
observers::{HitcountsMapObserver, TimeObserver, VariableMapObserver},
schedulers::{
powersched::PowerSchedule, IndexesLenTimeMinimizerScheduler, PowerQueueScheduler,
},
stages::{
calibrate::CalibrationStage, power::StdPowerMutationalStage, ShadowTracingStage,
StdMutationalStage,
},
state::{HasCorpus, HasMetadata, StdState},
Error,
};
use libafl_bolts::{
current_nanos, current_time,
os::dup2,
rands::StdRand,
shmem::{ShMemProvider, StdShMemProvider},
tuples::{tuple_list, Merge},
AsSlice,
};
use libafl_qemu::{
cmplog::{CmpLogObserver, QemuCmpLogHelper},
//asan::{init_with_asan, QemuAsanHelper},
edges::edges_map_mut_slice,
edges::QemuEdgeCoverageHelper,
edges::MAX_EDGES_NUM,
elf::EasyElf,
emu::Emulator,
filter_qemu_args,
hooks::QemuHooks,
GuestReg,
//snapshot::QemuSnapshotHelper,
MmapPerms,
QemuExecutor,
Regs,
};
#[cfg(unix)]
use nix::{self, unistd::dup};
/// Largest input, in bytes, handed to the target (1 MiB).
pub const MAX_INPUT_SIZE: usize = 1024 * 1024;
/// The fuzzer main
///
/// Parses the `--libafl-*` options out of the arguments returned by
/// `filter_qemu_args()`, checks the output and seed directories, and then
/// starts `fuzz`. Usage and directory problems are printed and make the
/// function return early; an error returned by `fuzz` panics.
pub fn main() {
    // Register the metadata types used in this fuzzer
    // Needed only on no_std
    // unsafe { RegistryBuilder::register::<Tokens>(); }

    let cli = Command::new(env!("CARGO_PKG_NAME"))
        .version(env!("CARGO_PKG_VERSION"))
        .author("AFLplusplus team")
        .about("LibAFL-based fuzzer with QEMU for Fuzzbench")
        .arg(
            Arg::new("out")
                .help("The directory to place finds in ('corpus')")
                .long("libafl-out")
                .required(true),
        )
        .arg(
            Arg::new("in")
                .help("The directory to read initial inputs from ('seeds')")
                .long("libafl-in")
                .required(true),
        )
        .arg(
            Arg::new("tokens")
                .long("libafl-tokens")
                .help("A file to read tokens from, to be used during fuzzing"),
        )
        .arg(
            Arg::new("logfile")
                .long("libafl-logfile")
                .help("Duplicates all output to this file")
                .default_value("libafl.log"),
        )
        .arg(
            Arg::new("timeout")
                .long("libafl-timeout")
                .help("Timeout for each individual execution, in milliseconds")
                .default_value("1000"),
        );

    // Only the arguments left over by `filter_qemu_args()` are parsed here.
    let res = match cli.try_get_matches_from(filter_qemu_args()) {
        Ok(matches) => matches,
        Err(err) => {
            let exe = env::current_exe().unwrap_or_else(|_| "fuzzer".into());
            println!(
                "Syntax: {}, --libafl-in <input> --libafl-out <output>\n{:?}",
                exe.to_string_lossy(),
                err,
            );
            return;
        }
    };

    let workdir = env::current_dir().unwrap();
    println!("Workdir: {:?}", workdir.to_string_lossy().into_owned());

    // For fuzzbench, crashes and finds are inside the same `corpus` directory, in the "queue" and "crashes" subdir.
    let out_root = PathBuf::from(res.get_one::<String>("out").unwrap());
    if fs::create_dir(&out_root).is_err() {
        println!("Out dir at {:?} already exists.", &out_root);
        if !out_root.is_dir() {
            println!("Out dir at {:?} is not a valid directory!", &out_root);
            return;
        }
    }
    let crashes = out_root.join("crashes");
    let queue = out_root.join("queue");

    let in_dir = PathBuf::from(res.get_one::<String>("in").unwrap());
    if !in_dir.is_dir() {
        println!("In dir at {:?} is not a valid directory!", &in_dir);
        return;
    }

    let tokens = res.get_one::<String>("tokens").map(PathBuf::from);
    let logfile = PathBuf::from(res.get_one::<String>("logfile").unwrap());

    // The timeout arrives as a string and is interpreted as milliseconds.
    let timeout_ms: u64 = res
        .get_one::<String>("timeout")
        .unwrap()
        .parse()
        .expect("Could not parse timeout in milliseconds");
    let timeout = Duration::from_millis(timeout_ms);

    fuzz(queue, crashes, in_dir, tokens, logfile, timeout)
        .expect("An error occurred while fuzzing");
}
/// The actual fuzzer
///
/// Starts the QEMU emulator on the target, stops it at `LLVMFuzzerTestOneInput`,
/// and then fuzzes that function under a `SimpleRestartingEventManager`.
///
/// # Arguments
/// * `corpus_dir` - directory handed to the `InMemoryOnDiskCorpus` holding the evolving corpus
/// * `objective_dir` - directory handed to the `OnDiskCorpus` holding solutions (crashes)
/// * `seed_dir` - directory the initial inputs are loaded from
/// * `tokenfile` - optional file to load `Tokens` from
/// * `logfile` - file every monitor message is appended to
/// * `timeout` - duration passed to the `TimeoutExecutor`
///
/// # Errors
/// Propagates the errors raised via `?` during setup (ELF parsing, opening the
/// log file or `/dev/null`, fd duplication, shared memory, mutator and executor
/// creation, token loading).
///
/// # Panics
/// Panics if the emulator cannot be created, the `LLVMFuzzerTestOneInput` symbol
/// is missing, a guest register or mapping operation fails, the restarter cannot
/// be set up, or the fuzzing loop returns an error.
fn fuzz(
corpus_dir: PathBuf,
objective_dir: PathBuf,
seed_dir: PathBuf,
tokenfile: Option<PathBuf>,
logfile: PathBuf,
timeout: Duration,
) -> Result<(), Error> {
// Remove LD_LIBRARY_PATH from this process before the environment is collected
// for the emulator below.
env::remove_var("LD_LIBRARY_PATH");
let args: Vec<String> = env::args().collect();
let env: Vec<(String, String)> = env::vars().collect();
let emu = Emulator::new(&args, &env).unwrap();
//let emu = init_with_asan(&mut args, &mut env);
// Parse the target binary so the harness entry point can be located by symbol name.
let mut elf_buffer = Vec::new();
let elf = EasyElf::from_file(emu.binary_path(), &mut elf_buffer)?;
let test_one_input_ptr = elf
.resolve_symbol("LLVMFuzzerTestOneInput", emu.load_addr())
.expect("Symbol LLVMFuzzerTestOneInput not found");
println!("LLVMFuzzerTestOneInput @ {test_one_input_ptr:#x}");
// Run the target up to the harness entry; the code below assumes execution
// stopped at the breakpoint just set.
emu.set_breakpoint(test_one_input_ptr); // LLVMFuzzerTestOneInput
unsafe { emu.run() };
println!("Break at {:#x}", emu.read_reg::<_, u64>(Regs::Rip).unwrap());
// Read the 8 bytes at the stack pointer as a little-endian u64: at function
// entry on x86_64 this is the return address of the harness call.
let stack_ptr: u64 = emu.read_reg(Regs::Rsp).unwrap();
let mut ret_addr = [0; 8];
unsafe { emu.read_mem(stack_ptr, &mut ret_addr) };
let ret_addr = u64::from_le_bytes(ret_addr);
println!("Stack pointer = {stack_ptr:#x}");
println!("Return address = {ret_addr:#x}");
// Move the breakpoint from the harness entry to its return address.
emu.remove_breakpoint(test_one_input_ptr); // LLVMFuzzerTestOneInput
emu.set_breakpoint(ret_addr); // LLVMFuzzerTestOneInput ret addr
// Map a read/write region of MAX_INPUT_SIZE bytes that the harness writes each input into.
let input_addr = emu
.map_private(0, MAX_INPUT_SIZE, MmapPerms::ReadWrite)
.unwrap();
println!("Placing input at {input_addr:#x}");
// Log file, opened in append mode and created if missing. It lives in a `RefCell`
// because the monitor closure borrows it mutably and it is replaced again further down.
let log = RefCell::new(
OpenOptions::new()
.append(true)
.create(true)
.open(&logfile)?,
);
// Keep a duplicate of the stdout descriptor: the monitor writes to this copy,
// while stdout itself is redirected to /dev/null before the fuzzing loop.
#[cfg(unix)]
let mut stdout_cpy = unsafe {
let new_fd = dup(io::stdout().as_raw_fd())?;
File::from_raw_fd(new_fd)
};
// NOTE(review): `File::open` opens read-only, so writes to the redirected
// stdout/stderr presumably fail rather than being discarded - confirm this is intended.
#[cfg(unix)]
let file_null = File::open("/dev/null")?;
// While the stats are state, they are usually used in the broker - which is likely never restarted
let monitor = SimpleMonitor::new(|s| {
#[cfg(unix)]
writeln!(&mut stdout_cpy, "{s}").unwrap();
#[cfg(windows)]
println!("{s}");
// Every message also goes to the log file, prefixed with the current time.
writeln!(log.borrow_mut(), "{:?} {}", current_time(), s).unwrap();
});
let mut shmem_provider = StdShMemProvider::new()?;
let (state, mut mgr) = match SimpleRestartingEventManager::launch(monitor, &mut shmem_provider)
{
// The restarting state will spawn the same process again as child, then restart it each time it crashes.
Ok(res) => res,
Err(err) => match err {
// A shutdown request is not treated as a failure.
Error::ShuttingDown => {
return Ok(());
}
_ => {
panic!("Failed to setup the restarter: {err}");
}
},
};
// Create an observation channel using the coverage map
// (built from the edges map slice and a raw pointer to `MAX_EDGES_NUM`, hence `unsafe`)
let edges_observer = unsafe {
HitcountsMapObserver::new(VariableMapObserver::from_mut_slice(
"edges",
edges_map_mut_slice(),
addr_of_mut!(MAX_EDGES_NUM),
))
};
// Create an observation channel to keep track of the execution time
let time_observer = TimeObserver::new("time");
// Create an observation channel using cmplog map
let cmplog_observer = CmpLogObserver::new("cmplog", true);
// The map feedback is created first because the calibration stage is built from it
// before it is moved into the combined feedback below.
let map_feedback = MaxMapFeedback::tracking(&edges_observer, true, false);
let calibration = CalibrationStage::new(&map_feedback);
// Feedback to rate the interestingness of an input
// This one is composed by two Feedbacks in OR
let mut feedback = feedback_or!(
// New maximization map feedback linked to the edges observer and the feedback state
map_feedback,
// Time feedback, this one does not need a feedback state
TimeFeedback::with_observer(&time_observer)
);
// A feedback to choose if an input is a solution or not
let mut objective = CrashFeedback::new();
// Reuse the state handed back by the restarter if there is one; otherwise create a State from scratch
let mut state = state.unwrap_or_else(|| {
StdState::new(
// RNG
StdRand::with_seed(current_nanos()),
// Corpus that will be evolved, we keep it in memory for performance
InMemoryOnDiskCorpus::new(corpus_dir).unwrap(),
// Corpus in which we store solutions (crashes in this example),
// on disk so the user can get them after stopping the fuzzer
OnDiskCorpus::new(objective_dir).unwrap(),
// States of the feedbacks.
// The feedbacks can report the data that should persist in the State.
&mut feedback,
// Same for objective feedbacks
&mut objective,
)
.unwrap()
});
// Setup a random Input2State stage
let i2s = StdMutationalStage::new(StdScheduledMutator::new(tuple_list!(I2SRandReplace::new())));
// Setup a MOPT mutator over the havoc and token mutations
// NOTE(review): the meaning of the `7` and `5` arguments is not visible here - check `StdMOptMutator::new`.
let mutator = StdMOptMutator::new(
&mut state,
havoc_mutations().merge(tokens_mutations()),
7,
5,
)?;
let power = StdPowerMutationalStage::new(mutator);
// A minimization+queue policy to get testcases from the corpus
let scheduler = IndexesLenTimeMinimizerScheduler::new(PowerQueueScheduler::new(
&mut state,
&edges_observer,
PowerSchedule::FAST,
));
// A fuzzer with feedbacks and a corpus scheduler
let mut fuzzer = StdFuzzer::new(scheduler, feedback, objective);
// The wrapped harness function, calling out to the LLVM-style harness
let mut harness = |input: &BytesInput| {
let target = input.target_bytes();
let mut buf = target.as_slice();
let mut len = buf.len();
// Inputs longer than the mapped region are truncated to MAX_INPUT_SIZE bytes.
if len > MAX_INPUT_SIZE {
buf = &buf[0..MAX_INPUT_SIZE];
len = MAX_INPUT_SIZE;
}
unsafe {
// Copy the input into the guest, pass its address and length in Rdi/Rsi,
// and reset Rip/Rsp to the values captured at the harness entry.
emu.write_mem(input_addr, buf);
emu.write_reg(Regs::Rdi, input_addr).unwrap();
emu.write_reg(Regs::Rsi, len as GuestReg).unwrap();
emu.write_reg(Regs::Rip, test_one_input_ptr).unwrap();
emu.write_reg(Regs::Rsp, stack_ptr).unwrap();
// Presumably returns once the breakpoint at `ret_addr` is hit - confirm.
emu.run();
}
// The harness itself always reports a normal exit.
ExitKind::Ok
};
// QEMU hooks providing the edge coverage and cmplog helpers
let mut hooks = QemuHooks::new(
&emu,
tuple_list!(
QemuEdgeCoverageHelper::default(),
QemuCmpLogHelper::default(),
//QemuAsanHelper::default(),
//QemuSnapshotHelper::new()
),
);
// Create the QEMU executor for the harness with one observer for edge coverage and one for the execution time
let executor = QemuExecutor::new(
&mut hooks,
&mut harness,
tuple_list!(edges_observer, time_observer),
&mut fuzzer,
&mut state,
&mut mgr,
)?;
// Wrap the executor in a `TimeoutExecutor` configured with `timeout`
let executor = TimeoutExecutor::new(executor, timeout);
// Wrap it once more in a `ShadowExecutor` that carries the cmplog observer
let mut executor = ShadowExecutor::new(executor, tuple_list!(cmplog_observer));
// Read tokens, unless the state already carries `Tokens` metadata
if let Some(tokenfile) = tokenfile {
if state.metadata_map().get::<Tokens>().is_none() {
state.add_metadata(Tokens::from_file(tokenfile)?);
}
}
// Load the seeds only when the state reports that it still needs its initial inputs
if state.must_load_initial_inputs() {
state
.load_initial_inputs(&mut fuzzer, &mut executor, &mut mgr, &[seed_dir.clone()])
.unwrap_or_else(|_| {
// A failed import ends the process with exit status 0.
println!("Failed to load initial corpus at {:?}", &seed_dir);
process::exit(0);
});
println!("We imported {} inputs from disk.", state.corpus().count());
}
let tracing = ShadowTracingStage::new(&mut executor);
// The order of the stages matters!
let mut stages = tuple_list!(calibration, tracing, i2s, power);
// Remove target output (logs still survive)
#[cfg(unix)]
{
// Point both stdout and stderr at the /dev/null descriptor opened earlier.
let null_fd = file_null.as_raw_fd();
dup2(null_fd, io::stdout().as_raw_fd())?;
dup2(null_fd, io::stderr().as_raw_fd())?;
}
// reopen file to make sure we're at the end
log.replace(
OpenOptions::new()
.append(true)
.create(true)
.open(&logfile)?,
);
fuzzer
.fuzz_loop(&mut stages, &mut executor, &mut state, &mut mgr)
.expect("Error in the fuzzing loop");
// Never reached
Ok(())
}