FRET-LibAFL/libafl/src/mutators/token_mutations.rs
2024-08-30 14:15:26 +02:00

1932 lines
69 KiB
Rust

//! Tokens are what AFL calls extras or dictionaries.
//! They may be inserted as part of mutations during fuzzing.
use alloc::{borrow::Cow, vec::Vec};
#[cfg(any(target_os = "linux", target_vendor = "apple"))]
use core::slice::from_raw_parts;
use core::{
fmt::Debug,
mem::size_of,
ops::{Add, AddAssign, Deref},
slice::Iter,
};
#[cfg(feature = "std")]
use std::{
fs::File,
io::{BufRead, BufReader},
path::Path,
};
use hashbrown::HashSet;
use libafl_bolts::{rands::Rand, AsSlice, HasLen};
use serde::{Deserialize, Serialize};
#[cfg(feature = "std")]
use crate::mutators::str_decode;
use crate::{
corpus::{CorpusId, HasCurrentCorpusId},
inputs::{HasMutatorBytes, UsesInput},
mutators::{
buffer_self_copy, mutations::buffer_copy, MultiMutator, MutationResult, Mutator, Named,
},
observers::cmp::{AFLppCmpValuesMetadata, CmpValues, CmpValuesMetadata},
stages::TaintMetadata,
state::{HasCorpus, HasMaxSize, HasRand},
Error, HasMetadata,
};
/// A state metadata holding a list of tokens.
///
/// Tokens are what AFL calls "extras" or "dictionary" entries; mutators such as
/// [`TokenInsert`] and [`TokenReplace`] splice them into inputs during fuzzing.
#[allow(clippy::unsafe_derive_deserialize)]
#[derive(Debug, Default, Clone, Serialize, Deserialize)]
pub struct Tokens {
    // We keep a vec and a set: the set gives fast deduplication on insert,
    // the vec gives cheap indexed access for random selection.
    // Invariant: both containers always hold exactly the same tokens.
    tokens_vec: Vec<Vec<u8>>,
    tokens_set: HashSet<Vec<u8>>,
}
// Makes `Tokens` storable in (and retrievable from) the state's metadata map.
libafl_bolts::impl_serdeany!(Tokens);
/// The metadata used for token mutators
impl Tokens {
    /// Creates a new tokens metadata (old-skool afl name: `dictornary`)
    #[must_use]
    pub fn new() -> Self {
        Tokens::default()
    }

    /// Add tokens from anything iterating over byte vectors, skipping duplicates.
    pub fn add_tokens<IT, V>(&mut self, tokens: IT) -> &mut Self
    where
        IT: IntoIterator<Item = V>,
        V: AsRef<Vec<u8>>,
    {
        for token in tokens {
            self.add_token(token.as_ref());
        }
        self
    }

    /// Build tokens from files
    ///
    /// # Errors
    /// Returns an error if a file cannot be opened or contains an illegal line.
    #[cfg(feature = "std")]
    pub fn add_from_files<IT, P>(mut self, files: IT) -> Result<Self, Error>
    where
        IT: IntoIterator<Item = P>,
        P: AsRef<Path>,
    {
        for file in files {
            self.add_from_file(file)?;
        }
        Ok(self)
    }

    /// Parse an autodict section.
    ///
    /// The section is a sequence of `[len: u8][len token bytes]` records within
    /// the first `size` bytes of `slice`. A record that would run past the
    /// section end indicates corrupted data; parsing stops there instead of
    /// panicking on an out-of-bounds slice access (as the old `assert!`-based
    /// loop did).
    pub fn parse_autodict(&mut self, slice: &[u8], size: usize) {
        let mut head = 0;
        while head < size {
            let token_len = slice[head] as usize;
            head += 1;
            // A zero length is a valid (empty) record; just move on.
            if token_len == 0 {
                continue;
            }
            if head + token_len > size {
                // Corrupted trailing record: ignore the rest rather than panic.
                log::warn!(
                    "Ignoring truncated autodict entry at offset {} (len {})",
                    head - 1,
                    token_len
                );
                break;
            }
            // Materialize the token once (the old code called `to_vec()` twice).
            let token = slice[head..head + token_len].to_vec();
            log::info!("Token size: {} content: {:x?}", token_len, &token);
            self.add_token(&token);
            head += token_len;
        }
    }

    /// Create a token section from a start and an end pointer,
    /// reading from an autotokens section and returning the parsed [`Tokens`].
    ///
    /// # Safety
    /// The caller must ensure that the region between `token_start` and `token_stop`
    /// is a valid region, containing autotokens in the expected format.
    ///
    /// # Errors
    /// Returns an error if `token_stop` lies before `token_start`.
    #[cfg(any(target_os = "linux", target_vendor = "apple"))]
    pub unsafe fn from_mut_ptrs(
        token_start: *const u8,
        token_stop: *const u8,
    ) -> Result<Self, Error> {
        if token_start.is_null() || token_stop.is_null() {
            return Ok(Self::new());
        }
        if token_stop < token_start {
            return Err(Error::illegal_argument(format!(
                "Tried to create tokens from illegal section: stop < start ({token_stop:?} < {token_start:?})"
            )));
        }
        let section_size: usize = token_stop.offset_from(token_start).try_into().unwrap();
        // SAFETY: the caller guarantees `[token_start, token_stop)` is a valid region.
        let slice = from_raw_parts(token_start, section_size);
        // Now we know the beginning and the end of the token section.. let's parse them into tokens
        let mut ret = Self::default();
        ret.parse_autodict(slice, section_size);
        Ok(ret)
    }

    /// Creates a new instance from a file
    ///
    /// # Errors
    /// Returns an error if the file cannot be opened or contains an illegal line.
    #[cfg(feature = "std")]
    pub fn from_file<P>(file: P) -> Result<Self, Error>
    where
        P: AsRef<Path>,
    {
        let mut ret = Self::new();
        ret.add_from_file(file)?;
        Ok(ret)
    }

    /// Adds a token to a dictionary, checking it is not a duplicate
    /// Returns `false` if the token was already present and did not get added.
    #[allow(clippy::ptr_arg)]
    pub fn add_token(&mut self, token: &Vec<u8>) -> bool {
        // The set is the source of truth for deduplication.
        if !self.tokens_set.insert(token.clone()) {
            return false;
        }
        self.tokens_vec.push(token.clone());
        true
    }

    /// Reads an AFL-style tokens file, adding each `name="value"` entry.
    ///
    /// # Errors
    /// Returns an error on I/O failures and on lines that are neither empty,
    /// a `#` comment, nor a (possibly prefixed) double-quoted token.
    #[cfg(feature = "std")]
    pub fn add_from_file<P>(&mut self, file: P) -> Result<&mut Self, Error>
    where
        P: AsRef<Path>,
    {
        let file = File::open(file)?;
        let reader = BufReader::new(file);
        for line in reader.lines() {
            // Propagate read errors instead of panicking (was `unwrap`).
            let line = line?;
            let line = line.trim();
            // we are only interested in '"..."', not prefixed 'foo = '
            if line.is_empty() || line.starts_with('#') {
                continue;
            }
            let Some(pos_quote) = line.find('"') else {
                return Err(Error::illegal_argument(format!("Illegal line: {line}")));
            };
            // `ends_with` is safe here; the previous `chars().nth(line.len() - 1)`
            // mixed a byte index into a char iterator and wrongly rejected any
            // line containing multi-byte UTF-8.
            if !line.ends_with('"') {
                return Err(Error::illegal_argument(format!("Illegal line: {line}")));
            }
            // extract the item between the quotes
            let Some(item) = line.get(pos_quote + 1..line.len() - 1) else {
                return Err(Error::illegal_argument(format!("Illegal line: {line}")));
            };
            if item.is_empty() {
                continue;
            }
            // decode the possibly hex-escaped token
            let token: Vec<u8> = match str_decode(item) {
                Ok(val) => val,
                Err(_) => {
                    return Err(Error::illegal_argument(format!(
                        "Illegal line (hex decoding): {line}"
                    )))
                }
            };
            // add
            self.add_token(&token);
        }
        Ok(self)
    }

    /// Returns the amount of tokens in this Tokens instance
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        self.tokens_vec.len()
    }

    /// Returns if this tokens-instance is empty
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.tokens_vec.is_empty()
    }

    /// Gets the tokens stored in this db
    #[must_use]
    pub fn tokens(&self) -> &[Vec<u8>] {
        &self.tokens_vec
    }

    /// Returns an iterator over the tokens.
    pub fn iter(&self) -> Iter<'_, Vec<u8>> {
        <&Self as IntoIterator>::into_iter(self)
    }
}
impl AddAssign for Tokens {
    /// Merges all tokens of `other` into `self`, skipping duplicates.
    fn add_assign(&mut self, other: Self) {
        // Iterating `&Tokens` yields `&Vec<u8>` items.
        for token in &other {
            self.add_token(token);
        }
    }
}
impl AddAssign<&[Vec<u8>]> for Tokens {
    /// Appends every token of the given slice, ignoring duplicates.
    fn add_assign(&mut self, other: &[Vec<u8>]) {
        for token in other {
            self.add_token(token);
        }
    }
}
impl Add<&[Vec<u8>]> for Tokens {
    type Output = Self;

    /// Returns `self` extended by all (non-duplicate) tokens in `other`.
    fn add(mut self, other: &[Vec<u8>]) -> Self {
        self.add_tokens(other);
        self
    }
}
impl Add for Tokens {
    type Output = Self;

    /// Merges two token collections, deduplicating entries.
    fn add(mut self, other: Self) -> Self {
        self.add_tokens(&other.tokens_vec);
        self
    }
}
impl<IT, V> From<IT> for Tokens
where
    IT: IntoIterator<Item = V>,
    V: AsRef<Vec<u8>>,
{
    /// Builds a fresh [`Tokens`] collection from any iterator of byte vectors.
    fn from(tokens: IT) -> Self {
        let mut this = Self::new();
        this.add_tokens(tokens);
        this
    }
}
impl Deref for Tokens {
    type Target = [Vec<u8>];

    /// Dereferences to the underlying token slice.
    fn deref(&self) -> &[Vec<u8>] {
        &self.tokens_vec
    }
}
impl Add for &Tokens {
    type Output = Tokens;

    /// Produces a new [`Tokens`] holding the (deduplicated) union of both operands.
    fn add(self, other: Self) -> Tokens {
        let mut combined: Tokens = self.clone();
        combined.add_tokens(other);
        combined
    }
}
impl<'it> IntoIterator for &'it Tokens {
    type Item = &'it Vec<u8>;
    type IntoIter = Iter<'it, Vec<u8>>;

    /// Iterates over references to the stored tokens.
    fn into_iter(self) -> Self::IntoIter {
        self.tokens_vec.iter()
    }
}
/// Inserts a random token at a random position in the `Input`.
///
/// The input grows by (up to) the token length, clamped so it never exceeds
/// the state's configured maximum input size.
#[derive(Debug, Default)]
pub struct TokenInsert;
impl<I, S> Mutator<I, S> for TokenInsert
where
    S: HasMetadata + HasRand + HasMaxSize,
    I: HasMutatorBytes,
{
    /// Splices one randomly chosen dictionary token into `input` at a random
    /// offset, growing the input (bounded by the state's max size).
    fn mutate(&mut self, state: &mut S, input: &mut I) -> Result<MutationResult, Error> {
        let max_size = state.max_size();

        // Bail out early if there is no (non-empty) token dictionary.
        let token_count = match state.metadata_map().get::<Tokens>() {
            Some(tokens) if !tokens.tokens().is_empty() => tokens.tokens().len(),
            _ => return Ok(MutationResult::Skipped),
        };

        // Draw all randomness first; `rand_mut()` must not overlap with the
        // metadata borrow below.
        let chosen = state.rand_mut().below(token_count);
        let input_len = input.bytes().len();
        let insert_at = state.rand_mut().below(input_len + 1);

        let tokens = state.metadata_map().get::<Tokens>().unwrap();
        let token = &tokens.tokens()[chosen];

        // Clamp the token so the grown input never exceeds `max_size`.
        let mut copy_len = token.len();
        if input_len + copy_len > max_size {
            if max_size > input_len {
                copy_len = max_size - input_len;
            } else {
                return Ok(MutationResult::Skipped);
            }
        }

        input.resize(input_len + copy_len, 0);
        unsafe {
            // Shift the tail right to open a gap, then write the token into it.
            buffer_self_copy(
                input.bytes_mut(),
                insert_at,
                insert_at + copy_len,
                input_len - insert_at,
            );
            buffer_copy(input.bytes_mut(), token, 0, insert_at, copy_len);
        }

        Ok(MutationResult::Mutated)
    }
}
impl Named for TokenInsert {
    /// The stable name of this mutation, used for stats and scheduling.
    fn name(&self) -> &Cow<'static, str> {
        static INSERT_NAME: Cow<'static, str> = Cow::Borrowed("TokenInsert");
        &INSERT_NAME
    }
}
impl TokenInsert {
/// Create a `TokenInsert` `Mutation`.
#[must_use]
pub fn new() -> Self {
Self
}
}
/// A `TokenReplace` [`Mutator`] replaces a random part of the input with one of a range of tokens.
/// From AFL terms, this is called as `Dictionary` mutation (which doesn't really make sense ;) ).
///
/// Unlike [`TokenInsert`], the input size never changes; the token is
/// truncated at the end of the input when necessary.
#[derive(Debug, Default)]
pub struct TokenReplace;
impl<I, S> Mutator<I, S> for TokenReplace
where
    S: UsesInput + HasMetadata + HasRand + HasMaxSize,
    I: HasMutatorBytes,
{
    /// Overwrites bytes at a random position with a randomly picked token,
    /// truncating the token at the end of the input (the size is unchanged).
    fn mutate(&mut self, state: &mut S, input: &mut I) -> Result<MutationResult, Error> {
        let input_len = input.bytes().len();
        if input_len == 0 {
            return Ok(MutationResult::Skipped);
        }

        // Without any tokens there is nothing to replace with.
        let token_count = match state.metadata_map().get::<Tokens>() {
            Some(tokens) if !tokens.tokens().is_empty() => tokens.tokens().len(),
            _ => return Ok(MutationResult::Skipped),
        };

        let chosen = state.rand_mut().below(token_count);
        let start = state.rand_mut().below(input_len);

        let tokens = state.metadata_map().get::<Tokens>().unwrap();
        let token = &tokens.tokens()[chosen];

        // Never write past the end of the input.
        let copy_len = core::cmp::min(token.len(), input_len - start);

        unsafe {
            buffer_copy(input.bytes_mut(), token, 0, start, copy_len);
        }

        Ok(MutationResult::Mutated)
    }
}
impl Named for TokenReplace {
    /// The stable name of this mutation, used for stats and scheduling.
    fn name(&self) -> &Cow<'static, str> {
        static REPLACE_NAME: Cow<'static, str> = Cow::Borrowed("TokenReplace");
        &REPLACE_NAME
    }
}
impl TokenReplace {
/// Creates a new `TokenReplace` struct.
#[must_use]
pub fn new() -> Self {
Self
}
}
/// A `I2SRandReplace` [`Mutator`] replaces a random matching input-2-state comparison operand with the other.
/// It needs a valid [`CmpValuesMetadata`] in the state.
///
/// Starting at a random offset, the first operand (in native or swapped byte
/// order) found in the input is replaced by its counterpart.
#[derive(Debug, Default)]
pub struct I2SRandReplace;
impl<I, S> Mutator<I, S> for I2SRandReplace
where
S: UsesInput + HasMetadata + HasRand + HasMaxSize,
I: HasMutatorBytes,
{
#[allow(clippy::too_many_lines)]
fn mutate(&mut self, state: &mut S, input: &mut I) -> Result<MutationResult, Error> {
let size = input.bytes().len();
if size == 0 {
return Ok(MutationResult::Skipped);
}
let cmps_len = {
let Some(meta) = state.metadata_map().get::<CmpValuesMetadata>() else {
return Ok(MutationResult::Skipped);
};
log::trace!("meta: {:x?}", meta);
if meta.list.is_empty() {
return Ok(MutationResult::Skipped);
}
meta.list.len()
};
let idx = state.rand_mut().below(cmps_len);
let off = state.rand_mut().below(size);
let len = input.bytes().len();
let bytes = input.bytes_mut();
let meta = state.metadata_map().get::<CmpValuesMetadata>().unwrap();
let cmp_values = &meta.list[idx];
let mut result = MutationResult::Skipped;
match cmp_values {
CmpValues::U8(v) => {
for byte in bytes.iter_mut().take(len).skip(off) {
if *byte == v.0 {
*byte = v.1;
result = MutationResult::Mutated;
break;
} else if *byte == v.1 {
*byte = v.0;
result = MutationResult::Mutated;
break;
}
}
}
CmpValues::U16(v) => {
if len >= size_of::<u16>() {
for i in off..=len - size_of::<u16>() {
let val =
u16::from_ne_bytes(bytes[i..i + size_of::<u16>()].try_into().unwrap());
if val == v.0 {
let new_bytes = v.1.to_ne_bytes();
bytes[i..i + size_of::<u16>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
} else if val.swap_bytes() == v.0 {
let new_bytes = v.1.swap_bytes().to_ne_bytes();
bytes[i..i + size_of::<u16>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
} else if val == v.1 {
let new_bytes = v.0.to_ne_bytes();
bytes[i..i + size_of::<u16>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
} else if val.swap_bytes() == v.1 {
let new_bytes = v.0.swap_bytes().to_ne_bytes();
bytes[i..i + size_of::<u16>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
}
}
}
}
CmpValues::U32(v) => {
if len >= size_of::<u32>() {
for i in off..=len - size_of::<u32>() {
let val =
u32::from_ne_bytes(bytes[i..i + size_of::<u32>()].try_into().unwrap());
if val == v.0 {
let new_bytes = v.1.to_ne_bytes();
bytes[i..i + size_of::<u32>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
} else if val.swap_bytes() == v.0 {
let new_bytes = v.1.swap_bytes().to_ne_bytes();
bytes[i..i + size_of::<u32>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
} else if val == v.1 {
let new_bytes = v.0.to_ne_bytes();
bytes[i..i + size_of::<u32>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
} else if val.swap_bytes() == v.1 {
let new_bytes = v.0.swap_bytes().to_ne_bytes();
bytes[i..i + size_of::<u32>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
}
}
}
}
CmpValues::U64(v) => {
if len >= size_of::<u64>() {
for i in off..=len - size_of::<u64>() {
let val =
u64::from_ne_bytes(bytes[i..i + size_of::<u64>()].try_into().unwrap());
if val == v.0 {
let new_bytes = v.1.to_ne_bytes();
bytes[i..i + size_of::<u64>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
} else if val.swap_bytes() == v.0 {
let new_bytes = v.1.swap_bytes().to_ne_bytes();
bytes[i..i + size_of::<u64>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
} else if val == v.1 {
let new_bytes = v.0.to_ne_bytes();
bytes[i..i + size_of::<u64>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
} else if val.swap_bytes() == v.1 {
let new_bytes = v.0.swap_bytes().to_ne_bytes();
bytes[i..i + size_of::<u64>()].copy_from_slice(&new_bytes);
result = MutationResult::Mutated;
break;
}
}
}
}
CmpValues::Bytes(v) => {
'outer: for i in off..len {
let mut size = core::cmp::min(v.0.len(), len - i);
while size != 0 {
if v.0.as_slice()[0..size] == input.bytes()[i..i + size] {
unsafe {
buffer_copy(input.bytes_mut(), v.1.as_slice(), 0, i, size);
}
result = MutationResult::Mutated;
break 'outer;
}
size -= 1;
}
size = core::cmp::min(v.1.len(), len - i);
while size != 0 {
if v.1.as_slice()[0..size] == input.bytes()[i..i + size] {
unsafe {
buffer_copy(input.bytes_mut(), v.0.as_slice(), 0, i, size);
}
result = MutationResult::Mutated;
break 'outer;
}
size -= 1;
}
}
}
}
Ok(result)
}
}
impl Named for I2SRandReplace {
    /// The stable name of this mutation, used for stats and scheduling.
    fn name(&self) -> &Cow<'static, str> {
        static I2S_NAME: Cow<'static, str> = Cow::Borrowed("I2SRandReplace");
        &I2S_NAME
    }
}
impl I2SRandReplace {
/// Creates a new `I2SRandReplace` struct.
#[must_use]
pub fn new() -> Self {
Self
}
}
// Comparison attribute bit flags, mirroring AFL++ cmplog instrumentation.
// (The triple-`T` typo in `ATTTRIBUTE` is kept: the name is referenced
// throughout this file.)
const CMP_ATTTRIBUTE_IS_EQUAL: u8 = 1;
const CMP_ATTRIBUTE_IS_GREATER: u8 = 2;
const CMP_ATTRIBUTE_IS_LESSER: u8 = 4;
// Floating-point comparison.
const CMP_ATTRIBUTE_IS_FP: u8 = 8;
// Set on recursive probes that nudge a floating-point replacement value.
const CMP_ATTRIBUTE_IS_FP_MOD: u8 = 16;
// Set on recursive probes that nudge an integer replacement value.
const CMP_ATTRIBUTE_IS_INT_MOD: u8 = 32;
// Set on recursive probes for transformed (arith/xor/case) patterns.
const CMP_ATTRIBUTE_IS_TRANSFORM: u8 = 64;
/// AFL++ redqueen mutation
#[derive(Debug, Default)]
pub struct AFLppRedQueen {
    /// If set, `cmp_extend_encoding` also probes for transformed
    /// (arith/xor/case-changed) encodings of the compared pattern.
    enable_transform: bool,
    /// If set, the arithmetic probes also run for already-transformed patterns.
    enable_arith: bool,
    /// Cached text classification of the currently scheduled input,
    /// recomputed whenever a new corpus entry is fuzzed.
    text_type: TextType,
    /// We use this variable to check if we scheduled a new `corpus_id`
    /// - and, hence, need to recalculate `text_type`
    last_corpus_id: Option<CorpusId>,
}
impl AFLppRedQueen {
    /// Keeps the upper five attribute bits and inverts the lower three
    /// (equal/greater/lesser) - used when probing with swapped operands.
    #[inline]
    fn swapa(x: u8) -> u8 {
        (x & 0xf8) + ((x & 7) ^ 0x07)
    }

    /// Cmplog Pattern Matching
    ///
    /// Tries to locate (an encoding of) `pattern` at `buf_idx` in `buf`
    /// (and `another_pattern` in `another_buf`); on a match, a copy of `buf`
    /// with `repl` patched in is pushed onto `vec`.
    /// Returns `Ok(true)` if at least one mutated input was generated.
    ///
    /// # Errors
    /// Returns an error if a slice-to-array conversion fails (all reads are
    /// guarded by length checks, so this is not expected).
    #[allow(clippy::cast_sign_loss)]
    #[allow(clippy::too_many_arguments)]
    #[allow(clippy::too_many_lines)]
    #[allow(clippy::cast_possible_wrap)]
    #[allow(clippy::if_not_else)]
    #[allow(clippy::cast_precision_loss)]
    pub fn cmp_extend_encoding(
        &self,
        pattern: u64,
        repl: u64,
        another_pattern: u64,
        changed_val: u64,
        attr: u8,
        another_buf: &[u8],
        buf: &[u8],
        buf_idx: usize,
        taint_len: usize,
        input_len: usize,
        hshape: usize,
        vec: &mut Vec<Vec<u8>>,
    ) -> Result<bool, Error> {
        // TODO: ascii2num (we need check q->is_ascii (in calibration stage(?)))
        // try Transform
        if self.enable_transform
            && pattern != another_pattern
            && repl == changed_val
            && attr <= CMP_ATTTRIBUTE_IS_EQUAL
        {
            // Try to identify transform magic
            let mut bytes: usize = match hshape {
                0 => 0, // NEVER happen
                1 => 1,
                2 => 2,
                3 | 4 => 4,
                _ => 8,
            };
            // prevent overflow
            bytes = core::cmp::min(bytes, input_len.wrapping_sub(buf_idx));
            let (b_val, o_b_val, mask): (u64, u64, u64) = match bytes {
                0 => {
                    (0, 0, 0) // cannot happen
                }
                1 => (
                    u64::from(buf[buf_idx]),
                    u64::from(another_buf[buf_idx]),
                    0xff,
                ),
                2 | 3 => (
                    // Fix: `b_val` must come from `buf` - this used to read
                    // `another_buf` for both values (copy-paste error,
                    // cf. the `4..=7` arm below).
                    u64::from(u16::from_be_bytes(
                        buf[buf_idx..buf_idx + 2].try_into().unwrap(),
                    )),
                    u64::from(u16::from_be_bytes(
                        another_buf[buf_idx..buf_idx + 2].try_into().unwrap(),
                    )),
                    0xffff,
                ),
                4..=7 => (
                    u64::from(u32::from_be_bytes(
                        buf[buf_idx..buf_idx + 4].try_into().unwrap(),
                    )),
                    u64::from(u32::from_be_bytes(
                        another_buf[buf_idx..buf_idx + 4].try_into().unwrap(),
                    )),
                    0xffff_ffff,
                ),
                _ => (
                    u64::from_be_bytes(buf[buf_idx..buf_idx + 8].try_into().unwrap()),
                    u64::from_be_bytes(another_buf[buf_idx..buf_idx + 8].try_into().unwrap()),
                    0xffff_ffff_ffff_ffff,
                ),
            };

            // Try arith: did both runs see the pattern offset by the same delta?
            let diff = (pattern as i64).wrapping_sub(b_val as i64);
            let new_diff = (another_pattern as i64).wrapping_sub(o_b_val as i64);
            if diff == new_diff && diff != 0 {
                let new_repl: u64 = (repl as i64).wrapping_sub(diff) as u64;
                let ret = self.cmp_extend_encoding(
                    pattern,
                    new_repl,
                    another_pattern,
                    repl,
                    CMP_ATTRIBUTE_IS_TRANSFORM,
                    another_buf,
                    buf,
                    buf_idx,
                    taint_len,
                    input_len,
                    hshape,
                    vec,
                )?;
                if ret {
                    return Ok(true);
                }
            }

            // Try XOR
            // Shadowing
            let diff: i64 = (pattern ^ b_val) as i64;
            let new_diff: i64 = (another_pattern ^ o_b_val) as i64;
            if diff == new_diff && diff != 0 {
                let new_repl: u64 = (repl as i64 ^ diff) as u64;
                let ret = self.cmp_extend_encoding(
                    pattern,
                    new_repl,
                    another_pattern,
                    repl,
                    CMP_ATTRIBUTE_IS_TRANSFORM,
                    another_buf,
                    buf,
                    buf_idx,
                    taint_len,
                    input_len,
                    hshape,
                    vec,
                )?;
                if ret {
                    return Ok(true);
                }
            }

            // Try Lowercase
            // Shadowing
            let diff = (b_val | 0x2020_2020_2020_2020 & mask) == (pattern & mask);
            let new_diff = (b_val | 0x2020_2020_2020_2020 & mask) == (another_pattern & mask);
            if new_diff && diff {
                let new_repl: u64 = repl & (0x5f5f_5f5f_5f5f_5f5f & mask);
                let ret = self.cmp_extend_encoding(
                    pattern,
                    new_repl,
                    another_pattern,
                    repl,
                    CMP_ATTRIBUTE_IS_TRANSFORM,
                    another_buf,
                    buf,
                    buf_idx,
                    taint_len,
                    input_len,
                    hshape,
                    vec,
                )?;
                if ret {
                    return Ok(true);
                }
            }

            // Try Uppercase
            // Shadowing
            let diff = (b_val | 0x5f5f_5f5f_5f5f_5f5f & mask) == (pattern & mask);
            let o_diff = (b_val | 0x5f5f_5f5f_5f5f_5f5f & mask) == (another_pattern & mask);
            if o_diff && diff {
                let new_repl: u64 = repl & (0x2020_2020_2020_2020 & mask);
                let ret = self.cmp_extend_encoding(
                    pattern,
                    new_repl,
                    another_pattern,
                    repl,
                    CMP_ATTRIBUTE_IS_TRANSFORM,
                    another_buf,
                    buf,
                    buf_idx,
                    taint_len,
                    input_len,
                    hshape,
                    vec,
                )?;
                if ret {
                    return Ok(true);
                }
            }
        }

        let its_len = core::cmp::min(input_len.wrapping_sub(buf_idx), taint_len);

        // Try direct pattern matching: compare big-endian reads of `hshape`
        // bytes against the logged operands and patch in `repl` on a hit.
        match hshape {
            0 => (), // NEVER HAPPEN, Do nothing
            1 => {
                // 1 byte pattern match
                let buf_8 = buf[buf_idx];
                let another_buf_8 = another_buf[buf_idx];
                if buf_8 == pattern as u8 && another_buf_8 == another_pattern as u8 {
                    let mut cloned = buf.to_vec();
                    cloned[buf_idx] = repl as u8;
                    vec.push(cloned);
                    return Ok(true);
                }
            }
            2 | 3 => {
                if its_len >= 2 {
                    let buf_16 = u16::from_be_bytes(buf[buf_idx..buf_idx + 2].try_into()?);
                    let another_buf_16 =
                        u16::from_be_bytes(another_buf[buf_idx..buf_idx + 2].try_into()?);
                    if buf_16 == pattern as u16 && another_buf_16 == another_pattern as u16 {
                        let mut cloned = buf.to_vec();
                        cloned[buf_idx..buf_idx + 2]
                            .copy_from_slice(&(repl as u16).to_be_bytes());
                        vec.push(cloned);
                        return Ok(true);
                    }
                }
            }
            4..=7 => {
                if its_len >= 4 {
                    let buf_32 = u32::from_be_bytes(buf[buf_idx..buf_idx + 4].try_into()?);
                    let another_buf_32 =
                        u32::from_be_bytes(another_buf[buf_idx..buf_idx + 4].try_into()?);
                    if buf_32 == pattern as u32 && another_buf_32 == another_pattern as u32 {
                        let mut cloned = buf.to_vec();
                        cloned[buf_idx..buf_idx + 4]
                            .copy_from_slice(&(repl as u32).to_be_bytes());
                        vec.push(cloned);
                        return Ok(true);
                    }
                }
            }
            _ => {
                if its_len >= 8 {
                    let buf_64 = u64::from_be_bytes(buf[buf_idx..buf_idx + 8].try_into()?);
                    let another_buf_64 =
                        u64::from_be_bytes(another_buf[buf_idx..buf_idx + 8].try_into()?);
                    if buf_64 == pattern && another_buf_64 == another_pattern {
                        let mut cloned = buf.to_vec();
                        // Fix: the previous hand-written byte shuffle wrote
                        // `repl >> 32` twice and never wrote `repl >> 56`,
                        // corrupting the top three bytes of the replacement.
                        cloned[buf_idx..buf_idx + 8].copy_from_slice(&repl.to_be_bytes());
                        vec.push(cloned);
                        return Ok(true);
                    }
                }
            }
        }

        // Try arith
        if self.enable_arith || attr != CMP_ATTRIBUTE_IS_TRANSFORM {
            if (attr & (CMP_ATTRIBUTE_IS_GREATER | CMP_ATTRIBUTE_IS_LESSER)) == 0 || hshape < 4 {
                return Ok(false);
            }
            // Transform >= to < and <= to >
            let attr = if (attr & CMP_ATTTRIBUTE_IS_EQUAL) != 0
                && (attr & (CMP_ATTRIBUTE_IS_GREATER | CMP_ATTRIBUTE_IS_LESSER)) != 0
            {
                if attr & CMP_ATTRIBUTE_IS_GREATER != 0 {
                    attr + 2
                } else {
                    attr - 2
                }
            } else {
                attr
            };

            // FP
            if (CMP_ATTRIBUTE_IS_FP..CMP_ATTRIBUTE_IS_FP_MOD).contains(&attr) {
                let repl_new: u64;
                if attr & CMP_ATTRIBUTE_IS_GREATER != 0 {
                    if hshape == 4 && its_len >= 4 {
                        let mut g = repl as f32;
                        g += 1.0;
                        repl_new = u64::from(g as u32);
                    } else if hshape == 8 && its_len >= 8 {
                        let mut g = repl as f64;
                        g += 1.0;
                        repl_new = g as u64;
                    } else {
                        return Ok(false);
                    }
                    let ret = self.cmp_extend_encoding(
                        pattern,
                        repl,
                        another_pattern,
                        repl_new,
                        CMP_ATTRIBUTE_IS_FP_MOD,
                        another_buf,
                        buf,
                        buf_idx,
                        taint_len,
                        input_len,
                        hshape,
                        vec,
                    )?;
                    if ret {
                        return Ok(true);
                    }
                } else {
                    if hshape == 4 && its_len >= 4 {
                        let mut g = repl as f32;
                        g -= 1.0;
                        repl_new = u64::from(g as u32);
                    } else if hshape == 8 && its_len >= 8 {
                        let mut g = repl as f64;
                        g -= 1.0;
                        repl_new = g as u64;
                    } else {
                        return Ok(false);
                    }
                    let ret = self.cmp_extend_encoding(
                        pattern,
                        repl,
                        another_pattern,
                        repl_new,
                        CMP_ATTRIBUTE_IS_FP_MOD,
                        another_buf,
                        buf,
                        buf_idx,
                        taint_len,
                        input_len,
                        hshape,
                        vec,
                    )?;
                    if ret {
                        return Ok(true);
                    }
                }
            } else if attr < CMP_ATTRIBUTE_IS_FP {
                // Integer ordered comparison: nudge the replacement by one in
                // the direction that flips the comparison outcome.
                if attr & CMP_ATTRIBUTE_IS_GREATER != 0 {
                    let repl_new = repl.wrapping_add(1);
                    let ret = self.cmp_extend_encoding(
                        pattern,
                        repl,
                        another_pattern,
                        repl_new,
                        CMP_ATTRIBUTE_IS_INT_MOD,
                        another_buf,
                        buf,
                        buf_idx,
                        taint_len,
                        input_len,
                        hshape,
                        vec,
                    )?;
                    if ret {
                        return Ok(true);
                    }
                } else {
                    let repl_new = repl.wrapping_sub(1);
                    let ret = self.cmp_extend_encoding(
                        pattern,
                        repl,
                        another_pattern,
                        repl_new,
                        CMP_ATTRIBUTE_IS_INT_MOD,
                        another_buf,
                        buf,
                        buf_idx,
                        taint_len,
                        input_len,
                        hshape,
                        vec,
                    )?;
                    if ret {
                        return Ok(true);
                    }
                }
            } else {
                return Ok(false);
            }
        }
        Ok(false)
    }

    /// rtn part from AFL++
    ///
    /// Matches a common prefix of `pattern`/`o_pattern` against `buf`/`o_buf`
    /// at `buf_idx` and, for every prefix length that matched, pushes a copy
    /// of `buf` with that many bytes of `repl` patched in.
    /// Returns `true` if at least one mutant was produced.
    #[allow(clippy::too_many_arguments)]
    pub fn rtn_extend_encoding(
        &self,
        pattern: &[u8],
        repl: &[u8],
        o_pattern: &[u8],
        _changed_val: &[u8],
        o_buf: &[u8],
        buf: &[u8],
        buf_idx: usize,
        taint_len: usize,
        input_len: usize,
        hshape: usize,
        vec: &mut Vec<Vec<u8>>,
    ) -> bool {
        let l0 = pattern.len();
        let ol0 = o_pattern.len();
        let lmax = core::cmp::max(l0, ol0);
        let its_len = core::cmp::min(
            core::cmp::min(input_len.wrapping_sub(buf_idx), taint_len),
            core::cmp::min(lmax, hshape),
        );
        // TODO: Match before (This: https://github.com/AFLplusplus/AFLplusplus/blob/ea14f3fd40e32234989043a525e3853fcb33c1b6/src/afl-fuzz-redqueen.c#L2047)
        let mut copy_len = 0;
        for i in 0..its_len {
            let b1 = i < pattern.len() && pattern[i] != buf[buf_idx + i];
            let b2 = i < o_pattern.len() && o_pattern[i] != o_buf[buf_idx + i];
            if b1 || b2 {
                break;
            }
            copy_len += 1;
        }
        // Fix: the prefix loop can run past the end of the shorter pattern, so
        // `copy_len` may exceed `repl.len()`; `buffer_copy` with such a length
        // would read out of bounds of `repl`. Clamp to the replacement length.
        let copy_len = core::cmp::min(copy_len, repl.len());
        if copy_len > 0 {
            for l in 1..=copy_len {
                let mut cloned = buf.to_vec();
                // SAFETY: `l <= repl.len()` (clamped above) and
                // `buf_idx + l <= buf.len()` since `its_len` is bounded by
                // `input_len - buf_idx` (callers pass `input_len == buf.len()`).
                unsafe {
                    buffer_copy(&mut cloned, repl, 0, buf_idx, l);
                }
                vec.push(cloned);
            }
            true
        } else {
            false
        }
        // TODO: Transform (This: https://github.com/AFLplusplus/AFLplusplus/blob/stable/src/afl-fuzz-redqueen.c#L2089)
        // It's hard to implement this naively
        // because AFL++ redqueen does not check any pattern, but it calls its_fuzz() instead.
        // we can't execute the harness inside a mutator
        // Direct matching
    }
}
impl<I, S> MultiMutator<I, S> for AFLppRedQueen
where
S: UsesInput + HasMetadata + HasRand + HasMaxSize + HasCorpus + HasCurrentCorpusId,
I: HasMutatorBytes + From<Vec<u8>>,
{
#[allow(clippy::needless_range_loop)]
#[allow(clippy::too_many_lines)]
fn multi_mutate(
&mut self,
state: &mut S,
input: &I,
max_count: Option<usize>,
) -> Result<Vec<I>, Error> {
// TODO
// handle 128-bits logs
let size = input.bytes().len();
if size == 0 {
return Ok(vec![]);
}
let (cmp_len, cmp_meta, taint_meta) = {
let (Some(cmp_meta), Some(taint_meta)) = (
state.metadata_map().get::<AFLppCmpValuesMetadata>(),
state.metadata_map().get::<TaintMetadata>(),
) else {
return Ok(vec![]);
};
let cmp_len = cmp_meta.headers().len();
if cmp_len == 0 {
return Ok(vec![]);
}
(cmp_len, cmp_meta, taint_meta)
};
// These idxes must saved in this mutator itself!
let mut taint_idx = 0;
let orig_cmpvals = cmp_meta.orig_cmpvals();
let new_cmpvals = cmp_meta.new_cmpvals();
let headers = cmp_meta.headers();
let input_len = input.bytes().len();
let new_bytes = taint_meta.input_vec();
let orig_bytes = input.bytes();
let taint = taint_meta.ranges();
let mut ret = max_count.map_or_else(Vec::new, Vec::with_capacity);
let mut gathered_tokens = Tokens::new();
// println!("orig: {:#?} new: {:#?}", orig_cmpvals, new_cmpvals);
// Compute when mutating it for the 1st time.
let current_corpus_id = state.current_corpus_id()?.ok_or_else(|| Error::key_not_found("No corpus-id is currently being fuzzed, but called AFLppRedQueen::multi_mutated()."))?;
if self.last_corpus_id.is_none() || self.last_corpus_id.unwrap() != current_corpus_id {
self.text_type = check_if_text(orig_bytes, orig_bytes.len());
self.last_corpus_id = Some(current_corpus_id);
}
// println!("approximate size: {cmp_len} x {input_len}");
for cmp_idx in 0..cmp_len {
let (w_idx, header) = headers[cmp_idx];
if orig_cmpvals.get(&w_idx).is_none() || new_cmpvals.get(&w_idx).is_none() {
// These two should have same boolean value
// so there's nothing interesting at cmp_idx, then just skip!
continue;
}
let orig_val = orig_cmpvals.get(&w_idx).unwrap();
let new_val = new_cmpvals.get(&w_idx).unwrap();
let logged = core::cmp::min(orig_val.len(), new_val.len());
for cmp_h_idx in 0..logged {
let mut skip_opt = false;
for prev_idx in 0..cmp_h_idx {
if new_val[prev_idx] == new_val[cmp_h_idx] {
skip_opt = true;
}
}
// Opt not in the paper
if skip_opt {
continue;
}
for cmp_buf_idx in 0..input_len {
if let Some(max_count) = max_count {
if ret.len() >= max_count {
// TODO: does this bias towards earlier mutations?
break;
}
}
let taint_len = match taint.get(taint_idx) {
Some(t) => {
if cmp_buf_idx < t.start {
input_len - cmp_buf_idx
} else {
// if cmp_buf_idx == t.end go to next range
if cmp_buf_idx == t.end {
taint_idx += 1;
}
// Here cmp_buf_idx >= t.start
t.end - cmp_buf_idx
}
}
None => input_len - cmp_buf_idx,
};
let hshape = (header.shape() + 1) as usize;
match (&orig_val[cmp_h_idx], &new_val[cmp_h_idx]) {
(CmpValues::U8(_orig), CmpValues::U8(_new)) => {
/* just don't do it for u8, not worth it. not even instrumented
let (orig_v0, orig_v1, new_v0, new_v1) = (orig.0, orig.1, new.0, new.1);
let attribute = header.attribute() as u8;
let mut cmp_found = false;
if new_v0 != orig_v0 && orig_v0 != orig_v1 {
// Compare v0 against v1
cmp_found |= self.cmp_extend_encoding(
orig_v0.into(),
orig_v1.into(),
new_v0.into(),
new_v1.into(),
attribute,
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
);
// Swapped
cmp_found |= self.cmp_extend_encoding(
orig_v0.swap_bytes().into(),
orig_v1.swap_bytes().into(),
new_v0.swap_bytes().into(),
new_v1.swap_bytes().into(),
attribute,
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
);
}
if new_v1 != orig_v1 && orig_v0 != orig_v1 {
// Compare v1 against v0
cmp_found |= self.cmp_extend_encoding(
orig_v1.into(),
orig_v0.into(),
new_v1.into(),
new_v0.into(),
Self::swapa(attribute),
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
);
// Swapped
cmp_found |= self.cmp_extend_encoding(
orig_v1.swap_bytes().into(),
orig_v0.swap_bytes().into(),
new_v1.swap_bytes().into(),
new_v0.swap_bytes().into(),
Self::swapa(attribute),
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
);
}
*/
/*
U8 or U16 is not worth
if !cmp_found && self.text_type.is_ascii_or_utf8() {
if orig_v0 == new_v0 {
let v = orig_v0.to_ne_bytes().to_vec();
Self::try_add_autotokens(&mut gathered_tokens, &v, hshape);
}
if orig_v1 == new_v1 {
let v = orig_v1.to_ne_bytes().to_vec();
Self::try_add_autotokens(&mut gathered_tokens, &v, hshape);
}
}
*/
}
(CmpValues::U16(orig), CmpValues::U16(new)) => {
let (orig_v0, orig_v1, new_v0, new_v1) = (orig.0, orig.1, new.0, new.1);
let attribute: u8 = header.attribute() as u8;
if new_v0 != orig_v0 && orig_v0 != orig_v1 {
// Compare v0 against v1
self.cmp_extend_encoding(
orig_v0.into(),
orig_v1.into(),
new_v0.into(),
new_v1.into(),
attribute,
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
// Swapped
// Compare v0 against v1
self.cmp_extend_encoding(
orig_v0.swap_bytes().into(),
orig_v1.swap_bytes().into(),
new_v0.swap_bytes().into(),
new_v1.swap_bytes().into(),
attribute,
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
}
if new_v1 != orig_v1 && orig_v0 != orig_v1 {
// Compare v1 against v0
self.cmp_extend_encoding(
orig_v1.into(),
orig_v0.into(),
new_v1.into(),
new_v0.into(),
Self::swapa(attribute),
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
// Swapped
self.cmp_extend_encoding(
orig_v1.swap_bytes().into(),
orig_v0.swap_bytes().into(),
new_v1.swap_bytes().into(),
new_v0.swap_bytes().into(),
Self::swapa(attribute),
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
}
/*
U8 or U16 is not worth
if !cmp_found && self.text_type.is_ascii_or_utf8() {
if orig_v0 == new_v0 {
let v = orig_v0.to_ne_bytes().to_vec();
Self::try_add_autotokens(&mut gathered_tokens, &v, hshape);
}
if orig_v1 == new_v1 {
let v = orig_v1.to_ne_bytes().to_vec();
Self::try_add_autotokens(&mut gathered_tokens, &v, hshape);
}
}
*/
}
(CmpValues::U32(orig), CmpValues::U32(new)) => {
let (orig_v0, orig_v1, new_v0, new_v1) = (orig.0, orig.1, new.0, new.1);
let attribute = header.attribute() as u8;
let mut cmp_found = false;
if new_v0 != orig_v0 && orig_v0 != orig_v1 {
// Compare v0 against v1
cmp_found |= self.cmp_extend_encoding(
orig_v0.into(),
orig_v1.into(),
new_v0.into(),
new_v1.into(),
attribute,
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
// swapped
// Compare v0 against v1
cmp_found |= self.cmp_extend_encoding(
orig_v0.swap_bytes().into(),
orig_v1.swap_bytes().into(),
new_v0.swap_bytes().into(),
new_v1.swap_bytes().into(),
attribute,
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
}
if new_v1 != orig_v1 && orig_v0 != orig_v1 {
// Compare v1 against v0
cmp_found |= self.cmp_extend_encoding(
orig_v1.into(),
orig_v0.into(),
new_v1.into(),
new_v0.into(),
Self::swapa(attribute),
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
// Swapped
// Compare v1 against v0
cmp_found |= self.cmp_extend_encoding(
orig_v1.swap_bytes().into(),
orig_v0.swap_bytes().into(),
new_v1.swap_bytes().into(),
new_v0.swap_bytes().into(),
Self::swapa(attribute),
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
}
if !cmp_found {
if orig_v0 == new_v0
&& check_if_text(orig_v0.to_ne_bytes().as_ref(), hshape).size()
== hshape
{
let v = orig_v0.to_ne_bytes().to_vec();
Self::try_add_autotokens(&mut gathered_tokens, &v, hshape);
}
if orig_v1 == new_v1
&& check_if_text(orig_v1.to_ne_bytes().as_ref(), hshape).size()
== hshape
{
let v = orig_v1.to_ne_bytes().to_vec();
Self::try_add_autotokens(&mut gathered_tokens, &v, hshape);
}
}
}
(CmpValues::U64(orig), CmpValues::U64(new)) => {
let (orig_v0, orig_v1, new_v0, new_v1) = (orig.0, orig.1, new.0, new.1);
let attribute = header.attribute() as u8;
let mut cmp_found = false;
if new_v0 != orig_v0 && orig_v0 != orig_v1 {
// Compare v0 against v1
cmp_found |= self.cmp_extend_encoding(
orig_v0,
orig_v1,
new_v0,
new_v1,
attribute,
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
// Swapped
// Compare v0 against v1
cmp_found |= self.cmp_extend_encoding(
orig_v0.swap_bytes(),
orig_v1.swap_bytes(),
new_v0.swap_bytes(),
new_v1.swap_bytes(),
attribute,
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
}
if new_v1 != orig_v1 && orig_v0 != orig_v1 {
// Compare v1 against v0
cmp_found |= self.cmp_extend_encoding(
orig_v1,
orig_v0,
new_v1,
new_v0,
Self::swapa(attribute),
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
// Swapped
// Compare v1 against v0
cmp_found |= self.cmp_extend_encoding(
orig_v1.swap_bytes(),
orig_v0.swap_bytes(),
new_v1.swap_bytes(),
new_v0.swap_bytes(),
Self::swapa(attribute),
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
)?;
}
if !cmp_found {
if orig_v0 == new_v0
&& check_if_text(orig_v0.to_ne_bytes().as_ref(), hshape).size()
== hshape
{
let v = orig_v0.to_ne_bytes().to_vec();
Self::try_add_autotokens(&mut gathered_tokens, &v, hshape);
}
if orig_v1 == new_v1
&& check_if_text(orig_v1.to_ne_bytes().as_ref(), hshape).size()
== hshape
{
let v = orig_v1.to_ne_bytes().to_vec();
Self::try_add_autotokens(&mut gathered_tokens, &v, hshape);
}
}
}
(CmpValues::Bytes(orig), CmpValues::Bytes(new)) => {
let (orig_v0, orig_v1, new_v0, new_v1) =
(&orig.0, &orig.1, &new.0, &new.1);
// let attribute = header.attribute() as u8;
let mut rtn_found = false;
// Compare v0 against v1
rtn_found |= self.rtn_extend_encoding(
orig_v0.as_slice(),
orig_v1.as_slice(),
new_v0.as_slice(),
new_v1.as_slice(),
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
);
// Compare v1 against v0
rtn_found |= self.rtn_extend_encoding(
orig_v1.as_slice(),
orig_v0.as_slice(),
new_v1.as_slice(),
new_v0.as_slice(),
new_bytes,
orig_bytes,
cmp_buf_idx,
taint_len,
input_len,
hshape,
&mut ret,
);
let is_ascii_or_utf8 = self.text_type.is_ascii_or_utf8();
let mut v0_len = orig_v0.len();
let mut v1_len = orig_v1.len();
if v0_len > 0
&& (is_ascii_or_utf8
|| check_if_text(orig_v0.as_slice(), v0_len).size() == hshape)
{
// this is not utf8.
let v = strlen(orig_v0.as_slice());
if v > 0 {
v0_len = v;
}
}
if v1_len > 0
&& (is_ascii_or_utf8
|| check_if_text(orig_v1.as_slice(), v1_len).size() == hshape)
{
// this is not utf8.
let v = strlen(orig_v1.as_slice());
if v > 0 {
v1_len = v;
}
}
if v0_len > 0
&& orig_v0 == new_v0
&& (!rtn_found
|| check_if_text(orig_v0.as_slice(), v0_len).size() == v0_len)
{
Self::try_add_autotokens(
&mut gathered_tokens,
orig_v0.as_slice(),
v0_len,
);
}
if v1_len > 0
&& orig_v1 == new_v1
&& (!rtn_found
|| check_if_text(orig_v1.as_slice(), v1_len).size() == v1_len)
{
Self::try_add_autotokens(
&mut gathered_tokens,
orig_v1.as_slice(),
v1_len,
);
}
}
(_, _) => {
// not gonna happen
}
}
/*
if matched {
// before returning the result
// save indexes
self.cmp_start_idx = cmp_start_idx;
self.cmp_h_start_idx = cmp_h_start_idx;
self.cmp_buf_start_idx = cmp_buf_start_idx + 1; // next
self.taint_idx = taint_idx;
return Ok(MutationResult::Mutated);
}
*/
// if no match then go to next round
}
}
}
match state.metadata_mut::<Tokens>() {
Ok(existing) => {
existing.add_tokens(&gathered_tokens);
// println!("we have {} tokens", existing.len())
}
Err(_) => {
state.add_metadata(gathered_tokens);
}
}
if let Some(max_count) = max_count {
Ok(ret.into_iter().take(max_count).map(I::from).collect())
} else {
Ok(ret.into_iter().map(I::from).collect())
}
}
}
impl Named for AFLppRedQueen {
    // Reports this mutator's stable display name (used by stats/monitors).
    fn name(&self) -> &Cow<'static, str> {
        // A borrowed `Cow` avoids any allocation for the constant name.
        static MUTATOR_NAME: Cow<'static, str> = Cow::Borrowed("AFLppRedQueen");
        &MUTATOR_NAME
    }
}
impl AFLppRedQueen {
    /// Create a new [`AFLppRedQueen`] Mutator with transform detection and
    /// arithmetic compare encodings disabled.
    #[must_use]
    pub fn new() -> Self {
        Self {
            enable_transform: false,
            enable_arith: false,
            text_type: TextType::None,
            last_corpus_id: None,
        }
    }

    /// Constructor with cmplog options.
    ///
    /// * `transform` — enable input-to-state transform detection
    /// * `arith` — enable arithmetic compare encodings
    #[must_use]
    pub fn with_cmplog_options(transform: bool, arith: bool) -> Self {
        Self {
            enable_transform: transform,
            enable_arith: arith,
            text_type: TextType::None,
            last_corpus_id: None,
        }
    }

    /// Add the first `shape` bytes of `b` — and their byte-reversed form —
    /// to `tokens` as auto-discovered dictionary entries.
    ///
    /// Windows containing a run of more than one consecutive `0x00` or
    /// `0xff` byte are rejected: such runs are almost always padding or
    /// length fields, not interesting token material.
    ///
    /// # Panics
    /// Panics if `shape > b.len()`.
    fn try_add_autotokens(tokens: &mut Tokens, b: &[u8], shape: usize) {
        let window = &b[..shape];
        let mut cons_ff = 0;
        let mut cons_0 = 0;
        for &byte in window {
            match byte {
                // Note: mirroring AFL++'s logic, a 0x00 byte does not reset
                // the 0xff run counter (and vice versa).
                0x00 => cons_0 += 1,
                0xff => cons_ff += 1,
                _ => {
                    cons_0 = 0;
                    cons_ff = 0;
                }
            }
            if cons_0 > 1 || cons_ff > 1 {
                return;
            }
        }
        // Only the validated window becomes a token. Previously the whole
        // slice was added (`b.to_vec()`), which could drag in bytes past
        // `shape` that were never checked by the loop above — AFL++ copies
        // exactly `shape` bytes here.
        let mut v = window.to_vec();
        tokens.add_token(&v);
        v.reverse();
        tokens.add_token(&v);
    }
}
/// Classification of a byte buffer's textual content, as computed by
/// `check_if_text`; used to decide whether cmplog operands should be
/// treated as text.
#[derive(Debug, Copy, Clone)]
enum TextType {
    /// Not (predominantly) text.
    None,
    /// Mostly printable ASCII; payload is the number of characters counted.
    Ascii(usize),
    /// Mostly well-formed UTF-8; payload is the number of characters counted.
    UTF8(usize),
}
impl Default for TextType {
    /// Buffers default to `None` (unclassified / non-text).
    fn default() -> Self {
        Self::None
    }
}
impl TextType {
    /// `true` when the buffer was classified as either ASCII or UTF-8 text.
    fn is_ascii_or_utf8(self) -> bool {
        !matches!(self, Self::None)
    }

    /// Number of text characters counted during classification;
    /// `0` for `TextType::None`.
    fn size(self) -> usize {
        if let Self::Ascii(sz) | Self::UTF8(sz) = self {
            sz
        } else {
            0
        }
    }
}
/// Returns `true` when `c` lies in the 7-bit ASCII range (`0x00..=0x7F`).
#[inline]
const fn isascii(c: u8) -> bool {
    // Delegates to the (const) stdlib predicate instead of the manual compare.
    c.is_ascii()
}
/// Returns `true` when `c` is a printable ASCII character:
/// space (`0x20`) through tilde (`0x7E`), inclusive.
#[inline]
const fn isprint(c: u8) -> bool {
    // A range pattern expresses the closed interval in one place.
    matches!(c, 0x20..=0x7E)
}
/// C-style string length: the index of the first `0x00` byte in `buf`,
/// or `buf.len()` when no NUL terminator is present.
#[inline]
const fn strlen(buf: &[u8]) -> usize {
    let mut idx = 0;
    // Short-circuiting `&&` keeps this valid in a `const fn`.
    while idx < buf.len() && buf[idx] != 0x0 {
        idx += 1;
    }
    idx
}
/// Heuristic text classifier for the first `max_len` bytes of `buf`.
///
/// Counts printable-ASCII characters and well-formed UTF-8 sequences
/// (rejecting overlong encodings and UTF-16 surrogate ranges). Returns
/// `TextType::UTF8` or `TextType::Ascii` — payload is the character
/// count — when at least 99% of the decoded characters fall into that
/// class, otherwise `TextType::None`.
///
/// # Panics
/// Panics if `max_len > buf.len()`.
fn check_if_text(buf: &[u8], max_len: usize) -> TextType {
    // An empty window is trivially non-text. Bail out early: otherwise the
    // percentage computations below divide by `comp == 0` / `len == 0` and
    // panic with a divide-by-zero.
    if max_len == 0 {
        return TextType::None;
    }
    // assert!(buf.len() >= max_len);
    let len = max_len;
    let mut offset: usize = 0;
    let mut ascii = 0;
    let mut utf8 = 0;
    // `comp` tracks the number of decoded characters: it starts at `len`
    // (one char per byte) and is reduced by `seq_len - 1` for every
    // multi-byte UTF-8 sequence consumed, so for `len >= 1` it stays >= 1.
    let mut comp = len;

    while offset < max_len {
        // Fast path: TAB, LF, CR and printable ASCII count as both ASCII
        // and UTF-8 characters.
        if buf[offset] == 0x09
            || buf[offset] == 0x0A
            || buf[offset] == 0x0D
            || (0x20 <= buf[offset] && buf[offset] <= 0x7E)
        {
            offset += 1;
            utf8 += 1;
            ascii += 1;
            continue;
        }

        if isascii(buf[offset]) || isprint(buf[offset]) {
            ascii += 1;
        }

        // non-overlong 2-byte
        if len - offset > 1
            && ((0xC2 <= buf[offset] && buf[offset] <= 0xDF)
                && (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF))
        {
            offset += 2;
            utf8 += 1;
            comp -= 1;
            continue;
        }

        // excluding overlongs
        if (len - offset > 2)
            && ((buf[offset] == 0xE0 && (0xA0 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF)) || // straight 3-byte
             (((0xE1 <= buf[offset] && buf[offset] <= 0xEC) || buf[offset] == 0xEE || buf[offset] == 0xEF) && (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF)) || // excluding surrogates
             (buf[offset] == 0xED && (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0x9F) && (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF)))
        {
            offset += 3;
            utf8 += 1;
            comp -= 2;
            continue;
        }

        // planes 1-3
        if (len - offset > 3)
            && ((buf[offset] == 0xF0 && (0x90 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && (0x80 <= buf[offset + 3] && buf[offset + 3] <= 0xBF)) || // planes 4-15
             ((0xF1 <= buf[offset] && buf[offset] <= 0xF3) && (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0xBF) && (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && (0x80 <= buf[offset + 3] && buf[offset + 3] <= 0xBF)) || // plane 16
             (buf[offset] == 0xF4 && (0x80 <= buf[offset + 1] && buf[offset + 1] <= 0x8F) && (0x80 <= buf[offset + 2] && buf[offset + 2] <= 0xBF) && (0x80 <= buf[offset + 3] && buf[offset + 3] <= 0xBF)))
        {
            offset += 4;
            utf8 += 1;
            comp -= 3;
            continue;
        }

        // Unclassifiable byte: skip it (counts as one non-text character).
        offset += 1;
    }

    // UTF-8 share is measured against decoded characters, ASCII share
    // against raw bytes; both divisors are >= 1 thanks to the early return.
    let percent_utf8 = (utf8 * 100) / comp;
    let percent_ascii = (ascii * 100) / len;

    if percent_utf8 >= percent_ascii && percent_utf8 >= 99 {
        // utf
        return TextType::UTF8(utf8);
    }
    if percent_ascii >= 99 {
        return TextType::Ascii(ascii);
    };
    TextType::None
}
#[cfg(test)]
mod tests {
    #[cfg(feature = "std")]
    use std::fs;

    #[cfg(feature = "std")]
    use super::{AFLppRedQueen, Tokens};

    /// Round-trips a small dictionary file through `Tokens::from_file`.
    /// The fixture lists a comment line plus four candidate entries, of
    /// which exactly two are expected to parse into tokens.
    #[cfg(feature = "std")]
    #[test]
    fn test_read_tokens() {
        // Remove any stale fixture left over from a previous/aborted run.
        let _res = fs::remove_file("test.tkns");
        let data = r#"
# comment
token1@123="AAA"
token1="A\x41A"
"A\AA"
token2="B"
        "#;
        fs::write("test.tkns", data).expect("Unable to write test.tkns");
        let tokens = Tokens::from_file("test.tkns").unwrap();
        log::info!("Token file entries: {:?}", tokens.tokens());
        assert_eq!(tokens.tokens().len(), 2);
        // Clean up the fixture so reruns start fresh.
        let _res = fs::remove_file("test.tkns");
    }

    /// Smoke test: `cmp_extend_encoding` called with all-zero values and
    /// zero lengths must not panic; the mutation result itself is ignored.
    #[cfg(feature = "std")]
    #[test]
    fn test_token_mutations() {
        let rq = AFLppRedQueen::with_cmplog_options(true, true);
        let pattern = 0;
        let repl = 0;
        let another_pattern = 0;
        let changed_val = 0;
        let attr = 0;
        let another_buf = &[0, 0, 0, 0];
        let buf = &[0, 0, 0, 0];
        let buf_idx = 0;
        let taint_len = 0;
        let input_len = 0;
        let hshape = 0;
        let mut vec = std::vec::Vec::new();
        let _res = rq.cmp_extend_encoding(
            pattern,
            repl,
            another_pattern,
            changed_val,
            attr,
            another_buf,
            buf,
            buf_idx,
            taint_len,
            input_len,
            hshape,
            &mut vec,
        );
    }
}