Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,10 @@ default-run = "codspeed"
name = "codspeed"
path = "src/main.rs"

[[bin]]
name = "compare_walltime_output"
path = "src/bin/compare_walltime_output.rs"


[dependencies]
anyhow = { workspace = true }
Expand Down
7 changes: 7 additions & 0 deletions crates/runner-shared/src/debug_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,13 @@ impl std::fmt::Debug for DebugInfo {
}
}

/// Per-pid mounting info referencing a deduplicated debug info entry.
///
/// One value ties a single process to a shared debug info entry (by key) and carries the
/// load bias that goes with that process' mapping.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct DebugInfoPidMapping {
    /// Key of the deduplicated debug info entry this mapping points at.
    pub debug_info_key: String,
    /// Load bias stored alongside the key.
    // NOTE(review): presumably the offset between on-disk and in-memory addresses — confirm.
    pub load_bias: u64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModuleDebugInfo {
/// The path to the object file on disk (e.g. `/usr/lib/libc.so.6`)
Expand Down
1 change: 1 addition & 0 deletions crates/runner-shared/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ pub mod debug_info;
pub mod fifo;
pub mod metadata;
pub mod perf_event;
pub mod perf_map;
pub mod unwind_data;
pub mod walltime_results;
28 changes: 24 additions & 4 deletions crates/runner-shared/src/metadata.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
use anyhow::Context;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::io::BufWriter;
use std::path::Path;

use crate::debug_info::ModuleDebugInfo;
use crate::debug_info::{DebugInfoPidMapping, ModuleDebugInfo};
use crate::fifo::MarkerType;
use crate::perf_map::SymbolPidMapping;
use crate::unwind_data::UnwindDataPidMapping;

#[derive(Serialize, Deserialize)]
pub struct PerfMetadata {
Expand All @@ -25,9 +28,26 @@ pub struct PerfMetadata {
#[deprecated(note = "Use ExecutionTimestamps in the 'artifacts' module instead")]
pub markers: Vec<MarkerType>,

/// Debug info for all modules across all processes, mapping PID to module debug info
#[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
pub debug_info_by_pid: std::collections::HashMap<i32, Vec<ModuleDebugInfo>>,
/// Kept for backward compatibility, was used before deduplication of debug info entries.
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
#[deprecated(note = "Use 'debug_info' + 'debug_info_pid_mappings_by_pid' instead")]
pub debug_info_by_pid: HashMap<i32, Vec<ModuleDebugInfo>>,

/// Deduplicated debug info entries, keyed by semantic key
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub debug_info: HashMap<String, ModuleDebugInfo>,

/// Per-pid debug info references, mapping PID to list of debug info key + load bias
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub debug_info_pid_mappings_by_pid: HashMap<i32, Vec<DebugInfoPidMapping>>,

/// Per-pid unwind data references, mapping PID to list of unwind data key + mounting info
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub unwind_data_pid_mappings_by_pid: HashMap<i32, Vec<UnwindDataPidMapping>>,

/// Per-pid symbol references, mapping PID to list of perf map key + load bias
#[serde(default, skip_serializing_if = "HashMap::is_empty")]
pub symbol_pid_mappings_by_pid: HashMap<i32, Vec<SymbolPidMapping>>,
}

impl PerfMetadata {
Expand Down
11 changes: 11 additions & 0 deletions crates/runner-shared/src/perf_map.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
use serde::{Deserialize, Serialize};

/// File suffix used when registering module symbols in a PID-agnostic way.
pub const SYMBOLS_MAP_SUFFIX: &str = "symbols.map";

/// Per-pid mounting info referencing a deduplicated perf map entry.
///
/// One value ties a single process to a shared perf map entry (by key) and carries the
/// load bias that goes with that process' mapping.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SymbolPidMapping {
    /// Key of the deduplicated perf map entry this mapping points at.
    pub perf_map_key: String,
    /// Load bias stored alongside the key.
    // NOTE(review): presumably the offset between on-disk and in-memory addresses — confirm.
    pub load_bias: u64,
}
250 changes: 182 additions & 68 deletions crates/runner-shared/src/unwind_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,92 +8,39 @@ use std::{hash::DefaultHasher, ops::Range};

pub const UNWIND_FILE_EXT: &str = "unwind_data";

pub type UnwindData = UnwindDataV2;

pub type UnwindData = UnwindDataV3;
impl UnwindData {
/// Decodes bincode-serialized unwind data and returns it only if it was written as V3.
///
/// `reader` holds the raw bytes of a serialized `UnwindDataCompat`.
///
/// # Errors
///
/// Returns an error when the bytes cannot be deserialized, or when the payload is a V1 or V2
/// entry: those layouts are rejected rather than converted.
pub fn parse(reader: &[u8]) -> anyhow::Result<Self> {
    let compat: UnwindDataCompat = bincode::deserialize(reader)?;

    // Only the V3 arm may succeed. Earlier arms that returned V1/V2 data directly would
    // shadow the rejections below and make them unreachable.
    match compat {
        UnwindDataCompat::V1(_) => {
            anyhow::bail!("Cannot parse V1 unwind data as V3 (breaking changes)")
        }
        UnwindDataCompat::V2(_) => {
            anyhow::bail!("Cannot parse V2 unwind data as V3 (breaking changes)")
        }
        UnwindDataCompat::V3(v3) => Ok(v3),
    }
}

pub fn save_to<P: AsRef<std::path::Path>>(&self, folder: P, pid: i32) -> anyhow::Result<()> {
let unwind_data_path = folder.as_ref().join(format!(
"{}_{:x}_{:x}_{}.{UNWIND_FILE_EXT}",
pid,
self.avma_range.start,
self.avma_range.end,
self.timestamp.unwrap_or_default()
));
self.to_file(unwind_data_path)?;

Ok(())
}

pub fn to_file<P: AsRef<std::path::Path>>(&self, path: P) -> anyhow::Result<()> {
if let Ok(true) = std::fs::exists(path.as_ref()) {
// This happens in CI for the root `systemd-run` process which execs into bash which
// also execs into bash, each process reloading common libraries like `ld-linux.so`.
// We detect this when we harvest unwind_data by parsing the perf data (exec-harness).
// Until we properly handle the process tree and deduplicate unwind data, just debug
// log here
// Any relevant occurrence should have other symptoms reported by users.
log::debug!(
"{} already exists, file will be truncated",
path.as_ref().display()
);
log::debug!("{} {:x?}", self.path, self.avma_range);
}

let compat = UnwindDataCompat::V2(self.clone());
let file = std::fs::File::create(path.as_ref())?;
const BUFFER_SIZE: usize = 256 * 1024 /* 256 KB */;

/// Writes this unwind data to `<folder>/<key>.<UNWIND_FILE_EXT>`.
///
/// The value is wrapped in `UnwindDataCompat::V3` and serialized with bincode through a
/// buffered writer. An existing file at that path is truncated by `File::create`.
///
/// # Errors
///
/// Returns an error if the file cannot be created, if serialization fails, or if the
/// buffered bytes cannot be flushed to the file.
pub fn save_to<P: AsRef<std::path::Path>>(&self, folder: P, key: &str) -> anyhow::Result<()> {
    const BUFFER_SIZE: usize = 256 * 1024; // 256 KiB

    let path = folder.as_ref().join(format!("{key}.{UNWIND_FILE_EXT}"));
    let compat = UnwindDataCompat::V3(self.clone());
    let file = std::fs::File::create(&path)?;
    let mut writer = BufWriter::with_capacity(BUFFER_SIZE, file);
    bincode::serialize_into(&mut writer, &compat)?;

    // Flush explicitly: dropping a `BufWriter` also flushes, but any I/O error raised at that
    // point is discarded, which could leave a truncated file behind while returning `Ok`.
    std::io::Write::flush(&mut writer)?;

    Ok(())
}
}

impl Debug for UnwindData {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let eh_frame_hdr_hash = {
let mut hasher = DefaultHasher::new();
self.eh_frame_hdr.hash(&mut hasher);
hasher.finish()
};
let eh_frame_hash = {
let mut hasher = DefaultHasher::new();
self.eh_frame.hash(&mut hasher);
hasher.finish()
};

f.debug_struct("UnwindData")
.field("path", &self.path)
.field("timestamp", &self.timestamp)
.field("avma_range", &format_args!("{:x?}", self.avma_range))
.field("base_avma", &format_args!("{:x}", self.base_avma))
.field("base_svma", &format_args!("{:x}", self.base_svma))
.field(
"eh_frame_hdr_svma",
&format_args!("{:x?}", self.eh_frame_hdr_svma),
)
.field("eh_frame_hdr_hash", &format_args!("{eh_frame_hdr_hash:x}"))
.field("eh_frame_hash", &format_args!("{eh_frame_hash:x}"))
.field("eh_frame_svma", &format_args!("{:x?}", self.eh_frame_svma))
.finish()
}
}

/// A versioned enum for `UnwindData` to allow for future extensions while maintaining backward compatibility.
///
/// Unwind data is wrapped in one of these variants when it is written, and the `parse`
/// functions match on the variant to decide whether the payload can be returned, converted,
/// or must be rejected.
// NOTE(review): bincode is assumed to encode the variant by its position in this enum, so new
// versions should only ever be appended after the existing ones — confirm.
#[derive(Serialize, Deserialize)]
enum UnwindDataCompat {
// Oldest layout; `UnwindDataV2::parse` converts it into V2.
V1(UnwindDataV1),
// Layout that still carries per-pid fields.
V2(UnwindDataV2),
// Pid-agnostic layout; the only one `UnwindData::parse` accepts.
V3(UnwindDataV3),
}

#[doc(hidden)]
Expand All @@ -113,7 +60,7 @@ struct UnwindDataV1 {
}

#[doc(hidden)]
#[derive(Serialize, Deserialize, Clone)]
#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
pub struct UnwindDataV2 {
pub path: String,

Expand All @@ -132,6 +79,21 @@ pub struct UnwindDataV2 {
pub eh_frame_svma: Range<u64>,
}

impl UnwindDataV2 {
    /// Decodes serialized unwind data into the V2 layout.
    ///
    /// V1 payloads are upgraded to V2 and V2 payloads are returned unchanged. V3 payloads are
    /// rejected because they no longer contain the per-pid fields that V2 requires.
    ///
    /// # Errors
    ///
    /// Fails when the bytes cannot be deserialized or when they hold a V3 entry.
    pub fn parse(reader: &[u8]) -> anyhow::Result<Self> {
        let parsed = match bincode::deserialize::<UnwindDataCompat>(reader)? {
            UnwindDataCompat::V3(_) => {
                anyhow::bail!("Cannot parse V3 unwind data as V2 (missing per-pid fields)")
            }
            UnwindDataCompat::V2(current) => current,
            UnwindDataCompat::V1(legacy) => Self::from(legacy),
        };

        Ok(parsed)
    }
}

impl From<UnwindDataV1> for UnwindDataV2 {
fn from(v1: UnwindDataV1) -> Self {
Self {
Expand All @@ -147,3 +109,155 @@ impl From<UnwindDataV1> for UnwindDataV2 {
}
}
}

/// Pid-agnostic unwind data.
///
/// Holds only what is shared by every PID that loads the same shared library. The per-process
/// placement (address range, base address, timestamp) is kept in [`UnwindDataPidMapping`].
///
/// Field order is kept as-is because this struct is serialized.
#[derive(Clone, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub struct UnwindDataV3 {
    /// Path of the module this unwind data belongs to.
    pub path: String,
    /// Base SVMA of the module.
    // NOTE(review): presumably the base address as recorded in the object file — confirm.
    pub base_svma: u64,
    /// Bytes stored for `eh_frame_hdr`.
    pub eh_frame_hdr: Vec<u8>,
    /// SVMA range that goes with `eh_frame_hdr`.
    pub eh_frame_hdr_svma: Range<u64>,
    /// Bytes stored for `eh_frame`.
    pub eh_frame: Vec<u8>,
    /// SVMA range that goes with `eh_frame`.
    pub eh_frame_svma: Range<u64>,
}

impl From<UnwindDataV2> for UnwindDataV3 {
fn from(v2: UnwindDataV2) -> Self {
Self {
path: v2.path,
base_svma: v2.base_svma,
eh_frame_hdr: v2.eh_frame_hdr,
eh_frame_hdr_svma: v2.eh_frame_hdr_svma,
eh_frame: v2.eh_frame,
eh_frame_svma: v2.eh_frame_svma,
}
}
}

impl Debug for UnwindDataV3 {
    /// Formats the entry as an `UnwindData` debug struct.
    ///
    /// The `eh_frame_hdr` and `eh_frame` byte buffers are not printed; each is replaced by a
    /// hex digest computed with `DefaultHasher`. Addresses and ranges are rendered in hex.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        /// Hashes a byte buffer into a single `u64` for compact display.
        fn digest(bytes: &[u8]) -> u64 {
            let mut state = DefaultHasher::new();
            bytes.hash(&mut state);
            state.finish()
        }

        let hdr_digest = digest(&self.eh_frame_hdr);
        let frame_digest = digest(&self.eh_frame);

        let mut out = f.debug_struct("UnwindData");
        out.field("path", &self.path);
        out.field("base_svma", &format_args!("{:x}", self.base_svma));
        out.field(
            "eh_frame_hdr_svma",
            &format_args!("{:x?}", self.eh_frame_hdr_svma),
        );
        out.field("eh_frame_hdr_hash", &format_args!("{hdr_digest:x}"));
        out.field("eh_frame_hash", &format_args!("{frame_digest:x}"));
        out.field("eh_frame_svma", &format_args!("{:x?}", self.eh_frame_svma));
        out.finish()
    }
}

/// Per-pid mounting info referencing a deduplicated unwind data entry.
///
/// Carries the values that differ per process, next to the key of the shared entry.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct UnwindDataPidMapping {
    /// Key of the deduplicated unwind data entry this mapping points at.
    pub unwind_data_key: String,
    /// Optional timestamp attached to this mapping.
    // NOTE(review): unit and clock source are not visible here — confirm with the producer.
    pub timestamp: Option<u64>,
    /// AVMA range of the module for this process.
    pub avma_range: Range<u64>,
    /// Base AVMA of the module for this process.
    pub base_avma: u64,
}

#[cfg(test)]
mod tests {
    use super::*;

    // Serialized fixtures, one per on-disk layout exercised below.
    const V2_BINARY: &[u8] = include_bytes!("../testdata/unwind_data_v2.bin");
    const V3_BINARY: &[u8] = include_bytes!("../testdata/unwind_data_v3.bin");

    /// Value the V2 fixture is expected to decode to.
    fn create_sample_v2() -> UnwindDataV2 {
        UnwindDataV2 {
            path: String::from("/lib/test.so"),
            timestamp: Some(12345),
            avma_range: 0x1000..0x2000,
            base_avma: 0x1000,
            base_svma: 0x0,
            eh_frame_hdr: vec![1, 2, 3, 4],
            eh_frame_hdr_svma: 0x100..0x200,
            eh_frame: vec![5, 6, 7, 8],
            eh_frame_svma: 0x200..0x300,
        }
    }

    /// Value the V3 fixture is expected to decode to.
    fn create_sample_v3() -> UnwindDataV3 {
        UnwindDataV3 {
            path: String::from("/lib/test.so"),
            base_svma: 0x0,
            eh_frame_hdr: vec![1, 2, 3, 4],
            eh_frame_hdr_svma: 0x100..0x200,
            eh_frame: vec![5, 6, 7, 8],
            eh_frame_svma: 0x200..0x300,
        }
    }

    /// A V2 payload must be rejected by the V3 parser (`UnwindData::parse`).
    #[test]
    fn test_parse_v2_as_v3_should_error() {
        let outcome = UnwindData::parse(V2_BINARY);
        assert!(outcome.is_err());

        let err = outcome.unwrap_err();
        let mentions_incompatibility = err
            .to_string()
            .contains("Cannot parse V2 unwind data as V3");
        assert!(
            mentions_incompatibility,
            "Expected error message about V2->V3 incompatibility, got: {err}"
        );
    }

    /// A V3 payload must be rejected by the V2 parser (`UnwindDataV2::parse`).
    #[test]
    fn test_parse_v3_as_v2_should_error() {
        let outcome = UnwindDataV2::parse(V3_BINARY);
        assert!(outcome.is_err());

        let err = outcome.unwrap_err();
        let mentions_incompatibility = err
            .to_string()
            .contains("Cannot parse V3 unwind data as V2");
        assert!(
            mentions_incompatibility,
            "Expected error message about V3->V2 incompatibility, got: {err}"
        );
    }

    /// The V3 fixture decodes to the expected V3 value.
    #[test]
    fn test_parse_v3_as_v3() {
        let actual = UnwindData::parse(V3_BINARY).expect("Failed to parse V3 data as V3");
        let wanted = create_sample_v3();

        assert_eq!(actual, wanted);
    }

    /// The V2 fixture decodes to the expected V2 value, checked field by field.
    #[test]
    fn test_parse_v2_as_v2() {
        let actual = UnwindDataV2::parse(V2_BINARY).expect("Failed to parse V2 data as V2");
        let wanted = create_sample_v2();

        // Per-pid fields.
        assert_eq!(actual.path, wanted.path);
        assert_eq!(actual.timestamp, wanted.timestamp);
        assert_eq!(actual.avma_range, wanted.avma_range);
        assert_eq!(actual.base_avma, wanted.base_avma);

        // Fields shared with the V3 layout.
        assert_eq!(actual.base_svma, wanted.base_svma);
        assert_eq!(actual.eh_frame_hdr, wanted.eh_frame_hdr);
        assert_eq!(actual.eh_frame_hdr_svma, wanted.eh_frame_hdr_svma);
        assert_eq!(actual.eh_frame, wanted.eh_frame);
        assert_eq!(actual.eh_frame_svma, wanted.eh_frame_svma);
    }
}
Binary file added crates/runner-shared/testdata/unwind_data_v2.bin
Binary file not shown.
Binary file added crates/runner-shared/testdata/unwind_data_v3.bin
Binary file not shown.
Loading