diff --git a/Cargo.toml b/Cargo.toml
index 8be093bf..f141b1a5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -10,6 +10,10 @@ default-run = "codspeed"
 name = "codspeed"
 path = "src/main.rs"
 
+[[bin]]
+name = "compare_walltime_output"
+path = "src/bin/compare_walltime_output.rs"
+
 
 [dependencies]
 anyhow = { workspace = true }
diff --git a/crates/runner-shared/src/debug_info.rs b/crates/runner-shared/src/debug_info.rs
index 2fcbf696..240c1bdb 100644
--- a/crates/runner-shared/src/debug_info.rs
+++ b/crates/runner-shared/src/debug_info.rs
@@ -21,6 +21,13 @@ impl std::fmt::Debug for DebugInfo {
     }
 }
 
+/// Per-pid mounting info referencing a deduplicated debug info entry.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct DebugInfoPidMapping {
+    pub debug_info_key: String,
+    pub load_bias: u64,
+}
+
 #[derive(Debug, Clone, Serialize, Deserialize)]
 pub struct ModuleDebugInfo {
     /// The path to the object file on disk (e.g. `/usr/lib/libc.so.6`)
diff --git a/crates/runner-shared/src/lib.rs b/crates/runner-shared/src/lib.rs
index 381a8a04..7b59a2a8 100644
--- a/crates/runner-shared/src/lib.rs
+++ b/crates/runner-shared/src/lib.rs
@@ -3,5 +3,6 @@ pub mod debug_info;
 pub mod fifo;
 pub mod metadata;
 pub mod perf_event;
+pub mod perf_map;
 pub mod unwind_data;
 pub mod walltime_results;
diff --git a/crates/runner-shared/src/metadata.rs b/crates/runner-shared/src/metadata.rs
index 13beae77..ab74c344 100644
--- a/crates/runner-shared/src/metadata.rs
+++ b/crates/runner-shared/src/metadata.rs
@@ -1,10 +1,13 @@
 use anyhow::Context;
 use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
 use std::io::BufWriter;
 use std::path::Path;
 
-use crate::debug_info::ModuleDebugInfo;
+use crate::debug_info::{DebugInfoPidMapping, ModuleDebugInfo};
 use crate::fifo::MarkerType;
+use crate::perf_map::SymbolPidMapping;
+use crate::unwind_data::UnwindDataPidMapping;
 
 #[derive(Serialize, Deserialize)]
 pub struct PerfMetadata {
@@ -25,9 +28,26 @@ pub struct PerfMetadata {
     #[deprecated(note = "Use ExecutionTimestamps in the 'artifacts' module instead")]
     pub markers: Vec<MarkerType>,
 
-    /// Debug info for all modules across all processes, mapping PID to module debug info
-    #[serde(default, skip_serializing_if = "std::collections::HashMap::is_empty")]
-    pub debug_info_by_pid: std::collections::HashMap<i32, Vec<ModuleDebugInfo>>,
+    /// Kept for backward compatibility, was used before deduplication of debug info entries.
+    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
+    #[deprecated(note = "Use 'debug_info' + 'debug_info_pid_mappings_by_pid' instead")]
+    pub debug_info_by_pid: HashMap<i32, Vec<ModuleDebugInfo>>,
+
+    /// Deduplicated debug info entries, keyed by semantic key
+    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
+    pub debug_info: HashMap<String, ModuleDebugInfo>,
+
+    /// Per-pid debug info references, mapping PID to list of debug info index + load bias
+    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
+    pub debug_info_pid_mappings_by_pid: HashMap<i32, Vec<DebugInfoPidMapping>>,
+
+    /// Per-pid unwind data references, mapping PID to list of unwind data index + mounting info
+    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
+    pub unwind_data_pid_mappings_by_pid: HashMap<i32, Vec<UnwindDataPidMapping>>,
+
+    /// Per-pid symbol references, mapping PID to list of perf map index + load bias
+    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
+    pub symbol_pid_mappings_by_pid: HashMap<i32, Vec<SymbolPidMapping>>,
 }
 
 impl PerfMetadata {
diff --git a/crates/runner-shared/src/perf_map.rs b/crates/runner-shared/src/perf_map.rs
new file mode 100644
index 00000000..9e83647c
--- /dev/null
+++ b/crates/runner-shared/src/perf_map.rs
@@ -0,0 +1,11 @@
+use serde::{Deserialize, Serialize};
+
+/// File suffix used when registering module symbols in a PID agnostic way.
+pub const SYMBOLS_MAP_SUFFIX: &str = "symbols.map";
+
+/// Per-pid mounting info referencing a deduplicated perf map entry.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct SymbolPidMapping {
+    pub perf_map_key: String,
+    pub load_bias: u64,
+}
diff --git a/crates/runner-shared/src/unwind_data.rs b/crates/runner-shared/src/unwind_data.rs
index ed4cb029..41c1e818 100644
--- a/crates/runner-shared/src/unwind_data.rs
+++ b/crates/runner-shared/src/unwind_data.rs
@@ -8,92 +8,39 @@ use std::{hash::DefaultHasher, ops::Range};
 
 pub const UNWIND_FILE_EXT: &str = "unwind_data";
 
-pub type UnwindData = UnwindDataV2;
-
+pub type UnwindData = UnwindDataV3;
 impl UnwindData {
     pub fn parse(reader: &[u8]) -> anyhow::Result<Self> {
         let compat: UnwindDataCompat = bincode::deserialize(reader)?;
 
         match compat {
-            UnwindDataCompat::V1(v1) => Ok(v1.into()),
-            UnwindDataCompat::V2(v2) => Ok(v2),
+            UnwindDataCompat::V1(_) => {
+                anyhow::bail!("Cannot parse V1 unwind data as V3 (breaking changes)")
+            }
+            UnwindDataCompat::V2(_) => {
+                anyhow::bail!("Cannot parse V2 unwind data as V3 (breaking changes)")
+            }
+            UnwindDataCompat::V3(v3) => Ok(v3),
         }
     }
 
-    pub fn save_to<P: AsRef<std::path::Path>>(&self, folder: P, pid: i32) -> anyhow::Result<()> {
-        let unwind_data_path = folder.as_ref().join(format!(
-            "{}_{:x}_{:x}_{}.{UNWIND_FILE_EXT}",
-            pid,
-            self.avma_range.start,
-            self.avma_range.end,
-            self.timestamp.unwrap_or_default()
-        ));
-        self.to_file(unwind_data_path)?;
-
-        Ok(())
-    }
-
-    pub fn to_file<P: AsRef<std::path::Path>>(&self, path: P) -> anyhow::Result<()> {
-        if let Ok(true) = std::fs::exists(path.as_ref()) {
-            // This happens in CI for the root `systemd-run` process which execs into bash which
-            // also execs into bash, each process reloading common libraries like `ld-linux.so`.
-            // We detect this when we harvest unwind_data by parsing the perf data (exec-harness).
-            // Until we properly handle the process tree and deduplicate unwind data, just debug
-            // log here
-            // Any relevant occurence should have other symptoms reported by users.
-            log::debug!(
-                "{} already exists, file will be truncated",
-                path.as_ref().display()
-            );
-            log::debug!("{} {:x?}", self.path, self.avma_range);
-        }
-
-        let compat = UnwindDataCompat::V2(self.clone());
-        let file = std::fs::File::create(path.as_ref())?;
-        const BUFFER_SIZE: usize = 256 * 1024 /* 256 KB */;
-
+    pub fn save_to<P: AsRef<std::path::Path>>(&self, folder: P, key: &str) -> anyhow::Result<()> {
+        let path = folder.as_ref().join(format!("{key}.{UNWIND_FILE_EXT}"));
+        let compat = UnwindDataCompat::V3(self.clone());
+        let file = std::fs::File::create(&path)?;
+        const BUFFER_SIZE: usize = 256 * 1024;
         let writer = BufWriter::with_capacity(BUFFER_SIZE, file);
         bincode::serialize_into(writer, &compat)?;
-
         Ok(())
     }
 }
 
-impl Debug for UnwindData {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        let eh_frame_hdr_hash = {
-            let mut hasher = DefaultHasher::new();
-            self.eh_frame_hdr.hash(&mut hasher);
-            hasher.finish()
-        };
-        let eh_frame_hash = {
-            let mut hasher = DefaultHasher::new();
-            self.eh_frame.hash(&mut hasher);
-            hasher.finish()
-        };
-
-        f.debug_struct("UnwindData")
-            .field("path", &self.path)
-            .field("timestamp", &self.timestamp)
-            .field("avma_range", &format_args!("{:x?}", self.avma_range))
-            .field("base_avma", &format_args!("{:x}", self.base_avma))
-            .field("base_svma", &format_args!("{:x}", self.base_svma))
-            .field(
-                "eh_frame_hdr_svma",
-                &format_args!("{:x?}", self.eh_frame_hdr_svma),
-            )
-            .field("eh_frame_hdr_hash", &format_args!("{eh_frame_hdr_hash:x}"))
-            .field("eh_frame_hash", &format_args!("{eh_frame_hash:x}"))
-            .field("eh_frame_svma", &format_args!("{:x?}", self.eh_frame_svma))
-            .finish()
-    }
-}
-
 /// A versioned enum for `UnwindData` to allow for future extensions while maintaining backward compatibility.
 #[derive(Serialize, Deserialize)]
 enum UnwindDataCompat {
     V1(UnwindDataV1),
     V2(UnwindDataV2),
+    V3(UnwindDataV3),
 }
 
 #[doc(hidden)]
@@ -113,7 +60,7 @@ struct UnwindDataV1 {
 }
 
 #[doc(hidden)]
-#[derive(Serialize, Deserialize, Clone)]
+#[derive(Serialize, Deserialize, Clone, Debug, PartialEq)]
 pub struct UnwindDataV2 {
     pub path: String,
 
@@ -132,6 +79,21 @@ pub struct UnwindDataV2 {
     pub eh_frame_svma: Range<u64>,
 }
 
+impl UnwindDataV2 {
+    /// Parse unwind data bytes, converting V1 to V2 but erroring on V3
+    /// (since V3 doesn't have the per-pid fields needed for V2).
+    pub fn parse(reader: &[u8]) -> anyhow::Result<Self> {
+        let compat: UnwindDataCompat = bincode::deserialize(reader)?;
+        match compat {
+            UnwindDataCompat::V1(v1) => Ok(v1.into()),
+            UnwindDataCompat::V2(v2) => Ok(v2),
+            UnwindDataCompat::V3(_) => {
+                anyhow::bail!("Cannot parse V3 unwind data as V2 (missing per-pid fields)")
+            }
+        }
+    }
+}
+
 impl From<UnwindDataV1> for UnwindDataV2 {
     fn from(v1: UnwindDataV1) -> Self {
         Self {
@@ -147,3 +109,155 @@ impl From<UnwindDataV1> for UnwindDataV2 {
         }
     }
 }
+
+/// Pid-agnostic unwind data.
+/// Contains only the data that is common across all PIDs loading the same shared library.
+#[derive(Serialize, Deserialize, Clone, PartialEq, Eq, Hash)]
+pub struct UnwindDataV3 {
+    pub path: String,
+    pub base_svma: u64,
+    pub eh_frame_hdr: Vec<u8>,
+    pub eh_frame_hdr_svma: Range<u64>,
+    pub eh_frame: Vec<u8>,
+    pub eh_frame_svma: Range<u64>,
+}
+
+impl From<UnwindDataV2> for UnwindDataV3 {
+    fn from(v2: UnwindDataV2) -> Self {
+        Self {
+            path: v2.path,
+            base_svma: v2.base_svma,
+            eh_frame_hdr: v2.eh_frame_hdr,
+            eh_frame_hdr_svma: v2.eh_frame_hdr_svma,
+            eh_frame: v2.eh_frame,
+            eh_frame_svma: v2.eh_frame_svma,
+        }
+    }
+}
+
+impl Debug for UnwindDataV3 {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let eh_frame_hdr_hash = {
+            let mut hasher = DefaultHasher::new();
+            self.eh_frame_hdr.hash(&mut hasher);
+            hasher.finish()
+        };
+        let eh_frame_hash = {
+            let mut hasher = DefaultHasher::new();
+            self.eh_frame.hash(&mut hasher);
+            hasher.finish()
+        };
+
+        f.debug_struct("UnwindData")
+            .field("path", &self.path)
+            .field("base_svma", &format_args!("{:x}", self.base_svma))
+            .field(
+                "eh_frame_hdr_svma",
+                &format_args!("{:x?}", self.eh_frame_hdr_svma),
+            )
+            .field("eh_frame_hdr_hash", &format_args!("{eh_frame_hdr_hash:x}"))
+            .field("eh_frame_hash", &format_args!("{eh_frame_hash:x}"))
+            .field("eh_frame_svma", &format_args!("{:x?}", self.eh_frame_svma))
+            .finish()
+    }
+}
+
+/// Per-pid mounting info referencing a deduplicated unwind data entry.
+#[derive(Serialize, Deserialize, Clone, Debug)]
+pub struct UnwindDataPidMapping {
+    pub unwind_data_key: String,
+    pub timestamp: Option<u64>,
+    pub avma_range: Range<u64>,
+    pub base_avma: u64,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    const V2_BINARY: &[u8] = include_bytes!("../testdata/unwind_data_v2.bin");
+    const V3_BINARY: &[u8] = include_bytes!("../testdata/unwind_data_v3.bin");
+
+    fn create_sample_v2() -> UnwindDataV2 {
+        UnwindDataV2 {
+            path: "/lib/test.so".to_string(),
+            timestamp: Some(12345),
+            avma_range: 0x1000..0x2000,
+            base_avma: 0x1000,
+            base_svma: 0x0,
+            eh_frame_hdr: vec![1, 2, 3, 4],
+            eh_frame_hdr_svma: 0x100..0x200,
+            eh_frame: vec![5, 6, 7, 8],
+            eh_frame_svma: 0x200..0x300,
+        }
+    }
+
+    fn create_sample_v3() -> UnwindDataV3 {
+        UnwindDataV3 {
+            path: "/lib/test.so".to_string(),
+            base_svma: 0x0,
+            eh_frame_hdr: vec![1, 2, 3, 4],
+            eh_frame_hdr_svma: 0x100..0x200,
+            eh_frame: vec![5, 6, 7, 8],
+            eh_frame_svma: 0x200..0x300,
+        }
+    }
+
+    #[test]
+    fn test_parse_v2_as_v3_should_error() {
+        // Try to parse V2 binary artifact as V3 using UnwindData::parse
+        let result = UnwindData::parse(V2_BINARY);
+
+        // Should error due to breaking changes between V2 and V3
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("Cannot parse V2 unwind data as V3"),
+            "Expected error message about V2->V3 incompatibility, got: {err}"
+        );
+    }
+
+    #[test]
+    fn test_parse_v3_as_v2_should_error() {
+        // Try to parse V3 binary artifact as V2 using UnwindDataV2::parse
+        let result = UnwindDataV2::parse(V3_BINARY);
+
+        // Should error with specific message about missing per-pid fields
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        assert!(
+            err.to_string()
+                .contains("Cannot parse V3 unwind data as V2"),
+            "Expected error message about V3->V2 incompatibility, got: {err}"
+        );
+    }
+
+    #[test]
+    fn test_parse_v3_as_v3() {
+        // Parse V3 binary artifact as V3 using UnwindData::parse
+        let parsed_v3 = UnwindData::parse(V3_BINARY).expect("Failed to parse V3 data as V3");
+
+        // Should match expected V3 data
+        let expected_v3 = create_sample_v3();
+        assert_eq!(parsed_v3, expected_v3);
+    }
+
+    #[test]
+    fn test_parse_v2_as_v2() {
+        // Parse V2 binary artifact as V2 using UnwindDataV2::parse
+        let parsed_v2 = UnwindDataV2::parse(V2_BINARY).expect("Failed to parse V2 data as V2");
+
+        // Should match expected V2 data
+        let expected_v2 = create_sample_v2();
+        assert_eq!(parsed_v2.path, expected_v2.path);
+        assert_eq!(parsed_v2.timestamp, expected_v2.timestamp);
+        assert_eq!(parsed_v2.avma_range, expected_v2.avma_range);
+        assert_eq!(parsed_v2.base_avma, expected_v2.base_avma);
+        assert_eq!(parsed_v2.base_svma, expected_v2.base_svma);
+        assert_eq!(parsed_v2.eh_frame_hdr, expected_v2.eh_frame_hdr);
+        assert_eq!(parsed_v2.eh_frame_hdr_svma, expected_v2.eh_frame_hdr_svma);
+        assert_eq!(parsed_v2.eh_frame, expected_v2.eh_frame);
+        assert_eq!(parsed_v2.eh_frame_svma, expected_v2.eh_frame_svma);
+    }
+}
diff --git a/crates/runner-shared/testdata/unwind_data_v2.bin b/crates/runner-shared/testdata/unwind_data_v2.bin
new file mode 100644
index 00000000..f26e8277
Binary files /dev/null and b/crates/runner-shared/testdata/unwind_data_v2.bin differ
diff --git a/crates/runner-shared/testdata/unwind_data_v3.bin b/crates/runner-shared/testdata/unwind_data_v3.bin
new file mode 100644
index 00000000..74b42b68
Binary files /dev/null and b/crates/runner-shared/testdata/unwind_data_v3.bin differ
diff --git a/src/bin/compare_walltime_output.rs b/src/bin/compare_walltime_output.rs
new file mode 100644
index 00000000..dd27b2cf
--- /dev/null
+++ b/src/bin/compare_walltime_output.rs
@@ -0,0 +1,900 @@
+/// Compares walltime profiling output between old and new runner formats.
+///
+/// Usage: cargo run --bin compare_walltime_output -- [<old_profile_dir> <new_profile_dir>]
+///
+/// If no arguments are provided, automatically finds the two most recent /tmp/profile.*.out
+/// directories. The one containing `0.unwind_data` is treated as the new format, the other as old.
+///
+/// Old format: {pid}_{start:x}_{end:x}_{timestamp}.unwind_data files + perf-{pid}.map files
+/// New format: {index}.unwind_data files + {index}.perf.map files + perf.metadata
+///
+/// PIDs will differ between runs, but their relative order is stable (ASLR must be disabled).
+/// We compare by matching the Nth sorted PID from old against the Nth from new.
+use anyhow::{Context, Result};
+use itertools::Itertools;
+use runner_shared::debug_info::ModuleDebugInfo;
+use runner_shared::metadata::PerfMetadata;
+use runner_shared::perf_map::SYMBOLS_MAP_SUFFIX;
+use runner_shared::unwind_data::{UNWIND_FILE_EXT, UnwindData};
+use std::collections::{BTreeMap, BTreeSet, HashMap};
+use std::hash::{DefaultHasher, Hash, Hasher};
+use std::io::BufRead;
+use std::path::Path;
+
+/// Normalized entry for comparison (pid-agnostic + pid-specific combined).
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+struct NormalizedEntry {
+    path: String,
+    base_svma: u64,
+    base_avma: u64,
+    avma_start: u64,
+    avma_end: u64,
+    timestamp: Option<u64>,
+    eh_frame_hdr_hash: u64,
+    eh_frame_hash: u64,
+}
+
+fn hash_bytes(data: &[u8]) -> u64 {
+    let mut hasher = DefaultHasher::new();
+    data.hash(&mut hasher);
+    hasher.finish()
+}
+
+/// Load old format: each file is {pid}_{start:x}_{end:x}_{timestamp}.unwind_data
+/// containing a V2 (or V1) with all data inline.
+fn load_old_format(dir: &Path) -> Result<BTreeMap<i32, Vec<NormalizedEntry>>> {
+    let mut result: BTreeMap<i32, Vec<NormalizedEntry>> = BTreeMap::new();
+
+    for entry in std::fs::read_dir(dir)? {
+        let entry = entry?;
+        let name = entry.file_name().to_string_lossy().to_string();
+        if !name.ends_with(&format!(".{UNWIND_FILE_EXT}")) {
+            continue;
+        }
+
+        // Old format: {pid}_{start:x}_{end:x}_{timestamp}.unwind_data
+        let stem = name.strip_suffix(&format!(".{UNWIND_FILE_EXT}")).unwrap();
+        let parts: Vec<&str> = stem.splitn(4, '_').collect();
+        if parts.len() != 4 {
+            // Not an old-format file (could be new format like "0.unwind_data")
+            continue;
+        }
+
+        let pid: i32 = parts[0].parse().context(format!("Invalid pid in {name}"))?;
+
+        let data = std::fs::read(entry.path())?;
+        let v2 = deserialize_as_v2(&data).context(format!("Failed to parse {name}"))?;
+
+        result.entry(pid).or_default().push(NormalizedEntry {
+            path: v2.path,
+            base_svma: v2.base_svma,
+            base_avma: v2.base_avma,
+            avma_start: v2.avma_range.start,
+            avma_end: v2.avma_range.end,
+            timestamp: v2.timestamp,
+            eh_frame_hdr_hash: hash_bytes(&v2.eh_frame_hdr),
+            eh_frame_hash: hash_bytes(&v2.eh_frame),
+        });
+    }
+
+    for entries in result.values_mut() {
+        entries.sort();
+    }
+
+    Ok(result)
+}
+
+/// Deserialize a bincode file that may contain V1 or V2 data, returning the V2 representation.
+fn deserialize_as_v2(data: &[u8]) -> Result<OldUnwindData> {
+    let compat: OldUnwindDataCompat = bincode::deserialize(data)?;
+    match compat {
+        OldUnwindDataCompat::V1(v1) => Ok(OldUnwindData {
+            path: v1.path,
+            timestamp: None,
+            avma_range: v1.avma_range,
+            base_avma: v1.base_avma,
+            base_svma: v1.base_svma,
+            eh_frame_hdr: v1.eh_frame_hdr,
+            eh_frame_hdr_svma: v1.eh_frame_hdr_svma,
+            eh_frame: v1.eh_frame,
+            eh_frame_svma: v1.eh_frame_svma,
+        }),
+        OldUnwindDataCompat::V2(v2) => Ok(v2),
+    }
+}
+
+// Self-contained types for deserializing old V1/V2 files without depending on the
+// current UnwindDataCompat (which now has V3 and would confuse variant indexing).
+#[derive(serde::Deserialize)]
+enum OldUnwindDataCompat {
+    V1(OldUnwindDataV1),
+    V2(OldUnwindData),
+}
+
+#[allow(dead_code)]
+#[derive(serde::Deserialize)]
+struct OldUnwindDataV1 {
+    path: String,
+    avma_range: std::ops::Range<u64>,
+    base_avma: u64,
+    base_svma: u64,
+    eh_frame_hdr: Vec<u8>,
+    eh_frame_hdr_svma: std::ops::Range<u64>,
+    eh_frame: Vec<u8>,
+    eh_frame_svma: std::ops::Range<u64>,
+}
+
+#[allow(dead_code)]
+#[derive(serde::Deserialize)]
+struct OldUnwindData {
+    path: String,
+    timestamp: Option<u64>,
+    avma_range: std::ops::Range<u64>,
+    base_avma: u64,
+    base_svma: u64,
+    eh_frame_hdr: Vec<u8>,
+    eh_frame_hdr_svma: std::ops::Range<u64>,
+    eh_frame: Vec<u8>,
+    eh_frame_svma: std::ops::Range<u64>,
+}
+
+/// Load new format: {key}.unwind_data files (V3) + perf.metadata with unwind_data_by_pid.
+fn load_new_format(
+    dir: &Path,
+    metadata: &PerfMetadata,
+) -> Result<BTreeMap<i32, Vec<NormalizedEntry>>> {
+    // Load all unwind data files into a map keyed by stem
+    let mut unwind_by_key: HashMap<String, UnwindData> = HashMap::new();
+    for entry in std::fs::read_dir(dir)? {
+        let entry = entry?;
+        let name = entry.file_name().to_string_lossy().to_string();
+        if !name.ends_with(&format!(".{UNWIND_FILE_EXT}")) {
+            continue;
+        }
+
+        let stem = name.strip_suffix(&format!(".{UNWIND_FILE_EXT}")).unwrap();
+
+        let data = std::fs::read(entry.path())?;
+        let unwind = UnwindData::parse(&data).context(format!("Failed to parse {name}"))?;
+        unwind_by_key.insert(stem.to_string(), unwind);
+    }
+
+    // Combine with per-pid metadata
+    let mut result: BTreeMap<i32, Vec<NormalizedEntry>> = BTreeMap::new();
+
+    for (pid, mappings) in &metadata.unwind_data_pid_mappings_by_pid {
+        let entries = result.entry(*pid).or_default();
+        for mapping in mappings {
+            let v3 = unwind_by_key
+                .get(&mapping.unwind_data_key)
+                .with_context(|| {
+                    format!(
+                        "Missing unwind data file for key {} (pid {})",
+                        mapping.unwind_data_key, pid
+                    )
+                })?;
+
+            entries.push(NormalizedEntry {
+                path: v3.path.clone(),
+                base_svma: v3.base_svma,
+                base_avma: mapping.base_avma,
+                avma_start: mapping.avma_range.start,
+                avma_end: mapping.avma_range.end,
+                timestamp: mapping.timestamp,
+                eh_frame_hdr_hash: hash_bytes(&v3.eh_frame_hdr),
+                eh_frame_hash: hash_bytes(&v3.eh_frame),
+            });
+        }
+    }
+
+    for entries in result.values_mut() {
+        entries.sort();
+    }
+
+    Ok(result)
+}
+
+/// Collect per-pid entry lists in sorted PID order.
+/// The Nth process from one run is matched against the Nth process from the other.
+fn sorted_by_pid<T>(by_pid: BTreeMap<i32, Vec<T>>) -> Vec<(i32, Vec<T>)> {
+    // BTreeMap iterates in sorted key order, so this is deterministic
+    by_pid.into_iter().collect()
+}
+
+/// Format a process label showing the positional index and both PIDs.
+fn proc_label(idx: usize, old_pid: Option<i32>, new_pid: Option<i32>) -> String {
+    match (old_pid, new_pid) {
+        (Some(op), Some(np)) => format!("process #{idx} (old pid {op}, new pid {np})"),
+        (Some(op), None) => format!("process #{idx} (old pid {op})"),
+        (None, Some(np)) => format!("process #{idx} (new pid {np})"),
+        (None, None) => format!("process #{idx}"),
+    }
+}
+
+fn group_entries_by_path(entries: &[NormalizedEntry]) -> BTreeMap<&str, Vec<&NormalizedEntry>> {
+    let mut map: BTreeMap<&str, Vec<&NormalizedEntry>> = BTreeMap::new();
+    for e in entries {
+        map.entry(&e.path).or_default().push(e);
+    }
+    map
+}
+
+/// Extract the filename from a full module path for compact display.
+fn short_module_name(path: &str) -> &str {
+    path.rsplit('/').next().unwrap_or(path)
+}
+
+// ---------------------------------------------------------------------------
+// Perf map comparison
+// ---------------------------------------------------------------------------
+
+/// A single symbol entry from a perf map (address already includes load_bias).
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+struct PerfMapSymbol {
+    addr: u64,
+    size: u64,
+    name: String,
+}
+
+/// Parse a perf map file (one `{addr:x} {size:x} {name}` per line).
+fn parse_perf_map(path: &Path) -> Result<Vec<PerfMapSymbol>> {
+    let file = std::fs::File::open(path)?;
+    let reader = std::io::BufReader::new(file);
+    let mut symbols = Vec::new();
+
+    for line in reader.lines() {
+        let line = line?;
+        let line = line.trim();
+        if line.is_empty() {
+            continue;
+        }
+        // Format: "{addr:x} {size:x} {name}"
+        let mut parts = line.splitn(3, ' ');
+        let addr = u64::from_str_radix(parts.next().context("missing addr")?, 16)
+            .context("invalid addr hex")?;
+        let size = u64::from_str_radix(parts.next().context("missing size")?, 16)
+            .context("invalid size hex")?;
+        let name = parts.next().context("missing name")?.to_string();
+
+        symbols.push(PerfMapSymbol { addr, size, name });
+    }
+
+    symbols.sort();
+    Ok(symbols)
+}
+
+/// Load old-format perf maps: `perf-{pid}.map` files with biased addresses.
+fn load_old_perf_maps(dir: &Path) -> Result<BTreeMap<i32, Vec<PerfMapSymbol>>> {
+    let mut result: BTreeMap<i32, Vec<PerfMapSymbol>> = BTreeMap::new();
+
+    for entry in std::fs::read_dir(dir)? {
+        let entry = entry?;
+        let name = entry.file_name().to_string_lossy().to_string();
+        // Match perf-{pid}.map
+        let Some(stem) = name
+            .strip_prefix("perf-")
+            .and_then(|s| s.strip_suffix(".map"))
+        else {
+            continue;
+        };
+        let pid: i32 = stem
+            .parse()
+            .context(format!("Invalid pid in perf map filename {name}"))?;
+
+        let symbols = parse_perf_map(&entry.path()).context(format!("Failed to parse {name}"))?;
+        result.entry(pid).or_default().extend(symbols);
+    }
+
+    for entries in result.values_mut() {
+        entries.sort();
+    }
+
+    Ok(result)
+}
+
+/// Load new-format perf maps: `{key}.perf.map` files (raw addresses) +
+/// `symbol_pid_mappings_by_pid` from metadata with per-pid load_bias.
+fn load_new_perf_maps(
+    dir: &Path,
+    metadata: &PerfMetadata,
+) -> Result<BTreeMap<i32, Vec<PerfMapSymbol>>> {
+    // Load perf map files keyed by stem (raw ELF addresses, no bias)
+    let mut maps_by_key: HashMap<String, Vec<PerfMapSymbol>> = HashMap::new();
+    for entry in std::fs::read_dir(dir)? {
+        let entry = entry?;
+        let name = entry.file_name().to_string_lossy().to_string();
+        let Some(stem) = name.strip_suffix(&format!(".{SYMBOLS_MAP_SUFFIX}")) else {
+            continue;
+        };
+        let symbols = parse_perf_map(&entry.path()).context(format!("Failed to parse {name}"))?;
+        maps_by_key.insert(stem.to_string(), symbols);
+    }
+
+    // Combine with per-pid metadata to apply load_bias
+    let mut result: BTreeMap<i32, Vec<PerfMapSymbol>> = BTreeMap::new();
+
+    for (pid, mappings) in &metadata.symbol_pid_mappings_by_pid {
+        let entries = result.entry(*pid).or_default();
+        for mapping in mappings {
+            let raw_symbols = maps_by_key.get(&mapping.perf_map_key).with_context(|| {
+                format!(
+                    "Missing perf map file for key {} (pid {})",
+                    mapping.perf_map_key, pid
+                )
+            })?;
+
+            for sym in raw_symbols {
+                entries.push(PerfMapSymbol {
+                    addr: sym.addr.wrapping_add(mapping.load_bias),
+                    size: sym.size,
+                    name: sym.name.clone(),
+                });
+            }
+        }
+    }
+
+    for entries in result.values_mut() {
+        entries.sort();
+    }
+
+    Ok(result)
+}
+
+fn compare_perf_maps(
+    old: &[(i32, Vec<PerfMapSymbol>)],
+    new: &[(i32, Vec<PerfMapSymbol>)],
+) -> usize {
+    let mut differences = 0;
+
+    if old.len() != new.len() {
+        println!(
+            "  DIFF: old has {} processes, new has {}",
+            old.len(),
+            new.len()
+        );
+        differences += 1;
+    }
+
+    let max_procs = old.len().max(new.len());
+    for proc_idx in 0..max_procs {
+        let old_pid = old.get(proc_idx).map(|(pid, _)| *pid);
+        let new_pid = new.get(proc_idx).map(|(pid, _)| *pid);
+        let label = proc_label(proc_idx, old_pid, new_pid);
+        match (old.get(proc_idx), new.get(proc_idx)) {
+            (Some((_, old_syms)), Some((_, new_syms))) => {
+                // Build sets keyed by (name, addr, size) for comparison
+                let old_set: BTreeSet<_> =
+                    old_syms.iter().map(|s| (&s.name, s.addr, s.size)).collect();
+                let new_set: BTreeSet<_> =
+                    new_syms.iter().map(|s| (&s.name, s.addr, s.size)).collect();
+
+                let only_old: Vec<_> = old_set.difference(&new_set).collect();
+                let only_new: Vec<_> = new_set.difference(&old_set).collect();
+
+                if only_old.is_empty() && only_new.is_empty() {
+                    continue;
+                }
+
+                println!(
+                    "  {label}: {} symbols in old, {} in new",
+                    old_syms.len(),
+                    new_syms.len()
+                );
+
+                if !only_old.is_empty() {
+                    println!("    {} symbols only in old", only_old.len());
+                    differences += 1;
+                }
+                if !only_new.is_empty() {
+                    println!("    {} symbols only in new", only_new.len());
+                    differences += 1;
+                }
+            }
+            (Some(_), None) => {
+                println!("  {label}: only in old");
+                differences += 1;
+            }
+            (None, Some(_)) => {
+                println!("  {label}: only in new");
+                differences += 1;
+            }
+            (None, None) => unreachable!(),
+        }
+    }
+
+    if differences == 0 {
+        println!("  No differences found");
+    }
+
+    differences
+}
+
+// ---------------------------------------------------------------------------
+// Debug info comparison
+// ---------------------------------------------------------------------------
+
+/// A normalized debug info entry for comparison (with load_bias applied).
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+struct NormalizedDebugInfoEntry {
+    object_path: String,
+    load_bias: u64,
+    addr_bounds: (u64, u64),
+    symbols: Vec<NormalizedDebugSymbol>,
+}
+
+#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
+struct NormalizedDebugSymbol {
+    addr: u64,
+    size: u64,
+    name: String,
+    file: String,
+    line: Option<u32>,
+}
+
+/// Old metadata format that still has `debug_info_by_pid`.
+#[derive(serde::Deserialize)]
+struct OldPerfMetadata {
+    #[serde(default)]
+    debug_info_by_pid: HashMap<i32, Vec<ModuleDebugInfo>>,
+}
+
+/// Load debug info from old format: `debug_info_by_pid` field in metadata.
+fn load_old_debug_info(dir: &Path) -> Result<BTreeMap<i32, Vec<NormalizedDebugInfoEntry>>> {
+    let metadata_path = dir.join("perf.metadata");
+    let file = std::fs::File::open(&metadata_path).context("Failed to open old perf.metadata")?;
+    let old_metadata: OldPerfMetadata =
+        serde_json::from_reader(file).context("Failed to parse old perf.metadata")?;
+
+    Ok(old_metadata
+        .debug_info_by_pid
+        .iter()
+        .map(|(pid, modules)| {
+            let mut entries: Vec<_> = modules
+                .iter()
+                .map(|m| NormalizedDebugInfoEntry {
+                    object_path: m.object_path.clone(),
+                    load_bias: m.load_bias,
+                    addr_bounds: m.addr_bounds,
+                    symbols: m
+                        .debug_infos
+                        .iter()
+                        .map(|d| NormalizedDebugSymbol {
+                            addr: d.addr,
+                            size: d.size,
+                            name: d.name.clone(),
+                            file: d.file.clone(),
+                            line: d.line,
+                        })
+                        .sorted()
+                        .collect(),
+                })
+                .collect();
+            entries.sort();
+            (*pid, entries)
+        })
+        .collect())
+}
+
+/// Load debug info from new format: `debug_info` HashMap + `debug_info_pid_mappings_by_pid`.
+fn load_new_debug_info(metadata: &PerfMetadata) -> BTreeMap<i32, Vec<NormalizedDebugInfoEntry>> {
+    metadata
+        .debug_info_pid_mappings_by_pid
+        .iter()
+        .map(|(pid, mappings)| {
+            let mut entries: Vec<_> = mappings
+                .iter()
+                .filter_map(|mapping| {
+                    let module = metadata.debug_info.get(&mapping.debug_info_key)?;
+                    Some(NormalizedDebugInfoEntry {
+                        object_path: module.object_path.clone(),
+                        load_bias: mapping.load_bias,
+                        addr_bounds: module.addr_bounds,
+                        symbols: module
+                            .debug_infos
+                            .iter()
+                            .map(|d| NormalizedDebugSymbol {
+                                addr: d.addr,
+                                size: d.size,
+                                name: d.name.clone(),
+                                file: d.file.clone(),
+                                line: d.line,
+                            })
+                            .sorted()
+                            .collect(),
+                    })
+                })
+                .collect();
+            entries.sort();
+            (*pid, entries)
+        })
+        .collect()
+}
+
+fn compare_debug_info(
+    old: &[(i32, Vec<NormalizedDebugInfoEntry>)],
+    new: &[(i32, Vec<NormalizedDebugInfoEntry>)],
+) -> usize {
+    let mut differences = 0;
+
+    if old.len() != new.len() {
+        println!(
+            "  DIFF: old has {} processes, new has {}",
+            old.len(),
+            new.len()
+        );
+        differences += 1;
+    }
+
+    let max_procs = old.len().max(new.len());
+    for proc_idx in 0..max_procs {
+        let old_pid = old.get(proc_idx).map(|(pid, _)| *pid);
+        let new_pid = new.get(proc_idx).map(|(pid, _)| *pid);
+        let label = proc_label(proc_idx, old_pid, new_pid);
+        match (old.get(proc_idx), new.get(proc_idx)) {
+            (Some((_, old_entries)), Some((_, new_entries))) => {
+                let old_set: BTreeSet<_> = old_entries.iter().map(|e| &e.object_path).collect();
+                let new_set: BTreeSet<_> = new_entries.iter().map(|e| &e.object_path).collect();
+
+                let only_old: Vec<_> = old_set.difference(&new_set).collect();
+                let only_new: Vec<_> = new_set.difference(&old_set).collect();
+
+                if !only_old.is_empty() {
+                    println!("  {label}: {} modules only in old:", only_old.len());
+                    for path in &only_old {
+                        println!("    {}", short_module_name(path));
+                    }
+                    differences += 1;
+                }
+                if !only_new.is_empty() {
+                    println!("  {label}: {} modules only in new:", only_new.len());
+                    for path in &only_new {
+                        println!("    {}", short_module_name(path));
+                    }
+                    differences += 1;
+                }
+
+                // Compare matching modules
+                for old_entry in old_entries {
+                    if let Some(new_entry) = new_entries
+                        .iter()
+                        .find(|e| e.object_path == old_entry.object_path)
+                    {
+                        if old_entry != new_entry {
+                            let short = short_module_name(&old_entry.object_path);
+                            if old_entry.load_bias != new_entry.load_bias {
+                                println!(
+                                    "  {label}: {short}: load_bias mismatch: old={:x} new={:x}",
+                                    old_entry.load_bias, new_entry.load_bias
+                                );
+                            }
+                            if old_entry.addr_bounds != new_entry.addr_bounds {
+                                println!(
+                                    "  {label}: {short}: addr_bounds mismatch: old={:x?} new={:x?}",
+                                    old_entry.addr_bounds, new_entry.addr_bounds
+                                );
+                            }
+                            if old_entry.symbols != new_entry.symbols {
+                                println!(
+                                    "  {label}: {short}: {} symbols in old, {} in new",
+                                    old_entry.symbols.len(),
+                                    new_entry.symbols.len()
+                                );
+                            }
+                            differences += 1;
+                        }
+                    }
+                }
+            }
+            (Some(_), None) => {
+                println!("  {label}: only in old");
+                differences += 1;
+            }
+            (None, Some(_)) => {
+                println!("  {label}: only in new");
+                differences += 1;
+            }
+            (None, None) => unreachable!(),
+        }
+    }
+
+    if differences == 0 {
+        println!("  No differences found");
+    }
+
+    differences
+}
+
+/// Find the two most recent /tmp/profile.*.out directories and auto-detect which is old/new
+/// based on whether the directory contains `0.unwind_data` (new format).
+fn find_profile_dirs() -> Result<(std::path::PathBuf, std::path::PathBuf)> {
+    let mut dirs: Vec<_> = std::fs::read_dir("/tmp")?
+        .filter_map(|e| e.ok())
+        .filter(|e| {
+            let name = e.file_name().to_string_lossy().to_string();
+            name.starts_with("profile.") && name.ends_with(".out") && e.path().is_dir()
+        })
+        .collect();
+
+    // Sort by modification time, most recent last
+    dirs.sort_by_key(|e| {
+        e.metadata()
+            .and_then(|m| m.modified())
+            .unwrap_or(std::time::SystemTime::UNIX_EPOCH)
+    });
+
+    if dirs.len() < 2 {
+        anyhow::bail!(
+            "Found {} /tmp/profile.*.out directories, need at least 2",
+            dirs.len()
+        );
+    }
+
+    let dir_a = dirs.pop().unwrap().path();
+    let dir_b = dirs.pop().unwrap().path();
+
+    // The new format has .unwind_data files with non-numeric stems (semantic keys).
+    // The old format has {pid}_{start}_{end}_{ts}.unwind_data files.
+    let has_new_format_unwind = |dir: &Path| -> bool {
+        std::fs::read_dir(dir)
+            .ok()
+            .map(|entries| {
+                entries.filter_map(|e| e.ok()).any(|e| {
+                    let name = e.file_name().to_string_lossy().to_string();
+                    if let Some(stem) = name.strip_suffix(&format!(".{UNWIND_FILE_EXT}")) {
+                        // New format: stem is NOT a plain number and NOT an old pid_start_end_ts format
+                        stem.parse::<usize>().is_err() && !stem.contains('_')
+                    } else {
+                        false
+                    }
+                })
+            })
+            .unwrap_or(false)
+    };
+
+    let a_is_new = has_new_format_unwind(&dir_a);
+    let b_is_new = has_new_format_unwind(&dir_b);
+
+    match (a_is_new, b_is_new) {
+        (true, false) => Ok((dir_b, dir_a)),
+        (false, true) => Ok((dir_a, dir_b)),
+        _ => anyhow::bail!(
+            "Cannot auto-detect old/new: both or neither contain semantic .{UNWIND_FILE_EXT} files\n  {}\n  {}",
+            dir_a.display(),
+            dir_b.display()
+        ),
+    }
+}
+
+fn main() -> Result<()> {
+    let args: Vec<String> = std::env::args().collect();
+    let (old_path, new_path);
+
+    if args.len() == 3 {
+        old_path = std::path::PathBuf::from(&args[1]);
+        new_path = std::path::PathBuf::from(&args[2]);
+    } else if args.len() == 1 {
+        let (o, n) = find_profile_dirs()?;
+        old_path = o;
+        new_path = n;
+    } else {
+        eprintln!("Usage: {} [<old_profile_dir> <new_profile_dir>]", args[0]);
+        eprintln!(
+            "  If no arguments given, auto-detects from the two most recent /tmp/profile.*.out dirs."
+        );
+        std::process::exit(1);
+    }
+
+    let old_dir = old_path.as_path();
+    let new_dir = new_path.as_path();
+
+    // Load metadata for the new format (needed by both unwind and perf map loaders)
+    let metadata_path = new_dir.join("perf.metadata");
+    let metadata_file =
+        std::fs::File::open(&metadata_path).context("Failed to open perf.metadata")?;
+    let metadata: PerfMetadata = PerfMetadata::from_reader(metadata_file)?;
+
+    let mut differences = 0;
+
+    // -----------------------------------------------------------------------
+    // Unwind data comparison
+    // -----------------------------------------------------------------------
+    println!("=== Unwind Data ===");
+
+    println!("Loading old format from: {}", old_dir.display());
+    let old_by_pid = load_old_format(old_dir)?;
+    println!(
+        "  {} PIDs, {} total entries",
+        old_by_pid.len(),
+        old_by_pid.values().map(|v| v.len()).sum::<usize>()
+    );
+
+    println!("Loading new format from: {}", new_dir.display());
+    let new_by_pid = load_new_format(new_dir, &metadata)?;
+    println!(
+        "  {} PIDs, {} total entries",
+        new_by_pid.len(),
+        new_by_pid.values().map(|v| v.len()).sum::<usize>()
+    );
+
+    let old = sorted_by_pid(old_by_pid);
+    let new = sorted_by_pid(new_by_pid);
+
+    if old.len() != new.len() {
+        println!(
+            "  DIFF: old has {} processes, new has {}",
+            old.len(),
+            new.len()
+        );
+        differences += 1;
+    }
+
+    let max_procs = old.len().max(new.len());
+    for proc_idx in 0..max_procs {
+        let old_pid = old.get(proc_idx).map(|(pid, _)| *pid);
+        let new_pid = new.get(proc_idx).map(|(pid, _)| *pid);
+        let label = proc_label(proc_idx, old_pid, new_pid);
+        match (old.get(proc_idx), new.get(proc_idx)) {
+            (Some((_, old_entries)), Some((_, new_entries))) => {
+                // Group by module path
+                let old_by_mod = group_entries_by_path(old_entries);
+                let new_by_mod = group_entries_by_path(new_entries);
+
+                let all_modules: BTreeSet<&str> = old_by_mod
+                    .keys()
+                    .chain(new_by_mod.keys())
+                    .copied()
+                    .collect();
+
+                for module in all_modules {
+                    let old_mod = old_by_mod.get(module);
+                    let new_mod = new_by_mod.get(module);
+                    match (old_mod, new_mod) {
+                        (Some(_), None) => {
+                            println!("  {label}: only in old: {module}");
+                            differences += 1;
+                        }
+                        (None, Some(_)) => {
+                            println!("  {label}: only in new: {module}");
+                            differences += 1;
+                        }
+                        (Some(old_es), Some(new_es)) => {
+                            if old_es != new_es {
+                                let short = short_module_name(module);
+                                println!("  {label}: mismatch in {short}:");
+                                for &e in old_es {
+                                    if !new_es.contains(&e) {
+                                        println!(
+                                            "    old only: avma={:x}-{:x} base_avma={:x}",
+                                            e.avma_start, e.avma_end, e.base_avma
+                                        );
+                                    }
+                                }
+                                for &e in new_es {
+                                    if !old_es.contains(&e) {
+                                        println!(
+                                            "    new only: avma={:x}-{:x} base_avma={:x}",
+                                            e.avma_start, e.avma_end, e.base_avma
+                                        );
+                                    }
+                                }
+                                differences += 1;
+                            }
+                        }
+                        (None, None) => unreachable!(),
+                    }
+                }
+            }
+            (Some(_), None) => {
+                println!("  {label}: only in old");
+                differences += 1;
+            }
+            (None, Some(_)) => {
+                println!("  {label}: only in new");
+                differences += 1;
+            }
+            (None, None) => unreachable!(),
+        }
+    }
+    if differences == 0 {
+        println!("  No differences found");
+    }
+
+    // -----------------------------------------------------------------------
+    // Perf map comparison
+    // -----------------------------------------------------------------------
+    println!("\n=== Perf Maps ===");
+
+    println!("Loading old perf maps from: {}", old_dir.display());
+    let old_perf_by_pid = load_old_perf_maps(old_dir)?;
+    println!(
+        "  {} PIDs, {} total symbols",
+        old_perf_by_pid.len(),
+        old_perf_by_pid.values().map(|v| v.len()).sum::<usize>()
+    );
+
+    println!("Loading new perf maps from: {}", new_dir.display());
+    let new_perf_by_pid = load_new_perf_maps(new_dir, &metadata)?;
+    println!(
+        "  {} PIDs, {} total symbols",
+        new_perf_by_pid.len(),
+        new_perf_by_pid.values().map(|v| v.len()).sum::<usize>()
+    );
+
+    let old_perf = sorted_by_pid(old_perf_by_pid);
+    let new_perf = sorted_by_pid(new_perf_by_pid);
+
+    differences += compare_perf_maps(&old_perf, &new_perf);
+
+    // -----------------------------------------------------------------------
+    // Debug info comparison
+    // -----------------------------------------------------------------------
+    println!("\n=== Debug Info ===");
+
+    println!("Loading old debug info from: {}", old_dir.display());
+    let old_debug_by_pid = load_old_debug_info(old_dir)?;
+    println!(
+        "  {} PIDs, {} total modules",
+        old_debug_by_pid.len(),
+        old_debug_by_pid.values().map(|v| v.len()).sum::<usize>()
+    );
+
+    println!("Loading new debug info from: {}", new_dir.display());
+    println!(
+        "  (metadata has {} debug_info entries, {} pids in debug_info_pid_mappings_by_pid)",
+        metadata.debug_info.len(),
+        metadata.debug_info_pid_mappings_by_pid.len()
+    );
+    let new_debug_by_pid = load_new_debug_info(&metadata);
+    println!(
+        "  {} PIDs, {} total modules",
+        new_debug_by_pid.len(),
+        new_debug_by_pid.values().map(|v| v.len()).sum::<usize>()
+    );
+
+    let old_debug = sorted_by_pid(old_debug_by_pid);
+    let new_debug = sorted_by_pid(new_debug_by_pid);
+
+    differences += compare_debug_info(&old_debug, &new_debug);
+
+    // -----------------------------------------------------------------------
+    // Summary
+    // -----------------------------------------------------------------------
+    println!(
+        "\n=== Folder sizes ===\n  old: {}\n  new: {}",
+        format_dir_size(old_dir)?,
+        format_dir_size(new_dir)?
+    );
+
+    if differences == 0 {
+        println!("\nOK: Both formats produce identical output.");
+    } else {
+        println!("\nFOUND {differences} difference(s).");
+        std::process::exit(1);
+    }
+
+    Ok(())
+}
+
+fn dir_size(dir: &Path) -> Result<u64> {
+    let mut total = 0;
+    for entry in std::fs::read_dir(dir)? {
+        let entry = entry?;
+        let meta = entry.metadata()?;
+        if meta.is_file() {
+            total += meta.len();
+        }
+    }
+    Ok(total)
+}
+
+fn format_dir_size(dir: &Path) -> Result<String> {
+    let bytes = dir_size(dir)?;
+    let (val, unit) = if bytes >= 1024 * 1024 {
+        (bytes as f64 / (1024.0 * 1024.0), "MB")
+    } else if bytes >= 1024 {
+        (bytes as f64 / 1024.0, "KB")
+    } else {
+        (bytes as f64, "B")
+    };
+    Ok(format!("{val:.1} {unit} ({})", dir.display()))
+}
diff --git a/src/executor/shared/fifo.rs b/src/executor/shared/fifo.rs
index 723f921c..39b9533b 100644
--- a/src/executor/shared/fifo.rs
+++ b/src/executor/shared/fifo.rs
@@ -69,6 +69,15 @@ impl GenericFifo {
 pub struct FifoBenchmarkData {
     /// Name and version of the integration
     pub integration: Option<(String, String)>,
+    pub bench_pids: HashSet<pid_t>,
+}
+
+impl FifoBenchmarkData {
+    pub fn is_exec_harness(&self) -> bool {
+        self.integration
+            .as_ref()
+            .is_some_and(|(name, _)| name == "exec-harness")
+    }
 }
 
 pub struct RunnerFifo {
@@ -254,7 +263,10 @@ impl RunnerFifo {
                     );
                     let marker_result =
                         ExecutionTimestamps::new(&bench_order_by_timestamp, &markers);
-                    let fifo_data = FifoBenchmarkData { integration };
+                    let fifo_data = FifoBenchmarkData {
+                        integration,
+                        bench_pids,
+                    };
                     return Ok((marker_result, fifo_data, exit_status));
                 }
                 Err(e) => return Err(anyhow::Error::from(e)),
diff --git a/src/executor/wall_time/perf/debug_info.rs b/src/executor/wall_time/perf/debug_info.rs
index e6dd737e..3dfde863 100644
--- a/src/executor/wall_time/perf/debug_info.rs
+++ b/src/executor/wall_time/perf/debug_info.rs
@@ -2,13 +2,18 @@ use crate::executor::wall_time::perf::perf_map::ModuleSymbols;
 use crate::prelude::*;
 use addr2line::{fallible_iterator::FallibleIterator, gimli};
 use object::{Object, ObjectSection};
+use rayon::prelude::*;
 use runner_shared::debug_info::{DebugInfo, ModuleDebugInfo};
 use std::path::Path;
 
 type EndianRcSlice = gimli::EndianRcSlice<gimli::RunTimeEndian>;
 
 pub trait ModuleDebugInfoExt {
-    fn from_symbols<P: AsRef<Path>>(path: P, symbols: &ModuleSymbols) -> anyhow::Result<Self>
+    fn from_symbols<P: AsRef<Path>>(
+        path: P,
+        symbols: &ModuleSymbols,
+        load_bias: u64,
+    ) -> anyhow::Result<Self>
     where
         Self: Sized;
 
@@ -36,12 +41,15 @@ pub trait ModuleDebugInfoExt {
 
 impl ModuleDebugInfoExt for ModuleDebugInfo {
     /// Create debug info from existing symbols by looking up file/line in DWARF
-    fn from_symbols<P: AsRef<Path>>(path: P, symbols: &ModuleSymbols) -> anyhow::Result<Self> {
+    fn from_symbols<P: AsRef<Path>>(
+        path: P,
+        symbols: &ModuleSymbols,
+        load_bias: u64,
+    ) -> anyhow::Result<Self> {
         let content = std::fs::read(path.as_ref())?;
         let object = object::File::parse(&*content)?;
 
         let ctx = Self::create_dwarf_context(&object).context("Failed to create DWARF context")?;
-        let load_bias = symbols.load_bias();
         let (mut min_addr, mut max_addr) = (None, None);
         let debug_infos = symbols
             .symbols()
@@ -96,34 +104,26 @@ impl ModuleDebugInfoExt for ModuleDebugInfo {
     }
 }
 
-/// Represents all the modules inside a process and their debug info.
-pub struct ProcessDebugInfo {
-    modules: Vec<ModuleDebugInfo>,
-}
-
-impl ProcessDebugInfo {
-    pub fn new(
-        process_symbols: &crate::executor::wall_time::perf::perf_map::ProcessSymbols,
-    ) -> Self {
-        let mut modules = Vec::new();
-        for (path, module_symbols) in process_symbols.modules_with_symbols() {
-            match ModuleDebugInfo::from_symbols(path, module_symbols) {
-                Ok(module_debug_info) => {
-                    modules.push(module_debug_info);
-                }
+/// Compute debug info once per unique ELF path from deduplicated symbols.
+/// Returns a map of path -> ModuleDebugInfo with `load_bias: 0` (load bias is per-pid).
+pub fn debug_info_by_path(
+    symbols_by_path: &std::collections::HashMap<
+        std::path::PathBuf,
+        crate::executor::wall_time::perf::perf_map::ModuleSymbols,
+    >,
+) -> std::collections::HashMap<std::path::PathBuf, ModuleDebugInfo> {
+    symbols_by_path
+        .par_iter()
+        .filter_map(|(path, module_symbols)| {
+            match ModuleDebugInfo::from_symbols(path, module_symbols, 0) {
+                Ok(module_debug_info) => Some((path.clone(), module_debug_info)),
                 Err(error) => {
-                    trace!("Failed to load debug info for module {path:?}: {error}");
+                    trace!("Failed to load debug info for module {path:?}: {error}",);
+                    None
                 }
             }
-        }
-
-        Self { modules }
-    }
-
-    /// Returns the debug info modules for this process
-    pub fn modules(self) -> Vec<ModuleDebugInfo> {
-        self.modules
-    }
+        })
+        .collect()
 }
 
 #[cfg(test)]
@@ -141,8 +141,12 @@ mod tests {
             file_offset,
         )
         .unwrap();
-        let module_debug_info =
-            ModuleDebugInfo::from_symbols("testdata/perf_map/go_fib.bin", &module_symbols).unwrap();
+        let module_debug_info = ModuleDebugInfo::from_symbols(
+            "testdata/perf_map/go_fib.bin",
+            &module_symbols,
+            module_symbols.load_bias(),
+        )
+        .unwrap();
         insta::assert_debug_snapshot!(module_debug_info.debug_infos);
     }
 
@@ -160,6 +164,7 @@ mod tests {
         let mut module_debug_info = ModuleDebugInfo::from_symbols(
             "testdata/perf_map/cpp_my_benchmark.bin",
             &module_symbols,
+            module_symbols.load_bias(),
         )
         .unwrap();
 
@@ -176,7 +181,8 @@ mod tests {
             ModuleSymbols::new(MODULE_PATH, 0x00005555555a2000, 0x0000555555692000, 0x4d000)
                 .unwrap();
         let module_debug_info =
-            ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols).unwrap();
+            ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols, module_symbols.load_bias())
+                .unwrap();
         insta::assert_debug_snapshot!(module_debug_info.debug_infos);
     }
 
@@ -192,7 +198,8 @@ mod tests {
         )
         .unwrap();
         let module_debug_info =
-            ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols).unwrap();
+            ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols, module_symbols.load_bias())
+                .unwrap();
         insta::assert_debug_snapshot!(module_debug_info.debug_infos);
     }
 
@@ -205,7 +212,8 @@ mod tests {
         let module_symbols =
             ModuleSymbols::new(MODULE_PATH, start_addr, end_addr, file_offset).unwrap();
         let module_debug_info =
-            ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols).unwrap();
+            ModuleDebugInfo::from_symbols(MODULE_PATH, &module_symbols, module_symbols.load_bias())
+                .unwrap();
         insta::assert_debug_snapshot!(module_debug_info.debug_infos);
     }
 }
diff --git a/src/executor/wall_time/perf/jit_dump.rs b/src/executor/wall_time/perf/jit_dump.rs
index 9f92c4f4..324312f4 100644
--- a/src/executor/wall_time/perf/jit_dump.rs
+++ b/src/executor/wall_time/perf/jit_dump.rs
@@ -5,10 +5,12 @@ use crate::{
 use linux_perf_data::jitdump::{JitDumpReader, JitDumpRecord};
 use runner_shared::unwind_data::UnwindData;
 use std::{
-    collections::HashSet,
+    collections::{HashMap, HashSet},
     path::{Path, PathBuf},
 };
 
+use super::parse_perf_file::{SymbolPidMappingWithFullPath, UnwindDataPidMappingWithFullPath};
+
 struct JitDump {
     path: PathBuf,
 }
@@ -42,16 +44,22 @@ impl JitDump {
         Ok(ModuleSymbols::from_symbols(symbols))
     }
 
-    /// Parses the JIT dump file and converts it into a list of `UnwindData`.
+    /// Parses the JIT dump file and converts it into deduplicated unwind data + pid mappings.
     ///
     /// The JIT dump file contains synthetic `eh_frame` data for jitted functions. This can be parsed and
-    /// then converted to `UnwindData` which is used for stack unwinding.
+    /// then converted to `UnwindData` + `UnwindDataPidMappingWithFullPath` which is used for stack unwinding.
     ///
     /// See: https://github.com/python/cpython/blob/main/Python/perf_jit_trampoline.c
-    pub fn into_unwind_data(self) -> Result<Vec<UnwindData>> {
+    pub fn into_unwind_data(
+        self,
+    ) -> Result<(
+        HashMap<String, UnwindData>,
+        Vec<UnwindDataPidMappingWithFullPath>,
+    )> {
         let file = std::fs::File::open(self.path)?;
 
-        let mut jit_unwind_data = Vec::new();
+        let mut unwind_data_by_path = HashMap::new();
+        let mut pid_mappings = Vec::new();
         let mut current_unwind_info: Option<(Vec<u8>, Vec<u8>)> = None;
 
         let mut reader = JitDumpReader::new(file)?;
@@ -72,16 +80,23 @@ impl JitDump {
                         continue;
                     };
 
-                    jit_unwind_data.push(UnwindData {
-                        path: format!("jit_{name}"),
+                    let path = format!("jit_{name}");
+                    unwind_data_by_path.insert(
+                        path.clone(),
+                        UnwindData {
+                            path: path.clone(),
+                            base_svma: 0,
+                            eh_frame_hdr,
+                            eh_frame_hdr_svma: 0..0,
+                            eh_frame,
+                            eh_frame_svma: 0..0,
+                        },
+                    );
+                    pid_mappings.push(UnwindDataPidMappingWithFullPath {
+                        path,
                         timestamp: Some(raw_record.timestamp),
                         avma_range: avma_start..avma_end,
                         base_avma: 0,
-                        eh_frame_hdr,
-                        eh_frame_hdr_svma: 0..0,
-                        eh_frame,
-                        eh_frame_svma: 0..0,
-                        base_svma: 0,
                     });
                 }
                 JitDumpRecord::CodeUnwindingInfo(record) => {
@@ -97,15 +112,33 @@ impl JitDump {
             }
         }
 
-        Ok(jit_unwind_data)
+        Ok((unwind_data_by_path, pid_mappings))
     }
 }
 
-/// Converts all the `jit-<pid>.dump` into unwind data and copies it to the profile folder.
+/// Converts all the `jit-<pid>.dump` into symbols, unwind data, and perf maps.
+///
+/// Returns:
+/// - JIT symbols deduplicated by path
+/// - Per-pid symbol mappings (load_bias = 0 for JIT)
+/// - JIT unwind data deduplicated by path
+/// - Per-pid unwind mappings
 pub async fn harvest_perf_jit_for_pids(
     profile_folder: &Path,
     pids: &HashSet<libc::pid_t>,
-) -> Result<()> {
+) -> Result<(
+    HashMap<PathBuf, ModuleSymbols>,
+    HashMap<i32, Vec<SymbolPidMappingWithFullPath>>,
+    HashMap<String, UnwindData>,
+    HashMap<i32, Vec<UnwindDataPidMappingWithFullPath>>,
+)> {
+    let mut all_symbols_by_path = HashMap::new();
+    let mut all_symbol_mappings_by_pid: HashMap<i32, Vec<SymbolPidMappingWithFullPath>> =
+        HashMap::new();
+    let mut all_unwind_data = HashMap::new();
+    let mut all_unwind_data_mappings_by_pid: HashMap<i32, Vec<UnwindDataPidMappingWithFullPath>> =
+        HashMap::new();
+
     for pid in pids {
         let name = format!("jit-{pid}.dump");
         let path = PathBuf::from("/tmp").join(&name);
@@ -115,7 +148,8 @@ pub async fn harvest_perf_jit_for_pids(
         }
         debug!("Found JIT dump file: {path:?}");
 
-        // Append the symbols to the existing perf map file
+        // Extract JIT symbols into deduplicated structure
+        let jit_key = PathBuf::from(format!("jit-{pid}"));
         let symbols = match JitDump::new(path.clone()).into_perf_map() {
             Ok(symbols) => symbols,
             Err(error) => {
@@ -123,20 +157,39 @@ pub async fn harvest_perf_jit_for_pids(
                 continue;
             }
         };
+
+        // Also write to perf-<pid>.map for harvested Python perf maps compatibility
         symbols.append_to_file(profile_folder.join(format!("perf-{pid}.map")))?;
 
-        let unwind_data = match JitDump::new(path).into_unwind_data() {
-            Ok(unwind_data) => unwind_data,
+        all_symbols_by_path.insert(jit_key.clone(), symbols);
+        all_symbol_mappings_by_pid
+            .entry(*pid)
+            .or_default()
+            .push(SymbolPidMappingWithFullPath {
+                path: jit_key,
+                load_bias: 0,
+                mappings: vec![],
+            });
+
+        let (unwind_data, pid_mappings) = match JitDump::new(path).into_unwind_data() {
+            Ok(data) => data,
             Err(error) => {
                 warn!("Failed to convert jit dump into unwind data: {error:?}");
                 continue;
             }
         };
 
-        for module in unwind_data {
-            module.save_to(profile_folder, *pid)?;
-        }
+        all_unwind_data.extend(unwind_data);
+        all_unwind_data_mappings_by_pid
+            .entry(*pid)
+            .or_default()
+            .extend(pid_mappings);
     }
 
-    Ok(())
+    Ok((
+        all_symbols_by_path,
+        all_symbol_mappings_by_pid,
+        all_unwind_data,
+        all_unwind_data_mappings_by_pid,
+    ))
 }
diff --git a/src/executor/wall_time/perf/mod.rs b/src/executor/wall_time/perf/mod.rs
index 6792ba99..d258e1d4 100644
--- a/src/executor/wall_time/perf/mod.rs
+++ b/src/executor/wall_time/perf/mod.rs
@@ -10,8 +10,6 @@ use crate::executor::helpers::run_with_sudo::run_with_sudo;
 use crate::executor::helpers::run_with_sudo::wrap_with_sudo;
 use crate::executor::shared::fifo::FifoBenchmarkData;
 use crate::executor::shared::fifo::RunnerFifo;
-use crate::executor::valgrind::helpers::ignored_objects_path::get_objects_path_to_ignore;
-use crate::executor::wall_time::perf::debug_info::ProcessDebugInfo;
 use crate::executor::wall_time::perf::jit_dump::harvest_perf_jit_for_pids;
 use crate::executor::wall_time::perf::perf_executable::get_working_perf_executable;
 use crate::prelude::*;
@@ -20,19 +18,19 @@ use fifo::PerfFifo;
 use parse_perf_file::MemmapRecordsOutput;
 use perf_executable::get_compression_flags;
 use perf_executable::get_event_flags;
-use rayon::prelude::*;
 use runner_shared::artifacts::ArtifactExt;
 use runner_shared::artifacts::ExecutionTimestamps;
-use runner_shared::debug_info::ModuleDebugInfo;
 use runner_shared::fifo::Command as FifoCommand;
 use runner_shared::fifo::IntegrationMode;
 use runner_shared::metadata::PerfMetadata;
 use std::path::Path;
 use std::path::PathBuf;
-use std::{cell::OnceCell, collections::HashMap, process::ExitStatus};
+use std::{cell::OnceCell, process::ExitStatus};
 
 mod jit_dump;
+mod naming;
 mod parse_perf_file;
+mod save_artifacts;
 mod setup;
 
 pub mod debug_info;
@@ -42,7 +40,7 @@ pub mod perf_executable;
 pub mod perf_map;
 pub mod unwind_data;
 
-const PERF_METADATA_CURRENT_VERSION: u64 = 1;
+const PERF_METADATA_CURRENT_VERSION: u64 = 3;
 const PERF_PIPEDATA_FILE_NAME: &str = "perf.pipedata";
 
 pub struct PerfRunner {
@@ -283,11 +281,19 @@ impl BenchmarkData {
         let path_ref = path.as_ref();
 
         debug!("Reading perf data from file for mmap extraction");
+        let pid_filter = if self.fifo_data.is_exec_harness() {
+            parse_perf_file::PidFilter::All
+        } else {
+            parse_perf_file::PidFilter::TrackedPids(self.fifo_data.bench_pids.clone())
+        };
         let MemmapRecordsOutput {
-            symbols_by_pid,
-            unwind_data_by_pid,
+            mut symbols_by_path,
+            mut pid_symbol_mappings_by_pid,
+            mut unwind_data_by_path,
+            mut pid_unwind_data_mappings_by_pid,
+            tracked_pids,
         } = {
-            parse_perf_file::parse_for_memmap2(perf_file_path).map_err(|e| {
+            parse_perf_file::parse_for_memmap2(perf_file_path, pid_filter).map_err(|e| {
                 error!("Failed to parse perf file: {e}");
                 BenchmarkDataSaveError::FailedToParsePerfFile
             })?
@@ -296,38 +302,48 @@ impl BenchmarkData {
         // Harvest the perf maps generated by python. This will copy the perf
         // maps from /tmp to the profile folder. We have to write our own perf
         // maps to these files AFTERWARDS, otherwise it'll be overwritten!
-        let bench_pids = symbols_by_pid.keys().copied().collect();
-        debug!("Harvesting perf maps and jit dumps for pids: {bench_pids:?}");
-        harvest_perf_maps_for_pids(path_ref, &bench_pids)
+        debug!("Harvesting perf maps and jit dumps for pids: {tracked_pids:?}");
+        harvest_perf_maps_for_pids(path_ref, &tracked_pids)
             .await
             .map_err(|e| {
                 error!("Failed to harvest perf maps: {e}");
                 BenchmarkDataSaveError::FailedToHarvestPerfMaps
             })?;
-        harvest_perf_jit_for_pids(path_ref, &bench_pids)
-            .await
-            .map_err(|e| {
-                error!("Failed to harvest jit dumps: {e}");
-                BenchmarkDataSaveError::FailedToHarvestJitDumps
-            })?;
+        let (jit_symbols, jit_symbol_mappings, jit_unwind_data, jit_unwind_mappings) =
+            harvest_perf_jit_for_pids(path_ref, &tracked_pids)
+                .await
+                .map_err(|e| {
+                    error!("Failed to harvest jit dumps: {e}");
+                    BenchmarkDataSaveError::FailedToHarvestJitDumps
+                })?;
+        symbols_by_path.extend(jit_symbols);
+        for (pid, mappings) in jit_symbol_mappings {
+            pid_symbol_mappings_by_pid
+                .entry(pid)
+                .or_default()
+                .extend(mappings);
+        }
+        unwind_data_by_path.extend(jit_unwind_data);
+        for (pid, mappings) in jit_unwind_mappings {
+            pid_unwind_data_mappings_by_pid
+                .entry(pid)
+                .or_default()
+                .extend(mappings);
+        }
 
-        debug!("Saving symbols addresses");
-        symbols_by_pid.par_iter().for_each(|(_, proc_sym)| {
-            proc_sym.save_to(path_ref).unwrap();
-        });
-
-        // Collect debug info for each process by looking up file/line for symbols
-        debug!("Saving debug_info");
-        let debug_info_by_pid: HashMap<i32, Vec<ModuleDebugInfo>> = symbols_by_pid
-            .par_iter()
-            .map(|(pid, proc_sym)| (*pid, ProcessDebugInfo::new(proc_sym).modules()))
-            .collect();
-
-        unwind_data_by_pid.par_iter().for_each(|(pid, modules)| {
-            modules.iter().for_each(|module| {
-                module.save_to(path_ref, *pid).unwrap();
-            });
-        });
+        let symbol_pid_mappings_by_pid =
+            save_artifacts::save_symbols(path_ref, &symbols_by_path, &pid_symbol_mappings_by_pid);
+
+        let (debug_info, debug_info_pid_mappings_by_pid) =
+            save_artifacts::save_debug_info(&symbols_by_path, &pid_symbol_mappings_by_pid);
+
+        let unwind_data_pid_mappings_by_pid = save_artifacts::save_unwind_data(
+            path_ref,
+            &unwind_data_by_path,
+            pid_unwind_data_mappings_by_pid,
+        );
+
+        let ignored_modules = save_artifacts::collect_ignored_modules(&pid_symbol_mappings_by_pid);
 
         debug!("Saving metadata");
         #[allow(deprecated)]
@@ -339,52 +355,13 @@ impl BenchmarkData {
                 .clone()
                 .ok_or(BenchmarkDataSaveError::MissingIntegration)?,
             uri_by_ts: self.marker_result.uri_by_ts.clone(),
-            ignored_modules: {
-                let mut to_ignore = vec![];
-
-                // Check if any of the ignored modules has been loaded in the process
-                for ignore_path in get_objects_path_to_ignore() {
-                    for proc in symbols_by_pid.values() {
-                        if let Some(mapping) = proc.module_mapping(&ignore_path) {
-                            let (Some((base_addr, _)), Some((_, end_addr))) = (
-                                mapping.iter().min_by_key(|(base_addr, _)| base_addr),
-                                mapping.iter().max_by_key(|(_, end_addr)| end_addr),
-                            ) else {
-                                continue;
-                            };
-
-                            to_ignore.push((ignore_path.clone(), *base_addr, *end_addr));
-                        }
-                    }
-                }
-
-                // When python is statically linked, we'll not find it in the ignored modules. Add it manually:
-                let python_modules = symbols_by_pid.values().filter_map(|proc| {
-                    proc.loaded_modules().find(|path| {
-                        path.file_name()
-                            .map(|name| name.to_string_lossy().starts_with("python"))
-                            .unwrap_or(false)
-                    })
-                });
-                for path in python_modules {
-                    if let Some(mapping) = symbols_by_pid
-                        .values()
-                        .find_map(|proc| proc.module_mapping(path))
-                    {
-                        let (Some((base_addr, _)), Some((_, end_addr))) = (
-                            mapping.iter().min_by_key(|(base_addr, _)| base_addr),
-                            mapping.iter().max_by_key(|(_, end_addr)| end_addr),
-                        ) else {
-                            continue;
-                        };
-                        to_ignore.push((path.to_string_lossy().into(), *base_addr, *end_addr));
-                    }
-                }
-
-                to_ignore
-            },
+            ignored_modules,
             markers: self.marker_result.markers.clone(),
-            debug_info_by_pid,
+            debug_info,
+            debug_info_pid_mappings_by_pid,
+            unwind_data_pid_mappings_by_pid,
+            symbol_pid_mappings_by_pid,
+            debug_info_by_pid: Default::default(), // No longer used
         };
         metadata.save_to(&path).unwrap();
 
diff --git a/src/executor/wall_time/perf/naming.rs b/src/executor/wall_time/perf/naming.rs
new file mode 100644
index 00000000..ba0e0a33
--- /dev/null
+++ b/src/executor/wall_time/perf/naming.rs
@@ -0,0 +1,58 @@
+use std::hash::{DefaultHasher, Hash, Hasher};
+
+/// Convert a full path to a semantic key suitable for filenames.
+///
+/// The key is `{basename}-{hash:08x}` where `basename` is the last component
+/// of the path and `hash` is a truncated hash of the full path. This gives
+/// human-readable filenames while still being unique when multiple libraries
+/// share the same basename.
+pub fn path_to_semantic_key(path: &str) -> String {
+    let basename = path.rsplit('/').next().unwrap_or(path);
+    let mut hasher = DefaultHasher::new();
+    path.hash(&mut hasher);
+    let hash = hasher.finish() as u32;
+    format!("{basename}-{hash:08x}")
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_normal_path() {
+        let key = path_to_semantic_key("/usr/lib/libc.so.6");
+        assert!(key.starts_with("libc.so.6-"));
+        assert_eq!(key.len(), "libc.so.6-".len() + 8);
+    }
+
+    #[test]
+    fn test_jit_path() {
+        let key = path_to_semantic_key("/tmp/jit-12345.so");
+        assert!(key.starts_with("jit-12345.so-"));
+        assert_eq!(key.len(), "jit-12345.so-".len() + 8);
+    }
+
+    #[test]
+    fn test_same_basename_different_paths() {
+        let key1 = path_to_semantic_key("/usr/lib/libc.so.6");
+        let key2 = path_to_semantic_key("/opt/lib/libc.so.6");
+        // Both start with the same basename
+        assert!(key1.starts_with("libc.so.6-"));
+        assert!(key2.starts_with("libc.so.6-"));
+        // But have different hashes
+        assert_ne!(key1, key2);
+    }
+
+    #[test]
+    fn test_bare_filename() {
+        let key = path_to_semantic_key("libfoo.so");
+        assert!(key.starts_with("libfoo.so-"));
+    }
+
+    #[test]
+    fn test_deterministic() {
+        let key1 = path_to_semantic_key("/usr/lib/libc.so.6");
+        let key2 = path_to_semantic_key("/usr/lib/libc.so.6");
+        assert_eq!(key1, key2);
+    }
+}
diff --git a/src/executor/wall_time/perf/parse_perf_file.rs b/src/executor/wall_time/perf/parse_perf_file.rs
index 729c7b4a..d332ea3a 100644
--- a/src/executor/wall_time/perf/parse_perf_file.rs
+++ b/src/executor/wall_time/perf/parse_perf_file.rs
@@ -1,5 +1,5 @@
-use super::perf_map::ProcessSymbols;
-use super::unwind_data::UnwindDataExt;
+use super::perf_map::ModuleSymbols;
+use super::unwind_data::unwind_data_from_elf;
 use crate::prelude::*;
 use libc::pid_t;
 use linux_perf_data::PerfFileReader;
@@ -8,16 +8,51 @@ use linux_perf_data::linux_perf_event_reader::EventRecord;
 use linux_perf_data::linux_perf_event_reader::RecordType;
 use runner_shared::unwind_data::UnwindData;
 use std::collections::HashMap;
+use std::collections::HashSet;
 use std::path::Path;
+use std::path::PathBuf;
+
+/// Per-pid mounting info for a specific ELF, referencing it by path.
+#[derive(Debug)]
+pub struct UnwindDataPidMappingWithFullPath {
+    pub path: String,
+    pub timestamp: Option<u64>,
+    pub avma_range: std::ops::Range<u64>,
+    pub base_avma: u64,
+}
+
+/// Per-pid mounting info for symbols, referencing the deduplicated symbols by path.
+#[derive(Debug)]
+pub struct SymbolPidMappingWithFullPath {
+    pub path: PathBuf,
+    pub load_bias: u64,
+    pub mappings: Vec<(u64, u64)>,
+}
 
 pub struct MemmapRecordsOutput {
-    pub symbols_by_pid: HashMap<pid_t, ProcessSymbols>,
-    pub unwind_data_by_pid: HashMap<pid_t, Vec<UnwindData>>,
+    /// Deduplicated symbols keyed by ELF path
+    pub symbols_by_path: HashMap<PathBuf, ModuleSymbols>,
+    /// Per-pid mounting info referencing symbols by path
+    pub pid_symbol_mappings_by_pid: HashMap<pid_t, Vec<SymbolPidMappingWithFullPath>>,
+    /// Deduplicated unwind data keyed by ELF path
+    pub unwind_data_by_path: HashMap<String, UnwindData>,
+    /// Per-pid mounting info referencing unwind data by path
+    pub pid_unwind_data_mappings_by_pid: HashMap<pid_t, Vec<UnwindDataPidMappingWithFullPath>>,
+    pub tracked_pids: HashSet<pid_t>,
 }
 
-pub(super) fn parse_for_memmap2<P: AsRef<Path>>(perf_file_path: P) -> Result<MemmapRecordsOutput> {
-    let mut symbols_by_pid = HashMap::<pid_t, ProcessSymbols>::new();
-    let mut unwind_data_by_pid = HashMap::<pid_t, Vec<UnwindData>>::new();
+/// Parse the perf file at `perf_file_path` and look for MMAP2 records for the given `pids`.
+/// If the pids filter is empty, all MMAP2 records will be parsed.
+///
+/// Returns process symbols and unwind data for the executable mappings found in the perf file.
+pub fn parse_for_memmap2<P: AsRef<Path>>(
+    perf_file_path: P,
+    mut pid_filter: PidFilter,
+) -> Result<MemmapRecordsOutput> {
+    let mut symbols_by_path = HashMap::<PathBuf, ModuleSymbols>::new();
+    let mut symbol_mappings_by_pid = HashMap::<pid_t, Vec<SymbolPidMappingWithFullPath>>::new();
+    let mut unwind_data_by_path = HashMap::<String, UnwindData>::new();
+    let mut unwind_mappings_by_pid = HashMap::<pid_t, Vec<UnwindDataPidMappingWithFullPath>>::new();
 
     // 1MiB buffer
     let reader = std::io::BufReader::with_capacity(
@@ -37,85 +72,207 @@ pub(super) fn parse_for_memmap2<P: AsRef<Path>>(perf_file_path: P) -> Result<Mem
 
         // Check the type from the raw record to avoid parsing overhead since we do not care about
         // most records.
-        if record.record_type != RecordType::MMAP2 {
-            continue;
-        }
+        match record.record_type {
+            RecordType::FORK => {
+                // Process fork events to track children (and children of children) of filtered PIDs
+                let Ok(parsed_record) = record.parse() else {
+                    continue;
+                };
 
-        let Ok(parsed_record) = record.parse() else {
-            continue;
-        };
+                let EventRecord::Fork(fork_record) = parsed_record else {
+                    continue;
+                };
 
-        // Should never fail since we already checked the type in the raw record
-        let EventRecord::Mmap2(record) = parsed_record else {
-            continue;
-        };
+                if pid_filter.add_child_if_parent_tracked(fork_record.ppid, fork_record.pid) {
+                    trace!(
+                        "Fork: Tracking child PID {} from parent PID {}",
+                        fork_record.pid, fork_record.ppid
+                    );
+                }
+            }
+            RecordType::MMAP2 => {
+                let Ok(parsed_record) = record.parse() else {
+                    continue;
+                };
 
-        // Check PROT_EXEC early to avoid string allocation for non-executable mappings
-        if record.protection as i32 & libc::PROT_EXEC == 0 {
-            continue;
+                // Should never fail since we already checked the type in the raw record
+                let EventRecord::Mmap2(mmap2_record) = parsed_record else {
+                    continue;
+                };
+
+                // Filter on pid early to avoid string allocation for unwanted records
+                if !pid_filter.should_include(mmap2_record.pid) {
+                    continue;
+                }
+
+                process_mmap2_record(
+                    mmap2_record,
+                    &mut symbols_by_path,
+                    &mut symbol_mappings_by_pid,
+                    &mut unwind_data_by_path,
+                    &mut unwind_mappings_by_pid,
+                );
+            }
+            _ => continue,
         }
+    }
 
-        // Filter on raw bytes before allocating a String
-        let path_slice: &[u8] = &record.path.as_slice();
+    // Retrieve the set of PIDs we ended up tracking after processing all records
+    let tracked_pids: HashSet<pid_t> = match pid_filter {
+        PidFilter::All => symbol_mappings_by_pid.keys().copied().collect(),
+        PidFilter::TrackedPids(tracked) => tracked,
+    };
 
-        // Skip anonymous mappings
-        if path_slice == b"//anon" {
-            continue;
+    Ok(MemmapRecordsOutput {
+        symbols_by_path,
+        pid_symbol_mappings_by_pid: symbol_mappings_by_pid,
+        unwind_data_by_path,
+        pid_unwind_data_mappings_by_pid: unwind_mappings_by_pid,
+        tracked_pids,
+    })
+}
+
+/// PID filter for parsing perf records
+pub enum PidFilter {
+    /// Parse records for all PIDs
+    All,
+    /// Parse records only for specific PIDs and their children
+    TrackedPids(HashSet<pid_t>),
+}
+
+impl PidFilter {
+    /// Check if a PID should be included in parsing
+    fn should_include(&self, pid: pid_t) -> bool {
+        match self {
+            PidFilter::All => true,
+            PidFilter::TrackedPids(tracked_pids) => tracked_pids.contains(&pid),
         }
+    }
 
-        // Skip special mappings like [vdso], [heap], etc.
-        if path_slice.first() == Some(&b'[') && path_slice.last() == Some(&b']') {
-            continue;
+    /// Add a child PID to the filter if we're tracking its parent
+    /// Returns true if the child was added
+    fn add_child_if_parent_tracked(&mut self, parent_pid: pid_t, child_pid: pid_t) -> bool {
+        match self {
+            PidFilter::All => false, // Already tracking all PIDs
+            PidFilter::TrackedPids(tracked_pids) => {
+                if tracked_pids.contains(&parent_pid) {
+                    tracked_pids.insert(child_pid)
+                } else {
+                    false
+                }
+            }
         }
+    }
+}
 
-        let record_path_string = String::from_utf8_lossy(path_slice).into_owned();
-        let end_addr = record.address + record.length;
+/// Process a single MMAP2 record and add it to the symbols and unwind data maps
+fn process_mmap2_record(
+    record: linux_perf_data::linux_perf_event_reader::Mmap2Record,
+    symbols_by_path: &mut HashMap<PathBuf, ModuleSymbols>,
+    symbol_mappings_by_pid: &mut HashMap<pid_t, Vec<SymbolPidMappingWithFullPath>>,
+    unwind_data_by_path: &mut HashMap<String, UnwindData>,
+    unwind_mappings_by_pid: &mut HashMap<pid_t, Vec<UnwindDataPidMappingWithFullPath>>,
+) {
+    // Check PROT_EXEC early to avoid string allocation for non-executable mappings
+    if record.protection as i32 & libc::PROT_EXEC == 0 {
+        return;
+    }
 
-        trace!(
-            "Mapping: Pid {}: {:016x}-{:016x} {:08x} {:?} (Prot {:?})",
-            record.pid,
-            record.address,
-            end_addr,
-            record.page_offset,
-            record_path_string,
-            record.protection,
-        );
-        symbols_by_pid
-            .entry(record.pid)
-            .or_insert(ProcessSymbols::new(record.pid))
-            .add_mapping(
-                record.pid,
-                &record_path_string,
-                record.address,
-                end_addr,
-                record.page_offset,
-            );
+    // Filter on raw bytes before allocating a String
+    let path_slice: &[u8] = &record.path.as_slice();
 
-        match UnwindData::new(
-            record_path_string.as_bytes(),
-            record.page_offset,
+    // Skip anonymous mappings
+    if path_slice == b"//anon" {
+        return;
+    }
+
+    // Skip special mappings like [vdso], [heap], etc.
+    if path_slice.first() == Some(&b'[') && path_slice.last() == Some(&b']') {
+        return;
+    }
+
+    let record_path_string = String::from_utf8_lossy(path_slice).into_owned();
+    let record_path = PathBuf::from(&record_path_string);
+    let end_addr = record.address + record.length;
+
+    trace!(
+        "Mapping: Pid {}: {:016x}-{:016x} {:08x} {:?} (Prot {:?})",
+        record.pid,
+        record.address,
+        end_addr,
+        record.page_offset,
+        record_path_string,
+        record.protection,
+    );
+
+    // Extract symbols once per ELF path (pid-agnostic)
+    if !symbols_by_path.contains_key(&record_path) {
+        match ModuleSymbols::from_elf(&record_path) {
+            Ok(symbols) => {
+                symbols_by_path.insert(record_path.clone(), symbols);
+            }
+            Err(error) => {
+                debug!("Failed to load symbols for module {record_path_string}: {error}");
+            }
+        }
+    }
+
+    // Compute per-pid load_bias and store mapping info
+    if symbols_by_path.contains_key(&record_path) {
+        match ModuleSymbols::compute_load_bias(
+            &record_path,
             record.address,
             end_addr,
-            None,
+            record.page_offset,
         ) {
-            Ok(unwind_data) => {
-                unwind_data_by_pid
-                    .entry(record.pid)
-                    .or_default()
-                    .push(unwind_data);
-                trace!(
-                    "Added unwind data for {record_path_string} ({:x} - {:x})",
-                    record.address, end_addr
-                );
+            Ok(load_bias) => {
+                let pid_mappings = symbol_mappings_by_pid.entry(record.pid).or_default();
+                // Find existing mapping for this path in this pid, or create a new one
+                if let Some(existing) = pid_mappings.iter_mut().find(|m| m.path == record_path) {
+                    existing.mappings.push((record.address, end_addr));
+                } else {
+                    pid_mappings.push(SymbolPidMappingWithFullPath {
+                        path: record_path.clone(),
+                        load_bias,
+                        mappings: vec![(record.address, end_addr)],
+                    });
+                }
             }
             Err(error) => {
-                debug!("Failed to create unwind data for module {record_path_string}: {error}");
+                debug!("Failed to compute load bias for {record_path_string}: {error}");
             }
         }
     }
 
-    Ok(MemmapRecordsOutput {
-        symbols_by_pid,
-        unwind_data_by_pid,
-    })
+    // NOTE: We still read the ELF file for every mapping even if we've already seen this path.
+    // The V3 data will be deduplicated below, but there's a possible optimization to skip
+    // the ELF read entirely when the path is already known, by caching the necessary data to
+    // compute load_bias.
+    match unwind_data_from_elf(
+        record_path_string.as_bytes(),
+        record.page_offset,
+        record.address,
+        end_addr,
+        None,
+    ) {
+        Ok((v3, mapping)) => {
+            // Store pid-agnostic data once per path
+            unwind_data_by_path
+                .entry(record_path_string.clone())
+                .or_insert(v3);
+
+            // Always store per-pid mounting info
+            unwind_mappings_by_pid
+                .entry(record.pid)
+                .or_default()
+                .push(mapping);
+            trace!(
+                "Added unwind data for {record_path_string} ({:x} - {:x})",
+                record.address, end_addr
+            );
+        }
+        Err(error) => {
+            debug!("Failed to create unwind data for module {record_path_string}: {error}");
+        }
+    }
 }
diff --git a/src/executor/wall_time/perf/perf_map.rs b/src/executor/wall_time/perf/perf_map.rs
index f5bea6f0..b43fad5f 100644
--- a/src/executor/wall_time/perf/perf_map.rs
+++ b/src/executor/wall_time/perf/perf_map.rs
@@ -1,12 +1,10 @@
 use crate::executor::wall_time::perf::elf_helper;
-use crate::prelude::*;
-use libc::pid_t;
 use object::{Object, ObjectSymbol, ObjectSymbolTable};
+use runner_shared::perf_map::SYMBOLS_MAP_SUFFIX;
 use std::{
-    collections::HashMap,
     fmt::Debug,
     io::{BufWriter, Write},
-    path::{Path, PathBuf},
+    path::Path,
 };
 
 #[derive(Hash, PartialEq, Eq, Clone)]
@@ -44,16 +42,8 @@ impl ModuleSymbols {
         &self.symbols
     }
 
-    pub fn load_bias(&self) -> u64 {
-        self.load_bias
-    }
-
-    pub fn new<P: AsRef<Path>>(
-        path: P,
-        runtime_start_addr: u64,
-        runtime_end_addr: u64,
-        runtime_offset: u64,
-    ) -> anyhow::Result<Self> {
+    /// Extract symbols from an ELF file (pid-agnostic, load_bias = 0).
+    pub fn from_elf<P: AsRef<Path>>(path: P) -> anyhow::Result<Self> {
         let content = std::fs::read(path.as_ref())?;
         let object = object::File::parse(&*content)?;
 
@@ -126,17 +116,36 @@ impl ModuleSymbols {
             return Err(anyhow::anyhow!("No symbols found"));
         }
 
-        let load_bias = elf_helper::compute_load_bias(
+        Ok(Self {
+            load_bias: 0,
+            symbols,
+        })
+    }
+
+    /// Compute the load_bias for this module given runtime addresses.
+    /// This reads the ELF file again to find the matching PT_LOAD segment.
+    pub fn compute_load_bias<P: AsRef<Path>>(
+        path: P,
+        runtime_start_addr: u64,
+        runtime_end_addr: u64,
+        runtime_offset: u64,
+    ) -> anyhow::Result<u64> {
+        let content = std::fs::read(path.as_ref())?;
+        let object = object::File::parse(&*content)?;
+        elf_helper::compute_load_bias(
             runtime_start_addr,
             runtime_end_addr,
             runtime_offset,
             &object,
-        )?;
-
-        Ok(Self { load_bias, symbols })
+        )
     }
 
     pub fn append_to_file<P: AsRef<Path>>(&self, path: P) -> anyhow::Result<()> {
+        self.write_to_file(path, self.load_bias)
+    }
+
+    /// Write symbols to a file applying the given load_bias.
+    pub fn write_to_file<P: AsRef<Path>>(&self, path: P, load_bias: u64) -> anyhow::Result<()> {
         let file = std::fs::OpenOptions::new()
             .create(true)
             .append(true)
@@ -148,7 +157,7 @@ impl ModuleSymbols {
             writeln!(
                 writer,
                 "{:x} {:x} {}",
-                symbol.addr.wrapping_add(self.load_bias),
+                symbol.addr.wrapping_add(load_bias),
                 symbol.size,
                 symbol.name
             )?;
@@ -156,81 +165,32 @@ impl ModuleSymbols {
 
         Ok(())
     }
-}
-
-/// Represents all the modules inside a process and their symbols.
-pub struct ProcessSymbols {
-    pid: pid_t,
-    module_mappings: HashMap<PathBuf, Vec<(u64, u64)>>,
-    modules: HashMap<PathBuf, ModuleSymbols>,
-}
-
-impl ProcessSymbols {
-    pub fn new(pid: pid_t) -> Self {
-        Self {
-            pid,
-            module_mappings: HashMap::new(),
-            modules: HashMap::new(),
-        }
-    }
-
-    pub fn add_mapping<P: AsRef<Path>>(
-        &mut self,
-        pid: pid_t,
-        module_path: P,
-        start_addr: u64,
-        end_addr: u64,
-        file_offset: u64,
-    ) {
-        if self.pid != pid {
-            warn!("pid mismatch: {} != {}", self.pid, pid);
-            return;
-        }
-
-        let path = module_path.as_ref().to_path_buf();
-        match ModuleSymbols::new(module_path, start_addr, end_addr, file_offset) {
-            Ok(symbol) => {
-                self.modules.entry(path.clone()).or_insert(symbol);
-            }
-            Err(error) => {
-                debug!("Failed to load symbols for module {path:?}: {error}");
-            }
-        }
-
-        self.module_mappings
-            .entry(path.clone())
-            .or_default()
-            .push((start_addr, end_addr));
-    }
 
-    pub fn loaded_modules(&self) -> impl Iterator<Item = &PathBuf> {
-        self.modules.keys()
-    }
-
-    pub fn modules_with_symbols(&self) -> impl Iterator<Item = (&PathBuf, &ModuleSymbols)> {
-        self.modules.iter()
+    /// Save symbols (at raw ELF addresses, no bias) to a keyed file.
+    pub fn save_to<P: AsRef<Path>>(&self, folder: P, key: &str) -> anyhow::Result<()> {
+        let path = folder.as_ref().join(format!("{key}.{SYMBOLS_MAP_SUFFIX}"));
+        self.write_to_file(path, 0)
     }
+}
 
-    pub fn module_mapping<P: AsRef<std::path::Path>>(
-        &self,
-        module_path: P,
-    ) -> Option<&[(u64, u64)]> {
-        self.module_mappings
-            .get(module_path.as_ref())
-            .map(|bounds| bounds.as_slice())
+#[cfg(test)]
+impl ModuleSymbols {
+    pub fn load_bias(&self) -> u64 {
+        self.load_bias
     }
 
-    pub fn save_to<P: AsRef<std::path::Path>>(&self, folder: P) -> anyhow::Result<()> {
-        if self.modules.is_empty() {
-            return Ok(());
-        }
-
-        let symbols_path = folder.as_ref().join(format!("perf-{}.map", self.pid));
-        for module in self.modules.values() {
-            module.append_to_file(&symbols_path)?;
-        }
+    /// Extract symbols from an ELF file and compute the load_bias from runtime addresses.
+    pub fn new<P: AsRef<Path>>(
+        path: P,
+        runtime_start_addr: u64,
+        runtime_end_addr: u64,
+        runtime_offset: u64,
+    ) -> anyhow::Result<Self> {
+        let mut module = Self::from_elf(path.as_ref())?;
+        module.load_bias =
+            Self::compute_load_bias(path, runtime_start_addr, runtime_end_addr, runtime_offset)?;
 
-        Ok(())
+        Ok(module)
     }
 }
 
diff --git a/src/executor/wall_time/perf/save_artifacts.rs b/src/executor/wall_time/perf/save_artifacts.rs
new file mode 100644
index 00000000..9853afaf
--- /dev/null
+++ b/src/executor/wall_time/perf/save_artifacts.rs
@@ -0,0 +1,190 @@
+use crate::executor::valgrind::helpers::ignored_objects_path::get_objects_path_to_ignore;
+use crate::executor::wall_time::perf::debug_info::debug_info_by_path;
+use crate::executor::wall_time::perf::naming;
+use crate::executor::wall_time::perf::perf_map::ModuleSymbols;
+use crate::prelude::*;
+use rayon::prelude::*;
+use runner_shared::debug_info::{DebugInfoPidMapping, ModuleDebugInfo};
+use runner_shared::perf_map::SymbolPidMapping;
+use runner_shared::unwind_data::{UnwindData, UnwindDataPidMapping};
+use std::collections::HashMap;
+use std::path::{Path, PathBuf};
+
+use super::parse_perf_file::{SymbolPidMappingWithFullPath, UnwindDataPidMappingWithFullPath};
+
+/// Save deduplicated symbol files to disk and build per-pid mappings using semantic keys.
+pub fn save_symbols(
+    profile_folder: &Path,
+    symbols_by_path: &HashMap<PathBuf, ModuleSymbols>,
+    pid_symbol_mappings_by_pid: &HashMap<i32, Vec<SymbolPidMappingWithFullPath>>,
+) -> HashMap<i32, Vec<SymbolPidMapping>> {
+    debug!("Saving symbols ({} unique entries)", symbols_by_path.len());
+
+    let path_to_key = build_path_to_key_map(symbols_by_path.keys().map(|p| p.to_string_lossy()));
+
+    symbols_by_path.par_iter().for_each(|(path, module)| {
+        let key = &path_to_key[&*path.to_string_lossy()];
+        module.save_to(profile_folder, key).unwrap();
+    });
+
+    convert_pid_mappings(pid_symbol_mappings_by_pid, |m| {
+        let key = path_to_key.get(&*m.path.to_string_lossy())?.clone();
+        Some((
+            key.clone(),
+            SymbolPidMapping {
+                perf_map_key: key,
+                load_bias: m.load_bias,
+            },
+        ))
+    })
+}
+
+/// Compute debug info from symbols, then build per-pid debug info mappings using semantic keys.
+pub fn save_debug_info(
+    symbols_by_path: &HashMap<PathBuf, ModuleSymbols>,
+    pid_symbol_mappings_by_pid: &HashMap<i32, Vec<SymbolPidMappingWithFullPath>>,
+) -> (
+    HashMap<String, ModuleDebugInfo>,
+    HashMap<i32, Vec<DebugInfoPidMapping>>,
+) {
+    debug!("Saving debug_info");
+
+    let debug_info_by_elf_path = debug_info_by_path(symbols_by_path);
+
+    let path_to_key =
+        build_path_to_key_map(debug_info_by_elf_path.keys().map(|p| p.to_string_lossy()));
+
+    let debug_info: HashMap<String, ModuleDebugInfo> = debug_info_by_elf_path
+        .into_iter()
+        .map(|(path, info)| (path_to_key[&*path.to_string_lossy()].clone(), info))
+        .collect();
+
+    let pid_mappings = convert_pid_mappings(pid_symbol_mappings_by_pid, |m| {
+        let key = path_to_key.get(&*m.path.to_string_lossy())?.clone();
+        Some((
+            key.clone(),
+            DebugInfoPidMapping {
+                debug_info_key: key,
+                load_bias: m.load_bias,
+            },
+        ))
+    });
+
+    (debug_info, pid_mappings)
+}
+
+/// Save deduplicated unwind data files to disk and build per-pid mappings using semantic keys.
+pub fn save_unwind_data(
+    profile_folder: &Path,
+    unwind_data_by_path: &HashMap<String, UnwindData>,
+    pid_unwind_data_mappings_by_pid: HashMap<i32, Vec<UnwindDataPidMappingWithFullPath>>,
+) -> HashMap<i32, Vec<UnwindDataPidMapping>> {
+    debug!(
+        "Saving unwind data ({} unique entries)",
+        unwind_data_by_path.len()
+    );
+
+    let path_to_key = build_path_to_key_map(unwind_data_by_path.keys().map(String::as_str));
+
+    unwind_data_by_path.par_iter().for_each(|(path, entry)| {
+        let key = &path_to_key[path.as_str()];
+        entry.save_to(profile_folder, key).unwrap();
+    });
+
+    pid_unwind_data_mappings_by_pid
+        .into_iter()
+        .map(|(pid, mappings)| {
+            let converted = mappings
+                .into_iter()
+                .map(|m| UnwindDataPidMapping {
+                    unwind_data_key: path_to_key[m.path.as_str()].clone(),
+                    timestamp: m.timestamp,
+                    avma_range: m.avma_range,
+                    base_avma: m.base_avma,
+                })
+                .sorted_by_key(|m| m.unwind_data_key.clone())
+                .collect();
+            (pid, converted)
+        })
+        .collect()
+}
+
+/// Collect ignored modules by finding known-ignored and python modules in the pid mappings.
+pub fn collect_ignored_modules(
+    pid_symbol_mappings_by_pid: &HashMap<i32, Vec<SymbolPidMappingWithFullPath>>,
+) -> Vec<(String, u64, u64)> {
+    let mut to_ignore = vec![];
+
+    // Check if any of the ignored modules has been loaded in the process
+    for ignore_path in get_objects_path_to_ignore() {
+        let ignore_pathbuf = PathBuf::from(&ignore_path);
+        for pid_mappings in pid_symbol_mappings_by_pid.values() {
+            if let Some(m) = pid_mappings.iter().find(|m| m.path == ignore_pathbuf) {
+                if let Some((base_addr, end_addr)) = address_range(&m.mappings) {
+                    to_ignore.push((ignore_path.clone(), base_addr, end_addr));
+                }
+            }
+        }
+    }
+
+    // When python is statically linked, we'll not find it in the ignored modules. Add it manually:
+    for pid_mappings in pid_symbol_mappings_by_pid.values() {
+        for m in pid_mappings {
+            let is_python = m
+                .path
+                .file_name()
+                .map(|name| name.to_string_lossy().starts_with("python"))
+                .unwrap_or(false);
+            if !is_python {
+                continue;
+            }
+            if let Some((base_addr, end_addr)) = address_range(&m.mappings) {
+                to_ignore.push((m.path.to_string_lossy().into(), base_addr, end_addr));
+            }
+        }
+    }
+
+    to_ignore
+}
+
+/// Extract the overall address range (min base, max end) from a list of mapping ranges.
+fn address_range(mappings: &[(u64, u64)]) -> Option<(u64, u64)> {
+    let &(base_addr, _) = mappings.iter().min_by_key(|(base, _)| base)?;
+    let &(_, end_addr) = mappings.iter().max_by_key(|(_, end)| end)?;
+    Some((base_addr, end_addr))
+}
+
+/// Convert raw per-pid mappings into final per-pid mappings using a converter closure.
+/// The closure returns `Option<(sort_key, T)>` to allow filtering and sorting.
+fn convert_pid_mappings<T, K: Ord>(
+    raw_mappings: &HashMap<i32, Vec<SymbolPidMappingWithFullPath>>,
+    converter: impl Fn(&SymbolPidMappingWithFullPath) -> Option<(K, T)>,
+) -> HashMap<i32, Vec<T>> {
+    raw_mappings
+        .iter()
+        .map(|(pid, mappings)| {
+            let converted = mappings
+                .iter()
+                .filter_map(&converter)
+                .sorted_by(|(k1, _), (k2, _)| k1.cmp(k2))
+                .map(|(_, v)| v)
+                .collect();
+            (*pid, converted)
+        })
+        .collect()
+}
+
+/// Build a `path_string → semantic_key` map from an iterator of path-like items.
+fn build_path_to_key_map<I, S>(paths: I) -> HashMap<String, String>
+where
+    I: Iterator<Item = S>,
+    S: AsRef<str>,
+{
+    paths
+        .map(|p| {
+            let s = p.as_ref().to_owned();
+            let key = naming::path_to_semantic_key(&s);
+            (s, key)
+        })
+        .collect()
+}
diff --git a/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__cpp_unwind_data.snap b/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__cpp_unwind_data.snap
index 8f06aff1..b58e2a19 100644
--- a/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__cpp_unwind_data.snap
+++ b/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__cpp_unwind_data.snap
@@ -1,17 +1,22 @@
 ---
 source: src/executor/wall_time/perf/unwind_data.rs
-expression: "UnwindData::new(MODULE_PATH.as_bytes(), file_offset, start_addr, end_addr,\nNone,)"
+expression: "unwind_data_from_elf(MODULE_PATH.as_bytes(), file_offset, start_addr,\nend_addr, None,)"
 ---
 Ok(
-    UnwindData {
-        path: "testdata/perf_map/cpp_my_benchmark.bin",
-        timestamp: None,
-        avma_range: 400000..459000,
-        base_avma: 400000,
-        base_svma: 400000,
-        eh_frame_hdr_svma: 4577bc..458b30,
-        eh_frame_hdr_hash: 4b4eac90f7f5e60d,
-        eh_frame_hash: 233bdd4ae9fe4ba4,
-        eh_frame_svma: 451098..4577bc,
-    },
+    (
+        UnwindData {
+            path: "testdata/perf_map/cpp_my_benchmark.bin",
+            base_svma: 400000,
+            eh_frame_hdr_svma: 4577bc..458b30,
+            eh_frame_hdr_hash: 4b4eac90f7f5e60d,
+            eh_frame_hash: 233bdd4ae9fe4ba4,
+            eh_frame_svma: 451098..4577bc,
+        },
+        UnwindDataPidMappingWithFullPath {
+            path: "testdata/perf_map/cpp_my_benchmark.bin",
+            timestamp: None,
+            avma_range: 4194304..4558848,
+            base_avma: 4194304,
+        },
+    ),
 )
diff --git a/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__golang_unwind_data.snap b/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__golang_unwind_data.snap
index e3774e60..b359ddb7 100644
--- a/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__golang_unwind_data.snap
+++ b/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__golang_unwind_data.snap
@@ -1,17 +1,22 @@
 ---
 source: src/executor/wall_time/perf/unwind_data.rs
-expression: "UnwindData::new(MODULE_PATH.as_bytes(), file_offset, start_addr, end_addr,\nNone)"
+expression: "unwind_data_from_elf(MODULE_PATH.as_bytes(), file_offset, start_addr,\nend_addr, None)"
 ---
 Ok(
-    UnwindData {
-        path: "testdata/perf_map/go_fib.bin",
-        timestamp: None,
-        avma_range: 402000..50f000,
-        base_avma: 400000,
-        base_svma: 400000,
-        eh_frame_hdr_svma: 6498b0..649b94,
-        eh_frame_hdr_hash: f1f69beb959a08d7,
-        eh_frame_hash: a8727039dd21b51c,
-        eh_frame_svma: 649b98..64aa70,
-    },
+    (
+        UnwindData {
+            path: "testdata/perf_map/go_fib.bin",
+            base_svma: 400000,
+            eh_frame_hdr_svma: 6498b0..649b94,
+            eh_frame_hdr_hash: f1f69beb959a08d7,
+            eh_frame_hash: a8727039dd21b51c,
+            eh_frame_svma: 649b98..64aa70,
+        },
+        UnwindDataPidMappingWithFullPath {
+            path: "testdata/perf_map/go_fib.bin",
+            timestamp: None,
+            avma_range: 4202496..5304320,
+            base_avma: 4194304,
+        },
+    ),
 )
diff --git a/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__ruff_unwind_data.snap b/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__ruff_unwind_data.snap
index 8304f0ea..17a0435f 100644
--- a/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__ruff_unwind_data.snap
+++ b/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__ruff_unwind_data.snap
@@ -1,17 +1,22 @@
 ---
 source: src/executor/wall_time/perf/unwind_data.rs
-expression: "UnwindData::new(MODULE_PATH.as_bytes(), file_offset, start_addr, end_addr,\nNone)"
+expression: "unwind_data_from_elf(MODULE_PATH.as_bytes(), file_offset, start_addr,\nend_addr, None)"
 ---
 Ok(
-    UnwindData {
-        path: "testdata/perf_map/ty_walltime",
-        timestamp: None,
-        avma_range: 555555e6d000..555556813000,
-        base_avma: 555555554000,
-        base_svma: 0,
-        eh_frame_hdr_svma: 7ec298..80f67c,
-        eh_frame_hdr_hash: 6d6dd1e2c782318a,
-        eh_frame_hash: ee27244db791265a,
-        eh_frame_svma: 80f680..918a9c,
-    },
+    (
+        UnwindData {
+            path: "testdata/perf_map/ty_walltime",
+            base_svma: 0,
+            eh_frame_hdr_svma: 7ec298..80f67c,
+            eh_frame_hdr_hash: 6d6dd1e2c782318a,
+            eh_frame_hash: ee27244db791265a,
+            eh_frame_svma: 80f680..918a9c,
+        },
+        UnwindDataPidMappingWithFullPath {
+            path: "testdata/perf_map/ty_walltime",
+            timestamp: None,
+            avma_range: 93825001771008..93825011888128,
+            base_avma: 93824992231424,
+        },
+    ),
 )
diff --git a/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__rust_divan_unwind_data.snap b/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__rust_divan_unwind_data.snap
index ea13d0d6..c29b0048 100644
--- a/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__rust_divan_unwind_data.snap
+++ b/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__rust_divan_unwind_data.snap
@@ -1,17 +1,22 @@
 ---
 source: src/executor/wall_time/perf/unwind_data.rs
-expression: "UnwindData::new(MODULE_PATH.as_bytes(), file_offset, start_addr, end_addr,\nNone)"
+expression: "unwind_data_from_elf(MODULE_PATH.as_bytes(), file_offset, start_addr,\nend_addr, None)"
 ---
 Ok(
-    UnwindData {
-        path: "testdata/perf_map/divan_sleep_benches.bin",
-        timestamp: None,
-        avma_range: 5555555a2000..555555692000,
-        base_avma: 555555554000,
-        base_svma: 0,
-        eh_frame_hdr_svma: 2ac74..2ea60,
-        eh_frame_hdr_hash: f579da4368e627c1,
-        eh_frame_hash: 791501d5a9c438d,
-        eh_frame_svma: 11540..2ac74,
-    },
+    (
+        UnwindData {
+            path: "testdata/perf_map/divan_sleep_benches.bin",
+            base_svma: 0,
+            eh_frame_hdr_svma: 2ac74..2ea60,
+            eh_frame_hdr_hash: f579da4368e627c1,
+            eh_frame_hash: 791501d5a9c438d,
+            eh_frame_svma: 11540..2ac74,
+        },
+        UnwindDataPidMappingWithFullPath {
+            path: "testdata/perf_map/divan_sleep_benches.bin",
+            timestamp: None,
+            avma_range: 93824992550912..93824993533952,
+            base_avma: 93824992231424,
+        },
+    ),
 )
diff --git a/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__the_algorithms_unwind_data.snap b/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__the_algorithms_unwind_data.snap
index 6ddc5f8b..bef66aa1 100644
--- a/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__the_algorithms_unwind_data.snap
+++ b/src/executor/wall_time/perf/snapshots/codspeed_runner__executor__wall_time__perf__unwind_data__tests__the_algorithms_unwind_data.snap
@@ -1,17 +1,22 @@
 ---
 source: src/executor/wall_time/perf/unwind_data.rs
-expression: "UnwindData::new(MODULE_PATH.as_bytes(), file_offset, start_addr, end_addr,\nNone)"
+expression: "unwind_data_from_elf(MODULE_PATH.as_bytes(), file_offset, start_addr,\nend_addr, None)"
 ---
 Ok(
-    UnwindData {
-        path: "testdata/perf_map/the_algorithms.bin",
-        timestamp: None,
-        avma_range: 5555555a7000..5555556b0000,
-        base_avma: 555555554000,
-        base_svma: 0,
-        eh_frame_hdr_svma: 2f0ec..33590,
-        eh_frame_hdr_hash: 277fbdb59e6decaa,
-        eh_frame_hash: 21d8b0c8da0d1029,
-        eh_frame_svma: 128a8..2f0ec,
-    },
+    (
+        UnwindData {
+            path: "testdata/perf_map/the_algorithms.bin",
+            base_svma: 0,
+            eh_frame_hdr_svma: 2f0ec..33590,
+            eh_frame_hdr_hash: 277fbdb59e6decaa,
+            eh_frame_hash: 21d8b0c8da0d1029,
+            eh_frame_svma: 128a8..2f0ec,
+        },
+        UnwindDataPidMappingWithFullPath {
+            path: "testdata/perf_map/the_algorithms.bin",
+            timestamp: None,
+            avma_range: 93824992571392..93824993656832,
+            base_avma: 93824992231424,
+        },
+    ),
 )
diff --git a/src/executor/wall_time/perf/unwind_data.rs b/src/executor/wall_time/perf/unwind_data.rs
index 045962ef..921e656f 100644
--- a/src/executor/wall_time/perf/unwind_data.rs
+++ b/src/executor/wall_time/perf/unwind_data.rs
@@ -8,95 +8,89 @@ use object::ObjectSection;
 use runner_shared::unwind_data::UnwindData;
 use std::ops::Range;
 
-pub trait UnwindDataExt {
-    fn new(
-        path_slice: &[u8],
-        runtime_file_offset: u64,
-        runtime_start_addr: u64,
-        runtime_end_addr: u64,
-        build_id: Option<&[u8]>,
-    ) -> anyhow::Result<Self>
-    where
-        Self: Sized;
-}
-
-impl UnwindDataExt for UnwindData {
-    // Based on this: https://github.com/mstange/linux-perf-stuff/blob/22ca6531b90c10dd2a4519351c843b8d7958a451/src/main.rs#L747-L893
-    fn new(
-        path_slice: &[u8],
-        runtime_file_offset: u64,
-        runtime_start_addr: u64,
-        runtime_end_addr: u64,
-        build_id: Option<&[u8]>,
-    ) -> anyhow::Result<Self> {
-        let avma_range = runtime_start_addr..runtime_end_addr;
-
-        let path = String::from_utf8_lossy(path_slice).to_string();
-        let Some(file) = std::fs::File::open(&path).ok() else {
-            bail!("Could not open file {path}");
-        };
-
-        let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? };
-        let file = object::File::parse(&mmap[..])?;
-
-        // Verify the build id (if we have one)
-        match (build_id, file.build_id()) {
-            (Some(build_id), Ok(Some(file_build_id))) => {
-                if build_id != file_build_id {
-                    let file_build_id = CodeId::from_binary(file_build_id);
-                    let expected_build_id = CodeId::from_binary(build_id);
-                    bail!(
-                        "File {path:?} has non-matching build ID {file_build_id} (expected {expected_build_id})"
-                    );
-                }
-            }
-            (Some(_), Err(_)) | (Some(_), Ok(None)) => {
-                bail!("File {path:?} does not contain a build ID, but we expected it to have one");
+use super::parse_perf_file::UnwindDataPidMappingWithFullPath;
+
+// Based on: https://github.com/mstange/linux-perf-stuff/blob/22ca6531b90c10dd2a4519351c843b8d7958a451/src/main.rs#L747-L893
+pub fn unwind_data_from_elf(
+    path_slice: &[u8],
+    runtime_file_offset: u64,
+    runtime_start_addr: u64,
+    runtime_end_addr: u64,
+    build_id: Option<&[u8]>,
+) -> anyhow::Result<(UnwindData, UnwindDataPidMappingWithFullPath)> {
+    let avma_range = runtime_start_addr..runtime_end_addr;
+
+    let path = String::from_utf8_lossy(path_slice).to_string();
+    let Some(file) = std::fs::File::open(&path).ok() else {
+        bail!("Could not open file {path}");
+    };
+
+    let mmap = unsafe { memmap2::MmapOptions::new().map(&file)? };
+    let file = object::File::parse(&mmap[..])?;
+
+    // Verify the build id (if we have one)
+    match (build_id, file.build_id()) {
+        (Some(build_id), Ok(Some(file_build_id))) => {
+            if build_id != file_build_id {
+                let file_build_id = CodeId::from_binary(file_build_id);
+                let expected_build_id = CodeId::from_binary(build_id);
+                bail!(
+                    "File {path:?} has non-matching build ID {file_build_id} (expected {expected_build_id})"
+                );
             }
-            _ => {
-                // No build id to check
-            }
-        };
-
-        let base_avma = elf_helper::compute_base_avma(
-            runtime_start_addr,
-            runtime_end_addr,
-            runtime_file_offset,
-            &file,
-        )?;
-        let base_svma = elf_helper::relative_address_base(&file);
-        let eh_frame = file.section_by_name(".eh_frame");
-        let eh_frame_hdr = file.section_by_name(".eh_frame_hdr");
-
-        fn section_data<'a>(section: &impl ObjectSection<'a>) -> Option<Vec<u8>> {
-            section.data().ok().map(|data| data.to_owned())
         }
-
-        let eh_frame_data = eh_frame.as_ref().and_then(section_data);
-        let eh_frame_hdr_data = eh_frame_hdr.as_ref().and_then(section_data);
-
-        fn svma_range<'a>(section: &impl ObjectSection<'a>) -> Range<u64> {
-            section.address()..section.address() + section.size()
+        (Some(_), Err(_)) | (Some(_), Ok(None)) => {
+            bail!("File {path:?} does not contain a build ID, but we expected it to have one");
         }
+        _ => {
+            // No build id to check
+        }
+    };
+
+    let base_avma = elf_helper::compute_base_avma(
+        runtime_start_addr,
+        runtime_end_addr,
+        runtime_file_offset,
+        &file,
+    )?;
+    let base_svma = elf_helper::relative_address_base(&file);
+    let eh_frame = file.section_by_name(".eh_frame");
+    let eh_frame_hdr = file.section_by_name(".eh_frame_hdr");
+
+    fn section_data<'a>(section: &impl ObjectSection<'a>) -> Option<Vec<u8>> {
+        section.data().ok().map(|data| data.to_owned())
+    }
 
-        Ok(UnwindData {
-            path,
-            timestamp: None,
-            avma_range,
-            base_avma,
-            base_svma,
-            eh_frame_hdr: eh_frame_hdr_data.context("Failed to find eh_frame hdr data")?,
-            eh_frame_hdr_svma: eh_frame_hdr
-                .as_ref()
-                .map(svma_range)
-                .context("Failed to find eh_frame hdr section")?,
-            eh_frame: eh_frame_data.context("Failed to find eh_frame data")?,
-            eh_frame_svma: eh_frame
-                .as_ref()
-                .map(svma_range)
-                .context("Failed to find eh_frame section")?,
-        })
+    let eh_frame_data = eh_frame.as_ref().and_then(section_data);
+    let eh_frame_hdr_data = eh_frame_hdr.as_ref().and_then(section_data);
+
+    fn svma_range<'a>(section: &impl ObjectSection<'a>) -> Range<u64> {
+        section.address()..section.address() + section.size()
     }
+
+    let v3 = UnwindData {
+        path: path.clone(),
+        base_svma,
+        eh_frame_hdr: eh_frame_hdr_data.context("Failed to find eh_frame hdr data")?,
+        eh_frame_hdr_svma: eh_frame_hdr
+            .as_ref()
+            .map(svma_range)
+            .context("Failed to find eh_frame hdr section")?,
+        eh_frame: eh_frame_data.context("Failed to find eh_frame data")?,
+        eh_frame_svma: eh_frame
+            .as_ref()
+            .map(svma_range)
+            .context("Failed to find eh_frame section")?,
+    };
+
+    let mapping = UnwindDataPidMappingWithFullPath {
+        path,
+        timestamp: None,
+        avma_range,
+        base_avma,
+    };
+
+    Ok((v3, mapping))
 }
 
 #[cfg(test)]
@@ -146,7 +140,7 @@ mod tests {
         let (start_addr, end_addr, file_offset) =
             (0x0000000000402000_u64, 0x000000000050f000_u64, 0x2000);
         assert_elf_load_bias!(start_addr, end_addr, file_offset, MODULE_PATH, 0x0);
-        insta::assert_debug_snapshot!(UnwindData::new(
+        insta::assert_debug_snapshot!(unwind_data_from_elf(
             MODULE_PATH.as_bytes(),
             file_offset,
             start_addr,
@@ -168,7 +162,7 @@ mod tests {
             (0x0000000000400000_u64, 0x0000000000459000_u64, 0x0);
         assert_elf_load_bias!(start_addr, end_addr, file_offset, MODULE_PATH, 0x0);
 
-        insta::assert_debug_snapshot!(UnwindData::new(
+        insta::assert_debug_snapshot!(unwind_data_from_elf(
             MODULE_PATH.as_bytes(),
             file_offset,
             start_addr,
@@ -190,7 +184,7 @@ mod tests {
             MODULE_PATH,
             0x555555554000
         );
-        insta::assert_debug_snapshot!(UnwindData::new(
+        insta::assert_debug_snapshot!(unwind_data_from_elf(
             MODULE_PATH.as_bytes(),
             file_offset,
             start_addr,
@@ -218,7 +212,7 @@ mod tests {
             MODULE_PATH,
             0x555555554000
         );
-        insta::assert_debug_snapshot!(UnwindData::new(
+        insta::assert_debug_snapshot!(unwind_data_from_elf(
             MODULE_PATH.as_bytes(),
             file_offset,
             start_addr,
@@ -247,7 +241,7 @@ mod tests {
             0x555555554000
         );
 
-        insta::assert_debug_snapshot!(UnwindData::new(
+        insta::assert_debug_snapshot!(unwind_data_from_elf(
             MODULE_PATH.as_bytes(),
             file_offset,
             start_addr,