1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

use anyhow::{bail, Context};
use camino::Utf8Path;
use fs_err as fs;
use goblin::{
    archive::Archive,
    elf::Elf,
    mach::{segment::Section, symbols, Mach, MachO, SingleArch},
    pe::PE,
    Object,
};
use std::collections::HashSet;
use uniffi_meta::Metadata;

/// Extract the metadata written by the `uniffi::export` macro from a library file.
///
/// In addition to generating the scaffolding, that macro also encodes the
/// `uniffi_meta::Metadata` for the components, which can be used to generate the bindings side
/// of the interface.
///
/// # Errors
///
/// Fails if the file at `path` can't be read, or if no metadata can be extracted from its
/// contents (for example because the library format is not recognized).
pub fn extract_from_library(path: &Utf8Path) -> anyhow::Result<Vec<Metadata>> {
    let library_bytes = fs::read(path)?;
    extract_from_bytes(&library_bytes)
}

/// Parse `file_data` as an object file and hand it to the extractor for its format.
///
/// ELF, PE, Mach-O and archive inputs are dispatched to their dedicated extractors.  Anything
/// else that parses successfully is rejected with an "Unknown library format" error.
fn extract_from_bytes(file_data: &[u8]) -> anyhow::Result<Vec<Metadata>> {
    let object = Object::parse(file_data)?;
    match object {
        Object::Archive(archive) => extract_from_archive(archive, file_data),
        Object::Mach(mach) => extract_from_mach(mach, file_data),
        Object::PE(pe) => extract_from_pe(pe, file_data),
        Object::Elf(elf) => extract_from_elf(elf, file_data),
        _ => bail!("Unknown library format"),
    }
}

/// Extract metadata items from the symbol table of an ELF file.
///
/// `file_data` must be the bytes that `elf` was parsed from, because symbol addresses are turned
/// into offsets into that buffer.
///
/// # Errors
///
/// Fails if a symbol name can't be looked up, if the address of a metadata symbol doesn't
/// translate to an offset inside `file_data`, or if the metadata itself can't be read.
pub fn extract_from_elf(elf: Elf<'_>, file_data: &[u8]) -> anyhow::Result<Vec<Metadata>> {
    let mut extracted = ExtractedItems::new();
    // Pair each symbol with the header of the section it is defined in.  Symbols whose section
    // index has no entry in the section header table are skipped.
    let iter = elf
        .syms
        .iter()
        .filter_map(|sym| elf.section_headers.get(sym.st_shndx).map(|sh| (sym, sh)));

    for (sym, sh) in iter {
        let name = elf
            .strtab
            .get_at(sym.st_name)
            .context("Error getting symbol name")?;
        if !is_metadata_symbol(name) {
            continue;
        }
        // `st_value - sh_addr` is the offset relative to the start of the section; adding
        // `sh_offset` makes it relative to the start of the file contents.  Every step is
        // checked so that a malformed symbol yields an error instead of a panic or a
        // wrapped-around offset.
        let file_offset = sym
            .st_value
            .checked_sub(sh.sh_addr)
            .and_then(|section_offset| sh.sh_offset.checked_add(section_offset))
            .filter(|&offset| offset <= file_data.len() as u64)
            .map(|offset| offset as usize)
            .with_context(|| format!("Invalid location for metadata symbol `{name}`"))?;
        extracted.extract_item(name, file_data, file_offset)?;
    }
    Ok(extracted.into_metadata())
}

/// Extract metadata items from the export table of a PE file.
///
/// Only named exports whose name is a metadata symbol are read.  `file_data` is the buffer that
/// the export offsets index into.
///
/// # Errors
///
/// Fails if a metadata export has no offset or if its metadata can't be read.
pub fn extract_from_pe(pe: PE<'_>, file_data: &[u8]) -> anyhow::Result<Vec<Metadata>> {
    let mut extracted = ExtractedItems::new();
    // Narrow the exports down to (name, offset) pairs for the metadata symbols.
    let metadata_exports = pe.exports.into_iter().filter_map(|export| {
        let name = export.name.filter(|name| is_metadata_symbol(name))?;
        Some((name, export.offset))
    });
    for (name, offset) in metadata_exports {
        let offset = offset.context("Error getting symbol offset")?;
        extracted.extract_item(name, file_data, offset)?;
    }
    Ok(extracted.into_metadata())
}

/// Extract metadata items from a Mach-O file, which is either a single binary or a fat
/// (multi-architecture) container.
///
/// For a fat container only the first architecture is inspected.
///
/// # Errors
///
/// Fails if a fat container has no architectures, if its first architecture can't be parsed, or
/// if extracting from the contained binary or archive fails.
pub fn extract_from_mach(mach: Mach<'_>, file_data: &[u8]) -> anyhow::Result<Vec<Metadata>> {
    match mach {
        Mach::Binary(macho) => extract_from_macho(macho, file_data),
        // Multi-binary library, just extract the first one
        Mach::Fat(multi_arch) => {
            // goblin parses each architecture from its own slice of the fat file, so the
            // offsets it reports are relative to the start of that slice, not to the start of
            // `file_data`.  Hand the extractors the matching slice.
            let arch = multi_arch
                .iter_arches()
                .next()
                .context("Fat Mach-O file doesn't contain any architectures")??;
            let arch_data = arch.slice(file_data);
            match multi_arch.get(0)? {
                SingleArch::MachO(macho) => extract_from_macho(macho, arch_data),
                SingleArch::Archive(archive) => extract_from_archive(archive, arch_data),
            }
        }
    }
}

/// Extract metadata items from a single-architecture Mach-O binary.
///
/// Both the symbol table and the export trie are scanned; a symbol that shows up in both is
/// only extracted once.  `file_data` must be the bytes that `macho` was parsed from, because
/// symbol addresses are turned into offsets into that buffer.
///
/// # Errors
///
/// Fails if a section can't be read, if a metadata symbol refers to a section that doesn't
/// exist or lies before the start of its section, if the exports can't be parsed, or if the
/// metadata itself can't be read.
pub fn extract_from_macho(macho: MachO<'_>, file_data: &[u8]) -> anyhow::Result<Vec<Metadata>> {
    // Collect the sections of all segments, keeping them in load-command order.  That order
    // matters because `n_sect` in the symbol table is an ordinal into exactly this sequence.
    let mut sections: Vec<Section> = Vec::new();
    for sects in macho.segments.sections() {
        for section in sects {
            let (section, _data) = section.context("Error reading Mach-O section")?;
            sections.push(section);
        }
    }
    let mut extracted = ExtractedItems::new();

    // Iterate through the symbols.  This picks up symbols from the .o files embedded in a Darwin
    // archive.
    for (name, nlist) in macho.symbols().flatten() {
        // Check that the symbol:
        //   - Is global (exported)
        //   - Has type=N_SECT (it's regular data as opposed to something like
        //     "undefined" or "indirect")
        //   - Has a metadata symbol name
        if nlist.is_global() && nlist.get_type() == symbols::N_SECT && is_metadata_symbol(name) {
            // `n_sect` numbers sections starting from 1 (0 means "no section"), so subtract one
            // to get an index into `sections`.
            //
            // `nlist.n_value` is an address, so we can calculate the offset inside the section
            // using the difference between that and `section.addr`.
            let offset = nlist
                .n_sect
                .checked_sub(1)
                .and_then(|index| sections.get(index))
                .and_then(|section| {
                    let section_offset = nlist.n_value.checked_sub(section.addr)?;
                    Some(section.offset as usize + section_offset as usize)
                })
                .with_context(|| format!("Invalid section for metadata symbol `{name}`"))?;
            extracted.extract_item(name, file_data, offset)?;
        }
    }

    // Iterate through the exports.  This picks up symbols from .dylib files.
    for export in macho.exports()? {
        let name = &export.name;
        if is_metadata_symbol(name) {
            extracted.extract_item(name, file_data, export.offset as usize)?;
        }
    }
    Ok(extracted.into_metadata())
}

/// Extract metadata items from every member of an archive that defines a metadata symbol.
///
/// The archive's symbol summary is used to pick the members worth looking at; each of those is
/// then extracted from `file_data` and processed like a standalone object file.
///
/// # Errors
///
/// Fails if a selected member can't be extracted from the archive or if no metadata can be
/// read from it.
pub fn extract_from_archive(
    archive: Archive<'_>,
    file_data: &[u8],
) -> anyhow::Result<Vec<Metadata>> {
    // Names of the archive members that have at least one metadata symbol in them
    let members_to_check: HashSet<&str> = archive
        .summarize()
        .into_iter()
        .filter(|(_, _, symbols)| symbols.iter().any(|name| is_metadata_symbol(name)))
        .map(|(member_name, _, _)| member_name)
        .collect();

    let mut items = vec![];
    for member_name in members_to_check {
        let member_data = archive
            .extract(member_name, file_data)
            .with_context(|| format!("Failed to extract archive member `{member_name}`"))?;
        let mut member_items = extract_from_bytes(member_data).with_context(|| {
            format!("Failed to extract data from archive member `{member_name}`")
        })?;
        items.append(&mut member_items);
    }
    Ok(items)
}

/// Container for extracted metadata items
///
/// Keeps the extracted items together with the names of the symbols they were read from, so
/// that a symbol that is encountered more than once can be skipped.
#[derive(Default)]
struct ExtractedItems {
    /// Metadata items, in the order they were extracted
    items: Vec<Metadata>,
    /// symbol names for the extracted items, we use this to ensure that we don't extract the same
    /// symbol twice
    names: HashSet<String>,
}

impl ExtractedItems {
    fn new() -> Self {
        Self::default()
    }

    fn extract_item(&mut self, name: &str, file_data: &[u8], offset: usize) -> anyhow::Result<()> {
        if self.names.contains(name) {
            // Already extracted this item
            return Ok(());
        }

        // Use the file data starting from offset, without specifying the end position.  We don't
        // always know the end position, because goblin reports the symbol size as 0 for PE and
        // MachO files.
        //
        // This works fine, because `MetadataReader` knows when the serialized data is terminated
        // and will just ignore the trailing data.
        let data = &file_data[offset..];
        self.items.push(Metadata::read(data)?);
        self.names.insert(name.to_string());
        Ok(())
    }

    fn into_metadata(self) -> Vec<Metadata> {
        self.items
    }
}

/// Check whether `name` is the name of a symbol that holds UniFFI metadata.
///
/// Metadata symbols start with `UNIFFI_META`, optionally preceded by the single "_" char that
/// Darwin prepends to symbol names.
fn is_metadata_symbol(name: &str) -> bool {
    const METADATA_PREFIX: &str = "UNIFFI_META";
    match name.strip_prefix('_') {
        // At most one leading underscore is skipped
        Some(without_underscore) => without_underscore.starts_with(METADATA_PREFIX),
        None => name.starts_with(METADATA_PREFIX),
    }
}