How can I get Serde to allocate strings from an arena during deserialization?

前端 未结 1 1927
时光取名叫无心
时光取名叫无心 2020-12-18 10:04

I have a struct with string fields. I'd like to control how the memory for the strings is allocated. In particular, I'd like to allocate them using something like copy_arena.

相关标签:
1条回答
  • 2020-12-18 10:26

    Here is one possible implementation that uses serde::de::DeserializeSeed to expose the arena allocator to the deserialization code.

    In a more elaborate use case you may want to write a procedural macro to generate such impls.


    #[macro_use]
    extern crate serde_derive;
    
    extern crate copy_arena;
    extern crate serde;
    extern crate serde_json;
    
    use std::fmt;
    use std::marker::PhantomData;
    use std::str;
    
    use serde::de::{self, DeserializeSeed, Deserializer, MapAccess, Visitor};
    
    use copy_arena::{Allocator, Arena};
    
    /// Example target type: two string slices that borrow for `'a`.
    ///
    /// In this example the slices are the ones returned by
    /// `ArenaSeed::alloc_string`, so no `String` is stored in the struct.
    #[derive(Debug)]
    struct Jason<'a> {
        // Value of the "one" key in the input map.
        one: &'a str,
        // Value of the "two" key in the input map.
        two: &'a str,
    }
    
    /// Deserialization seed that carries an arena allocator.
    ///
    /// `T` names the type this seed is meant to produce; the trait impls for
    /// a concrete `T` decide how the allocator is used.
    struct ArenaSeed<'a, T> {
        // Allocator handle obtained from `Arena::allocator`.
        allocator: Allocator<'a>,
        // Zero-sized marker recording `T`; no value of type `T` is stored.
        marker: PhantomData<fn() -> T>,
    }
    
    impl<'a, T> ArenaSeed<'a, T> {
        fn new(arena: &'a mut Arena) -> Self {
            ArenaSeed {
                allocator: arena.allocator(),
                marker: PhantomData,
            }
        }
    
        fn alloc_string(&mut self, owned: String) -> &'a str {
            let slice = self.allocator.alloc_slice(owned.as_bytes());
            // We know the bytes are valid UTF-8.
            str::from_utf8(slice).unwrap()
        }
    }
    
    impl<'de, 'a> DeserializeSeed<'de> for ArenaSeed<'a, Jason<'a>> {
        type Value = Jason<'a>;

        /// Asks the deserializer for a struct named "Jason" and hands this
        /// seed over as the visitor, so the visitor has access to the
        /// allocator.
        fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
        where
            D: Deserializer<'de>,
        {
            const FIELD_NAMES: &[&str] = &["one", "two"];
            deserializer.deserialize_struct("Jason", FIELD_NAMES, self)
        }
    }
    
    impl<'de, 'a> Visitor<'de> for ArenaSeed<'a, Jason<'a>> {
        type Value = Jason<'a>;
    
        fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
            formatter.write_str("struct Jason")
        }
    
        fn visit_map<A>(mut self, mut map: A) -> Result<Self::Value, A::Error>
        where
            A: MapAccess<'de>,
        {
            #[derive(Deserialize)]
            #[serde(field_identifier, rename_all = "lowercase")]
            enum Field { One, Two }
    
            let mut one = None;
            let mut two = None;
            while let Some(key) = map.next_key()? {
                match key {
                    Field::One => {
                        if one.is_some() {
                            return Err(de::Error::duplicate_field("one"));
                        }
                        one = Some(self.alloc_string(map.next_value()?));
                    }
                    Field::Two => {
                        if two.is_some() {
                            return Err(de::Error::duplicate_field("two"));
                        }
                        two = Some(self.alloc_string(map.next_value()?));
                    }
                }
            }
            let one = one.ok_or_else(|| de::Error::missing_field("one"))?;
            let two = two.ok_or_else(|| de::Error::missing_field("two"))?;
            Ok(Jason { one, two })
        }
    }
    
    /// Deserializes a small JSON document into a `Jason` through an
    /// `ArenaSeed` and prints the result.
    fn main() {
        let input = r#" {"one": "I", "two": "II"} "#;

        let mut arena = Arena::new();
        let mut json = serde_json::Deserializer::from_str(input);

        // The seed borrows the arena mutably for as long as the result lives.
        let seed = ArenaSeed::new(&mut arena);
        let parsed: Jason = seed.deserialize(&mut json).unwrap();

        println!("{:?}", parsed);
    }
    

    If arena allocation is not a strict requirement and you just need to amortize the cost of string allocation across lots of deserialized objects, Deserialize::deserialize_in_place is a more concise alternative.

    // [dependencies]
    // serde = "1.0"
    // serde_derive = { version = "1.0", features = ["deserialize_in_place"] }
    // serde_json = "1.0"
    
    #[macro_use]
    extern crate serde_derive;
    
    extern crate serde;
    extern crate serde_json;
    
    use serde::Deserialize;
    
    /// Owned-string variant of the example type, used to demonstrate
    /// `Deserialize::deserialize_in_place`.
    #[derive(Deserialize, Debug)]
    struct Jason {
        // Value of the "one" key.
        one: String,
        // Value of the "two" key.
        two: String,
    }
    
    /// Deserializes the same input three times and prints the address of each
    /// string buffer, to contrast a fresh deserialization with
    /// `deserialize_in_place`.
    fn main() {
        let input = r#" {"one": "I", "two": "II"} "#;

        // First pass: the Strings are allocated during deserialization.
        let mut first_pass = serde_json::Deserializer::from_str(input);
        let mut jason = Jason::deserialize(&mut first_pass).unwrap();
        println!("{:?} {:p} {:p}", jason, jason.one.as_str(), jason.two.as_str());

        // Second pass: deserialize into the existing value. As long as the
        // new strings are no longer than the previous ones, the buffers are
        // not reallocated and the printed addresses stay the same.
        let mut second_pass = serde_json::Deserializer::from_str(input);
        Jason::deserialize_in_place(&mut second_pass, &mut jason).unwrap();
        println!("{:?} {:p} {:p}", jason, jason.one.as_str(), jason.two.as_str());

        // Third pass: a brand-new value with its own allocations. The
        // earlier value is still alive here, so the addresses differ.
        let mut third_pass = serde_json::Deserializer::from_str(input);
        let fresh = Jason::deserialize(&mut third_pass).unwrap();
        println!("{:?} {:p} {:p}", fresh, fresh.one.as_str(), fresh.two.as_str());
    }
    
    0 讨论(0)
提交回复
热议问题