fix: keeping optimize search (#2321)

Hengfei Yang 2024-01-03 09:45:47 +08:00 committed by GitHub
parent d8519a3856
commit e0a1ef7a26
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
60 changed files with 1308 additions and 1289 deletions

2
.gitignore vendored
View File

@ -2,6 +2,8 @@
.DS_Store
.history
report.json
flamegraph.svg
flamegraph.html
# Generated by Cargo
# will have compiled files and executables

28
Cargo.lock generated
View File

@ -2053,7 +2053,6 @@ dependencies = [
"arrow-json",
"arrow-schema",
"aws-sdk-dynamodb",
"blake3",
"byteorder",
"bytes",
"chrono",
@ -2061,6 +2060,7 @@ dependencies = [
"dotenv_config",
"dotenvy",
"getrandom",
"hashbrown 0.14.3",
"hex",
"indexmap 2.1.0",
"itertools 0.12.0",
@ -2446,8 +2446,7 @@ checksum = "7e962a19be5cfc3f3bf6dd8f61eb50107f356ad6270fbb3ed41476571db78be5"
[[package]]
name = "datafusion"
version = "34.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "193fd1e7628278d0641c5122860f9a7fd6a1d77d055838d12f55d15bbe28d4d0"
source = "git+https://github.com/openobserve/arrow-datafusion.git?rev=45e5537ca43d2c2a6e55b9804073b191b337b9e5#45e5537ca43d2c2a6e55b9804073b191b337b9e5"
dependencies = [
"ahash 0.8.6",
"arrow",
@ -2493,8 +2492,7 @@ dependencies = [
[[package]]
name = "datafusion-common"
version = "34.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "548bc49c4a489e3de474813831ea556dc9d368f9ed8d867b1493da42e8e9f613"
source = "git+https://github.com/openobserve/arrow-datafusion.git?rev=45e5537ca43d2c2a6e55b9804073b191b337b9e5#45e5537ca43d2c2a6e55b9804073b191b337b9e5"
dependencies = [
"ahash 0.8.6",
"arrow",
@ -2513,8 +2511,7 @@ dependencies = [
[[package]]
name = "datafusion-execution"
version = "34.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ecc865657ffcf4da5ff08bdc6436a9a833bc0aa96c3254c8d18ab8a0ad4e437d"
source = "git+https://github.com/openobserve/arrow-datafusion.git?rev=45e5537ca43d2c2a6e55b9804073b191b337b9e5#45e5537ca43d2c2a6e55b9804073b191b337b9e5"
dependencies = [
"arrow",
"chrono",
@ -2534,8 +2531,7 @@ dependencies = [
[[package]]
name = "datafusion-expr"
version = "34.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "33c473f72d8d81a532e63f6e562ed66dd9209dfd8e433d9712abd42444ee161e"
source = "git+https://github.com/openobserve/arrow-datafusion.git?rev=45e5537ca43d2c2a6e55b9804073b191b337b9e5#45e5537ca43d2c2a6e55b9804073b191b337b9e5"
dependencies = [
"ahash 0.8.6",
"arrow",
@ -2550,8 +2546,7 @@ dependencies = [
[[package]]
name = "datafusion-optimizer"
version = "34.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb6218318001d2f6783b7fffa17592318f65f26609d7aab605a3dd0c7c2e2618"
source = "git+https://github.com/openobserve/arrow-datafusion.git?rev=45e5537ca43d2c2a6e55b9804073b191b337b9e5#45e5537ca43d2c2a6e55b9804073b191b337b9e5"
dependencies = [
"arrow",
"async-trait",
@ -2568,8 +2563,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-expr"
version = "34.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9e1ca7e35ca22f9dc506c2375b92054b03ccf91afe25c0a90b395a1473a09735"
source = "git+https://github.com/openobserve/arrow-datafusion.git?rev=45e5537ca43d2c2a6e55b9804073b191b337b9e5#45e5537ca43d2c2a6e55b9804073b191b337b9e5"
dependencies = [
"ahash 0.8.6",
"arrow",
@ -2602,8 +2596,7 @@ dependencies = [
[[package]]
name = "datafusion-physical-plan"
version = "34.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ddde97adefcca3a55257c646ffee2a95b6cac66f74d1146a6e3a6dbb37830631"
source = "git+https://github.com/openobserve/arrow-datafusion.git?rev=45e5537ca43d2c2a6e55b9804073b191b337b9e5#45e5537ca43d2c2a6e55b9804073b191b337b9e5"
dependencies = [
"ahash 0.8.6",
"arrow",
@ -2633,8 +2626,7 @@ dependencies = [
[[package]]
name = "datafusion-sql"
version = "34.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a60d9d6460a64fddb8663db41da97e6b8b0bf79da42f997ebe81722731eaf0e5"
source = "git+https://github.com/openobserve/arrow-datafusion.git?rev=45e5537ca43d2c2a6e55b9804073b191b337b9e5#45e5537ca43d2c2a6e55b9804073b191b337b9e5"
dependencies = [
"arrow",
"arrow-schema",
@ -3759,6 +3751,7 @@ dependencies = [
"bytes",
"chrono",
"config",
"futures",
"hashbrown 0.14.3",
"indexmap 2.1.0",
"itertools 0.12.0",
@ -4817,6 +4810,7 @@ dependencies = [
"get_if_addrs",
"getrandom",
"glob",
"hashbrown 0.14.3",
"hashlink",
"hex",
"http-auth-basic",

View File

@ -76,8 +76,10 @@ clap = { version = "4.1", default-features = false, features = [
cloudevents-sdk = { version = "0.7.0", features = ["actix"] }
csv = "1.2.1"
dashmap = { version = "5.4", features = ["serde"] }
datafusion = { version = "34", features = ["simd"] }
datafusion-expr = "34"
datafusion = { git = "https://github.com/openobserve/arrow-datafusion.git", rev = "45e5537ca43d2c2a6e55b9804073b191b337b9e5", version = "34", features = [
"simd",
] }
datafusion-expr = { git = "https://github.com/openobserve/arrow-datafusion.git", rev = "45e5537ca43d2c2a6e55b9804073b191b337b9e5", version = "34" }
arrow = { version = "49", features = ["simd", "ipc_compression"] }
arrow-schema = { version = "49", features = ["serde"] }
parquet = { version = "49", features = ["arrow", "async"] }
@ -91,6 +93,7 @@ flate2 = { version = "1.0", features = ["zlib"] }
futures = "0.3"
get_if_addrs = "0.5"
glob = "0.3"
hashbrown = { version = "0.14.3", features = ["serde"] }
hashlink = "0.8.4"
hex = "0.4"
http-auth-basic = "0.3"
@ -207,23 +210,17 @@ actix-web-prometheus = { version = "0.1", features = ["process"] }
anyhow = "1.0"
argon2 = { version = "0.5", features = ["alloc", "password-hash"] }
async-trait = "0.1"
async-recursion = "1.0"
awc = "3.2"
aws-config = "0.56.1"
aws-sdk-dynamodb = "0.30.0"
base64 = "0.21"
blake3 = { version = "1.4", features = ["rayon"] }
bytes = "1.4"
byteorder = "1.4.3"
chrono = { version = "0.4", default-features = false, features = ["clock"] }
dashmap = { version = "5.4", features = ["serde"] }
datafusion = { version = "34", features = ["simd"] }
datafusion-expr = "34"
arrow = { version = "49", features = ["simd", "ipc_compression"] }
arrow-json = "49"
arrow-schema = { version = "49", features = ["serde"] }
parquet = { version = "49", features = ["arrow", "async"] }
object_store = { version = "0.8", features = ["aws", "azure", "gcp"] }
dotenv_config = "0.1.7"
dotenvy = "0.15"
faststr = "0.2"
@ -249,19 +246,9 @@ rs-snowflake = "0.6"
segment = "0.2"
serde = { version = "1", features = ["derive"] }
serde_json = "1"
simd-json = "0.13"
sha256 = "1.4.0"
sled = "0.34"
snafu = "0.7.5"
snap = "1"
sqlparser = { version = "0.40", features = ["serde", "visitor"] }
sqlx = { version = "0.7", features = [
"runtime-tokio-rustls",
"postgres",
"mysql",
"sqlite",
"chrono",
] }
sysinfo = "0.29"
tempfile = "3"
thiserror = "1.0"

View File

@ -1,7 +1,7 @@
version = "Two"
unstable_features = true
comment_width = 80
comment_width = 100
wrap_comments = true
format_code_in_doc_comments = true
format_macro_bodies = true

View File

@ -184,14 +184,19 @@ INSERT IGNORE INTO file_list (org, stream, date, file, deleted, min_ts, max_ts,
for file in files {
let (stream_key, date_key, file_name) =
parse_file_key_columns(file).map_err(|e| Error::Message(e.to_string()))?;
let ret: Option<i64> = sqlx::query_scalar(
let ret: Option<i64> = match sqlx::query_scalar(
r#"SELECT id FROM file_list WHERE stream = ? AND date = ? AND file = ?"#,
)
.bind(stream_key)
.bind(date_key)
.bind(file_name)
.fetch_one(&pool)
.await?;
.await
{
Ok(v) => v,
Err(sqlx::Error::RowNotFound) => continue,
Err(e) => return Err(e.into()),
};
match ret {
Some(v) => ids.push(v.to_string()),
None => {
@ -202,8 +207,10 @@ INSERT IGNORE INTO file_list (org, stream, date, file, deleted, min_ts, max_ts,
}
}
// delete files by ids
let sql = format!("DELETE FROM file_list WHERE id IN({});", ids.join(","));
_ = pool.execute(sql.as_str()).await?;
if !ids.is_empty() {
let sql = format!("DELETE FROM file_list WHERE id IN({});", ids.join(","));
_ = pool.execute(sql.as_str()).await?;
}
}
Ok(())
}
@ -259,13 +266,19 @@ INSERT IGNORE INTO file_list (org, stream, date, file, deleted, min_ts, max_ts,
for file in files {
let (stream_key, date_key, file_name) =
parse_file_key_columns(file).map_err(|e| Error::Message(e.to_string()))?;
let ret: Option<i64> = sqlx::query_scalar(
let ret: Option<i64> = match sqlx::query_scalar(
r#"SELECT id FROM file_list_deleted WHERE stream = ? AND date = ? AND file = ?"#,
)
.bind(stream_key)
.bind(date_key)
.bind(file_name)
.fetch_one(&pool).await?;
.fetch_one(&pool)
.await
{
Ok(v) => v,
Err(sqlx::Error::RowNotFound) => continue,
Err(e) => return Err(e.into()),
};
match ret {
Some(v) => ids.push(v.to_string()),
None => {
@ -277,11 +290,13 @@ INSERT IGNORE INTO file_list (org, stream, date, file, deleted, min_ts, max_ts,
}
}
// delete files by ids
let sql = format!(
"DELETE FROM file_list_deleted WHERE id IN({});",
ids.join(",")
);
_ = pool.execute(sql.as_str()).await?;
if !ids.is_empty() {
let sql = format!(
"DELETE FROM file_list_deleted WHERE id IN({});",
ids.join(",")
);
_ = pool.execute(sql.as_str()).await?;
}
}
Ok(())
}
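
The pattern above (repeated in the PostgreSQL and SQLite backends below) replaces the bare `?` on `fetch_one` with an explicit match, so a row that is already gone (`sqlx::Error::RowNotFound`) is skipped instead of failing the whole batch, and the DELETE is only issued when at least one id was collected, avoiding an empty `IN()` clause. A minimal sketch of the same pattern against a MySQL pool; the function and table layout are illustrative and assume sqlx with the `mysql` feature:

use sqlx::{Executor, MySqlPool};

/// Illustrative helper (not the project's function): `files` holds
/// (stream, date, file) triples that have already been parsed.
async fn remove_files(
    pool: &MySqlPool,
    files: &[(String, String, String)],
) -> Result<(), sqlx::Error> {
    let mut ids = Vec::with_capacity(files.len());
    for (stream, date, file) in files {
        let ret: Option<i64> = match sqlx::query_scalar(
            "SELECT id FROM file_list WHERE stream = ? AND date = ? AND file = ?",
        )
        .bind(stream.as_str())
        .bind(date.as_str())
        .bind(file.as_str())
        .fetch_one(pool)
        .await
        {
            Ok(v) => v,
            Err(sqlx::Error::RowNotFound) => continue, // already deleted, skip it
            Err(e) => return Err(e),
        };
        if let Some(v) = ret {
            ids.push(v.to_string());
        }
    }
    // only build the DELETE when there is something to delete
    if !ids.is_empty() {
        let sql = format!("DELETE FROM file_list WHERE id IN({});", ids.join(","));
        pool.execute(sql.as_str()).await?;
    }
    Ok(())
}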

View File

@ -185,14 +185,19 @@ INSERT INTO file_list (org, stream, date, file, deleted, min_ts, max_ts, records
for file in files {
let (stream_key, date_key, file_name) =
parse_file_key_columns(file).map_err(|e| Error::Message(e.to_string()))?;
let ret: Option<i64> = sqlx::query_scalar(
let ret: Option<i64> = match sqlx::query_scalar(
r#"SELECT id FROM file_list WHERE stream = $1 AND date = $2 AND file = $3;"#,
)
.bind(stream_key)
.bind(date_key)
.bind(file_name)
.fetch_one(&pool)
.await?;
.await
{
Ok(v) => v,
Err(sqlx::Error::RowNotFound) => continue,
Err(e) => return Err(e.into()),
};
match ret {
Some(v) => ids.push(v.to_string()),
None => {
@ -204,8 +209,10 @@ INSERT INTO file_list (org, stream, date, file, deleted, min_ts, max_ts, records
}
}
// delete files by ids
let sql = format!("DELETE FROM file_list WHERE id IN({});", ids.join(","));
_ = pool.execute(sql.as_str()).await?;
if !ids.is_empty() {
let sql = format!("DELETE FROM file_list WHERE id IN({});", ids.join(","));
_ = pool.execute(sql.as_str()).await?;
}
}
Ok(())
}
@ -264,14 +271,19 @@ INSERT INTO file_list (org, stream, date, file, deleted, min_ts, max_ts, records
for file in files {
let (stream_key, date_key, file_name) =
parse_file_key_columns(file).map_err(|e| Error::Message(e.to_string()))?;
let ret: Option<i64> = sqlx::query_scalar(
let ret: Option<i64> = match sqlx::query_scalar(
r#"SELECT id FROM file_list_deleted WHERE stream = $1 AND date = $2 AND file = $3;"#,
)
.bind(stream_key)
.bind(date_key)
.bind(file_name)
.fetch_one(&pool)
.await?;
.await
{
Ok(v) => v,
Err(sqlx::Error::RowNotFound) => continue,
Err(e) => return Err(e.into()),
};
match ret {
Some(v) => ids.push(v.to_string()),
None => {
@ -283,11 +295,13 @@ INSERT INTO file_list (org, stream, date, file, deleted, min_ts, max_ts, records
}
}
// delete files by ids
let sql = format!(
"DELETE FROM file_list_deleted WHERE id IN({});",
ids.join(",")
);
_ = pool.execute(sql.as_str()).await?;
if !ids.is_empty() {
let sql = format!(
"DELETE FROM file_list_deleted WHERE id IN({});",
ids.join(",")
);
_ = pool.execute(sql.as_str()).await?;
}
}
Ok(())
}

View File

@ -469,6 +469,9 @@ INSERT INTO file_list (org, stream, date, file, deleted, min_ts, max_ts, records
}
pub async fn batch_add(client: &Pool<Sqlite>, files: &[FileKey]) -> Result<()> {
if files.is_empty() {
return Ok(());
}
let chunks = files.chunks(100);
for files in chunks {
let mut tx = client.begin().await?;
@ -529,6 +532,9 @@ pub async fn batch_add(client: &Pool<Sqlite>, files: &[FileKey]) -> Result<()> {
}
pub async fn batch_remove(client: &Pool<Sqlite>, files: &[String]) -> Result<()> {
if files.is_empty() {
return Ok(());
}
let chunks = files.chunks(100);
for files in chunks {
// get ids of the files
@ -537,14 +543,19 @@ pub async fn batch_remove(client: &Pool<Sqlite>, files: &[String]) -> Result<()>
for file in files {
let (stream_key, date_key, file_name) =
parse_file_key_columns(file).map_err(|e| Error::Message(e.to_string()))?;
let ret: Option<i64> = sqlx::query_scalar(
let ret: Option<i64> = match sqlx::query_scalar(
r#"SELECT id FROM file_list WHERE stream = $1 AND date = $2 AND file = $3;"#,
)
.bind(stream_key)
.bind(date_key)
.bind(file_name)
.fetch_one(&pool)
.await?;
.await
{
Ok(v) => v,
Err(sqlx::Error::RowNotFound) => continue,
Err(e) => return Err(e.into()),
};
match ret {
Some(v) => ids.push(v.to_string()),
None => {
@ -555,8 +566,10 @@ pub async fn batch_remove(client: &Pool<Sqlite>, files: &[String]) -> Result<()>
}
}
// delete files by ids
let sql = format!("DELETE FROM file_list WHERE id IN({});", ids.join(","));
_ = pool.execute(sql.as_str()).await?;
if !ids.is_empty() {
let sql = format!("DELETE FROM file_list WHERE id IN({});", ids.join(","));
_ = pool.execute(sql.as_str()).await?;
}
}
Ok(())
}
@ -597,6 +610,9 @@ pub async fn batch_add_deleted(
}
pub async fn batch_remove_deleted(client: &Pool<Sqlite>, files: &[String]) -> Result<()> {
if files.is_empty() {
return Ok(());
}
let chunks = files.chunks(100);
for files in chunks {
// get ids of the files
@ -605,14 +621,19 @@ pub async fn batch_remove_deleted(client: &Pool<Sqlite>, files: &[String]) -> Re
for file in files {
let (stream_key, date_key, file_name) =
parse_file_key_columns(file).map_err(|e| Error::Message(e.to_string()))?;
let ret: Option<i64> = sqlx::query_scalar(
let ret: Option<i64> = match sqlx::query_scalar(
r#"SELECT id FROM file_list_deleted WHERE stream = $1 AND date = $2 AND file = $3;"#,
)
.bind(stream_key)
.bind(date_key)
.bind(file_name)
.fetch_one(&pool)
.await?;
.await
{
Ok(v) => v,
Err(sqlx::Error::RowNotFound) => continue,
Err(e) => return Err(e.into()),
};
match ret {
Some(v) => ids.push(v.to_string()),
None => {
@ -624,11 +645,13 @@ pub async fn batch_remove_deleted(client: &Pool<Sqlite>, files: &[String]) -> Re
}
}
// delete files by ids
let sql = format!(
"DELETE FROM file_list_deleted WHERE id IN({});",
ids.join(",")
);
_ = pool.execute(sql.as_str()).await?;
if !ids.is_empty() {
let sql = format!(
"DELETE FROM file_list_deleted WHERE id IN({});",
ids.join(",")
);
_ = pool.execute(sql.as_str()).await?;
}
}
Ok(())
}

View File

@ -343,9 +343,7 @@ impl StreamParams {
pub struct SchemaEvolution {
pub schema_compatible: bool,
pub types_delta: Option<Vec<Field>>,
pub schema_fields: Vec<Field>,
pub is_schema_changed: bool,
pub record_schema: Schema,
}
pub struct SchemaRecords {

View File

@ -16,7 +16,6 @@
use serde_json::value::{Map, Value};
const KEY_SEPARATOR: &str = "_";
const FORMAT_KEY_ENABLED: bool = true;
/// Flattens the provided JSON object (`current`).
///
@ -27,7 +26,28 @@ const FORMAT_KEY_ENABLED: bool = true;
/// # Errors
/// Will return `Err` if `to_flatten` it's not an object, or if flattening the
/// object would result in two or more keys colliding.
pub fn flatten(to_flatten: &Value) -> Result<Value, anyhow::Error> {
pub fn flatten(to_flatten: Value) -> Result<Value, anyhow::Error> {
// quick check to see if we have an object`
let to_flatten = match to_flatten {
Value::Object(v) => {
if v.is_empty() || !v.iter().any(|(_k, v)| v.is_object() || v.is_array()) {
if v.iter().all(|(k, _v)| check_key(k)) {
return Ok(Value::Object(v));
}
let mut formatted_map = Map::<String, Value>::with_capacity(v.len());
for (mut k, v) in v.into_iter() {
format_key(&mut k);
formatted_map.insert(k, v);
}
return Ok(Value::Object(formatted_map));
}
Value::Object(v)
}
_ => {
return Err(anyhow::anyhow!("flatten value must be an object"));
}
};
let mut flat = Map::<String, Value>::new();
flatten_value(to_flatten, "".to_owned(), 0, &mut flat).map(|_x| Value::Object(flat))
}
@ -36,38 +56,21 @@ pub fn flatten(to_flatten: &Value) -> Result<Value, anyhow::Error> {
/// its 0-based depth is `depth`. The result is stored in the JSON object
/// `flattened`.
fn flatten_value(
current: &Value,
current: Value,
parent_key: String,
depth: u32,
flattened: &mut Map<String, Value>,
) -> Result<(), anyhow::Error> {
if depth == 0 {
match current {
Value::Object(map) => {
if map.is_empty() {
return Ok(()); // If the top level input object is empty there is nothing to do
}
}
_ => return Err(anyhow::anyhow!("flatten value must be an object")),
match current {
Value::Object(map) => {
flatten_object(map, &parent_key, depth, flattened)?;
}
}
if let Some(current) = current.as_object() {
flatten_object(current, &parent_key, depth, flattened)?;
} else if let Some(current) = current.as_array() {
flatten_array(current, &parent_key, depth, flattened)?;
} else {
if flattened.contains_key(&parent_key) {
// log::error!(
// "flatten will be overwritten current: {:?}, new key: {}, val:
// {}, ", flattened,
// parent_key,
// current.clone(),
// );
// return Err(anyhow::anyhow!( "flatten will be overwritten a key
// {}", parent_key));
Value::Array(arr) => {
flatten_array(arr, &parent_key, depth, flattened)?;
}
_ => {
flattened.insert(parent_key, current);
}
flattened.insert(parent_key, current.clone());
}
Ok(())
}
@ -76,17 +79,13 @@ fn flatten_value(
/// 0-based depth is `depth`. The result is stored in the JSON object
/// `flattened`.
fn flatten_object(
current: &Map<String, Value>,
current: Map<String, Value>,
parent_key: &str,
depth: u32,
flattened: &mut Map<String, Value>,
) -> Result<(), anyhow::Error> {
for (k, v) in current.iter() {
let k = if FORMAT_KEY_ENABLED {
format_key(k)
} else {
k.to_string()
};
for (mut k, v) in current.into_iter() {
format_key(&mut k);
let parent_key = if depth > 0 {
format!("{}{}{}", parent_key, KEY_SEPARATOR, k)
} else {
@ -101,7 +100,7 @@ fn flatten_object(
/// 0-based depth is `depth`. The result is stored in the JSON object
/// `flattened`.
fn flatten_array(
current: &[Value],
current: Vec<Value>,
parent_key: &str,
depth: u32,
flattened: &mut Map<String, Value>,
@ -114,30 +113,32 @@ fn flatten_array(
// flatten_value(obj, parent_key, depth + 1, flattened)?;
// }
let v = Value::String(Value::Array(current.to_vec()).to_string());
flatten_value(&v, parent_key.to_string(), depth, flattened)?;
flatten_value(v, parent_key.to_string(), depth, flattened)?;
Ok(())
}
/// We need every character in the key to be lowercase alphanumeric or
/// underscore
pub fn format_key(key: &str) -> String {
if key
.chars()
.all(|c| c.is_lowercase() || c.is_numeric() || c == '_')
{
return key.to_string();
pub fn format_key(key: &mut String) {
if check_key(key) {
return;
}
let mut key_chars = key.chars().collect::<Vec<_>>();
for c in key_chars.iter_mut() {
if c.is_lowercase() || c.is_numeric() {
continue;
} else if c.is_uppercase() {
*c = c.to_lowercase().next().unwrap();
} else {
*c = '_';
}
}
*key = key_chars.into_iter().collect::<String>();
}
fn check_key(key: &str) -> bool {
key.chars()
.map(|c| {
if c.is_lowercase() || c.is_numeric() {
c
} else if c.is_uppercase() {
c.to_lowercase().next().unwrap()
} else {
'_'
}
})
.collect::<String>()
.all(|c| c.is_lowercase() || c.is_numeric() || c == '_')
}
#[cfg(test)]
@ -146,10 +147,42 @@ mod tests {
use super::*;
#[test]
fn test_check_key_lowercase() {
assert_eq!(check_key("hello"), true);
}
#[test]
fn test_check_key_numeric() {
assert_eq!(check_key("123"), true);
}
#[test]
fn test_check_key_underscore() {
assert_eq!(check_key("my_key"), true);
}
#[test]
fn test_check_key_mixed_case() {
assert_eq!(check_key("Hello_World"), false);
}
#[test]
fn test_check_key_special_characters() {
assert_eq!(check_key("key!"), false);
}
#[test]
fn object_with_plain_values() {
let obj = json!({"int": 1, "float": 2.0, "str": "a", "bool": true, "null": null});
assert_eq!(obj, flatten(&obj).unwrap());
assert_eq!(obj, flatten(obj.clone()).unwrap());
}
#[test]
fn object_with_plain_values_with_format_key() {
let obj = json!({"int": 1, "float": 2.0, "str": "a", "bool": true, "null": null});
let obj2 = json!({"int": 1, "Float": 2.0, "str": "a", "bool": true, "null": null});
assert_eq!(obj, flatten(obj2).unwrap());
}
/// Ensures that when using `ArrayFormatting::Plain` both arrays and objects
@ -158,7 +191,7 @@ mod tests {
fn array_formatting_plain() {
let obj = json!({"s": {"a": [1, 2.0, "b", null, true]}});
assert_eq!(
flatten(&obj).unwrap(),
flatten(obj).unwrap(),
json!({
format!("s{k}a", k = KEY_SEPARATOR): "[1,2.0,\"b\",null,true]",
})
@ -169,7 +202,7 @@ mod tests {
fn nested_single_key_value() {
let obj = json!({"key": "value", "nested_key": {"key": "value"}});
assert_eq!(
flatten(&obj).unwrap(),
flatten(obj).unwrap(),
json!({"key": "value", "nested_key_key": "value"}),
);
}
@ -178,7 +211,7 @@ mod tests {
fn nested_multiple_key_value() {
let obj = json!({"key": "value", "nested_key": {"key1": "value1", "key2": "value2"}});
assert_eq!(
flatten(&obj).unwrap(),
flatten(obj).unwrap(),
json!({"key": "value", "nested_key_key1": "value1", "nested_key_key2": "value2"}),
);
}
@ -198,7 +231,7 @@ mod tests {
]
});
assert_eq!(
flatten(&obj).unwrap(),
flatten(obj).unwrap(),
json!({"simple_key": "simple_value", "key": "[\"value1\",{\"key\":\"value2\"},{\"nested_array\":[\"nested1\",\"nested2\",[\"nested3\",\"nested4\"]]}]"}),
);
}
@ -218,20 +251,20 @@ mod tests {
#[test]
fn empty_array() {
let obj = json!({"key": []});
assert_eq!(flatten(&obj).unwrap(), json!({}));
assert_eq!(flatten(obj).unwrap(), json!({}));
}
/// Ensure that empty objects are not present in the result
#[test]
fn empty_object() {
let obj = json!({"key": {}});
assert_eq!(flatten(&obj).unwrap(), json!({}));
assert_eq!(flatten(obj).unwrap(), json!({}));
}
#[test]
fn empty_top_object() {
let obj = json!({});
assert_eq!(flatten(&obj).unwrap(), json!({}));
assert_eq!(flatten(obj).unwrap(), json!({}));
}
/// Ensure that if all the end values of the JSON object are either `[]` or
@ -240,7 +273,7 @@ mod tests {
fn empty_complex_object() {
let obj = json!({"key": {"key2": {}, "key3": [[], {}, {"k": {}, "q": []}]}});
assert_eq!(
flatten(&obj).unwrap(),
flatten(obj).unwrap(),
json!({"key_key3": "[[],{},{\"k\":{},\"q\":[]}]"})
);
}
@ -248,25 +281,25 @@ mod tests {
#[test]
fn nested_object_with_empty_array_and_string() {
let obj = json!({"key": {"key2": [], "key3": "a"}});
assert_eq!(flatten(&obj).unwrap(), json!({"key_key3": "a"}));
assert_eq!(flatten(obj).unwrap(), json!({"key_key3": "a"}));
}
#[test]
fn nested_object_with_empty_object_and_string() {
let obj = json!({"key": {"key2": {}, "key3": "a"}});
assert_eq!(flatten(&obj).unwrap(), json!({"key_key3": "a"}));
assert_eq!(flatten(obj).unwrap(), json!({"key_key3": "a"}));
}
#[test]
fn empty_string_as_key() {
let obj = json!({"key": {"": "a"}});
assert_eq!(flatten(&obj).unwrap(), json!({"key_": "a"}));
assert_eq!(flatten(obj).unwrap(), json!({"key_": "a"}));
}
#[test]
fn empty_string_as_key_multiple_times() {
let obj = json!({"key": {"": {"": {"": "a"}}}});
assert_eq!(flatten(&obj).unwrap(), json!({"key___": "a"}));
assert_eq!(flatten(obj).unwrap(), json!({"key___": "a"}));
}
/// Flattening only makes sense for objects. Passing something else must
@ -279,7 +312,7 @@ mod tests {
let null = json!(null);
let array = json!([1, 2, 3]);
for j in [integer, string, boolean, null, array].iter() {
for j in [integer, string, boolean, null, array].into_iter() {
let res = flatten(j);
match res {
Err(_) => {} // Good
@ -291,7 +324,7 @@ mod tests {
#[test]
fn complex_array() {
let obj = json!({"a": [1, [2, [3, 4], 5], 6]});
assert_eq!(flatten(&obj).unwrap(), json!({"a": "[1,[2,[3,4],5],6]"}));
assert_eq!(flatten(obj).unwrap(), json!({"a": "[1,[2,[3,4],5],6]"}));
}
#[test]
@ -302,16 +335,16 @@ mod tests {
json!({"key": "value", "nested_key_key": "value", "nested_key_foo": "bar"}),
),
(
json!({"key+bar": "value", "@nested_key": {"key": "value", "Foo": "Bar"}}),
json!({"key_bar": "value", "_nested_key_key": "value", "_nested_key_foo": "Bar"}),
json!({"key+bar": "value", "@nested_key": {"#key": "value", "&Foo": "Bar"}}),
json!({"key_bar": "value", "_nested_key__key": "value", "_nested_key__foo": "Bar"}),
),
(
json!({"a": {"A.1": [1, [3, 4], 5], "A_2": 6}}),
json!({"a_a_1": "[1,[3,4],5]", "a_a_2": 6}),
),
];
for (input, expected) in datas.iter() {
assert_eq!(flatten(input).unwrap(), *expected);
for (input, expected) in datas.into_iter() {
assert_eq!(flatten(input).unwrap(), expected);
}
}
@ -350,7 +383,7 @@ mod tests {
"phonenumbers":"[{\"number\":\"555-555-1234\",\"type\":\"home\"},{\"number\":\"555-555-5678\",\"type\":\"work\"}]"
});
let output = flatten(&input).unwrap();
let output = flatten(input).unwrap();
assert_eq!(output, expected_output);
}
}
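
The flatten rewrite above takes the input `Value` by value, returns already-flat objects with normalized keys untouched, and lowercases or replaces every other key character in place via `format_key`, with `check_key` as the cheap pre-check. A standalone sketch of that key rule, using only `serde_json` and returning a new `String` instead of mutating (helper names are illustrative):

use serde_json::{json, Value};

/// Same rule as `check_key` above: every character must already be lowercase,
/// numeric, or an underscore.
fn key_is_normalized(key: &str) -> bool {
    key.chars().all(|c| c.is_lowercase() || c.is_numeric() || c == '_')
}

/// Same mapping as `format_key` above, written as a pure function: uppercase
/// letters are lowercased, anything else that is not allowed becomes '_'.
fn normalize_key(key: &str) -> String {
    key.chars()
        .map(|c| {
            if c.is_lowercase() || c.is_numeric() {
                c
            } else if c.is_uppercase() {
                c.to_lowercase().next().unwrap()
            } else {
                '_'
            }
        })
        .collect()
}

fn main() {
    assert!(key_is_normalized("my_key_1"));
    assert!(!key_is_normalized("Foo.Bar"));
    assert_eq!(normalize_key("Foo.Bar"), "foo_bar");

    // the fast path in `flatten` applies to objects like this one: already flat
    // and every key normalized, so the value can be returned unchanged
    if let Value::Object(map) = json!({"level": "info", "code": 200}) {
        assert!(map.keys().all(|k| key_is_normalized(k)));
    }
}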

View File

@ -11,13 +11,13 @@ anyhow.workspace = true
arrow-json.workspace = true
arrow-schema.workspace = true
aws-sdk-dynamodb.workspace = true
blake3.workspace = true
bytes.workspace = true
byteorder.workspace = true
chrono.workspace = true
dashmap.workspace = true
dotenv_config.workspace = true
dotenvy.workspace = true
hashbrown.workspace = true
hex.workspace = true
indexmap.workspace = true
itertools.workspace = true

View File

@ -278,6 +278,8 @@ pub struct Common {
pub bloom_filter_enabled: bool,
#[env_config(name = "ZO_BLOOM_FILTER_DEFAULT_FIELDS", default = "")]
pub bloom_filter_default_fields: String,
#[env_config(name = "ZO_BLOOM_FILTER_FORCE_DISABLED", default = false)]
pub bloom_filter_force_disabled: bool,
#[env_config(name = "ZO_TRACING_ENABLED", default = false)]
pub tracing_enabled: bool,
#[env_config(name = "OTEL_OTLP_HTTP_ENDPOINT", default = "")]
@ -361,10 +363,17 @@ pub struct Limit {
pub req_json_limit: usize,
#[env_config(name = "ZO_PAYLOAD_LIMIT", default = 209715200)]
pub req_payload_limit: usize,
#[env_config(name = "ZO_MAX_FILE_SIZE_ON_DISK", default = 32)] // MB
pub max_file_size_on_disk: u64,
#[env_config(name = "ZO_MAX_FILE_RETENTION_TIME", default = 600)] // seconds
pub max_file_retention_time: u64,
#[env_config(name = "ZO_MAX_FILE_SIZE_ON_DISK", default = 64)] // MB, per log file size on disk
pub max_file_size_on_disk: usize,
#[env_config(name = "ZO_MEM_FILE_MAX_SIZE", default = 256)] // MB, per log file size in memory
pub mem_file_max_size: usize,
#[env_config(name = "ZO_MEM_TABLE_MAX_SIZE", default = 0)]
// MB, total file size in memory, default is 50% of system memory
pub mem_table_max_size: usize,
#[env_config(name = "ZO_MEM_PERSIST_INTERVAL", default = 5)] // seconds
pub mem_persist_interval: u64,
#[env_config(name = "ZO_FILE_PUSH_INTERVAL", default = 10)] // seconds
pub file_push_interval: u64,
#[env_config(name = "ZO_FILE_MOVE_THREAD_NUM", default = 0)]
@ -607,9 +616,9 @@ pub fn init() -> Config {
if cfg.limit.query_thread_num == 0 {
cfg.limit.query_thread_num = cpu_num * 4;
}
// HACK for move_file_thread_num equal to CPU core
// HACK for move_file_thread_num equal to CPU core * 2
if cfg.limit.file_move_thread_num == 0 {
cfg.limit.file_move_thread_num = cpu_num;
cfg.limit.file_move_thread_num = cpu_num * 2;
}
// check common config
@ -623,7 +632,7 @@ pub fn init() -> Config {
}
// check memeory cache
if let Err(e) = check_memory_cache_config(&mut cfg) {
if let Err(e) = check_memory_config(&mut cfg) {
panic!("memory cache config error: {e}");
}
@ -840,7 +849,7 @@ fn check_sled_config(cfg: &mut Config) -> Result<(), anyhow::Error> {
Ok(())
}
fn check_memory_cache_config(cfg: &mut Config) -> Result<(), anyhow::Error> {
fn check_memory_config(cfg: &mut Config) -> Result<(), anyhow::Error> {
let mem_total = cgroup::get_memory_limit();
cfg.limit.mem_total = mem_total;
if cfg.memory_cache.max_size == 0 {
@ -867,6 +876,14 @@ fn check_memory_cache_config(cfg: &mut Config) -> Result<(), anyhow::Error> {
} else {
cfg.memory_cache.datafusion_max_size *= 1024 * 1024;
}
// for memtable limit check
cfg.limit.mem_file_max_size *= 1024 * 1024;
if cfg.limit.mem_table_max_size == 0 {
cfg.limit.mem_table_max_size = mem_total / 2; // 50%
} else {
cfg.limit.mem_table_max_size *= 1024 * 1024;
}
Ok(())
}
@ -1000,7 +1017,7 @@ mod tests {
cfg.memory_cache.max_size = 1024;
cfg.memory_cache.release_size = 1024;
check_memory_cache_config(&mut cfg).unwrap();
check_memory_config(&mut cfg).unwrap();
assert_eq!(cfg.memory_cache.max_size, 1024 * 1024 * 1024);
assert_eq!(cfg.memory_cache.release_size, 1024 * 1024 * 1024);
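
The new limits are read in megabytes: `ZO_MEM_FILE_MAX_SIZE` caps a single in-memory file, `ZO_MEM_TABLE_MAX_SIZE` caps the whole memtable (0 means half of the detected memory), and `ZO_MEM_PERSIST_INTERVAL` drives the persist loop. A small sketch of the byte conversion performed in `check_memory_config`; the struct and function names here are illustrative:

struct MemLimits {
    mem_file_max_size: usize,  // configured in MB
    mem_table_max_size: usize, // configured in MB, 0 = auto
}

fn apply_memory_defaults(limits: &mut MemLimits, mem_total_bytes: usize) {
    // per-file limit: MB -> bytes
    limits.mem_file_max_size *= 1024 * 1024;
    // total memtable budget: 0 means 50% of detected memory, otherwise MB -> bytes
    if limits.mem_table_max_size == 0 {
        limits.mem_table_max_size = mem_total_bytes / 2;
    } else {
        limits.mem_table_max_size *= 1024 * 1024;
    }
}

fn main() {
    let mut limits = MemLimits { mem_file_max_size: 256, mem_table_max_size: 0 };
    let mem_total = 8_usize * 1024 * 1024 * 1024; // pretend 8 GB detected via cgroup (64-bit target)
    apply_memory_defaults(&mut limits, mem_total);
    assert_eq!(limits.mem_file_max_size, 256 * 1024 * 1024);
    assert_eq!(limits.mem_table_max_size, 4 * 1024 * 1024 * 1024);
}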

View File

@ -1,89 +0,0 @@
// Copyright 2023 Zinc Labs Inc.
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU Affero General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Affero General Public License for more details.
//
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use arrow_schema::{Field, Schema};
use itertools::Itertools;
use super::schema_ext::SchemaExt;
#[derive(Debug, Default, Clone, PartialEq, Eq, Hash)]
pub struct Signature(pub [u8; 32]);
impl From<Signature> for String {
fn from(sig: Signature) -> Self {
hex::encode(sig.0)
}
}
pub fn get_fields_key(fields: &[Field]) -> String {
let mut hasher = blake3::Hasher::new();
fields.iter().sorted_by_key(|v| v.name()).for_each(|field| {
hasher.update(field.name().as_bytes());
hasher.update(field.data_type().to_string().as_bytes());
});
Signature(hasher.finalize().into()).into()
}
pub fn get_schema_key(schema: &Schema) -> String {
get_fields_key(&schema.to_cloned_fields())
}
pub fn get_schema_key_xxh3(schema: &Schema) -> String {
get_fields_key_xxh3(&schema.to_cloned_fields())
}
pub fn get_fields_key_xxh3(fields: &[Field]) -> String {
let mut hasher = xxhash_rust::xxh3::Xxh3::new();
for field in fields.iter().sorted_by_key(|v| v.name()) {
hasher.update(field.name().as_bytes());
hasher.update(field.data_type().to_string().as_bytes());
}
let hash = hasher.digest();
format!("{hash:x}")
}
#[cfg(test)]
mod tests {
use arrow_schema::DataType;
use super::*;
#[tokio::test]
async fn test_ingest() {
let mut schmea_vec = vec![
Field::new("log", DataType::Utf8, false),
Field::new("pod_id", DataType::Int64, false),
];
for i in 0..30 {
schmea_vec.push(Field::new(format!("field_{}", i), DataType::Utf8, false));
}
let schema = Schema::new(schmea_vec);
let start1 = std::time::Instant::now();
for _ in 0..100000 {
get_schema_key(&schema);
}
log::info!("Time taken for blake3: {:?}", start1.elapsed());
let start2 = std::time::Instant::now();
for _ in 0..100000 {
get_schema_key_xxh3(&schema);
}
log::info!("Time taken for xxh3: {:?}", start2.elapsed());
}
}

View File

@ -15,7 +15,6 @@
pub(crate) mod cgroup;
pub(crate) mod file;
pub mod hasher;
pub mod parquet;
pub mod rand;
pub mod schema;

View File

@ -43,7 +43,7 @@ pub fn new_parquet_writer<'a>(
.set_dictionary_enabled(true)
.set_encoding(Encoding::PLAIN)
.set_sorting_columns(Some(
[SortingColumn::new(sort_column_id as i32, false, false)].to_vec(),
[SortingColumn::new(sort_column_id as i32, true, false)].to_vec(),
))
.set_column_dictionary_enabled(
ColumnPath::from(vec![CONFIG.common.column_timestamp.to_string()]),
@ -66,6 +66,10 @@ pub fn new_parquet_writer<'a>(
writer_props = writer_props
.set_column_dictionary_enabled(ColumnPath::from(vec![field.to_string()]), false);
}
for field in BLOOM_FILTER_DEFAULT_FIELDS.iter() {
writer_props = writer_props
.set_column_dictionary_enabled(ColumnPath::from(vec![field.to_string()]), false);
}
// Bloom filter stored by row_group, so if the num_rows can limit to
// PARQUET_MAX_ROW_GROUP_SIZE,
let num_rows = metadata.records as u64;
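
Two writer-property changes above: the timestamp sorting column is now marked descending (the second argument of `SortingColumn::new`), and dictionary encoding is switched off for the default bloom-filter columns as well. A condensed sketch of how those settings combine, using the same parquet crate APIs that appear in the diff; the function itself is illustrative:

use parquet::{
    file::properties::WriterProperties,
    format::SortingColumn,
    schema::types::ColumnPath,
};

fn writer_props(timestamp_col_idx: i32, bloom_filter_fields: &[&str]) -> WriterProperties {
    let mut builder = WriterProperties::builder()
        // record the timestamp column as the sorting column, descending
        .set_sorting_columns(Some(vec![SortingColumn::new(timestamp_col_idx, true, false)]));
    // disable dictionary encoding for every column that carries a bloom filter
    for field in bloom_filter_fields {
        builder = builder
            .set_column_dictionary_enabled(ColumnPath::from(vec![field.to_string()]), false);
    }
    builder.build()
}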

View File

@ -21,8 +21,9 @@ use std::{
use arrow_json::reader;
use arrow_schema::{ArrowError, DataType, Field, Schema};
use serde_json::{Map, Value};
use crate::meta::stream::StreamType;
use crate::{meta::stream::StreamType, FxIndexMap};
pub fn infer_json_schema<R: BufRead>(
reader: R,
@ -42,21 +43,118 @@ pub fn infer_json_schema_from_seekable<R: BufRead + Seek>(
Ok(fix_schema(schema, stream_type))
}
pub fn infer_json_schema_from_iterator<I, V>(
pub fn infer_json_schema_from_values<I, V>(
value_iter: I,
stream_type: impl Into<StreamType>,
) -> Result<Schema, ArrowError>
where
I: Iterator<Item = Result<V, ArrowError>>,
V: Borrow<serde_json::Value>,
I: Iterator<Item = V>,
V: Borrow<Value>,
{
let schema = reader::infer_json_schema_from_iterator(value_iter)?;
Ok(fix_schema(schema, stream_type.into()))
let mut fields = None;
for value in value_iter {
match value.borrow() {
Value::Object(v) => {
if fields.is_none() {
fields = Some(FxIndexMap::with_capacity_and_hasher(
v.len(),
Default::default(),
));
}
infer_json_schema_from_object(fields.as_mut().unwrap(), v)?;
}
_ => {
return Err(ArrowError::SchemaError(
"Cannot infer schema from non-object value".to_string(),
));
}
}
}
let fields = fields.unwrap_or_default();
let fields = fields
.into_iter()
.map(|(_, field)| field)
.collect::<Vec<_>>();
Ok(fix_schema(Schema::new(fields), stream_type.into()))
}
/// Fix the schema to ensure that the start_time and end_time fields are always
/// present with uint64 and that null fields are removed and sort the fields by
/// name.
fn infer_json_schema_from_object(
fields: &mut FxIndexMap<String, Field>,
value: &Map<String, Value>,
) -> Result<(), ArrowError> {
for (key, value) in value.iter() {
match value {
Value::String(_) => {
convet_data_type(fields, key, DataType::Utf8)?;
}
Value::Number(v) => {
if v.is_i64() {
convet_data_type(fields, key, DataType::Int64)?;
} else if v.is_u64() {
convet_data_type(fields, key, DataType::UInt64)?;
} else if v.is_f64() {
convet_data_type(fields, key, DataType::Float64)?;
} else {
return Err(ArrowError::SchemaError(
"Cannot infer schema from non-basic-number type value".to_string(),
));
}
}
Value::Bool(_) => {
convet_data_type(fields, key, DataType::Boolean)?;
}
Value::Null => {}
_ => {
return Err(ArrowError::SchemaError(
"Cannot infer schema from non-basic type value".to_string(),
));
}
}
}
Ok(())
}
fn convet_data_type(
fields: &mut FxIndexMap<String, Field>,
key: &str,
data_type: DataType,
) -> Result<(), ArrowError> {
let Some(f) = fields.get(key) else {
fields.insert(key.to_string(), Field::new(key, data_type, true));
return Ok(());
};
let f_type = f.data_type();
if f_type == &data_type {
return Ok(());
}
match (f_type, &data_type) {
(DataType::Utf8, _) => {}
(DataType::Int64, DataType::UInt64)
| (DataType::Int64, DataType::Float64)
| (DataType::Int64, DataType::Utf8) => {
fields.insert(key.to_string(), Field::new(key, data_type, true));
}
(DataType::UInt64, DataType::Float64) | (DataType::UInt64, DataType::Utf8) => {
fields.insert(key.to_string(), Field::new(key, data_type, true));
}
(DataType::Float64, DataType::Utf8) => {
fields.insert(key.to_string(), Field::new(key, data_type, true));
}
(DataType::Boolean, _) => {
fields.insert(key.to_string(), Field::new(key, data_type, true));
}
_ => {
return Err(ArrowError::SchemaError(format!(
"Cannot infer schema from conflicting types: {:?} and {:?}",
f_type, data_type
)));
}
}
Ok(())
}
/// Fix the schema to ensure that the start_time and end_time fields are always present with uint64
/// and that null fields are removed and sort the fields by name.
fn fix_schema(schema: Schema, stream_type: StreamType) -> Schema {
let mut fields = if stream_type == StreamType::Traces {
itertools::chain(
@ -71,8 +169,8 @@ fn fix_schema(schema: Schema, stream_type: StreamType) -> Schema {
}
}),
vec![
Arc::new(Field::new("start_time", DataType::UInt64, false)),
Arc::new(Field::new("end_time", DataType::UInt64, false)),
Arc::new(Field::new("start_time", DataType::UInt64, true)),
Arc::new(Field::new("end_time", DataType::UInt64, true)),
],
)
.collect::<Vec<_>>()
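
`infer_json_schema_from_values` builds the field list in one pass and, when later values disagree with an earlier type, widens the field via `convet_data_type`: Int64 can become UInt64, Float64, or Utf8; UInt64 can become Float64 or Utf8; Float64 can become Utf8; an existing Utf8 field stays Utf8, and Boolean gives way to whatever is observed. A standalone sketch of that promotion table, using `arrow_schema` only (the helper is illustrative):

use arrow_schema::DataType;

/// Widened type for a field already seen as `current` and now observed as
/// `observed`, following the same promotion order as `convet_data_type` above;
/// `None` means the combination is rejected as a conflict.
fn widen(current: &DataType, observed: &DataType) -> Option<DataType> {
    use DataType::*;
    if current == observed {
        return Some(current.clone());
    }
    match (current, observed) {
        (Utf8, _) => Some(Utf8),                                          // strings absorb everything
        (Int64, UInt64) | (Int64, Float64) | (Int64, Utf8) => Some(observed.clone()),
        (UInt64, Float64) | (UInt64, Utf8) => Some(observed.clone()),
        (Float64, Utf8) => Some(Utf8),
        (Boolean, _) => Some(observed.clone()),                           // booleans give way to anything
        _ => None,                                                        // e.g. Float64 then Int64 is a conflict
    }
}

fn main() {
    assert_eq!(widen(&DataType::Int64, &DataType::Float64), Some(DataType::Float64));
    assert_eq!(widen(&DataType::Utf8, &DataType::Int64), Some(DataType::Utf8));
    assert_eq!(widen(&DataType::Float64, &DataType::Int64), None);
}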

View File

@ -13,9 +13,9 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use arrow_schema::{Field, Schema};
use std::hash::{Hash, Hasher};
use super::hasher::get_fields_key_xxh3;
use arrow_schema::{Field, Schema};
/// SchemaExt helper...
pub trait SchemaExt {
@ -29,6 +29,8 @@ impl SchemaExt for Schema {
}
fn hash_key(&self) -> String {
get_fields_key_xxh3(&self.to_cloned_fields())
let mut hasher = xxhash_rust::xxh3::Xxh3::new();
self.hash(&mut hasher);
format!("{:x}", hasher.finish())
}
}
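
`hash_key` now feeds the whole `Schema` through `std::hash::Hash` into an xxh3 hasher and renders the 64-bit digest as hex, replacing the deleted blake3/xxh3 helpers in `hasher.rs`. One consequence visible in the diff: field order (and schema metadata) now influence the key, whereas the removed helper sorted fields by name first. A self-contained usage sketch, assuming the `xxhash-rust` crate with the `xxh3` feature:

use std::hash::{Hash, Hasher};

use arrow_schema::{DataType, Field, Schema};

fn hash_key(schema: &Schema) -> String {
    // same approach as the new `hash_key` above: hash the whole Schema with the
    // std Hash impl into xxh3 and render the 64-bit digest as hex
    let mut hasher = xxhash_rust::xxh3::Xxh3::new();
    schema.hash(&mut hasher);
    format!("{:x}", hasher.finish())
}

fn main() {
    let schema = Schema::new(vec![
        Field::new("log", DataType::Utf8, false),
        Field::new("pod_id", DataType::Int64, false),
    ]);
    println!("schema key: {}", hash_key(&schema));
}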

View File

@ -102,7 +102,10 @@ pub async fn multi(
)
.await
{
Ok(v) => MetaHttpResponse::json(v),
Ok(v) => match v.code {
503 => HttpResponse::ServiceUnavailable().json(v),
_ => MetaHttpResponse::json(v),
},
Err(e) => {
log::error!("Error processing request: {:?}", e);
HttpResponse::BadRequest().json(MetaHttpResponse::error(
@ -148,7 +151,10 @@ pub async fn json(
)
.await
{
Ok(v) => MetaHttpResponse::json(v),
Ok(v) => match v.code {
503 => HttpResponse::ServiceUnavailable().json(v),
_ => MetaHttpResponse::json(v),
},
Err(e) => {
log::error!("Error processing request: {:?}", e);
HttpResponse::BadRequest().json(MetaHttpResponse::error(
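
The `multi` and `json` ingestion handlers now inspect the service response code and return 503 Service Unavailable instead of wrapping it in a 200, which is likely how the new memtable overflow condition (see `check_memtable_size` further down) reaches clients. A minimal sketch of that mapping with illustrative types:

use actix_web::HttpResponse;
use serde::Serialize;

#[derive(Serialize)]
struct IngestionResponse {
    code: u16,
    status: String,
}

// A 503 from the ingestion service is surfaced as 503 Service Unavailable;
// everything else keeps the previous JSON-in-200 behaviour.
fn to_http_response(v: IngestionResponse) -> HttpResponse {
    match v.code {
        503 => HttpResponse::ServiceUnavailable().json(v),
        _ => HttpResponse::Ok().json(v),
    }
}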

View File

@ -12,6 +12,7 @@ arrow-schema.workspace = true
bytes.workspace = true
byteorder.workspace = true
chrono.workspace = true
futures.workspace = true
hashbrown.workspace = true
indexmap.workspace = true
itertools.workspace = true

View File

@ -25,9 +25,6 @@ pub enum Error {
WalError {
source: wal::Error,
},
Message {
message: String,
},
OpenFileError {
source: io::Error,
path: PathBuf,
@ -90,4 +87,8 @@ pub enum Error {
WriteParquetRecordBatchError {
source: parquet::errors::ParquetError,
},
TokioJoinError {
source: tokio::task::JoinError,
},
MemoryTableOverflowError {},
}

View File

@ -16,14 +16,15 @@
use std::{path::PathBuf, sync::Arc};
use arrow_schema::Schema;
use config::metrics;
use config::{metrics, CONFIG};
use futures::future::try_join_all;
use once_cell::sync::Lazy;
use snafu::ResultExt;
use tokio::time;
use tokio::{sync::Semaphore, task};
use crate::{
entry::RecordBatchEntry,
errors::{DeleteFileSnafu, RenameFileSnafu, Result, WriteDataSnafu},
errors::{DeleteFileSnafu, RenameFileSnafu, Result, TokioJoinSnafu, WriteDataSnafu},
memtable::MemTable,
rwmap::RwIndexMap,
writer::WriterKey,
@ -63,7 +64,8 @@ impl Immutable {
}
}
pub(crate) async fn persist(&self, wal_path: &PathBuf) -> Result<()> {
pub(crate) async fn persist(&self, wal_path: &PathBuf) -> Result<i64> {
let mut persist_size = 0;
// 1. dump memtable to disk
let paths = self
.memtable
@ -73,39 +75,70 @@ impl Immutable {
let done_path = wal_path.with_extension("lock");
let lock_data = paths
.iter()
.map(|p| p.to_string_lossy())
.map(|(p, _)| p.to_string_lossy())
.collect::<Vec<_>>()
.join("\n");
std::fs::write(&done_path, lock_data.as_bytes()).context(WriteDataSnafu)?;
// 3. delete wal file
std::fs::remove_file(wal_path).context(DeleteFileSnafu { path: wal_path })?;
// 4. rename the tmp files to parquet files
for path in paths {
for (path, size) in paths {
persist_size += size;
let parquet_path = path.with_extension("parquet");
std::fs::rename(&path, &parquet_path).context(RenameFileSnafu { path: &path })?;
}
// 5. delete the lock file
std::fs::remove_file(&done_path).context(DeleteFileSnafu { path: &done_path })?;
Ok(())
Ok(persist_size)
}
}
pub(crate) async fn persist() -> Result<()> {
loop {
let r = IMMUTABLES.read().await;
let Some((path, immutable)) = r.first() else {
let r = IMMUTABLES.read().await;
let n = r.len();
let mut paths = Vec::with_capacity(n);
for item in r.iter() {
if paths.len() >= n {
break;
};
let path = path.clone();
// persist entry to local disk
immutable.persist(&path).await?;
drop(r);
// remove entry from IMMUTABLES
IMMUTABLES.write().await.remove(&path);
metrics::INGEST_MEMTABLE_FILES.with_label_values(&[]).dec();
time::sleep(time::Duration::from_millis(10)).await;
}
paths.push(item.0.clone());
}
drop(r);
let mut tasks = Vec::with_capacity(paths.len());
let semaphore = Arc::new(Semaphore::new(CONFIG.limit.file_move_thread_num));
for path in paths {
let permit = semaphore.clone().acquire_owned().await.unwrap();
let task: task::JoinHandle<Result<Option<(PathBuf, i64)>>> = task::spawn(async move {
let r = IMMUTABLES.read().await;
let Some(immutable) = r.get(&path) else {
drop(permit);
return Ok(None);
};
// persist entry to local disk
let ret = immutable.persist(&path).await;
drop(permit);
ret.map(|size| Some((path, size)))
});
tasks.push(task);
}
// remove entry from IMMUTABLES
let tasks = try_join_all(tasks).await.context(TokioJoinSnafu)?;
let mut rw = IMMUTABLES.write().await;
for task in tasks {
if let Some((path, size)) = task? {
log::info!("[INGESTER] persist file: {:?}, size: {}", &path, size);
// remove entry
rw.remove(&path);
// update metrics
metrics::INGEST_MEMTABLE_BYTES
.with_label_values(&[])
.sub(size);
metrics::INGEST_MEMTABLE_FILES.with_label_values(&[]).dec();
}
}
rw.shrink_to_fit();
Ok(())
}
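
`persist()` now snapshots the list of immutable files, spawns one task per file with concurrency bounded by `file_move_thread_num` permits, joins them with `try_join_all`, and only then removes the persisted entries and adjusts the metrics under the write lock. A condensed sketch of that bounded fan-out; `do_persist` is a stand-in for `Immutable::persist`, and `anyhow` is used only to keep the sketch short:

use std::sync::Arc;

use futures::future::try_join_all;
use tokio::{sync::Semaphore, task};

async fn persist_all(paths: Vec<String>, max_parallel: usize) -> anyhow::Result<i64> {
    let semaphore = Arc::new(Semaphore::new(max_parallel));
    let mut tasks = Vec::with_capacity(paths.len());
    for path in paths {
        let permit = semaphore.clone().acquire_owned().await.unwrap();
        let task: task::JoinHandle<anyhow::Result<i64>> = task::spawn(async move {
            let size = do_persist(&path).await?;
            drop(permit); // release the slot as soon as this file is done
            Ok(size)
        });
        tasks.push(task);
    }
    // fails if any task panicked; individual persist errors surface below
    let results = try_join_all(tasks).await?;
    let mut total = 0;
    for r in results {
        total += r?; // propagate the first persist error
    }
    Ok(total)
}

async fn do_persist(_path: &str) -> anyhow::Result<i64> {
    Ok(0) // placeholder: the real code dumps the memtable and renames files
}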

View File

@ -25,8 +25,7 @@ mod writer;
pub use entry::Entry;
pub use immutable::read_from_immutable;
use tokio::time;
pub use writer::{get_writer, read_from_memtable};
pub use writer::{check_memtable_size, get_writer, read_from_memtable};
pub async fn init() -> errors::Result<()> {
// check uncompleted parquet files, need delete those files
@ -37,8 +36,10 @@ pub async fn init() -> errors::Result<()> {
// start a job to dump immutable data to disk
tokio::task::spawn(async move {
// immutable persist every 10 seconds
let mut interval = time::interval(time::Duration::from_secs(10));
// immutable persist every 10 (default) seconds
let mut interval = tokio::time::interval(tokio::time::Duration::from_secs(
config::CONFIG.limit.mem_persist_interval,
));
interval.tick().await; // the first tick is immediate
loop {
if let Err(e) = immutable::persist().await {

View File

@ -60,7 +60,7 @@ impl MemTable {
thread_id: usize,
org_id: &str,
stream_type: &str,
) -> Result<Vec<PathBuf>> {
) -> Result<Vec<(PathBuf, i64)>> {
let mut paths = Vec::new();
let r = self.streams.read().await;
for (stream_name, stream) in r.iter() {

View File

@ -71,7 +71,7 @@ impl Partition {
org_id: &str,
stream_type: &str,
stream_name: &str,
) -> Result<Vec<PathBuf>> {
) -> Result<Vec<(PathBuf, i64)>> {
let r = self.files.read().await;
let mut paths = Vec::with_capacity(r.len());
let mut path = PathBuf::from(&CONFIG.common.data_wal_dir);
@ -119,9 +119,6 @@ impl Partition {
.context(WriteFileSnafu { path: path.clone() })?;
// update metrics
metrics::INGEST_MEMTABLE_BYTES
.with_label_values(&[])
.sub(file_meta.original_size);
metrics::INGEST_WAL_USED_BYTES
.with_label_values(&[&org_id, &stream_name, stream_type])
.add(buf_parquet.len() as i64);
@ -129,7 +126,7 @@ impl Partition {
.with_label_values(&[&org_id, &stream_name, stream_type])
.inc_by(buf_parquet.len() as u64);
paths.push(path);
paths.push((path, file_meta.original_size));
}
Ok(paths)
}

View File

@ -61,7 +61,7 @@ impl Stream {
org_id: &str,
stream_type: &str,
stream_name: &str,
) -> Result<Vec<PathBuf>> {
) -> Result<Vec<(PathBuf, i64)>> {
let mut paths = Vec::new();
let r = self.partitions.read().await;
for (_, partition) in r.iter() {

View File

@ -20,7 +20,7 @@ use std::{
sync::Arc,
};
use config::{utils::schema::infer_json_schema_from_iterator, CONFIG};
use config::{utils::schema::infer_json_schema_from_values, CONFIG};
use snafu::ResultExt;
use crate::{errors::*, immutable, memtable, writer::WriterKey};
@ -34,17 +34,14 @@ use crate::{errors::*, immutable, memtable, writer::WriterKey};
// 5. delete the lock file
//
// so, there are some cases that the process is not completed:
// 1. the process is killed before step 2, so there are some .par files and have
// no lock file, need delete those files
// 2. the process is killed before step 3, so there are some .par files and have
// lock file, the files actually wrote to disk completely, need to continue
// step 3, 4 and 5
// 3. the process is killed before step 4, so there are some .par files and have
// lock file, the files actually wrote to disk completely, need to continue
// step 4 and 5
// 4. the process is killed before step 5, so there are some .parquet files and
// have lock file, the files actually wrote to disk completely, need to
// continue step 5
// 1. the process is killed before step 2, so there are some .par files and have no lock file, need
// delete those files
// 2. the process is killed before step 3, so there are some .par files and have lock file, the
// files actually wrote to disk completely, need to continue step 3, 4 and 5
// 3. the process is killed before step 4, so there are some .par files and have lock file, the
// files actually wrote to disk completely, need to continue step 4 and 5
// 4. the process is killed before step 5, so there are some .parquet files and have lock file, the
// files actually wrote to disk completely, need to continue step 5
pub(crate) async fn check_uncompleted_parquet_files() -> Result<()> {
// 1. get all .lock files
let wal_dir = PathBuf::from(&CONFIG.common.data_wal_dir).join("logs");
@ -54,8 +51,7 @@ pub(crate) async fn check_uncompleted_parquet_files() -> Result<()> {
})?;
let lock_files = scan_files(wal_dir, "lock");
// 2. check if there is a .wal file with the same name, delete it and rename the
// .par file to .parquet
// 2. check if there is a .wal file with same name, delete it and rename the .par to .parquet
for lock_file in lock_files.iter() {
log::warn!("found uncompleted wal file: {:?}", lock_file);
let wal_file = lock_file.with_extension("wal");
@ -151,9 +147,8 @@ pub(crate) async fn replay_wal_files() -> Result<()> {
let entry = super::Entry::from_bytes(&entry)?;
i += 1;
total += entry.data.len();
let schema =
infer_json_schema_from_iterator(entry.data.iter().cloned().map(Ok), stream_type)
.context(InferJsonSchemaSnafu)?;
let schema = infer_json_schema_from_values(entry.data.iter().cloned(), stream_type)
.context(InferJsonSchemaSnafu)?;
memtable.write(Arc::new(schema), entry).await?;
}
log::warn!(

View File

@ -23,7 +23,7 @@ use std::{
use arrow_schema::Schema;
use chrono::{Duration, Utc};
use config::CONFIG;
use config::{metrics, CONFIG};
use once_cell::sync::Lazy;
use snafu::ResultExt;
use tokio::sync::{Mutex, RwLock};
@ -60,6 +60,16 @@ pub struct Writer {
created_at: AtomicI64,
}
// check total memory size
pub fn check_memtable_size() -> Result<()> {
let total_mem_size = metrics::INGEST_MEMTABLE_BYTES.with_label_values(&[]).get();
if total_mem_size >= CONFIG.limit.mem_table_max_size as i64 {
Err(Error::MemoryTableOverflowError {})
} else {
Ok(())
}
}
/// Get a writer for a given org_id and stream_type
pub async fn get_writer(thread_id: usize, org_id: &str, stream_type: &str) -> Arc<Writer> {
let key = WriterKey::new(org_id, stream_type);
@ -103,6 +113,13 @@ impl Writer {
let wal_dir = PathBuf::from(&CONFIG.common.data_wal_dir)
.join("logs")
.join(thread_id.to_string());
log::info!(
"[INGESTER] create file: {}/{}/{}/{}.wal",
wal_dir.display().to_string(),
&key.org_id,
&key.stream_type,
wal_id
);
Self {
thread_id,
key: key.clone(),
@ -112,7 +129,7 @@ impl Writer {
&key.org_id,
&key.stream_type,
wal_id,
CONFIG.limit.max_file_size_on_disk,
CONFIG.limit.max_file_size_on_disk as u64,
)
.expect("wal file create error"),
)),
@ -136,10 +153,17 @@ impl Writer {
&self.key.org_id,
&self.key.stream_type,
wal_id,
CONFIG.limit.max_file_size_on_disk,
CONFIG.limit.max_file_size_on_disk as u64,
)
.context(WalSnafu)?;
let old_wal = std::mem::replace(&mut *wal, new_wal);
log::info!(
"[INGESTER] create file: {}/{}/{}/{}.wal",
self.thread_id,
&self.key.org_id,
&self.key.stream_type,
wal_id
);
// rotation memtable
let mut mem = self.memtable.write().await;
@ -180,9 +204,10 @@ impl Writer {
/// Check if the wal file size is over the threshold or the file is too old
async fn check_threshold(&self, written_size: (usize, usize), data_size: usize) -> bool {
let (compressed_size, _uncompressed_size) = written_size;
let (compressed_size, uncompressed_size) = written_size;
compressed_size > 0
&& (compressed_size + data_size > CONFIG.limit.max_file_size_on_disk as usize
&& (compressed_size + data_size > CONFIG.limit.max_file_size_on_disk
|| uncompressed_size + data_size > CONFIG.limit.mem_file_max_size
|| self.created_at.load(Ordering::Relaxed)
+ Duration::seconds(CONFIG.limit.max_file_retention_time as i64)
.num_microseconds()
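
`check_memtable_size` compares the `INGEST_MEMTABLE_BYTES` gauge against the new `mem_table_max_size` budget and returns `MemoryTableOverflowError` once the budget is reached, giving ingestion a cheap back-pressure gate (the gauge is decremented again in `immutable::persist` above once files hit disk). A standalone sketch of the same gate with a plain atomic standing in for the Prometheus gauge:

use std::sync::atomic::{AtomicI64, Ordering};

static MEMTABLE_BYTES: AtomicI64 = AtomicI64::new(0);

#[derive(Debug)]
struct MemoryTableOverflow;

fn check_memtable_size(max_bytes: i64) -> Result<(), MemoryTableOverflow> {
    if MEMTABLE_BYTES.load(Ordering::Relaxed) >= max_bytes {
        Err(MemoryTableOverflow) // caller rejects the write
    } else {
        Ok(())
    }
}

fn write_batch(batch_size: i64, max_bytes: i64) -> Result<(), MemoryTableOverflow> {
    check_memtable_size(max_bytes)?;                          // gate before buffering
    MEMTABLE_BYTES.fetch_add(batch_size, Ordering::Relaxed);  // subtracted again after persist
    Ok(())
}

fn main() {
    let budget = 4 * 1024; // pretend the memtable budget is 4 KB
    assert!(write_batch(3 * 1024, budget).is_ok()); // 3 KB buffered
    assert!(write_batch(2 * 1024, budget).is_ok()); // soft limit: 5 KB buffered now
    assert!(write_batch(1, budget).is_err());       // gauge >= budget, write rejected
}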

View File

@ -26,7 +26,7 @@ use config::{
metrics,
utils::{
parquet::new_parquet_writer,
schema::{infer_json_schema_from_iterator, infer_json_schema_from_seekable},
schema::{infer_json_schema_from_seekable, infer_json_schema_from_values},
},
CONFIG,
};
@ -36,6 +36,7 @@ use tokio::{sync::Semaphore, task, time};
use crate::{
common::{
infra::{cluster, storage, wal},
meta::stream::StreamParams,
utils::{file::scan_files, json, stream::populate_file_meta},
},
service::{
@ -53,10 +54,10 @@ pub async fn run() -> Result<(), anyhow::Error> {
}
interval.tick().await;
if let Err(e) = move_files_to_storage().await {
log::error!("Error moving disk files to remote: {}", e);
log::error!("Error moving json files to remote: {}", e);
}
}
log::info!("job::files::disk is stopped");
log::info!("job::files::json is stopped");
Ok(())
}
@ -84,8 +85,9 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
let columns = file_path.splitn(5, '/').collect::<Vec<&str>>();
// eg: files/default/logs/olympics/0/2023/08/21/08/8b8a5451bbe1c44b/
// 7099303408192061440f3XQ2p.json eg: files/default/traces/default/0/
// 2023/09/04/05/default/service_name=ingester/7104328279989026816guOA4t.json
// 7099303408192061440f3XQ2p.json
// eg: files/default/traces/default/0/023/09/04/05/default/
// service_name=ingester/7104328279989026816guOA4t.json
// let _ = columns[0].to_string(); // files/
let org_id = columns[1].to_string();
let stream_type = StreamType::from(columns[2]);
@ -98,20 +100,20 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
}
// check the file is using for write
// if wal::check_in_use(
// StreamParams::new(&org_id, &stream_name, stream_type),
// &file_name,
// )
// .await
// {
// // println!("file is using for write, skip, {}", file_name);
// continue;
// }
log::info!("[JOB] convert disk file: {}", file);
if wal::check_in_use(
StreamParams::new(&org_id, &stream_name, stream_type),
&file_name,
)
.await
{
// println!("file is using for write, skip, {}", file_name);
continue;
}
// log::info!("[JOB] convert json file: {}", file);
// check if we are allowed to ingest or just delete the file
if db::compact::retention::is_deleting_stream(&org_id, &stream_name, stream_type, None) {
log::info!(
log::warn!(
"[JOB] the stream [{}/{}/{}] is deleting, just delete file: {}",
&org_id,
stream_type,
@ -120,7 +122,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
);
if let Err(e) = tokio::fs::remove_file(&local_file).await {
log::error!(
"[JOB] Failed to remove disk file from disk: {}, {}",
"[JOB] Failed to remove json file from disk: {}, {}",
local_file,
e
);
@ -133,7 +135,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
let ret =
upload_file(&org_id, &stream_name, stream_type, &local_file, &file_name).await;
if let Err(e) = ret {
log::error!("[JOB] Error while uploading disk file to storage {}", e);
log::error!("[JOB] Error while uploading json file to storage {}", e);
drop(permit);
return Ok(());
}
@ -142,7 +144,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
let ret = db::file_list::local::set(&key, Some(meta.clone()), false).await;
if let Err(e) = ret {
log::error!(
"[JOB] Failed write disk file meta: {}, error: {}",
"[JOB] Failed write json file meta: {}, error: {}",
local_file,
e.to_string()
);
@ -153,7 +155,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
// check if allowed to delete the file
loop {
if wal::lock_files_exists(&file_path).await {
log::info!(
log::warn!(
"[JOB] the file is still in use, waiting for a few ms: {}",
file_path
);
@ -166,7 +168,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
let ret = tokio::fs::remove_file(&local_file).await;
if let Err(e) = ret {
log::error!(
"[JOB] Failed to remove disk file from disk: {}, {}",
"[JOB] Failed to remove json file from disk: {}, {}",
local_file,
e.to_string()
);
@ -191,7 +193,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
for task in tasks {
if let Err(e) = task.await {
log::error!("[JOB] Error while uploading disk file to storage {}", e);
log::error!("[JOB] Error while uploading json file to storage {}", e);
};
}
Ok(())
@ -207,11 +209,11 @@ async fn upload_file(
let mut file = fs::File::open(path_str).unwrap();
let file_meta = file.metadata().unwrap();
let file_size = file_meta.len();
log::info!("[JOB] File upload begin: disk: {}", path_str);
log::info!("[JOB] File upload begin: {}", path_str);
if file_size == 0 {
if let Err(e) = tokio::fs::remove_file(path_str).await {
log::error!(
"[JOB] Failed to remove disk file from disk: {}, {}",
"[JOB] Failed to remove json file from disk: {}, {}",
path_str,
e
);
@ -261,8 +263,8 @@ async fn upload_file(
path_str
));
}
let value_iter = res_records.iter().map(Ok);
infer_json_schema_from_iterator(value_iter, stream_type).unwrap()
let value_iter = res_records.iter();
infer_json_schema_from_values(value_iter, stream_type).unwrap()
}
};
let arrow_schema = Arc::new(inferred_schema);
@ -347,11 +349,11 @@ async fn upload_file(
let file_name = new_file_name.to_owned();
match storage::put(&new_file_name, bytes::Bytes::from(buf_parquet)).await {
Ok(_) => {
log::info!("[JOB] disk file upload succeeded: {}", file_name);
log::info!("[JOB] File upload succeeded: {}", file_name);
Ok((file_name, file_meta, stream_type))
}
Err(err) => {
log::error!("[JOB] disk file upload error: {:?}", err);
log::error!("[JOB] File upload error: {:?}", err);
Err(anyhow::anyhow!(err))
}
}

View File

@ -13,7 +13,7 @@
// You should have received a copy of the GNU Affero General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
use std::{fs, io::Read, path::Path};
use std::{fs, io::Read, path::Path, sync::Arc};
use config::{
meta::stream::{FileMeta, StreamType},
@ -21,6 +21,7 @@ use config::{
utils::parquet::read_metadata,
CONFIG,
};
use parquet::arrow::ParquetRecordBatchStreamBuilder;
use tokio::{sync::Semaphore, task, time};
use crate::{
@ -28,7 +29,7 @@ use crate::{
infra::{cluster, storage, wal},
utils::file::scan_files,
},
service::{db, usage::report_compression_stats},
service::{db, schema::schema_evolution, usage::report_compression_stats},
};
pub async fn run() -> Result<(), anyhow::Error> {
@ -40,10 +41,10 @@ pub async fn run() -> Result<(), anyhow::Error> {
}
interval.tick().await;
if let Err(e) = move_files_to_storage().await {
log::error!("Error moving disk files to remote: {}", e);
log::error!("Error moving parquet files to remote: {}", e);
}
}
log::info!("job::files::disk is stopped");
log::info!("job::files::parquet is stopped");
Ok(())
}
@ -71,8 +72,9 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
let columns = file_path.splitn(5, '/').collect::<Vec<&str>>();
// eg: files/default/logs/olympics/0/2023/08/21/08/8b8a5451bbe1c44b/
// 7099303408192061440f3XQ2p.json eg: files/default/traces/default/0/
// 2023/09/04/05/default/service_name=ingester/7104328279989026816guOA4t.json
// 7099303408192061440f3XQ2p.parquet
// eg: files/default/traces/default/0/2023/09/04/05/default/
// service_name=ingester/7104328279989026816guOA4t.parquet
// let _ = columns[0].to_string(); // files/
let org_id = columns[1].to_string();
let stream_type = StreamType::from(columns[2]);
@ -84,21 +86,9 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
file_name = file_name.replace('_', "/");
}
// check the file is using for write
// if wal::check_in_use(
// StreamParams::new(&org_id, &stream_name, stream_type),
// &file_name,
// )
// .await
// {
// // println!("file is using for write, skip, {}", file_name);
// continue;
// }
log::info!("[JOB] convert disk file: {}", file);
// check if we are allowed to ingest or just delete the file
if db::compact::retention::is_deleting_stream(&org_id, &stream_name, stream_type, None) {
log::info!(
log::warn!(
"[JOB] the stream [{}/{}/{}] is deleting, just delete file: {}",
&org_id,
stream_type,
@ -107,7 +97,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
);
if let Err(e) = tokio::fs::remove_file(&local_file).await {
log::error!(
"[JOB] Failed to remove disk file from disk: {}, {}",
"[JOB] Failed to remove parquet file from disk: {}, {}",
local_file,
e
);
@ -120,7 +110,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
let ret =
upload_file(&org_id, &stream_name, stream_type, &local_file, &file_name).await;
if let Err(e) = ret {
log::error!("[JOB] Error while uploading disk file to storage {}", e);
log::error!("[JOB] Error while uploading parquet file to storage {}", e);
drop(permit);
return Ok(());
}
@ -129,7 +119,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
let ret = db::file_list::local::set(&key, Some(meta.clone()), false).await;
if let Err(e) = ret {
log::error!(
"[JOB] Failed write disk file meta: {}, error: {}",
"[JOB] Failed write parquet file meta: {}, error: {}",
local_file,
e.to_string()
);
@ -140,7 +130,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
// check if allowed to delete the file
loop {
if wal::lock_files_exists(&file_path).await {
log::info!(
log::warn!(
"[JOB] the file is still in use, waiting for a few ms: {}",
file_path
);
@ -153,7 +143,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
let ret = tokio::fs::remove_file(&local_file).await;
if let Err(e) = ret {
log::error!(
"[JOB] Failed to remove disk file from disk: {}, {}",
"[JOB] Failed to remove parquet file from disk: {}, {}",
local_file,
e.to_string()
);
@ -178,7 +168,7 @@ pub async fn move_files_to_storage() -> Result<(), anyhow::Error> {
for task in tasks {
if let Err(e) = task.await {
log::error!("[JOB] Error while uploading disk file to storage {}", e);
log::error!("[JOB] Error while uploading parquet file to storage {}", e);
};
}
Ok(())
@ -194,11 +184,11 @@ async fn upload_file(
let mut file = fs::File::open(path_str).unwrap();
let file_meta = file.metadata().unwrap();
let file_size = file_meta.len();
log::info!("[JOB] File upload begin: disk: {}", path_str);
log::info!("[JOB] File upload begin: {}", path_str);
if file_size == 0 {
if let Err(e) = tokio::fs::remove_file(path_str).await {
log::error!(
"[JOB] Failed to remove disk file from disk: {}, {}",
"[JOB] Failed to remove parquet file from disk: {}, {}",
path_str,
e
);
@ -218,15 +208,23 @@ async fn upload_file(
let mut file_meta = read_metadata(&buf_parquet).await?;
file_meta.compressed_size = file_size as i64;
// TODO ?
// schema_evolution(
// org_id,
// stream_name,
// stream_type,
// arrow_schema,
// file_meta.min_ts,
// )
// .await;
// read schema
let schema_reader = std::io::Cursor::new(buf_parquet.clone());
let arrow_reader = ParquetRecordBatchStreamBuilder::new(schema_reader).await?;
let inferred_schema = arrow_reader
.schema()
.as_ref()
.clone()
.with_metadata(std::collections::HashMap::new());
schema_evolution(
org_id,
stream_name,
stream_type,
Arc::new(inferred_schema),
file_meta.min_ts,
)
.await;
let new_file_name =
super::generate_storage_file_name(org_id, stream_type, stream_name, file_name);
@ -234,11 +232,11 @@ async fn upload_file(
let file_name = new_file_name.to_owned();
match storage::put(&new_file_name, buf_parquet).await {
Ok(_) => {
log::info!("[JOB] disk file upload succeeded: {}", file_name);
log::info!("[JOB] File upload succeeded: {}", file_name);
Ok((file_name, file_meta, stream_type))
}
Err(err) => {
log::error!("[JOB] disk file upload error: {:?}", err);
log::error!("[JOB] File upload error: {:?}", err);
Err(anyhow::anyhow!(err))
}
}
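A self-contained sketch of the schema read introduced above: the Arrow schema is recovered from the parquet footer of the in-memory buffer and its key/value metadata is cleared before it is handed to schema_evolution. This assumes the parquet crate's async/arrow features and a tokio runtime, as in the surrounding code.

use parquet::arrow::ParquetRecordBatchStreamBuilder;

// `buf` is assumed to contain a complete parquet file (e.g. the bytes about to be uploaded).
async fn read_parquet_schema(buf: Vec<u8>) -> Result<arrow_schema::Schema, anyhow::Error> {
    let reader = std::io::Cursor::new(buf);
    let builder = ParquetRecordBatchStreamBuilder::new(reader).await?;
    // Drop the embedded key/value metadata so only the field definitions remain.
    let schema = builder
        .schema()
        .as_ref()
        .clone()
        .with_metadata(std::collections::HashMap::new());
    Ok(schema)
}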

View File

@ -18,7 +18,7 @@ use std::collections::{HashMap, HashSet};
use actix_web::http;
use arrow_schema::DataType;
use chrono::{Duration, Local, TimeZone, Utc};
use config::{meta::stream::StreamType, utils::schema_ext::SchemaExt, CONFIG};
use config::{meta::stream::StreamType, CONFIG};
use crate::{
common::{
@ -80,8 +80,7 @@ pub async fn save(
// before saving alert check column type to decide numeric condition
let schema = db::schema::get(org_id, stream_name, stream_type).await?;
let fields = schema.to_cloned_fields();
if stream_name.is_empty() || fields.is_empty() {
if stream_name.is_empty() || schema.fields().is_empty() {
return Err(anyhow::anyhow!("Stream {stream_name} not found"));
}

View File

@ -30,7 +30,7 @@ use tokio::{sync::Semaphore, task::JoinHandle};
use crate::{
common::{
infra::{cache, file_list as infra_file_list, storage},
meta::stream::StreamStats,
meta::stream::{PartitionTimeLevel, StreamStats},
utils::json,
},
service::{db, file_list, search::datafusion, stream},
@ -84,6 +84,17 @@ pub async fn merge_by_stream(
)
.unwrap()
.timestamp_micros();
let offset_time_day = Utc
.with_ymd_and_hms(
offset_time.year(),
offset_time.month(),
offset_time.day(),
0,
0,
0,
)
.unwrap()
.timestamp_micros();
// check offset
let time_now: DateTime<Utc> = Utc::now();
@ -98,10 +109,8 @@ pub async fn merge_by_stream(
)
.unwrap()
.timestamp_micros();
// 1. if step_secs less than 1 hour, must wait for at least
// max_file_retention_time
// 2. if step_secs greater than 1 hour, must wait for at least 3 *
// max_file_retention_time
// 1. if step_secs is less than 1 hour, we must wait for at least max_file_retention_time
// 2. if step_secs is greater than 1 hour, we must wait for at least 3 * max_file_retention_time
// -- first period: the last hour local file upload to storage, write file list
// -- second period, the last hour file list upload to storage
// -- third period, we can do the merge, so, at least 3 times of
@ -122,12 +131,19 @@ pub async fn merge_by_stream(
return Ok(()); // the time is future, just wait
}
// get current hour all files
let (partition_offset_start, partition_offset_end) = (
offset_time_hour,
offset_time_hour + Duration::hours(1).num_microseconds().unwrap()
- Duration::seconds(1).num_microseconds().unwrap(),
);
// get all files for the current hour (or day)
let (partition_offset_start, partition_offset_end) =
if partition_time_level == PartitionTimeLevel::Daily {
(
offset_time_day,
offset_time_day + Duration::hours(24).num_microseconds().unwrap() - 1,
)
} else {
(
offset_time_hour,
offset_time_hour + Duration::hours(1).num_microseconds().unwrap() - 1,
)
};
let files = file_list::query(
org_id,
stream_name,
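To make the window arithmetic above concrete, a small sketch of the inclusive [start, end] ranges produced for the hourly and daily partition levels (timestamps in microseconds, as in the job):

use chrono::{Duration, TimeZone, Utc};

fn window(start_micros: i64, hours: i64) -> (i64, i64) {
    // inclusive end: start + N hours - 1 microsecond
    (
        start_micros,
        start_micros + Duration::hours(hours).num_microseconds().unwrap() - 1,
    )
}

fn main() {
    let hour_start = Utc
        .with_ymd_and_hms(2023, 8, 21, 8, 0, 0)
        .unwrap()
        .timestamp_micros();
    let (hs, he) = window(hour_start, 1); // PartitionTimeLevel::Hourly
    let (ds, de) = window(hour_start, 24); // PartitionTimeLevel::Daily (start would be midnight)
    assert_eq!(he - hs + 1, 3_600_000_000); // exactly one hour of microseconds
    assert_eq!(de - ds + 1, 24 * 3_600_000_000);
    println!("hour: {hs}..={he}, day: {ds}..={de}");
}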

View File

@ -264,8 +264,7 @@ pub async fn run_merge() -> Result<(), anyhow::Error> {
}
/// compactor delete files run steps:
/// 1. get pending deleted files from file_list_deleted table, created_at > 2
/// hours
/// 1. get pending deleted files from file_list_deleted table, created_at > 2 hours
/// 2. delete files from storage
pub async fn run_delete_files() -> Result<(), anyhow::Error> {
let now = Utc::now();

View File

@ -160,7 +160,7 @@ impl DistinctValues {
data,
Some(&schema_key),
);
let data = json::Value::Object(data.to_owned());
let data = json::Value::Object(data.clone());
let data_size = json::to_vec(&data).unwrap_or_default().len();
let hour_buf = buf.entry(hour_key).or_insert_with(|| SchemaRecords {
@ -172,7 +172,7 @@ impl DistinctValues {
hour_buf.records.push(Arc::new(data));
hour_buf.records_size += data_size;
}
_ = ingestion::write_file(buf, 0, &stream_params, None).await;
_ = ingestion::write_file(buf, 0, &stream_params).await;
}
Ok(())
}

View File

@ -196,7 +196,6 @@ pub async fn save_enrichment_data(
buf,
thread_id,
&StreamParams::new(org_id, stream_name, StreamType::EnrichmentTables),
None,
)
.await;
req_stats.response_time = start.elapsed().as_secs_f64();

View File

@ -18,7 +18,7 @@ use opentelemetry_proto::tonic::{
metrics::v1::{exemplar, number_data_point},
};
use crate::{common::utils::json, service::ingestion::get_value};
use crate::common::utils::json;
pub fn get_val(attr_val: &Option<&AnyValue>) -> json::Value {
match attr_val {
@ -108,7 +108,7 @@ pub fn get_exemplar_val(attr_val: &Option<exemplar::Value>) -> json::Value {
pub fn get_val_for_attr(attr_val: json::Value) -> json::Value {
let local_val = attr_val.as_object().unwrap();
if let Some((_key, value)) = local_val.into_iter().next() {
return serde_json::Value::String(get_value(value));
return serde_json::Value::String(super::get_string_value(value));
};
().into()
}

View File

@ -40,14 +40,13 @@ use crate::{
utils::{
flatten,
functions::get_vrl_compiler_config,
json::{Map, Value},
json::{self, Map, Value},
},
},
service::{db, format_partition_key, stream::stream_settings},
};
pub mod grpc;
pub mod otlp_json;
pub type TriggerAlertData = Option<Vec<(Alert, Vec<Map<String, Value>>)>>;
@ -261,14 +260,13 @@ pub fn register_stream_transforms(
(local_trans, stream_vrl_map)
}
pub fn apply_stream_transform<'a>(
local_trans: &Vec<StreamTransform>,
value: &'a Value,
stream_vrl_map: &'a AHashMap<String, VRLResultResolver>,
pub fn apply_stream_transform(
local_trans: &[StreamTransform],
mut value: Value,
stream_vrl_map: &AHashMap<String, VRLResultResolver>,
stream_name: &str,
runtime: &mut Runtime,
) -> Result<Value, anyhow::Error> {
let mut value = value.clone();
for trans in local_trans {
let func_key = format!("{stream_name}/{}", trans.transform.name);
if stream_vrl_map.contains_key(&func_key) && !value.is_null() {
@ -276,7 +274,7 @@ pub fn apply_stream_transform<'a>(
value = apply_vrl_fn(runtime, vrl_runtime, &value);
}
}
flatten::flatten(&value)
flatten::flatten(value)
}
pub async fn chk_schema_by_record(
@ -324,7 +322,6 @@ pub async fn write_file(
buf: AHashMap<String, SchemaRecords>,
thread_id: usize,
stream: &StreamParams,
_partition_time_level: Option<PartitionTimeLevel>,
) -> RequestStats {
let mut req_stats = RequestStats::default();
for (hour_key, entry) in buf {
@ -332,8 +329,6 @@ pub async fn write_file(
continue;
}
let entry_records = entry.records.len();
// -- call new ingester
let writer =
ingester::get_writer(thread_id, &stream.org_id, &stream.stream_type.to_string()).await;
writer
@ -349,7 +344,6 @@ pub async fn write_file(
)
.await
.unwrap();
// -- end call new ingester
req_stats.size += entry.records_size as f64 / SIZE_IN_MB;
req_stats.records += entry_records as i64;
@ -357,22 +351,6 @@ pub async fn write_file(
req_stats
}
pub fn get_value(value: &Value) -> String {
if value.is_boolean() {
value.as_bool().unwrap().to_string()
} else if value.is_f64() {
value.as_f64().unwrap().to_string()
} else if value.is_i64() {
value.as_i64().unwrap().to_string()
} else if value.is_u64() {
value.as_u64().unwrap().to_string()
} else if value.is_string() {
value.as_str().unwrap().to_string()
} else {
value.to_string()
}
}
pub fn is_ingestion_allowed(org_id: &str, stream_name: Option<&str>) -> Option<anyhow::Error> {
if !cluster::is_ingester(&cluster::LOCAL_NODE_ROLE) {
return Some(anyhow::anyhow!("not an ingester"));
@ -391,6 +369,119 @@ pub fn is_ingestion_allowed(org_id: &str, stream_name: Option<&str>) -> Option<a
None
}
pub fn get_float_value(val: &Value) -> f64 {
match val {
Value::String(v) => v.parse::<f64>().unwrap_or(0.0),
Value::Number(v) => v.as_f64().unwrap_or(0.0),
_ => 0.0,
}
}
pub fn get_int_value(val: &Value) -> i64 {
match val {
Value::String(v) => v.parse::<i64>().unwrap_or(0),
Value::Number(v) => v.as_i64().unwrap_or(0),
_ => 0,
}
}
pub fn get_string_value(value: &Value) -> String {
if value.is_boolean() {
value.as_bool().unwrap_or_default().to_string()
} else if value.is_i64() {
value.as_i64().unwrap_or_default().to_string()
} else if value.is_u64() {
value.as_u64().unwrap_or_default().to_string()
} else if value.is_f64() {
value.as_f64().unwrap_or_default().to_string()
} else if value.is_string() {
value.as_str().unwrap_or_default().to_string()
} else {
value.to_string()
}
}
pub fn get_val_for_attr(attr_val: &Value) -> Value {
let local_val = attr_val.as_object().unwrap();
if let Some((key, value)) = local_val.into_iter().next() {
match key.as_str() {
"stringValue" | "string_value" => {
return json::json!(get_string_value(value));
}
"boolValue" | "bool_value" => {
return json::json!(value.as_bool().unwrap_or(false).to_string());
}
"intValue" | "int_value" => {
return json::json!(get_int_value(value).to_string());
}
"doubleValue" | "double_value" => {
return json::json!(get_float_value(value).to_string());
}
"bytesValue" | "bytes_value" => {
return json::json!(value.as_str().unwrap_or("").to_string());
}
"arrayValue" | "array_value" => {
let mut vals = vec![];
for item in value
.get("values")
.unwrap()
.as_array()
.unwrap_or(&vec![])
.iter()
{
vals.push(get_val_for_attr(item));
}
return json::json!(vals);
}
"kvlistValue" | "kvlist_value" => {
let mut vals = json::Map::new();
for item in value
.get("values")
.unwrap()
.as_array()
.unwrap_or(&vec![])
.iter()
{
let mut key = item.get("key").unwrap().as_str().unwrap_or("").to_string();
flatten::format_key(&mut key);
let value = item.get("value").unwrap().clone();
vals.insert(key, get_val_for_attr(&value));
}
return json::json!(vals);
}
_ => {
return json::json!(get_string_value(value));
}
}
};
attr_val.clone()
}
pub fn get_val_with_type_retained(val: &Value) -> Value {
match val {
Value::String(val) => {
json::json!(val)
}
Value::Bool(val) => {
json::json!(val)
}
Value::Number(val) => {
json::json!(val)
}
Value::Array(val) => {
json::json!(val)
}
Value::Object(val) => {
json::json!(val)
}
Value::Null => Value::Null,
}
}
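A test one might add to the module's tests below to pin down the coercion helpers introduced here (assuming the tests module brings super::* into scope, and that json is the crate's serde_json wrapper):

#[test]
fn test_value_coercion_helpers() {
    // scalar coercions, as defined above
    assert_eq!(get_float_value(&json::json!("3.14")), 3.14);
    assert_eq!(get_int_value(&json::json!(7)), 7);
    assert_eq!(get_string_value(&json::json!(true)), "true");
    assert_eq!(get_string_value(&json::json!(1.5)), "1.5");
    // OTLP-JSON attribute objects collapse to plain values; scalar variants come back stringified
    assert_eq!(get_val_for_attr(&json::json!({ "intValue": 42 })), json::json!("42"));
}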
#[cfg(test)]
mod tests {
use std::collections::HashMap;

View File

@ -1,104 +0,0 @@
use crate::common::utils::{flatten::format_key, json};
pub fn get_float_value(val: &json::Value) -> f64 {
match val {
json::Value::String(v) => v.parse::<f64>().unwrap_or(0.0),
json::Value::Number(v) => v.as_f64().unwrap_or(0.0),
_ => 0.0,
}
}
pub fn get_int_value(val: &json::Value) -> i64 {
match val {
json::Value::String(v) => v.parse::<i64>().unwrap_or(0),
json::Value::Number(v) => v.as_i64().unwrap_or(0),
_ => 0,
}
}
pub fn get_string_value(val: &json::Value) -> String {
match val {
json::Value::String(v) => v.to_string(),
json::Value::Number(v) => v.as_i64().unwrap_or(0).to_string(),
_ => "".to_string(),
}
}
pub fn get_val_for_attr(attr_val: &json::Value) -> json::Value {
let local_val = attr_val.as_object().unwrap();
if let Some((key, value)) = local_val.into_iter().next() {
match key.as_str() {
"stringValue" | "string_value" => {
return json::json!(get_string_value(value));
}
"boolValue" | "bool_value" => {
return json::json!(value.as_bool().unwrap_or(false).to_string());
}
"intValue" | "int_value" => {
return json::json!(get_int_value(value).to_string());
}
"doubleValue" | "double_value" => {
return json::json!(get_float_value(value).to_string());
}
"bytesValue" | "bytes_value" => {
return json::json!(value.as_str().unwrap_or("").to_string());
}
"arrayValue" | "array_value" => {
let mut vals = vec![];
for item in value
.get("values")
.unwrap()
.as_array()
.unwrap_or(&vec![])
.iter()
{
vals.push(get_val_for_attr(item));
}
return json::json!(vals);
}
"kvlistValue" | "kvlist_value" => {
let mut vals = json::Map::new();
for item in value
.get("values")
.unwrap()
.as_array()
.unwrap_or(&vec![])
.iter()
{
let key = item.get("key").unwrap().as_str().unwrap_or("").to_string();
let value = item.get("value").unwrap().clone();
vals.insert(format_key(&key), get_val_for_attr(&value));
}
return json::json!(vals);
}
_ => {
return json::json!(get_string_value(value));
}
}
};
attr_val.clone()
}
pub fn get_val_with_type_retained(val: &json::Value) -> json::Value {
match val {
json::Value::String(val) => {
json::json!(val)
}
json::Value::Bool(val) => {
json::json!(val)
}
json::Value::Number(val) => {
json::json!(val)
}
json::Value::Array(val) => {
json::json!(val)
}
json::Value::Object(val) => {
json::json!(val)
}
json::Value::Null => json::Value::Null,
}
}

View File

@ -40,7 +40,7 @@ use crate::{
service::{
db, distinct_values,
ingestion::{evaluate_trigger, write_file, TriggerAlertData},
schema::stream_schema_exists,
schema::{get_upto_discard_error, stream_schema_exists},
usage::report_request_usage_stats,
},
};
@ -63,6 +63,11 @@ pub async fn ingest(
return Err(anyhow::anyhow!("Quota exceeded for this organization"));
}
// check memtable
if let Err(e) = ingester::check_memtable_size() {
return Err(anyhow::Error::msg(e.to_string()));
}
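The same memtable gate is added to every ingestion entry point touched by this commit; a hedged sketch of the two shapes it takes, assuming check_memtable_size returns Err once the ingester's in-memory table exceeds its configured limit:

// Bulk/multi/JSON handlers bubble the error up as anyhow::Error, as above:
fn memtable_guard() -> Result<(), anyhow::Error> {
    ingester::check_memtable_size().map_err(|e| anyhow::anyhow!(e.to_string()))
}

// HTTP/OTLP handlers map it to a 503 instead (actix-web types assumed):
fn memtable_guard_http() -> Option<actix_web::HttpResponse> {
    ingester::check_memtable_size().err().map(|e| {
        actix_web::HttpResponse::ServiceUnavailable()
            .json(serde_json::json!({ "code": 503, "error": e.to_string() }))
    })
}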
// let mut errors = false;
let mut bulk_res = BulkResponse {
took: 0,
@ -70,7 +75,7 @@ pub async fn ingest(
items: vec![],
};
let mut min_ts =
let min_ts =
(Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto)).timestamp_micros();
let mut runtime = crate::service::ingestion::init_functions_runtime();
@ -164,13 +169,13 @@ pub async fn ingest(
let key = format!("{org_id}/{}/{stream_name}", StreamType::Logs);
// JSON Flattening
let mut value = flatten::flatten(&value)?;
let mut value = flatten::flatten(value)?;
if let Some(transforms) = stream_transform_map.get(&key) {
let mut ret_value = value.clone();
ret_value = crate::service::ingestion::apply_stream_transform(
transforms,
&ret_value,
ret_value,
&stream_vrl_map,
&stream_name,
&mut runtime,
@ -222,10 +227,9 @@ pub async fn ingest(
None => Utc::now().timestamp_micros(),
};
// check ingestion time
let earliest_time = Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto);
if timestamp < earliest_time.timestamp_micros() {
if timestamp < min_ts {
bulk_res.errors = true;
let failure_reason = Some(super::get_upto_discard_error());
let failure_reason = Some(get_upto_discard_error().to_string());
add_record_status(
stream_name.clone(),
doc_id.clone(),
@ -237,9 +241,6 @@ pub async fn ingest(
);
continue;
}
if timestamp < min_ts {
min_ts = timestamp;
}
local_val.insert(
CONFIG.common.column_timestamp.clone(),
json::Value::Number(timestamp.into()),
@ -257,7 +258,7 @@ pub async fn ingest(
let mut status = RecordStatus::default();
let need_trigger = !stream_trigger_map.contains_key(&stream_name);
let local_trigger = super::add_valid_record_arrow(
let local_trigger = match super::add_valid_record(
&StreamMeta {
org_id: org_id.to_string(),
stream_name: stream_name.clone(),
@ -271,7 +272,23 @@ pub async fn ingest(
local_val,
need_trigger,
)
.await;
.await
{
Ok(v) => v,
Err(e) => {
bulk_res.errors = true;
add_record_status(
stream_name.clone(),
doc_id.clone(),
action.clone(),
value,
&mut bulk_res,
Some(TS_PARSE_FAILED.to_string()),
Some(e.to_string()),
);
continue;
}
};
if local_trigger.is_some() {
stream_trigger_map.insert(stream_name.clone(), local_trigger);
}
@ -330,7 +347,6 @@ pub async fn ingest(
stream_data.data,
thread_id,
&StreamParams::new(org_id, &stream_name, StreamType::Logs),
None,
)
.await;
req_stats.response_time += time;

View File

@ -17,7 +17,6 @@ use std::io::{BufRead, Read};
use actix_web::http;
use ahash::AHashMap;
use bytes::Bytes;
use chrono::{Duration, Utc};
use config::{meta::stream::StreamType, metrics, CONFIG, DISTINCT_FIELDS};
use datafusion::arrow::datatypes::Schema;
@ -43,6 +42,7 @@ use crate::{
distinct_values, get_formatted_stream_name,
ingestion::{evaluate_trigger, is_ingestion_allowed, write_file, TriggerAlertData},
logs::StreamMeta,
schema::get_upto_discard_error,
usage::report_request_usage_stats,
},
};
@ -54,7 +54,7 @@ pub async fn ingest(
thread_id: usize,
) -> Result<IngestionResponse, anyhow::Error> {
let start = std::time::Instant::now();
// check stream
let mut stream_schema_map: AHashMap<String, Schema> = AHashMap::new();
let mut stream_params = StreamParams::new(org_id, in_stream_name, StreamType::Logs);
let stream_name = &get_formatted_stream_name(&mut stream_params, &mut stream_schema_map).await;
@ -62,20 +62,20 @@ pub async fn ingest(
return Err(value);
}
let mut min_ts =
// check memtable
if let Err(e) = ingester::check_memtable_size() {
return Ok(IngestionResponse {
code: http::StatusCode::SERVICE_UNAVAILABLE.into(),
status: vec![],
error: Some(e.to_string()),
});
}
let min_ts =
(Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto)).timestamp_micros();
let mut runtime = crate::service::ingestion::init_functions_runtime();
let mut stream_alerts_map: AHashMap<String, Vec<Alert>> = AHashMap::new();
let mut stream_status = StreamStatus::new(stream_name);
let mut distinct_values = Vec::with_capacity(16);
let mut trigger: TriggerAlertData = None;
let multi_req: &Bytes;
let reader: Vec<json::Value>;
// Start Register Transforms for stream
let mut runtime = crate::service::ingestion::init_functions_runtime();
let (local_trans, stream_vrl_map) = crate::service::ingestion::register_stream_transforms(
org_id,
StreamType::Logs,
@ -84,6 +84,7 @@ pub async fn ingest(
// End Register Transforms for stream
// Start get stream alerts
let mut stream_alerts_map: AHashMap<String, Vec<Alert>> = AHashMap::new();
crate::service::ingestion::get_stream_alerts(
org_id,
StreamType::Logs,
@ -91,113 +92,113 @@ pub async fn ingest(
&mut stream_alerts_map,
)
.await;
// End get stream alert
// End get stream alerts
let mut stream_status = StreamStatus::new(stream_name);
let mut distinct_values = Vec::with_capacity(16);
let mut trigger: TriggerAlertData = None;
let partition_det =
crate::service::ingestion::get_stream_partition_keys(stream_name, &stream_schema_map).await;
let partition_keys = partition_det.partition_keys;
let partition_time_level = partition_det.partition_time_level;
let mut buf: AHashMap<String, SchemaRecords> = AHashMap::new();
let mut write_buf: AHashMap<String, SchemaRecords> = AHashMap::new();
let ep: &str;
let data = match in_req {
let json_req: Vec<json::Value>; // to hold json request because of borrow checker
let (ep, data) = match in_req {
IngestionRequest::JSON(req) => {
reader = json::from_slice(req).unwrap_or({
json_req = json::from_slice(req).unwrap_or({
let val: json::Value = json::from_slice(req)?;
vec![val]
});
ep = "/api/org/ingest/logs/_json";
IngestionData::JSON(&reader)
}
IngestionRequest::GCP(req) => {
ep = "/api/org/ingest/logs/_gcs";
IngestionData::GCP(req)
}
IngestionRequest::Multi(req) => {
multi_req = req;
ep = "/api/org/ingest/logs/_multi";
IngestionData::Multi(multi_req)
}
IngestionRequest::KinesisFH(req) => {
ep = "/api/org/ingest/logs/_kinesis";
IngestionData::KinesisFH(req)
("/api/org/ingest/logs/_json", IngestionData::JSON(&json_req))
}
IngestionRequest::GCP(req) => ("/api/org/ingest/logs/_gcs", IngestionData::GCP(req)),
IngestionRequest::Multi(req) => ("/api/org/ingest/logs/_multi", IngestionData::Multi(req)),
IngestionRequest::KinesisFH(req) => (
"/api/org/ingest/logs/_kinesis",
IngestionData::KinesisFH(req),
),
};
for rec in data.iter() {
match rec {
Ok(item) => {
match apply_functions(
&item,
&local_trans,
&stream_vrl_map,
stream_name,
&mut runtime,
) {
Ok(mut res) => {
let local_val = res.as_object_mut().unwrap();
match handle_ts(local_val, min_ts) {
Ok(t) => min_ts = t,
Err(e) => {
stream_status.status.failed += 1;
stream_status.status.error = e.to_string();
continue;
}
}
let local_trigger = super::add_valid_record_arrow(
&StreamMeta {
org_id: org_id.to_string(),
stream_name: stream_name.to_string(),
partition_keys: &partition_keys,
partition_time_level: &partition_time_level,
stream_alerts_map: &stream_alerts_map,
},
&mut stream_schema_map,
&mut stream_status.status,
&mut buf,
local_val,
trigger.is_none(),
)
.await;
if local_trigger.is_some() {
trigger = local_trigger;
}
// get distinct_value item
for field in DISTINCT_FIELDS.iter() {
if let Some(val) = local_val.get(field) {
if !val.is_null() {
distinct_values.push(distinct_values::DvItem {
stream_type: StreamType::Logs,
stream_name: stream_name.to_string(),
field_name: field.to_string(),
field_value: val.as_str().unwrap().to_string(),
filter_name: "".to_string(),
filter_value: "".to_string(),
});
}
}
}
}
Err(e) => {
stream_status.status.failed += 1;
stream_status.status.error = e.to_string();
continue;
}
};
}
for ret in data.iter() {
let item = match ret {
Ok(item) => item,
Err(e) => {
log::error!("Error: {:?}", e);
return Err(anyhow::Error::msg("Failed processing"));
log::error!("IngestionError: {:?}", e);
return Err(anyhow::anyhow!("Failed processing: {:?}", e));
}
};
let mut res = match apply_functions(
item,
&local_trans,
&stream_vrl_map,
stream_name,
&mut runtime,
) {
Ok(res) => res,
Err(e) => {
stream_status.status.failed += 1;
stream_status.status.error = e.to_string();
continue;
}
};
let local_val = res.as_object_mut().unwrap();
if let Err(e) = handle_timestamp(local_val, min_ts) {
stream_status.status.failed += 1;
stream_status.status.error = e.to_string();
continue;
}
let local_trigger = match super::add_valid_record(
&StreamMeta {
org_id: org_id.to_string(),
stream_name: stream_name.to_string(),
partition_keys: &partition_keys,
partition_time_level: &partition_time_level,
stream_alerts_map: &stream_alerts_map,
},
&mut stream_schema_map,
&mut stream_status.status,
&mut write_buf,
local_val,
trigger.is_none(),
)
.await
{
Ok(v) => v,
Err(e) => {
stream_status.status.failed += 1;
stream_status.status.error = e.to_string();
continue;
}
};
if local_trigger.is_some() {
trigger = local_trigger;
}
// get distinct_value item
for field in DISTINCT_FIELDS.iter() {
if let Some(val) = local_val.get(field) {
if !val.is_null() {
distinct_values.push(distinct_values::DvItem {
stream_type: StreamType::Logs,
stream_name: stream_name.to_string(),
field_name: field.to_string(),
field_value: val.as_str().unwrap().to_string(),
filter_name: "".to_string(),
filter_value: "".to_string(),
});
}
}
}
}
// write to file
let mut req_stats = write_file(buf, thread_id, &stream_params, None).await;
let mut req_stats = write_file(write_buf, thread_id, &stream_params).await;
// send distinct_values
if !distinct_values.is_empty() {
@ -206,7 +207,7 @@ pub async fn ingest(
}
}
// only one trigger per request, as it updates etcd
// only one trigger per request
evaluate_trigger(trigger).await;
// update ingestion metrics
@ -229,7 +230,6 @@ pub async fn ingest(
StreamType::Logs.to_string().as_str(),
])
.inc();
req_stats.response_time = start.elapsed().as_secs_f64();
// report data usage
@ -243,6 +243,7 @@ pub async fn ingest(
)
.await;
// drop variables
drop(runtime);
drop(stream_schema_map);
drop(stream_vrl_map);
@ -256,8 +257,8 @@ pub async fn ingest(
}
pub fn apply_functions<'a>(
item: &'a json::Value,
local_trans: &Vec<StreamTransform>,
item: json::Value,
local_trans: &[StreamTransform],
stream_vrl_map: &'a AHashMap<String, VRLResultResolver>,
stream_name: &'a str,
runtime: &mut Runtime,
@ -267,7 +268,7 @@ pub fn apply_functions<'a>(
if !local_trans.is_empty() {
value = crate::service::ingestion::apply_stream_transform(
local_trans,
&value,
value,
stream_vrl_map,
stream_name,
runtime,
@ -281,10 +282,10 @@ pub fn apply_functions<'a>(
}
}
pub fn handle_ts(
pub fn handle_timestamp(
local_val: &mut json::Map<String, json::Value>,
mut min_ts: i64,
) -> Result<i64, anyhow::Error> {
min_ts: i64,
) -> Result<(), anyhow::Error> {
// handle timestamp
let timestamp = match local_val.get(&CONFIG.common.column_timestamp) {
Some(v) => match parse_timestamp_micro_from_value(v) {
@ -294,18 +295,14 @@ pub fn handle_ts(
None => Utc::now().timestamp_micros(),
};
// check ingestion time
let earliest_time = Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto);
if timestamp < earliest_time.timestamp_micros() {
return Err(anyhow::Error::msg(super::get_upto_discard_error()));
}
if timestamp < min_ts {
min_ts = timestamp;
return Err(get_upto_discard_error());
}
local_val.insert(
CONFIG.common.column_timestamp.clone(),
json::Value::Number(timestamp.into()),
);
Ok(min_ts)
Ok(())
}
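For clarity, min_ts is now computed once per request from the ZO_INGEST_ALLOWED_UPTO window, and records older than it are rejected rather than shifting the watermark; an isolated sketch of that cutoff check:

use chrono::{Duration, Utc};

// Illustrative only: mirrors the per-request min_ts computation used above.
fn is_too_old(timestamp_micros: i64, ingest_allowed_upto_hours: i64) -> bool {
    let min_ts = (Utc::now() - Duration::hours(ingest_allowed_upto_hours)).timestamp_micros();
    timestamp_micros < min_ts
}

fn main() {
    let three_days_ago = (Utc::now() - Duration::days(3)).timestamp_micros();
    assert!(is_too_old(three_days_ago, 24)); // older than a 24-hour window
    let now = Utc::now().timestamp_micros();
    assert!(!is_too_old(now, 24));
}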
impl<'a> Iterator for IngestionDataIter<'a> {

View File

@ -17,10 +17,10 @@ use std::{collections::HashMap, sync::Arc};
use ahash::AHashMap;
use arrow_schema::{DataType, Field};
use config::{meta::stream::StreamType, utils::hasher::get_fields_key_xxh3, CONFIG};
use config::{meta::stream::StreamType, utils::schema_ext::SchemaExt, CONFIG};
use datafusion::arrow::datatypes::Schema;
use super::ingestion::TriggerAlertData;
use super::ingestion::{get_string_value, TriggerAlertData};
use crate::{
common::{
meta::{
@ -28,15 +28,10 @@ use crate::{
ingestion::RecordStatus,
stream::{PartitionTimeLevel, SchemaRecords},
},
utils::{
self,
json::{Map, Value},
},
utils::json::{self, Map, Value},
},
service::{
ingestion::{get_value, get_wal_time_key},
schema::check_for_schema,
stream::unwrap_partition_time_level,
ingestion::get_wal_time_key, schema::check_for_schema, stream::unwrap_partition_time_level,
},
};
@ -49,13 +44,6 @@ pub mod syslog;
static BULK_OPERATORS: [&str; 3] = ["create", "index", "update"];
pub(crate) fn get_upto_discard_error() -> String {
format!(
"Too old data, only last {} hours data can be ingested. Data discarded. You can adjust ingestion max time by setting the environment variable ZO_INGEST_ALLOWED_UPTO=<max_hours>",
CONFIG.limit.ingest_allowed_upto
)
}
fn parse_bulk_index(v: &Value) -> Option<(String, String, String)> {
let local_val = v.as_object().unwrap();
for action in BULK_OPERATORS {
@ -75,339 +63,155 @@ fn parse_bulk_index(v: &Value) -> Option<(String, String, String)> {
None
}
pub fn cast_to_type(mut value: Value, delta: Vec<Field>) -> (Option<String>, Option<String>) {
let local_map = value.as_object_mut().unwrap();
// let mut error_msg = String::new();
pub fn cast_to_type(
value: &mut Map<String, Value>,
delta: Vec<Field>,
) -> Result<(), anyhow::Error> {
let mut parse_error = String::new();
for field in delta {
let field_map = local_map.get(field.name());
if let Some(val) = field_map {
if val.is_null() {
local_map.insert(field.name().clone(), val.clone());
continue;
}
let local_val = get_value(val);
match field.data_type() {
DataType::Boolean => {
match local_val.parse::<bool>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Int8 => {
match local_val.parse::<i8>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Int16 => {
match local_val.parse::<i16>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Int32 => {
match local_val.parse::<i32>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Int64 => {
match local_val.parse::<i64>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::UInt8 => {
match local_val.parse::<u8>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::UInt16 => {
match local_val.parse::<u16>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::UInt32 => {
match local_val.parse::<u32>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::UInt64 => {
match local_val.parse::<u64>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Float16 => {
match local_val.parse::<f32>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Float32 => {
match local_val.parse::<f32>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Float64 => {
match local_val.parse::<f64>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Utf8 => {
match local_val.parse::<String>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
_ => println!("{local_val:?}"),
};
let field_name = field.name().clone();
let Some(val) = value.get(&field_name) else {
continue;
};
if val.is_null() {
value.insert(field_name, Value::Null);
continue;
}
match field.data_type() {
DataType::Utf8 => {
if val.is_string() {
continue;
}
value.insert(field_name, Value::String(get_string_value(val)));
}
DataType::Int64 | DataType::Int32 | DataType::Int16 | DataType::Int8 => {
if val.is_i64() {
continue;
}
let val = get_string_value(val);
match val.parse::<i64>() {
Ok(val) => {
value.insert(field_name, Value::Number(val.into()));
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::UInt64 | DataType::UInt32 | DataType::UInt16 | DataType::UInt8 => {
if val.is_u64() {
continue;
}
let val = get_string_value(val);
match val.parse::<u64>() {
Ok(val) => {
value.insert(field_name, Value::Number(val.into()));
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Float64 | DataType::Float32 | DataType::Float16 => {
if val.is_f64() {
continue;
}
let val = get_string_value(val);
match val.parse::<f64>() {
Ok(val) => {
value.insert(
field_name,
Value::Number(serde_json::Number::from_f64(val).unwrap()),
);
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Boolean => {
if val.is_boolean() {
continue;
}
let val = get_string_value(val);
match val.parse::<bool>() {
Ok(val) => {
value.insert(field_name, Value::Bool(val));
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
_ => set_parsing_error(&mut parse_error, &field),
};
}
if parse_error.is_empty() {
(Some(utils::json::to_string(&local_map).unwrap()), None)
if !parse_error.is_empty() {
Err(anyhow::Error::msg(parse_error))
} else {
(None, Some(parse_error))
Ok(())
}
}
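A companion to the unit test at the end of this file, sketching the in-place coercion path of the reworked cast_to_type (field and value names are illustrative; the same imports as the existing test are assumed):

#[test]
fn test_cast_to_type_coerces_strings() {
    let mut rec = Map::new();
    rec.insert("status".to_string(), Value::from("204")); // arrives as a string
    rec.insert("elapsed".to_string(), Value::from("1.5")); // arrives as a string
    let delta = vec![
        Field::new("status", DataType::Int64, true),
        Field::new("elapsed", DataType::Float64, true),
    ];
    assert!(cast_to_type(&mut rec, delta).is_ok());
    // values are rewritten in place according to the schema delta
    assert_eq!(rec.get("status"), Some(&Value::from(204)));
    assert_eq!(rec.get("elapsed"), Some(&Value::from(1.5)));
}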
pub fn cast_to_type_arrow(mut value: Value, delta: Vec<Field>) -> (Option<String>, Option<String>) {
let local_map = value.as_object_mut().unwrap();
// let mut error_msg = String::new();
let mut parse_error = String::new();
for field in delta {
let field_map = local_map.get(field.name());
if let Some(val) = field_map {
if val.is_null() {
local_map.insert(field.name().clone(), val.clone());
continue;
}
let local_val = get_value(val);
match field.data_type() {
DataType::Boolean => {
match local_val.parse::<bool>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Int8 => {
match local_val.parse::<i8>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Int16 => {
match local_val.parse::<i16>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Int32 => {
match local_val.parse::<i32>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Int64 => {
match local_val.parse::<i64>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::UInt8 => {
match local_val.parse::<u8>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::UInt16 => {
match local_val.parse::<u16>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::UInt32 => {
match local_val.parse::<u32>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::UInt64 => {
match local_val.parse::<u64>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Float16 => {
match local_val.parse::<f32>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Float32 => {
match local_val.parse::<f32>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Float64 => {
match local_val.parse::<f64>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
DataType::Utf8 => {
match local_val.parse::<String>() {
Ok(val) => {
local_map.insert(field.name().clone(), val.into());
}
Err(_) => set_parsing_error(&mut parse_error, &field),
};
}
_ => println!("{local_val:?}"),
};
}
}
if parse_error.is_empty() {
// Convert the Map to a Vec of (String, Value) pairs
let mut entries: Vec<_> = local_map.clone().into_iter().collect();
entries.sort_by(|a, b| a.0.cmp(&b.0));
// Convert it back to a Map
let sorted_map: Map<String, Value> = entries.into_iter().collect();
(Some(utils::json::to_string(&sorted_map).unwrap()), None)
} else {
(None, Some(parse_error))
}
}
async fn add_valid_record_arrow(
async fn add_valid_record(
stream_meta: &StreamMeta<'_>,
stream_schema_map: &mut AHashMap<String, Schema>,
status: &mut RecordStatus,
buf: &mut AHashMap<String, SchemaRecords>,
local_val: &mut Map<String, Value>,
write_buf: &mut AHashMap<String, SchemaRecords>,
record_val: &mut Map<String, Value>,
need_trigger: bool,
) -> TriggerAlertData {
) -> Result<TriggerAlertData, anyhow::Error> {
let mut trigger: Vec<(Alert, Vec<Map<String, Value>>)> = Vec::new();
let timestamp: i64 = local_val
let timestamp: i64 = record_val
.get(&CONFIG.common.column_timestamp)
.unwrap()
.as_i64()
.unwrap();
let mut value_str = utils::json::to_string(&local_val).unwrap();
// check schema
let schema_evolution = check_for_schema(
&stream_meta.org_id,
&stream_meta.stream_name,
StreamType::Logs,
&value_str,
stream_schema_map,
&Value::Object(record_val.clone()),
timestamp,
true,
)
.await;
.await?;
// get hour key
let schema_key = get_fields_key_xxh3(&schema_evolution.schema_fields);
let rec_schema = stream_schema_map.get(&stream_meta.stream_name).unwrap();
let schema_key = rec_schema.hash_key();
let hour_key = get_wal_time_key(
timestamp,
stream_meta.partition_keys,
unwrap_partition_time_level(*stream_meta.partition_time_level, StreamType::Logs),
local_val,
record_val,
Some(&schema_key),
);
let rec_schema = stream_schema_map.get(&stream_meta.stream_name).unwrap();
if schema_evolution.schema_compatible {
let valid_record = if schema_evolution.types_delta.is_some() {
let delta = schema_evolution.types_delta.unwrap();
let loc_value: Value = utils::json::from_slice(value_str.as_bytes()).unwrap();
let (ret_val, error) = if !CONFIG.common.widening_schema_evolution {
cast_to_type_arrow(loc_value, delta)
let ret_val = if !CONFIG.common.widening_schema_evolution {
cast_to_type(record_val, delta)
} else if schema_evolution.is_schema_changed {
let local_delta = delta
.into_iter()
.filter(|x| x.metadata().contains_key("zo_cast"))
.collect::<Vec<_>>();
if local_delta.is_empty() {
(Some(value_str.clone()), None)
if !local_delta.is_empty() {
cast_to_type(record_val, local_delta)
} else {
cast_to_type_arrow(loc_value, local_delta)
Ok(())
}
} else {
cast_to_type_arrow(loc_value, delta)
cast_to_type(record_val, delta)
};
if ret_val.is_some() {
value_str = ret_val.unwrap();
true
} else {
status.failed += 1;
status.error = error.unwrap();
false
match ret_val {
Ok(_) => true,
Err(e) => {
status.failed += 1;
status.error = e.to_string();
false
}
}
} else {
true
};
if valid_record {
if need_trigger && !stream_meta.stream_alerts_map.is_empty() {
// Start check for alert trigger
@ -419,17 +223,16 @@ async fn add_valid_record_arrow(
);
if let Some(alerts) = stream_meta.stream_alerts_map.get(&key) {
for alert in alerts {
if let Ok(Some(v)) = alert.evaluate(Some(local_val)).await {
if let Ok(Some(v)) = alert.evaluate(Some(record_val)).await {
trigger.push((alert.clone(), v));
}
}
}
// End check for alert trigger
}
let loc_value: Value = utils::json::from_slice(value_str.as_bytes()).unwrap();
let hour_buf = buf.entry(hour_key).or_insert_with(|| {
let schema_key = get_fields_key_xxh3(&schema_evolution.schema_fields);
let hour_buf = write_buf.entry(hour_key).or_insert_with(|| {
let schema = Arc::new(rec_schema.clone().with_metadata(HashMap::new()));
let schema_key = schema.hash_key();
SchemaRecords {
schema_key,
schema,
@ -437,17 +240,19 @@ async fn add_valid_record_arrow(
records_size: 0,
}
});
hour_buf.records.push(Arc::new(loc_value));
hour_buf.records_size += value_str.len();
let record_val = Value::Object(record_val.clone());
let record_size = json::to_vec(&record_val).unwrap_or_default().len();
hour_buf.records.push(Arc::new(record_val));
hour_buf.records_size += record_size;
status.successful += 1;
};
} else {
status.failed += 1;
}
if trigger.is_empty() {
None
Ok(None)
} else {
Some(trigger)
Ok(Some(trigger))
}
}
@ -483,8 +288,7 @@ mod tests {
let mut local_val = Map::new();
local_val.insert("test".to_string(), Value::from("test13212"));
let delta = vec![Field::new("test", DataType::Utf8, true)];
let (ret_val, error) = cast_to_type(Value::from(local_val), delta);
assert!(ret_val.is_some());
assert!(error.is_none());
let ret_val = cast_to_type(&mut local_val, delta);
assert!(ret_val.is_ok());
}
}

View File

@ -35,6 +35,7 @@ use crate::{
distinct_values, get_formatted_stream_name,
ingestion::{evaluate_trigger, is_ingestion_allowed, write_file, TriggerAlertData},
logs::StreamMeta,
schema::get_upto_discard_error,
usage::report_request_usage_stats,
},
};
@ -45,8 +46,7 @@ use crate::{
/// - org_id: org id to ingest data in
/// - in_stream_name: stream to write data in
/// - body: incoming payload
/// - extend_json: a hashmap of string -> string values which should be extended
/// in each json row
/// - extend_json: a hashmap of string -> string values which should be added to each json row
/// - thread_id: a unique thread-id associated with this process
pub async fn ingest_with_keys(
org_id: &str,
@ -77,7 +77,7 @@ async fn ingest_inner(
}
let mut runtime = crate::service::ingestion::init_functions_runtime();
let mut min_ts =
let min_ts =
(Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto)).timestamp_micros();
let mut stream_alerts_map: AHashMap<String, Vec<Alert>> = AHashMap::new();
@ -122,13 +122,13 @@ async fn ingest_inner(
}
// JSON Flattening
value = flatten::flatten(&value)?;
value = flatten::flatten(value)?;
// Start row based transform
if !local_trans.is_empty() {
value = crate::service::ingestion::apply_stream_transform(
&local_trans,
&value,
value,
&stream_vrl_map,
stream_name,
&mut runtime,
@ -157,14 +157,10 @@ async fn ingest_inner(
None => Utc::now().timestamp_micros(),
};
// check ingestion time
let earliest_time = Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto);
if timestamp < earliest_time.timestamp_micros() {
stream_status.status.failed += 1; // to old data, just discard
stream_status.status.error = super::get_upto_discard_error();
continue;
}
if timestamp < min_ts {
min_ts = timestamp;
stream_status.status.failed += 1; // too old data, just discard
stream_status.status.error = get_upto_discard_error().to_string();
continue;
}
local_val.insert(
CONFIG.common.column_timestamp.clone(),
@ -172,7 +168,7 @@ async fn ingest_inner(
);
// write data
let local_trigger = super::add_valid_record_arrow(
let local_trigger = match super::add_valid_record(
&StreamMeta {
org_id: org_id.to_string(),
stream_name: stream_name.to_string(),
@ -186,7 +182,15 @@ async fn ingest_inner(
local_val,
trigger.is_none(),
)
.await;
.await
{
Ok(v) => v,
Err(e) => {
stream_status.status.failed += 1;
stream_status.status.error = e.to_string();
continue;
}
};
if local_trigger.is_some() {
trigger = local_trigger;
}
@ -209,7 +213,7 @@ async fn ingest_inner(
}
// write to file
let mut req_stats = write_file(buf, thread_id, &stream_params, partition_time_level).await;
let mut req_stats = write_file(buf, thread_id, &stream_params).await;
// only one trigger per request, as it updates etcd
evaluate_trigger(trigger).await;

View File

@ -45,7 +45,7 @@ use crate::{
grpc::{get_val, get_val_with_type_retained},
write_file, TriggerAlertData,
},
schema::stream_schema_exists,
schema::{get_upto_discard_error, stream_schema_exists},
usage::report_request_usage_stats,
},
};
@ -77,7 +77,7 @@ pub async fn usage_ingest(
return Err(anyhow::anyhow!("stream [{stream_name}] is being deleted"));
}
let mut min_ts =
let min_ts =
(Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto)).timestamp_micros();
let mut stream_alerts_map: AHashMap<String, Vec<Alert>> = AHashMap::new();
@ -102,7 +102,7 @@ pub async fn usage_ingest(
let mut buf: AHashMap<String, SchemaRecords> = AHashMap::new();
let reader: Vec<json::Value> = json::from_slice(&body)?;
for item in reader.iter() {
for item in reader.into_iter() {
// JSON Flattening
let mut value = flatten::flatten(item)?;
@ -122,21 +122,17 @@ pub async fn usage_ingest(
None => Utc::now().timestamp_micros(),
};
// check ingestion time
let earlest_time = Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto);
if timestamp < earlest_time.timestamp_micros() {
stream_status.status.failed += 1; // to old data, just discard
stream_status.status.error = super::get_upto_discard_error();
continue;
}
if timestamp < min_ts {
min_ts = timestamp;
stream_status.status.failed += 1; // too old data, just discard
stream_status.status.error = get_upto_discard_error().to_string();
continue;
}
local_val.insert(
CONFIG.common.column_timestamp.clone(),
json::Value::Number(timestamp.into()),
);
let local_trigger = super::add_valid_record_arrow(
let local_trigger = match super::add_valid_record(
&StreamMeta {
org_id: org_id.to_string(),
stream_name: stream_name.to_string(),
@ -150,7 +146,15 @@ pub async fn usage_ingest(
local_val,
trigger.is_none(),
)
.await;
.await
{
Ok(v) => v,
Err(e) => {
stream_status.status.failed += 1;
stream_status.status.error = e.to_string();
continue;
}
};
if local_trigger.is_some() {
trigger = local_trigger;
}
@ -177,7 +181,6 @@ pub async fn usage_ingest(
buf,
thread_id,
&StreamParams::new(org_id, stream_name, StreamType::Logs),
None,
)
.await;
@ -239,6 +242,17 @@ pub async fn handle_grpc_request(
"Quota exceeded for this organization".to_string(),
)));
}
// check memtable
if let Err(e) = ingester::check_memtable_size() {
return Ok(
HttpResponse::ServiceUnavailable().json(MetaHttpResponse::error(
http::StatusCode::SERVICE_UNAVAILABLE.into(),
e.to_string(),
)),
);
}
let start = std::time::Instant::now();
let mut stream_schema_map: AHashMap<String, Schema> = AHashMap::new();
let stream_name = match in_stream_name {
@ -332,7 +346,7 @@ pub async fn handle_grpc_request(
if ts < earlest_time.timestamp_micros().try_into().unwrap() {
stream_status.status.failed += 1; // to old data, just discard
stream_status.status.error = super::get_upto_discard_error();
stream_status.status.error = get_upto_discard_error().to_string();
continue;
}
@ -373,12 +387,12 @@ pub async fn handle_grpc_request(
};
// flattening
rec = flatten::flatten(&rec)?;
rec = flatten::flatten(rec)?;
if !local_trans.is_empty() {
rec = crate::service::ingestion::apply_stream_transform(
&local_trans,
&rec,
rec,
&stream_vrl_map,
stream_name,
&mut runtime,
@ -387,7 +401,7 @@ pub async fn handle_grpc_request(
// get json object
let local_val = rec.as_object_mut().unwrap();
let local_trigger = super::add_valid_record_arrow(
let local_trigger = match super::add_valid_record(
&StreamMeta {
org_id: org_id.to_string(),
stream_name: stream_name.to_string(),
@ -401,7 +415,15 @@ pub async fn handle_grpc_request(
local_val,
trigger.is_none(),
)
.await;
.await
{
Ok(v) => v,
Err(e) => {
stream_status.status.failed += 1;
stream_status.status.error = e.to_string();
continue;
}
};
if local_trigger.is_some() {
trigger = local_trigger;
}
@ -430,7 +452,6 @@ pub async fn handle_grpc_request(
data_buf,
thread_id,
&StreamParams::new(org_id, stream_name, StreamType::Logs),
None,
)
.await;

View File

@ -42,11 +42,9 @@ use crate::{
service::{
db, distinct_values, get_formatted_stream_name,
ingestion::{
evaluate_trigger,
otlp_json::{get_int_value, get_val_for_attr},
write_file, TriggerAlertData,
evaluate_trigger, get_int_value, get_val_for_attr, write_file, TriggerAlertData,
},
schema::stream_schema_exists,
schema::{get_upto_discard_error, stream_schema_exists},
usage::report_request_usage_stats,
},
};
@ -101,6 +99,16 @@ pub async fn logs_json_handler(
)));
}
// check memtable
if let Err(e) = ingester::check_memtable_size() {
return Ok(
HttpResponse::ServiceUnavailable().json(MetaHttpResponse::error(
http::StatusCode::SERVICE_UNAVAILABLE.into(),
e.to_string(),
)),
);
}
let start = std::time::Instant::now();
let mut stream_schema_map: AHashMap<String, Schema> = AHashMap::new();
let stream_name = match in_stream_name {
@ -127,7 +135,7 @@ pub async fn logs_json_handler(
let mut stream_status = StreamStatus::new(stream_name);
let mut trigger: TriggerAlertData = None;
let mut min_ts =
let min_ts =
(Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto)).timestamp_micros();
let partition_det =
@ -262,11 +270,9 @@ pub async fn logs_json_handler(
let attributes = log.get("attributes").unwrap().as_array().unwrap();
for res_attr in attributes {
let local_attr = res_attr.as_object().unwrap();
local_val.insert(
flatten::format_key(local_attr.get("key").unwrap().as_str().unwrap()),
get_val_for_attr(local_attr.get("value").unwrap()),
);
let mut key = local_attr.get("key").unwrap().as_str().unwrap().to_string();
flatten::format_key(&mut key);
local_val.insert(key, get_val_for_attr(local_attr.get("value").unwrap()));
}
}
// remove attributes after adding
@ -306,14 +312,10 @@ pub async fn logs_json_handler(
}
// check ingestion time
let earliest_time = Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto);
if timestamp < earliest_time.timestamp_micros() {
stream_status.status.failed += 1; // to old data, just discard
stream_status.status.error = super::get_upto_discard_error();
continue;
}
if timestamp < min_ts {
min_ts = timestamp;
stream_status.status.failed += 1; // too old data, just discard
stream_status.status.error = get_upto_discard_error().to_string();
continue;
}
local_val.insert(
@ -323,25 +325,25 @@ pub async fn logs_json_handler(
local_val.append(&mut service_att_map.clone());
value = json::to_value(local_val).unwrap();
value = json::to_value(local_val)?;
// JSON Flattening
value = flatten::flatten(&value).unwrap();
value = flatten::flatten(value).unwrap();
if !local_trans.is_empty() {
value = crate::service::ingestion::apply_stream_transform(
&local_trans,
&value,
value,
&stream_vrl_map,
stream_name,
&mut runtime,
)
.unwrap_or(value);
.unwrap();
}
local_val = value.as_object_mut().unwrap();
let local_trigger = super::add_valid_record_arrow(
let local_trigger = match super::add_valid_record(
&StreamMeta {
org_id: org_id.to_string(),
stream_name: stream_name.to_string(),
@ -355,8 +357,15 @@ pub async fn logs_json_handler(
local_val,
trigger.is_none(),
)
.await;
.await
{
Ok(v) => v,
Err(e) => {
stream_status.status.failed += 1;
stream_status.status.error = e.to_string();
continue;
}
};
if local_trigger.is_some() {
trigger = local_trigger;
}
@ -385,7 +394,6 @@ pub async fn logs_json_handler(
buf,
thread_id,
&StreamParams::new(org_id, stream_name, StreamType::Logs),
None,
)
.await;

View File

@ -38,6 +38,7 @@ use crate::{
service::{
db, distinct_values, get_formatted_stream_name,
ingestion::{evaluate_trigger, write_file, TriggerAlertData},
schema::get_upto_discard_error,
},
};
@ -118,12 +119,12 @@ pub async fn ingest(msg: &str, addr: SocketAddr) -> Result<HttpResponse, anyhow:
let parsed_msg = syslog_loose::parse_message(msg);
let mut value = message_to_value(parsed_msg);
value = flatten::flatten(&value).unwrap();
value = flatten::flatten(value).unwrap();
if !local_trans.is_empty() {
value = crate::service::ingestion::apply_stream_transform(
&local_trans,
&value,
value,
&stream_vrl_map,
stream_name,
&mut runtime,
@ -149,7 +150,7 @@ pub async fn ingest(msg: &str, addr: SocketAddr) -> Result<HttpResponse, anyhow:
let earlest_time = Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto);
if timestamp < earlest_time.timestamp_micros() {
stream_status.status.failed += 1; // to old data, just discard
stream_status.status.error = super::get_upto_discard_error();
stream_status.status.error = get_upto_discard_error().to_string();
}
local_val.insert(
@ -157,7 +158,7 @@ pub async fn ingest(msg: &str, addr: SocketAddr) -> Result<HttpResponse, anyhow:
json::Value::Number(timestamp.into()),
);
let local_trigger = super::add_valid_record_arrow(
let local_trigger = match super::add_valid_record(
&StreamMeta {
org_id: org_id.to_string(),
stream_name: stream_name.to_string(),
@ -171,7 +172,15 @@ pub async fn ingest(msg: &str, addr: SocketAddr) -> Result<HttpResponse, anyhow:
local_val,
trigger.is_none(),
)
.await;
.await
{
Ok(v) => v,
Err(e) => {
stream_status.status.failed += 1;
stream_status.status.error = e.to_string();
None
}
};
if local_trigger.is_some() {
trigger = local_trigger;
}
@ -192,7 +201,7 @@ pub async fn ingest(msg: &str, addr: SocketAddr) -> Result<HttpResponse, anyhow:
}
}
write_file(buf, thread_id, &stream_params, None).await;
write_file(buf, thread_id, &stream_params).await;
// only one trigger per request, as it updates etcd
evaluate_trigger(trigger).await;

View File

@ -58,6 +58,15 @@ pub async fn ingest(org_id: &str, body: web::Bytes, thread_id: usize) -> Result<
return Err(anyhow::anyhow!("Quota exceeded for this organization"));
}
// check memtable
if let Err(e) = ingester::check_memtable_size() {
return Ok(IngestionResponse {
code: http::StatusCode::SERVICE_UNAVAILABLE.into(),
status: vec![],
error: Some(e.to_string()),
});
}
let mut runtime = crate::service::ingestion::init_functions_runtime();
let mut stream_schema_map: AHashMap<String, Schema> = AHashMap::new();
let mut stream_status_map: AHashMap<String, StreamStatus> = AHashMap::new();
@ -65,7 +74,7 @@ pub async fn ingest(org_id: &str, body: web::Bytes, thread_id: usize) -> Result<
let mut stream_partitioning_map: AHashMap<String, PartitioningDetails> = AHashMap::new();
let reader: Vec<json::Value> = json::from_slice(&body)?;
for record in reader.iter() {
for record in reader.into_iter() {
// JSON Flattening
let mut record = flatten::flatten(record)?;
// check data type
@ -117,8 +126,8 @@ pub async fn ingest(org_id: &str, body: web::Bytes, thread_id: usize) -> Result<
}
// apply functions
let mut record = json::Value::Object(record.to_owned());
apply_func(&mut runtime, org_id, &stream_name, &mut record)?;
let record = json::Value::Object(record.to_owned());
let mut record = apply_func(&mut runtime, org_id, &stream_name, record)?;
let record = record.as_object_mut().unwrap();
@ -263,17 +272,10 @@ pub async fn ingest(org_id: &str, body: web::Bytes, thread_id: usize) -> Result<
continue;
}
let time_level = if let Some(details) = stream_partitioning_map.get(&stream_name) {
details.partition_time_level
} else {
Some(CONFIG.limit.metrics_file_retention.as_str().into())
};
let mut req_stats = write_file(
stream_data,
thread_id,
&StreamParams::new(org_id, &stream_name, StreamType::Metrics),
time_level,
)
.await;
req_stats.response_time = time;
@ -319,21 +321,19 @@ fn apply_func(
runtime: &mut Runtime,
org_id: &str,
metric_name: &str,
value: &mut json::Value,
) -> Result<()> {
value: json::Value,
) -> Result<json::Value> {
let (local_tans, stream_vrl_map) = crate::service::ingestion::register_stream_transforms(
org_id,
StreamType::Metrics,
metric_name,
);
*value = crate::service::ingestion::apply_stream_transform(
crate::service::ingestion::apply_stream_transform(
&local_tans,
value,
&stream_vrl_map,
metric_name,
runtime,
)?;
Ok(())
)
}

View File

@ -75,6 +75,17 @@ pub async fn handle_grpc_request(
"Quota exceeded for this organisation".to_string(),
)));
}
// check memtable
if let Err(e) = ingester::check_memtable_size() {
return Ok(
HttpResponse::ServiceUnavailable().json(MetaHttpResponse::error(
http::StatusCode::SERVICE_UNAVAILABLE.into(),
e.to_string(),
)),
);
}
let start = std::time::Instant::now();
let mut runtime = crate::service::ingestion::init_functions_runtime();
let mut metric_data_map: AHashMap<String, AHashMap<String, SchemaRecords>> = AHashMap::new();
@ -186,7 +197,7 @@ pub async fn handle_grpc_request(
for mut rec in records {
// flattening
rec = flatten::flatten(&rec)?;
rec = flatten::flatten(rec)?;
let local_metric_name =
&format_stream_name(rec.get(NAME_LABEL).unwrap().as_str().unwrap());
@ -253,7 +264,7 @@ pub async fn handle_grpc_request(
if !local_trans.is_empty() {
rec = crate::service::ingestion::apply_stream_transform(
&local_trans,
&rec,
rec,
&stream_vrl_map,
local_metric_name,
&mut runtime,
@ -353,18 +364,11 @@ pub async fn handle_grpc_request(
continue;
}
let time_level = if let Some(details) = stream_partitioning_map.get(&stream_name) {
details.partition_time_level
} else {
Some(CONFIG.limit.metrics_file_retention.as_str().into())
};
// write to file
let mut req_stats = write_file(
stream_data,
thread_id,
&StreamParams::new(org_id, &stream_name, StreamType::Metrics),
time_level,
)
.await;

View File

@ -45,9 +45,8 @@ use crate::{
service::{
db, format_stream_name,
ingestion::{
chk_schema_by_record, evaluate_trigger,
otlp_json::{get_float_value, get_int_value, get_string_value, get_val_for_attr},
write_file, TriggerAlertData,
chk_schema_by_record, evaluate_trigger, get_float_value, get_int_value,
get_string_value, get_val_for_attr, write_file, TriggerAlertData,
},
metrics::{format_label_name, get_exclude_labels, otlp_grpc::handle_grpc_request},
schema::{set_schema_metadata, stream_schema_exists},
@ -99,6 +98,16 @@ pub async fn metrics_json_handler(
)));
}
// check memtable
if let Err(e) = ingester::check_memtable_size() {
return Ok(
HttpResponse::ServiceUnavailable().json(MetaHttpResponse::error(
http::StatusCode::SERVICE_UNAVAILABLE.into(),
e.to_string(),
)),
);
}
let start = std::time::Instant::now();
let mut runtime = crate::service::ingestion::init_functions_runtime();
let mut metric_data_map: AHashMap<String, AHashMap<String, SchemaRecords>> = AHashMap::new();
@ -281,7 +290,7 @@ pub async fn metrics_json_handler(
for mut rec in records {
// flattening
rec = flatten::flatten(&rec).expect("failed to flatten");
rec = flatten::flatten(rec).expect("failed to flatten");
// get json object
let local_metric_name =
@ -349,12 +358,12 @@ pub async fn metrics_json_handler(
if !local_trans.is_empty() {
rec = crate::service::ingestion::apply_stream_transform(
&local_trans,
&rec,
rec,
&stream_vrl_map,
local_metric_name,
&mut runtime,
)
.unwrap_or(rec);
.unwrap();
}
let val_map: &mut serde_json::Map<String, serde_json::Value> =
@ -455,18 +464,11 @@ pub async fn metrics_json_handler(
continue;
}
let time_level = if let Some(details) = stream_partitioning_map.get(&stream_name) {
details.partition_time_level
} else {
Some(CONFIG.limit.metrics_file_retention.as_str().into())
};
// write to file
let mut req_stats = write_file(
stream_data,
thread_id,
&StreamParams::new(org_id, &stream_name, StreamType::Metrics),
time_level,
)
.await;

View File

@ -17,7 +17,7 @@ use std::{collections::HashMap, sync::Arc};
use actix_web::web;
use ahash::AHashMap;
use chrono::{Duration, TimeZone, Utc};
use chrono::{TimeZone, Utc};
use config::{meta::stream::StreamType, metrics, utils::schema_ext::SchemaExt, FxIndexMap, CONFIG};
use datafusion::arrow::datatypes::Schema;
use promql_parser::{label::MatchOp, parser};
@ -70,8 +70,13 @@ pub async fn remote_write(
return Err(anyhow::anyhow!("Quota exceeded for this organization"));
}
let mut min_ts =
(Utc::now() - Duration::hours(CONFIG.limit.ingest_allowed_upto)).timestamp_micros();
// check memtable
if let Err(e) = ingester::check_memtable_size() {
return Err(anyhow::Error::msg(e.to_string()));
}
// let min_ts = (Utc::now() -
// Duration::hours(CONFIG.limit.ingest_allowed_upto)).timestamp_micros();
let dedup_enabled = CONFIG.common.metrics_dedup_enabled;
let election_interval = CONFIG.limit.metrics_leader_election_interval * 1000000;
let mut last_received: i64 = 0;
@ -169,9 +174,6 @@ pub async fn remote_write(
};
let timestamp = parse_i64_to_timestamp_micros(sample.timestamp);
if timestamp < min_ts {
min_ts = timestamp;
}
if first_line && dedup_enabled && !cluster_name.is_empty() {
let lock = METRIC_CLUSTER_LEADER.read().await;
@ -261,7 +263,7 @@ pub async fn remote_write(
value = crate::service::ingestion::apply_stream_transform(
&local_trans,
&value,
value,
&stream_vrl_map,
&metric_name,
&mut runtime,
@ -357,18 +359,11 @@ pub async fn remote_write(
continue;
}
let time_level = if let Some(details) = stream_partitioning_map.get(&stream_name) {
details.partition_time_level
} else {
Some(CONFIG.limit.metrics_file_retention.as_str().into())
};
// write to file
let mut req_stats = write_file(
stream_data,
thread_id,
&StreamParams::new(org_id, &stream_name, StreamType::Metrics),
time_level,
)
.await;

View File

@ -23,7 +23,10 @@ use std::{
use ahash::AHashMap;
use config::{
meta::stream::StreamType,
utils::{schema::infer_json_schema, schema_ext::SchemaExt},
utils::{
schema::{infer_json_schema, infer_json_schema_from_values},
schema_ext::SchemaExt,
},
CONFIG,
};
use datafusion::arrow::{
@ -41,6 +44,20 @@ use crate::{
service::{db, search::server_internal_error},
};
pub(crate) fn get_upto_discard_error() -> anyhow::Error {
anyhow::anyhow!(
"Data too old: only the last {} hours of data can be ingested. Data discarded. You can adjust the ingestion max time by setting the environment variable ZO_INGEST_ALLOWED_UPTO=<max_hours>",
CONFIG.limit.ingest_allowed_upto
)
}
pub(crate) fn get_rquest_columns_limit_error() -> anyhow::Error {
anyhow::anyhow!(
"Too many columns: only {} columns are accepted. Data discarded. You can adjust the ingestion columns limit by setting the environment variable ZO_COLS_PER_RECORD_LIMIT=<max_columns>",
CONFIG.limit.req_cols_per_record_limit
)
}
#[tracing::instrument(name = "service:schema:schema_evolution", skip(inferred_schema))]
pub async fn schema_evolution(
org_id: &str,
@ -262,11 +279,10 @@ pub async fn check_for_schema(
org_id: &str,
stream_name: &str,
stream_type: StreamType,
val_str: &str,
stream_schema_map: &mut AHashMap<String, Schema>,
record_val: &json::Value,
record_ts: i64,
is_arrow: bool,
) -> SchemaEvolution {
) -> Result<SchemaEvolution, anyhow::Error> {
let mut schema = if stream_schema_map.contains_key(stream_name) {
stream_schema_map.get(stream_name).unwrap().clone()
} else {
@ -278,38 +294,27 @@ pub async fn check_for_schema(
};
if !schema.fields().is_empty() && CONFIG.common.skip_schema_validation {
return SchemaEvolution {
return Ok(SchemaEvolution {
schema_compatible: true,
types_delta: None,
schema_fields: schema.to_cloned_fields(),
is_schema_changed: false,
record_schema: schema,
};
});
}
let mut schema_reader = BufReader::new(val_str.as_bytes());
let inferred_schema = infer_json_schema(&mut schema_reader, None, stream_type).unwrap();
let value_iter = [record_val].into_iter();
let inferred_schema = infer_json_schema_from_values(value_iter, stream_type).unwrap();
if schema.fields.eq(&inferred_schema.fields) {
// return (true, None, schema.fields().to_vec());
return SchemaEvolution {
return Ok(SchemaEvolution {
schema_compatible: true,
types_delta: None,
schema_fields: schema.to_cloned_fields(),
is_schema_changed: false,
record_schema: schema,
};
});
}
if inferred_schema.fields.len() > CONFIG.limit.req_cols_per_record_limit {
// return (false, None, inferred_schema.fields().to_vec());
return SchemaEvolution {
schema_compatible: false,
types_delta: None,
schema_fields: inferred_schema.to_cloned_fields(),
is_schema_changed: false,
record_schema: schema,
};
return Err(get_rquest_columns_limit_error());
}
if schema.fields().is_empty() {
@ -324,12 +329,12 @@ pub async fn check_for_schema(
)
.await
{
return value;
return Ok(value);
}
};
let (field_datatype_delta, is_schema_changed, final_fields, record_schema) =
get_schema_changes(&schema, &inferred_schema, is_arrow);
let (is_schema_changed, field_datatype_delta, _) =
get_schema_changes(&schema, &inferred_schema);
if is_schema_changed {
if let Some(value) = handle_existing_schema(
@ -339,28 +344,23 @@ pub async fn check_for_schema(
&inferred_schema,
record_ts,
stream_schema_map,
is_arrow,
)
.await
{
value
Ok(value)
} else {
SchemaEvolution {
Ok(SchemaEvolution {
schema_compatible: true,
types_delta: Some(field_datatype_delta),
schema_fields: schema.to_cloned_fields(),
is_schema_changed: false,
record_schema,
}
})
}
} else {
SchemaEvolution {
Ok(SchemaEvolution {
schema_compatible: true,
types_delta: Some(field_datatype_delta),
schema_fields: final_fields,
is_schema_changed,
record_schema,
}
})
}
}
@ -371,7 +371,6 @@ async fn handle_existing_schema(
inferred_schema: &Schema,
record_ts: i64,
stream_schema_map: &mut AHashMap<String, Schema>,
is_arrow: bool,
) -> Option<SchemaEvolution> {
if !CONFIG.common.local_mode {
let mut lock = etcd::Locker::new(&format!("schema/{org_id}/{stream_type}/{stream_name}"));
@ -379,8 +378,8 @@ async fn handle_existing_schema(
let schema = db::schema::get_from_db(org_id, stream_name, stream_type)
.await
.unwrap();
let (field_datatype_delta, is_schema_changed, final_fields, _) =
get_schema_changes(&schema, inferred_schema, is_arrow);
let (is_schema_changed, field_datatype_delta, final_fields) =
get_schema_changes(&schema, inferred_schema);
let is_field_delta = !field_datatype_delta.is_empty();
let mut metadata = schema.metadata().clone();
if !metadata.contains_key("created_at") {
@ -390,7 +389,7 @@ async fn handle_existing_schema(
);
}
metadata.extend(inferred_schema.metadata().to_owned());
let final_schema = Schema::new(final_fields.clone()).with_metadata(metadata);
let final_schema = Schema::new(final_fields).with_metadata(metadata);
if is_schema_changed {
log::info!(
"Acquired lock for cluster stream {} to update schema",
@ -415,9 +414,7 @@ async fn handle_existing_schema(
Some(SchemaEvolution {
schema_compatible: true,
types_delta: Some(field_datatype_delta),
schema_fields: final_fields,
is_schema_changed,
record_schema: final_schema,
})
} else {
let key = format!(
@ -436,8 +433,8 @@ async fn handle_existing_schema(
let schema = db::schema::get_from_db(org_id, stream_name, stream_type)
.await
.unwrap();
let (field_datatype_delta, is_schema_changed, final_fields, _) =
get_schema_changes(&schema, inferred_schema, is_arrow);
let (is_schema_changed, field_datatype_delta, final_fields) =
get_schema_changes(&schema, inferred_schema);
let is_field_delta = !field_datatype_delta.is_empty();
let mut metadata = schema.metadata().clone();
if !metadata.contains_key("created_at") {
@ -447,7 +444,7 @@ async fn handle_existing_schema(
);
}
metadata.extend(inferred_schema.metadata().to_owned());
let final_schema = Schema::new(final_fields.clone()).with_metadata(metadata);
let final_schema = Schema::new(final_fields).with_metadata(metadata);
if is_schema_changed {
log::info!(
"Acquired lock for local stream {} to update schema",
@ -473,26 +470,21 @@ async fn handle_existing_schema(
Some(SchemaEvolution {
schema_compatible: true,
types_delta: Some(field_datatype_delta),
schema_fields: final_fields,
is_schema_changed,
record_schema: final_schema,
})
} else {
// Some other request has already acquired the lock.
let schema = db::schema::get_from_db(org_id, stream_name, stream_type)
.await
.unwrap();
let (field_datatype_delta, _is_schema_changed, final_fields, _) =
get_schema_changes(&schema, inferred_schema, is_arrow);
let (_, field_datatype_delta, _) = get_schema_changes(&schema, inferred_schema);
stream_schema_map.insert(stream_name.to_string(), schema.clone());
log::info!("Schema exists for stream {} ", stream_name);
drop(lock_acquired); // release lock
Some(SchemaEvolution {
schema_compatible: true,
types_delta: Some(field_datatype_delta),
schema_fields: final_fields,
is_schema_changed: false,
record_schema: schema,
})
}
}
@ -555,9 +547,7 @@ async fn handle_new_schema(
return Some(SchemaEvolution {
schema_compatible: true,
types_delta: None,
schema_fields: final_schema.to_cloned_fields(),
is_schema_changed: true,
record_schema: final_schema,
});
} else {
stream_schema_map.insert(stream_name.to_string(), chk_schema.clone());
@ -610,9 +600,7 @@ async fn handle_new_schema(
return Some(SchemaEvolution {
schema_compatible: true,
types_delta: None,
schema_fields: final_schema.to_cloned_fields(),
is_schema_changed: true,
record_schema: final_schema,
});
} else {
// No schema change
@ -639,21 +627,16 @@ async fn handle_new_schema(
None
}
fn get_schema_changes(
schema: &Schema,
inferred_schema: &Schema,
_is_arrow: bool,
) -> (Vec<Field>, bool, Vec<Field>, Schema) {
fn get_schema_changes(schema: &Schema, inferred_schema: &Schema) -> (bool, Vec<Field>, Vec<Field>) {
let mut is_schema_changed = false;
let mut field_datatype_delta: Vec<_> = vec![];
let mut new_field_delta: Vec<_> = vec![];
let mut merged_fields = schema
.fields()
.iter()
.map(|f| f.as_ref().to_owned())
.collect::<Vec<_>>();
let mut merged_fields_chk: AHashMap<String, usize> = AHashMap::new();
let mut merged_fields_chk = hashbrown::HashMap::with_capacity(merged_fields.len());
for (i, f) in merged_fields.iter().enumerate() {
merged_fields_chk.insert(f.name().to_string(), i);
}
@ -663,41 +646,31 @@ fn get_schema_changes(
let item_data_type = item.data_type();
match merged_fields_chk.get(item_name) {
None => {
is_schema_changed = true;
merged_fields.push((**item).clone());
merged_fields_chk.insert(item_name.to_string(), merged_fields.len() - 1);
}
Some(idx) => {
let existing_field = &merged_fields[*idx];
if existing_field.data_type() != item_data_type {
if !CONFIG.common.widening_schema_evolution {
field_datatype_delta.push(existing_field.clone());
} else if is_widening_conversion(existing_field.data_type(), item_data_type) {
is_schema_changed = true;
field_datatype_delta.push((**item).clone());
merged_fields[*idx] = (**item).clone();
} else {
let allowed =
is_widening_conversion(existing_field.data_type(), item_data_type);
if allowed {
is_schema_changed = true;
field_datatype_delta.push((**item).clone());
merged_fields[*idx] = (**item).clone();
} else {
let mut meta = existing_field.metadata().clone();
meta.insert("zo_cast".to_owned(), true.to_string());
field_datatype_delta.push(existing_field.clone().with_metadata(meta));
}
let mut meta = existing_field.metadata().clone();
meta.insert("zo_cast".to_owned(), true.to_string());
field_datatype_delta.push(existing_field.clone().with_metadata(meta));
}
}
}
None => {
is_schema_changed = true;
new_field_delta.push(item);
merged_fields.push((**item).clone());
merged_fields_chk.insert(item_name.to_string(), merged_fields.len() - 1);
}
}
}
(
field_datatype_delta,
is_schema_changed,
merged_fields,
Schema::empty(),
)
(is_schema_changed, field_datatype_delta, merged_fields)
}
pub async fn stream_schema_exists(
@ -859,7 +832,8 @@ mod tests {
async fn test_check_for_schema() {
let stream_name = "Sample";
let org_name = "nexus";
let record = r#"{"Year": 1896, "City": "Athens", "_timestamp": 1234234234234}"#;
let record =
json::json!(r#"{"Year": 1896, "City": "Athens", "_timestamp": 1234234234234}"#);
let schema = Schema::new(vec![
Field::new("Year", DataType::Int64, false),
@ -872,12 +846,12 @@ mod tests {
org_name,
stream_name,
StreamType::Logs,
record,
&mut map,
&record,
1234234234234,
false,
)
.await;
.await
.unwrap();
assert!(result.schema_compatible);
}
}
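get_schema_changes above now returns (is_schema_changed, field_datatype_delta, merged_fields) and only promotes a field's type when the conversion is a widening one. A hedged sketch of that widening-conversion idea, assuming the arrow_schema crate; the rules below are illustrative, not the project's exact table:

use arrow_schema::DataType;

// A conversion is "widening" if the new type can represent every value of the old
// one (e.g. Int64 -> Float64, or anything -> Utf8); otherwise the existing field is
// kept and tagged with `zo_cast` so readers cast the incoming value instead.
fn is_widening_conversion(from: &DataType, to: &DataType) -> bool {
    matches!(
        (from, to),
        (DataType::Int64, DataType::Float64)
            | (DataType::Int64, DataType::Utf8)
            | (DataType::Float64, DataType::Utf8)
            | (DataType::Boolean, DataType::Utf8)
    )
}

fn main() {
    assert!(is_widening_conversion(&DataType::Int64, &DataType::Utf8));
    assert!(!is_widening_conversion(&DataType::Utf8, &DataType::Int64));
}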

View File

@ -56,8 +56,8 @@ pub fn date_format_expr_impl() -> ScalarFunctionImplementation {
)));
}
// 1. cast both arguments to Union. These casts MUST be aligned with the
// signature or this function panics!
// 1. cast both arguments to Union. These casts MUST be aligned with the signature or this
// function panics!
let timestamp = &args[0]
.as_any()
.downcast_ref::<Int64Array>()

View File

@ -18,7 +18,7 @@ use std::{str::FromStr, sync::Arc};
use ahash::AHashMap as HashMap;
use config::{
meta::stream::{FileKey, FileMeta, StreamType},
utils::{parquet::new_parquet_writer, schema::infer_json_schema_from_iterator},
utils::{parquet::new_parquet_writer, schema::infer_json_schema_from_values},
CONFIG, PARQUET_BATCH_SIZE,
};
use datafusion::{
@ -126,6 +126,7 @@ pub async fn sql(
)
.await?,
);
let mut spend_time = start.elapsed().as_secs_f64();
// get alias from context query for agg sql
let meta_sql = sql::Sql::new(&sql.query_context);
@ -182,10 +183,13 @@ pub async fn sql(
}
let batches = df.collect().await?;
result.insert(format!("agg_{name}"), batches);
let q_time = start.elapsed().as_secs_f64();
log::info!(
"[session_id {session_id}] Query agg:{name} took {:.3} seconds.",
start.elapsed().as_secs_f64()
q_time - spend_time
);
spend_time = q_time;
}
// drop table
@ -1023,6 +1027,9 @@ pub fn create_session_config(search_type: &SearchType) -> Result<SessionConfig>
if CONFIG.common.bloom_filter_enabled {
config = config.set_bool("datafusion.execution.parquet.bloom_filter_enabled", true);
}
if CONFIG.common.bloom_filter_force_disabled {
config = config.set_bool("datafusion.execution.parquet.bloom_filter_enabled", false);
}
Ok(config)
}
@ -1184,12 +1191,12 @@ fn apply_query_fn(
},
&json::Value::Object(hit.clone()),
);
(!ret_val.is_null()).then_some(flatten::flatten(&ret_val).unwrap_or(ret_val))
(!ret_val.is_null()).then_some(flatten::flatten(ret_val).unwrap())
})
.collect();
let value_iter = rows_val.iter().map(Ok);
let inferred_schema = infer_json_schema_from_iterator(value_iter, stream_type).unwrap();
let value_iter = rows_val.iter();
let inferred_schema = infer_json_schema_from_values(value_iter, stream_type).unwrap();
let mut decoder =
arrow::json::ReaderBuilder::new(Arc::new(inferred_schema)).build_decoder()?;
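The spend_time change above makes each aggregation log the delta it took rather than the cumulative elapsed time. A tiny standalone illustration of that bookkeeping:

use std::time::Instant;

fn main() {
    let start = Instant::now();
    let mut spend_time = start.elapsed().as_secs_f64();
    for name in ["histogram", "count"] {
        // ... the aggregation query would run here ...
        let q_time = start.elapsed().as_secs_f64();
        println!("Query agg:{name} took {:.3} seconds.", q_time - spend_time);
        spend_time = q_time; // the next stage is measured from this point
    }
}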

View File

@ -65,8 +65,8 @@ pub fn match_expr_impl(case_insensitive: bool) -> ScalarFunctionImplementation {
)));
}
// 1. cast both arguments to string. These casts MUST be aligned with the
// signature or this function panics!
// 1. cast both arguments to string. These casts MUST be aligned with the signature or this
// function panics!
let haystack = &args[0]
.as_any()
.downcast_ref::<StringArray>()

View File

@ -55,8 +55,8 @@ pub fn time_range_expr_impl() -> ScalarFunctionImplementation {
)));
}
// 1. cast both arguments to Union. These casts MUST be aligned with the
// signature or this function panics!
// 1. cast both arguments to Union. These casts MUST be aligned with the signature or this
// function panics!
let base = &args[0]
.as_any()
.downcast_ref::<Int64Array>()

View File

@ -30,7 +30,7 @@ use vrl::compiler::{runtime::Runtime, CompilationResult, Program, TargetValueRef
use crate::{
common::{infra::config::QUERY_FUNCTIONS, utils::json},
service::ingestion::{compile_vrl_function, get_value},
service::ingestion::{compile_vrl_function, get_string_value},
};
fn create_user_df(
@ -147,13 +147,14 @@ fn get_udf_vrl(
for col in res.fields {
let field_builder = col_val_map.entry(col.to_string()).or_default();
if res_map.contains_key(&col) {
field_builder.insert(i, get_value(res_map.get(&col).unwrap()));
field_builder
.insert(i, get_string_value(res_map.get(&col).unwrap()));
} else {
field_builder.insert(i, "".to_string());
}
}
} else {
res_data_vec.insert(i, get_value(&result));
res_data_vec.insert(i, get_string_value(&result));
}
}
}

View File

@ -280,23 +280,31 @@ fn get_key_from_error(err: &str, pos: usize) -> Option<String> {
None
}
fn check_memory_circuit_breaker(scan_stats: &ScanStats) -> Result<(), Error> {
fn check_memory_circuit_breaker(session_id: &str, scan_stats: &ScanStats) -> Result<(), Error> {
let scan_size = if scan_stats.compressed_size > 0 {
scan_stats.compressed_size
} else {
scan_stats.original_size
};
if let Some(cur_memory) = memory_stats::memory_stats() {
if cur_memory.physical_mem as i64 + scan_size
> (CONFIG.limit.mem_total * CONFIG.common.memory_circuit_breaker_ratio / 100) as i64
// left memory < datafusion * breaker_ratio and scan_size >= left memory
let left_mem = CONFIG.limit.mem_total - cur_memory.physical_mem;
if (left_mem
< (CONFIG.memory_cache.datafusion_max_size
* CONFIG.common.memory_circuit_breaker_ratio
/ 100))
&& (scan_size >= left_mem as i64)
{
let err = format!(
"fire memory_circuit_breaker, try to alloc {} bytes, now current memory usage is {} bytes, larger than limit of [{} bytes] ",
"fire memory_circuit_breaker, try to alloc {} bytes, now current memory usage is {} bytes, left memory {} bytes, left memory more than limit of [{} bytes] or scan_size more than left memory , please submit a new query with a short time range",
scan_size,
cur_memory.physical_mem,
CONFIG.limit.mem_total * CONFIG.common.memory_circuit_breaker_ratio / 100
left_mem,
CONFIG.memory_cache.datafusion_max_size
* CONFIG.common.memory_circuit_breaker_ratio
/ 100
);
log::warn!("{}", err);
log::warn!("[{session_id}] {}", err);
return Err(Error::Message(err.to_string()));
}
}

View File

@ -153,7 +153,7 @@ pub async fn search(
);
if CONFIG.common.memory_circuit_breaker_enable {
super::check_memory_circuit_breaker(&scan_stats)?;
super::check_memory_circuit_breaker(session_id, &scan_stats)?;
}
// load files to local cache

View File

@ -122,10 +122,6 @@ pub async fn search_parquet(
return Ok((HashMap::new(), scan_stats));
}
if CONFIG.common.memory_circuit_breaker_enable {
super::check_memory_circuit_breaker(&scan_stats)?;
}
// fetch all schema versions, get latest schema
let schema_latest = Arc::new(
schema_latest
@ -351,7 +347,7 @@ pub async fn search_memtable(
);
if CONFIG.common.memory_circuit_breaker_enable {
super::check_memory_circuit_breaker(&scan_stats)?;
super::check_memory_circuit_breaker(session_id, &scan_stats)?;
}
// fetch all schema versions, get latest schema

View File

@ -424,8 +424,7 @@ async fn search_in_cluster(mut req: cluster_rpc::SearchRequest) -> Result<search
},
&json::Value::Object(hit.clone()),
);
(!ret_val.is_null())
.then_some(flatten::flatten(&ret_val).unwrap_or(ret_val))
(!ret_val.is_null()).then_some(flatten::flatten(ret_val).unwrap())
})
.collect(),
None => json_rows
@ -444,7 +443,8 @@ async fn search_in_cluster(mut req: cluster_rpc::SearchRequest) -> Result<search
if sql.uses_zo_fn {
for source in sources {
result.add_hit(&flatten::flatten(&source).unwrap());
result
.add_hit(&flatten::flatten(source).map_err(|e| Error::Message(e.to_string()))?);
}
} else {
for source in sources {

View File

@ -20,7 +20,7 @@ use ahash::AHashMap;
use bytes::BytesMut;
use chrono::{Duration, Utc};
use config::{
meta::stream::StreamType, metrics, utils::hasher::get_fields_key_xxh3, CONFIG, DISTINCT_FIELDS,
meta::stream::StreamType, metrics, utils::schema_ext::SchemaExt, CONFIG, DISTINCT_FIELDS,
};
use datafusion::arrow::datatypes::Schema;
use opentelemetry::trace::{SpanId, TraceId};
@ -42,7 +42,7 @@ use crate::{
traces::{Event, Span, SpanRefType},
usage::UsageType,
},
utils::{self, flatten, json},
utils::{flatten, json},
},
service::{
db, distinct_values, format_partition_key, format_stream_name,
@ -83,6 +83,17 @@ pub async fn handle_trace_request(
"Quota exceeded for this organization".to_string(),
)));
}
// check memtable
if let Err(e) = ingester::check_memtable_size() {
return Ok(
HttpResponse::ServiceUnavailable().json(MetaHttpResponse::error(
http::StatusCode::SERVICE_UNAVAILABLE.into(),
e.to_string(),
)),
);
}
let start = std::time::Instant::now();
let traces_stream_name = match in_stream_name {
@ -241,30 +252,34 @@ pub async fn handle_trace_request(
let value: json::Value = json::to_value(local_val).unwrap();
// JSON Flattening
let mut value = flatten::flatten(&value).unwrap();
let mut value = flatten::flatten(value).map_err(|e| {
std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
})?;
if !local_trans.is_empty() {
value = crate::service::ingestion::apply_stream_transform(
&local_trans,
&value,
value,
&stream_vrl_map,
traces_stream_name,
&mut runtime,
)
.unwrap_or(value);
.map_err(|e| {
std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
})?;
}
// End row based transform */
// get json object
let val_map = value.as_object_mut().unwrap();
let record_val = value.as_object_mut().unwrap();
val_map.insert(
record_val.insert(
CONFIG.common.column_timestamp.clone(),
json::Value::Number(timestamp.into()),
);
// get distinct_value item
for field in DISTINCT_FIELDS.iter() {
if let Some(val) = val_map.get(field) {
if let Some(val) = record_val.get(field) {
if !val.is_null() {
let (filter_name, filter_value) = if field == "operation_name" {
("service_name".to_string(), service_name.clone())
@ -283,30 +298,17 @@ pub async fn handle_trace_request(
}
}
let value_str = crate::common::utils::json::to_string(&val_map).unwrap();
// check schema
let schema_evolution = check_for_schema(
let _ = check_for_schema(
org_id,
traces_stream_name,
StreamType::Traces,
&value_str,
&mut traces_schema_map,
&json::Value::Object(record_val.clone()),
timestamp.try_into().unwrap(),
true,
)
.await;
let schema_key = get_fields_key_xxh3(&schema_evolution.schema_fields);
// get hour key
let mut hour_key = super::ingestion::get_wal_time_key(
timestamp.try_into().unwrap(),
&partition_keys,
partition_time_level,
val_map,
Some(&schema_key),
);
if trigger.is_none() && !stream_alerts_map.is_empty() {
// Start check for alert trigger
let key = format!("{}/{}/{}", &org_id, StreamType::Traces, traces_stream_name);
@ -314,7 +316,7 @@ pub async fn handle_trace_request(
let mut trigger_alerts: Vec<(Alert, Vec<json::Map<String, json::Value>>)> =
Vec::new();
for alert in alerts {
if let Ok(Some(v)) = alert.evaluate(Some(val_map)).await {
if let Ok(Some(v)) = alert.evaluate(Some(record_val)).await {
trigger_alerts.push((alert.clone(), v));
}
}
@ -323,28 +325,37 @@ pub async fn handle_trace_request(
// End check for alert trigger
}
// get hour key
let rec_schema = traces_schema_map
.get(traces_stream_name)
.unwrap()
.clone()
.with_metadata(HashMap::new());
let schema_key = rec_schema.hash_key();
let mut hour_key = super::ingestion::get_wal_time_key(
timestamp.try_into().unwrap(),
&partition_keys,
partition_time_level,
record_val,
Some(&schema_key),
);
if partition_keys.is_empty() {
let partition_key = format!("service_name={}", service_name);
hour_key.push_str(&format!("/{}", format_partition_key(&partition_key)));
}
let hour_buf = data_buf.entry(hour_key).or_insert_with(|| {
let schema = traces_schema_map
.get(traces_stream_name)
.unwrap()
.clone()
.with_metadata(HashMap::new());
SchemaRecords {
schema_key,
schema: Arc::new(schema),
records: vec![],
records_size: 0,
}
let hour_buf = data_buf.entry(hour_key).or_insert_with(|| SchemaRecords {
schema_key,
schema: Arc::new(rec_schema),
records: vec![],
records_size: 0,
});
let loc_value: utils::json::Value =
utils::json::from_slice(value_str.as_bytes()).unwrap();
hour_buf.records.push(Arc::new(loc_value));
hour_buf.records_size += value_str.len();
let record_val = record_val.to_owned();
let record_val = json::Value::Object(record_val);
let record_size = json::to_vec(&record_val).unwrap_or_default().len();
hour_buf.records.push(Arc::new(record_val));
hour_buf.records_size += record_size;
if timestamp < min_ts.try_into().unwrap() {
partial_success.rejected_spans += 1;
@ -359,7 +370,6 @@ pub async fn handle_trace_request(
data_buf,
thread_id,
&StreamParams::new(org_id, traces_stream_name, StreamType::Traces),
None,
)
.await;
let time = start.elapsed().as_secs_f64();
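The trace buffering above now keys entries by the schema's hash and accounts size from the serialized record instead of a pre-rendered string. A hedged sketch of that bookkeeping with serde_json; SchemaRecords here is a stand-in, not the real struct:

use std::sync::Arc;
use serde_json::{json, Value};

struct SchemaRecords {
    schema_key: String,
    records: Vec<Arc<Value>>,
    records_size: usize,
}

fn push_record(buf: &mut SchemaRecords, record: Value) {
    // size is the serialized length of the record, matching what gets persisted
    let record_size = serde_json::to_vec(&record).unwrap_or_default().len();
    buf.records.push(Arc::new(record));
    buf.records_size += record_size;
}

fn main() {
    let mut buf = SchemaRecords {
        schema_key: "demo_schema_hash".to_string(),
        records: Vec::new(),
        records_size: 0,
    };
    push_record(&mut buf, json!({"trace_id": "abc", "duration": 42}));
    println!("{} record(s), {} bytes (key {})", buf.records.len(), buf.records_size, buf.schema_key);
}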

View File

@ -19,7 +19,7 @@ use actix_web::{http, web, HttpResponse};
use ahash::AHashMap;
use chrono::{Duration, Utc};
use config::{
meta::stream::StreamType, metrics, utils::hasher::get_fields_key_xxh3, CONFIG, DISTINCT_FIELDS,
meta::stream::StreamType, metrics, utils::schema_ext::SchemaExt, CONFIG, DISTINCT_FIELDS,
};
use datafusion::arrow::datatypes::Schema;
use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest;
@ -37,7 +37,6 @@ use crate::{
},
usage::UsageType,
},
utils,
utils::{flatten, json},
},
service::{
@ -87,6 +86,16 @@ pub async fn traces_json(
)));
}
// check memtable
if let Err(e) = ingester::check_memtable_size() {
return Ok(
HttpResponse::ServiceUnavailable().json(MetaHttpResponse::error(
http::StatusCode::SERVICE_UNAVAILABLE.into(),
e.to_string(),
)),
);
}
let start = std::time::Instant::now();
let traces_stream_name = match in_stream_name {
Some(name) => format_stream_name(name),
@ -304,30 +313,34 @@ pub async fn traces_json(
let mut value: json::Value = json::to_value(local_val).unwrap();
// JSON Flattening
value = flatten::flatten(&value).unwrap();
value = flatten::flatten(value).map_err(|e| {
std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
})?;
if !local_trans.is_empty() {
value = crate::service::ingestion::apply_stream_transform(
&local_trans,
&value,
value,
&stream_vrl_map,
traces_stream_name,
&mut runtime,
)
.unwrap_or(value);
.map_err(|e| {
std::io::Error::new(std::io::ErrorKind::InvalidData, e.to_string())
})?;
}
// End row based transform */
// get json object
let val_map = value.as_object_mut().unwrap();
let record_val = value.as_object_mut().unwrap();
val_map.insert(
record_val.insert(
CONFIG.common.column_timestamp.clone(),
json::Value::Number(timestamp.into()),
);
// get distinct_value item
for field in DISTINCT_FIELDS.iter() {
if let Some(val) = val_map.get(field) {
if let Some(val) = record_val.get(field) {
if !val.is_null() {
let (filter_name, filter_value) = if field == "operation_name" {
("service_name".to_string(), service_name.clone())
@ -346,30 +359,17 @@ pub async fn traces_json(
}
}
let value_str = crate::common::utils::json::to_string(&val_map).unwrap();
// check schema
let schema_evolution = check_for_schema(
let _ = check_for_schema(
org_id,
traces_stream_name,
StreamType::Traces,
&value_str,
&mut traces_schema_map,
&json::Value::Object(record_val.clone()),
timestamp.try_into().unwrap(),
true,
)
.await;
// get hour key
let schema_key = get_fields_key_xxh3(&schema_evolution.schema_fields);
let mut hour_key = crate::service::ingestion::get_wal_time_key(
timestamp.try_into().unwrap(),
&partition_keys,
partition_time_level,
val_map,
Some(&schema_key),
);
if trigger.is_none() && !stream_alerts_map.is_empty() {
// Start check for alert trigger
let key =
@ -380,7 +380,7 @@ pub async fn traces_json(
Vec<json::Map<String, json::Value>>,
)> = Vec::new();
for alert in alerts {
if let Ok(Some(v)) = alert.evaluate(Some(val_map)).await {
if let Ok(Some(v)) = alert.evaluate(Some(record_val)).await {
trigger_alerts.push((alert.clone(), v));
}
}
@ -389,28 +389,37 @@ pub async fn traces_json(
// End check for alert trigger
}
// get hour key
let rec_schema = traces_schema_map
.get(traces_stream_name)
.unwrap()
.clone()
.with_metadata(HashMap::new());
let schema_key = rec_schema.hash_key();
let mut hour_key = crate::service::ingestion::get_wal_time_key(
timestamp.try_into().unwrap(),
&partition_keys,
partition_time_level,
record_val,
Some(&schema_key),
);
if partition_keys.is_empty() {
let partition_key = format!("service_name={}", service_name);
hour_key.push_str(&format!("/{}", format_partition_key(&partition_key)));
}
let hour_buf = data_buf.entry(hour_key).or_insert_with(|| {
let schema = traces_schema_map
.get(traces_stream_name)
.unwrap()
.clone()
.with_metadata(HashMap::new());
SchemaRecords {
schema_key,
schema: Arc::new(schema),
records: vec![],
records_size: 0,
}
let hour_buf = data_buf.entry(hour_key).or_insert_with(|| SchemaRecords {
schema_key,
schema: Arc::new(rec_schema),
records: vec![],
records_size: 0,
});
let loc_value: utils::json::Value =
utils::json::from_slice(value_str.as_bytes()).unwrap();
hour_buf.records.push(Arc::new(loc_value));
hour_buf.records_size += value_str.len();
let record_val = record_val.to_owned();
let record_val = json::Value::Object(record_val);
let record_size = json::to_vec(&record_val).unwrap_or_default().len();
hour_buf.records.push(Arc::new(record_val));
hour_buf.records_size += record_size;
}
}
}
@ -421,7 +430,6 @@ pub async fn traces_json(
data_buf,
thread_id,
&StreamParams::new(org_id, traces_stream_name, StreamType::Traces),
None,
)
.await;
let time = start.elapsed().as_secs_f64();

View File

@ -22,7 +22,6 @@ use std::{
use byteorder::{BigEndian, ReadBytesExt};
use crc32fast::Hasher;
use snafu::{ensure, ResultExt};
use snap::read::FrameDecoder;
use crate::errors::*;
@ -84,7 +83,7 @@ where
let compressed_read = self.f.by_ref().take(expected_len);
let hashing_read = CrcReader::new(compressed_read);
let mut decompressing_read = FrameDecoder::new(hashing_read);
let mut decompressing_read = snap::read::FrameDecoder::new(hashing_read);
let mut data = Vec::with_capacity(100);
decompressing_read