diff --git a/docs/snippets/tables.mdx b/docs/snippets/tables.mdx index 717f0f9..0ee813b 100644 --- a/docs/snippets/tables.mdx +++ b/docs/snippets/tables.mdx @@ -82,6 +82,10 @@ export const PySchemaAlterSetup = "table_name = \"schema_evolution_alter_example export const PySchemaDropSetup = "if data is None:\n data = [\n {\n \"id\": 1,\n \"name\": \"Laptop\",\n \"price\": 1200.00,\n \"temp_col1\": \"X\",\n \"temp_col2\": 100,\n \"vector\": np.random.random(128).tolist(),\n },\n {\n \"id\": 2,\n \"name\": \"Smartphone\",\n \"price\": 800.00,\n \"temp_col1\": \"Y\",\n \"temp_col2\": 200,\n \"vector\": np.random.random(128).tolist(),\n },\n {\n \"id\": 3,\n \"name\": \"Headphones\",\n \"price\": 150.00,\n \"temp_col1\": \"Z\",\n \"temp_col2\": 300,\n \"vector\": np.random.random(128).tolist(),\n },\n ]\ntable = tmp_db.create_table(\"schema_evolution_drop_example\", data, mode=\"overwrite\")\n"; +export const PySchemaFieldMetadataMerge = "# Set two metadata keys on the `category` field.\nres = table.update_field_metadata(\n {\"path\": \"category\", \"metadata\": {\"unit\": \"label\", \"pii\": \"false\"}}\n)\nprint(res.version)\n\n# Merge: add a new key, delete one with None, keep the rest.\ntable.update_field_metadata(\n {\"path\": \"category\", \"metadata\": {\"source\": \"import\", \"pii\": None}}\n)\n\n# Arrow stores field metadata as bytes.\nassert table.schema.field(\"category\").metadata == {\n b\"unit\": b\"label\",\n b\"source\": b\"import\",\n}\n"; + +export const PySchemaFieldMetadataReplace = "table.update_field_metadata(\n {\n \"path\": \"category\",\n \"metadata\": {\"owner\": \"search-team\"},\n \"replace\": True,\n }\n)\n"; + export const PyTablesBasicConnect = "import lancedb\n\nuri = \"data/sample-lancedb\"\ndb = lancedb.connect(uri)\n"; export const PyTablesDocumentModel = "from pydantic import BaseModel\n\nclass Document(BaseModel):\n content: str\n source: str\n"; @@ -184,6 +188,10 @@ export const TsSchemaAlterSetup = "const schemaAlter = new 
arrow.Schema([\n new export const TsSchemaDropSetup = "const schemaDropData = [\n {\n id: 1,\n name: \"Laptop\",\n price: 1200.0,\n temp_col1: \"X\",\n temp_col2: 100,\n vector: Array.from({ length: 128 }, () => Math.random()),\n },\n {\n id: 2,\n name: \"Smartphone\",\n price: 800.0,\n temp_col1: \"Y\",\n temp_col2: 200,\n vector: Array.from({ length: 128 }, () => Math.random()),\n },\n {\n id: 3,\n name: \"Headphones\",\n price: 150.0,\n temp_col1: \"Z\",\n temp_col2: 300,\n vector: Array.from({ length: 128 }, () => Math.random()),\n },\n];\nconst schemaDropTable = await db.createTable(\n \"schema_evolution_drop_example\",\n schemaDropData,\n { mode: \"overwrite\" },\n);\n"; +export const TsSchemaFieldMetadataMerge = "// Set two metadata keys on the `category` field.\nconst res = await fieldMetadataTable.updateFieldMetadata([\n { path: \"category\", metadata: { unit: \"label\", pii: \"false\" } },\n]);\nconsole.log(res.version);\n\n// Merge: add a new key, delete one via null, keep the rest.\nawait fieldMetadataTable.updateFieldMetadata([\n { path: \"category\", metadata: { source: \"import\", pii: null } },\n]);\n"; + +export const TsSchemaFieldMetadataReplace = "await fieldMetadataTable.updateFieldMetadata([\n {\n path: \"category\",\n metadata: { owner: \"search-team\" },\n replace: true,\n },\n]);\n"; + export const TsUpdateConnectEnterprise = "const db = await lancedb.connect(\"db://your-project-slug\", {\n apiKey: \"your-api-key\",\n region: \"us-east-1\",\n});\n"; export const TsUpdateConnectLocal = "const db = await lancedb.connect(\"./data\");\n"; @@ -278,6 +286,10 @@ export const RsSchemaAlterSetup = "let schema_alter_schema = Arc::new(Schema::ne export const RsSchemaDropSetup = "let schema_drop_schema = Arc::new(Schema::new(vec![\n Field::new(\"id\", DataType::Int64, false),\n Field::new(\"name\", DataType::Utf8, false),\n Field::new(\"price\", DataType::Float64, false),\n Field::new(\"temp_col1\", DataType::Utf8, false),\n Field::new(\"temp_col2\", 
DataType::Int32, false),\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 128),\n false,\n ),\n]));\nlet schema_drop_batch = RecordBatch::try_new(\n schema_drop_schema.clone(),\n vec![\n Arc::new(Int64Array::from(vec![1, 2, 3])),\n Arc::new(StringArray::from(vec![\"Laptop\", \"Smartphone\", \"Headphones\"])),\n Arc::new(Float64Array::from(vec![1200.0, 800.0, 150.0])),\n Arc::new(StringArray::from(vec![\"X\", \"Y\", \"Z\"])),\n Arc::new(Int32Array::from(vec![100, 200, 300])),\n Arc::new(\n FixedSizeListArray::from_iter_primitive::(\n vec![\n Some(vec![Some(0.1_f32); 128]),\n Some(vec![Some(0.2_f32); 128]),\n Some(vec![Some(0.3_f32); 128]),\n ],\n 128,\n ),\n ),\n ],\n)\n.unwrap();\nlet schema_drop_reader: Box = Box::new(RecordBatchIterator::new(\n vec![Ok(schema_drop_batch)].into_iter(),\n schema_drop_schema.clone(),\n));\nlet schema_drop_table = db\n .create_table(\"schema_evolution_drop_example\", schema_drop_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n"; +export const RsSchemaFieldMetadataMerge = "// Set two metadata keys on the `category` field.\nlet res = field_metadata_table\n .update_field_metadata(&[FieldMetadataUpdate::new(\"category\")\n .set(\"unit\", \"label\")\n .set(\"pii\", \"false\")])\n .await\n .unwrap();\nprintln!(\"version: {}\", res.version);\n\n// Merge: add a new key, delete one with `.remove`, keep the rest.\nfield_metadata_table\n .update_field_metadata(&[FieldMetadataUpdate::new(\"category\")\n .set(\"source\", \"import\")\n .remove(\"pii\")])\n .await\n .unwrap();\n"; + +export const RsSchemaFieldMetadataReplace = "field_metadata_table\n .update_field_metadata(&[FieldMetadataUpdate::new(\"category\")\n .set(\"owner\", \"search-team\")\n .replace()])\n .await\n .unwrap();\n"; + export const RsUpdateConnectEnterprise = "let uri = \"db://your-project-slug\";\nlet api_key = \"your-api-key\";\nlet region = \"us-east-1\";\n"; export const 
RsUpdateConnectLocal = "let db = connect(\"./data\").execute().await.unwrap();\n"; diff --git a/docs/tables/schema.mdx b/docs/tables/schema.mdx index 0426d1c..9ff174b 100644 --- a/docs/tables/schema.mdx +++ b/docs/tables/schema.mdx @@ -47,6 +47,12 @@ import { PyAlterVectorColumn as AlterVectorColumn, TsAlterVectorColumn as TsAlterVectorColumn, RsAlterVectorColumn as RsAlterVectorColumn, + PySchemaFieldMetadataMerge as SchemaFieldMetadataMerge, + TsSchemaFieldMetadataMerge as TsSchemaFieldMetadataMerge, + RsSchemaFieldMetadataMerge as RsSchemaFieldMetadataMerge, + PySchemaFieldMetadataReplace as SchemaFieldMetadataReplace, + TsSchemaFieldMetadataReplace as TsSchemaFieldMetadataReplace, + RsSchemaFieldMetadataReplace as RsSchemaFieldMetadataReplace, } from '/snippets/tables.mdx'; Schema evolution enables non-breaking modifications to a database table's structure — such as adding columns, altering data types, or dropping fields — to adapt to evolving data requirements without service interruptions. @@ -58,11 +64,12 @@ LanceDB supports ACID-compliant schema evolution through granular operations (ad ## Schema evolution operations -LanceDB supports three primary schema evolution operations: +LanceDB supports four primary schema evolution operations: 1. **Adding new columns**: Extend your table with additional attributes 2. **Altering existing columns**: Change column names, data types, or nullability -3. **Dropping columns**: Remove unnecessary columns from your schema +3. **Updating field metadata**: Attach or change per-column Arrow metadata +4. **Dropping columns**: Remove unnecessary columns from your schema @@ -310,6 +317,59 @@ For such cases, use `addColumns` / `add_columns` (with `arrow_cast`), then `drop Changing data types requires rewriting the column data and may be resource-intensive for large tables. Renaming columns or changing nullability is more efficient as it only updates metadata. 
+## Update field metadata + +Each column in a LanceDB table can carry a small key/value map of Arrow field metadata — useful +for annotating columns with units, provenance, PII flags, embedding model versions, or any other +schema-level context your application needs. + +Use [`update_field_metadata`](https://lancedb.github.io/lancedb/python/python/#lancedb.table.Table.update_field_metadata) +in Python, [`updateFieldMetadata`](https://lancedb.github.io/lancedb/js/classes/Table/#updatefieldmetadata) +in TypeScript/JavaScript, or `update_field_metadata` in Rust to add, change, or remove these +key/value pairs without rewriting the column data. Each call commits a new table version and returns +the new `version`. + +Each update targets one field by **dot-path**: top-level columns are addressed by name (for +example `"embedding"`), and nested fields by their full path (for example `"address.zip"`). By +default, the keys you pass are **merged** into the field's existing metadata — keys you do not +mention are preserved, and passing `None` (Python) or `null` (TypeScript), or calling `.remove(key)` (Rust), deletes a key. To swap +the field's entire metadata map instead of merging, set `replace` to `True` (Python) or `true` (TypeScript), or call `.replace()` (Rust). + + + + {SchemaFieldMetadataMerge} + + + + {TsSchemaFieldMetadataMerge} + + + + {RsSchemaFieldMetadataMerge} + + + +To overwrite a field's metadata entirely instead of merging, set `replace` to `true` (in Rust, call `.replace()` on the update): + + + + {SchemaFieldMetadataReplace} + + + + {TsSchemaFieldMetadataReplace} + + + + {RsSchemaFieldMetadataReplace} + + + + +You can pass multiple updates in a single call to change metadata on several fields at once — +each call commits a single new table version. 
+ + ## Drop columns You can remove columns using the [`drop_columns`](https://lancedb.github.io/lancedb/python/python/#lancedb.table.Table.drop_columns) diff --git a/tests/py/test_tables.py b/tests/py/test_tables.py index 0d899a6..ea74465 100644 --- a/tests/py/test_tables.py +++ b/tests/py/test_tables.py @@ -1124,6 +1124,45 @@ def test_alter_vector_column(tmp_db): # --8<-- [end:alter_vector_column] +def test_schema_field_metadata(tmp_db): + table = tmp_db.create_table( + "schema_field_metadata_example", + pa.table({"id": [0, 1], "category": ["a", "b"]}), + mode="overwrite", + ) + + # --8<-- [start:schema_field_metadata_merge] + # Set two metadata keys on the `category` field. + res = table.update_field_metadata( + {"path": "category", "metadata": {"unit": "label", "pii": "false"}} + ) + print(res.version) + + # Merge: add a new key, delete one with None, keep the rest. + table.update_field_metadata( + {"path": "category", "metadata": {"source": "import", "pii": None}} + ) + + # Arrow stores field metadata as bytes. 
+ assert table.schema.field("category").metadata == { + b"unit": b"label", + b"source": b"import", + } + # --8<-- [end:schema_field_metadata_merge] + + # --8<-- [start:schema_field_metadata_replace] + table.update_field_metadata( + { + "path": "category", + "metadata": {"owner": "search-team"}, + "replace": True, + } + ) + # --8<-- [end:schema_field_metadata_replace] + + assert table.schema.field("category").metadata == {b"owner": b"search-team"} + + # ============================================================================ # Versioning Examples # ============================================================================ diff --git a/tests/rs/tables.rs b/tests/rs/tables.rs index e197d11..94b4af7 100644 --- a/tests/rs/tables.rs +++ b/tests/rs/tables.rs @@ -12,7 +12,9 @@ use arrow_array::{ use arrow_schema::{DataType, Field, Schema}; use lancedb::connect; use lancedb::database::CreateTableMode; -use lancedb::table::{ColumnAlteration, Duration, NewColumnTransform, OptimizeAction}; +use lancedb::table::{ + ColumnAlteration, Duration, FieldMetadataUpdate, NewColumnTransform, OptimizeAction, +}; // --8<-- [start:update_make_users_reader] fn make_users_reader( @@ -773,6 +775,56 @@ async fn main() { // --8<-- [end:alter_vector_column] assert_eq!(vector_table.count_rows(None).await.unwrap(), 1); + let field_metadata_schema = Arc::new(Schema::new(vec![ + Field::new("id", DataType::Int64, false), + Field::new("category", DataType::Utf8, false), + ])); + let field_metadata_batch = RecordBatch::try_new( + field_metadata_schema.clone(), + vec![ + Arc::new(Int64Array::from(vec![0, 1])), + Arc::new(StringArray::from(vec!["a", "b"])), + ], + ) + .unwrap(); + let field_metadata_reader: Box = Box::new( + RecordBatchIterator::new(vec![Ok(field_metadata_batch)].into_iter(), field_metadata_schema), + ); + let field_metadata_table = db + .create_table("schema_field_metadata_example", field_metadata_reader) + .mode(CreateTableMode::Overwrite) + .execute() + .await + .unwrap(); + + 
// --8<-- [start:schema_field_metadata_merge] + // Set two metadata keys on the `category` field. + let res = field_metadata_table + .update_field_metadata(&[FieldMetadataUpdate::new("category") + .set("unit", "label") + .set("pii", "false")]) + .await + .unwrap(); + println!("version: {}", res.version); + + // Merge: add a new key, delete one with `.remove`, keep the rest. + field_metadata_table + .update_field_metadata(&[FieldMetadataUpdate::new("category") + .set("source", "import") + .remove("pii")]) + .await + .unwrap(); + // --8<-- [end:schema_field_metadata_merge] + + // --8<-- [start:schema_field_metadata_replace] + field_metadata_table + .update_field_metadata(&[FieldMetadataUpdate::new("category") + .set("owner", "search-team") + .replace()]) + .await + .unwrap(); + // --8<-- [end:schema_field_metadata_replace] + // --8<-- [start:update_example_table_setup] let table = db .create_table( diff --git a/tests/ts/tables.test.ts b/tests/ts/tables.test.ts index 491274c..01806ba 100644 --- a/tests/ts/tables.test.ts +++ b/tests/ts/tables.test.ts @@ -429,6 +429,38 @@ test("schema evolution snippets (async)", async () => { await vectorTable.alterColumns([{ path: "embedding_v2", rename: "embedding" }]); // --8<-- [end:alter_vector_column] expect(await vectorTable.countRows()).toBe(1); + + const fieldMetadataTable = await db.createTable( + "schema_field_metadata_example", + [ + { id: 0, category: "a" }, + { id: 1, category: "b" }, + ], + { mode: "overwrite" }, + ); + + // --8<-- [start:schema_field_metadata_merge] + // Set two metadata keys on the `category` field. + const res = await fieldMetadataTable.updateFieldMetadata([ + { path: "category", metadata: { unit: "label", pii: "false" } }, + ]); + console.log(res.version); + + // Merge: add a new key, delete one via null, keep the rest. 
+ await fieldMetadataTable.updateFieldMetadata([ + { path: "category", metadata: { source: "import", pii: null } }, + ]); + // --8<-- [end:schema_field_metadata_merge] + + // --8<-- [start:schema_field_metadata_replace] + await fieldMetadataTable.updateFieldMetadata([ + { + path: "category", + metadata: { owner: "search-team" }, + replace: true, + }, + ]); + // --8<-- [end:schema_field_metadata_replace] }); });