Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions docs/snippets/tables.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ export const PySchemaAlterSetup = "table_name = \"schema_evolution_alter_example

export const PySchemaDropSetup = "if data is None:\n data = [\n {\n \"id\": 1,\n \"name\": \"Laptop\",\n \"price\": 1200.00,\n \"temp_col1\": \"X\",\n \"temp_col2\": 100,\n \"vector\": np.random.random(128).tolist(),\n },\n {\n \"id\": 2,\n \"name\": \"Smartphone\",\n \"price\": 800.00,\n \"temp_col1\": \"Y\",\n \"temp_col2\": 200,\n \"vector\": np.random.random(128).tolist(),\n },\n {\n \"id\": 3,\n \"name\": \"Headphones\",\n \"price\": 150.00,\n \"temp_col1\": \"Z\",\n \"temp_col2\": 300,\n \"vector\": np.random.random(128).tolist(),\n },\n ]\ntable = tmp_db.create_table(\"schema_evolution_drop_example\", data, mode=\"overwrite\")\n";

export const PySchemaFieldMetadataMerge = "# Set two metadata keys on the `category` field.\nres = table.update_field_metadata(\n {\"path\": \"category\", \"metadata\": {\"unit\": \"label\", \"pii\": \"false\"}}\n)\nprint(res.version)\n\n# Merge: add a new key, delete one with None, keep the rest.\ntable.update_field_metadata(\n {\"path\": \"category\", \"metadata\": {\"source\": \"import\", \"pii\": None}}\n)\n\n# Arrow stores field metadata as bytes.\nassert table.schema.field(\"category\").metadata == {\n b\"unit\": b\"label\",\n b\"source\": b\"import\",\n}\n";

export const PySchemaFieldMetadataReplace = "table.update_field_metadata(\n {\n \"path\": \"category\",\n \"metadata\": {\"owner\": \"search-team\"},\n \"replace\": True,\n }\n)\n";

export const PyTablesBasicConnect = "import lancedb\n\nuri = \"data/sample-lancedb\"\ndb = lancedb.connect(uri)\n";

export const PyTablesDocumentModel = "from pydantic import BaseModel\n\nclass Document(BaseModel):\n content: str\n source: str\n";
Expand Down Expand Up @@ -184,6 +188,10 @@ export const TsSchemaAlterSetup = "const schemaAlter = new arrow.Schema([\n new

export const TsSchemaDropSetup = "const schemaDropData = [\n {\n id: 1,\n name: \"Laptop\",\n price: 1200.0,\n temp_col1: \"X\",\n temp_col2: 100,\n vector: Array.from({ length: 128 }, () => Math.random()),\n },\n {\n id: 2,\n name: \"Smartphone\",\n price: 800.0,\n temp_col1: \"Y\",\n temp_col2: 200,\n vector: Array.from({ length: 128 }, () => Math.random()),\n },\n {\n id: 3,\n name: \"Headphones\",\n price: 150.0,\n temp_col1: \"Z\",\n temp_col2: 300,\n vector: Array.from({ length: 128 }, () => Math.random()),\n },\n];\nconst schemaDropTable = await db.createTable(\n \"schema_evolution_drop_example\",\n schemaDropData,\n { mode: \"overwrite\" },\n);\n";

export const TsSchemaFieldMetadataMerge = "// Set two metadata keys on the `category` field.\nconst res = await fieldMetadataTable.updateFieldMetadata([\n { path: \"category\", metadata: { unit: \"label\", pii: \"false\" } },\n]);\nconsole.log(res.version);\n\n// Merge: add a new key, delete one via null, keep the rest.\nawait fieldMetadataTable.updateFieldMetadata([\n { path: \"category\", metadata: { source: \"import\", pii: null } },\n]);\n";

export const TsSchemaFieldMetadataReplace = "await fieldMetadataTable.updateFieldMetadata([\n {\n path: \"category\",\n metadata: { owner: \"search-team\" },\n replace: true,\n },\n]);\n";

export const TsUpdateConnectEnterprise = "const db = await lancedb.connect(\"db://your-project-slug\", {\n apiKey: \"your-api-key\",\n region: \"us-east-1\",\n});\n";

export const TsUpdateConnectLocal = "const db = await lancedb.connect(\"./data\");\n";
Expand Down Expand Up @@ -278,6 +286,10 @@ export const RsSchemaAlterSetup = "let schema_alter_schema = Arc::new(Schema::ne

export const RsSchemaDropSetup = "let schema_drop_schema = Arc::new(Schema::new(vec![\n Field::new(\"id\", DataType::Int64, false),\n Field::new(\"name\", DataType::Utf8, false),\n Field::new(\"price\", DataType::Float64, false),\n Field::new(\"temp_col1\", DataType::Utf8, false),\n Field::new(\"temp_col2\", DataType::Int32, false),\n Field::new(\n \"vector\",\n DataType::FixedSizeList(Arc::new(Field::new(\"item\", DataType::Float32, true)), 128),\n false,\n ),\n]));\nlet schema_drop_batch = RecordBatch::try_new(\n schema_drop_schema.clone(),\n vec![\n Arc::new(Int64Array::from(vec![1, 2, 3])),\n Arc::new(StringArray::from(vec![\"Laptop\", \"Smartphone\", \"Headphones\"])),\n Arc::new(Float64Array::from(vec![1200.0, 800.0, 150.0])),\n Arc::new(StringArray::from(vec![\"X\", \"Y\", \"Z\"])),\n Arc::new(Int32Array::from(vec![100, 200, 300])),\n Arc::new(\n FixedSizeListArray::from_iter_primitive::<Float32Type, _, _>(\n vec![\n Some(vec![Some(0.1_f32); 128]),\n Some(vec![Some(0.2_f32); 128]),\n Some(vec![Some(0.3_f32); 128]),\n ],\n 128,\n ),\n ),\n ],\n)\n.unwrap();\nlet schema_drop_reader: Box<dyn RecordBatchReader + Send> = Box::new(RecordBatchIterator::new(\n vec![Ok(schema_drop_batch)].into_iter(),\n schema_drop_schema.clone(),\n));\nlet schema_drop_table = db\n .create_table(\"schema_evolution_drop_example\", schema_drop_reader)\n .mode(CreateTableMode::Overwrite)\n .execute()\n .await\n .unwrap();\n";

export const RsSchemaFieldMetadataMerge = "// Set two metadata keys on the `category` field.\nlet res = field_metadata_table\n .update_field_metadata(&[FieldMetadataUpdate::new(\"category\")\n .set(\"unit\", \"label\")\n .set(\"pii\", \"false\")])\n .await\n .unwrap();\nprintln!(\"version: {}\", res.version);\n\n// Merge: add a new key, delete one with `.remove`, keep the rest.\nfield_metadata_table\n .update_field_metadata(&[FieldMetadataUpdate::new(\"category\")\n .set(\"source\", \"import\")\n .remove(\"pii\")])\n .await\n .unwrap();\n";

export const RsSchemaFieldMetadataReplace = "field_metadata_table\n .update_field_metadata(&[FieldMetadataUpdate::new(\"category\")\n .set(\"owner\", \"search-team\")\n .replace()])\n .await\n .unwrap();\n";

export const RsUpdateConnectEnterprise = "let uri = \"db://your-project-slug\";\nlet api_key = \"your-api-key\";\nlet region = \"us-east-1\";\n";

export const RsUpdateConnectLocal = "let db = connect(\"./data\").execute().await.unwrap();\n";
Expand Down
64 changes: 62 additions & 2 deletions docs/tables/schema.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,12 @@ import {
PyAlterVectorColumn as AlterVectorColumn,
TsAlterVectorColumn as TsAlterVectorColumn,
RsAlterVectorColumn as RsAlterVectorColumn,
PySchemaFieldMetadataMerge as SchemaFieldMetadataMerge,
TsSchemaFieldMetadataMerge as TsSchemaFieldMetadataMerge,
RsSchemaFieldMetadataMerge as RsSchemaFieldMetadataMerge,
PySchemaFieldMetadataReplace as SchemaFieldMetadataReplace,
TsSchemaFieldMetadataReplace as TsSchemaFieldMetadataReplace,
RsSchemaFieldMetadataReplace as RsSchemaFieldMetadataReplace,
} from '/snippets/tables.mdx';

Schema evolution enables non-breaking modifications to a database table's structure — such as adding columns, altering data types, or dropping fields — to adapt to evolving data requirements without service interruptions.
Expand All @@ -58,11 +64,12 @@ LanceDB supports ACID-compliant schema evolution through granular operations (ad

## Schema evolution operations

LanceDB supports three primary schema evolution operations:
LanceDB supports four primary schema evolution operations:

1. **Adding new columns**: Extend your table with additional attributes
2. **Altering existing columns**: Change column names, data types, or nullability
3. **Dropping columns**: Remove unnecessary columns from your schema
3. **Updating field metadata**: Attach, change, or remove per-column Arrow metadata
4. **Dropping columns**: Remove unnecessary columns from your schema


<Tip title="Schema Evolution Performance">
Expand Down Expand Up @@ -310,6 +317,59 @@ For such cases, use `addColumns` / `add_columns` (with `arrow_cast`), then `drop
Changing data types requires rewriting the column data and may be resource-intensive for large tables. Renaming columns or changing nullability is more efficient as it only updates metadata.
</Warning>

## Update field metadata

Each column in a LanceDB table can carry a small key/value map of Arrow field metadata — useful
for annotating columns with units, provenance, PII flags, embedding model versions, or any other
schema-level context your application needs.

Use [`update_field_metadata`](https://lancedb.github.io/lancedb/python/python/#lancedb.table.Table.update_field_metadata)
in Python, [`updateFieldMetadata`](https://lancedb.github.io/lancedb/js/classes/Table/#updatefieldmetadata)
in TypeScript/JavaScript, or `update_field_metadata` in Rust to add, change, or remove these
key/value pairs without rewriting the column data. Each call commits a new table version and returns
the new `version`.

Each update targets one field by **dot-path**: top-level columns are addressed by name (for
example `"embedding"`), and nested fields by their full path (for example `"address.zip"`). By
default, the keys you pass are **merged** into the field's existing metadata — keys you do not
mention are preserved, and passing `None` (Python) or `null` (TypeScript), or calling
`.remove(key)` (Rust), deletes a key. Set `replace` to `True` (Python) or `true` (TypeScript), or
call `.replace()` (Rust), to swap the field's entire metadata map instead of merging.

<CodeGroup>
<CodeBlock filename="Python" language="Python" icon="python">
{SchemaFieldMetadataMerge}
</CodeBlock>

<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
{TsSchemaFieldMetadataMerge}
</CodeBlock>

<CodeBlock filename="Rust" language="Rust" icon="rust">
{RsSchemaFieldMetadataMerge}
</CodeBlock>
</CodeGroup>

To overwrite a field's metadata entirely instead of merging, set `replace` to `True` (Python) or `true` (TypeScript), or call `.replace()` on the update (Rust):

<CodeGroup>
<CodeBlock filename="Python" language="Python" icon="python">
{SchemaFieldMetadataReplace}
</CodeBlock>

<CodeBlock filename="TypeScript" language="TypeScript" icon="square-js">
{TsSchemaFieldMetadataReplace}
</CodeBlock>

<CodeBlock filename="Rust" language="Rust" icon="rust">
{RsSchemaFieldMetadataReplace}
</CodeBlock>
</CodeGroup>

<Tip>
You can pass multiple updates in a single call to change metadata on several fields at once —
each call commits a single new table version.
</Tip>

## Drop columns

You can remove columns using the [`drop_columns`](https://lancedb.github.io/lancedb/python/python/#lancedb.table.Table.drop_columns)
Expand Down
39 changes: 39 additions & 0 deletions tests/py/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,45 @@ def test_alter_vector_column(tmp_db):
# --8<-- [end:alter_vector_column]


def test_schema_field_metadata(tmp_db):
    # Exercises `table.update_field_metadata` first in merge mode and then in
    # replace mode against a small two-column table (`id`, `category`).
    # NOTE(review): the regions between the `--8<--` start/end markers appear to
    # be extracted verbatim into the docs snippets, so keep anything inside them
    # reader-facing; setup and extra checks belong outside the markers.
    table = tmp_db.create_table(
        "schema_field_metadata_example",
        pa.table({"id": [0, 1], "category": ["a", "b"]}),
        mode="overwrite",
    )

    # --8<-- [start:schema_field_metadata_merge]
    # Set two metadata keys on the `category` field.
    res = table.update_field_metadata(
        {"path": "category", "metadata": {"unit": "label", "pii": "false"}}
    )
    print(res.version)

    # Merge: add a new key, delete one with None, keep the rest.
    table.update_field_metadata(
        {"path": "category", "metadata": {"source": "import", "pii": None}}
    )

    # Arrow stores field metadata as bytes.
    assert table.schema.field("category").metadata == {
        b"unit": b"label",
        b"source": b"import",
    }
    # --8<-- [end:schema_field_metadata_merge]

    # --8<-- [start:schema_field_metadata_replace]
    table.update_field_metadata(
        {
            "path": "category",
            "metadata": {"owner": "search-team"},
            "replace": True,
        }
    )
    # --8<-- [end:schema_field_metadata_replace]

    # Test-only check, placed outside the snippet markers: after the replace
    # call, `owner` is expected to be the only metadata key left on `category`.
    assert table.schema.field("category").metadata == {b"owner": b"search-team"}


# ============================================================================
# Versioning Examples
# ============================================================================
Expand Down
54 changes: 53 additions & 1 deletion tests/rs/tables.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ use arrow_array::{
use arrow_schema::{DataType, Field, Schema};
use lancedb::connect;
use lancedb::database::CreateTableMode;
use lancedb::table::{ColumnAlteration, Duration, NewColumnTransform, OptimizeAction};
use lancedb::table::{
ColumnAlteration, Duration, FieldMetadataUpdate, NewColumnTransform, OptimizeAction,
};

// --8<-- [start:update_make_users_reader]
fn make_users_reader(
Expand Down Expand Up @@ -773,6 +775,56 @@ async fn main() {
// --8<-- [end:alter_vector_column]
assert_eq!(vector_table.count_rows(None).await.unwrap(), 1);

let field_metadata_schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Int64, false),
Field::new("category", DataType::Utf8, false),
]));
let field_metadata_batch = RecordBatch::try_new(
field_metadata_schema.clone(),
vec![
Arc::new(Int64Array::from(vec![0, 1])),
Arc::new(StringArray::from(vec!["a", "b"])),
],
)
.unwrap();
let field_metadata_reader: Box<dyn RecordBatchReader + Send> = Box::new(
RecordBatchIterator::new(vec![Ok(field_metadata_batch)].into_iter(), field_metadata_schema),
);
let field_metadata_table = db
.create_table("schema_field_metadata_example", field_metadata_reader)
.mode(CreateTableMode::Overwrite)
.execute()
.await
.unwrap();

// --8<-- [start:schema_field_metadata_merge]
// Set two metadata keys on the `category` field.
let res = field_metadata_table
.update_field_metadata(&[FieldMetadataUpdate::new("category")
.set("unit", "label")
.set("pii", "false")])
.await
.unwrap();
println!("version: {}", res.version);

// Merge: add a new key, delete one with `.remove`, keep the rest.
field_metadata_table
.update_field_metadata(&[FieldMetadataUpdate::new("category")
.set("source", "import")
.remove("pii")])
.await
.unwrap();
// --8<-- [end:schema_field_metadata_merge]

// --8<-- [start:schema_field_metadata_replace]
field_metadata_table
.update_field_metadata(&[FieldMetadataUpdate::new("category")
.set("owner", "search-team")
.replace()])
.await
.unwrap();
// --8<-- [end:schema_field_metadata_replace]

// --8<-- [start:update_example_table_setup]
let table = db
.create_table(
Expand Down
32 changes: 32 additions & 0 deletions tests/ts/tables.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,38 @@ test("schema evolution snippets (async)", async () => {
await vectorTable.alterColumns([{ path: "embedding_v2", rename: "embedding" }]);
// --8<-- [end:alter_vector_column]
expect(await vectorTable.countRows()).toBe(1);

const fieldMetadataTable = await db.createTable(
"schema_field_metadata_example",
[
{ id: 0, category: "a" },
{ id: 1, category: "b" },
],
{ mode: "overwrite" },
);

// --8<-- [start:schema_field_metadata_merge]
// Set two metadata keys on the `category` field.
const res = await fieldMetadataTable.updateFieldMetadata([
{ path: "category", metadata: { unit: "label", pii: "false" } },
]);
console.log(res.version);

// Merge: add a new key, delete one via null, keep the rest.
await fieldMetadataTable.updateFieldMetadata([
{ path: "category", metadata: { source: "import", pii: null } },
]);
// --8<-- [end:schema_field_metadata_merge]

// --8<-- [start:schema_field_metadata_replace]
await fieldMetadataTable.updateFieldMetadata([
{
path: "category",
metadata: { owner: "search-team" },
replace: true,
},
]);
// --8<-- [end:schema_field_metadata_replace]
});
});

Expand Down
Loading