Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 52 additions & 50 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,70 +1,72 @@
# PHPVector adapter for Neuron AI framework
# neuron-core/php-vector

This is the [PHPVector](https://github.com/ezimuel/PHPVector) adapter for the [Neuron AI framework](https://neuron-ai.dev/).
PHPVector adapter for the [Neuron AI](https://neuron-ai.dev) framework. Implements
`NeuronAI\RAG\VectorStore\VectorStoreInterface` on top of `ezimuel/phpvector`.

## Install
## Installation

```
```bash
composer require neuron-core/php-vector
```

## Use in RAG
## Usage

```php
use NeuronAI\PHPVector\PHPVector;
use PHPVector\VectorDatabase;

// Persistent database: pass a path to enable on-disk storage.
$store = new PHPVector(
database: new VectorDatabase(path: '/var/data/mydb'),
topK: 5,
);
```

Inside a Neuron RAG class:

class MyRAG extends RAG
```php
protected function vectorStore(): VectorStoreInterface
{
...

protected function vectorStore(): VectorStoreInterface
{
return new PHPVector(
database: new VectorDatabase(path: '/var/data/mydb'),
topK: 5
);
}
return new PHPVector(
database: new VectorDatabase(path: '/var/data/mydb'),
topK: 5,
);
}
```

## Use in Retrieval components
## Persistence

PHPVector separates document storage from index storage:

- `new VectorDatabase(path: '...')` creates (or targets) a database directory.
- `VectorDatabase::open('...')` loads an existing database from disk.
- `addDocument()` writes the document file to disk on each call (asynchronously via `pcntl_fork` when available, otherwise synchronously).
- `save()` persists the HNSW + BM25 index and finalizes deletions.

By default this adapter auto-saves after every mutation (`addDocument`, `addDocuments`,
`deleteBy`), batched to a single `save()` per call, so persistence "just works". Disable it
to manage `save()` yourself:

```php
use NeuronAI\PHPVector\PHPVector;
$store = new PHPVector(database: $db, autoSave: false);
// ... many addDocument() calls ...
$db->save();
```

class MyAgent extends Agent
{
...

protected function tools(): array
{
return [
RetrievalTool::make(
new SimilarityRetrieval(
$this->vectorStore(),
$this->embeddings()
)
),
];
}

protected function vectorStore(): VectorStoreInterface
{
return new PHPVector(
database: new VectorDatabase(path: '/var/data/mydb'),
topK: 5
);
}

protected function embeddings(): EmbeddingsProviderInterface
{
return new OllamaEmbeddingsProvider(
model: 'OLLAMA_EMBEDDINGS_MODEL'
);
}
}
Auto-save is skipped for in-memory databases (those created without a path), so leaving it enabled never throws for them.

## Deletion

`deleteBy()` removes documents by Neuron's `sourceType` / `sourceName`, which this adapter
stores as PHPVector metadata:

```php
$store->deleteBy('pdf'); // all documents from sourceType "pdf"
$store->deleteBy('pdf', 'manual.pdf'); // only that exact source
```

## Official documentation
## Requirements

**[Go to the official documentation](https://neuron.inspector.dev/)**
- PHP 8.1+
- `ezimuel/phpvector` ^0.3.0
- `neuron-core/neuron-ai` ^3.0
4 changes: 2 additions & 2 deletions composer.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "neuron-core/php-vector",
"description": "Conversational Data Collection.",
"description": "PHPVector adapter for the Neuron AI framework.",
"minimum-stability": "stable",
"authors": [
{
Expand All @@ -11,7 +11,7 @@
"license": "MIT",
"require": {
"php": "^8.1",
"ezimuel/phpvector": "^0.2.0",
"ezimuel/phpvector": "^0.3.0",
"neuron-core/neuron-ai": "^3.0"
},
"require-dev": {
Expand Down
86 changes: 67 additions & 19 deletions src/PHPVector.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@

namespace NeuronAI\PHPVector;

use NeuronAI\Exceptions\VectorStoreException;
use NeuronAI\RAG\Document as NeuronDocument;
use NeuronAI\RAG\VectorStore\VectorStoreInterface;
use NeuronAI\StaticConstructor;
use PHPVector\Document;
use PHPVector\Metadata\MetadataFilter;
use PHPVector\SearchResult;
use PHPVector\VectorDatabase;

Expand All @@ -18,22 +18,20 @@ class PHPVector implements VectorStoreInterface
{
use StaticConstructor;

private const SOURCE_TYPE_KEY = 'sourceType';
private const SOURCE_NAME_KEY = 'sourceName';

/**
 * @param VectorDatabase $database PHPVector database that every read and write of this store goes through.
 * @param int            $topK     Presumably the maximum number of results returned by similaritySearch();
 *                                 the call that consumes it is not visible here — TODO confirm.
 * @param bool           $autoSave When true, each mutating call (addDocument, addDocuments, deleteBy)
 *                                 finishes by calling save() on the database, provided the database
 *                                 reports itself as persistent.
 */
public function __construct(
protected VectorDatabase $database,
protected int $topK = 5,
protected bool $autoSave = true,
) {
}

public function addDocument(NeuronDocument $document): VectorStoreInterface
{
$this->database->addDocument(
new Document(
id: $document->id,
vector: $document->embedding,
text: $document->content,
metadata: $document->metadata,
)
);
$this->write($document);
$this->persist();

return $this;
}
Expand All @@ -44,27 +42,67 @@ public function addDocument(NeuronDocument $document): VectorStoreInterface
public function addDocuments(array $documents): VectorStoreInterface
{
foreach ($documents as $document) {
$this->addDocument($document);
$this->write($document);
}
$this->persist();

return $this;
}

/**
* @throws VectorStoreException
* Persist a Neuron document into PHPVector.
*
* Neuron's `sourceType`/`sourceName` are top-level Document properties, but
* PHPVector only stores `metadata`. They are folded into metadata under the
* reserved keys so `deleteBy()` can filter on them; `similaritySearch()`
* restores them and strips the reserved keys back out.
*/
private function write(NeuronDocument $document): void
{
    // The reserved keys come after the spread, so they override any
    // same-named entries already present in the caller's metadata.
    $metadata = [
        ...$document->metadata,
        self::SOURCE_TYPE_KEY => $document->sourceType,
        self::SOURCE_NAME_KEY => $document->sourceName,
    ];

    // Map the Neuron document onto PHPVector's own Document shape.
    $record = new Document(
        id: $document->id,
        vector: $document->embedding,
        text: $document->content,
        metadata: $metadata,
    );

    $this->database->addDocument($record);
}

/**
 * Flush the database to disk when auto-save applies.
 *
 * Does nothing when auto-save is turned off or when the database reports
 * that it is not persistent; otherwise delegates to the database's save().
 */
private function persist(): void
{
    // Same short-circuit order as a combined "&&" check: isPersistent() is
    // only consulted when auto-save is enabled.
    if (!$this->autoSave || !$this->database->isPersistent()) {
        return;
    }

    $this->database->save();
}

public function deleteBy(string $sourceType, ?string $sourceName = null): VectorStoreInterface
{
throw new VectorStoreException('Deletion not supported.');
$filters = [MetadataFilter::eq(self::SOURCE_TYPE_KEY, $sourceType)];

if ($sourceName !== null) {
$filters[] = MetadataFilter::eq(self::SOURCE_NAME_KEY, $sourceName);
}

foreach ($this->database->metadataSearch(filters: $filters) as $result) {
$this->database->deleteDocument($result->document->id);
}

$this->persist();

return $this;
}

/**
* @throws VectorStoreException
* @deprecated Use deleteBy() instead.
*/
public function deleteBySource(string $sourceType, string $sourceName): VectorStoreInterface
{
$this->deleteBy($sourceType, $sourceName);
return $this;
return $this->deleteBy($sourceType, $sourceName);
}

/**
Expand All @@ -79,11 +117,21 @@ public function similaritySearch(array $embedding): iterable
);

return array_map(function (SearchResult $result): NeuronDocument {
$document = new NeuronDocument($result->document->text);
$document->id = $result->document->id;
$document->embedding = $result->document->vector;
$document->metadata = $result->document->metadata;
$phpDoc = $result->document;

$metadata = $phpDoc->metadata;
$sourceType = $metadata[self::SOURCE_TYPE_KEY] ?? 'manual';
$sourceName = $metadata[self::SOURCE_NAME_KEY] ?? 'manual';
unset($metadata[self::SOURCE_TYPE_KEY], $metadata[self::SOURCE_NAME_KEY]);

$document = new NeuronDocument($phpDoc->text);
$document->id = $phpDoc->id;
$document->embedding = $phpDoc->vector;
$document->sourceType = $sourceType;
$document->sourceName = $sourceName;
$document->metadata = $metadata;
$document->score = $result->score;

return $document;
}, $results);
}
Expand Down
Loading
Loading