Add, replace, cache and delete artifacts

import pytest
import shutil
import lamindb as ln
ln.setup.login("testuser1")
 logged in testuser1
Current user:
 - handle: testuser1
 - uid: DzTjkKse
Hide code cell content
# Best-effort cleanup of leftovers from a previous (possibly aborted) run, so
# that the `ln.setup.init(...)` call that follows starts from a clean state.
try:
    root_path = ln.UPath("s3://lamindb-ci/test-add-replace-cache")
    if root_path.exists():
        root_path.rmdir()
    ln.setup.delete("testuser1/test-add-replace-cache", force=True)
except (Exception, SystemExit):  # noqa: S110
    # Failures are ignored on purpose: there may simply be nothing to delete.
    # NOTE(review): SystemExit is kept in the tuple on the assumption that the
    # setup code may abort via SystemExit when the instance is missing — confirm.
    # KeyboardInterrupt is intentionally NOT caught, so a hanging cloud call
    # can still be interrupted.
    pass
ln.setup.init(storage="s3://lamindb-ci/test-add-replace-cache")
! re-setting django: avoid this by clearing the default instance on the command line via: lamin disconnect
! updating cloud SQLite 's3://lamindb-ci/test-add-replace-cache/.lamindb/lamin.db' of instance 'testuser1/test-add-replace-cache'
! locked instance (to unlock and push changes to the cloud SQLite file, call: lamin disconnect)
 initialized lamindb: testuser1/test-add-replace-cache

Save with an auto-managed key (key=None)

AUTO_KEY_PREFIX = ln.core.storage.paths.AUTO_KEY_PREFIX
root = ln.settings.storage.root
artifact = ln.Artifact("./test-files/iris.csv", description="iris.csv")
! data is a DataFrame, please use .from_dataframe()
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
Artifact(uid='gOZM1xkjaHcVREwW0000', key=None, description='iris.csv', suffix='.csv', kind=None, otype='DataFrame', size=224, hash='iwc1TmF1TW_l5weDvscSHw', n_files=None, n_observations=None, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:42 UTC, is_locked=False, version_tag=None, is_latest=True)
# With key=None the object is expected at an auto-generated location below the
# storage root: AUTO_KEY_PREFIX + uid + suffix.
key_path = root / f"{AUTO_KEY_PREFIX}{artifact.uid}{artifact.suffix}"
assert key_path.exists()
# cache() returns the path of the local cached copy; it keeps the artifact's suffix.
cache_csv_path = artifact.cache()
print(cache_csv_path)
assert cache_csv_path.suffix == ".csv"
! run input wasn't tracked, call `ln.track()` and re-run
/home/runner/.cache/lamindb/lamindb-ci/test-add-replace-cache/.lamindb/gOZM1xkjaHcVREwW0000.csv
artifact.replace("./test-files/iris.data")
artifact.save()
! no run & transform got linked, call `ln.track()` & re-run
Artifact(uid='gOZM1xkjaHcVREwW0000', key=None, description='iris.csv', suffix='.data', kind=None, otype='DataFrame', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', n_files=None, n_observations=None, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:42 UTC, is_locked=False, version_tag=None, is_latest=True)
old_key_path = key_path
new_key_path = root / f"{AUTO_KEY_PREFIX}{artifact.uid}{artifact.suffix}"

The suffix changed:

# After replace() + save(), nothing may be left at the old `.csv` location and
# the content must exist at the new `.data` location.
print(old_key_path)
print(new_key_path)
assert not old_key_path.exists()
assert new_key_path.exists()
s3://lamindb-ci/test-add-replace-cache/.lamindb/gOZM1xkjaHcVREwW0000.csv
s3://lamindb-ci/test-add-replace-cache/.lamindb/gOZM1xkjaHcVREwW0000.data
# Caching again must produce a local copy with the new suffix.
cache_data_path = artifact.cache()
print(cache_data_path)
assert cache_data_path.suffix == ".data"
# The newly cached file must not be older than the previously cached `.csv` copy.
assert cache_data_path.stat().st_mtime >= cache_csv_path.stat().st_mtime
! run input wasn't tracked, call `ln.track()` and re-run
/home/runner/.cache/lamindb/lamindb-ci/test-add-replace-cache/.lamindb/gOZM1xkjaHcVREwW0000.data
artifact.delete(permanent=True)

Save with manually passed real key

ln.settings.creation._artifact_use_virtual_keys = False
artifact = ln.Artifact("./test-files/iris.csv", key="iris.csv")
! data is a DataFrame, please use .from_dataframe()
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
Artifact(uid='qlqyNfoNrvl1pWpu0000', key='iris.csv', description=None, suffix='.csv', kind=None, otype='DataFrame', size=224, hash='iwc1TmF1TW_l5weDvscSHw', n_files=None, n_observations=None, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:44 UTC, is_locked=False, version_tag=None, is_latest=True)
key_path = root / "iris.csv"
assert key_path.exists()
artifact.replace("./test-files/new_iris.csv")
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
 replacing the existing cache path /home/runner/.cache/lamindb/lamindb-ci/test-add-replace-cache/iris.csv
Artifact(uid='qlqyNfoNrvl1pWpu0000', key='iris.csv', description=None, suffix='.csv', kind=None, otype='DataFrame', size=229, hash='lp2-ycXcKcaliUTnR_TqHA', n_files=None, n_observations=None, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:44 UTC, is_locked=False, version_tag=None, is_latest=True)

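Check paths: nothing changed in storage, because the suffix didn’t change — the artifact keeps its key iris.csv and no object is created under the new file name.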

old_key_path = key_path
new_key_path = root / "new_iris.csv"
old_key_path
S3QueryPath('lamindb-ci/test-add-replace-cache/iris.csv', protocol='s3')
new_key_path
S3QueryPath('lamindb-ci/test-add-replace-cache/new_iris.csv', protocol='s3')
assert old_key_path.exists()
assert not new_key_path.exists()
artifact.replace("./test-files/iris.data")
! no run & transform got linked, call `ln.track()` & re-run
! replacing the file will replace key 'iris.csv' with 'iris.data' and delete 'iris.csv' upon `save()`
artifact.save()
Artifact(uid='qlqyNfoNrvl1pWpu0000', key='iris.data', description=None, suffix='.data', kind=None, otype='DataFrame', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', n_files=None, n_observations=None, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:44 UTC, is_locked=False, version_tag=None, is_latest=True)
new_key_path = root / "iris.data"
old_key_path
S3QueryPath('lamindb-ci/test-add-replace-cache/iris.csv', protocol='s3')
new_key_path
S3QueryPath('lamindb-ci/test-add-replace-cache/iris.data', protocol='s3')
assert not old_key_path.exists()
assert new_key_path.exists()
artifact.delete(permanent=True, storage=True)

Save from memory

import pandas as pd
iris = pd.read_csv("./test-files/iris.csv")
artifact = ln.Artifact.from_dataframe(
    iris, description="iris_store", key="iris.parquet"
)
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
Artifact(uid='2ma8KYlrUS6n01mQ0000', key='iris.parquet', description='iris_store', suffix='.parquet', kind='dataset', otype='DataFrame', size=4113, hash='1xKzUadWOZxoo-Q8FpyaBg', n_files=None, n_observations=6, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:47 UTC, is_locked=False, version_tag=None, is_latest=True)
key_path = root / "iris.parquet"
assert key_path.exists()
artifact.replace(data=iris[:-1])
! no run & transform got linked, call `ln.track()` & re-run
assert artifact.key == "iris.parquet"
artifact.save()
 replacing the existing cache path /home/runner/.cache/lamindb/lamindb-ci/test-add-replace-cache/iris.parquet
Artifact(uid='2ma8KYlrUS6n01mQ0000', key='iris.parquet', description='iris_store', suffix='.parquet', kind='dataset', otype='DataFrame', size=4093, hash='_eSm_-NOAnbi9lM4uryPoA', n_files=None, n_observations=6, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:47 UTC, is_locked=False, version_tag=None, is_latest=True)
assert key_path.exists()
artifact.replace("./test-files/new_iris.csv")
! no run & transform got linked, call `ln.track()` & re-run
! replacing the file will replace key 'iris.parquet' with 'iris.csv' and delete 'iris.parquet' upon `save()`
artifact.save()
 replacing the existing cache path /home/runner/.cache/lamindb/lamindb-ci/test-add-replace-cache/iris.csv
Artifact(uid='2ma8KYlrUS6n01mQ0000', key='iris.csv', description='iris_store', suffix='.csv', kind='dataset', otype='DataFrame', size=229, hash='lp2-ycXcKcaliUTnR_TqHA', n_files=None, n_observations=6, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:47 UTC, is_locked=False, version_tag=None, is_latest=True)
old_key_path = key_path
new_key_path = root / "iris.csv"
old_key_path
S3QueryPath('lamindb-ci/test-add-replace-cache/iris.parquet', protocol='s3')
new_key_path
S3QueryPath('lamindb-ci/test-add-replace-cache/iris.csv', protocol='s3')
assert not old_key_path.exists()
assert new_key_path.exists()
# we use the path in the next sections
path_in_storage = artifact.path
artifact.delete(permanent=True, storage=False)
 a file/folder remains here: s3://lamindb-ci/test-add-replace-cache/iris.csv

Save with manually passed virtual key

ln.settings.creation._artifact_use_virtual_keys = True
artifact = ln.Artifact("./test-files/iris.csv", key="iris.csv")
! data is a DataFrame, please use .from_dataframe()
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
 replacing the existing cache path /home/runner/.cache/lamindb/lamindb-ci/test-add-replace-cache/iris.csv
Artifact(uid='f1CD0KVPvG4hLi2F0000', key='iris.csv', description=None, suffix='.csv', kind=None, otype='DataFrame', size=224, hash='iwc1TmF1TW_l5weDvscSHw', n_files=None, n_observations=None, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:48 UTC, is_locked=False, version_tag=None, is_latest=True)
# `path_in_storage` points at an object that already lives in the instance's
# storage (it was kept there by delete(..., storage=False) earlier), so
# replace() must refuse it: only local paths are accepted.
with pytest.raises(ValueError) as err:
    artifact.replace(path_in_storage)
# exconly() gives "<ExceptionType>: <message>", hence the "ValueError: " prefix.
assert err.exconly().startswith(
    "ValueError: Can only replace with a local path not in any Storage."
)
! no run & transform got linked, call `ln.track()` & re-run
# return an existing artifact if the hash is the same
assert artifact == artifact.replace("./test-files/iris.csv")
! no run & transform got linked, call `ln.track()` & re-run
 returning artifact with same hash: Artifact(uid='f1CD0KVPvG4hLi2F0000', key='iris.csv', description=None, suffix='.csv', kind=None, otype='DataFrame', size=224, hash='iwc1TmF1TW_l5weDvscSHw', n_files=None, n_observations=None, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:48 UTC, is_locked=False, version_tag=None, is_latest=True); to track this artifact as an input, use: ln.Artifact.get()
# With a virtual key, the object in storage is named after the uid rather than
# after the key; only the suffix is shared with the key.
fpath = artifact.path
assert fpath.suffix == ".csv"
assert fpath.stem == artifact.uid
artifact.replace("./test-files/iris.data")
! no run & transform got linked, call `ln.track()` & re-run
artifact.save()
 replacing the existing cache path /home/runner/.cache/lamindb/lamindb-ci/test-add-replace-cache/iris.data
Artifact(uid='f1CD0KVPvG4hLi2F0000', key='iris.data', description=None, suffix='.data', kind=None, otype='DataFrame', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', n_files=None, n_observations=None, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:48 UTC, is_locked=False, version_tag=None, is_latest=True)
# The virtual key follows the suffix of the replacement file.
assert artifact.key == "iris.data"
# `fpath` still refers to the pre-replace storage location, which must be gone.
assert not fpath.exists()
# The new storage location is again uid-based, now with the new suffix.
fpath = artifact.path
assert fpath.suffix == ".data"
assert fpath.stem == artifact.uid
artifact.delete(permanent=True, storage=True)

Save in existing storage with a virtual key

artifact = ln.Artifact(path_in_storage, key="iris_test.csv").save()
! data is a DataFrame, please use .from_dataframe()
! no run & transform got linked, call `ln.track()` & re-run
assert artifact._real_key.endswith("iris.csv")
artifact.replace("./test-files/iris.data")
! no run & transform got linked, call `ln.track()` & re-run
! replacing the file will replace key 'iris_test.csv' with 'iris_test.data', _real_key 'iris.csv' with 'iris.data' and delete 'iris.csv' upon `save()`
# State after replace() but before save(): both the real storage key and the
# virtual key already carry the new suffix.
assert artifact._real_key.endswith("iris.data")
# NOTE(review): `_clear_storagekey` presumably records the old storage key that
# save() will delete (see the warning emitted by replace()) — confirm.
assert artifact._clear_storagekey.endswith("iris.csv")
assert artifact.key == "iris_test.data"
artifact.save()
Artifact(uid='iskxUnEYxHWaPCST0000', key='iris_test.data', description=None, suffix='.data', kind=None, otype='DataFrame', size=182, hash='42Br6no9CjB6s5ZbmO-bmw', n_files=None, n_observations=None, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:50 UTC, is_locked=False, version_tag=None, is_latest=True)
# After save(): the object lives under the real key's file name (not the
# virtual key's), and the original object in storage has been removed.
path = artifact.path

assert path.name == "iris.data"
assert path.exists()
assert not path_in_storage.exists()
artifact.delete(permanent=True, storage=True)

Replace folder artifacts

adata = ln.examples.datasets.anndata_pbmc68k_reduced()

adata.write_zarr("./test-files/pbmc68k.zarr")
/opt/hostedtoolcache/Python/3.14.6/x64/lib/python3.14/site-packages/anndata/_io/zarr.py:44: UserWarning: Writing zarr v2 data will no longer be the default in the next minor release. v3 data will be written by default. If you are explicitly setting this configuration, consider migrating to the zarr v3 file format.
  f = open_write_group(store)
artifact = ln.Artifact("./test-files/pbmc68k.zarr", key="pbmc68k.zarr").save()
save_hash = artifact.hash
save_n_files = artifact.n_files
! no run & transform got linked, call `ln.track()` & re-run
# A folder artifact (here a zarr store) must not be replaceable by a single file.
with pytest.raises(ValueError) as err:
    artifact.replace("./test-files/iris.csv")
assert err.exconly().endswith("It is not allowed to replace a folder with a file.")
! no run & transform got linked, call `ln.track()` & re-run
assert save_hash is not None
assert artifact.path.is_dir()
adata.obs["add_new_col"] = "new"

adata.write_zarr("./test-files/pbmc68k_new.zarr")
/opt/hostedtoolcache/Python/3.14.6/x64/lib/python3.14/site-packages/anndata/_io/zarr.py:44: UserWarning: Writing zarr v2 data will no longer be the default in the next minor release. v3 data will be written by default. If you are explicitly setting this configuration, consider migrating to the zarr v3 file format.
  f = open_write_group(store)
artifact.replace("./test-files/pbmc68k_new.zarr")
artifact.save()
! no run & transform got linked, call `ln.track()` & re-run
 replacing the existing cache path /home/runner/.cache/lamindb/lamindb-ci/test-add-replace-cache/pbmc68k.zarr
Artifact(uid='SJdas40efp7T8Zep0000', key='pbmc68k.zarr', description=None, suffix='.zarr', kind=None, otype='AnnData', size=411567, hash='Ptm27hP2Hi_BRRDnl8mq4Q', n_files=138, n_observations=None, extra_data=None, branch_id=1, created_on_id=1, space_id=1, storage_id=1, run_id=None, schema_id=None, created_by_id=1, created_at=2026-06-22 07:40:53 UTC, is_locked=False, version_tag=None, is_latest=True)
# The key is kept even though the replacement came from a differently named
# local folder (pbmc68k_new.zarr).
assert artifact.key == "pbmc68k.zarr"
# The content changed (a new obs column was added), so hash and file count differ.
assert artifact.hash != save_hash
assert artifact.n_files != save_n_files
assert artifact.path.is_dir()
# Remove the local cache folder so that no local copy exists when the store is
# opened afterwards.
shutil.rmtree(artifact.cache())
! run input wasn't tracked, call `ln.track()` and re-run
with artifact.open() as store:
    assert "add_new_col" in store.obs
! run input wasn't tracked, call `ln.track()` and re-run
# checks that .open above opened the cloud path without syncing
assert not artifact._cache_path.exists()
shutil.rmtree("./test-files/pbmc68k.zarr")
shutil.rmtree("./test-files/pbmc68k_new.zarr")
artifact.delete(permanent=True, storage=True)
 deleting all versions of this artifact because they all share the same store
ln.setup.delete("test-add-replace-cache", force=True)
 deleted storage record on hub 137768267d4556a491fc02b58f1b630b | s3://lamindb-ci/test-add-replace-cache
 deleted instance record on hub 23d258b416df505da3a53878e9797d26