Register files from Census release 2023-07-25¶

import lamindb as ln
import lnschema_bionty as lb

# import cellxgene_census
import pandas as pd
💡 lamindb instance: laminlabs/cellxgene
# Start tracking this notebook; the log lines below show the Transform and Run that were loaded.
ln.track()
💡 notebook imports: lamindb==0.67.2 lnschema_bionty==0.39.0 pandas==2.1.4 requests==2.31.0
💡 loaded: Transform(uid='pNa7RdI26sp45zKv', name='Register files from Census release 2023-07-25', short_name='census-release-2023-07-25', version='1', type='notebook', updated_at=2024-01-27 05:27:26 UTC, created_by_id=1)
💡 loaded: Run(uid='dJ9t75LeOeqYWA4B0WbA', run_at=2024-01-30 09:03:47 UTC, transform_id=18, created_by_id=1)
census_version = "2023-07-25"  # LTS release of Census

Register collections (updated 2024-01-27)¶

# Select every artifact whose version equals the Census release handled here.
artifacts = ln.Artifact.filter(version=census_version).all()
# Display how many artifacts matched.
artifacts.count()
850
# Bundle the selected artifacts into one collection versioned by the Census release, then save it.
collection = ln.Collection(artifacts, name="cellxgene-census", version=census_version)
collection.save()
# Query all collections carrying this version and display their count.
collections = ln.Collection.filter(version=census_version).all()
collections.count()
80

Register datasets¶

Get the h5ad files directory on s3 from Census:

# Resolve the S3 URI of the h5ad files for the pinned Census release.
# The release is looked up by `census_version` rather than the moving "stable"
# alias, so the result stays tied to the version this notebook registers even
# after a newer release becomes "stable". Plain indexing raises a KeyError that
# names the missing key instead of failing later on a `None` value.
# NOTE: this cell needs `cellxgene_census`, whose import is commented out in the
# import cell at the top of the notebook.
census_directory = cellxgene_census.get_census_version_directory()
h5ad_dir = census_directory[census_version]["h5ads"]["uri"]
h5ad_dir
's3://cellxgene-data-public/cell-census/2023-07-25/h5ads/'
# Print a tree view of the h5ad directory to inspect what it contains.
ln.UPath(h5ad_dir).view_tree()
 (0 sub-directories & 850 files with suffixes '.h5ad'): 
├── 00099d5e-154f-4a7a-aa8d-fa30c8c0c43c.h5ad
├── 0041b9c3-6a49-4bf7-8514-9bc7190067a7.h5ad
├── 00476f9f-ebc1-4b72-b541-32f912ce36ea.h5ad
├── 00e5dedd-b9b7-43be-8c28-b0e5c6414a62.h5ad
├── 00ff600e-6e2e-4d76-846f-0eec4f0ae417.h5ad
├── 01209dce-3575-4bed-b1df-129f57fbc031.h5ad
...
# Create file records for the objects found under the release's h5ads prefix and save them.
files = ln.File.from_dir("s3://cellxgene-data-public/cell-census/2023-07-25/h5ads")
ln.save(files)
# Group the files into one dataset versioned by the Census release.
dataset = ln.Dataset(files, name="cellxgene-census", version=census_version)
dataset.save()
# Re-query the saved dataset and its files; `files` is reused by the cells below.
dataset = ln.Dataset.filter(name="cellxgene-census", version=census_version).one()
files = dataset.files.all()

Register metadata¶

Get all datasets and associated metadata using cellxgene REST API:

import requests


def get_metadata_from_cxg(timeout: float = 60.0):
    """Fetch the metadata of all datasets from the CELLxGENE curation REST API.

    Args:
        timeout: Seconds to wait for the server (connect and per-read) before
            giving up. Without a timeout ``requests`` may block forever on an
            unresponsive server.

    Returns:
        The decoded JSON body of the ``/curation/v1/datasets`` endpoint.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    api_url_base = "https://api.cellxgene.cziscience.com"
    datasets_path = "/curation/v1/datasets"
    datasets_url = f"{api_url_base}{datasets_path}"
    headers = {"Content-Type": "application/json"}
    res = requests.get(url=datasets_url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    res.raise_for_status()
    return res.json()
# Download the metadata once and display the number of returned entries.
cellxgene_meta = get_metadata_from_cxg()
len(cellxgene_meta)
1132
# Show which metadata fields the first entry provides.
cellxgene_meta[0].keys()
dict_keys(['assay', 'assets', 'cell_count', 'cell_type', 'collection_doi', 'collection_id', 'collection_name', 'collection_version_id', 'dataset_id', 'dataset_version_id', 'development_stage', 'disease', 'donor_id', 'explorer_url', 'is_primary_data', 'mean_genes_per_cell', 'organism', 'primary_cell_count', 'processing_status', 'published_at', 'revised_at', 'schema_version', 'self_reported_ethnicity', 'sex', 'suspension_type', 'tissue', 'title', 'tombstone', 'x_approximate_distribution'])

features¶

# Obs-level metadata fields, each mapped to the registry its categories are stored in.
obs_features = dict(
    assay="bionty.ExperimentalFactor",
    cell_type="bionty.CellType",
    development_stage="bionty.DevelopmentalStage",
    disease="bionty.Disease",
    donor_id="core.ULabel",
    self_reported_ethnicity="bionty.Ethnicity",
    sex="bionty.Phenotype",
    suspension_type="core.ULabel",
    tissue="bionty.Tissue",
)

# One categorical Feature record per field, saved in bulk.
obs_features_records = [
    ln.Feature(name=feature_name, type="category", registries=registry_name)
    for feature_name, registry_name in obs_features.items()
]
ln.save(obs_features_records)

# Group the features into a feature set and link it to all files under the "obs" slot.
obs_feature_set = ln.FeatureSet(name="obs features", features=obs_features_records)
obs_feature_set.save()
obs_feature_set.files.set(files, through_defaults=dict(slot="obs"))
# Dataset-level ("external") fields, each mapped to the registry its categories are stored in.
ext_features = dict(organism="bionty.Organism", collection="core.ULabel")

# One categorical Feature record per field, saved in bulk.
ext_features_records = [
    ln.Feature(name=feature_name, type="category", registries=registry_name)
    for feature_name, registry_name in ext_features.items()
]
ln.save(ext_features_records)

# Group the features into a feature set and link it to all files under the "external" slot.
ext_feature_set = ln.FeatureSet(name="external features", features=ext_features_records)
ext_feature_set.save()
ext_feature_set.files.set(files, through_defaults=dict(slot="external"))

collections, organisms¶

Register collections:

# Parent label under which every collection label is grouped.
is_collection = ln.ULabel(name="is_collection")
is_collection.save()

# Distinct (name, doi, id) triples; many datasets share one collection.
collections_meta = {
    (meta["collection_name"], meta["collection_doi"], meta["collection_id"])
    for meta in cellxgene_meta
}

# One ULabel per collection: the DOI goes into the description, the id into the reference.
collections_records = [
    ln.ULabel(
        name=label_name,
        description=doi,
        reference=reference_id,
        reference_type="collection_id",
    )
    for label_name, doi, reference_id in collections_meta
]
ln.save(collections_records)
is_collection.children.add(*collections_records)

Register organisms:

# Source record used to resolve organism ontology ids.
ncbitaxon_source = lb.BiontySource.filter(source="ncbitaxon").one()

# Distinct organism ontology ids across all datasets.
organisms_meta = set()
for dataset_meta in cellxgene_meta:
    for taxon in dataset_meta["organism"]:
        organisms_meta.add(taxon["ontology_term_id"])

organisms_records = lb.Organism.from_values(
    organisms_meta,
    field=lb.Organism.ontology_id,
    bionty_source=ncbitaxon_source,
)
# rename house mouse to mouse
for organism in organisms_records:
    if organism.name == "house mouse":
        organism.name = "mouse"
# Save the organisms themselves only (parents=False).
ln.save(organisms_records, parents=False)

Annotate files with collections and organisms:

# From here on `ext_features` is a lookup over the external feature set's members
# (it replaces the dict of the same name defined earlier).
ext_features = ext_feature_set.members.lookup()
files = dataset.files.all()
# NOTE(review): `collections` and `organisms` are not referenced in the loop below.
collections = is_collection.children.all()
organisms = lb.Organism.filter().all()

for dataset_meta in cellxgene_meta:
    # get registered file record based on dataset_id
    file = files.filter(key__contains=dataset_meta["dataset_id"]).one_or_none()
    # Datasets returned by the API without a matching registered file are skipped.
    if file is None:
        continue

    # register collection
    # The collection label is found through the collection id stored in `reference`.
    collection = ln.ULabel.filter(reference=dataset_meta["collection_id"]).one()
    file.labels.add(collection, feature=ext_features.collection)

    # register organism
    organism_ontology_ids = [i["ontology_term_id"] for i in dataset_meta["organism"]]
    organism_records = lb.Organism.filter(ontology_id__in=organism_ontology_ids).list()
    file.labels.add(organism_records, feature=ext_features.organism)

ontologies¶

Register all ontology ids:

from typing import Optional
from lnschema_bionty.models import Registry
from lamindb.dev._feature_manager import get_accessor_by_orm

# From here on `obs_features_records` is a lookup over the obs feature set's
# members, accessed by feature name (it replaces the list built earlier).
obs_features_records = obs_feature_set.members.lookup()
# Keyed by the value of `feature.registries`; the values are used below as
# attribute names on ln.File.
ACCESSORS = get_accessor_by_orm(ln.File)
# feature name -> (accessor attribute name on ln.File, registry class behind it)
FEATURE_TO_ACCESSOR = {}
for name in obs_features.keys():
    feature = getattr(obs_features_records, name)
    accessor = ACCESSORS.get(feature.registries)
    orm = getattr(ln.File, accessor).field.model
    # TODO: ulabels are defined in the File model, improve this in LaminDB
    # When the field's model is ln.File itself, the registry is taken from the
    # other side of the relation instead.
    if orm == ln.File:
        orm = getattr(ln.File, accessor).field.related_model
    FEATURE_TO_ACCESSOR[name] = (accessor, orm)


def create_ontology_record_from_source(
    ontology_id: str,
    from_orm: Registry,
    target_orm: Registry,
    bionty_source: Optional[lb.BiontySource] = None,
):
    """Build a ``target_orm`` record from a term that lives in another registry.

    The term is loaded through ``from_orm.from_bionty`` and its name,
    description, ontology id and source id are copied into a new, unsaved
    ``target_orm`` record.

    Args:
        ontology_id: Ontology id of the term to copy.
        from_orm: Registry class the term is loaded from.
        target_orm: Registry class the new record is created in.
        bionty_source: Optional source passed on to ``from_orm.from_bionty``.

    Returns:
        The unsaved ``target_orm`` record, or ``None`` when it could not be
        built (the failure is logged as a warning).
    """
    import logging

    from_record = from_orm.from_bionty(
        ontology_id=ontology_id, bionty_source=bionty_source
    )
    try:
        return target_orm(
            name=from_record.name,
            description=from_record.description,
            ontology_id=from_record.ontology_id,
            bionty_source_id=from_record.bionty_source_id,
        )
    except Exception as error:
        # Creation stays best-effort as before, but the failure is now reported
        # instead of being dropped silently.
        logging.getLogger(__name__).warning(
            "could not create %s record for ontology_id %r: %s",
            getattr(target_orm, "__name__", target_orm),
            ontology_id,
            error,
        )
        return None
# Switch off the name search that otherwise runs when records are created.
ln.settings.upon_create_search_names = False

# feature name -> set of (label, ontology_term_id) pairs seen across all datasets.
# donor_id and suspension_type are left out here; they are collected in the
# "donors and suspension_types" section.
ontology_ids = {}
for name in obs_features:
    if name in ("donor_id", "suspension_type"):
        continue
    label_id_pairs = set()
    for dataset_meta in cellxgene_meta:
        for term in dataset_meta.get(name, ()):
            label_id_pairs.add((term["label"], term["ontology_term_id"]))
    ontology_ids[name] = label_id_pairs

# Non-default sources used as fallbacks when registering the ontology ids.
bionty_source_pato = lb.BiontySource.filter(source="pato").one()
bionty_source_ds_mouse = lb.BiontySource.filter(
    entity="DevelopmentalStage", organism="mouse"
).one()

# register all ontology ids
for name, terms in ontology_ids.items():
    print(f"registering {name}")
    accessor, orm = FEATURE_TO_ACCESSOR.get(name)
    terms_ids = [i[1] for i in terms]
    # First pass: create and save everything the registry resolves by ontology id.
    records = orm.from_values(terms_ids, field="ontology_id")
    if len(records) > 0:
        ln.save(records)
    # Second pass: deal with the ids that are still not validated.
    inspect_result = orm.inspect(terms_ids, field="ontology_id", mute=True)
    if len(inspect_result.non_validated) > 0:
        if name == "development_stage":
            # Retry with the mouse developmental-stage source ...
            records = orm.from_values(
                inspect_result.non_validated,
                field="ontology_id",
                bionty_source=bionty_source_ds_mouse,
            )
            # ... copy UBERON terms over from the Tissue registry ...
            records += [
                create_ontology_record_from_source(
                    ontology_id=term_id, from_orm=lb.Tissue, target_orm=orm
                )
                for term_id in inspect_result.non_validated
                if term_id.startswith("UBERON:")
            ]
            # ... and register the literal "unknown" placeholder as-is.
            records += [
                orm(name=term_id, ontology_id=term_id)
                for term_id in inspect_result.non_validated
                if term_id == "unknown"
            ]
        else:
            # Non-PATO ids are registered with the label supplied by the metadata.
            records = [
                orm(name=term[0], ontology_id=term[1])
                for term in terms
                if (not term[1].startswith("PATO:"))
                and (term[1] in inspect_result.non_validated)
            ]
            # PATO ids are copied over from the Phenotype registry.
            records += [
                create_ontology_record_from_source(
                    ontology_id=term_id,
                    from_orm=lb.Phenotype,
                    target_orm=orm,
                    bionty_source=bionty_source_pato,
                )
                for term_id in inspect_result.non_validated
                if term_id.startswith("PATO:")
            ]

        # create_ontology_record_from_source returns None when it cannot build a
        # record; drop those entries so they are neither reported nor saved.
        records = [record for record in records if record is not None]

        if len(records) > 0:
            print(f"registered {len(records)} records: {records}")
            ln.save(records)
Hide code cell output
registering assay
❗ did not create ExperimentalFactor record for 1 non-validated ontology_id: 'EFO:0700016'
❗ now recursing through parents: this only happens once, but is much slower than bulk saving
registered 1 records: [ExperimentalFactor(uid='gWUGSA9l', name='Smart-seq v4', ontology_id='EFO:0700016', created_by_id=1)]
registering cell_type
❗ now recursing through parents: this only happens once, but is much slower than bulk saving
registering development_stage
❗ did not create DevelopmentalStage records for 6 non-validated ontology_ids: 'UBERON:0018241', 'UBERON:0000113', 'UBERON:0034919', 'UBERON:0007220', 'UBERON:0007222', 'unknown'
❗ now recursing through parents: this only happens once, but is much slower than bulk saving
registered 6 records: [DevelopmentalStage(uid='wksJWjer', name='prime adult stage', ontology_id='UBERON:0018241', description='A Life Cycle Stage That Starts At Completion Of Development And Growth Of The Sexually Mature Adult Animal, And Ends Before Senescence.', bionty_source_id=47, created_by_id=1), DevelopmentalStage(uid='GDaE3j6Z', name='post-juvenile adult stage', ontology_id='UBERON:0000113', description='The Stage Of Being A Sexually Mature Adult Animal.', bionty_source_id=47, created_by_id=1), DevelopmentalStage(uid='l00DTC4g', name='juvenile stage', ontology_id='UBERON:0034919', description='The Stage Of Being No More Dependent Of The Nest And/Or From Caregivers For Subsistence While Having Not Reach Sexual Maturity.', bionty_source_id=47, created_by_id=1), DevelopmentalStage(uid='lNh8U4YZ', name='late embryonic stage', ontology_id='UBERON:0007220', description='An Embryo Stage That Covers Late Steps Of The Embryogenesis With A Fully Formed Embryo Still Developing Before Birth Or Egg Hatching.', bionty_source_id=47, created_by_id=1), DevelopmentalStage(uid='hqyIKjfF', name='late adult stage', ontology_id='UBERON:0007222', bionty_source_id=47, created_by_id=1), DevelopmentalStage(uid='xL8yuEN7', name='unknown', ontology_id='unknown', created_by_id=1)]
registering disease
❗ did not create Disease record for 1 non-validated ontology_id: 'PATO:0000461'
❗ now recursing through parents: this only happens once, but is much slower than bulk saving
registered 1 records: [Disease(uid='4r2nqggf', name='normal', ontology_id='PATO:0000461', description='A Quality Inhering In A Bearer By Virtue Of The Bearer'S Exhibiting No Deviation From Normal Or Average.', bionty_source_id=38, created_by_id=1)]
registering self_reported_ethnicity
❗ did not create Ethnicity records for 3 non-validated ontology_ids: 'multiethnic', 'na', 'unknown'
❗ now recursing through parents: this only happens once, but is much slower than bulk saving
registered 3 records: [Ethnicity(uid='xL8yuEN7', name='unknown', ontology_id='unknown', created_by_id=1), Ethnicity(uid='UY1fNAFT', name='na', ontology_id='na', created_by_id=1), Ethnicity(uid='8lAgy5Ej', name='multiethnic', ontology_id='multiethnic', created_by_id=1)]
registering sex
❗ did not create Phenotype records for 3 non-validated ontology_ids: 'PATO:0000384', 'unknown', 'PATO:0000383'
registered 3 records: [Phenotype(uid='xL8yuEN7', name='unknown', ontology_id='unknown', created_by_id=1), Phenotype(uid='Pl1UiuS0', name='male', ontology_id='PATO:0000384', description='A Biological Sex Quality Inhering In An Individual Or A Population Whose Sex Organs Contain Only Male Gametes.', bionty_source_id=38, created_by_id=1), Phenotype(uid='hSl0sSF0', name='female', ontology_id='PATO:0000383', description='A Biological Sex Quality Inhering In An Individual Or A Population That Only Produces Gametes That Can Be Fertilised By Male Gametes.', bionty_source_id=38, created_by_id=1)]
registering tissue
❗ did not create Tissue records for 18 non-validated ontology_ids: 'CL:0000010 (cell culture)', 'CL:0000082 (cell culture)', 'CL:0000084 (cell culture)', 'CL:0000115 (cell culture)', 'CL:0000351 (cell culture)', 'CL:0002322 (cell culture)', 'CL:0002327 (cell culture)', 'CL:0002328 (cell culture)', 'CL:0002334 (cell culture)', 'CL:0002335 (cell culture)', 'CL:0002633 (cell culture)', 'CL:0010003 (cell culture)', 'UBERON:0000088 (organoid)', 'UBERON:0000310 (organoid)', 'UBERON:0000966 (organoid)', 'UBERON:0001295 (organoid)', 'UBERON:0002048 (organoid)', 'UBERON:0002370 (organoid)'
❗ now recursing through parents: this only happens once, but is much slower than bulk saving
registered 18 records: [Tissue(uid='x3tRcugV', name='trophoblast (organoid)', ontology_id='UBERON:0000088 (organoid)', created_by_id=1), Tissue(uid='UoElNxsj', name='endothelial cell (cell culture)', ontology_id='CL:0000115 (cell culture)', created_by_id=1), Tissue(uid='9YB5clqY', name='cultured cell (cell culture)', ontology_id='CL:0000010 (cell culture)', created_by_id=1), Tissue(uid='WSs6UA9e', name='lung (organoid)', ontology_id='UBERON:0002048 (organoid)', created_by_id=1), Tissue(uid='CevFMDqD', name='preadipocyte (cell culture)', ontology_id='CL:0002334 (cell culture)', created_by_id=1), Tissue(uid='RkE6D8y1', name='endometrium (organoid)', ontology_id='UBERON:0001295 (organoid)', created_by_id=1), Tissue(uid='rIPA0OEl', name='T cell (cell culture)', ontology_id='CL:0000084 (cell culture)', created_by_id=1), Tissue(uid='dwdBlCNp', name='breast (organoid)', ontology_id='UBERON:0000310 (organoid)', created_by_id=1), Tissue(uid='Ash8pGf8', name='trophoblast cell (cell culture)', ontology_id='CL:0000351 (cell culture)', created_by_id=1), Tissue(uid='uS0Cw8zN', name='retina (organoid)', ontology_id='UBERON:0000966 (organoid)', created_by_id=1), Tissue(uid='vg9s890t', name='respiratory basal cell (cell culture)', ontology_id='CL:0002633 (cell culture)', created_by_id=1), Tissue(uid='lfIFQFR5', name='epithelial cell of lung (cell culture)', ontology_id='CL:0000082 (cell culture)', created_by_id=1), Tissue(uid='w6gzNa8D', name='mammary gland epithelial cell (cell culture)', ontology_id='CL:0002327 (cell culture)', created_by_id=1), Tissue(uid='yPk6E1V8', name='epithelial cell of alveolus of lung (cell culture)', ontology_id='CL:0010003 (cell culture)', created_by_id=1), Tissue(uid='K4RSNRBc', name='thymus (organoid)', ontology_id='UBERON:0002370 (organoid)', created_by_id=1), Tissue(uid='9ICArUMH', name='embryonic stem cell (cell culture)', ontology_id='CL:0002322 (cell culture)', created_by_id=1), Tissue(uid='7MzqN14b', name='bronchial epithelial cell (cell 
culture)', ontology_id='CL:0002328 (cell culture)', created_by_id=1), Tissue(uid='kWD0kb5x', name='brown preadipocyte (cell culture)', ontology_id='CL:0002335 (cell culture)', created_by_id=1)]

donors and suspension_types¶

# Collect the distinct donor ids and suspension types across all datasets.
donor_ids = set()
suspension_types = set()

for dataset_entry in cellxgene_meta:
    # Entries lacking a field contribute nothing to the corresponding set.
    donor_ids.update(dataset_entry.get("donor_id", ()))
    suspension_types.update(dataset_entry.get("suspension_type", ()))

# Parent labels under which donor ids and suspension types are grouped.
is_donor = ln.ULabel(name="is_donor", description="parent of donor ids")
is_donor.save()

is_suspension_type = ln.ULabel(
    name="is_suspension_type", description="parent of suspension types"
)
is_suspension_type.save()
# Reload the parent label and inspect the collected donor ids against its
# existing children; only the non-validated ones are created and attached.
is_donor = ln.ULabel.filter(name="is_donor").one()
donors = is_donor.children.all()
result = donors.inspect(donor_ids, mute=True)
new_donors = [ln.ULabel(name=name) for name in result.non_validated]
ln.save(new_donors)
is_donor.children.add(*new_donors)

# Same procedure for the suspension types.
is_suspension_type = ln.ULabel.filter(name="is_suspension_type").one()
stypes = is_suspension_type.children.all()
result = stypes.inspect(suspension_types, mute=True)
new_stypes = [ln.ULabel(name=name) for name in result.non_validated]
ln.save(new_stypes)
is_suspension_type.children.add(*new_stypes)

Annotate files with metadata¶

# Lookup over all Feature records, accessed by feature name below.
features = ln.Feature.lookup()

for idx, dataset_meta in enumerate(cellxgene_meta):
    # Progress message every 100 datasets.
    if idx % 100 == 0:
        print(f"annotating dataset {idx} of {len(cellxgene_meta)}")
    # Find the registered file whose key contains the dataset id; skip datasets without one.
    file = files.filter(key__contains=dataset_meta["dataset_id"]).one_or_none()
    if file is None:
        continue
    for field, terms in dataset_meta.items():
        # Only the obs features mapped in FEATURE_TO_ACCESSOR are annotated.
        if field not in FEATURE_TO_ACCESSOR:
            continue
        accessor, orm = FEATURE_TO_ACCESSOR.get(field)
        if field in ["donor_id", "suspension_type"]:
            # These two fields are looked up by name rather than by ontology id.
            records = orm.from_values(terms, field="name")
            if len(records) > 0:
                # stratify by feature so that link tables records are written
                file.labels.add(records, feature=getattr(features, field))
        else:
            # The remaining fields are lists of dicts carrying an "ontology_term_id".
            records = orm.from_values(
                [i["ontology_term_id"] for i in terms], field="ontology_id"
            )
            if len(records) > 0:
                getattr(file, accessor).add(*records)
Hide code cell output
annotating dataset 0 of 1132
annotating dataset 100 of 1132
annotating dataset 200 of 1132
annotating dataset 300 of 1132
annotating dataset 400 of 1132
annotating dataset 500 of 1132
annotating dataset 600 of 1132
annotating dataset 700 of 1132
annotating dataset 800 of 1132
annotating dataset 900 of 1132
annotating dataset 1000 of 1132
annotating dataset 1100 of 1132

Validate and register genes¶

# register synthetic constructs and sars_cov_2 as new organisms
# Both organisms are created from the NCBITaxon source and saved with parents=False.
lb.Organism.from_bionty(
    ontology_id="NCBITaxon:32630", bionty_source=ncbitaxon_source
).save(parents=False)
lb.Organism.from_bionty(
    ontology_id="NCBITaxon:2697049", bionty_source=ncbitaxon_source
).save(parents=False)

# genes files
# `organisms` is a lookup by scientific name; the keys of `genes_files` are used
# as attribute names on it when the genes are registered.
organisms = lb.Organism.lookup(field=lb.Organism.scientific_name)
# organism attribute name -> URL of the gzipped CSV listing that organism's genes
genes_files = {
    "homo_sapiens": "https://github.com/chanzuckerberg/single-cell-curation/raw/main/cellxgene_schema_cli/cellxgene_schema/ontology_files/genes_homo_sapiens.csv.gz",
    "mus_musculus": "https://github.com/chanzuckerberg/single-cell-curation/raw/main/cellxgene_schema_cli/cellxgene_schema/ontology_files/genes_mus_musculus.csv.gz",
    "synthetic_construct": "https://github.com/chanzuckerberg/single-cell-curation/raw/main/cellxgene_schema_cli/cellxgene_schema/ontology_files/genes_ercc.csv.gz",
    "severe_acute_respiratory_syndrome_coronavirus_2": "https://github.com/chanzuckerberg/single-cell-curation/raw/main/cellxgene_schema_cli/cellxgene_schema/ontology_files/genes_sars_cov_2.csv.gz",
}

Register all genes for each organism:

for organism_name, genes_file in genes_files.items():
    print(f"registering {organism_name} genes")
    # The CSV has no header row; the first column (gene ids) becomes the index.
    df = pd.read_csv(genes_file, header=None, index_col=0)
    organism_record = getattr(organisms, organism_name)

    # Create and save the genes that can be resolved from their Ensembl ids.
    gene_records = lb.Gene.from_values(
        df.index,
        field=lb.Gene.ensembl_gene_id,
        organism=organism_record,
    )
    ln.save(gene_records)

    # Boolean mask over df.index marking the ids that validate after saving.
    validated = lb.Gene.validate(
        df.index,
        field=lb.Gene.ensembl_gene_id,
        organism=organism_record,
    )
    # register legacy genes manually
    # Ids that still fail validation get a record with the symbol from column 1.
    new_records = [
        lb.Gene(
            ensembl_gene_id=gene_id,
            symbol=df.loc[gene_id][1],
            organism=organism_record,
        )
        for gene_id in df.index[~validated]
    ]
    ln.save(new_records)

    # One feature set per organism holding all of its genes.
    genes_feature_set = ln.FeatureSet(
        name=f"all {organism_record.name} genes",
        features=gene_records + new_records,
    )
    genes_feature_set.save()
Hide code cell output
registering homo_sapiens genes
❗ did not create Gene records for 147 non-validated ensembl_gene_ids: 'ENSG00000112096', 'ENSG00000137808', 'ENSG00000161149', 'ENSG00000182230', 'ENSG00000203812', 'ENSG00000204092', 'ENSG00000205485', 'ENSG00000212951', 'ENSG00000215271', 'ENSG00000221995', 'ENSG00000224739', 'ENSG00000224745', 'ENSG00000225178', 'ENSG00000225932', 'ENSG00000226377', 'ENSG00000226380', 'ENSG00000226403', 'ENSG00000227021', 'ENSG00000227220', 'ENSG00000227902', ...
❗ 147 terms (0.20%) are not validated for ensembl_gene_id: ENSG00000269933, ENSG00000261737, ENSG00000259834, ENSG00000256374, ENSG00000263464, ENSG00000203812, ENSG00000272196, ENSG00000272880, ENSG00000284299, ENSG00000270188, ENSG00000287116, ENSG00000237133, ENSG00000224739, ENSG00000227902, ENSG00000239467, ENSG00000272551, ENSG00000280374, ENSG00000284741, ENSG00000236886, ENSG00000229352, ...
registering mus_musculus genes
❗ did not create Gene records for 135 non-validated ensembl_gene_ids: 'ENSMUSG00000022591', 'ENSMUSG00000045506', 'ENSMUSG00000053706', 'ENSMUSG00000053861', 'ENSMUSG00000066378', 'ENSMUSG00000066810', 'ENSMUSG00000066936', 'ENSMUSG00000067085', 'ENSMUSG00000067122', 'ENSMUSG00000067292', 'ENSMUSG00000067627', 'ENSMUSG00000067929', 'ENSMUSG00000068181', 'ENSMUSG00000069518', 'ENSMUSG00000072693', 'ENSMUSG00000073290', 'ENSMUSG00000073291', 'ENSMUSG00000073682', 'ENSMUSG00000074210', 'ENSMUSG00000074302', ...
❗ 135 terms (0.20%) are not validated for ensembl_gene_id: ENSMUSG00000022591, ENSMUSG00000094127, ENSMUSG00000066936, ENSMUSG00000116275, ENSMUSG00000091312, ENSMUSG00000098794, ENSMUSG00000079353, ENSMUSG00000096240, ENSMUSG00000079286, ENSMUSG00000085431, ENSMUSG00000075015, ENSMUSG00000075014, ENSMUSG00000078091, ENSMUSG00000075006, ENSMUSG00000079175, ENSMUSG00000079171, ENSMUSG00000079170, ENSMUSG00000079169, ENSMUSG00000090353, ENSMUSG00000100963, ...
registering synthetic_construct genes
❗ loading non-default source inside a LaminDB instance
❗ no Bionty source found, skipping Bionty validation
❗ loading non-default source inside a LaminDB instance
❗ did not create Gene records for 92 non-validated ensembl_gene_ids: 'ERCC-00002', 'ERCC-00003', 'ERCC-00004', 'ERCC-00009', 'ERCC-00012', 'ERCC-00013', 'ERCC-00014', 'ERCC-00016', 'ERCC-00017', 'ERCC-00019', 'ERCC-00022', 'ERCC-00024', 'ERCC-00025', 'ERCC-00028', 'ERCC-00031', 'ERCC-00033', 'ERCC-00034', 'ERCC-00035', 'ERCC-00039', 'ERCC-00040', ...
❗ 92 terms (100.00%) are not validated for ensembl_gene_id: ERCC-00002, ERCC-00003, ERCC-00004, ERCC-00009, ERCC-00012, ERCC-00013, ERCC-00014, ERCC-00016, ERCC-00017, ERCC-00019, ERCC-00022, ERCC-00024, ERCC-00025, ERCC-00028, ERCC-00031, ERCC-00033, ERCC-00034, ERCC-00035, ERCC-00039, ERCC-00040, ...
registering severe_acute_respiratory_syndrome_coronavirus_2 genes
❗ loading non-default source inside a LaminDB instance
❗ no Bionty source found, skipping Bionty validation
❗ loading non-default source inside a LaminDB instance
❗ did not create Gene records for 12 non-validated ensembl_gene_ids: 'ENSSASG00005000002', 'ENSSASG00005000003', 'ENSSASG00005000004', 'ENSSASG00005000006', 'ENSSASG00005000010', 'ENSSASG00005000007', 'ENSSASG00005000011', 'ENSSASG00005000009', 'ENSSASG00005000012', 'ENSSASG00005000008', 'ENSSASG00005000005', 'ENSSASG00005000013'
❗ 12 terms (100.00%) are not validated for ensembl_gene_id: ENSSASG00005000002, ENSSASG00005000003, ENSSASG00005000004, ENSSASG00005000006, ENSSASG00005000010, ENSSASG00005000007, ENSSASG00005000011, ENSSASG00005000009, ENSSASG00005000012, ENSSASG00005000008, ENSSASG00005000005, ENSSASG00005000013