genular ⓑ
Unifying Genes and Cells with Biological Knowledge for Deep Insights
This is a demo database interface. Results are limited to a small (100) subset of data. For full access, please use the API or download the data dumps.
What's This All About?
Genular is a database that unites single-cell RNA sequencing data with genomic and proteomic knowledge, drawing on almost half a billion cells from diverse tissues and conditions to uncover how genes truly behave in specific cell types across immunity, development, and disease. It also uniquely provides a Cell Significance Index (CSI) that highlights truly distinctive gene behavior, all in one integrated environment.
- Cross-Linking Data and Discovery: Every gene entry integrates gene expression, protein data, interaction networks, and disease associations, all in one searchable interface.
- Cell Significance Index (CSI): Our advanced CSI metric pinpoints which genes stand out in any given cell type, helping you quickly spot critical regulators of immune states, tissue differentiation, and pathological processes.
- Pathways and Networks: Go beyond raw expression: explore the functional networks and pathways your genes are part of, and discover how they shape complex traits like macrophage reprogramming or T cell memory.
- Advanced Filtering: Zoom in on specific conditions, such as T cells under viral infection, to isolate the genes that matter most and see how they connect to broader regulatory systems.
The Genular Podcast: Genes, Cells, and Discoveries [deep dive]
Genes
58M
Proteins
9.4M
Unique Cells
125M
(stats on 01 Feb 2025)
Document Schema
/**
 * Field layout of a single gene document.
 *
 * Every leaf is a descriptor object of the form `{ type: <constructor> }`,
 * optionally carrying `index` or `default`; a descriptor wrapped in `[...]`
 * denotes an array of that element. This looks like a Mongoose-style schema
 * definition — NOTE(review): the consumer of this object is not shown here.
 *
 * Where a field is literally named "type" (tax.name.type, chrom.type), its
 * descriptor is itself wrapped as `type: { type: ... }`.
 *
 * Names after "source:" identify the upstream file/column a value is taken from.
 */
const gene = {
  // ---------------------------------------------------------------- identity
  // NCBI Gene ID, unique per document (source: gene2accession.GeneID).
  geneID: { type: Number, index: { unique: true } },

  // Taxonomy information.
  tax: {
    // Taxonomy ID (source: gene2accession.tax_id).
    id: { type: Number, index: true },
    name: {
      // Taxonomy name (source: taxdump.names.dmp.name_txt).
      name: { type: String },
      unique: { type: String },
      type: { type: Number },
    },
  },

  // Timestamp of the last update; defaults to the current time.
  updated: { type: Date, default: Date.now },

  // Gene status, e.g. Predicted or Validated (source: gene2accession.status).
  geneStatus: { type: String },

  // --------------------------------------------------------------- accession
  accession: {
    // NCBI RNA nucleotide accession number
    // (source: gene2accession.RNA_nucleotide_accession.version).
    rna: { type: String },
    // Protein accession numbers
    // (source: gene2accession.protein_accession.version).
    protein: [{ type: String }],
    // Genomic nucleotide accession numbers
    // (source: gene2accession.genomic_nucleotide_accession.version).
    gene: [{ type: String }],
    // NCBI peptide reference sequence
    // (source: gene2accession.mature_peptide_accession.version).
    peptide: { type: String },
  },

  // ------------------------------------------------- cell significance index
  // Gene expression profile across cell types: one entry of per-cell
  // statistics for each cell/context pairing.
  cellSignificanceIndex: [
    {
      // Cell identifier (e.g. "CL0000236"); generally a standardized
      // nomenclature or cell ontology ID.
      i: {
        type: String,
      },
      // Context or condition the cell is in (e.g. "overall", or an
      // ontology-based condition).
      c: {
        type: String,
      },
      // Adjusted p-value (pAdjVal) for this cell/gene expression comparison.
      p: {
        type: Number,
      },
      // Delta value (deltaVal): the expression difference or effect size.
      d: {
        type: Number,
      },
      // Robust pAdjVal threshold specific to this cell/context pairing.
      tp: {
        type: Number,
      },
      // Robust deltaVal threshold for this cell/context pairing.
      td: {
        type: Number,
      },
      // Fold-change ratio of the p-value to its threshold
      // (pAdjVal / thresholdP).
      fcp: {
        type: Number,
      },
      // Fold-change ratio of the delta value to its threshold
      // (deltaVal / thresholdD).
      fcd: {
        type: Number,
      },
      // "Cell Significance Index Score" computed from pAdjVal and deltaVal
      // (e.g. deltaVal * -log10(pAdjVal)); an overall expression
      // significance metric.
      cs: {
        type: Number,
      },
      // Summary statistics of the raw expression values for this cell.
      e: {
        mean: { type: Number }, // mean
        median: { type: Number }, // median
        min: { type: Number }, // minimum
        max: { type: Number }, // maximum
        var: { type: Number }, // variance
        std: { type: Number }, // standard deviation
        // The original expression values themselves.
        data: { type: [Number] },
      },
    },
  ],

  // --------------------------------------------------------- mRNA expression
  // mRNA expression levels (immune cell specificity).
  mRNAExpressions: {
    proteinAtlas: [
      {
        l: { type: String }, // lineage
        c: { type: String }, // cell
        e: { type: Number }, // normalizedRNAExpression
      },
    ],
  },

  // --------------------------------------------------------- cross-reference
  // Links to the same gene in other databases.
  crossReference: {
    // Identifiers in other databases as database:value pairs, except
    // entries such as HGNC:HGNC:1100 (source: gene_info.dbXrefs).
    bulk: [
      {
        dbName: { type: String },
        value: { type: String },
      },
    ],
    // ENSEMBL gene identifier (source: gene2ensembl.Ensembl_gene_identifier).
    enseGeneID: { type: String },
    // ENSEMBL protein identifiers
    // (source: gene2ensembl.Ensembl_protein_identifier).
    enseProtID: [{ type: String }],
    // ENSEMBL RNA identifiers (source: gene2ensembl.Ensembl_rna_identifier).
    enseRnaID: [{ type: String }],
    // PubMed references (source: gene2pubmed.PubMed).
    pubMed: [{ type: Number }],
  },

  // -------------------------------------------------------- genomic position
  genePos: {
    // Start position on the genomic sequence
    // (source: gene2accession.start_position_on_the_genomic_accession).
    start: { type: Number },
    // End position on the genomic sequence
    // (source: gene2accession.end_position_on_the_genomic_accession).
    end: { type: Number },
  },

  // Gene orientation: '+', '-' or '?' (source: gene2accession.orientation).
  orientation: { type: String },

  // Gene symbol; indexed so it is searchable (source: gene2accession.Symbol).
  symbol: { type: String, index: true },

  // Locus tag (source: gene_info.LocusTag).
  locTag: { type: String },

  // Chromosome on which this gene is placed (source: gene_info.chromosome).
  chrom: {
    pos: { type: Number },
    // Chromosome type (MT or NULL).
    type: { type: String },
    // Chromosome location.
    loc: { type: String },
  },

  // Gene description (source: gene_info.description).
  desc: { type: String },

  // Numeric gene-type code (source: gene_info.type of gene):
  //   unknown (0), tRNA (1), rRNA (2), snRNA (3), scRNA (4), snoRNA (5),
  //   protein-coding (6), pseudo (7), transposon (8), miscRNA (9),
  //   ncRNA (10), other (255).
  geneType: { type: Number },

  // --------------------------------------------------------------------- MIM
  // MIM (Mendelian Inheritance in Man) data.
  mim: [
    {
      // MIM number (OMIM) (source: mim2gene_medgen.MIM number).
      id: { type: String, index: true },
      // MIM relation type: gene | phenotype (source: mim2gene_medgen.type).
      relation: { type: String },
      // MedGen CUI (source: mim2gene_medgen.MedGenCUI).
      // NOTE(review): MedGen CUIs are usually alphanumeric (e.g. "C0000001");
      // confirm that Number is the intended type here.
      cui: { type: Number },
    },
  ],

  // ---------------------------------------------------------------- ontology
  // Gene ontology data from the GO and Reactome databases.
  ontology: [
    {
      // Ontology ID, e.g. GO:0005634 or R-HSA-9033241
      // (source: gene2go.GO ID or Reactome ID).
      id: { type: String },
      // Ontology term, e.g. biological_process or
      // N-terminal protein myristoylation
      // (source: gene2go.GO term or Reactome pathway name).
      term: { type: String },
      // Ontology category: Function, Process or Component
      // (source: gene2go.Category).
      cat: { type: String },
      // PubMed uids reported as evidence for the association; pipe-delimited
      // in the upstream file (source: gene2go.PubMed).
      pubMed: [{ type: Number }],
    },
  ],

  // ----------------------------------------------------------- relationships
  geneRelations: [
    {
      // Type of gene relation (source: gene_group.relationship). One of:
      // Ortholog, Potential readthrough sibling, Readthrough child,
      // Readthrough parent, Readthrough sibling, Region member,
      // Region parent, Related functional gene, Related pseudogene.
      relationType: { type: String },
      // Similar genes (source: gene_group.Other GeneID).
      similarGenes: [{ type: String }],
    },
  ],

  // --------------------------------------------------------------- disorders
  // Gene disorders (requires an OMIM license).
  geneDisorder: [
    {
      // Disorder name (source: morbidmap.Disorder).
      name: { type: String },
      // Cytogenetic location (source: morbidmap.cytogenetic location).
      loc: { type: String },
    },
  ],

  // ---------------------------------------------------------------- proteins
  protein: [
    {
      // GENULAR protein ID, described upstream as
      // geneID + protein mass + protein length + crc32(sequence).
      proteinID: { type: Number, index: { unique: true } },

      // UniProt accessions; a single protein can carry several
      // (source: uniprot_sprot.accession).
      accession: [{ type: String, index: true }],

      // Protein name (source: uniprot_sprot.name).
      symbol: { type: String },

      // Full descriptive protein name
      // (source: uniprot_sprot.protein.recommendedName.fullName).
      name: { type: String },

      // Identifiers of this protein in other databases.
      // NOTE(review): `unigeneID` and `UniGene` both appear to hold a UniGene
      // cluster ID — confirm whether both are needed.
      databaseIDs: {
        // Protein structure ID (source: idmapping_selected.PDB).
        pdbID: [{ type: String }],
        // Protein GO ID (source: idmapping_selected.GO).
        goID: [{ type: String }],
        // UniGene protein cluster (source: idmapping_selected.UniGene).
        unigeneID: { type: String },
        // Protein InterPro ID (source: uniprot_???.InterPro).
        interProID: [{ type: String }],
        // Protein family IDs.
        Pfam: [{ type: String }],
        // Protein domain IDs.
        PROSITE: [{ type: String }],
        // UniGene ID for the protein cluster.
        UniGene: { type: String },
        // Protein database summary IDs.
        PDBsum: [{ type: String }],
        // Protein model IDs.
        ProteinModelPortal: { type: String },
        // Database of Interacting Proteins ID.
        DIP: { type: String },
        // Molecular INTeraction database ID.
        MINT: { type: String },
        // Protein-protein interaction IDs.
        STRING: { type: String },
        // Protein binding data IDs.
        BindingDB: { type: String },
        // ChEMBL cross-reference ID.
        ChEMBL: { type: String },
        // Dephosphorylation database ID.
        DEPOD: { type: String },
        // Integrated Post-Translational Modification Network ID.
        iPTMnet: { type: String },
        // Protein phosphorylation site IDs.
        PhosphoSite: { type: String },
        // Protein palmitoylation data IDs.
        SwissPalm: { type: String },
        // Unified carbohydrate knowledgebase ID.
        UniCarbKB: { type: String },
        // Protein mutation data IDs.
        BioMuta: { type: String },
        // Domain Mapping of Disease Mutations ID.
        DMDM: { type: String },
        // Eukaryotic Promoter Database ID.
        EPD: { type: String },
        // MaxQuant quantitative proteomics data ID.
        MaxQB: { type: String },
        // Protein abundance database ID.
        PaxDb: { type: String },
        // Proteomics Identifications Database ID.
        PRIDE: { type: String },
        // NCBI Gene ID.
        GeneID: { type: String },
        // Kyoto Encyclopedia of Genes and Genomes ID.
        KEGG: { type: String },
        // Comparative Toxicogenomics Database ID.
        CTD: { type: String },
        // GeneCards ID for human genes.
        GeneCards: { type: String },
        // Human Protein Atlas ID.
        HPA: [{ type: String }],
        // MalaCards ID for human diseases.
        MalaCards: { type: String },
        // neXtProt ID for human proteins.
        neXtProt: { type: String },
        // Pharmacogenomics Knowledgebase ID.
        PharmGKB: { type: String },
        // Homologous genes database ID.
        HOGENOM: { type: String },
        // Homologous vertebrate genes database ID.
        HOVERGEN: { type: String },
        // Eukaryotic ortholog groups ID.
        InParanoid: { type: String },
        // KEGG Orthology ID.
        KO: { type: String },
        // Phylome database ID.
        PhylomeDB: { type: String },
        // TreeFam database ID.
        TreeFam: { type: String },
        // Signaling pathway database ID.
        SignaLink: { type: String },
        // SIGNOR signaling network ID.
        SIGNOR: { type: String },
        // Evolutionary Trace Report Maker ID.
        EvolutionaryTrace: { type: String },
        // Gene Wiki ID for gene information.
        GeneWiki: { type: String },
        // GenomeRNAi database ID for RNAi data.
        GenomeRNAi: { type: String },
        // Protein Ontology ID from PRO.
        PRO: { type: String },
        // UniProt Proteomes ID.
        Proteomes: { type: String },
        // Database of gene expression evolution ID.
        Bgee: { type: String },
        // Expression reference database ID.
        CleanEx: { type: String },
      },

      // Citations, taken from DOI references such as
      // <dbReference id="10.1104/pp.101.4.1413" type="DOI" />.
      citations: [
        {
          // Title of the citation.
          title: { type: String },
          // PubMed ID associated with the citation.
          pubmedID: { type: String },
          // Digital Object Identifier (DOI) of the citation.
          doi: { type: String },
          // Scope or context of the citation.
          scope: [{ type: String }],
        },
      ],

      // RefSeq protein similarity indices (source: idmapping_selected.RefSeq).
      refSeq: {
        // UniRef cluster IDs at increasing sequence-similarity levels.
        uniref: {
          s50: { type: String }, // UniRef50 ID: cluster at 50% similarity
          s90: { type: String }, // UniRef90 ID: cluster at 90% similarity
          s100: { type: String }, // UniRef100 ID: cluster at 100% similarity
        },
      },

      // Protein family information (Pfam ID; source: UniProt).
      proteinFamily: {
        // Pfam ID used for family classification
        // (source: pdb_pfam_mapping.PFAM_ACC).
        accession: { type: String },
        // Name of the protein family (source: pdb_pfam_mapping.PFAM_Name).
        name: { type: String },
        // Description of the protein family
        // (source: pdb_pfam_mapping.PFAM_desc).
        description: { type: String },
        // Value representing the protein family
        // (source: pdb_pfam_mapping.eValue).
        value: { type: String },
      },

      // Protein motif details from the PROSITE database.
      proteinMotifs: [
        {
          // PROSITE internal ID of the motif (source: prosite.ID).
          id: { type: String },
          // Description of the motif pattern (source: prosite.DE).
          description: { type: String },
          // Motif sequence pattern (source: prosite.PA).
          sequence: { type: String },
        },
      ],

      // Protein interaction partners.
      interactionPartners: [
        {
          // Partner's UniProt ID, formatted string_id.uniprot_id
          // (source: protein.links.protein2).
          partnerID: { type: String },
          // Combined interaction score
          // (source: protein.links.combined_score).
          score: { type: Number },
        },
      ],

      // Protein sequence information.
      sequence: {
        // Length of the protein sequence.
        length: { type: Number },
        // Molecular mass of the protein.
        mass: { type: Number },
        // Checksum for sequence verification.
        checksum: { type: String },
        // Date of last modification.
        modified: { type: Date },
        // Version of the protein sequence.
        version: { type: Number },
        // Amino acid sequence (source: uniprot_???.sprot/trembl).
        sequence: { type: String },
      },

      // Protein existence type (e.g. predicted).
      existence: { type: Number },

      // Source of the protein data: sprot (1) or trembl (2)
      // (source: uniprot_???.sprot/trembl).
      relevance: { type: Number },

      // UniParc FASTA sequence database ID, e.g. UPI00003B0FD4
      // (source: idmapping_selected.UniParc).
      uniParcID: { type: String },
    },
  ],
};