Quickstart

This example uploads a JSON file, converts it into a dataset, annotates the dataset, and then exports the result to an Excel file.

Store your credentials first!

You will need to have your credentials stored in order to run this example. Create a Personal Access Token.

Python R

import solvebio
from solvebio import Vault
from solvebio import Dataset
from solvebio import DatasetImport

solvebio.login()

# The sample file is uploaded to the root of your personal Vault
personal_vault = Vault.get_personal_vault()

# Download the sample file of variants first:
# https://s3.amazonaws.com/downloads.solvebio.com/demo/interesting-variants.json.gz
uploaded_file = personal_vault.upload_file(
    'local/path/to/interesting-variants.json.gz',
    '/'
)

# Get or create the dataset at this path
dataset = Dataset.get_or_create_by_full_path('~/example-dataset')

# Import the uploaded file into the dataset
dataset_import = DatasetImport.create(
    dataset_id=dataset.id,
    object_id=uploaded_file.id
)

# Follow the dataset activity until the import has finished
dataset.activity(follow=True)

# Query the dataset for the genes of interest
interesting_genes = ['SPOP', 'APC', 'IDH2']
base_query = dataset.query()
matches = base_query.filter(gene__in=interesting_genes)

# Annotate the matching records with a new column by running a migration
# from the dataset back into itself
flag_field = {
    'name': 'interesting',
    'data_type': 'boolean',
    'description': 'These genes are interesting',
    'expression': 'True',
}
migration = matches.migrate(
    dataset,
    target_fields=[flag_field],
    commit_mode='upsert',
    follow=False
)

# Follow the dataset activity until the migration has finished
dataset.activity(follow=True)

# The flagged records should be exactly the records matched above
flagged = dataset.query().filter(interesting=True)
flagged_count = len(list(flagged))
matched_count = len(list(matches))
assert flagged_count == matched_count

# Export variants in the KRAS gene into Excel
kras_query = dataset.query().filter(gene='KRAS')
excel_export = kras_query.export(format='excel', follow=True)
excel_export.download('variants_kras.xlsx')

library(solvebio)

solvebio::login()

# Download a sample file of variants
# https://s3.amazonaws.com/downloads.solvebio.com/demo/interesting-variants.json.gz

# Upload the file to the root of your personal Vault
vault <- Vault.get_personal_vault()
object <- Object.upload_file('./interesting-variants.json.gz', vault$id, '/')

# Create a new, empty dataset
# The dataset full path is built as "<vault full path>:<path inside the vault>"
dataset_full_path <- paste(vault$full_path, "/r_examples/my_dataset", sep=":")
dataset <- Dataset.get_or_create_by_full_path(dataset_full_path)

# Import the file into the dataset
imp <- DatasetImport.create(dataset_id = dataset$id,
                            commit_mode = 'append',
                            object_id = object$id)

# Wait until import is completed
Dataset.activity(dataset$id)

# Query the dataset
filters <- '[["gene__in", ["SPOP", "APC", "IDH2"]]]'
results <- Dataset.query(id = dataset$id, limit = 1000, paginate = TRUE, filters=filters)

# Annotate with a new column by running a migration
new_column <- list(
        name='interesting',
        data_type='boolean',
        description='These genes are interesting',
        expression='True'
)

# Same gene filter as the query above, expressed as a nested list
filters <- list(list("gene__in", list("SPOP", "APC", "IDH2")))

# Source and target are the same dataset: the filtered records are
# written back with the new column added
dm <- DatasetMigration.create(
    source_id=dataset$id,
    target_id=dataset$id,
    target_fields=list(new_column),
    source_params=list(filters=filters),
    commit_mode='upsert'
)

# Wait until migration is completed
Dataset.activity(dataset$id)

# Same number of results!
new_results <- Dataset.query(
    id = dataset$id,
    filters='[["interesting", true]]',
    limit = 1000,
    paginate = TRUE
)

# Compare the annotated records against the ORIGINAL query results
# (counting entries of the same column in both result sets)
length(new_results$amino_acid_change) == length(results$amino_acid_change)


# Export variants in the KRAS gene into Excel
filters <- list(list("gene", "KRAS"))
export <- DatasetExport.create(
    dataset$id,
    format = 'excel',
    params=list(filters=filters)
)

# Wait until export is completed
Dataset.activity(dataset$id)

# Download
url <- DatasetExport.get_download_url(export$id)
download.file(url, 'variants_kras.xlsx')

Last updated 2022-12-07.

Have questions or comments about this article? Get in touch with SolveBio Support by submitting a ticket or by sending us an email.