Editing AI for Genomic Medicine (section)

== <span style="color: #FFFFFF;">Applying</span> ==
'''Variant pathogenicity prediction using a protein language model:'''
<syntaxhighlight lang="python">
import torch
from transformers import EsmTokenizer, EsmForMaskedLM

# Load the ESM-2 protein language model used for variant effect prediction.
# A variant's effect is the change in log-likelihood when an amino acid is mutated.
ESM2_CHECKPOINT = "facebook/esm2_t33_650M_UR50D"
tokenizer = EsmTokenizer.from_pretrained(ESM2_CHECKPOINT)
# .eval() switches the module to evaluation mode and returns the module itself
model = EsmForMaskedLM.from_pretrained(ESM2_CHECKPOINT).eval()

def predict_variant_effect(wt_sequence: str, position: int, mutant_aa: str) -> float:
    """
    Score a missense variant with the ESM-2 masked language model.

    The residue at ``position`` is masked, the model predicts a token
    distribution for that site, and the score is
    ``log P(mutant) - log P(wild-type)``.

    Args:
        wt_sequence: Wild-type protein sequence, one letter per residue.
        position: 0-based index of the residue to mutate.
        mutant_aa: Single-letter code of the substituted amino acid.

    Returns:
        The log-likelihood ratio. Lower (more negative) = mutant is less
        likely than wild type = potentially more pathogenic.

    Raises:
        IndexError: If ``position`` is outside ``wt_sequence``.
        ValueError: If ``mutant_aa`` is not a single character, or if the
            wild-type or mutant residue is unknown to the tokenizer.
    """
    # Reject negative indices explicitly: Python would wrap them around for
    # wt_sequence[position], but position + 1 would mask a different token,
    # silently producing a meaningless score.
    if not 0 <= position < len(wt_sequence):
        raise IndexError(
            f"position {position} is out of range for a sequence of "
            f"length {len(wt_sequence)}"
        )
    if len(mutant_aa) != 1:
        raise ValueError(
            f"mutant_aa must be a single-letter amino acid code, got {mutant_aa!r}"
        )

    wt_aa = wt_sequence[position]
    wt_id = tokenizer.convert_tokens_to_ids(wt_aa)
    mut_id = tokenizer.convert_tokens_to_ids(mutant_aa)
    # Unknown symbols are mapped to the <unk> id; scoring against it would
    # return a number that has nothing to do with the requested residue.
    unk_id = tokenizer.unk_token_id
    for residue, token_id in ((wt_aa, wt_id), (mutant_aa, mut_id)):
        if token_id == unk_id:
            raise ValueError(f"residue {residue!r} is not in the tokenizer vocabulary")

    # Tokenize wild-type sequence and mask the position of interest
    tokens = tokenizer(wt_sequence, return_tensors='pt')
    masked = tokens['input_ids'].clone()
    masked[0, position + 1] = tokenizer.mask_token_id  # +1 for the leading [CLS] token

    with torch.no_grad():
        output = model(**{**tokens, 'input_ids': masked})
        logits = output.logits[0, position + 1]  # Logits at masked position

    # log_softmax is numerically stable; log(softmax(x)) can underflow to -inf
    log_probs = torch.log_softmax(logits, dim=-1)
    # Log-likelihood difference: mutant vs. wild-type
    score = (log_probs[mut_id] - log_probs[wt_id]).item()
    return score  # Negative = less likely than WT = potentially pathogenic

# Example: BRCA1 missense variant
# Only a truncated fragment of the protein is shown here for brevity. The
# literal must contain residues only: a trailing "..." placeholder would be
# tokenized and fed to the model as part of the sequence. For real analyses
# use the full canonical sequence (e.g. from UniProt).
wt_seq = "MDLSALRVEEVQNVINAMQKILECPICLELIKEPVSTKCDHIFCKFCMLKLLNQKKGPSQCPLCKNDITKRSLQESTRFSQLVEELLKIICAFQLDTGLEYANSYNFAKKENNSPEHLKDEVSIIQSMGYRNACKESSLSSSG"
# position is 0-based, so 100 refers to the 101st residue of the fragment
score = predict_variant_effect(wt_seq, position=100, mutant_aa="Q")
print(f"Variant effect score: {score:.4f}")
# NOTE: -2.0 is an illustrative cutoff, not a clinically calibrated threshold
print(f"Interpretation: {'Potentially pathogenic' if score < -2.0 else 'Likely benign'}")

# Production approach: use AlphaMissense predictions (Google DeepMind, pre-computed)
# Download: https://zenodo.org/record/8208688
# Contains pathogenicity scores for all 71M possible human missense variants
</syntaxhighlight>

; Genomic medicine AI tools
: '''Variant interpretation''' → AlphaMissense, CADD, REVEL, ClinPred, EVE
: '''Pharmacogenomics''' → GeneSight, Translational Drug Development, CPIC decision support
: '''Precision oncology''' → FoundationOne CDx, MSK-IMPACT, Tempus xT + treatment matching AI
: '''Liquid biopsy''' → GRAIL Galleri, Exact Sciences Oncotype DX, CancerSEEK
: '''Rare disease''' → Emedgene (Illumina), Fabric Genomics, PhenoTips AI
</div>

<div style="background-color: #8B4500; color: #FFFFFF; padding: 20px; border-radius: 8px; margin-bottom: 15px;">