Editing Active Learning (section)

== <span style="color: #FFFFFF;">Applying</span> ==
'''Active learning loop for image classification:'''
<syntaxhighlight lang="python">
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Subset
from torchvision import models

class ActiveLearner:
    """Pool-based active learning loop around a PyTorch classifier.

    The learner tracks which positions of ``unlabeled_pool`` are labeled
    (``labeled_indices``) and which are not (``unlabeled_indices``). Labels
    themselves are not stored on the learner; they are passed to
    :meth:`train` as a mapping from pool index to label.
    """

    def __init__(self, model, unlabeled_pool, labeled_indices, device='cuda'):
        """Set up the learner.

        Args:
            model: Classifier module; it is moved to ``device``.
            unlabeled_pool: Indexable dataset supporting ``len()`` whose
                items unpack as ``(input, _)``; the second element is ignored.
            labeled_indices: Iterable of pool indices that already have labels.
            device: Device the model and batches are moved to.
        """
        self.device = device
        self.model = model.to(device)
        self.unlabeled_pool = unlabeled_pool
        # Materialize once: the argument may be a one-shot iterator, and the
        # membership set must be built a single time, not per pool element.
        self.labeled_indices = list(labeled_indices)
        already_labeled = set(self.labeled_indices)
        self.unlabeled_indices = [i for i in range(len(unlabeled_pool))
                                  if i not in already_labeled]

    def train(self, labels_dict, epochs=5, batch_size=32, lr=1e-4):
        """Train the model on the currently labeled examples.

        Args:
            labels_dict: Mapping from pool index to class label; must contain
                every index in ``self.labeled_indices``.
            epochs: Number of passes over the labeled set.
            batch_size: Mini-batch size.
            lr: Adam learning rate.

        Raises:
            KeyError: If a labeled index is missing from ``labels_dict``.
        """
        if not self.labeled_indices:
            return  # nothing to fit yet

        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()
        self.model.train()
        for _ in range(epochs):
            # Shuffle the pool indices ourselves (instead of shuffle=True) so
            # the order of samples in each batch is known and every input can
            # be paired with the label of its own pool index.
            order = torch.randperm(len(self.labeled_indices)).tolist()
            epoch_indices = [self.labeled_indices[j] for j in order]
            loader = DataLoader(Subset(self.unlabeled_pool, epoch_indices),
                                batch_size=batch_size)
            for batch_no, (X, _) in enumerate(loader):
                start = batch_no * batch_size
                batch_indices = epoch_indices[start:start + len(X)]
                y = torch.tensor(
                    [labels_dict[i] for i in batch_indices]).to(self.device)
                X = X.to(self.device)
                loss = criterion(self.model(X), y)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

    def query(self, n_query=10, strategy='uncertainty') -> list:
        """Select up to ``n_query`` unlabeled pool indices to label next.

        Args:
            n_query: Number of examples requested; capped at the number of
                examples that are still unlabeled.
            strategy: ``'uncertainty'`` picks the highest predictive entropy;
                ``'margin'`` picks the smallest gap between the two most
                probable classes.

        Returns:
            List of indices into ``unlabeled_pool`` (empty when nothing is
            left to query or ``n_query`` is not positive).

        Raises:
            ValueError: If ``strategy`` is not one of the supported names.
        """
        if strategy not in ('uncertainty', 'margin'):
            raise ValueError(
                f"unknown strategy {strategy!r}; expected 'uncertainty' or 'margin'")

        # topk raises when k exceeds the number of candidates, so cap it.
        k = min(n_query, len(self.unlabeled_indices))
        if k <= 0:
            return []

        unlabeled_ds = Subset(self.unlabeled_pool, self.unlabeled_indices)
        loader = DataLoader(unlabeled_ds, batch_size=64)
        self.model.eval()
        batch_probs = []
        with torch.no_grad():
            for X, _ in loader:
                logits = self.model(X.to(self.device))
                batch_probs.append(torch.softmax(logits, dim=1).cpu())
        all_probs = torch.cat(batch_probs)

        if strategy == 'uncertainty':
            # Entropy sampling. Clamping log(p) keeps p == 0 terms at 0
            # instead of producing 0 * -inf = NaN.
            entropy = -(all_probs * all_probs.log().clamp(-100, 0)).sum(1)
            query_local_idx = entropy.topk(k).indices.tolist()
        else:
            # Margin sampling: smallest gap between the top-2 probabilities.
            top2 = all_probs.topk(2, dim=1).values
            margin = top2[:, 0] - top2[:, 1]
            query_local_idx = margin.topk(k, largest=False).indices.tolist()

        # Rows of all_probs follow the order of self.unlabeled_indices, so
        # translate row positions back to pool indices.
        return [self.unlabeled_indices[i] for i in query_local_idx]

    def label_and_add(self, indices, labels_dict):
        """Move newly labeled examples from the unlabeled to the labeled set.

        Indices that are already labeled, or repeated within ``indices``, are
        added only once so no example is duplicated in training.

        Args:
            indices: Iterable of pool indices that have just been labeled.
            labels_dict: Not read by this method; kept in the signature for
                callers that pass it. Labels are supplied to ``train``.
        """
        known = set(self.labeled_indices)
        newly_labeled = set()
        for idx in indices:
            if idx in known:
                continue
            known.add(idx)
            newly_labeled.add(idx)
            self.labeled_indices.append(idx)
        self.unlabeled_indices = [i for i in self.unlabeled_indices
                                  if i not in newly_labeled]
</syntaxhighlight>

; Active learning strategy guide
: '''Small budget (<100 labels)''' → Core-set (diversity), after seeding with about 10 randomly chosen labeled examples
: '''Medium budget''' → BADGE (diverse + informative), uncertainty sampling
: '''Large unlabeled pool''' → Uncertainty sampling (fast); approximate with embeddings
: '''Batch selection''' → BADGE, k-means++ on uncertain examples
: '''NLP tasks''' → Uncertainty + semantic diversity filtering; avoid near-duplicate queries
: '''Medical/scientific''' → Core-set + expert-in-the-loop revision cycles
</div>

<div style="background-color: #8B4500; color: #FFFFFF; padding: 20px; border-radius: 8px; margin-bottom: 15px;">