Editing Cooperative AI (section)

== <span style="color: #FFFFFF;">Applying</span> ==
'''A cooperative agent that communicates with its teammates in a multi-agent setting:'''
<syntaxhighlight lang="python">
import torch
import torch.nn as nn

class CommunicatingAgent(nn.Module):
    """Agent that can send and receive messages for cooperation.

    The agent has three sub-networks:

    * ``obs_encoder`` maps an observation (last dimension ``obs_dim``) to a
      ``hidden``-dimensional feature vector.
    * ``msg_encoder`` maps those features to a ``msg_dim``-dimensional
      message; the final ``Tanh`` keeps every component in ``[-1, 1]``.
    * ``policy`` maps the concatenation of the agent's own features and the
      aggregated teammate message to ``n_actions`` action scores (logits).
    """

    def __init__(
        self,
        obs_dim: int,
        n_actions: int,
        msg_dim: int = 16,
        hidden: int = 128,
    ) -> None:
        """Build the encoder, message and policy networks.

        Args:
            obs_dim: Size of the last dimension of each observation.
            n_actions: Number of action scores produced by the policy.
            msg_dim: Size of the message exchanged between agents.
            hidden: Width of the hidden feature layers.
        """
        super().__init__()
        self.obs_encoder = nn.Sequential(
            nn.Linear(obs_dim, hidden), nn.ReLU()
        )
        # Message encoder: what to communicate to teammates
        self.msg_encoder = nn.Sequential(
            nn.Linear(hidden, msg_dim), nn.Tanh()
        )
        # Policy: uses own obs + received messages from teammates
        self.policy = nn.Sequential(
            nn.Linear(hidden + msg_dim, hidden), nn.ReLU(),
            nn.Linear(hidden, n_actions)
        )

    def encode_message(self, obs: torch.Tensor) -> torch.Tensor:
        """Generate message to share with teammates.

        Args:
            obs: Observation tensor whose last dimension is ``obs_dim``.

        Returns:
            Tensor whose last dimension is ``msg_dim``.
        """
        return self.msg_encoder(self.obs_encoder(obs))

    def act(self, obs: torch.Tensor, received_msgs: torch.Tensor) -> torch.Tensor:
        """Choose action given observation and teammates' messages.

        Args:
            obs: Observation tensor whose last dimension is ``obs_dim``.
            received_msgs: Teammate messages stacked along dimension 0
                (one entry per teammate). May be empty when the agent has
                no teammates.

        Returns:
            Action scores (logits) whose last dimension is ``n_actions``.
        """
        obs_feat = self.obs_encoder(obs)
        if received_msgs.numel() == 0:
            # The mean of an empty tensor is NaN, which would poison every
            # logit. With no teammates, feed the policy a neutral all-zero
            # message shaped like one aggregated message.
            msg_dim = self.msg_encoder[0].out_features
            agg_msg = obs_feat.new_zeros(*obs_feat.shape[:-1], msg_dim)
        else:
            # Aggregate messages from all teammates (simple mean).
            agg_msg = received_msgs.mean(dim=0)
        combined = torch.cat([obs_feat, agg_msg], dim=-1)
        return self.policy(combined)

# Cooperative episode rollout (no parameter updates happen here)
def cooperative_episode(agents, env):
    """Play one episode with communicating agents and return the team reward.

    Every step has two phases: first each agent encodes a message from its
    own observation, then each agent picks the highest-scoring action given
    its observation and the messages of all *other* agents.

    Args:
        agents: Sequence of objects exposing ``encode_message(obs)`` and
            ``act(obs, received_msgs)``.
        env: Environment exposing ``reset()``, ``max_steps`` and
            ``step(actions)``; ``step`` must return a 4-tuple
            ``(observations, team_reward, done, info)``.

    Returns:
        Sum of the team rewards collected until ``done`` is truthy or
        ``env.max_steps`` steps have been taken.
    """
    observations = env.reset()
    episode_return = 0
    for _ in range(env.max_steps):
        # Phase 1 - communication: collect one message per agent.
        broadcast = torch.stack([
            member.encode_message(view)
            for member, view in zip(agents, observations)
        ])

        # Phase 2 - action: each agent sees every message except its own.
        joint_action = []
        for idx, (member, view) in enumerate(zip(agents, observations)):
            inbox = torch.cat([broadcast[:idx], broadcast[idx + 1:]])
            scores = member.act(view, inbox)
            joint_action.append(scores.argmax())

        observations, team_reward, done, _ = env.step(joint_action)
        episode_return += team_reward
        if done:
            break
    return episode_return
</syntaxhighlight>

; Cooperative AI research areas
: '''Hanabi benchmark''' → Human-AI cooperation; theory of mind; zero-shot coordination
: '''Traffic coordination''' → Autonomous vehicles cooperating at intersections
: '''AI teammates''' → AI pair programming; AI surgical assistants; AI co-pilots
: '''Mechanism design''' → Auction design, voting systems, market mechanisms with AI agents
: '''Corrigibility research''' → MIRI, ARC (Alignment Research Center), Anthropic safety team
</div>

<div style="background-color: #8B4500; color: #FFFFFF; padding: 20px; border-radius: 8px; margin-bottom: 15px;">