Data Access

How to access colony data for analysis and research.

Available Datasets

Dataset	Records	Access	Description
Actors	106+	TypeDB	All colony actors (ants, queen, humans)
Edges	1000s	TypeDB	Graph edges with pheromone levels
Distinguished Points	22,690+	TypeDB + Gateway	Hunt BTC collision candidates
Traversal Events	100K+	TypeDB	Historical agent movements
Pheromone Snapshots	Daily	TypeDB	Edge states over time
Colony Metrics	Hourly	TypeDB	Aggregate health indicators

Quick Access Methods

1. Python Client (Recommended)

from ants.knowledge import TypeDBClient
from ants.knowledge.analytics import ColonyAnalytics

async def get_data():
    """Print headline colony metrics obtained through the analytics helper.

    Connects a ``TypeDBClient``, fetches metrics via
    ``ColonyAnalytics.get_metrics()``, prints the total actor, total edge and
    superhighway counts, and disconnects the client.
    """
    client = TypeDBClient()
    await client.connect()

    # Disconnect even if fetching or printing the metrics raises, so the
    # connection is never left open.
    try:
        # Use analytics helper
        analytics = ColonyAnalytics(client)
        metrics = await analytics.get_metrics()

        print(f"Total actors: {metrics.total_actors}")
        print(f"Total edges: {metrics.total_edges}")
        print(f"Superhighways: {metrics.superhighway_count}")
    finally:
        await client.disconnect()

2. Raw TypeQL Queries

from ants.knowledge import TypeDBClient

async def query_data():
    """Run a raw TypeQL query and print each edge's id and pheromone level.

    Connects a ``TypeDBClient``, selects ``$id`` and ``$p`` for every edge,
    prints one ``id: pheromone`` line per result row, and disconnects.
    """
    client = TypeDBClient()
    await client.connect()

    # Disconnect even if the query or the print loop raises, so the
    # connection is never left open.
    try:
        # Get all edges with pheromone
        edges = await client.query("""
            match $e isa edge,
                has id $id,
                has pheromone_level $p;
            select $id, $p;
        """)

        for edge in edges:
            print(f"{edge['id']}: {edge['p']}")
    finally:
        await client.disconnect()

3. Export to Files

import pandas as pd

from ants.knowledge import TypeDBClient
from ants.knowledge.analytics import ColonyAnalytics

async def export_data():
    """Export colony data to files in the current working directory.

    Writes ``colony_metrics.json`` through ``ColonyAnalytics.export_json`` and
    saves the edges DataFrame to ``edges.csv`` without the index column.
    """
    client = TypeDBClient()
    await client.connect()

    # Disconnect even if one of the exports raises, so the connection is
    # never left open.
    try:
        analytics = ColonyAnalytics(client)

        # Export to JSON
        await analytics.export_json("colony_metrics.json")

        # Or get as DataFrame
        edges_df = await analytics.get_edges_dataframe()
        edges_df.to_csv("edges.csv", index=False)
    finally:
        await client.disconnect()

Detailed Data Access

Actors

# All actors: return the id and role of every actor
match $a isa actor, has id $id, has role $r;
select $id, $r;

# AI agents only: return the id and agent_subtype of every ai-agent
match $a isa ai-agent,
    has id $id,
    has agent_subtype $subtype;
select $id, $subtype;

# By caste (for ants): return the ids of ants whose caste is "scout"
match $a isa ant, has caste "scout", has id $id;
select $id;

Edges & Pheromone

# All edges: return the id, pheromone level and base weight of every edge
match $e isa edge,
    has id $id,
    has pheromone_level $p,
    has base_weight $w;
select $id, $p, $w;

# Superhighways only: edges whose pheromone_level is greater than 20.0
match $e isa edge, has pheromone_level $p, has id $id;
$p > 20.0;
select $id, $p;

# Edges from a specific node: for each edge whose source is the concept with
# id "btc-perp", return the target's id and the edge's pheromone level
match
    $start isa concept, has id "btc-perp";
    $e (source: $start, target: $end) isa edge,
        has pheromone_level $p;
    $end has id $end_id;
select $end_id, $p;

Distinguished Points (Hunt BTC)

# All DPs: return the hash, worker type and distance of every distinguished point
match $dp isa distinguished_point,
    has hash $h,
    has worker_type $type,
    has distance $d;
select $h, $type, $d;

# Tame only: return the hashes of distinguished points with worker_type "tame"
match $dp isa distinguished_point,
    has worker_type "tame",
    has hash $h;
select $h;

# Check for collisions: return every hash shared by a "tame" and a "wild"
# distinguished point (both patterns bind the same $h)
match
    $t isa distinguished_point, has hash $h, has worker_type "tame";
    $w isa distinguished_point, has hash $h, has worker_type "wild";
select $h;

Events

# Recent traversals: sort by timestamp descending, then keep the first 100 rows
# (timestamp, actor id, source id, target id)
match $t isa traversal,
    has timestamp $ts,
    has actor_id $actor,
    has source_id $src,
    has target_id $tgt;
sort $ts desc;
limit 100;
select $ts, $actor, $src, $tgt;

# Discoveries: return the timestamp, actor id and value of every discovery
# (unsorted and unlimited)
match $d isa discovery,
    has timestamp $ts,
    has actor_id $actor,
    has value $v;
select $ts, $actor, $v;

Data Analysis Patterns

Pheromone Distribution

import pandas as pd
import matplotlib.pyplot as plt

async def analyze_pheromone():
    """Plot and summarize the distribution of edge pheromone levels.

    Queries the pheromone level of every edge, shows a 50-bin histogram with a
    logarithmic count axis, then prints the mean, median, maximum and the
    number of edges above 20 (superhighways).

    Uses a ``client`` that this snippet does not define; presumably an
    already-connected ``TypeDBClient`` from the enclosing scope.
    """
    edges = await client.query("""
        match $e isa edge, has pheromone_level $p;
        select $p;
    """)

    df = pd.DataFrame(edges)

    # An empty result produces a frame without a 'p' column; stop here rather
    # than failing with a KeyError below.
    if df.empty:
        print("No edges returned; nothing to analyze.")
        return

    # Distribution
    df['p'].hist(bins=50)
    plt.xlabel('Pheromone Level')
    plt.ylabel('Count')
    plt.title('Pheromone Distribution')
    plt.yscale('log')
    plt.show()

    # Statistics
    print(f"Mean: {df['p'].mean():.2f}")
    print(f"Median: {df['p'].median():.2f}")
    print(f"Max: {df['p'].max():.2f}")
    print(f"Superhighways: {(df['p'] > 20).sum()}")

Activity Over Time

async def analyze_activity():
    """Plot the number of traversals per hour of the day.

    Queries every traversal timestamp, converts the values with
    ``pd.to_datetime``, groups them by hour and shows the counts as a bar
    chart.

    Uses a ``client`` that this snippet does not define; presumably an
    already-connected ``TypeDBClient`` from the enclosing scope.
    """
    events = await client.query("""
        match $t isa traversal, has timestamp $ts;
        select $ts;
    """)

    df = pd.DataFrame(events)

    # An empty result produces a frame without a 'ts' column; stop here rather
    # than failing with a KeyError below.
    if df.empty:
        print("No traversals returned; nothing to plot.")
        return

    df['ts'] = pd.to_datetime(df['ts'])
    df['hour'] = df['ts'].dt.hour

    # Hourly activity
    df.groupby('hour').size().plot(kind='bar')
    plt.xlabel('Hour of Day')
    plt.ylabel('Traversals')
    plt.title('Colony Activity by Hour')
    plt.show()

Network Visualization

import networkx as nx

async def visualize_network():
    """Draw the colony graph with edge width proportional to pheromone level.

    Fetches the source id, target id and pheromone level of every edge, loads
    them into a directed graph with the pheromone as the ``weight`` attribute,
    and renders a spring layout where each edge is drawn at ``weight / 10``.
    """
    rows = await client.query("""
        match $e (source: $s, target: $t) isa edge,
            has pheromone_level $p;
        $s has id $src;
        $t has id $tgt;
        select $src, $tgt, $p;
    """)

    graph = nx.DiGraph()
    # Bulk insertion stores the third tuple item under the 'weight' attribute.
    graph.add_weighted_edges_from(
        (row['src'], row['tgt'], row['p']) for row in rows
    )

    # Draw with pheromone as edge width
    layout = nx.spring_layout(graph)
    widths = [level / 10 for _, _, level in graph.edges(data='weight')]
    nx.draw(graph, layout, width=widths, with_labels=True)
    plt.show()

Data Access Best Practices

1. Use Pagination

# First page: skip 0 rows and return at most 1000 edge ids.
# NOTE(review): there is no sort stage, so which rows land on each page depends
# on the server's result order — confirm it is stable across calls.
match $e isa edge, has id $id;
offset 0; limit 1000;
select $id;

2. Filter Early

# Good: filter in the query, so only pheromone levels above 5.0 are returned
match $e isa edge, has pheromone_level $p;
$p > 5.0;
select $p;

# Bad: filter in Python
# (fetches all edges first, then discards the ones that do not match)

3. Cache Results

import json
from pathlib import Path

CACHE_FILE = Path("edge_cache.json")


async def get_edges_cached():
    """Return edges from the JSON cache file, querying only on a cache miss.

    When ``CACHE_FILE`` exists its parsed JSON content is returned. Otherwise
    the query result is written to ``CACHE_FILE`` as JSON and returned.
    """
    if not CACHE_FILE.exists():
        # Cache miss: fetch, persist for later calls, and hand back the result.
        fetched = await client.query("...")
        CACHE_FILE.write_text(json.dumps(fetched))
        return fetched

    cached_text = CACHE_FILE.read_text()
    return json.loads(cached_text)

4. Use Aggregations

# Good: aggregate in TypeDB — returns the sum of pheromone levels and the row count
match $e isa edge, has pheromone_level $p;
reduce $total = sum($p), $count = count;

# Bad: aggregate in Python
# (fetches all rows first, then sums and counts them client-side)

Jupyter Notebook Templates

We provide starter notebooks in examples/:

examples/colony_overview.ipynb - Basic exploration
examples/pheromone_analysis.ipynb - Pheromone deep dive
examples/network_visualization.ipynb - Graph visualization
examples/hunt_btc_progress.ipynb - Mission metrics

pip install jupyter
jupyter notebook examples/

Data Access

Data Access

Available Datasets

Quick Access Methods

1. Python Client (Recommended)

2. Raw TypeQL Queries

3. Export to Files

Detailed Data Access

Actors

Edges & Pheromone

Distinguished Points (Hunt BTC)

Events

Data Analysis Patterns

Pheromone Distribution

Activity Over Time

Network Visualization

Data Access Best Practices

1. Use Pagination

2. Filter Early

3. Cache Results

4. Use Aggregations

Jupyter Notebook Templates

Join the Swarm Intelligence

Ants at Work