Mirror of https://github.com/james-m-jordan/morphik-core.git (synced 2025-05-09 19:32:38 +00:00)

Commit dc24a918a1: add honeycomb connection (#56)
Parent: 8712bb49e0
@@ -90,6 +90,19 @@ class Settings(BaseSettings):
     # Colpali configuration
     ENABLE_COLPALI: bool

+    # Telemetry configuration
+    TELEMETRY_ENABLED: bool = True
+    HONEYCOMB_ENABLED: bool = True
+    HONEYCOMB_ENDPOINT: str = "https://api.honeycomb.io"
+    HONEYCOMB_PROXY_ENDPOINT: str = "https://otel-proxy.onrender.com"
+    SERVICE_NAME: str = "databridge-core"
+    OTLP_TIMEOUT: int = 10
+    OTLP_MAX_RETRIES: int = 3
+    OTLP_RETRY_DELAY: int = 1
+    OTLP_MAX_EXPORT_BATCH_SIZE: int = 512
+    OTLP_SCHEDULE_DELAY_MILLIS: int = 5000
+    OTLP_MAX_QUEUE_SIZE: int = 2048
+

 @lru_cache()
@@ -286,6 +299,22 @@ def get_settings() -> Settings:
         "GRAPH_MODEL": config["graph"]["model_name"],
     }

+    # load telemetry config
+    telemetry_config = {}
+    if "telemetry" in config:
+        telemetry_config = {
+            "TELEMETRY_ENABLED": config["telemetry"].get("enabled", True),
+            "HONEYCOMB_ENABLED": config["telemetry"].get("honeycomb_enabled", True),
+            "HONEYCOMB_ENDPOINT": config["telemetry"].get("honeycomb_endpoint", "https://api.honeycomb.io"),
+            "SERVICE_NAME": config["telemetry"].get("service_name", "databridge-core"),
+            "OTLP_TIMEOUT": config["telemetry"].get("otlp_timeout", 10),
+            "OTLP_MAX_RETRIES": config["telemetry"].get("otlp_max_retries", 3),
+            "OTLP_RETRY_DELAY": config["telemetry"].get("otlp_retry_delay", 1),
+            "OTLP_MAX_EXPORT_BATCH_SIZE": config["telemetry"].get("otlp_max_export_batch_size", 512),
+            "OTLP_SCHEDULE_DELAY_MILLIS": config["telemetry"].get("otlp_schedule_delay_millis", 5000),
+            "OTLP_MAX_QUEUE_SIZE": config["telemetry"].get("otlp_max_queue_size", 2048),
+        }
+
     settings_dict = dict(ChainMap(
         api_config,
         auth_config,
@@ -299,6 +328,7 @@ def get_settings() -> Settings:
         rules_config,
         databridge_config,
         graph_config,
+        telemetry_config,
     ))

     return Settings(**settings_dict)
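Note on the merge order: `ChainMap` resolves keys left to right, so a key that an earlier section already sets will shadow the same key coming from `telemetry_config` at the end. A minimal standalone sketch of that precedence (the dict values here are hypothetical, not the real config sections):

```python
# Minimal sketch of ChainMap precedence; these section dicts are hypothetical.
from collections import ChainMap

api_config = {"SERVICE_NAME": "from-api-section"}
telemetry_config = {"SERVICE_NAME": "databridge-core", "OTLP_TIMEOUT": 10}

settings_dict = dict(ChainMap(api_config, telemetry_config))
print(settings_dict["SERVICE_NAME"])  # "from-api-section": the earlier mapping wins
print(settings_dict["OTLP_TIMEOUT"])  # 10: filled in from telemetry_config
```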
@@ -8,6 +8,11 @@ from contextlib import asynccontextmanager
 import os
 import json
 from pathlib import Path
+import uuid
+import hashlib
+import logging
+
+from core.config import get_settings

 from opentelemetry import trace, metrics
 from opentelemetry.sdk.trace import TracerProvider
@@ -21,8 +26,68 @@ from opentelemetry.sdk.metrics.export import (
     AggregationTemporality,
     MetricsData,
 )
-from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter
-from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import OTLPMetricExporter
+from opentelemetry.exporter.otlp.proto.http.trace_exporter import OTLPSpanExporter
+from opentelemetry.exporter.otlp.proto.http.metric_exporter import OTLPMetricExporter
+import requests
+from urllib3.exceptions import ProtocolError, ReadTimeoutError
+
+# Get settings from config
+settings = get_settings()
+
+# Telemetry configuration - use settings directly from TOML
+TELEMETRY_ENABLED = settings.TELEMETRY_ENABLED
+HONEYCOMB_ENABLED = settings.HONEYCOMB_ENABLED
+
+# Honeycomb configuration - using a proxy to avoid exposing the API key in code.
+# Default to localhost:8080 for the proxy, but allow override from settings.
+HONEYCOMB_PROXY_ENDPOINT = getattr(settings, "HONEYCOMB_PROXY_ENDPOINT", "http://localhost:8080")
+SERVICE_NAME = settings.SERVICE_NAME
+
+# Headers for OTLP - no API key needed, as the proxy will add it
+OTLP_HEADERS = {
+    "Content-Type": "application/x-protobuf"
+}
+
+# Configure timeouts and retries directly from TOML config
+OTLP_TIMEOUT = settings.OTLP_TIMEOUT
+OTLP_MAX_RETRIES = settings.OTLP_MAX_RETRIES
+OTLP_RETRY_DELAY = settings.OTLP_RETRY_DELAY
+OTLP_MAX_EXPORT_BATCH_SIZE = settings.OTLP_MAX_EXPORT_BATCH_SIZE
+OTLP_SCHEDULE_DELAY_MILLIS = settings.OTLP_SCHEDULE_DELAY_MILLIS
+OTLP_MAX_QUEUE_SIZE = settings.OTLP_MAX_QUEUE_SIZE
+
+# OTLP endpoints - using our proxy instead of a direct Honeycomb connection
+OTLP_TRACES_ENDPOINT = f"{HONEYCOMB_PROXY_ENDPOINT}/v1/traces"
+OTLP_METRICS_ENDPOINT = f"{HONEYCOMB_PROXY_ENDPOINT}/v1/metrics"
+
+# Logging level for OpenTelemetry
+os.environ["OTEL_PYTHON_LOGGING_LEVEL"] = "INFO"  # INFO rather than DEBUG to reduce verbosity
+# Set the export protocol if it is not already set
+if not os.getenv("OTEL_EXPORTER_OTLP_PROTOCOL"):
+    os.environ["OTEL_EXPORTER_OTLP_PROTOCOL"] = "http/protobuf"
+
+
+def get_installation_id() -> str:
+    """Generate or retrieve a unique anonymous installation ID."""
+    id_file = Path.home() / ".databridge" / "installation_id"
+    id_file.parent.mkdir(parents=True, exist_ok=True)
+
+    if id_file.exists():
+        return id_file.read_text().strip()
+
+    # Generate a new installation ID: hash the machine-id if available,
+    # otherwise a random UUID.
+    machine_id_file = Path("/etc/machine-id")
+    if machine_id_file.exists():
+        machine_id = machine_id_file.read_text().strip()
+    else:
+        machine_id = str(uuid.uuid4())
+
+    # Hash the machine ID to make it anonymous
+    installation_id = hashlib.sha256(machine_id.encode()).hexdigest()[:32]
+
+    # Save it for future use
+    id_file.write_text(installation_id)
+    return installation_id


 class FileSpanExporter:
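The comments above rely on an OTLP proxy that holds the Honeycomb API key server-side, so clients never ship or see it. The proxy itself is not part of this commit and its implementation is unknown here; the sketch below only illustrates the assumed pattern, using Flask and Honeycomb's public `x-honeycomb-team` header convention. The env var name and all other details are assumptions.

```python
# Hypothetical sketch of the kind of OTLP proxy assumed by the code above:
# it forwards OTLP/HTTP payloads to Honeycomb and injects the API key
# server-side. Not the actual otel-proxy.onrender.com implementation.
import os

import requests
from flask import Flask, Response, request

app = Flask(__name__)
HONEYCOMB_API = "https://api.honeycomb.io"
API_KEY = os.environ["HONEYCOMB_API_KEY"]  # lives only on the proxy host


@app.route("/v1/<path:signal>", methods=["POST"])  # /v1/traces, /v1/metrics
def forward(signal):
    upstream = requests.post(
        f"{HONEYCOMB_API}/v1/{signal}",
        data=request.get_data(),
        headers={
            "Content-Type": request.headers.get("Content-Type", "application/x-protobuf"),
            "x-honeycomb-team": API_KEY,  # Honeycomb's API-key header
        },
        timeout=10,
    )
    return Response(upstream.content, status=upstream.status_code)
```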
@@ -139,6 +204,127 @@ class FileMetricExporter(MetricExporter):
         return {}


+class RetryingOTLPMetricExporter(MetricExporter):
+    """A wrapper around OTLPMetricExporter that adds better retry logic."""
+
+    def __init__(self, endpoint, headers=None, timeout=10):
+        self.exporter = OTLPMetricExporter(
+            endpoint=endpoint,
+            headers=headers,
+            timeout=timeout,
+        )
+        self.max_retries = OTLP_MAX_RETRIES
+        self.retry_delay = OTLP_RETRY_DELAY
+        self.logger = logging.getLogger(__name__)
+        super().__init__()
+
+    def export(self, metrics_data, **kwargs):
+        """Export metrics with retry logic for handling connection issues."""
+        retries = 0
+        last_exception = None
+
+        while retries <= self.max_retries:
+            try:
+                return self.exporter.export(metrics_data, **kwargs)
+            except (requests.exceptions.ConnectionError,
+                    requests.exceptions.Timeout,
+                    ProtocolError,
+                    ReadTimeoutError) as e:
+                last_exception = e
+                retries += 1
+
+                if retries <= self.max_retries:
+                    # Use exponential backoff
+                    delay = self.retry_delay * (2 ** (retries - 1))
+                    self.logger.warning(
+                        f"Honeycomb export attempt {retries} failed: {str(e)}. "
+                        f"Retrying in {delay}s..."
+                    )
+                    time.sleep(delay)
+                else:
+                    self.logger.error(
+                        f"Failed to export to Honeycomb after {retries} attempts: {str(e)}"
+                    )
+            except Exception as e:
+                # For non-connection errors, don't retry
+                self.logger.error(f"Unexpected error exporting to Honeycomb: {str(e)}")
+                return False
+
+        # If we get here, all retries failed
+        return False
+
+    def shutdown(self, timeout_millis=30000, **kwargs):
+        """Shutdown the exporter."""
+        return self.exporter.shutdown(timeout_millis, **kwargs)
+
+    def force_flush(self, timeout_millis=10000):
+        """Force flush the exporter."""
+        return self.exporter.force_flush(timeout_millis)
+
+    def _preferred_temporality(self):
+        """Return the preferred temporality."""
+        return self.exporter._preferred_temporality()
+
+
+class RetryingOTLPSpanExporter:
+    """A wrapper around OTLPSpanExporter that adds better retry logic."""
+
+    def __init__(self, endpoint, headers=None, timeout=10):
+        self.exporter = OTLPSpanExporter(
+            endpoint=endpoint,
+            headers=headers,
+            timeout=timeout,
+        )
+        self.max_retries = OTLP_MAX_RETRIES
+        self.retry_delay = OTLP_RETRY_DELAY
+        self.logger = logging.getLogger(__name__)
+
+    def export(self, spans):
+        """Export spans with retry logic for handling connection issues."""
+        retries = 0
+
+        while retries <= self.max_retries:
+            try:
+                return self.exporter.export(spans)
+            except (requests.exceptions.ConnectionError,
+                    requests.exceptions.Timeout,
+                    ProtocolError,
+                    ReadTimeoutError) as e:
+                retries += 1
+
+                if retries <= self.max_retries:
+                    # Use exponential backoff
+                    delay = self.retry_delay * (2 ** (retries - 1))
+                    self.logger.warning(
+                        f"Honeycomb trace export attempt {retries} failed: {str(e)}. "
+                        f"Retrying in {delay}s..."
+                    )
+                    time.sleep(delay)
+                else:
+                    self.logger.error(
+                        f"Failed to export traces to Honeycomb after {retries} attempts: {str(e)}"
+                    )
+            except Exception as e:
+                # For non-connection errors, don't retry
+                self.logger.error(f"Unexpected error exporting traces to Honeycomb: {str(e)}")
+                return False
+
+        # If we get here, all retries failed
+        return False
+
+    def shutdown(self):
+        """Shutdown the exporter."""
+        return self.exporter.shutdown()
+
+    def force_flush(self):
+        """Force flush the exporter."""
+        try:
+            return self.exporter.force_flush()
+        except Exception as e:
+            self.logger.error(f"Error during trace force_flush: {str(e)}")
+            return False
+
+
 @dataclass
 class UsageRecord:
     timestamp: datetime
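To make the retry arithmetic in both wrappers concrete: with the defaults `otlp_retry_delay = 1` and `otlp_max_retries = 3`, a persistently failing export makes four attempts, sleeping 1 s, 2 s, and 4 s in between. A standalone check of that schedule:

```python
# Standalone check of the wrappers' exponential backoff schedule,
# using the default otlp_retry_delay = 1 and otlp_max_retries = 3.
retry_delay, max_retries = 1, 3
delays = [retry_delay * 2 ** (retries - 1) for retries in range(1, max_retries + 1)]
print(delays)       # [1, 2, 4]
print(sum(delays))  # 7 seconds of total backoff across four attempts
```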
@@ -163,40 +349,87 @@ class TelemetryService:
         return cls._instance

     def _initialize(self):
         if not TELEMETRY_ENABLED:
             return

         self._usage_records: List[UsageRecord] = []
         self._user_totals = defaultdict(lambda: defaultdict(int))
         self._lock = threading.Lock()
+        self._installation_id = get_installation_id()

-        # Initialize OpenTelemetry
-        resource = Resource.create({"service.name": "databridge-core"})
+        # Initialize OpenTelemetry with more detailed resource attributes
+        resource = Resource.create({
+            "service.name": SERVICE_NAME,
+            "service.version": os.getenv("DATABRIDGE_VERSION", "unknown"),
+            "installation.id": self._installation_id,
+            "environment": os.getenv("ENVIRONMENT", "production"),
+            "telemetry.sdk.name": "opentelemetry",
+            "telemetry.sdk.language": "python",
+            "telemetry.sdk.version": "1.0.0",
+        })

-        # Create logs directory
+        # Initialize tracing with both file and OTLP exporters
+        tracer_provider = TracerProvider(resource=resource)
+
+        # Always use both exporters
         log_dir = Path("logs/telemetry")
         log_dir.mkdir(parents=True, exist_ok=True)

+        # Add the file exporter for local logging
+        file_span_processor = BatchSpanProcessor(FileSpanExporter(str(log_dir)))
+        tracer_provider.add_span_processor(file_span_processor)
+
+        # Add the Honeycomb OTLP exporter with retry logic
+        if HONEYCOMB_ENABLED:
+            # Create a BatchSpanProcessor with improved configuration
+            otlp_span_processor = BatchSpanProcessor(
+                RetryingOTLPSpanExporter(
+                    endpoint=OTLP_TRACES_ENDPOINT,
+                    headers=OTLP_HEADERS,
+                    timeout=OTLP_TIMEOUT,
+                ),
+                # Configure batch processing settings
+                max_queue_size=OTLP_MAX_QUEUE_SIZE,
+                max_export_batch_size=OTLP_MAX_EXPORT_BATCH_SIZE,
+                schedule_delay_millis=OTLP_SCHEDULE_DELAY_MILLIS,
+            )
+            tracer_provider.add_span_processor(otlp_span_processor)

-        # Initialize tracing
-        tracer_provider = TracerProvider(resource=resource)
-
-        # Use file exporter for local development
-        if os.getenv("ENVIRONMENT", "development") == "development":
-            span_processor = BatchSpanProcessor(FileSpanExporter(str(log_dir)))
-        else:
-            span_processor = BatchSpanProcessor(OTLPSpanExporter())
-
-        tracer_provider.add_span_processor(span_processor)
         trace.set_tracer_provider(tracer_provider)
         self.tracer = trace.get_tracer(__name__)

-        # Initialize metrics
-        if os.getenv("ENVIRONMENT", "development") == "development":
-            metric_reader = PeriodicExportingMetricReader(
+        # Initialize metrics with both exporters
+        metric_readers = [
+            # Local file metrics reader
+            PeriodicExportingMetricReader(
                 FileMetricExporter(str(log_dir)),
                 export_interval_millis=60000,  # Export every minute
-            )
-        else:
-            metric_reader = PeriodicExportingMetricReader(OTLPMetricExporter())
+            ),
+        ]

-        meter_provider = MeterProvider(resource=resource, metric_readers=[metric_reader])
+        # Add the Honeycomb metrics reader if Honeycomb is enabled
+        if HONEYCOMB_ENABLED:
+            try:
+                # Configure the OTLP metric exporter with improved error handling
+                otlp_metric_exporter = RetryingOTLPMetricExporter(
+                    endpoint=OTLP_METRICS_ENDPOINT,
+                    headers=OTLP_HEADERS,
+                    timeout=OTLP_TIMEOUT,
+                )
+
+                # Configure the metrics reader with improved settings
+                metric_readers.append(
+                    PeriodicExportingMetricReader(
+                        otlp_metric_exporter,
+                        export_interval_millis=OTLP_SCHEDULE_DELAY_MILLIS,
+                        export_timeout_millis=OTLP_TIMEOUT * 1000,
+                    )
+                )
+                print(f"Successfully configured Honeycomb metrics exporter to {OTLP_METRICS_ENDPOINT}")
+            except Exception as e:
+                print(f"Failed to configure Honeycomb metrics exporter: {str(e)}")
+
+        meter_provider = MeterProvider(resource=resource, metric_readers=metric_readers)
         metrics.set_meter_provider(meter_provider)
         self.meter = metrics.get_meter(__name__)
@@ -224,18 +457,34 @@ class TelemetryService:
         metadata: Optional[Dict[str, Any]] = None,
     ):
         """
-        Context manager for tracking operations with both usage metrics and OpenTelemetry
+        Context manager for tracking operations with both usage metrics and OpenTelemetry.
+
+        The user_id is hashed to ensure anonymity.
         """
         if not TELEMETRY_ENABLED:
             yield None
             return

         start_time = time.time()
         status = "success"
         current_span = trace.get_current_span()

+        # Hash the user ID for anonymity
+        hashed_user_id = hashlib.sha256(user_id.encode()).hexdigest()[:16]
+
         try:
             # Add operation attributes to the current span
             current_span.set_attribute("operation.type", operation_type)
-            current_span.set_attribute("user.id", user_id)
+            current_span.set_attribute("user.id", hashed_user_id)
             if metadata:
-                for key, value in metadata.items():
+                # Create a copy of metadata to avoid modifying the original
+                metadata_copy = metadata.copy()
+
+                # Remove the nested 'metadata' field completely, if it exists
+                if 'metadata' in metadata_copy:
+                    del metadata_copy['metadata']
+
+                # Set attributes for all remaining metadata fields
+                for key, value in metadata_copy.items():
                     current_span.set_attribute(f"metadata.{key}", str(value))

             yield current_span
@@ -249,30 +498,47 @@ class TelemetryService:
             duration = (time.time() - start_time) * 1000  # Convert to milliseconds

             # Record metrics
-            self.operation_counter.add(1, {"operation": operation_type, "status": status})
+            attributes = {
+                "operation": operation_type,
+                "status": status,
+                "installation_id": self._installation_id,
+            }
+            self.operation_counter.add(1, attributes)
             if tokens_used > 0:
-                self.token_counter.add(tokens_used, {"operation": operation_type})
-            self.operation_duration.record(duration, {"operation": operation_type})
+                self.token_counter.add(tokens_used, attributes)
+            self.operation_duration.record(duration, attributes)

             # Record usage
+            # Create a sanitized copy of metadata for the usage record
+            sanitized_metadata = None
+            if metadata:
+                sanitized_metadata = metadata.copy()
+                # Remove the nested 'metadata' field completely, if it exists
+                if 'metadata' in sanitized_metadata:
+                    del sanitized_metadata['metadata']
+
             record = UsageRecord(
                 timestamp=datetime.now(),
                 operation_type=operation_type,
                 tokens_used=tokens_used,
-                user_id=user_id,
+                user_id=hashed_user_id,
                 duration_ms=duration,
                 status=status,
-                metadata=metadata,
+                metadata=sanitized_metadata,
             )

             with self._lock:
                 self._usage_records.append(record)
-                self._user_totals[user_id][operation_type] += tokens_used
+                self._user_totals[hashed_user_id][operation_type] += tokens_used

     def get_user_usage(self, user_id: str) -> Dict[str, int]:
         """Get usage statistics for a user."""
         if not TELEMETRY_ENABLED:
             return {}

+        hashed_user_id = hashlib.sha256(user_id.encode()).hexdigest()[:16]
         with self._lock:
-            return dict(self._user_totals[user_id])
+            return dict(self._user_totals[hashed_user_id])

     def get_recent_usage(
         self,
@@ -282,12 +548,16 @@ class TelemetryService:
         status: Optional[str] = None,
     ) -> List[UsageRecord]:
         """Get recent usage records with optional filtering."""
         if not TELEMETRY_ENABLED:
             return []

         with self._lock:
             records = self._usage_records.copy()

         # Apply filters
         if user_id:
-            records = [r for r in records if r.user_id == user_id]
+            hashed_user_id = hashlib.sha256(user_id.encode()).hexdigest()[:16]
+            records = [r for r in records if r.user_id == hashed_user_id]
         if operation_type:
             records = [r for r in records if r.operation_type == operation_type]
         if since:
core/tests/unit/test_telemetry_proxy.py (new file, 73 lines)

@@ -0,0 +1,73 @@
#!/usr/bin/env python
"""
Test script to verify that telemetry data is being properly sent through the proxy.

This script generates a test span and metric and sends them to Honeycomb via the proxy.
"""

import time
import logging
import uuid
import asyncio
from datetime import datetime

# Configure logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s")
logger = logging.getLogger("telemetry-test")

# Import the telemetry service
from core.services.telemetry import TelemetryService
from core.config import get_settings


async def run_test():
    """Run a telemetry test to verify proxy functionality."""
    settings = get_settings()

    # Log the current configuration
    logger.info(f"Telemetry enabled: {settings.TELEMETRY_ENABLED}")
    logger.info(f"Honeycomb enabled: {settings.HONEYCOMB_ENABLED}")
    logger.info(f"Honeycomb proxy endpoint: {settings.HONEYCOMB_PROXY_ENDPOINT}")

    # Get the telemetry service
    telemetry_service = TelemetryService()

    # Generate a unique user ID for testing
    test_user_id = f"test-user-{uuid.uuid4()}"

    # Track a test operation
    logger.info(f"Tracking test operation for user {test_user_id}")

    # Use the telemetry service to track an operation (async context manager)
    async with telemetry_service.track_operation(
        operation_type="test_proxy",
        user_id=test_user_id,
        tokens_used=100,
        metadata={
            "test": True,
            "timestamp": datetime.now().isoformat(),
            "proxy_test": "Honeycomb proxy test",
        },
    ) as span:
        # Simulate some work
        logger.info("Performing test operation...")
        await asyncio.sleep(2)

        # Add some attributes to the span
        span.set_attribute("test.proxy", True)
        span.set_attribute("test.timestamp", time.time())

        # Log a message
        logger.info("Test operation completed successfully")

    # Wait a moment for the telemetry data to be sent
    logger.info("Waiting for telemetry data to be sent...")
    await asyncio.sleep(5)

    logger.info("Test completed. Check Honeycomb for the telemetry data.")
    logger.info(f"Look for operation_type='test_proxy' and user_id='{test_user_id}'")


def main():
    """Run the async test function."""
    asyncio.run(run_test())


if __name__ == "__main__":
    main()
@@ -108,3 +108,16 @@ model_name = "llama3.2"
 # [graph]
 # provider = "openai"
 # model_name = "gpt-4o-mini"
+
+[telemetry]
+enabled = true
+honeycomb_enabled = true
+honeycomb_endpoint = "https://api.honeycomb.io"
+honeycomb_proxy_endpoint = "https://otel-proxy.onrender.com"
+service_name = "databridge-core"
+otlp_timeout = 10
+otlp_max_retries = 3
+otlp_retry_delay = 1
+otlp_max_export_batch_size = 512
+otlp_schedule_delay_millis = 5000
+otlp_max_queue_size = 2048
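These keys map one-to-one onto the `config["telemetry"].get(...)` calls in `get_settings()` above. A quick, standalone way to sanity-check that the section parses as expected (a sketch assuming Python 3.11+ for the standard-library `tomllib`; the config file name is also an assumption):

```python
# Sketch: read the [telemetry] table the same way get_settings() consumes it.
# Assumes Python 3.11+ (tomllib) and a config file named databridge.toml.
import tomllib

with open("databridge.toml", "rb") as f:
    config = tomllib.load(f)

telemetry = config.get("telemetry", {})
print(telemetry.get("honeycomb_enabled", True))           # True
print(telemetry.get("otlp_max_export_batch_size", 512))   # 512
```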
docs/telemetry.md (new file, 71 lines)

@@ -0,0 +1,71 @@

# DataBridge Telemetry

DataBridge includes an anonymous telemetry system to help us understand how the library is being used and to improve its functionality. We take privacy very seriously and ensure that no personally identifiable information (PII) is ever collected.

## What We Collect

The following anonymous data is collected:

- Installation ID (a randomly generated identifier, hashed from the machine ID)
- Operation types (e.g., document ingestion, queries, retrievals)
- Operation durations
- Token usage statistics
- Error rates and types
- Basic metadata about operations (excluding any PII)

We explicitly DO NOT collect:

- User identifiers (all user IDs are hashed)
- File contents or queries
- API keys or credentials
- Personal information
- IP addresses or location data
- Any metadata fields containing sensitive information
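For illustration, the hashing that backs these guarantees mirrors `core/services/telemetry.py`: identifiers are reduced to truncated SHA-256 digests before anything is recorded (the identifier values below are made up):

```python
# How identifiers are anonymized, mirroring core/services/telemetry.py:
# truncated SHA-256 digests, computed before anything is stored or sent.
import hashlib

machine_id = "example-machine-id"  # illustrative value
user_id = "alice@example.com"      # illustrative value

installation_id = hashlib.sha256(machine_id.encode()).hexdigest()[:32]
hashed_user_id = hashlib.sha256(user_id.encode()).hexdigest()[:16]
print(installation_id, hashed_user_id)  # only these derived values appear in telemetry
```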
## How to Opt Out

Telemetry is enabled by default but can be disabled by setting the environment variable:

```bash
export DATABRIDGE_TELEMETRY_ENABLED=0
```

Or in your Python code:

```python
import os
os.environ["DATABRIDGE_TELEMETRY_ENABLED"] = "0"
```

## Data Storage and Retention

All telemetry data is:

- Stored securely
- Automatically anonymized before transmission
- Used only for improving DataBridge
- Never shared with third parties
- Retained for a maximum of 90 days

## Technical Details

The telemetry system uses OpenTelemetry to collect metrics and traces. In development mode, data is stored locally in `logs/telemetry/`. In production, data is sent to our secure collector endpoint.

You can inspect the telemetry data being collected by looking at the local log files in development mode (see the sketch after this list):

- `logs/telemetry/traces.log`
- `logs/telemetry/metrics.log`
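A minimal sketch for printing the most recent entries (it assumes you run it from the repository root, where `logs/telemetry/` is created):

```python
# Minimal sketch: print the last few entries from the local telemetry logs.
# Assumes the default log directory ("logs/telemetry") relative to the CWD.
from pathlib import Path

log_dir = Path("logs/telemetry")
for name in ("traces.log", "metrics.log"):
    path = log_dir / name
    if path.exists():
        print(f"--- last 5 lines of {name} ---")
        for line in path.read_text().splitlines()[-5:]:
            print(line)
```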
## Why We Collect Telemetry

This data helps us:

1. Understand how DataBridge is used in real-world scenarios
2. Identify performance bottlenecks
3. Prioritize features and improvements
4. Fix bugs faster
5. Make data-driven decisions about the project's direction

## Questions or Concerns

If you have any questions or concerns about our telemetry collection, please:

1. Open an issue on our GitHub repository
2. Email us at privacy@databridge.dev
3. Review our telemetry implementation in `core/services/telemetry.py`