bug fixes

2025-05-09 19:32:38 +00:00 · 2025-03-26 17:07:38 -07:00 · 2025-03-26 17:07:38 -07:00 · f0c44cb8ea
commit f0c44cb8ea
parent 9dcb7b6b1d
3 changed files with 16 additions and 189 deletions
--- a/core/config.py
+++ b/core/config.py
@ -237,7 +237,7 @@ def get_settings() -> Settings:
        )

    # load storage config
-    storage_config = {"STORAGE_PROVIDER": config["storage"]["provider"]}
+    storage_config = {"STORAGE_PROVIDER": config["storage"]["provider"], "STORAGE_PATH": config["storage"]["storage_path"]}
    match storage_config["STORAGE_PROVIDER"]:
        case "local":
            storage_config.update({"STORAGE_PATH": config["storage"]["storage_path"]})
--- a/core/services/telemetry.py
+++ b/core/services/telemetry.py
@ -40,7 +40,8 @@ HONEYCOMB_ENABLED = settings.HONEYCOMB_ENABLED

 # Honeycomb configuration - using proxy to avoid exposing API key in code
 # Default to localhost:8080 for the proxy, but allow override from settings
-HONEYCOMB_PROXY_ENDPOINT = getattr(settings, "HONEYCOMB_PROXY_ENDPOINT", "http://localhost:8080")
+HONEYCOMB_PROXY_ENDPOINT = getattr(settings, "HONEYCOMB_PROXY_ENDPOINT", "https://otel-proxy.onrender.com")
+HONEYCOMB_PROXY_ENDPOINT = HONEYCOMB_PROXY_ENDPOINT if isinstance(HONEYCOMB_PROXY_ENDPOINT, str) and len(HONEYCOMB_PROXY_ENDPOINT) > 0 else "https://otel-proxy.onrender.com"
 SERVICE_NAME = settings.SERVICE_NAME

 # Headers for OTLP - no API key needed as the proxy will add it
@ -236,18 +237,18 @@ class RetryingOTLPMetricExporter(MetricExporter):
                if retries <= self.max_retries:
                    # Use exponential backoff
                    delay = self.retry_delay * (2 ** (retries - 1))
-                    self.logger.warning(
-                        f"Honeycomb export attempt {retries} failed: {str(e)}. "
-                        f"Retrying in {delay}s..."
-                    )
+                    # self.logger.warning(
+                    #     f"Honeycomb export attempt {retries} failed: {str(e)}. "
+                    #     f"Retrying in {delay}s..."
+                    # )
                    time.sleep(delay)
-                else:
-                    self.logger.error(
-                        f"Failed to export to Honeycomb after {retries} attempts: {str(e)}"
-                    )
+                # else:
+                    # self.logger.error(
+                    #     f"Failed to export to Honeycomb after {retries} attempts: {str(e)}"
+                    # )
            except Exception as e:
                # For non-connection errors, don't retry
-                self.logger.error(f"Unexpected error exporting to Honeycomb: {str(e)}")
+                # self.logger.error(f"Unexpected error exporting to Honeycomb: {str(e)}")
                return False
                
        # If we get here, all retries failed
--- a/quick_setup.py
+++ b/quick_setup.py
@ -74,7 +74,7 @@ def create_s3_bucket(bucket_name, region=DEFAULT_REGION):

    aws_access_key = os.getenv("AWS_ACCESS_KEY")
    aws_secret_key = os.getenv("AWS_SECRET_ACCESS_KEY")
-    region = os.getenv("AWS_REGION") if os.getenv("AWS_REGION") else region
+    region = os.getenv("AWS_REGION") or region

    if not aws_access_key or not aws_secret_key:
        LOGGER.error("AWS credentials not found in environment variables.")
@ -112,9 +112,9 @@ def bucket_exists(s3_client, bucket_name):
        return True
    except botocore.exceptions.ClientError as e:
        error_code = int(e.response["Error"]["Code"])
-        if error_code == 404:
+        if error_code in [404, 403]:
            return False
-        raise
+        # raise e


 def setup_mongodb():
@ -180,178 +180,6 @@ def setup_mongodb():
        LOGGER.info("MongoDB connection closed.")


-def setup_postgres():
-    """
-    Set up PostgreSQL database and tables with proper indexes, including initializing a
-    separate multi-vector embeddings table and its associated similarity function without
-    interfering with the existing vector embeddings table.
-    """
-    import asyncio
-
-    from sqlalchemy import text
-    from sqlalchemy.ext.asyncio import create_async_engine
-
-    # Load PostgreSQL URI from .env file
-    postgres_uri = os.getenv("POSTGRES_URI")
-    if not postgres_uri:
-        raise ValueError("POSTGRES_URI not found in .env file.")
-
-    # Check if pgvector is installed when on macOS
-    if platform.system() == "Darwin":
-        try:
-            # Check if postgresql is installed via homebrew
-            result = subprocess.run(
-                ["brew", "list", "postgresql@14"], capture_output=True, text=True
-            )
-            if result.returncode != 0:
-                LOGGER.error(
-                    "PostgreSQL not found. Please install it with: brew install postgresql@14"
-                )
-                raise RuntimeError("PostgreSQL not installed")
-
-            # Check if pgvector is installed
-            result = subprocess.run(["brew", "list", "pgvector"], capture_output=True, text=True)
-            if result.returncode != 0:
-                LOGGER.error(
-                    "\nError: pgvector extension not found. Please install it with:\n"
-                    "brew install pgvector\n"
-                    "brew services stop postgresql@14\n"
-                    "brew services start postgresql@14\n"
-                )
-                raise RuntimeError("pgvector not installed")
-        except FileNotFoundError:
-            LOGGER.error("Homebrew not found. Please install it from https://brew.sh")
-            raise
-
-    async def _setup_postgres():
-        try:
-            # Create async engine
-            engine = create_async_engine(postgres_uri)
-
-            async with engine.begin() as conn:
-                try:
-                    # Enable pgvector extension
-                    await conn.execute(text("CREATE EXTENSION IF NOT EXISTS vector"))
-                    LOGGER.info("Enabled pgvector extension")
-                except Exception as e:
-                    if "could not open extension control file" in str(e):
-                        LOGGER.error(
-                            "\nError: pgvector extension not found. Please install it:\n"
-                            "- On macOS: brew install pgvector\n"
-                            "- On Ubuntu: sudo apt install postgresql-14-pgvector\n"
-                            "- On other systems: check https://github.com/pgvector/pgvector#installation\n"
-                        )
-                    raise
-
-                # Import and create all base tables
-                from core.database.postgres_database import Base
-
-                # Create regular tables first
-                await conn.run_sync(Base.metadata.create_all)
-                LOGGER.info("Created base PostgreSQL tables")
-
-                # Create caches table
-                create_caches_table = """
-                CREATE TABLE IF NOT EXISTS caches (
-                    name TEXT PRIMARY KEY,
-                    metadata JSON NOT NULL,
-                    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
-                    updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
-                )
-                """
-                await conn.execute(text(create_caches_table))
-                LOGGER.info("Created caches table")
-
-                # Get vector dimensions from config
-                dimensions = CONFIG["embedding"]["dimensions"]
-
-                # Drop existing vector index if it exists
-                drop_index_sql = """
-                DROP INDEX IF EXISTS vector_idx;
-                """
-                await conn.execute(text(drop_index_sql))
-
-                # Drop existing vector embeddings table if it exists
-                drop_table_sql = """
-                DROP TABLE IF EXISTS vector_embeddings;
-                """
-                await conn.execute(text(drop_table_sql))
-
-                # Create vector embeddings table with proper vector column
-                create_table_sql = f"""
-                CREATE TABLE vector_embeddings (
-                    id SERIAL PRIMARY KEY,
-                    document_id VARCHAR(255),
-                    chunk_number INTEGER,
-                    content TEXT,
-                    chunk_metadata TEXT,
-                    embedding vector({dimensions}),
-                    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
-                );
-                """
-                await conn.execute(text(create_table_sql))
-                LOGGER.info("Created vector_embeddings table with vector column")
-
-                # Create the vector index
-                index_sql = """
-                CREATE INDEX vector_idx
-                ON vector_embeddings USING ivfflat (embedding vector_l2_ops)
-                WITH (lists = 100);
-                """
-                await conn.execute(text(index_sql))
-                LOGGER.info("Created IVFFlat index on vector_embeddings")
-
-                # Initialize multi-vector embeddings table and associated similarity function
-                drop_multi_vector_sql = """
-                DROP TABLE IF EXISTS multi_vector_embeddings;
-                """
-                await conn.execute(text(drop_multi_vector_sql))
-                LOGGER.info("Dropped existing multi_vector_embeddings table (if any)")
-
-                create_multi_vector_sql = """
-                CREATE TABLE multi_vector_embeddings (
-                    id BIGSERIAL PRIMARY KEY,
-                    embeddings BIT(128)[]
-                );
-                """
-                await conn.execute(text(create_multi_vector_sql))
-                LOGGER.info(
-                    "Created multi_vector_embeddings table with BIT(128)[] embeddings column"
-                )
-
-                create_function_sql = """
-                CREATE OR REPLACE FUNCTION max_sim(document_bits BIT[], query_bits BIT[]) RETURNS double precision AS $$
-                    WITH queries AS (
-                        SELECT row_number() OVER () AS query_number, * FROM (SELECT unnest(query_bits) AS query) AS foo
-                    ),
-                    documents AS (
-                        SELECT unnest(document_bits) AS document
-                    ),
-                    similarities AS (
-                        SELECT 
-                            query_number, 
-                            1.0 - (bit_count(document # query)::float / greatest(bit_length(query), 1)::float) AS similarity 
-                        FROM queries CROSS JOIN documents
-                    ),
-                    max_similarities AS (
-                        SELECT MAX(similarity) AS max_similarity FROM similarities GROUP BY query_number
-                    )
-                    SELECT SUM(max_similarity) FROM max_similarities
-                $$ LANGUAGE SQL;
-                """
-                await conn.execute(text(create_function_sql))
-                LOGGER.info("Created function max_sim for multi-vector similarity computation")
-
-            await engine.dispose()
-            LOGGER.info("PostgreSQL setup completed successfully")
-
-        except Exception as e:
-            LOGGER.error(f"Failed to setup PostgreSQL: {e}")
-            raise
-
-    asyncio.run(_setup_postgres())
-
-
 def setup():
    # Setup S3 if configured
    if STORAGE_PROVIDER == "aws-s3":
@ -366,9 +194,7 @@ def setup():
            setup_mongodb()
            LOGGER.info("MongoDB setup completed.")
        case "postgres":
-            LOGGER.info("Setting up PostgreSQL...")
-            setup_postgres()
-            LOGGER.info("PostgreSQL setup completed.")
+            LOGGER.info("Postgres is setup on database intialization - nothing to do here!")
        case _:
            LOGGER.error(f"Unsupported database provider: {DATABASE_PROVIDER}")
            raise ValueError(f"Unsupported database provider: {DATABASE_PROVIDER}")