"""Tests for the web tools provider architecture.

Covers:
- WebSearchProvider / WebExtractProvider ABC enforcement
- Per-capability backend selection (_get_search_backend, _get_extract_backend)
- Backward compatibility (web.backend still works as shared fallback)
- Config keys merge correctly via DEFAULT_CONFIG
"""
from __future__ import annotations

import json
from typing import Any, Dict, List

import pytest

from tests.tools.conftest import register_all_web_providers


# ---------------------------------------------------------------------------
# ABC enforcement
# ---------------------------------------------------------------------------


class TestWebProviderABCs:
    """The unified WebSearchProvider ABC enforces the interface contract.

    After PR #25182, all seven providers are subclasses of
    :class:`agent.web_search_provider.WebSearchProvider`. The legacy
    in-tree ABCs at ``tools.web_providers.base`` (separate
    ``WebSearchProvider`` + ``WebExtractProvider``) were deleted in the
    same PR — providers now advertise capabilities via
    ``supports_search() / supports_extract()`` flags.
    """

    def test_cannot_instantiate_abc_directly(self):
        from agent.web_search_provider import WebSearchProvider

        with pytest.raises(TypeError):
            WebSearchProvider()  # type: ignore[abstract]

    def test_concrete_search_only_provider_works(self):
        from agent.web_search_provider import WebSearchProvider

        class Dummy(WebSearchProvider):
            @property
            def name(self) -> str:
                return "dummy"

            @property
            def display_name(self) -> str:
                return "Dummy Search"

            def is_available(self) -> bool:
                return True

            def supports_search(self) -> bool:
                return True

            def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
                return {"success": True, "data": {"web": []}}

        d = Dummy()
        assert d.name == "dummy"
        assert d.display_name == "Dummy Search"
        assert d.is_available() is True
        assert d.supports_search() is True
        assert d.supports_extract() is False  # default
        assert d.search("test")["success"] is True

    def test_concrete_multi_capability_provider_works(self):
        from agent.web_search_provider import WebSearchProvider

        class Dummy(WebSearchProvider):
            @property
            def name(self) -> str:
                return "dummy"

            @property
            def display_name(self) -> str:
                return "Dummy Multi"

            def is_available(self) -> bool:
                return True

            def supports_search(self) -> bool:
                return True

            def supports_extract(self) -> bool:
                return True

            def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
                return {"success": True, "data": {"web": []}}

            def extract(self, urls: List[str], **kwargs: Any) -> List[Dict[str, Any]]:
                return [{"url": urls[0], "content": "x"}]

        d = Dummy()
        assert d.supports_search() is True
        assert d.supports_extract() is True
        assert d.extract(["https://example.com"])[0]["url"] == "https://example.com"

    def test_search_only_provider_skips_extract(self):
        """Search-only providers don't have to implement extract()."""
        from agent.web_search_provider import WebSearchProvider

        class SearchOnly(WebSearchProvider):
            @property
            def name(self) -> str:
                return "search-only"

            @property
            def display_name(self) -> str:
                return "Search Only"

            def is_available(self) -> bool:
                return True

            def supports_search(self) -> bool:
                return True

            def search(self, query: str, limit: int = 5) -> Dict[str, Any]:
                return {"success": True, "data": {"web": []}}

        # Should instantiate fine — extract has default supports_*()
        # returning False and isn't required to be overridden when not
        # advertised.
        s = SearchOnly()
        assert s.supports_search() is True
        assert s.supports_extract() is False


# ---------------------------------------------------------------------------
# Per-capability backend selection
# ---------------------------------------------------------------------------


class TestPerCapabilityBackendSelection:
    """_get_search_backend and _get_extract_backend read per-capability config."""

    def test_search_backend_overrides_generic(self, monkeypatch):
        from tools import web_tools

        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
            "backend": "firecrawl",
            "search_backend": "tavily",
        })
        monkeypatch.setenv("TAVILY_API_KEY", "test-key")
        assert web_tools._get_search_backend() == "tavily"

    def test_extract_backend_overrides_generic(self, monkeypatch):
        from tools import web_tools

        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
            "backend": "tavily",
            "extract_backend": "exa",
        })
        monkeypatch.setenv("EXA_API_KEY", "test-key")
        assert web_tools._get_extract_backend() == "exa"

    def test_falls_back_to_generic_backend_when_search_backend_empty(self, monkeypatch):
        from tools import web_tools

        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
            "backend": "tavily",
            "search_backend": "",
        })
        monkeypatch.setenv("TAVILY_API_KEY", "test-key")
        assert web_tools._get_search_backend() == "tavily"

    def test_explicit_extract_backend_honored_when_unavailable(self, monkeypatch):
        """An explicit per-capability backend is honored even with no creds, so
        its setup error surfaces instead of silently rerouting to the keyless
        Parallel default (which would send user URLs to a different provider)."""
        from tools import web_tools

        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
            "extract_backend": "firecrawl",
        })
        for key in ("FIRECRAWL_API_KEY", "FIRECRAWL_API_URL", "FIRECRAWL_GATEWAY_URL"):
            monkeypatch.delenv(key, raising=False)
        monkeypatch.setattr(web_tools, "_is_tool_gateway_ready", lambda: False, raising=False)
        # Resolves to firecrawl (not parallel) despite firecrawl being unavailable.
        assert web_tools._get_extract_backend() == "firecrawl"

    def test_falls_back_to_generic_backend_when_extract_backend_empty(self, monkeypatch):
        from tools import web_tools

        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
            "backend": "parallel",
            "extract_backend": "",
        })
        monkeypatch.setenv("PARALLEL_API_KEY", "test-key")
        assert web_tools._get_extract_backend() == "parallel"

    def test_explicit_search_backend_honored_when_unavailable(self, monkeypatch):
        from tools import web_tools

        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
            "backend": "firecrawl",
            "search_backend": "exa",  # set but no EXA_API_KEY
        })
        monkeypatch.delenv("EXA_API_KEY", raising=False)
        monkeypatch.setenv("FIRECRAWL_API_KEY", "fc-key")
        # The explicit per-capability choice (exa) is honored even though it's
        # unavailable, so its setup error surfaces — we don't silently reroute
        # to the shared backend (or the keyless Parallel default).
        assert web_tools._get_search_backend() == "exa"

    def test_fully_backward_compatible_with_web_backend_only(self, monkeypatch):
        from tools import web_tools

        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {
            "backend": "tavily",
        })
        monkeypatch.setenv("TAVILY_API_KEY", "test-key")
        # No search_backend or extract_backend set — both fall through
        assert web_tools._get_search_backend() == "tavily"
        assert web_tools._get_extract_backend() == "tavily"


# ---------------------------------------------------------------------------
# Config key presence in DEFAULT_CONFIG
# ---------------------------------------------------------------------------


class TestDefaultConfig:
    """The web section exists in DEFAULT_CONFIG with per-capability keys."""

    def test_web_section_in_default_config(self):
        from hermes_cli.config import DEFAULT_CONFIG

        assert "web" in DEFAULT_CONFIG
        web = DEFAULT_CONFIG["web"]
        assert "backend" in web
        assert "search_backend" in web
        assert "extract_backend" in web
        # All empty string by default (no override)
        assert web["backend"] == ""
        assert web["search_backend"] == ""
        assert web["extract_backend"] == ""


# ---------------------------------------------------------------------------
# web_search_tool uses _get_search_backend
# ---------------------------------------------------------------------------


class TestWebSearchUsesSearchBackend:
    """web_search_tool dispatches through _get_search_backend not _get_backend."""

    def test_search_tool_calls_search_backend(self, monkeypatch):
        from tools import web_tools

        called_with = []
        original_get_search = web_tools._get_search_backend

        def tracking_get_search():
            result = original_get_search()
            called_with.append(("search", result))
            return result

        monkeypatch.setattr(web_tools, "_get_search_backend", tracking_get_search)
        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {"backend": "firecrawl"})
        monkeypatch.setenv("FIRECRAWL_API_KEY", "fake")

        # The function will fail at Firecrawl client level but we just
        # need to verify _get_search_backend was called
        try:
            web_tools.web_search_tool("test", 1)
        except Exception:
            pass

        assert len(called_with) > 0
        assert called_with[0][0] == "search"


class TestUnconfiguredErrorEnvelopeParity:
    """Regression tests for PR #25182: the post-migration dispatcher must
    emit the same top-level error envelope as pre-migration main when no
    web backend is configured.

    Plugin-level error wrapping is correct for in-flight errors (per-page
    SDK exceptions, scrape timeouts) but PRE-FLIGHT configuration errors
    must surface at the top level so function-calling models that check
    ``result.get("error")`` detect the failure cleanly.
    """

    _register_providers = staticmethod(register_all_web_providers)

    @pytest.fixture(autouse=True)
    def _populate_web_registry(self):
        self._register_providers()
        yield
        from agent.web_search_registry import _reset_for_tests
        _reset_for_tests()

    def _clear_web_creds(self, monkeypatch):
        for k in (
            "BRAVE_SEARCH_API_KEY",
            "SEARXNG_URL",
            "TAVILY_API_KEY",
            "EXA_API_KEY",
            "PARALLEL_API_KEY",
            "FIRECRAWL_API_KEY",
            "FIRECRAWL_API_URL",
            "FIRECRAWL_GATEWAY_URL",
            "TOOL_GATEWAY_DOMAIN",
        ):
            monkeypatch.delenv(k, raising=False)

    def test_extract_empty_urls_does_not_raise(self, monkeypatch):
        """Regression: empty (or fully SSRF-blocked) URL sets skip the dispatch
        branch; the free-Parallel flag must still be initialized so the tool
        returns an error envelope instead of UnboundLocalError."""
        import asyncio
        from tools import web_tools
        self._clear_web_creds(monkeypatch)
        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})
        out = asyncio.run(web_tools.web_extract_tool([], "markdown"))
        # The key assertion is that it returns a normal error envelope (a
        # string) rather than raising UnboundLocalError.
        assert isinstance(out, str)
        result = json.loads(out)
        assert "error" in result

    def test_unconfigured_search_falls_back_to_free_parallel(self, monkeypatch):
        """``web_search_tool`` with no creds routes to Parallel's free Search
        MCP rather than erroring. The MCP transport is mocked so the test
        stays offline; we assert dispatch landed on parallel and returned the
        standard search envelope.
        """
        from tools import web_tools
        import plugins.web.parallel.provider as parallel_provider

        self._clear_web_creds(monkeypatch)
        monkeypatch.setattr(web_tools, "_firecrawl_client", None, raising=False)
        monkeypatch.setattr(web_tools, "_firecrawl_client_config", None, raising=False)
        monkeypatch.setattr(web_tools, "_load_web_config", lambda: {})

        captured = {}

        def _fake_mcp(query, limit, api_key):
            captured["query"] = query
            captured["api_key"] = api_key
            return {
                "success": True,
                "data": {"web": [
                    {"url": "https://example.com", "title": "Example",
                     "description": "hit", "position": 1},
                ]},
            }

        monkeypatch.setattr(parallel_provider, "_mcp_web_search", _fake_mcp)

        result = json.loads(web_tools.web_search_tool("hello world", limit=3))
        assert result.get("success") is True, f"expected success, got {result}"
        assert result["data"]["web"][0]["url"] == "https://example.com"
        # Keyless path: dispatched to parallel with no Bearer token.
        assert captured == {"query": "hello world", "api_key": None}


class TestDispatchersTriggerPluginDiscovery:
    """Regression tests for #27580: each web_*_tool dispatcher must
    idempotently call ``_ensure_web_plugins_loaded()`` before consulting
    ``agent.web_search_registry``.

    Without this, a tool call from a context that hasn't already loaded
    plugins (subprocess agent runs, delegate children, standalone scripts,
    test paths that import the registry directly) sees an empty registry
    and returns the misleading "No web extract provider configured" error
    even when the user has both the config key set AND the API key
    exported.

    Mirrors :func:`tools.browser_tool._ensure_browser_plugins_loaded` —
    every other plugin-backed dispatcher (image_gen, video_gen, browser,
    skills) already does this.
    """

    def _clear_registry(self):
        """Reset the web_search registry to empty and return a callback
        that restores the original contents. Used in a try/finally so the
        snapshot is restored even when the dispatcher under test raises."""
        from agent import web_search_registry

        with web_search_registry._lock:
            original = dict(web_search_registry._providers)
            web_search_registry._providers.clear()

        def _restore():
            with web_search_registry._lock:
                web_search_registry._providers.clear()
                web_search_registry._providers.update(original)

        return _restore

    def test_web_extract_tool_runs_discovery_before_registry_lookup(self, monkeypatch):
        """``web_extract_tool`` must invoke ``_ensure_web_plugins_loaded()``
        before looking up the configured backend so the registry is
        populated even from cold-start subprocess contexts.

        Without the fix, ``get_provider('firecrawl')`` returns ``None``
        on a fresh process and the dispatcher emits "No web extract
        provider configured" despite the user having both
        ``web.extract_backend: firecrawl`` and ``FIRECRAWL_API_KEY`` set
        (issue #27580).
        """
        import asyncio
        import json
        from unittest.mock import MagicMock
        from agent.web_search_provider import WebSearchProvider
        from agent import web_search_registry
        from tools import web_tools

        restore = self._clear_registry()
        try:
            class FakeFirecrawl(WebSearchProvider):
                @property
                def name(self) -> str:
                    return "firecrawl"

                @property
                def display_name(self) -> str:
                    return "Fake Firecrawl"

                def is_available(self) -> bool:
                    return True

                def supports_extract(self) -> bool:
                    return True

                async def extract(self, urls, format=None):
                    return [
                        {"url": u, "title": "", "content": "ok",
                         "raw_content": "ok", "metadata": {}}
                        for u in urls
                    ]

            # Simulate "plugin discovery loads the firecrawl plugin": the
            # wrapped helper registers the provider, mirroring what
            # ``plugins/web/firecrawl/__init__.py:register`` does at
            # real-process startup. Wrapping with ``MagicMock`` lets us
            # also assert the dispatcher actually invoked the hook — if
            # a future refactor accidentally drops the call the regression
            # would otherwise hide behind a still-populated registry.
            def _register_fake() -> None:
                if web_search_registry.get_provider("firecrawl") is None:
                    web_search_registry.register_provider(FakeFirecrawl())

            mock_hook = MagicMock(wraps=_register_fake)
            # Patch the helper on ``tools.web_tools`` directly rather than the
            # underlying ``hermes_cli.plugins._ensure_plugins_discovered`` so
            # the test stays valid even if the import inside the helper is
            # later moved to module scope or renamed.
            monkeypatch.setattr(
                web_tools, "_ensure_web_plugins_loaded", mock_hook
            )
            monkeypatch.setattr(
                web_tools, "_load_web_config",
                lambda: {"extract_backend": "firecrawl"},
            )
            # Sanity: registry IS empty before the tool call.
            assert web_search_registry.get_provider("firecrawl") is None

            result = json.loads(asyncio.run(
                web_tools.web_extract_tool(
                    ["https://example.com"],
                    use_llm_processing=False,
                )
            ))

            # The hook must have been called BEFORE the registry lookup —
            # that is the invariant under regression test. Without the
            # explicit ``.called`` assertion the test could pass if the
            # registry were populated by some unrelated side effect.
            assert mock_hook.called, (
                "web_extract_tool must call _ensure_web_plugins_loaded() "
                "before resolving the registry"
            )
            assert "No web extract provider configured" not in json.dumps(result)
            assert web_search_registry.get_provider("firecrawl") is not None
        finally:
            restore()

    def test_web_search_tool_runs_discovery_before_registry_lookup(self, monkeypatch):
        """``web_search_tool`` must invoke ``_ensure_web_plugins_loaded()``
        before the registry lookup for the same reason as the extract
        path (issue #27580 root cause applies to all dispatchers).
        """
        import json
        from unittest.mock import MagicMock
        from agent.web_search_provider import WebSearchProvider
        from agent import web_search_registry
        from tools import web_tools

        restore = self._clear_registry()
        try:
            class FakeBrave(WebSearchProvider):
                @property
                def name(self) -> str:
                    return "brave-free"

                @property
                def display_name(self) -> str:
                    return "Fake Brave"

                def is_available(self) -> bool:
                    return True

                def supports_search(self) -> bool:
                    return True

                def search(self, query, limit=5):
                    return {"success": True, "data": {"web": [
                        {"title": "ok", "url": "https://x", "description": "",
                         "position": 0}
                    ]}}

            def _register_fake() -> None:
                if web_search_registry.get_provider("brave-free") is None:
                    web_search_registry.register_provider(FakeBrave())

            mock_hook = MagicMock(wraps=_register_fake)
            monkeypatch.setattr(
                web_tools, "_ensure_web_plugins_loaded", mock_hook
            )
            monkeypatch.setattr(
                web_tools, "_load_web_config",
                lambda: {"search_backend": "brave-free"},
            )
            assert web_search_registry.get_provider("brave-free") is None

            result = json.loads(web_tools.web_search_tool("hello", limit=1))
            assert mock_hook.called, (
                "web_search_tool must call _ensure_web_plugins_loaded() "
                "before resolving the registry"
            )
            assert "No web search provider configured" not in json.dumps(result)
            assert web_search_registry.get_provider("brave-free") is not None
        finally:
            restore()

