From e159fe83fb0afb7f143083b8903ceb15a8368019 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Sat, 20 Dec 2025 16:45:23 +0100 Subject: [PATCH 01/12] Cherry-pick of https://github.com/Altinity/ClickHouse/pull/1078 with unresolved conflict markers (resolution in next commit) --- Original cherry-pick message follows: Merge pull request #1078 from Altinity/oauth-antalya-25.8 25.8 Antalya: Token Authentication and Authorization # Conflicts: # ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt # src/Access/AccessControl.h # src/Access/Authentication.cpp # src/Access/AuthenticationData.cpp # src/Access/IAccessStorage.cpp # src/Access/UsersConfigAccessStorage.cpp # src/Server/TCPHandler.cpp # src/configure_config.cmake --- .../aspell-ignore/en/aspell-dict.txt | 13 + contrib/jwt-cpp-cmake/CMakeLists.txt | 2 +- .../external-authenticators/index.md | 3 +- .../external-authenticators/tokens.md | 278 ++++++ src/Access/AccessControl.cpp | 40 +- src/Access/AccessControl.h | 10 + src/Access/Authentication.cpp | 16 + src/Access/AuthenticationData.cpp | 27 +- src/Access/AuthenticationData.h | 8 + src/Access/Common/JWKSProvider.cpp | 106 +++ src/Access/Common/JWKSProvider.h | 73 ++ src/Access/Credentials.cpp | 5 + src/Access/Credentials.h | 45 + src/Access/ExternalAuthenticators.cpp | 151 +++- src/Access/ExternalAuthenticators.h | 28 +- src/Access/IAccessStorage.cpp | 8 + src/Access/TokenAccessStorage.cpp | 591 +++++++++++++ src/Access/TokenAccessStorage.h | 88 ++ src/Access/TokenProcessors.h | 228 +++++ src/Access/TokenProcessorsJWT.cpp | 440 ++++++++++ src/Access/TokenProcessorsOpaque.cpp | 403 +++++++++ src/Access/TokenProcessorsParse.cpp | 139 +++ src/Access/UsersConfigAccessStorage.cpp | 807 ++++++++++++++++++ src/Core/ServerSettings.cpp | 9 + src/Parsers/Access/ASTAuthenticationData.cpp | 7 +- src/Parsers/Access/ASTCreateUserQuery.h | 4 +- src/Parsers/Access/ParserCreateUserQuery.cpp | 14 + 
src/Parsers/Access/ParserCreateUserQuery.h | 4 +- src/Parsers/CommonParsers.h | 1 + src/Server/HTTP/authenticateUserByHTTP.cpp | 24 +- src/Server/TCPHandler.cpp | 26 + src/Server/TCPHandler.h | 1 + .../System/StorageSystemBuildOptions.cpp.in | 1 + src/configure_config.cmake | 6 + tests/integration/test_jwt_auth/__init__.py | 0 .../test_jwt_auth/configs/users.xml | 15 + .../test_jwt_auth/configs/validators.xml | 26 + .../helpers/generate_private_key.py | 21 + .../test_jwt_auth/helpers/jwt_jwk.py | 113 +++ .../helpers/jwt_static_secret.py | 43 + .../test_jwt_auth/helpers/private_key_1 | 27 + .../test_jwt_auth/helpers/private_key_2 | 27 + .../test_jwt_auth/jwks_server/server.py | 33 + tests/integration/test_jwt_auth/test.py | 82 ++ 44 files changed, 3978 insertions(+), 15 deletions(-) create mode 100644 docs/en/operations/external-authenticators/tokens.md create mode 100644 src/Access/Common/JWKSProvider.cpp create mode 100644 src/Access/Common/JWKSProvider.h create mode 100644 src/Access/TokenAccessStorage.cpp create mode 100644 src/Access/TokenAccessStorage.h create mode 100644 src/Access/TokenProcessors.h create mode 100644 src/Access/TokenProcessorsJWT.cpp create mode 100644 src/Access/TokenProcessorsOpaque.cpp create mode 100644 src/Access/TokenProcessorsParse.cpp create mode 100644 tests/integration/test_jwt_auth/__init__.py create mode 100644 tests/integration/test_jwt_auth/configs/users.xml create mode 100644 tests/integration/test_jwt_auth/configs/validators.xml create mode 100644 tests/integration/test_jwt_auth/helpers/generate_private_key.py create mode 100644 tests/integration/test_jwt_auth/helpers/jwt_jwk.py create mode 100644 tests/integration/test_jwt_auth/helpers/jwt_static_secret.py create mode 100644 tests/integration/test_jwt_auth/helpers/private_key_1 create mode 100644 tests/integration/test_jwt_auth/helpers/private_key_2 create mode 100644 tests/integration/test_jwt_auth/jwks_server/server.py create mode 100644 
tests/integration/test_jwt_auth/test.py diff --git a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt index abf22b4d27f1..f0c6fbaf3ba7 100644 --- a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt +++ b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt @@ -350,6 +350,8 @@ Dresseler DuckDB Duployan Durre +ECDSA +EdDSA ECMA ETag EachRow @@ -651,7 +653,12 @@ JoinStrictness Jpan JumpConsistentHash Jupyter +<<<<<<< HEAD Jurc +======= +jwks +JWKS +>>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) KDevelop KafkaAssignedPartitions KafkaBackgroundReads @@ -3802,7 +3809,11 @@ uuid uuids uuidv vCPU +<<<<<<< HEAD vLLM +======= +validators +>>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) varPop varPopStable varSamp @@ -3820,6 +3831,8 @@ vectorscan vendoring verificationDepth verificationMode +verifier +verifiers versionedcollapsingmergetree vhost virtualized diff --git a/contrib/jwt-cpp-cmake/CMakeLists.txt b/contrib/jwt-cpp-cmake/CMakeLists.txt index 1edd793df9e4..606c13d29de2 100644 --- a/contrib/jwt-cpp-cmake/CMakeLists.txt +++ b/contrib/jwt-cpp-cmake/CMakeLists.txt @@ -17,4 +17,4 @@ set (JWT_CPP_INCLUDE_DIR "${ClickHouse_SOURCE_DIR}/contrib/jwt-cpp/include") add_library (_jwt-cpp INTERFACE) target_include_directories(_jwt-cpp SYSTEM BEFORE INTERFACE ${JWT_CPP_INCLUDE_DIR}) -add_library(ch_contrib::jwt-cpp ALIAS _jwt-cpp) +add_library(ch_contrib::jwt-cpp ALIAS _jwt-cpp) \ No newline at end of file diff --git a/docs/en/operations/external-authenticators/index.md b/docs/en/operations/external-authenticators/index.md index 037fa8c1054f..3a61522bc283 100644 --- a/docs/en/operations/external-authenticators/index.md +++ b/docs/en/operations/external-authenticators/index.md @@ -19,4 +19,5 @@ The following external authenticators and directories are supported: - 
[LDAP](/operations/external-authenticators/ldap#ldap-external-authenticator) [Authenticator](./ldap.md#ldap-external-authenticator) and [Directory](./ldap.md#ldap-external-user-directory) - Kerberos [Authenticator](/operations/external-authenticators/kerberos#kerberos-as-an-external-authenticator-for-existing-users) - [SSL X.509 authentication](/operations/external-authenticators/ssl-x509) -- HTTP [Authenticator](./http.md) \ No newline at end of file +- HTTP [Authenticator](./http.md) +- Token-based [Authenticator](./tokens.md) diff --git a/docs/en/operations/external-authenticators/tokens.md b/docs/en/operations/external-authenticators/tokens.md new file mode 100644 index 000000000000..ed9ebee2bd09 --- /dev/null +++ b/docs/en/operations/external-authenticators/tokens.md @@ -0,0 +1,278 @@ +--- +slug: /en/operations/external-authenticators/oauth +title: "Token-based authentication" +--- +import SelfManaged from '@site/docs/en/_snippets/_self_managed_only_no_roadmap.md'; + + + +ClickHouse users can be authenticated using tokens. This works in two ways: + +- An existing user (defined in `users.xml` or in local access control paths) can be authenticated with a token if this user can be `IDENTIFIED WITH jwt`. +- Use the information from the token or from an external Identity Provider (IdP) as a source of user definitions and allow locally undefined users to be authenticated with a valid token. + +Although not all tokens are JWTs, under the hood both ways are treated as the same authentication method to maintain better compatibility. + +# Token Processors + +## Configuration + +Token-based authentication is enabled by default. To disable it, set `enable_token_auth` to `0` in `config.xml`: + +```xml +0 +``` + +When disabled, token processors are not parsed, TokenAccessStorage is not available, and authentication via tokens (`--jwt` option or `Authorization: Bearer` header) is rejected. 
+ +To use token-based authentication, add `token_processors` section to `config.xml` and define at least one token processor in it. +Its contents are different for different token processor types. + +**Common parameters** +- `type` -- type of token processor. Supported values: "jwt_static_key", "jwt_static_jwks", "jwt_dynamic_jwks", "azure", "openid". Mandatory. Case-insensitive. +- `token_cache_lifetime` -- maximum lifetime of cached token (in seconds). Optional, default: 3600. +- `username_claim` -- name of claim (field) that will be treated as ClickHouse username. Optional, default: "sub". +- `groups_claim` -- name of claim (field) that contains list of groups user belongs to. This claim will be looked up in the token itself (in case token is a valid JWT, e.g. in Keycloak) or in response from `/userinfo`. Optional, default: "groups". + +For each type, there are additional specific parameters (some of them are mandatory). +If some parameters that are not required for current processor type are specified, they are ignored. + +## JWT (JSON Web Token) + +JWT itself is a source of information about user. +It is decoded locally and its integrity is verified using either a local static key or JWKS (JSON Web Key Set), local or remote. + +### JWT with static key: +```xml + + + + jwt_static_key + HS256 + my_static_secret + + + +``` +**Parameters:** +- `algo` - Algorithm for signature validation. Mandatory. Supported values: + + | HMAC | RSA | ECDSA | PSS | EdDSA | + |-------| ----- | ------ | ----- | ------- | + | HS256 | RS256 | ES256 | PS256 | Ed25519 | + | HS384 | RS384 | ES384 | PS384 | Ed448 | + | HS512 | RS512 | ES512 | PS512 | | + | | | ES256K | | | + Also supports None (not recommended and must *NEVER* be used in production). +- `claims` - A string containing a JSON object that should be contained in the token payload. If this parameter is defined, token without corresponding payload will be considered invalid. Optional. 
+- `static_key` - key for symmetric algorithms. Mandatory for `HS*` family algorithms. +- `static_key_in_base64` - indicates if the `static_key` key is base64-encoded. Optional, default: `False`. +- `public_key` - public key for asymmetric algorithms. Mandatory except for `HS*` family algorithms and `None`. +- `private_key` - private key for asymmetric algorithms. Optional. +- `public_key_password` - public key password. Optional. +- `private_key_password` - private key password. Optional. +- `expected_issuer` - Expected value of the `iss` (issuer) claim in the JWT. If specified, tokens with a different issuer will be rejected. Optional. +- `expected_audience` - Expected value of the `aud` (audience) claim in the JWT. If specified, tokens with a different audience will be rejected. Optional. +- `allow_no_expiration` - If `true`, tokens without the `exp` (expiration) claim are accepted. Otherwise they are rejected. Optional, default: `false`. + +### JWT with static JWKS +```xml + + + + jwt_static_jwks + {"keys": [{"kty": "RSA", "alg": "RS256", "kid": "mykid", "n": "_public_key_mod_", "e": "AQAB"}]} + + + +``` + +**Parameters:** + +- `static_jwks` - content of JWKS in JSON +- `static_jwks_file` - path to a file with JWKS +- `claims` - A string containing a JSON object that should be contained in the token payload. If this parameter is defined, token without corresponding payload will be considered invalid. Optional. +- `verifier_leeway` - Clock skew tolerance (seconds). Useful for handling small differences in system clocks between ClickHouse and the token issuer. Optional. +- `expected_issuer` - Expected value of the `iss` (issuer) claim in the JWT. If specified, tokens with a different issuer will be rejected. Optional. +- `expected_audience` - Expected value of the `aud` (audience) claim in the JWT. If specified, tokens with a different audience will be rejected. Optional. 
+- `allow_no_expiration` - If `true`, tokens without the `exp` (expiration) claim are accepted. Otherwise they are rejected. Optional, default: `false`. + +:::note +Exactly one of `static_jwks` or `static_jwks_file` must be present in one verifier +::: + +:::note +Only RS* family algorithms are supported! +::: + +### JWT with remote JWKS +```xml + + + + jwt_dynamic_jwks + http://localhost:8000/.well-known/jwks.json + 3600 + + + +``` + +**Parameters:** + +- `uri` - JWKS endpoint. Mandatory. +- `jwks_cache_lifetime` - Period between requests that refresh the JWKS. Optional, default: 3600. +- `claims` - A string containing a JSON object that should be contained in the token payload. If this parameter is defined, token without corresponding payload will be considered invalid. Optional. +- `verifier_leeway` - Clock skew tolerance (seconds). Useful for handling small differences in system clocks between ClickHouse and the token issuer. Optional. +- `expected_issuer` - Expected value of the `iss` (issuer) claim in the JWT. If specified, tokens with a different issuer will be rejected. Optional. +- `expected_audience` - Expected value of the `aud` (audience) claim in the JWT. If specified, tokens with a different audience will be rejected. Optional. +- `allow_no_expiration` - If `true`, tokens without the `exp` (expiration) claim are accepted. Otherwise they are rejected. Optional, default: `false`. + + +## Processors with external providers + +Some tokens cannot be decoded and validated locally. An external service is needed in this case. "Azure" and "OpenID" (a generic type) are supported now. + +### Azure +```xml + + + + azure + + + +``` + +No additional parameters are required. 
+ +### OpenID +```xml + + + + openid + url/.well-known/openid-configuration + 60 + 3600 + + + openid + url/userinfo + url/tokeninfo + url/.well-known/jwks.json + 60 + 3600 + + + +``` + +:::note +Either `configuration_endpoint` or both `userinfo_endpoint` and `token_introspection_endpoint` (and, optionally, `jwks_uri`) shall be set. If none of them are set or all three are set, this is an invalid configuration that will not be parsed. +::: + +**Parameters:** + +- `configuration_endpoint` - URI of OpenID configuration (often ends with `.well-known/openid-configuration`); +- `userinfo_endpoint` - URI of endpoint that returns user information in exchange for a valid token; +- `token_introspection_endpoint` - URI of token introspection endpoint (returns information about a valid token); +- `jwks_uri` - URI of the JWKS endpoint (often ends with `.well-known/jwks.json`) +- `jwks_cache_lifetime` - Period between requests that refresh the JWKS. Optional, default: 3600. +- `verifier_leeway` - Clock skew tolerance (seconds). Useful for handling small differences in system clocks between ClickHouse and the token issuer. Optional, default: 60. +- `expected_issuer` - Expected value of the `iss` (issuer) claim in the JWT. If specified, tokens with a different issuer will be rejected. Optional. +- `expected_audience` - Expected value of the `aud` (audience) claim in the JWT. If specified, tokens with a different audience will be rejected. Optional. +- `allow_no_expiration` - If `true`, tokens without the `exp` (expiration) claim are accepted. Otherwise they are rejected. Optional, default: `false`. + +Sometimes a token is a valid JWT. In that case the token will be decoded and validated locally if the configuration endpoint returns a JWKS URI (or `jwks_uri` is specified alongside `userinfo_endpoint` and `token_introspection_endpoint`). + +### Tokens cache +To reduce the number of requests to the IdP, tokens are cached internally for a maximum period of `token_cache_lifetime` seconds. 
+If a token expires sooner than `token_cache_lifetime`, then the cache entry for this token will only be valid while the token is valid. +If the token lifetime is longer than `token_cache_lifetime`, the cache entry for this token will be valid for `token_cache_lifetime`. + +## Enabling token authentication for a user in `users.xml` {#enabling-jwt-auth-in-users-xml} + +In order to enable token-based authentication for the user, specify `jwt` section instead of `password` or other similar sections in the user definition. + +Parameters: +- `claims` - An optional string containing a JSON object that should be contained in the token payload. + +Example (goes into `users.xml`): +```xml + + + + {"resource_access":{"account": {"roles": ["view-profile"]}}} + + + +``` + +Here, the JWT payload must contain `["view-profile"]` on path `resource_access.account.roles`, otherwise authentication will not succeed even with a valid JWT. + +:::note +If `claims` is defined, this user will not be able to authenticate using opaque tokens, so only JWT-based authentication will be available. +::: + +``` +{ +... + "resource_access": { + "account": { + "roles": ["view-profile"] + } + }, +... +} +``` + +:::note +A user cannot have JWT authentication together with any other authentication method. The presence of any other sections like `password` alongside `jwt` will force ClickHouse to shut down. +::: + +## Enabling token authentication using SQL {#enabling-jwt-auth-using-sql} + +Users with "JWT" authentication type cannot be created using SQL now. + +## Identity Provider as an External User Directory {#idp-external-user-directory} + +If there is no suitable user pre-defined in ClickHouse, authentication is still possible: an Identity Provider can be used as a source of user information. +To allow this, add `token` section to the `user_directories` section of the `config.xml` file. + +At each login attempt, ClickHouse tries to find the user definition locally and authenticate it as usual. 
+If a token is provided but the user is not defined, ClickHouse will treat the user as externally defined and will try to validate the token and get user information from the specified processor. +If validated successfully, the user will be considered existing and authenticated. The user will be assigned roles from the list specified in the `roles` section. +All this implies that the SQL-driven [Access Control and Account Management](/docs/en/guides/sre/user-management/index.md#access-control) is enabled and roles are created using the [CREATE ROLE](/docs/en/sql-reference/statements/create/role.md#create-role-statement) statement. + +**Example** + +```xml + + + + token_processor_name + + + + my_profile + + \bclickhouse-[a-zA-Z0-9]+\b + + s/-/_/g + + + +``` + +:::note +For now, no more than one `token` section can be defined inside `user_directories`. This _may_ change in future. +::: + +**Parameters** + +- `processor` — Name of one of processors defined in `token_processors` config section described above. This parameter is mandatory and cannot be empty. +- `common_roles` — Section with a list of locally defined roles that will be assigned to each user retrieved from the IdP. Optional. +- `default_profile` — Name of a locally defined settings profile that will be assigned to each user retrieved from the IdP. If the profile does not exist, a warning will be logged and the user will be created without a profile. Optional. +- `roles_filter` — Regex string for groups filtering. Only groups matching this regex will be mapped to roles. Optional. +- `roles_transform` — Sed-style transform pattern to apply to group names before mapping to roles. Format: `s/pattern/replacement/flags`. The `g` flag applies the replacement globally (all occurrences). Example: `s/-/_/g` converts `clickhouse-grp-dba` to `clickhouse_grp_dba`. Optional. 
diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index d7b8f94108ae..10dfc1c29459 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +44,7 @@ namespace ErrorCodes extern const int REQUIRED_PASSWORD; extern const int CANNOT_COMPILE_REGEXP; extern const int BAD_ARGUMENTS; + extern const int INVALID_CONFIG_PARAMETER; } namespace @@ -292,6 +294,8 @@ void AccessControl::setupFromMainConfig(const Poco::Util::AbstractConfiguration setDefaultPasswordTypeFromConfig(config_.getString("default_password_type", "sha256_password")); setPasswordComplexityRulesFromConfig(config_); + setTokenAuthEnabled(config_.getBool("enable_token_auth", true)); + setBcryptWorkfactor(config_.getInt("bcrypt_workfactor", 12)); /// Optional improvements in access control system. @@ -430,6 +434,12 @@ void AccessControl::addLDAPStorage(const String & storage_name_, const Poco::Uti LOG_DEBUG(getLogger(), "Added {} access storage '{}', LDAP server name: {}", String(new_storage->getStorageType()), new_storage->getStorageName(), new_storage->getLDAPServerName()); } +void AccessControl::addTokenStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_) +{ + auto new_storage = std::make_shared(storage_name_, *this, config_, prefix_); + addStorage(new_storage); + LOG_DEBUG(getLogger(), "Added {} access storage '{}'", String(new_storage->getStorageType()), new_storage->getStorageName()); +} void AccessControl::addStoragesFromUserDirectoriesConfig( const Poco::Util::AbstractConfiguration & config, @@ -442,6 +452,8 @@ void AccessControl::addStoragesFromUserDirectoriesConfig( Strings keys_in_user_directories; config.keys(key, keys_in_user_directories); + bool has_token_storage = false; + for (const String & key_in_user_directories : keys_in_user_directories) { String prefix = key + "." 
+ key_in_user_directories; @@ -455,6 +467,8 @@ void AccessControl::addStoragesFromUserDirectoriesConfig( type = DiskAccessStorage::STORAGE_TYPE; else if (type == "ldap") type = LDAPAccessStorage::STORAGE_TYPE; + else if (type == "token") + type = TokenAccessStorage::STORAGE_TYPE; String name = config.getString(prefix + ".name", type); @@ -488,6 +502,20 @@ void AccessControl::addStoragesFromUserDirectoriesConfig( bool allow_backup = config.getBool(prefix + ".allow_backup", true); addReplicatedStorage(name, zookeeper_path, get_zookeeper_function, allow_backup); } + else if (type == TokenAccessStorage::STORAGE_TYPE) + { + if (!isTokenAuthEnabled()) + { + LOG_INFO(getLogger(), "Token authentication is disabled, skipping token user directory '{}'", name); + continue; + } + + if (has_token_storage) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Only one `token` section can be defined."); + + addTokenStorage(name, config, prefix); + has_token_storage = true; + } else throw Exception(ErrorCodes::UNKNOWN_ELEMENT_IN_CONFIG, "Unknown storage type '{}' at {} in config", type, prefix); } @@ -669,7 +697,7 @@ void AccessControl::restoreFromBackup(RestorerFromBackup & restorer, const Strin void AccessControl::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) { - external_authenticators->setConfiguration(config, getLogger()); + external_authenticators->setConfiguration(config, getLogger(), isTokenAuthEnabled()); } @@ -948,4 +976,14 @@ bool AccessControl::getAllowBetaTierSettings() const { return allow_beta_tier_settings; } + +void AccessControl::setTokenAuthEnabled(bool enable) +{ + enable_token_auth = enable; +} + +bool AccessControl::isTokenAuthEnabled() const +{ + return enable_token_auth; +} } diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index 2fa35306d915..a7a84e5d34a5 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -93,6 +93,8 @@ class AccessControl : public MultipleAccessStorage /// 
Adds LDAPAccessStorage which allows querying remote LDAP server for user info. void addLDAPStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_); + void addTokenStorage(const String & storage_name_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_); + void addReplicatedStorage(const String & storage_name, const String & zookeeper_path, const zkutil::GetZooKeeper & get_zookeeper_function, @@ -271,6 +273,10 @@ class AccessControl : public MultipleAccessStorage bool getAllowExperimentalTierSettings() const; bool getAllowBetaTierSettings() const; + /// Controls whether token-based auth is enabled. + void setTokenAuthEnabled(bool enable); + bool isTokenAuthEnabled() const; + private: class ContextAccessCache; class CustomSettingsPrefixes; @@ -306,7 +312,11 @@ class AccessControl : public MultipleAccessStorage std::atomic_bool allow_beta_tier_settings = true; std::atomic_bool enable_user_name_access_type = true; std::atomic_bool enable_read_write_grants = false; +<<<<<<< HEAD std::atomic_bool allow_impersonate_user = false; +======= + std::atomic_bool enable_token_auth = true; +>>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) }; } diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 3f6f1bf5e009..0a4d1f978b91 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -329,8 +329,16 @@ Authentication::CredentialsCheckResult Authentication::areCredentialsValid( const ClientInfo & client_info, SettingsChanges & settings) { +<<<<<<< HEAD if (!credentials.isReady()) return CredentialsCheckResult::Fail; +======= + /// It is OK for TokenCredentials to be not ready: + /// When auth request happens, we do not even know the username. 
+ /// Token is resolved a bit later and the user information will be put in credentials + if (!typeid_cast(&credentials) && !credentials.isReady()) + return false; +>>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) if (const auto * gss_acceptor_context = typeid_cast(&credentials)) { @@ -377,6 +385,14 @@ Authentication::CredentialsCheckResult Authentication::areCredentialsValid( } #endif + if (const auto * token_credentials = typeid_cast(&credentials)) + { + if (authentication_method.getType() != AuthenticationType::JWT) + return false; + + return external_authenticators.checkTokenCredentials(*token_credentials); + } + if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast(&credentials)) return CredentialsCheckResult::Success; diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index fdf3e0662f1d..e9834f0ef80f 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -28,12 +28,18 @@ # include #endif +<<<<<<< HEAD namespace CurrentMetrics { extern const Metric BcryptCacheBytes; extern const Metric BcryptCacheSize; } +======= +#if USE_JWT_CPP +#include +#endif +>>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) namespace DB { @@ -411,7 +417,10 @@ boost::intrusive_ptr AuthenticationData::toAST() const } case AuthenticationType::JWT: { - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is available only in ClickHouse Cloud"); + const auto & claims = getJWTClaims(); + if (!claims.empty()) + node->children.push_back(std::make_shared(claims)); + break; } case AuthenticationType::KERBEROS: { @@ -689,6 +698,22 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que auth_data.setHTTPAuthenticationServerName(server); auth_data.setHTTPAuthenticationScheme(scheme); } +#if USE_JWT_CPP + else if (query.type == AuthenticationType::JWT) + { + if (!args.empty()) + { + String value = 
checkAndGetLiteralArgument(args[0], "claims"); + picojson::value json_obj; + auto error = picojson::parse(json_obj, value); + if (!error.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad JWT claims: {}", error); + if (!json_obj.is()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad JWT claims: is not an object"); + auth_data.setJWTClaims(value); + } + } +#endif else { throw Exception(ErrorCodes::LOGICAL_ERROR, "Unexpected ASTAuthenticationData structure"); diff --git a/src/Access/AuthenticationData.h b/src/Access/AuthenticationData.h index 1eef3fad6187..f96fc68897a3 100644 --- a/src/Access/AuthenticationData.h +++ b/src/Access/AuthenticationData.h @@ -82,6 +82,12 @@ class AuthenticationData time_t getValidUntil() const { return valid_until; } void setValidUntil(time_t valid_until_) { valid_until = valid_until_; } + const String & getJWTClaims() const { return jwt_claims; } + void setJWTClaims(const String & jwt_claims_) { jwt_claims = jwt_claims_; } + + const String & getTokenProcessorName() const { return token_processor_name; } + void setTokenProcessorName(const String & token_processor_name_) { token_processor_name = token_processor_name_; } + friend bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs); friend bool operator !=(const AuthenticationData & lhs, const AuthenticationData & rhs) { return !(lhs == rhs); } @@ -121,6 +127,8 @@ class AuthenticationData String http_auth_server_name; HTTPAuthenticationScheme http_auth_scheme = HTTPAuthenticationScheme::BASIC; time_t valid_until = 0; + String jwt_claims; + String token_processor_name; }; } diff --git a/src/Access/Common/JWKSProvider.cpp b/src/Access/Common/JWKSProvider.cpp new file mode 100644 index 000000000000..40814ea5eb86 --- /dev/null +++ b/src/Access/Common/JWKSProvider.cpp @@ -0,0 +1,106 @@ +#include + +#if USE_JWT_CPP +#include +#include +#include +#include +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int 
AUTHENTICATION_FAILED; + extern const int INVALID_CONFIG_PARAMETER; +} + +/// Returns the cached JWKS while it is still fresh; otherwise fetches it from `jwks_uri` over HTTP(S), parses it and updates the cache. +/// Throws AUTHENTICATION_FAILED if the endpoint does not answer with HTTP 200 or the response cannot be parsed as JWKS. +JWKSType JWKSClient::getJWKS() +{ + /// Exclusive lock: this function updates `cached_jwks` and `last_request_send`, so a shared (reader) lock is not sufficient. + std::unique_lock lock(mutex); + + auto now = std::chrono::high_resolution_clock::now(); + auto diff = std::chrono::duration(now - last_request_send).count(); + + if (diff < refresh_timeout && cached_jwks.has_value()) + return cached_jwks.value(); + + Poco::Net::HTTPResponse response; + std::string response_string; + + Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, jwks_uri.getPathAndQuery()}; + + if (jwks_uri.getScheme() == "https") + { + Poco::Net::HTTPSClientSession session = Poco::Net::HTTPSClientSession(jwks_uri.getHost(), jwks_uri.getPort()); + session.sendRequest(request); + std::istream & response_stream = session.receiveResponse(response); + if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK || !response_stream) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Failed to fetch JWKS, code: {}, reason: {}", + response.getStatus(), response.getReason()); + Poco::StreamCopier::copyToString(response_stream, response_string); + } + else + { + Poco::Net::HTTPClientSession session = Poco::Net::HTTPClientSession(jwks_uri.getHost(), jwks_uri.getPort()); + session.sendRequest(request); + std::istream & response_stream = session.receiveResponse(response); + if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK || !response_stream) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Failed to fetch JWKS, code: {}, reason: {}", response.getStatus(), response.getReason()); + Poco::StreamCopier::copyToString(response_stream, response_string); + } + + last_request_send = std::chrono::high_resolution_clock::now(); + + JWKSType parsed_jwks; + + try + { + parsed_jwks = jwt::parse_jwks(response_string); + } + catch (const std::exception & e) + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Failed to parse JWKS: {}", e.what()); + } + + cached_jwks = std::move(parsed_jwks); + return 
cached_jwks.value(); +} + +StaticJWKSParams::StaticJWKSParams(const std::string & static_jwks_, const std::string & static_jwks_file_) +{ + if (static_jwks_.empty() && static_jwks_file_.empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "JWT validator misconfigured: `static_jwks` or `static_jwks_file` keys must be present in static JWKS validator configuration"); + if (!static_jwks_.empty() && !static_jwks_file_.empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "JWT validator misconfigured: `static_jwks` and `static_jwks_file` keys cannot both be present in static JWKS validator configuration"); + + static_jwks = static_jwks_; + static_jwks_file = static_jwks_file_; +} + +StaticJWKS::StaticJWKS(const StaticJWKSParams & params) +{ + String content = String(params.static_jwks); + if (!params.static_jwks_file.empty()) + { + std::ifstream ifs(params.static_jwks_file); + Poco::StreamCopier::copyToString(ifs, content); + } + try + { + auto keys = jwt::parse_jwks(content); + jwks = std::move(keys); + } + catch (const std::exception & e) + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Failed to parse JWKS: {}", e.what()); + } +} + +} +#endif diff --git a/src/Access/Common/JWKSProvider.h b/src/Access/Common/JWKSProvider.h new file mode 100644 index 000000000000..566effd6e21e --- /dev/null +++ b/src/Access/Common/JWKSProvider.h @@ -0,0 +1,73 @@ +#include + +#if USE_JWT_CPP +#include +#include +#include +#include + +#include + +namespace DB +{ + +using JWKSType = jwt::jwks; + +/// JWKS (JSON Web Key Set) is a kind of a set of public keys that are used to validate JWT authenticity locally. +/// They are usually exposed by identity providers (e.g. Keycloak) via a well-known URI (usually /.well-known/jwks.json) +/// This interface is responsible for managing JWKS. Retrieving, caching and refreshing of JWKS happens here. +/// JWKS can either be static (e.g. provided in config) or dynamic (fetched from a remote URI and). 
+class IJWKSProvider +{ +public: + virtual ~IJWKSProvider() = default; + + virtual JWKSType getJWKS() = 0; +}; + +class JWKSClient : public IJWKSProvider +{ +public: + explicit JWKSClient(const String & uri, const size_t refresh_ms_): refresh_timeout(refresh_ms_), jwks_uri(uri) {} + + ~JWKSClient() override = default; + JWKSClient(const JWKSClient &) = delete; + JWKSClient(JWKSClient &&) = delete; + JWKSClient &operator=(const JWKSClient &) = delete; + JWKSClient &operator=(JWKSClient &&) = delete; + + JWKSType getJWKS() override; + +private: + size_t refresh_timeout; + Poco::URI jwks_uri; + + std::shared_mutex mutex; + std::optional cached_jwks; + std::chrono::time_point last_request_send; +}; + +struct StaticJWKSParams +{ + StaticJWKSParams(const std::string &static_jwks_, const std::string &static_jwks_file_); + + String static_jwks; + String static_jwks_file; +}; + +class StaticJWKS : public IJWKSProvider +{ +public: + explicit StaticJWKS(const StaticJWKSParams ¶ms); + +private: + JWKSType getJWKS() override + { + return jwks; + } + + JWKSType jwks; +}; + +} +#endif diff --git a/src/Access/Credentials.cpp b/src/Access/Credentials.cpp index 4887d0545656..c60fb3cfea67 100644 --- a/src/Access/Credentials.cpp +++ b/src/Access/Credentials.cpp @@ -2,6 +2,7 @@ #include #include #include +#include namespace DB { @@ -9,6 +10,7 @@ namespace DB namespace ErrorCodes { extern const int LOGICAL_ERROR; + extern const int AUTHENTICATION_FAILED; } Credentials::Credentials(const String & user_name_) @@ -100,4 +102,7 @@ const String & BasicCredentials::getPassword() const return password; } +/// Unless the token is validated, we will not use any data from it, including username. 
+TokenCredentials::TokenCredentials(const String & token_) : Credentials(""), token(token_), expires_at(std::chrono::system_clock::now() + std::chrono::hours(1)) {} + } diff --git a/src/Access/Credentials.h b/src/Access/Credentials.h index f98eb31ff0a2..bb81ef93a15c 100644 --- a/src/Access/Credentials.h +++ b/src/Access/Credentials.h @@ -16,6 +16,8 @@ namespace Poco::Net class SocketAddress; } +#include + namespace DB { @@ -195,4 +197,47 @@ class SSHPTYCredentials : public Credentials #endif +class TokenCredentials : public Credentials +{ +public: + explicit TokenCredentials(const String & token_); + + const String & getToken() const + { + if (token.empty()) + { + throwNotReady(); + } + return token; + } + void setUserName(const String & user_name_) + { + user_name = user_name_; + if (!user_name.empty()) + { + is_ready = true; + } + } + std::set getGroups() const + { + return groups; + } + void setGroups(const std::set & groups_) + { + groups = groups_; + } + std::optional getExpiresAt() const + { + return expires_at; + } + void setExpiresAt(std::chrono::system_clock::time_point expires_at_) + { + expires_at = expires_at_; + } +private: + String token; + std::set groups; + std::optional expires_at; +}; + } diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 6fa7c28bc980..f045165479b8 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -1,7 +1,10 @@ +#include #include #include #include #include +#include "Common/Logger.h" +#include "Common/logger_useful.h" #include #include #include @@ -12,6 +15,8 @@ #include #include +#include +#include #include #include @@ -267,7 +272,6 @@ HTTPAuthClientParams parseHTTPAuthParams(const Poco::Util::AbstractConfiguration return http_auth_params; } - } void parseLDAPRoleSearchParams(LDAPClient::RoleSearchParams & params, const Poco::Util::AbstractConfiguration & config, const String & prefix) @@ -285,6 +289,9 @@ void 
ExternalAuthenticators::resetImpl() ldap_client_params_blueprint.clear(); ldap_caches.clear(); kerberos_params.reset(); + token_processors.clear(); + access_token_to_username_cache.clear(); + username_to_access_token_cache.clear(); } void ExternalAuthenticators::reset() @@ -293,10 +300,41 @@ void ExternalAuthenticators::reset() resetImpl(); } -void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log) +void parseTokenProcessors(std::unordered_map> & token_processors, + const Poco::Util::AbstractConfiguration & config, + const String & token_processors_config, + LoggerPtr log) +{ + Poco::Util::AbstractConfiguration::Keys token_processors_keys; + config.keys(token_processors_config, token_processors_keys); + + token_processors.clear(); + + for (const auto & processor : token_processors_keys) + { + String prefix = fmt::format("{}.{}", token_processors_config, processor); + try + { + token_processors[processor] = ITokenProcessor::parseTokenProcessor(config, prefix, processor); + } + catch (...) 
+ { + tryLogCurrentException(log, "Could not parse token processor " + backQuote(processor)); // Trailing space keeps the back-quoted name separate from the message text. + } + } +} + +bool ExternalAuthenticators::isTokenAuthEnabled() const +{ + std::lock_guard lock(mutex); + return token_auth_enabled; +} + +void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log, bool token_auth_enabled_) { std::lock_guard lock(mutex); resetImpl(); + token_auth_enabled = token_auth_enabled_; Poco::Util::AbstractConfiguration::Keys all_keys; config.keys("", all_keys); @@ -304,8 +342,12 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur std::size_t ldap_servers_key_count = 0; std::size_t kerberos_keys_count = 0; std::size_t http_auth_server_keys_count = 0; + std::size_t jwt_validators_count = 0; + std::size_t token_processors_count = 0; const String http_auth_servers_config = "http_authentication_servers"; + const String jwt_validators_config = "jwt_validators"; + const String token_processors_config = "token_processors"; for (auto key : all_keys) { @@ -318,6 +360,8 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur ldap_servers_key_count += (key == "ldap_servers"); kerberos_keys_count += (key == "kerberos"); http_auth_server_keys_count += (key == http_auth_servers_config); + jwt_validators_count += (key == jwt_validators_config); + token_processors_count += (key == token_processors_config); } if (ldap_servers_key_count > 1) @@ -329,6 +373,12 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur if (http_auth_server_keys_count > 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple http_authentication_servers sections are not allowed"); + if (jwt_validators_count > 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple {} sections are not allowed", jwt_validators_config); + + if (token_processors_count > 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Multiple {} sections are not allowed",
token_processors_config); + Poco::Util::AbstractConfiguration::Keys http_auth_server_names; config.keys(http_auth_servers_config, http_auth_server_names); http_auth_servers.clear(); @@ -383,6 +433,11 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur { tryLogCurrentException(log, "Could not parse Kerberos section"); } + + if (token_auth_enabled) + parseTokenProcessors(token_processors, config, token_processors_config, log); + else + LOG_INFO(log, "Token authentication is disabled, skipping token processors configuration"); } static UInt128 computeParamsHash(const LDAPClient::Params & params, const LDAPClient::RoleSearchParamsList * role_search_params) @@ -551,7 +606,7 @@ GSSAcceptorContext::Params ExternalAuthenticators::getKerberosParams() const return kerberos_params.value(); } -HTTPAuthClientParams ExternalAuthenticators::getHTTPAuthenticationParams(const String& server) const +HTTPAuthClientParams ExternalAuthenticators::getHTTPAuthenticationParams(const String & server) const { std::lock_guard lock{mutex}; @@ -561,6 +616,96 @@ HTTPAuthClientParams ExternalAuthenticators::getHTTPAuthenticationParams(const S return it->second; } +bool ExternalAuthenticators::checkCredentialsAgainstProcessor(const ITokenProcessor & processor, + TokenCredentials & credentials) const +{ + if (processor.resolveAndValidate(credentials)) + { + TokenCacheEntry cache_entry; + cache_entry.user_name = credentials.getUserName(); + cache_entry.external_roles = credentials.getGroups(); + + auto default_expiration_ts = std::chrono::system_clock::now() + + std::chrono::minutes(processor.getTokenCacheLifetime()); + + if (credentials.getExpiresAt().has_value()) + { + if (credentials.getExpiresAt().value() < default_expiration_ts) + cache_entry.expires_at = credentials.getExpiresAt().value(); + else + cache_entry.expires_at = default_expiration_ts; // Token outlives the cache lifetime: cap the entry at the default expiration instead of leaving expires_at unassigned.
+ + } + else + { + cache_entry.expires_at = default_expiration_ts; + } + + LOG_DEBUG(getLogger("AccessTokenAuthentication"), "Authenticated user {} with access token by {}", credentials.getUserName(), processor.getProcessorName()); + + // Check if a cache entry for the same user but with another token exists -- old cache entry is considered outdated and removed + auto old_token_iter = username_to_access_token_cache.find(cache_entry.user_name); + if (old_token_iter != username_to_access_token_cache.end()) + { + access_token_to_username_cache.erase(old_token_iter->second); + username_to_access_token_cache.erase(old_token_iter); + } + + access_token_to_username_cache[credentials.getToken()] = cache_entry; + username_to_access_token_cache[cache_entry.user_name] = credentials.getToken(); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} added", cache_entry.user_name); + + return true; + } + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Failed authentication with access token by {}", processor.getProcessorName()); + + return false; +} + +bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name) const +{ + std::lock_guard lock{mutex}; + + if (!token_auth_enabled) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is disabled"); + + if (token_processors.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is not configured"); + + /// lookup token in local cache if not expired. + auto cached_entry_iter = access_token_to_username_cache.find(credentials.getToken()); + if (cached_entry_iter != access_token_to_username_cache.end()) + { + if (cached_entry_iter->second.expires_at <= std::chrono::system_clock::now()) // Token found in cache, but already outdated -- need to remove it.
+ { + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} expired, removing", cached_entry_iter->second.user_name); + username_to_access_token_cache.erase(cached_entry_iter->second.user_name); // Must run first: it reads through cached_entry_iter, which the erase on the next line invalidates. + access_token_to_username_cache.erase(cached_entry_iter); + } + else + { + const auto & user_data = cached_entry_iter->second; + const_cast(credentials).setUserName(user_data.user_name); + const_cast(credentials).setGroups(user_data.external_roles); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} found, using it to authenticate", cached_entry_iter->second.user_name); + return true; + } + } + + if (processor_name.empty()) + { + for (const auto & it: token_processors) + { + if (checkCredentialsAgainstProcessor(*it.second, const_cast(credentials))) + return true; + } + } + else + return token_processors.contains(processor_name) && checkCredentialsAgainstProcessor(*token_processors[processor_name], const_cast(credentials)); + + return false; +} + bool ExternalAuthenticators::checkHTTPBasicCredentials( const String & server, const BasicCredentials & credentials, const ClientInfo & client_info, SettingsChanges & settings) const { diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index 6aa26bb3842a..1601903c83a1 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include @@ -13,6 +14,7 @@ #include #include +#include #include #include #include @@ -37,7 +39,9 @@ class ExternalAuthenticators { public: void reset(); - void setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log); + void setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log, bool token_auth_enabled = true); + + bool isTokenAuthEnabled() const; // The name and readiness of the credentials must be verified before calling these.
bool checkLDAPCredentials(const String & server, const BasicCredentials & credentials, @@ -45,6 +49,8 @@ class ExternalAuthenticators bool checkKerberosCredentials(const String & realm, const GSSAcceptorContext & credentials) const; bool checkHTTPBasicCredentials(const String & server, const BasicCredentials & credentials, const ClientInfo & client_info, SettingsChanges & settings) const; + bool checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name = "") const; + GSSAcceptorContext::Params getKerberosParams() const; private: @@ -66,6 +72,26 @@ class ExternalAuthenticators mutable LDAPCaches ldap_caches TSA_GUARDED_BY(mutex) ; std::optional kerberos_params TSA_GUARDED_BY(mutex) ; std::unordered_map http_auth_servers TSA_GUARDED_BY(mutex) ; + mutable std::unordered_map> token_processors TSA_GUARDED_BY(mutex) ; + + struct TokenCacheEntry + { + std::chrono::system_clock::time_point expires_at; + String user_name; + std::set external_roles; + }; + + /// Home-made simple bi-mapping, needed to effectively clean up cache from old tokens. 
+ using TokenToUsernameCache = std::unordered_map; // Access token -> cache entry + using UsernameToTokenCache = std::unordered_map; // User name -> access token + + mutable TokenToUsernameCache access_token_to_username_cache TSA_GUARDED_BY(mutex) ; + mutable UsernameToTokenCache username_to_access_token_cache TSA_GUARDED_BY(mutex) ; + + bool token_auth_enabled TSA_GUARDED_BY(mutex) = true; + + bool checkCredentialsAgainstProcessor(const ITokenProcessor & processor, + TokenCredentials & credentials) const TSA_REQUIRES(mutex); void resetImpl() TSA_REQUIRES(mutex); }; diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index df9d8ceb205b..dec29187ee54 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -11,6 +11,7 @@ #include #include #include +#include "Access/Common/AuthenticationType.h" #include #include #include @@ -33,7 +34,11 @@ namespace ErrorCodes extern const int ACCESS_ENTITY_NOT_FOUND; extern const int ACCESS_STORAGE_READONLY; extern const int ACCESS_STORAGE_DOESNT_ALLOW_BACKUP; +<<<<<<< HEAD extern const int REQUIRED_SECOND_FACTOR; +======= + extern const int AUTHENTICATION_FAILED; +>>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) extern const int WRONG_PASSWORD; extern const int IP_ADDRESS_NOT_ALLOWED; extern const int LOGICAL_ERROR; @@ -546,6 +551,9 @@ std::optional IAccessStorage::authenticateImpl( bool allow_no_password, bool allow_plaintext_password) const { + if (typeid_cast(&credentials) && !typeid_cast(&credentials)->isReady()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Could not resolve username from token"); + if (auto id = find(credentials.getUserName())) { if (auto user = tryRead(*id)) diff --git a/src/Access/TokenAccessStorage.cpp b/src/Access/TokenAccessStorage.cpp new file mode 100644 index 000000000000..e17bc7159cef --- /dev/null +++ b/src/Access/TokenAccessStorage.cpp @@ -0,0 +1,591 @@ +#include +#include +#include +#include +#include +#include 
+#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; +} + +namespace +{ + struct ParsedTransform + { + String pattern; + String replacement; + bool global; + }; + + /// Unescape a string segment + String unescapeSegment(const String & str, size_t start, size_t end) + { + String result; + result.reserve(end - start); + bool escaped = false; + + for (size_t i = start; i < end; ++i) + { + if (escaped) + { + result += str[i]; + escaped = false; + } + else if (str[i] == '\\') + escaped = true; + else + result += str[i]; + } + + return result; + } + + /// Parse sed-style transform pattern: s/pattern/replacement/flags + ParsedTransform parseSedTransform(const String & transform) + { + if (transform.size() < 4 || transform[0] != 's' || transform[1] != '/') + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid roles_transform format. Expected sed-style pattern like 's/pattern/replacement/g'"); + } + + bool escaped = false; + size_t first_slash = 1; + size_t second_slash = String::npos; + size_t third_slash = String::npos; + + // Find delimiters using simple state machine + for (size_t i = first_slash + 1; i < transform.size(); ++i) + { + if (escaped) + { + escaped = false; + continue; + } + + if (transform[i] == '\\') + { + escaped = true; + continue; + } + + if (transform[i] == '/') + { + if (second_slash == String::npos) + second_slash = i; + else if (third_slash == String::npos) + third_slash = i; + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid roles_transform format. Too many unescaped slashes. Expected sed-style pattern like 's/pattern/replacement/g'"); + } + } + + if (second_slash == String::npos || third_slash == String::npos) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Invalid roles_transform format. 
Expected sed-style pattern like 's/pattern/replacement/g'"); + + ParsedTransform result; + + result.pattern = unescapeSegment(transform, first_slash + 1, second_slash); + + size_t replacement_end = (third_slash != String::npos) ? third_slash : transform.size(); + result.replacement = unescapeSegment(transform, second_slash + 1, replacement_end); + + String flags = transform.substr(third_slash + 1); + result.global = (flags.find('g') != String::npos); + + return result; + } + + String applyTransform(const String & input, const String & pattern, const String & replacement, bool global) + { + if (pattern.empty()) + return input; + + re2::RE2 re(pattern); + if (!re.ok()) + return input; + + String result = input; + if (global) + { + RE2::GlobalReplace(&result, re, replacement); + } + else + { + RE2::Replace(&result, re, replacement); + } + return result; + } +} + +TokenAccessStorage::TokenAccessStorage(const String & storage_name_, AccessControl & access_control_, const Poco::Util::AbstractConfiguration & config_, const String & prefix_) + : IAccessStorage(storage_name_), access_control(access_control_), config(config_), prefix(prefix_), + memory_storage(storage_name_, access_control.getChangesNotifier(), false) +{ + std::lock_guard lock(mutex); + + const String prefix_str = (prefix.empty() ? 
"" : prefix + "."); + + if (config.has(prefix_str + "roles_filter")) + roles_filter.emplace(config.getString(prefix_str + "roles_filter")); + + if (config.has(prefix_str + "roles_transform")) + { + String transform = config.getString(prefix_str + "roles_transform"); + ParsedTransform parsed = parseSedTransform(transform); + roles_transform_pattern = parsed.pattern; + roles_transform_replacement = parsed.replacement; + roles_transform_global = parsed.global; + } + + provider_name = config.getString(prefix_str + "processor"); + if (provider_name.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "'processor' must be specified for Token user directory"); + + std::set common_roles_cfg; + if (config.has(prefix_str + "common_roles")) + { + Poco::Util::AbstractConfiguration::Keys role_names; + config.keys(prefix_str + "common_roles", role_names); + + common_roles_cfg.insert(role_names.begin(), role_names.end()); + } + common_role_names.swap(common_roles_cfg); + + if (config.has(prefix_str + "default_profile")) + default_profile_name = config.getString(prefix_str + "default_profile"); + + user_external_roles.clear(); + users_per_roles.clear(); + roles_per_users.clear(); + granted_role_names.clear(); + granted_role_ids.clear(); + + role_change_subscription = access_control.subscribeForChanges( + [this] (const UUID & id, const AccessEntityPtr & entity) + { + this->processRoleChange(id, entity); + } + ); +} + +void TokenAccessStorage::applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name) +{ + std::vector user_ids; + + // Build a list of ids of the relevant users. 
+ if (common_role_names.contains(role_name)) + { + user_ids = memory_storage.findAll(); + } + else + { + const auto it = users_per_roles.find(role_name); + if (it != users_per_roles.end()) + { + const auto & user_names = it->second; + user_ids.reserve(user_names.size()); + + for (const auto & user_name : user_names) + { + if (const auto user_id = memory_storage.find(user_name)) + user_ids.emplace_back(*user_id); + } + } + } + + // Update the granted roles of the relevant users. + if (!user_ids.empty()) + { + auto update_func = [&role_id, &grant] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr + { + if (auto user = typeid_cast>(entity_)) + { + auto changed_user = typeid_cast>(user->clone()); + if (grant) + changed_user->granted_roles.grant(role_id); + else + changed_user->granted_roles.revoke(role_id); + return changed_user; + } + return entity_; + }; + + memory_storage.update(user_ids, update_func); + } + + // Actualize granted_role_* mappings. + if (grant) + { + if (!user_ids.empty()) + { + granted_role_names.insert_or_assign(role_id, role_name); + granted_role_ids.insert_or_assign(role_name, role_id); + } + } + else + { + granted_role_ids.erase(role_name); + granted_role_names.erase(role_id); + } +} + +void TokenAccessStorage::processRoleChange(const UUID & id, const AccessEntityPtr & entity) +{ + std::lock_guard lock(mutex); + const auto role = typeid_cast>(entity); + const auto it = granted_role_names.find(id); + + if (role) // Added or renamed a role. + { + const auto & new_role_name = role->getName(); + if (it != granted_role_names.end()) // Renamed a granted role. + { + const auto & old_role_name = it->second; + if (new_role_name != old_role_name) + { + // Revoke the old role first, then grant the new role. + applyRoleChangeNoLock(false /* revoke */, id, old_role_name); + applyRoleChangeNoLock(true /* grant */, id, new_role_name); + } + } + else // Added a role. 
+ { + applyRoleChangeNoLock(true /* grant */, id, new_role_name); + } + } + else // Removed a role. + { + if (it != granted_role_names.end()) // Removed a granted role. + { + const auto & old_role_name = it->second; + applyRoleChangeNoLock(false /* revoke */, id, old_role_name); + } + } +} + +const char * TokenAccessStorage::getStorageType() const +{ + return STORAGE_TYPE; +} + +bool TokenAccessStorage::exists(const UUID & id) const +{ + std::lock_guard lock(mutex); + return memory_storage.exists(id); +} + +String TokenAccessStorage::getStorageParamsJSON() const +{ + std::lock_guard lock(mutex); + Poco::JSON::Object params_json; + + params_json.set("provider", provider_name); + + Poco::JSON::Array common_role_names_json; + for (const auto & role : common_role_names) + { + common_role_names_json.add(role); + } + params_json.set("roles", common_role_names_json); + + std::ostringstream oss; // STYLE_CHECK_ALLOW_STD_STRING_STREAM + oss.exceptions(std::ios::failbit); + Poco::JSON::Stringifier::stringify(params_json, oss); + + return oss.str(); +} + +bool TokenAccessStorage::areTokenCredentialsValidNoLock(const User & user, const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const +{ + if (!credentials.isReady()) + return false; + + if (credentials.getUserName() != user.getName()) + return false; + + if (const auto * token_credentials = dynamic_cast(&credentials)) + return external_authenticators.checkTokenCredentials(*token_credentials); + + return false; +} + +std::optional TokenAccessStorage::findImpl(AccessEntityType type, const String & name) const +{ + std::lock_guard lock(mutex); + return memory_storage.find(type, name); +} + + +std::vector TokenAccessStorage::findAllImpl(AccessEntityType type) const +{ + std::lock_guard lock(mutex); + return memory_storage.findAll(type); +} + +AccessEntityPtr TokenAccessStorage::readImpl(const UUID & id, bool throw_if_not_exists) const +{ + std::lock_guard lock(mutex); + return 
memory_storage.read(id, throw_if_not_exists); +} + +std::optional> TokenAccessStorage::readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const +{ + std::lock_guard lock(mutex); + return memory_storage.readNameWithType(id, throw_if_not_exists); +} + +void TokenAccessStorage::assignRolesNoLock(User & user, const std::set & external_roles) const +{ + const auto & user_name = user.getName(); + auto & granted_roles = user.granted_roles; + + auto grant_role = [this, &user_name, &granted_roles] (const String & role_name, const bool common) + { + auto it = granted_role_ids.find(role_name); + if (it == granted_role_ids.end()) + { + if (const auto role_id = access_control.find(role_name)) + { + granted_role_names.insert_or_assign(*role_id, role_name); + it = granted_role_ids.insert_or_assign(role_name, *role_id).first; + } + } + + if (it != granted_role_ids.end()) + { + const auto & role_id = it->second; + granted_roles.grant(role_id); + } + else + { + LOG_TRACE(getLogger(), "Did not grant {} role '{}' to user '{}': role not found", (common ? "common" : "mapped"), role_name, user_name); + } + }; + + user_external_roles.erase(user_name); + granted_roles = {}; + const auto old_role_names = std::move(roles_per_users[user_name]); + + // Grant the common roles first. + for (const auto & role_name : common_role_names) + { + grant_role(role_name, true /* common */); + } + + // Grant the mapped external roles and actualize users_per_roles mapping. + // external_roles allowed to overlap with common_role_names. + for (const auto & role_name : external_roles) + { + grant_role(role_name, false /* mapped */); + users_per_roles[role_name].insert(user_name); + } + + // Cleanup users_per_roles and granted_role_* mappings. 
+ for (const auto & old_role_name : old_role_names) + { + if (external_roles.contains(old_role_name)) + continue; + + const auto rit = users_per_roles.find(old_role_name); + if (rit == users_per_roles.end()) + continue; + + auto & user_names = rit->second; + user_names.erase(user_name); + + if (!user_names.empty()) + continue; + + users_per_roles.erase(rit); + + if (common_role_names.contains(old_role_name)) + continue; + + const auto iit = granted_role_ids.find(old_role_name); + if (iit == granted_role_ids.end()) + continue; + + const auto old_role_id = iit->second; + granted_role_names.erase(old_role_id); + granted_role_ids.erase(iit); + } + + // Actualize roles_per_users mapping and user_external_roles cache. + if (external_roles.empty()) + roles_per_users.erase(user_name); + else + roles_per_users[user_name] = external_roles; + + user_external_roles[user_name] = external_roles; +} + +void TokenAccessStorage::assignProfileNoLock(User & user) const +{ + if (default_profile_name.empty()) + return; + + const auto & user_name = user.getName(); + auto & settings = user.settings; + + // Look up the profile ID once + const auto profile_id = access_control.find(default_profile_name); + if (!profile_id) + { + LOG_TRACE(getLogger(), "Did not assign profile '{}' to user '{}': profile not found", default_profile_name, user_name); + return; + } + + // Check if profile is already assigned + bool profile_already_assigned = false; + for (const auto & element : settings) + { + if (element.parent_profile.has_value() && element.parent_profile == *profile_id) + { + profile_already_assigned = true; + break; + } + } + + if (!profile_already_assigned) + { + SettingsProfileElement profile_element; + profile_element.parent_profile = *profile_id; + settings.push_back(std::move(profile_element)); + LOG_TRACE(getLogger(), "Assigned profile '{}' to user '{}'", default_profile_name, user_name); + } +} + +void TokenAccessStorage::updateAssignedRolesNoLock(const UUID & id, const String & 
user_name, const std::set & external_roles) const +{ + // Map and grant the roles from scratch only if the list of external role has changed. + const auto it = user_external_roles.find(user_name); + if (it != user_external_roles.end() && it->second == external_roles) + return; + + auto update_func = [this, &external_roles] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr + { + if (auto user = typeid_cast>(entity_)) + { + auto changed_user = typeid_cast>(user->clone()); + assignRolesNoLock(*changed_user, external_roles); + return changed_user; + } + return entity_; + }; + + memory_storage.update(id, update_func); +} + + +std::optional TokenAccessStorage::authenticateImpl( + const Credentials & credentials, + const Poco::Net::IPAddress & address, + const ExternalAuthenticators & external_authenticators, + const ClientInfo & /* client_info */, + bool throw_if_user_not_exists, + bool /* allow_no_password */, + bool /* allow_plaintext_password */) const +{ + std::lock_guard lock(mutex); + auto id = memory_storage.find(credentials.getUserName()); + UserPtr user = id ? memory_storage.read(*id) : nullptr; + + const auto & token_credentials = typeid_cast(credentials); + + if (!external_authenticators.checkTokenCredentials(token_credentials, provider_name)) + { + // Even though token itself may be valid (especially in case of a jwt token), authentication has just failed. + if (throw_if_user_not_exists) + throwNotFound(AccessEntityType::USER, credentials.getUserName(), getStorageName()); + + return {}; + } + + std::shared_ptr new_user; + if (!user) + { + // User does not exist, so we create one, and will add it if authentication is successful. 
+ new_user = std::make_shared(); + new_user->setName(credentials.getUserName()); + new_user->authentication_methods.emplace_back(AuthenticationType::JWT); + user = new_user; + } + + if (!isAddressAllowed(*user, address)) + throwAddressNotAllowed(address); + + std::set external_roles; + if (roles_filter.has_value()) // A configured but invalid filter must not fall through to the grant-all branch below. NOTE(review): relies on RE2::FullMatch returning false for an invalid pattern -- confirm. + { + LOG_TRACE(getLogger(), "{}: External role filter found, applying only matching groups", getStorageName()); + for (const auto & group: token_credentials.getGroups()) { + if (RE2::FullMatch(group, roles_filter.value())) + { + String transformed_group = group; + if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) + { + transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); + LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); + } + external_roles.insert(transformed_group); + LOG_TRACE(getLogger(), "{}: Granted role (group) {} to user", getStorageName(), transformed_group); + } + } + } + else + { + LOG_TRACE(getLogger(), "{}: No external role filtering set, applying all available groups", getStorageName()); + for (const auto & group: token_credentials.getGroups()) + { + String transformed_group = group; + if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) + { + transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); + LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); + } + external_roles.insert(transformed_group); + } + } + + if (new_user) + { + assignRolesNoLock(*new_user, external_roles); + assignProfileNoLock(*new_user); + id = memory_storage.insert(new_user); + } + else + { + // Just in case external_roles are changed.
+ updateAssignedRolesNoLock(*id, user->getName(), external_roles); + + // Also update profile if needed + memory_storage.update(*id, [this] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr + { + if (auto user_entity = typeid_cast>(entity_)) + { + auto changed_user = typeid_cast>(user_entity->clone()); + assignProfileNoLock(*changed_user); + return changed_user; + } + return entity_; + }); + } + + if (id) + return AuthResult{ .user_id = *id, .authentication_data = AuthenticationData(AuthenticationType::JWT) }; + return std::nullopt; +} + + +} diff --git a/src/Access/TokenAccessStorage.h b/src/Access/TokenAccessStorage.h new file mode 100644 index 000000000000..aedf8843f2b9 --- /dev/null +++ b/src/Access/TokenAccessStorage.h @@ -0,0 +1,88 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace Poco +{ + namespace Util + { + class AbstractConfiguration; + } +} + + +namespace DB +{ +class AccessControl; + +/// Implementation of IAccessStorage which allows to import user data from oauth server using access token. +/// Normally, this should be unified with LDAPAccessStorage, but not done to minimize changes to code that is common with upstream. +class TokenAccessStorage : public IAccessStorage +{ +public: + static constexpr char STORAGE_TYPE[] = "token"; + + explicit TokenAccessStorage(const String & storage_name_, AccessControl & access_control_, const Poco::Util::AbstractConfiguration & config, const String & prefix); + ~TokenAccessStorage() override = default; + + // IAccessStorage implementations. 
+ const char * getStorageType() const override; + String getStorageParamsJSON() const override; + bool isReadOnly() const override { return true; } + bool exists(const UUID & id) const override; + +private: + mutable std::recursive_mutex mutex; // Note: Reentrance possible by internal role lookup via access_control + AccessControl & access_control; + const Poco::Util::AbstractConfiguration & config; + const String & prefix; + + String provider_name; + std::optional roles_filter = std::nullopt; + std::optional roles_transform_pattern = std::nullopt; + std::optional roles_transform_replacement = std::nullopt; + bool roles_transform_global = false; + + std::set common_role_names; // role name that should be granted to all users at all times + String default_profile_name; // settings profile name that should be assigned to all users + mutable std::map> user_external_roles; + mutable std::map> users_per_roles; // role name -> user names (...it should be granted to; may but don't have to exist for common roles) + mutable std::map> roles_per_users; // user name -> role names (...that should be granted to it; may but don't have to include common roles) + mutable std::map granted_role_names; // (currently granted) role id -> its name + mutable std::map granted_role_ids; // (currently granted) role name -> its id + mutable MemoryAccessStorage memory_storage; + scope_guard role_change_subscription; + + void processRoleChange(const UUID & id, const AccessEntityPtr & entity); + + bool areTokenCredentialsValidNoLock(const User & user, const Credentials & credentials, const ExternalAuthenticators & external_authenticators) const; + + void applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name); + void assignRolesNoLock(User & user, const std::set & external_roles) const; + void assignProfileNoLock(User & user) const; + void updateAssignedRolesNoLock(const UUID & id, const String & user_name, const std::set & external_roles) const; + +protected: + 
std::optional findImpl(AccessEntityType type, const String & name) const override; + std::vector findAllImpl(AccessEntityType type) const override; + AccessEntityPtr readImpl(const UUID & id, bool throw_if_not_exists) const override; + std::optional> readNameWithTypeImpl(const UUID & id, bool throw_if_not_exists) const override; + std::optional authenticateImpl(const Credentials & credentials, + const Poco::Net::IPAddress & address, + const ExternalAuthenticators & external_authenticators, + const ClientInfo & client_info, + bool throw_if_user_not_exists, + bool allow_no_password, + bool allow_plaintext_password) const override; +}; +} diff --git a/src/Access/TokenProcessors.h b/src/Access/TokenProcessors.h new file mode 100644 index 000000000000..c898bfff15d4 --- /dev/null +++ b/src/Access/TokenProcessors.h @@ -0,0 +1,228 @@ +#pragma once + +#include +#include + +#if USE_JWT_CPP +#include +#include +#include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int NOT_IMPLEMENTED; +} + +class ITokenProcessor +{ +public: + explicit ITokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_ = "sub", + const String & groups_claim_ = "groups") + : processor_name(processor_name_), token_cache_lifetime(token_cache_lifetime_), username_claim(username_claim_), groups_claim(groups_claim_) {} + virtual ~ITokenProcessor() = default; + + virtual bool resolveAndValidate(TokenCredentials &) const + { + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for ITokenProcessor interface"); + } + + virtual bool checkClaims(const TokenCredentials &, const String &) { return true; } + + UInt64 getTokenCacheLifetime() const { return token_cache_lifetime; } + String getProcessorName() const { return processor_name; } + + static std::unique_ptr parseTokenProcessor( + const Poco::Util::AbstractConfiguration & config, + const String & prefix, + const String & processor_name); + +protected: + const String 
processor_name; + const UInt64 token_cache_lifetime; + const String username_claim; + const String groups_claim; +}; + +#if USE_JWT_CPP + +struct StaticKeyJwtParams +{ + /// Algorithm name (required). Supported: "none", "hs256", "hs384", "hs512", + /// "ps256", "ps384", "ps512", "ed25519", "ed448", "rs256", "rs384", "rs512", + /// "es256", "es256k", "es384", "es512" + String algo; + + /// For HS algorithms (hs256, hs384, hs512): symmetric key (required for HS algorithms) + String static_key; + + /// For HS algorithms: whether static_key is base64 encoded (optional, defaults to false) + bool static_key_in_base64 = false; + + /// For PS/ED/RSA/ES algorithms: public key (required for PS/ED/RSA/ES algorithms) + String public_key; + + /// For PS/ED/RSA/ES algorithms: private key (optional) + String private_key; + + /// For PS/ED/RSA/ES algorithms: public key password (optional) + String public_key_password; + + /// For PS/ED/RSA/ES algorithms: private key password (optional) + String private_key_password; + + /// JWT claims to validate (optional) + String claims; +}; + +class StaticKeyJwtProcessor : public ITokenProcessor +{ +public: + explicit StaticKeyJwtProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const StaticKeyJwtParams & params); + + bool resolveAndValidate(TokenCredentials & credentials) const override; + bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) override; + +private: + const String claims; + const String expected_issuer; + const String expected_audience; + const bool allow_no_expiration; + jwt::verifier verifier = jwt::verify(); +}; + + +class JwksJwtProcessor : public ITokenProcessor +{ +public: + explicit JwksJwtProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + 
const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const String & claims_, + size_t verifier_leeway_, + std::shared_ptr provider_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), + claims(claims_), expected_issuer(expected_issuer_), expected_audience(expected_audience_), + allow_no_expiration(allow_no_expiration_), provider(provider_), verifier_leeway(verifier_leeway_) {} + + explicit JwksJwtProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const String & claims_, + size_t verifier_leeway_, + const String & jwks_uri_, + size_t jwks_cache_lifetime_) + : JwksJwtProcessor(processor_name_, + token_cache_lifetime_, + username_claim_, + groups_claim_, + expected_issuer_, + expected_audience_, + allow_no_expiration_, + claims_, + verifier_leeway_, + std::make_shared(jwks_uri_, jwks_cache_lifetime_)) {} + + bool resolveAndValidate(TokenCredentials & credentials) const override; + bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) override; + +private: + const String claims; + const String expected_issuer; + const String expected_audience; + const bool allow_no_expiration; + mutable jwt::verifier verifier = jwt::verify(); + std::shared_ptr provider; + const size_t verifier_leeway; +}; + +/// Opaque tokens + +class GoogleTokenProcessor : public ITokenProcessor +{ +public: + GoogleTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) {} + + bool resolveAndValidate(TokenCredentials & credentials) const override; +}; + +class AzureTokenProcessor : 
public ITokenProcessor +{ +public: + AzureTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) {} + + bool resolveAndValidate(TokenCredentials & credentials) const override; +}; + +class OpenIdTokenProcessor : public ITokenProcessor +{ +public: + /// Specify endpoints manually + OpenIdTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const String & userinfo_endpoint_, + const String & token_introspection_endpoint_, + UInt64 verifier_leeway_, + const String & jwks_uri_, + UInt64 jwks_cache_lifetime_); + + /// Obtain endpoints from openid-configuration URL + OpenIdTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const String & openid_config_endpoint_, + UInt64 verifier_leeway_, + UInt64 jwks_cache_lifetime_); + + bool resolveAndValidate(TokenCredentials & credentials) const override; +private: + const String expected_issuer; + const String expected_audience; + const bool allow_no_expiration; + Poco::URI userinfo_endpoint; + Poco::URI token_introspection_endpoint; + + /// Access token is often a valid JWT, so we can validate it locally to avoid unnecesary network requests. 
+ std::optional jwt_validator = std::nullopt; +}; + +#endif + +} diff --git a/src/Access/TokenProcessorsJWT.cpp b/src/Access/TokenProcessorsJWT.cpp new file mode 100644 index 000000000000..e041e4329b24 --- /dev/null +++ b/src/Access/TokenProcessorsJWT.cpp @@ -0,0 +1,440 @@ +#include "TokenProcessors.h" + +#if USE_JWT_CPP +#include +#include +#include + +namespace DB { + +namespace ErrorCodes +{ + extern const int AUTHENTICATION_FAILED; + extern const int INVALID_CONFIG_PARAMETER; +} + +namespace +{ + +bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path); +bool check_claims(const picojson::value::object & claims, const picojson::value::object & payload, const String & path) +{ + for (const auto & it : claims) + { + const auto & payload_it = payload.find(it.first); + if (payload_it == payload.end()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "Key '{}.{}' not found in JWT payload", path, it.first); + return false; + } + if (!check_claims(it.second, payload_it->second, path + "." 
+ it.first)) + { + return false; + } + } + return true; +} + +bool check_claims(const picojson::value::array & claims, const picojson::value::array & payload, const String & path) +{ + if (claims.size() > payload.size()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload too small for claims key '{}'", path); + return false; + } + for (size_t claims_i = 0; claims_i < claims.size(); ++claims_i) + { + bool found = false; + const auto & claims_val = claims.at(claims_i); + for (const auto & payload_val : payload) + { + if (!check_claims(claims_val, payload_val, path + "[" + std::to_string(claims_i) + "]")) + continue; + found = true; + } + if (!found) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not contain an object matching claims key '{}[{}]'", path, claims_i); + return false; + } + } + return true; +} + +bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path) +{ + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'array' in claims '{}'", path); + return false; + } + return check_claims(claims.get(), payload.get(), path); + } + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'object' in claims '{}'", path); + return false; + } + return check_claims(claims.get(), payload.get(), path); + } + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'bool' in claims '{}'", path); + return false; + } + if (claims.get() != payload.get()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match the value in the '{}' assertions. 
Expected '{}' but given '{}'", path, claims.get(), payload.get()); + return false; + } + return true; + } + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'double' in claims '{}'", path); + return false; + } + if (claims.get() != payload.get()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match the value in the '{}' assertions. Expected '{}' but given '{}'", path, claims.get(), payload.get()); + return false; + } + return true; + } + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'std::string' in claims '{}'", path); + return false; + } + if (claims.get() != payload.get()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match the value in the '{}' assertions. Expected '{}' but given '{}'", path, claims.get(), payload.get()); + return false; + } + return true; + } +#ifdef PICOJSON_USE_INT64 + if (claims.is()) + { + if (!payload.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'int64_t' in claims '{}'", path); + return false; + } + if (claims.get() != payload.get()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match the value in claims '{}'. 
Expected '{}' but given '{}'", path, claims.get(), payload.get()); + return false; + } + return true; + } +#endif + LOG_ERROR(getLogger("TokenAuthentication"), "JWT claim '{}' does not match any known type", path); + return false; +} + +bool check_claims(const String & claims, const picojson::value::object & payload) +{ + if (claims.empty()) + return true; + picojson::value json; + auto errors = picojson::parse(json, claims); + if (!errors.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Bad JWT claims: {}", errors); + if (!json.is()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Bad JWT claims: is not an object"); + return check_claims(json.get(), payload, ""); +} + +} + +namespace +{ +std::set parseGroupsFromJsonArray(picojson::array groups_array) +{ + std::set external_groups_names; + + for (const auto & group : groups_array) + { + if (group.is()) + external_groups_names.insert(group.get()); + } + + return external_groups_names; +} +} + +StaticKeyJwtProcessor::StaticKeyJwtProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const StaticKeyJwtParams & params) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), + claims(params.claims), expected_issuer(expected_issuer_), expected_audience(expected_audience_), + allow_no_expiration(allow_no_expiration_) +{ + const String & algo = params.algo; + const String & static_key = params.static_key; + bool static_key_in_base64 = params.static_key_in_base64; + const String & public_key = params.public_key; + const String & private_key = params.private_key; + const String & public_key_password = params.public_key_password; + const String & private_key_password = params.private_key_password; + + if (algo == "ps256" || + algo == "ps384" || + algo == "ps512" || + algo == "ed25519" 
|| + algo == "ed448" || + algo == "rs256" || + algo == "rs384" || + algo == "rs512" || + algo == "es256" || + algo == "es256k" || + algo == "es384" || + algo == "es512" ) + { + if (public_key.empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, `public_key` parameter required for {}", processor_name, algo); + } + else if (algo == "hs256" || + algo == "hs384" || + algo == "hs512" ) + { + if (static_key.empty()) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, `static_key` parameter required for {}", processor_name, algo); + } + else if (algo != "none") + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, unknown algorithm {}", processor_name, algo); + + if (algo == "none") + verifier = verifier.allow_algorithm(jwt::algorithm::none()); + else if (algo == "ps256") + verifier = verifier.allow_algorithm(jwt::algorithm::ps256(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "ps384") + verifier = verifier.allow_algorithm(jwt::algorithm::ps384(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "ps512") + verifier = verifier.allow_algorithm(jwt::algorithm::ps512(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "ed25519") + verifier = verifier.allow_algorithm(jwt::algorithm::ed25519(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "ed448") + verifier = verifier.allow_algorithm(jwt::algorithm::ed448(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "rs256") + verifier = verifier.allow_algorithm(jwt::algorithm::rs256(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "rs384") + verifier = verifier.allow_algorithm(jwt::algorithm::rs384(public_key, private_key, public_key_password, 
private_key_password)); + else if (algo == "rs512") + verifier = verifier.allow_algorithm(jwt::algorithm::rs512(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "es256") + verifier = verifier.allow_algorithm(jwt::algorithm::es256(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "es256k") + verifier = verifier.allow_algorithm(jwt::algorithm::es256k(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "es384") + verifier = verifier.allow_algorithm(jwt::algorithm::es384(public_key, private_key, public_key_password, private_key_password)); + else if (algo == "es512") + verifier = verifier.allow_algorithm(jwt::algorithm::es512(public_key, private_key, public_key_password, private_key_password)); + else if (algo.starts_with("hs")) + { + auto key = static_key; + if (static_key_in_base64) + key = base64Decode(key); + if (algo == "hs256") + verifier = verifier.allow_algorithm(jwt::algorithm::hs256(key)); + else if (algo == "hs384") + verifier = verifier.allow_algorithm(jwt::algorithm::hs384(key)); + else if (algo == "hs512") + verifier = verifier.allow_algorithm(jwt::algorithm::hs512(key)); + else + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, unknown algorithm {}", processor_name, algo); + } + else + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, unknown algorithm {}", processor_name, algo); + + if (!expected_issuer.empty()) + verifier = verifier.with_issuer(expected_issuer); + + if (!expected_audience.empty()) + verifier = verifier.with_audience(expected_audience); +} + +namespace +{ +bool checkUserClaims(const TokenCredentials & credentials, const String & claims_to_check) +{ + try { + auto decoded_jwt = jwt::decode(credentials.getToken()); + return check_claims(claims_to_check, decoded_jwt.get_payload_json()); + } + catch (const std::exception &) + { + return 
false; + } +} +} + +bool StaticKeyJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) +{ + return checkUserClaims(credentials, claims_to_check); +} + +bool JwksJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) +{ + return checkUserClaims(credentials, claims_to_check); +} + +bool StaticKeyJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + try + { + auto decoded_jwt = jwt::decode(credentials.getToken()); + verifier.verify(decoded_jwt); + + if (!allow_no_expiration && !decoded_jwt.has_expires_at()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Token missing 'exp' claim, rejecting", processor_name); + return false; + } + + if (!check_claims(claims, decoded_jwt.get_payload_json())) + return false; + + if (!decoded_jwt.has_payload_claim(username_claim)) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: Specified username_claim {} not found in token", processor_name, username_claim); + return false; + } + + credentials.setUserName(decoded_jwt.get_payload_claim(username_claim).as_string()); + + if (decoded_jwt.has_payload_claim(groups_claim)) + credentials.setGroups(parseGroupsFromJsonArray(decoded_jwt.get_payload_claim(groups_claim).as_array())); + else + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Specified groups_claim {} not found in token, no external roles will be mapped", processor_name, groups_claim); + + return true; + } + catch (const std::exception & ex) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to validate JWT: {}", processor_name, ex.what()); + return false; + } +} + +bool JwksJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + auto decoded_jwt = jwt::decode(credentials.getToken()); + + if (!allow_no_expiration && !decoded_jwt.has_expires_at()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Token missing 'exp' claim, rejecting", processor_name); + return false; + } + + if 
(!decoded_jwt.has_payload_claim(username_claim)) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: Specified username_claim not found in token", processor_name); + return false; + } + + if (!decoded_jwt.has_key_id()) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: 'kid' (key ID) claim not found in token", processor_name); + return false; + } + + if (!provider->getJWKS().has_jwk(decoded_jwt.get_key_id())) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWKS error: no JWK found for JWT"); + + auto jwk = provider->getJWKS().get_jwk(decoded_jwt.get_key_id()); + auto username = decoded_jwt.get_payload_claim(username_claim).as_string(); + + if (!decoded_jwt.has_algorithm()) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: Algorithm not specified in token", processor_name); + return false; + } + auto algo = Poco::toLower(decoded_jwt.get_algorithm()); + + + String public_key; + + try + { + auto x5c = jwk.get_x5c_key_value(); + + if (!x5c.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Verifying {} with 'x5c' key", processor_name, username); + public_key = jwt::helper::convert_base64_der_to_pem(x5c); + } + } + catch (const jwt::error::claim_not_present_exception &) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: x5c was not specified in JWK, will try RSA components", processor_name); + } + catch (const std::bad_cast &) + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: invalid claim value type found, claims must be strings"); + } + + if (public_key.empty()) + { + if (!(jwk.has_jwk_claim("n") && jwk.has_jwk_claim("e"))) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK: 'n' or 'e' not found", processor_name); + LOG_TRACE(getLogger("TokenAuthentication"), "{}: `issuer` or `x5c` not present, verifying {} with RSA components", processor_name, username); + const auto modulus = jwk.get_jwk_claim("n").as_string(); + const auto exponent = jwk.get_jwk_claim("e").as_string(); + 
public_key = jwt::helper::create_public_key_from_rsa_components(modulus, exponent); + } + + if (jwk.has_algorithm() && Poco::toLower(jwk.get_algorithm()) != algo) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT validation error: `alg` in JWK does not match the algorithm used in JWT"); + + if (algo == "rs256") + verifier = verifier.allow_algorithm(jwt::algorithm::rs256(public_key, "", "", "")); + else if (algo == "rs384") + verifier = verifier.allow_algorithm(jwt::algorithm::rs384(public_key, "", "", "")); + else if (algo == "rs512") + verifier = verifier.allow_algorithm(jwt::algorithm::rs512(public_key, "", "", "")); + else + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unknown algorithm {}", algo); + + verifier = verifier.leeway(verifier_leeway); + + if (!expected_issuer.empty()) + verifier = verifier.with_issuer(expected_issuer); + + if (!expected_audience.empty()) + verifier = verifier.with_audience(expected_audience); + + verifier.verify(decoded_jwt); + + if (!claims.empty() && !check_claims(claims, decoded_jwt.get_payload_json())) + return false; + + credentials.setUserName(decoded_jwt.get_payload_claim(username_claim).as_string()); + + if (decoded_jwt.has_payload_claim(groups_claim)) + credentials.setGroups(parseGroupsFromJsonArray(decoded_jwt.get_payload_claim(groups_claim).as_array())); + else + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Specified groups_claim {} not found in token, no external roles will be mapped", processor_name, groups_claim); + + return true; +} + +} + +#endif diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp new file mode 100644 index 000000000000..6a8ced064c96 --- /dev/null +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -0,0 +1,403 @@ +#include "TokenProcessors.h" + +#if USE_JWT_CPP +#include +#include +#include +#include +#include + +namespace DB { + +namespace ErrorCodes +{ + extern const int AUTHENTICATION_FAILED; + extern const int 
INVALID_CONFIG_PARAMETER; +} + +namespace +{ + /// The JSON reply from provider has only a few key-value pairs, so no need for any advanced parsing. + /// Reduce complexity by using picojson. + picojson::object parseJSON(const String & json_string) { + picojson::value jsonValue; + std::string err = picojson::parse(jsonValue, json_string); + + if (!err.empty()) { + throw std::runtime_error("JSON parsing error: " + err); + } + + if (!jsonValue.is()) { + throw std::runtime_error("JSON is not an object"); + } + + return jsonValue.get(); + } + + template + std::optional getValueByKey(const picojson::object & jsonObject, const std::string & key) { + auto it = jsonObject.find(key); // Find the key in the object + if (it == jsonObject.end()) + { + if constexpr (throw_on_exception) + throw std::runtime_error("Key not found: " + key); + else + return std::nullopt; + } + + const picojson::value & value = it->second; + if (!value.is()) { + if constexpr (throw_on_exception) + throw std::runtime_error("Value for key '" + key + "' has incorrect type."); + else + return std::nullopt; + } + + return value.get(); + } + + picojson::object getObjectFromURI(const Poco::URI & uri, const String & token = "") + { + Poco::Net::HTTPResponse response; + std::ostringstream responseString; + + Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, uri.getPathAndQuery()}; + if (!token.empty()) + request.add("Authorization", "Bearer " + token); + + if (uri.getScheme() == "https") { + Poco::Net::HTTPSClientSession session(uri.getHost(), uri.getPort()); + session.sendRequest(request); + Poco::StreamCopier::copyStream(session.receiveResponse(response), responseString); + } + else + { + Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort()); + session.sendRequest(request); + Poco::StreamCopier::copyStream(session.receiveResponse(response), responseString); + } + + if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, 
+ "Failed to get user info by access token, code: {}, reason: {}", response.getStatus(), + response.getReason()); + + try + { + return parseJSON(responseString.str()); + } + catch (const std::runtime_error & e) + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Failed to parse server response: {}", e.what()); + } + } +} + +bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + const String & token = credentials.getToken(); + + std::unordered_map user_info; + picojson::object user_info_json = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/userinfo"), token); + + if (!user_info_json.contains("email")) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "{}: Specified username_claim {} not found in token", processor_name, username_claim); + + user_info["email"] = getValueByKey(user_info_json, "email").value_or(""); + + user_info[username_claim] = getValueByKey(user_info_json, username_claim).value(); + + String user_name = user_info[username_claim]; + + credentials.setUserName(user_name); + + auto token_info = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/tokeninfo"), token); + if (token_info.contains("exp")) + credentials.setExpiresAt(std::chrono::system_clock::from_time_t((getValueByKey(token_info, "exp").value()))); + + /// Groups info can only be retrieved if user email is known. + /// If no email found in user info, we skip this step and there are no external roles for the user. 
+ if (!user_info["email"].empty()) + { + std::set external_groups_names; + const Poco::URI get_groups_uri = Poco::URI("https://cloudidentity.googleapis.com/v1/groups/-/memberships:searchDirectGroups?query=member_key_id==" + user_info["email"] + "'"); + + try + { + auto groups_response = getObjectFromURI(get_groups_uri, token); + + if (!groups_response.contains("memberships") || !groups_response["memberships"].is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Google groups: invalid content in response from server", processor_name); + return true; + } + + for (const auto & group: groups_response["memberships"].get()) + { + if (!group.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Google groups: invalid content in response from server", processor_name); + continue; + } + + auto group_data = group.get(); + String group_name = getValueByKey(group_data["groupKey"].get(), "id").value_or(""); + if (!group_name.empty()) + { + external_groups_names.insert(group_name); + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: User {}: new external group {}", processor_name, user_name, group_name); + } + } + + credentials.setGroups(external_groups_names); + } + catch (const Exception & e) + { + /// Could not get groups info. Log it and skip it. + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Google groups, no external roles will be mapped. reason: {}", processor_name, e.what()); + return true; + } + } + + return true; +} + +bool AzureTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + /// Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS. + /// We will not trust user data in this token except for 'exp' value to determine caching duration. + /// Explanation here: https://stackoverflow.com/questions/60778634/failing-signature-validation-of-jwt-tokens-from-azure-ad + /// Let Azure validate it: only valid tokens will be accepted. 
+ /// Use GET https://graph.microsoft.com/oidc/userinfo to verify token and get user info at the same time + + const String & token = credentials.getToken(); + + try + { + picojson::object user_info_json = getObjectFromURI(Poco::URI("https://graph.microsoft.com/oidc/userinfo"), token); + String username = getValueByKey(user_info_json, username_claim).value(); + + if (!username.empty()) + credentials.setUserName(username); + else + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to get username with token", processor_name); + + } + catch (...) + { + return false; + } + + try + { + credentials.setExpiresAt(jwt::decode(token).get_expires_at()); + } + catch (...) { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: No expiration data found in a valid token, will use default cache lifetime", processor_name); + } + + std::set external_groups_names; + const Poco::URI get_groups_uri = Poco::URI("https://graph.microsoft.com/v1.0/me/memberOf"); + + try + { + auto groups_response = getObjectFromURI(get_groups_uri, token); + + if (!groups_response.contains("value") || !groups_response["value"].is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Azure groups: invalid content in response from server", processor_name); + return true; + } + + picojson::array groups_array = groups_response["value"].get(); + + for (const auto & group: groups_array) + { + /// Got some invalid response. Ignore this, log this. 
+ if (!group.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Azure groups: invalid content in response from server", processor_name); + continue; + } + + auto group_data = group.get(); + if (!group_data.contains("displayName")) + continue; + + String group_name = getValueByKey(group_data, "displayName").value_or(""); + if (!group_name.empty()) + { + external_groups_names.insert(group_name); + LOG_TRACE(getLogger("TokenAuthentication"), "{}: User {}: new external group {}", processor_name, credentials.getUserName(), group_name); + } + } + } + catch (const Exception & e) + { + /// Could not get groups info. Log it and skip it. + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to get Azure groups, no external roles will be mapped. reason: {}", processor_name, e.what()); + return true; + } + + credentials.setGroups(external_groups_names); + return true; +} + +OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const String & userinfo_endpoint_, + const String & token_introspection_endpoint_, + UInt64 verifier_leeway_, + const String & jwks_uri_, + UInt64 jwks_cache_lifetime_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), + expected_issuer(expected_issuer_), expected_audience(expected_audience_), + allow_no_expiration(allow_no_expiration_), + userinfo_endpoint(userinfo_endpoint_), token_introspection_endpoint(token_introspection_endpoint_) +{ + if (!jwks_uri_.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: JWKS URI set, local JWT processing will be attempted", processor_name_); + jwt_validator.emplace(processor_name_ + "jwks_val", + token_cache_lifetime_, + username_claim_, + groups_claim_, + expected_issuer_, + expected_audience_, + 
allow_no_expiration_, + "", + verifier_leeway_, + jwks_uri_, + jwks_cache_lifetime_); + } +} + +OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + bool allow_no_expiration_, + const String & openid_config_endpoint_, + UInt64 verifier_leeway_, + UInt64 jwks_cache_lifetime_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), + expected_issuer(expected_issuer_), expected_audience(expected_audience_), + allow_no_expiration(allow_no_expiration_) +{ + const picojson::object openid_config = getObjectFromURI(Poco::URI(openid_config_endpoint_)); + + if (!openid_config.contains("userinfo_endpoint") || !openid_config.contains("introspection_endpoint")) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: Cannot extract userinfo_endpoint or introspection_endpoint from OIDC configuration, consider manual configuration.", processor_name); + + if (openid_config.contains("jwks_uri")) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: JWKS URI set, local JWT processing will be attempted", processor_name_); + jwt_validator.emplace(processor_name_ + "jwks_val", + token_cache_lifetime_, + username_claim_, + groups_claim_, + expected_issuer_, + expected_audience_, + allow_no_expiration_, + "", + verifier_leeway_, + getValueByKey(openid_config, "jwks_uri").value(), + jwks_cache_lifetime_); + } +} + +bool OpenIdTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + const String & token = credentials.getToken(); + String username; + picojson::object user_info_json; + + if (jwt_validator.has_value() && jwt_validator.value().resolveAndValidate(credentials)) + { + try + { + auto decoded_token = jwt::decode(token); + user_info_json = decoded_token.get_payload_json(); + username = getValueByKey(user_info_json, 
username_claim).value(); + + /// TODO: Now we work only with Keycloak -- and it provides expires_at in token itself. Need to add actual token introspection logic for other OIDC providers. + if (decoded_token.has_expires_at()) + credentials.setExpiresAt(decoded_token.get_expires_at()); + } + catch (const std::exception & ex) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to process token as JWT: {}", processor_name, ex.what()); + } + } + + /// If username or user info is empty -- local validation failed, trying introspection via provider + if (username.empty() || user_info_json.empty()) + { + try + { + user_info_json = getObjectFromURI(userinfo_endpoint, token); + username = getValueByKey(user_info_json, username_claim).value(); + } + catch (...) + { + return false; + } + } + + if (user_info_json.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to obtain user info", processor_name); + return false; + } + + if (username.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to get username", processor_name); + return false; + } + + credentials.setUserName(username); + + /// For now, list of groups is expected in a claim with specified name either in token itself or in userinfo response (Keycloak works this way) + /// TODO: add support for custom endpoints for retrieving groups. Keycloak lists groups in /userinfo and token itself, which is not always the case. 
+ if (!groups_claim.empty() && user_info_json.contains(groups_claim)) + { + if (!user_info_json[groups_claim].is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to extract groups: invalid content in user data", processor_name); + return true; + } + + std::set external_groups_names; + + picojson::array groups_array = user_info_json[groups_claim].get(); + for (const auto & group: groups_array) + { + if (group.is()) + external_groups_names.insert(group.get()); + } + credentials.setGroups(external_groups_names); + } + + return true; +} + +} +#endif diff --git a/src/Access/TokenProcessorsParse.cpp b/src/Access/TokenProcessorsParse.cpp new file mode 100644 index 000000000000..fa83c5fa6a34 --- /dev/null +++ b/src/Access/TokenProcessorsParse.cpp @@ -0,0 +1,139 @@ +#include "TokenProcessors.h" + +#include +#include + +namespace DB { + +namespace ErrorCodes +{ + extern const int INVALID_CONFIG_PARAMETER; + extern const int SUPPORT_IS_DISABLED; +} + +#if USE_JWT_CPP +std::unique_ptr ITokenProcessor::parseTokenProcessor( + const Poco::Util::AbstractConfiguration & config, + const String & prefix, + const String & processor_name) +{ + if (!config.hasProperty(prefix + ".type")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'type' parameter shall be specified in token_processor configuration.'"); + + auto provider_type = Poco::toLower(config.getString(prefix + ".type")); + + auto token_cache_lifetime = config.getUInt64(prefix + ".token_cache_lifetime", 3600); + auto username_claim = config.getString(prefix + ".username_claim", "sub"); + auto groups_claim = config.getString(prefix + ".groups_claim", "groups"); + auto expected_issuer = config.getString(prefix + ".expected_issuer", ""); + auto expected_audience = config.getString(prefix + ".expected_audience", ""); + auto allow_no_expiration = config.getBool(prefix + ".allow_no_expiration", false); + + if (provider_type == "google") + { + return std::make_unique(processor_name, token_cache_lifetime, 
username_claim, groups_claim); + } + else if (provider_type == "azure") + { + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim); + } + else if (provider_type == "openid") + { + auto verifier_leeway = config.getUInt64(prefix + ".verifier_leeway", 60); + auto jwks_cache_lifetime = config.getUInt64(prefix + ".jwks_cache_lifetime", 3600); + + bool externally_configured = config.hasProperty(prefix + ".configuration_endpoint") && !config.hasProperty(prefix + ".jwks_uri"); + bool locally_configured = config.hasProperty(prefix + ".userinfo_endpoint") && config.hasProperty(prefix + ".token_introspection_endpoint"); + + if (externally_configured && ! locally_configured) + { + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, + expected_issuer, expected_audience, allow_no_expiration, + config.getString(prefix + ".configuration_endpoint"), + verifier_leeway, + jwks_cache_lifetime); + } + else if (locally_configured && !externally_configured) + { + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, + expected_issuer, expected_audience, allow_no_expiration, + config.getString(prefix + ".userinfo_endpoint"), + config.getString(prefix + ".token_introspection_endpoint"), + verifier_leeway, + config.getString(prefix + ".jwks_uri", ""), + jwks_cache_lifetime); + } + + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Either 'configuration_endpoint' or both 'userinfo_endpoint' and 'token_introspection_endpoint' (and, optionally, 'jwks_uri') must be specified for 'openid' processor"); + } + else if (provider_type == "jwt_static_key") + { + if (!config.hasProperty(prefix + ".static_key")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'static_key' must be specified for 'jwt_static_key' processor"); + + if (!config.hasProperty(prefix + ".algo")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'algo' must be specified for 
'jwt_static_key' processor"); + + StaticKeyJwtParams params = {Poco::toLower(config.getString(prefix + ".algo")), + config.getString(prefix + ".static_key", ""), + config.getBool(prefix + ".static_key_in_base64", false), + config.getString(prefix + ".public_key", ""), + config.getString(prefix + ".private_key", ""), + config.getString(prefix + ".public_key_password", ""), + config.getString(prefix + ".private_key_password", ""), + config.getString(prefix + ".claims", "")}; + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, allow_no_expiration, params); + } + else if (provider_type == "jwt_static_jwks") + { + if (config.hasProperty(prefix + ".static_jwks") && config.hasProperty(prefix + ".static_jwks_file")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'static_jwks' and 'static_jwks_file' cannot be specified simultaneously for 'jwt_static_jwks' processor"); + + if (!config.hasProperty(prefix + ".static_jwks") && !config.hasProperty(prefix + ".static_jwks_file")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'static_jwks' or 'static_jwks_file' must be specified for 'jwt_static_jwks' processor"); + + if (config.hasProperty(prefix + ".jwks_uri")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'jwks_uri' cannot be specified for 'jwt_static_jwks' processor"); + + StaticJWKSParams params + { + config.getString(prefix + ".static_jwks", ""), + config.getString(prefix + ".static_jwks_file", "") + }; + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, + expected_issuer, expected_audience, allow_no_expiration, + config.getString(prefix + ".claims", ""), + config.getUInt64(prefix + ".verifier_leeway", 0), + std::make_shared(params)); + } + if (provider_type == "jwt_dynamic_jwks") + { + if (config.hasProperty(prefix + ".static_jwks") || config.hasProperty(prefix + ".static_jwks_file")) + throw 
DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'static_jwks' and 'static_jwks_file' cannot be specified for 'jwt_dynamic_jwks' processor"); + if (!config.hasProperty(prefix + ".jwks_uri")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'jwks_uri' must be specified for 'jwt_dynamic_jwks' processor"); + + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, + expected_issuer, expected_audience, allow_no_expiration, + config.getString(prefix + ".claims", ""), + config.getUInt64(prefix + ".verifier_leeway", 0), + config.getString(prefix + ".jwks_uri"), + config.getUInt(prefix + ".jwks_cache_lifetime", 3600)); + } + else + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Invalid type: {}", provider_type); + + // throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Failed to parse token processor: {}", processor_name); +} + +#else +std::unique_ptr ITokenProcessor::parseTokenProcessor( + const Poco::Util::AbstractConfiguration &, + const String &, + const String &) +{ + throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Failed to parse token_processor, ClickHouse was built without JWT support."); +} +#endif + +} diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 894fbd4f0aec..1d32fef8dacd 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -5,6 +5,11 @@ #include #include #include +<<<<<<< HEAD +======= +#include +#include "Access/Credentials.h" +>>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) #include #include #include @@ -15,6 +20,808 @@ namespace DB { +<<<<<<< HEAD +======= +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int UNKNOWN_ADDRESS_PATTERN_TYPE; + extern const int THERE_IS_NO_PROFILE; + extern const int NOT_IMPLEMENTED; + extern const int SUPPORT_IS_DISABLED; +} + +namespace +{ + + UUID generateID(AccessEntityType type, const 
String & name) + { + Poco::MD5Engine md5; + md5.update(name); + char type_storage_chars[] = " USRSXML"; + type_storage_chars[0] = AccessEntityTypeInfo::get(type).unique_char; + md5.update(type_storage_chars, strlen(type_storage_chars)); + UUID result; + memcpy(&result, md5.digest().data(), md5.digestLength()); + transformEndianness(result); + return result; + } + + UUID generateID(const IAccessEntity & entity) { return generateID(entity.getType(), entity.getName()); } + + template + void parseGrant(T & entity, const String & string_query, const std::unordered_set & allowed_role_ids) + { + ParserGrantQuery parser; + parser.setParseWithoutGrantees(); + + String error_message; + const char * pos = string_query.data(); + auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true); + + if (!ast) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse grant query. Error: {}", error_message); + + auto & query = ast->as(); + + if (query.roles && query.is_revoke) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Roles can't be revoked in config file"); + + if (!query.cluster.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Can't grant on cluster using config file"); + + if (query.grantees) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "You can't specify grantees in query using config file"); + + for (auto & element : query.access_rights_elements) + { + if (query.is_revoke) + entity.access.revoke(element); + else + entity.access.grant(element); + } + + if (query.roles) + { + std::vector roles_to_grant; + roles_to_grant.reserve(query.roles->size()); + + for (const auto & role_name : query.roles->names) + { + auto role_id = generateID(AccessEntityType::ROLE, role_name); + if (!allowed_role_ids.contains(role_id)) + throw Exception(ErrorCodes::THERE_IS_NO_PROFILE, "Role {} was not found", role_name); + + roles_to_grant.push_back(role_id); + } + + if 
(query.admin_option) + entity.granted_roles.grantWithAdminOption(roles_to_grant); + else + entity.granted_roles.grant(roles_to_grant); + } + } + + UserPtr parseUser( + const Poco::Util::AbstractConfiguration & config, + const String & user_name, + const std::unordered_set & allowed_profile_ids, + const std::unordered_set & allowed_role_ids, + bool allow_no_password, + bool allow_plaintext_password) + { + const bool validate = true; + auto user = std::make_shared(); + user->setName(user_name); + String user_config = "users." + user_name; + bool has_no_password = config.has(user_config + ".no_password"); + bool has_password_plaintext = config.has(user_config + ".password"); + bool has_password_sha256_hex = config.has(user_config + ".password_sha256_hex"); + bool has_scram_password_sha256_hex = config.has(user_config + ".password_scram_sha256_hex"); + bool has_password_double_sha1_hex = config.has(user_config + ".password_double_sha1_hex"); + bool has_ldap = config.has(user_config + ".ldap"); + bool has_kerberos = config.has(user_config + ".kerberos"); + bool has_jwt = config.has(user_config + ".jwt"); + + const auto certificates_config = user_config + ".ssl_certificates"; + bool has_certificates = config.has(certificates_config); + + const auto ssh_keys_config = user_config + ".ssh_keys"; + bool has_ssh_keys = config.has(ssh_keys_config); + + const auto http_auth_config = user_config + ".http_authentication"; + bool has_http_auth = config.has(http_auth_config); + + size_t num_password_fields = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex + + has_ldap + has_kerberos + has_certificates + has_ssh_keys + has_http_auth + has_scram_password_sha256_hex + has_jwt; + + if (num_password_fields > 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "More than one field of 'password', 'password_sha256_hex', " + "'password_double_sha1_hex', 'no_password', 'ldap', 'kerberos', 'ssl_certificates', 'ssh_keys', " + 
"'http_authentication', 'jwt' are used to specify authentication info for user {}. " + "Must be only one of them.", user_name); + + if (num_password_fields < 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'password' or 'password_sha256_hex' " + "or 'password_double_sha1_hex' or 'no_password' or 'ldap' or 'kerberos " + "or 'ssl_certificates' or 'ssh_keys' or 'http_authentication' or 'jwt' must be specified for user {}.", user_name); + + if (has_password_plaintext) + { + user->authentication_methods.emplace_back(AuthenticationType::PLAINTEXT_PASSWORD); + user->authentication_methods.back().setPassword(config.getString(user_config + ".password"), validate); + } + else if (has_password_sha256_hex) + { + user->authentication_methods.emplace_back(AuthenticationType::SHA256_PASSWORD); + user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_sha256_hex"), validate); + } + else if (has_scram_password_sha256_hex) + { + user->authentication_methods.emplace_back(AuthenticationType::SCRAM_SHA256_PASSWORD); + user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_scram_sha256_hex"), validate); + } + else if (has_password_double_sha1_hex) + { + user->authentication_methods.emplace_back(AuthenticationType::DOUBLE_SHA1_PASSWORD); + user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex"), validate); + } + else if (has_ldap) + { + bool has_ldap_server = config.has(user_config + ".ldap.server"); + if (!has_ldap_server) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Missing mandatory 'server' in 'ldap', with LDAP server name, for user {}.", user_name); + + const auto ldap_server_name = config.getString(user_config + ".ldap.server"); + if (ldap_server_name.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "LDAP server name cannot be empty for user {}.", user_name); + + 
user->authentication_methods.emplace_back(AuthenticationType::LDAP); + user->authentication_methods.back().setLDAPServerName(ldap_server_name); + } + else if (has_kerberos) + { + const auto realm = config.getString(user_config + ".kerberos.realm", ""); + + user->authentication_methods.emplace_back(AuthenticationType::KERBEROS); + user->authentication_methods.back().setKerberosRealm(realm); + } + else if (has_certificates) + { +#if USE_SSL + user->authentication_methods.emplace_back(AuthenticationType::SSL_CERTIFICATE); + + /// Fill list of allowed certificates. + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(certificates_config, keys); + for (const String & key : keys) + { + if (key.starts_with("common_name")) + { + String value = config.getString(certificates_config + "." + key); + user->authentication_methods.back().addSSLCertificateSubject(X509Certificate::Subjects::Type::CN, std::move(value)); + } + else if (key.starts_with("subject_alt_name")) + { + String value = config.getString(certificates_config + "." + key); + if (value.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected ssl_certificates.subject_alt_name to not be empty"); + user->authentication_methods.back().addSSLCertificateSubject(X509Certificate::Subjects::Type::SAN, std::move(value)); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown certificate pattern type: {}", key); + } +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL certificates support is disabled, because ClickHouse was built without SSL library"); +#endif + } + else if (has_ssh_keys) + { +#if USE_SSH + user->authentication_methods.emplace_back(AuthenticationType::SSH_KEY); + + Poco::Util::AbstractConfiguration::Keys entries; + config.keys(ssh_keys_config, entries); + std::vector keys; + for (const String& entry : entries) + { + const auto conf_pref = ssh_keys_config + "." 
+ entry + "."; + if (entry.starts_with("ssh_key")) + { + String type; + String base64_key; + if (config.has(conf_pref + "type")) + { + type = config.getString(conf_pref + "type"); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected type field in {} entry", entry); + if (config.has(conf_pref + "base64_key")) + { + base64_key = config.getString(conf_pref + "base64_key"); + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected base64_key field in {} entry", entry); + + + try + { + keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(base64_key, type)); + } + catch (const std::invalid_argument &) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad SSH key in entry: {}", entry); + } + } + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown ssh_key entry pattern type: {}", entry); + } + user->authentication_methods.back().setSSHKeys(std::move(keys)); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); +#endif + } + else if (has_http_auth) + { + user->authentication_methods.emplace_back(AuthenticationType::HTTP); + user->authentication_methods.back().setHTTPAuthenticationServerName(config.getString(http_auth_config + ".server")); + auto scheme = config.getString(http_auth_config + ".scheme"); + user->authentication_methods.back().setHTTPAuthenticationScheme(parseHTTPAuthenticationScheme(scheme)); + } + else if (has_jwt) + { + user->authentication_methods.emplace_back(AuthenticationType::JWT); + } + else + { + user->authentication_methods.emplace_back(); + } + + for (const auto & authentication_method : user->authentication_methods) + { + auto auth_type = authentication_method.getType(); + if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || + ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) + { + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Authentication type {} is not allowed, check the setting allow_{} 
in the server configuration", + toString(auth_type), AuthenticationTypeInfo::get(auth_type).name); + } + } + + const auto profile_name_config = user_config + ".profile"; + if (config.has(profile_name_config)) + { + auto profile_name = config.getString(profile_name_config); + auto profile_id = generateID(AccessEntityType::SETTINGS_PROFILE, profile_name); + if (!allowed_profile_ids.contains(profile_id)) + throw Exception(ErrorCodes::THERE_IS_NO_PROFILE, "Profile {} was not found", profile_name); + SettingsProfileElement profile_element; + profile_element.parent_profile = profile_id; + user->settings.push_back(std::move(profile_element)); + } + + /// Fill list of allowed hosts. + const auto networks_config = user_config + ".networks"; + if (config.has(networks_config)) + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(networks_config, keys); + user->allowed_client_hosts.clear(); + for (const String & key : keys) + { + String value = config.getString(networks_config + "." + key); + if (key.starts_with("ip")) + user->allowed_client_hosts.addSubnet(value); + else if (key.starts_with("host_regexp")) + user->allowed_client_hosts.addNameRegexp(value); + else if (key.starts_with("host")) + user->allowed_client_hosts.addName(value); + else + throw Exception(ErrorCodes::UNKNOWN_ADDRESS_PATTERN_TYPE, "Unknown address pattern type: {}", key); + } + } + + /// Fill list of allowed databases. + const auto databases_config = user_config + ".allow_databases"; + std::optional databases; + if (config.has(databases_config)) + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(databases_config, keys); + databases.emplace(); + databases->reserve(keys.size()); + for (const auto & key : keys) + { + const auto database_name = config.getString(databases_config + "." + key); + databases->push_back(database_name); + } + } + + /// Fill list of allowed dictionaries. 
+ const auto dictionaries_config = user_config + ".allow_dictionaries"; + std::optional dictionaries; + if (config.has(dictionaries_config)) + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(dictionaries_config, keys); + dictionaries.emplace(); + dictionaries->reserve(keys.size()); + for (const auto & key : keys) + { + const auto dictionary_name = config.getString(dictionaries_config + "." + key); + dictionaries->push_back(dictionary_name); + } + } + + const auto grants_config = user_config + ".grants"; + std::optional grant_queries; + if (config.has(grants_config)) + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(grants_config, keys); + grant_queries.emplace(); + grant_queries->reserve(keys.size()); + for (const auto & key : keys) + { + const auto query = config.getString(grants_config + "." + key); + grant_queries->push_back(query); + } + } + + bool access_management = config.getBool(user_config + ".access_management", false); + bool named_collection_control = config.getBool(user_config + ".named_collection_control", false) || config.getBool(user_config + ".named_collection_admin", false); + bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false); + + if (grant_queries) + if (databases || dictionaries || access_management || named_collection_control || show_named_collections_secrets) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Any other access control settings can't be specified with `grants`"); + + if (grant_queries) + { + for (const auto & string_query : *grant_queries) + parseGrant(*user, string_query, allowed_role_ids); + } + else + { + /// By default all databases are accessible + /// and the user can grant everything he has. 
+ user->access.grantWithGrantOption(AccessType::ALL); + + if (databases) + { + user->access.revoke(AccessFlags::allFlags() - AccessFlags::allGlobalFlags()); + user->access.grantWithGrantOption(AccessType::TABLE_ENGINE); + user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG); + for (const String & database : *databases) + user->access.grantWithGrantOption(AccessFlags::allFlags(), database); + } + + if (dictionaries) + { + user->access.revoke(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG); + for (const String & dictionary : *dictionaries) + user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG, dictionary); + } + + if (!access_management) + { + user->access.revoke(AccessType::ACCESS_MANAGEMENT); + user->access.revokeGrantOption(AccessType::ALL); + } + + if (!named_collection_control) + { + user->access.revoke(AccessType::NAMED_COLLECTION_ADMIN); + } + + if (!show_named_collections_secrets) + { + user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS_SECRETS); + } + } + + String default_database = config.getString(user_config + ".default_database", ""); + user->default_database = default_database; + + return user; + } + + + std::vector parseUsers( + const Poco::Util::AbstractConfiguration & config, + const std::unordered_set & allowed_profile_ids, + const std::unordered_set & allowed_role_ids, + bool allow_no_password, + bool allow_plaintext_password) + { + Poco::Util::AbstractConfiguration::Keys user_names; + config.keys("users", user_names); + + std::vector users; + users.reserve(user_names.size()); + for (const auto & user_name : user_names) + { + try + { + users.push_back(parseUser(config, user_name, allowed_profile_ids, allowed_role_ids, allow_no_password, allow_plaintext_password)); + } + catch (Exception & e) + { + e.addMessage(fmt::format("while parsing user '{}' in users configuration file", user_name)); + throw; + } + } + + return users; + } + + 
RolePtr parseRole( + const Poco::Util::AbstractConfiguration & config, + const String & role_name, + const std::unordered_set & allowed_role_ids) + { + auto role = std::make_shared(); + role->setName(role_name); + String role_config = "roles." + role_name; + + const auto grants_config = role_config + ".grants"; + if (config.has(grants_config)) + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(grants_config, keys); + for (const auto & key : keys) + { + const auto query = config.getString(grants_config + "." + key); + parseGrant(*role, query, allowed_role_ids); + } + } + + return role; + } + + std::vector parseRoles( + const Poco::Util::AbstractConfiguration & config, + const std::unordered_set & allowed_role_ids) + { + Poco::Util::AbstractConfiguration::Keys role_names; + config.keys("roles", role_names); + + std::vector roles; + roles.reserve(role_names.size()); + for (const auto & role_name : role_names) + { + try + { + roles.push_back(parseRole(config, role_name, allowed_role_ids)); + } + catch (Exception & e) + { + e.addMessage(fmt::format("while parsing roles '{}' in users configuration file", role_name)); + throw; + } + } + + return roles; + } + + + QuotaPtr parseQuota(const Poco::Util::AbstractConfiguration & config, const String & quota_name, const std::vector & user_ids) + { + auto quota = std::make_shared(); + quota->setName(quota_name); + + String quota_config = "quotas." 
+ quota_name; + if (config.has(quota_config + ".keyed_by_ip")) + quota->key_type = QuotaKeyType::IP_ADDRESS; + else if (config.has(quota_config + ".keyed_by_forwarded_ip")) + quota->key_type = QuotaKeyType::FORWARDED_IP_ADDRESS; + else if (config.has(quota_config + ".keyed")) + quota->key_type = QuotaKeyType::CLIENT_KEY_OR_USER_NAME; + else + quota->key_type = QuotaKeyType::USER_NAME; + + Poco::Util::AbstractConfiguration::Keys interval_keys; + config.keys(quota_config, interval_keys); + + for (const String & interval_key : interval_keys) + { + if (!startsWith(interval_key, "interval")) + continue; + + String interval_config = quota_config + "." + interval_key; + std::chrono::seconds duration{config.getInt(interval_config + ".duration", 0)}; + if (duration.count() <= 0) /// Skip quotas with non-positive duration. + continue; + + quota->all_limits.emplace_back(); + auto & limits = quota->all_limits.back(); + limits.duration = duration; + limits.randomize_interval = config.getBool(interval_config + ".randomize", false); + + for (auto quota_type : collections::range(QuotaType::MAX)) + { + const auto & type_info = QuotaTypeInfo::get(quota_type); + auto value = config.getString(interval_config + "." + type_info.name, "0"); + if (value != "0") + limits.max[static_cast(quota_type)] = type_info.stringToValue(value); + } + } + + quota->to_roles.add(user_ids); + return quota; + } + + + std::vector parseQuotas(const Poco::Util::AbstractConfiguration & config) + { + Poco::Util::AbstractConfiguration::Keys user_names; + config.keys("users", user_names); + std::unordered_map> quota_to_user_ids; + for (const auto & user_name : user_names) + { + if (config.has("users." + user_name + ".quota")) + quota_to_user_ids[config.getString("users." 
+ user_name + ".quota")].push_back(generateID(AccessEntityType::USER, user_name)); + } + + Poco::Util::AbstractConfiguration::Keys quota_names; + config.keys("quotas", quota_names); + + std::vector quotas; + quotas.reserve(quota_names.size()); + + for (const auto & quota_name : quota_names) + { + try + { + auto it = quota_to_user_ids.find(quota_name); + const std::vector & quota_users = (it != quota_to_user_ids.end()) ? std::move(it->second) : std::vector{}; + quotas.push_back(parseQuota(config, quota_name, quota_users)); + } + catch (Exception & e) + { + e.addMessage(fmt::format("while parsing quota '{}' in users configuration file", quota_name)); + throw; + } + } + + return quotas; + } + + + std::vector parseRowPolicies(const Poco::Util::AbstractConfiguration & config, bool users_without_row_policies_can_read_rows) + { + std::map, std::unordered_map> all_filters_map; + + Poco::Util::AbstractConfiguration::Keys user_names; + config.keys("users", user_names); + + for (const String & user_name : user_names) + { + const String databases_config = "users." + user_name + ".databases"; + if (config.has(databases_config)) + { + Poco::Util::AbstractConfiguration::Keys database_keys; + config.keys(databases_config, database_keys); + + /// Read tables within databases + for (const String & database_key : database_keys) + { + const String database_config = databases_config + "." 
+ database_key; + + String database_name; + if (((database_key == "database") || (database_key.starts_with("database["))) && config.has(database_config + "[@name]")) + database_name = config.getString(database_config + "[@name]"); + else if (size_t bracket_pos = database_key.find('['); bracket_pos != std::string::npos) + database_name = database_key.substr(0, bracket_pos); + else + database_name = database_key; + + Poco::Util::AbstractConfiguration::Keys table_keys; + config.keys(database_config, table_keys); + + /// Read table properties + for (const String & table_key : table_keys) + { + String table_config = database_config + "." + table_key; + String table_name; + if (((table_key == "table") || (table_key.starts_with("table["))) && config.has(table_config + "[@name]")) + table_name = config.getString(table_config + "[@name]"); + else if (size_t bracket_pos = table_key.find('['); bracket_pos != std::string::npos) + table_name = table_key.substr(0, bracket_pos); + else + table_name = table_key; + + String filter_config = table_config + ".filter"; + all_filters_map[{database_name, table_name}][user_name] = config.getString(filter_config); + } + } + } + } + + std::vector policies; + for (auto & [database_and_table_name, user_to_filters] : all_filters_map) + { + const auto & [database, table_name] = database_and_table_name; + for (const String & user_name : user_names) + { + String filter; + auto it = user_to_filters.find(user_name); + if (it != user_to_filters.end()) + { + filter = it->second; + } + else + { + if (users_without_row_policies_can_read_rows) + continue; + filter = "1"; + } + + auto policy = std::make_shared(); + policy->setFullName(user_name, database, table_name); + policy->filters[static_cast(RowPolicyFilterType::SELECT_FILTER)] = filter; + policy->to_roles.add(generateID(AccessEntityType::USER, user_name)); + policies.push_back(policy); + } + } + return policies; + } + + + SettingsProfileElements parseSettingsConstraints(const 
Poco::Util::AbstractConfiguration & config, + const String & path_to_constraints, + const AccessControl & access_control) + { + SettingsProfileElements profile_elements; + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(path_to_constraints, keys); + + for (const String & setting_name : keys) + { + access_control.checkSettingNameIsAllowed(setting_name); + + SettingsProfileElement profile_element; + profile_element.setting_name = setting_name; + Poco::Util::AbstractConfiguration::Keys constraint_types; + String path_to_name = path_to_constraints + "." + setting_name; + config.keys(path_to_name, constraint_types); + + size_t writability_count = 0; + for (const String & constraint_type : constraint_types) + { + if (constraint_type == "min") + profile_element.min_value = settingStringToValueUtil(setting_name, config.getString(path_to_name + "." + constraint_type)); + else if (constraint_type == "max") + profile_element.max_value = settingStringToValueUtil(setting_name, config.getString(path_to_name + "." + constraint_type)); + /// When the xml config is parsed, the first constraint_type is parsed as `disallowed` and the subsequent ones are parsed as + /// disallowed[1], disallowed[2] and so on. So, both `disallowed` and `disallowed[` should be considered as valid constraint types. + /// Example: + /// + /// + /// 50 + /// 3 + /// 4 + /// 5 + /// + /// + else if (constraint_type == "disallowed" || constraint_type.starts_with("disallowed[")) + profile_element.disallowed_values.push_back(settingStringToValueUtil(setting_name, config.getString(path_to_name + "." 
+ constraint_type))); + else if (constraint_type == "readonly" || constraint_type == "const") + { + writability_count++; + profile_element.writability = SettingConstraintWritability::CONST; + } + else if (constraint_type == "changeable_in_readonly") + { + writability_count++; + if (access_control.doesSettingsConstraintsReplacePrevious()) + profile_element.writability = SettingConstraintWritability::CHANGEABLE_IN_READONLY; + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Setting changeable_in_readonly for {} is not allowed " + "unless settings_constraints_replace_previous is enabled", setting_name); + } + else + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Setting {} value for {} isn't supported", constraint_type, setting_name); + } + if (writability_count > 1) + throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not more than one constraint writability specifier " + "(const/readonly/changeable_in_readonly) is allowed for {}", setting_name); + + profile_elements.push_back(std::move(profile_element)); + } + + return profile_elements; + } + + std::shared_ptr parseSettingsProfile( + const Poco::Util::AbstractConfiguration & config, + const String & profile_name, + const std::unordered_set & allowed_parent_profile_ids, + const AccessControl & access_control) + { + auto profile = std::make_shared(); + profile->setName(profile_name); + String profile_config = "profiles." + profile_name; + + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(profile_config, keys); + + for (const std::string & key : keys) + { + if (key == "profile" || key.starts_with("profile[")) + { + String parent_profile_name = config.getString(profile_config + "." 
+ key); + auto parent_profile_id = generateID(AccessEntityType::SETTINGS_PROFILE, parent_profile_name); + if (!allowed_parent_profile_ids.contains(parent_profile_id)) + throw Exception(ErrorCodes::THERE_IS_NO_PROFILE, "Parent profile '{}' was not found", parent_profile_name); + SettingsProfileElement profile_element; + profile_element.parent_profile = parent_profile_id; + profile->elements.emplace_back(std::move(profile_element)); + continue; + } + + if (key == "constraints" || key.starts_with("constraints[")) + { + profile->elements.merge(parseSettingsConstraints(config, profile_config + "." + key, access_control)); + continue; + } + + const auto & setting_name = key; + access_control.checkSettingNameIsAllowed(setting_name); + + SettingsProfileElement profile_element; + profile_element.setting_name = setting_name; + profile_element.value = settingStringToValueUtil(setting_name, config.getString(profile_config + "." + key)); + profile->elements.emplace_back(std::move(profile_element)); + } + + return profile; + } + + + std::vector parseSettingsProfiles( + const Poco::Util::AbstractConfiguration & config, + const std::unordered_set & allowed_parent_profile_ids, + const AccessControl & access_control) + { + Poco::Util::AbstractConfiguration::Keys profile_names; + config.keys("profiles", profile_names); + + std::vector profiles; + profiles.reserve(profile_names.size()); + + for (const auto & profile_name : profile_names) + { + try + { + profiles.push_back(parseSettingsProfile(config, profile_name, allowed_parent_profile_ids, access_control)); + } + catch (Exception & e) + { + e.addMessage(fmt::format("while parsing profile '{}' in users configuration file", profile_name)); + throw; + } + } + + return profiles; + } + + std::unordered_set getAllowedIDs( + const Poco::Util::AbstractConfiguration & config, + const String & configuration_key, + const AccessEntityType type) + { + Poco::Util::AbstractConfiguration::Keys keys; + config.keys(configuration_key, keys); + 
std::unordered_set ids; + for (const auto & key : keys) + ids.emplace(generateID(type, key)); + return ids; + } +} + +>>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_, bool allow_backup_) : IAccessStorage(storage_name_) , access_control(access_control_) diff --git a/src/Core/ServerSettings.cpp b/src/Core/ServerSettings.cpp index ee28642bd4a8..1d99f022fdb9 100644 --- a/src/Core/ServerSettings.cpp +++ b/src/Core/ServerSettings.cpp @@ -838,6 +838,15 @@ namespace A value of `0` means unlimited. ::: )", 0) \ + DECLARE(Bool, enable_token_auth, true, R"( + Controls whether token-based (JWT) authentication is enabled. + When disabled: + - Token processors from the `token_processors` section are not parsed. + - `TokenAccessStorage` (token user directory) is not added. + - Authentication via tokens (`--jwt` option in clickhouse-client or `Authorization: Bearer` HTTP header) is rejected. + + Default value: `true` (token authentication is enabled). + )", 0) \ DECLARE(UInt64, concurrent_threads_soft_limit_num, 0, R"( The maximum number of query processing threads, excluding threads for retrieving data from remote servers, allowed to run all queries. This is not a hard limit. In case if the limit is reached the query will still get at least one thread to run. Query can upscale to desired number of threads during execution if more threads become available. 
diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index 35e7d2032d83..7fe8de9bdb5b 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -116,8 +116,11 @@ void ASTAuthenticationData::formatImpl(WriteBuffer & ostr, const FormatSettings } case AuthenticationType::JWT: { - prefix = "CLAIMS"; - parameter = true; + if (!children.empty()) + { + prefix = "CLAIMS"; + parameter = true; + } break; } case AuthenticationType::LDAP: diff --git a/src/Parsers/Access/ASTCreateUserQuery.h b/src/Parsers/Access/ASTCreateUserQuery.h index 3bf6d76f484f..92e3a8b250fd 100644 --- a/src/Parsers/Access/ASTCreateUserQuery.h +++ b/src/Parsers/Access/ASTCreateUserQuery.h @@ -17,7 +17,7 @@ class ASTAuthenticationData; /** CREATE USER [IF NOT EXISTS | OR REPLACE] name - * [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}}|{WITH ldap SERVER 'server_name'}|{WITH kerberos [REALM 'realm']}] + * [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}}|{WITH ldap SERVER 'server_name'}|{WITH kerberos [REALM 'realm']|{WITH jwt [CLAIMS 'json_object']}}] * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] 
| ANY | NONE] * [DEFAULT ROLE role [,...]] * [DEFAULT DATABASE database | NONE] @@ -26,7 +26,7 @@ class ASTAuthenticationData; * * ALTER USER [IF EXISTS] name * [RENAME TO new_name] - * [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}}|{WITH ldap SERVER 'server_name'}|{WITH kerberos [REALM 'realm']}] + * [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}}|{WITH ldap SERVER 'server_name'}|{WITH kerberos [REALM 'realm']|{WITH jwt [CLAIMS 'json_object']}}] * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] * [DEFAULT DATABASE database | NONE] diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index cb1561c0c912..5e520151e4f6 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -83,6 +83,7 @@ namespace bool expect_ssl_cert_subjects = false; bool expect_public_ssh_key = false; bool expect_http_auth_server = false; + bool expect_claims = false; // NOLINT auto parse_non_password_based_type = [&](auto check_type) { @@ -105,6 +106,7 @@ namespace expect_http_auth_server = true; else if (check_type == AuthenticationType::JWT) throw Exception(ErrorCodes::BAD_ARGUMENTS, "CREATE USER is not supported for JWT"); + // expect_claims = true; else if (check_type != AuthenticationType::NO_PASSWORD) expect_password = true; @@ -165,6 +167,7 @@ namespace ASTPtr http_auth_scheme; ASTPtr ssl_cert_subjects; std::optional ssl_cert_subject_type; + ASTPtr jwt_claims; if (expect_password || expect_hash) { @@ -229,6 +232,14 @@ namespace return false; } } + else if (expect_claims) + { + if (ParserKeyword{Keyword::CLAIMS}.ignore(pos, expected)) + { 
+ if (!ParserStringAndSubstitution{}.parse(pos, jwt_claims, expected)) + return false; + } + } auth_data = make_intrusive(); @@ -254,6 +265,9 @@ namespace if (http_auth_scheme) auth_data->children.push_back(std::move(http_auth_scheme)); + if (jwt_claims) + auth_data->children.push_back(std::move(jwt_claims)); + parseValidUntil(pos, expected, auth_data->valid_until); return true; diff --git a/src/Parsers/Access/ParserCreateUserQuery.h b/src/Parsers/Access/ParserCreateUserQuery.h index 4dfff8713d76..5f4cfcd6c45f 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.h +++ b/src/Parsers/Access/ParserCreateUserQuery.h @@ -7,7 +7,7 @@ namespace DB { /** Parses queries like * CREATE USER [IF NOT EXISTS | OR REPLACE] name - * [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}}|{WITH ldap SERVER 'server_name'}|{WITH kerberos [REALM 'realm']}] + * [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}}|{WITH ldap SERVER 'server_name'}|{WITH kerberos [REALM 'realm']}|{WITH jwt}] * [HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...]] * [SETTINGS variable [= value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] | PROFILE 'profile_name'] [,...] 
@@ -15,7 +15,7 @@ namespace DB * * ALTER USER [IF EXISTS] name * [RENAME TO new_name] - * [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}}|{WITH ldap SERVER 'server_name'}|{WITH kerberos [REALM 'realm']}] + * [NOT IDENTIFIED | IDENTIFIED {[WITH {no_password|plaintext_password|sha256_password|sha256_hash|double_sha1_password|double_sha1_hash}] BY {'password'|'hash'}}|{WITH ldap SERVER 'server_name'}|{WITH kerberos [REALM 'realm']}|{WITH jwt}] * [[ADD|DROP] HOST {LOCAL | NAME 'name' | REGEXP 'name_regexp' | IP 'address' | LIKE 'pattern'} [,...] | ANY | NONE] * [DEFAULT ROLE role [,...] | ALL | ALL EXCEPT role [,...] ] * [ADD|MODIFY SETTINGS variable [=value] [MIN [=] min_value] [MAX [=] max_value] [CONST|READONLY|WRITABLE|CHANGEABLE_IN_READONLY] [,...] ] diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 7ef94a4d2b1d..9376f6110e99 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -96,6 +96,7 @@ namespace DB MR_MACROS(CHECK_TABLE, "CHECK TABLE") \ MR_MACROS(CHECK_GRANT, "CHECK GRANT") \ MR_MACROS(CHECK, "CHECK") \ + MR_MACROS(CLAIMS, "CLAIMS") \ MR_MACROS(CLEANUP, "CLEANUP") \ MR_MACROS(CLEAR_COLUMN, "CLEAR COLUMN") \ MR_MACROS(CLEAR_INDEX, "CLEAR INDEX") \ diff --git a/src/Server/HTTP/authenticateUserByHTTP.cpp b/src/Server/HTTP/authenticateUserByHTTP.cpp index d2399a662b67..670f9c60694a 100644 --- a/src/Server/HTTP/authenticateUserByHTTP.cpp +++ b/src/Server/HTTP/authenticateUserByHTTP.cpp @@ -1,3 +1,4 @@ +#include #include #include #include @@ -16,7 +17,6 @@ # include #endif - namespace DB { @@ -86,6 +86,8 @@ bool authenticateUserByHTTP( bool has_http_credentials = request.hasCredentials() && request.get("Authorization") != "never"; bool has_credentials_in_query_params = params.has("user") || params.has("password"); + String bearer_token; + std::string spnego_challenge; #if USE_SSL 
X509Certificate::Subjects certificate_subjects; @@ -164,6 +166,12 @@ bool authenticateUserByHTTP( if (spnego_challenge.empty()) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: SPNEGO challenge is empty"); } + else if (Poco::icompare(scheme, "Bearer") == 0) + { + bearer_token = auth_info; + if (bearer_token.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: Bearer token is empty"); + } else { throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: '{}' HTTP Authorization scheme is not supported", scheme); @@ -221,6 +229,20 @@ bool authenticateUserByHTTP( } } #endif + else if (!bearer_token.empty()) + { + const auto & access_control = global_context->getAccessControl(); + if (!access_control.isTokenAuthEnabled()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Token authentication is disabled"); + + const auto token_credentials = TokenCredentials(bearer_token); + const auto & external_authenticators = access_control.getExternalAuthenticators(); + + if (!external_authenticators.checkTokenCredentials(token_credentials)) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: Token could not be verified."); + + current_credentials = std::make_unique(token_credentials); + } else // I.e., now using user name and password strings ("Basic"). 
{ if (!current_credentials) diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index edd0f8bd6957..940c3679411b 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #include @@ -1962,6 +1963,10 @@ void TCPHandler::receiveHello() if (is_ssh_based_auth) user.erase(0, std::string_view(EncodedUserInfo::SSH_KEY_AUTHENTICAION_MARKER).size()); + is_jwt_based_auth = user.starts_with(EncodedUserInfo::JWT_AUTHENTICAION_MARKER); + if (is_jwt_based_auth) + user.erase(0, std::string_view(EncodedUserInfo::JWT_AUTHENTICAION_MARKER).size()); + session = makeSession(); const auto & client_info = session->getClientInfo(); @@ -2049,7 +2054,28 @@ void TCPHandler::receiveHello() } #endif +<<<<<<< HEAD session->authenticate(user, password, getClientAddress(client_info), socket().peerAddress()); +======= + if (is_jwt_based_auth) + { + const auto & access_control = server.context()->getAccessControl(); + if (!access_control.isTokenAuthEnabled()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Token authentication is disabled"); + + auto credentials = TokenCredentials(password); + + const auto & external_authenticators = access_control.getExternalAuthenticators(); + + if (!external_authenticators.checkTokenCredentials(credentials)) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Token is invalid"); + + session->authenticate(credentials, getClientAddress(client_info)); + return; + } + + session->authenticate(user, password, getClientAddress(client_info)); +>>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) } diff --git a/src/Server/TCPHandler.h b/src/Server/TCPHandler.h index 3ed235d5d998..1e78790b4f5a 100644 --- a/src/Server/TCPHandler.h +++ b/src/Server/TCPHandler.h @@ -238,6 +238,7 @@ class TCPHandler : public Poco::Net::TCPServerConnection String default_database; bool is_ssh_based_auth = false; /// authentication is via SSH pub-key 
challenge + bool is_jwt_based_auth = false; /// authentication is via JWT /// For inter-server secret (remote_server.*.secret) bool is_interserver_mode = false; bool is_interserver_authenticated = false; diff --git a/src/Storages/System/StorageSystemBuildOptions.cpp.in b/src/Storages/System/StorageSystemBuildOptions.cpp.in index db87bce4e733..769550d23dc1 100644 --- a/src/Storages/System/StorageSystemBuildOptions.cpp.in +++ b/src/Storages/System/StorageSystemBuildOptions.cpp.in @@ -77,6 +77,7 @@ const char * auto_config_build[] "GIT_BRANCH", R"IRjaNsZIL9Yh7FQ4(@GIT_BRANCH@)IRjaNsZIL9Yh7FQ4", "GIT_DATE", "@GIT_DATE@", "GIT_COMMIT_SUBJECT", R"Gi17KJMlbGCjErEN(@GIT_COMMIT_SUBJECT@)Gi17KJMlbGCjErEN", + "USE_JWT_CPP", "@USE_JWT_CPP@", nullptr, nullptr }; diff --git a/src/configure_config.cmake b/src/configure_config.cmake index c91c72e66461..199514053329 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -235,5 +235,11 @@ if (TARGET ch_rust::wasmtime) endif() set (USE_YTSAURUS 1) +<<<<<<< HEAD +======= +if (TARGET ch_contrib::jwt-cpp) + set(USE_JWT_CPP 1) +endif() +>>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) set(SOURCE_DIR ${PROJECT_SOURCE_DIR}) diff --git a/tests/integration/test_jwt_auth/__init__.py b/tests/integration/test_jwt_auth/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_jwt_auth/configs/users.xml b/tests/integration/test_jwt_auth/configs/users.xml new file mode 100644 index 000000000000..b3d3372ebaa9 --- /dev/null +++ b/tests/integration/test_jwt_auth/configs/users.xml @@ -0,0 +1,15 @@ + + + + + + + + + + + default + default + + + diff --git a/tests/integration/test_jwt_auth/configs/validators.xml b/tests/integration/test_jwt_auth/configs/validators.xml new file mode 100644 index 000000000000..e6bb8a1d265e --- /dev/null +++ b/tests/integration/test_jwt_auth/configs/validators.xml @@ -0,0 +1,26 @@ + + + + + jwt_static_key + HS256 + my_secret + 
false + true + + + + jwt_static_key + hs256 + other_secret + false + true + + + + jwt_dynamic_jwks + http://resolver:8080/.well-known/jwks.json + true + + + diff --git a/tests/integration/test_jwt_auth/helpers/generate_private_key.py b/tests/integration/test_jwt_auth/helpers/generate_private_key.py new file mode 100644 index 000000000000..7b54fa63368b --- /dev/null +++ b/tests/integration/test_jwt_auth/helpers/generate_private_key.py @@ -0,0 +1,21 @@ +from cryptography.hazmat.primitives.asymmetric import rsa +from cryptography.hazmat.primitives import serialization +from cryptography.hazmat.backends import default_backend + +# Generate RSA private key +private_key = rsa.generate_private_key( + public_exponent=65537, + key_size=2048, # Key size of 2048 bits + backend=default_backend() +) + +# Save the private key to a PEM file +pem_private_key = private_key.private_bytes( + encoding=serialization.Encoding.PEM, + format=serialization.PrivateFormat.TraditionalOpenSSL, + encryption_algorithm=serialization.NoEncryption() # You can add encryption if needed +) + +# Write the private key to a file +with open("new_private_key", "wb") as pem_file: + pem_file.write(pem_private_key) diff --git a/tests/integration/test_jwt_auth/helpers/jwt_jwk.py b/tests/integration/test_jwt_auth/helpers/jwt_jwk.py new file mode 100644 index 000000000000..265882efce76 --- /dev/null +++ b/tests/integration/test_jwt_auth/helpers/jwt_jwk.py @@ -0,0 +1,113 @@ +from cryptography.hazmat.primitives.asymmetric import rsa +from cryptography.hazmat.primitives import serialization + +import base64 +import json +import jwt + + +""" +Only RS* family algorithms are supported!!! 
+""" +with open("./private_key_2", "rb") as key_file: + private_key = serialization.load_pem_private_key( + key_file.read(), + password=None, + ) + + +public_key = private_key.public_key() + + +def to_base64_url(data): + return base64.urlsafe_b64encode(data).decode("utf-8").rstrip("=") + + +def rsa_key_to_jwk(private_key=None, public_key=None): + if private_key: + # Convert the private key to its components + private_numbers = private_key.private_numbers() + public_numbers = private_key.public_key().public_numbers() + + jwk = { + "kty": "RSA", + "alg": "RS512", + "kid": "mykid", + "n": to_base64_url( + public_numbers.n.to_bytes( + (public_numbers.n.bit_length() + 7) // 8, byteorder="big" + ) + ), + "e": to_base64_url( + public_numbers.e.to_bytes( + (public_numbers.e.bit_length() + 7) // 8, byteorder="big" + ) + ), + "d": to_base64_url( + private_numbers.d.to_bytes( + (private_numbers.d.bit_length() + 7) // 8, byteorder="big" + ) + ), + "p": to_base64_url( + private_numbers.p.to_bytes( + (private_numbers.p.bit_length() + 7) // 8, byteorder="big" + ) + ), + "q": to_base64_url( + private_numbers.q.to_bytes( + (private_numbers.q.bit_length() + 7) // 8, byteorder="big" + ) + ), + "dp": to_base64_url( + private_numbers.dmp1.to_bytes( + (private_numbers.dmp1.bit_length() + 7) // 8, byteorder="big" + ) + ), + "dq": to_base64_url( + private_numbers.dmq1.to_bytes( + (private_numbers.dmq1.bit_length() + 7) // 8, byteorder="big" + ) + ), + "qi": to_base64_url( + private_numbers.iqmp.to_bytes( + (private_numbers.iqmp.bit_length() + 7) // 8, byteorder="big" + ) + ), + } + elif public_key: + # Convert the public key to its components + public_numbers = public_key.public_numbers() + + jwk = { + "kty": "RSA", + "alg": "RS512", + "kid": "mykid", + "n": to_base64_url( + public_numbers.n.to_bytes( + (public_numbers.n.bit_length() + 7) // 8, byteorder="big" + ) + ), + "e": to_base64_url( + public_numbers.e.to_bytes( + (public_numbers.e.bit_length() + 7) // 8, byteorder="big" + ) + ), + 
} + else: + raise ValueError("You must provide either a private or public key.") + + return jwk + + +# Convert to JWK +jwk_private = rsa_key_to_jwk(private_key=private_key) +jwk_public = rsa_key_to_jwk(public_key=public_key) + +print(f"Private JWK:\n{json.dumps(jwk_private)}\n") +print(f"Public JWK:\n{json.dumps(jwk_public)}\n") + +payload = {"sub": "jwt_user", "iss": "test_iss"} + +# Create a JWT +token = jwt.encode(payload, private_key, headers={"kid": "mykid"}, algorithm="RS512") +print(f"JWT:\n{token}") diff --git a/tests/integration/test_jwt_auth/helpers/jwt_static_secret.py b/tests/integration/test_jwt_auth/helpers/jwt_static_secret.py new file mode 100644 index 000000000000..5f1c7e0340af --- /dev/null +++ b/tests/integration/test_jwt_auth/helpers/jwt_static_secret.py @@ -0,0 +1,43 @@ +import jwt +import datetime + + +def create_jwt( + payload: dict, secret: str, algorithm: str = "HS256", expiration_minutes: int = None +) -> str: + """ + Create a JWT using a static secret and a specified encryption algorithm. + + :param payload: The payload to include in the JWT (as a dictionary). + :param secret: The secret key used to sign the JWT. + :param algorithm: The encryption algorithm to use (default is 'HS256'). + :param expiration_minutes: The time until the token expires (default is 60 minutes). + :return: The encoded JWT as a string. 
+ """ + if expiration_minutes: + expiration = datetime.datetime.utcnow() + datetime.timedelta( + minutes=expiration_minutes + ) + payload["exp"] = expiration + + return jwt.encode(payload, secret, algorithm=algorithm) + + +if __name__ == "__main__": + secret = "my_secret" + payload = {"sub": "jwt_user"} # `sub` must contain user name + + """ + Supported algorithms: + | HMSC | RSA | ECDSA | PSS | EdDSA | + | ----- | ----- | ------ | ----- | ------- | + | HS256 | RS256 | ES256 | PS256 | Ed25519 | + | HS384 | RS384 | ES384 | PS384 | Ed448 | + | HS512 | RS512 | ES512 | PS512 | | + | | | ES256K | | | + And None + """ + algorithm = "HS256" + + token = create_jwt(payload, secret, algorithm) + print(f"Generated JWT: {token}") diff --git a/tests/integration/test_jwt_auth/helpers/private_key_1 b/tests/integration/test_jwt_auth/helpers/private_key_1 new file mode 100644 index 000000000000..a076a86e17a4 --- /dev/null +++ b/tests/integration/test_jwt_auth/helpers/private_key_1 @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEowIBAAKCAQEAlICGC8S5pObyASih5qfmwuclG0oKsbzY2z9vgwqyhTYQOWcq +YcTjVV4aQ30qb6E0+5W6rJ+jx9zx6GuAEGMiG/aWJEdbUAMGp+L1kz4lrw5U6Glw +oZIvk4wqoRwsiyc+mnDMQAmiZLBNyt3wU6YnKgYmb4O1cSzcZ5HMbImJpj4tpYjq +nIazvYMn/9Pxjkl0ezLCr52av0UkWHro1H4QMVfuEoNmHuWPww9jgHn+I+La0xdO +hRpAa0XnJi65dXZd4330uWjeJwt413yz881uS4n1OLOGKG8ImDcNlwU/guyvk0n0 +aqT0zkOAPp9/yYo13MPWmiRCfOX8ozdN7VDIJwIDAQABAoIBADZfiLUuZrrWRK3f +7sfBmmCquY9wYNILT2uXooDcndjgnrgl6gK6UHKlbgBgB/WvlPK5NAyYtyMq5vgu +xEk7wvVyKC9IYUq+kOVP2JL9IlcibDxcvvypxfnETKeI5VZeHDH4MxEPdgJf+1vY +P3KhV52vestB8mFqB5l0bOEgyuGvO3/3D1JjOnFLS/K2vOj8D/KDRmwXRCcGHTxj +dj3wJH4UbCIsLgiaQBPkFmTteJDICb+7//6YQuB0t8sR/DZS9Z0GWcfy04Cp/m/E +4rRoTNz80MbbU9+k0Ly360SxPizcjpPYSRSD025i8Iqv8jvelq7Nzg69Kubc0KfN +mMrRdMECgYEAz4b7+OX+aO5o2ZQS+fHc8dyWc5umC+uT5xrUm22wZLYA5O8x0Rgj +vdO/Ho/XyN/GCyvNNV2rI2+CBTxez6NqesGDEmJ2n7TQ03xXLCVsnwVz694sPSMO +pzTbU6e42jvDo5DMPDv0Pg1CVQuM9ka6wb4DcolMyDql6QddY3iXHBkCgYEAtzAl 
+xEAABqdFAnCs3zRf9EZphGJiJ4gtoWmCxQs+IcrfyBNQCy6GqrzJOZ7fQiEoAeII +V0JmsNcnx3U1W0lp8N+1QNZoB4fOWXaX08BvOEe7gbJ6Xl5t52j792vQp1txpBhE +UDhz8m5R9i5qb3BzrYBiSTfak0Pq56Xw3jRDjj8CgYEAqX2QS07kQqT8gz85ZGOR +1QMY6aCks7WaXTR/kdW7K/Wts0xb/m7dugq3W+mVDh0c7UC/36b5v/4xTb9pm+HW +dB2ZxCkgwvz1VNSHiamjFhlo/Km+rcv1CsDTpHYmNi57cRowg71flFJV64l8fiN0 +IgnjXOcgC6RCnpiCQFxb5fkCgYB+Zq2YleSuspqOjXrrZPNU1YUXgN9jkbaSqwA9 +wH01ygvRvWm83XS0uSFMLhC1S7WUXwgMVdgP69YZ7glMHQMJ3wLtY0RS9eVvm8I1 +rZHQzsZWPvXqydOiGrHJzs4hvJpUdR4mEF4JCRBrAyoUDQ70yCKJjQ24EeQzxS/H +015N9wKBgB8DdFPvKXyygTMnBoZdpAhkE/x3TTi7DsLBxj7QxKmSHzlHGz0TubIB +m5/p9dGawQNzD4JwASuY5r4lKXmvYr+4TQPLq6c7EnoIZSwLdge+6PDhnDWJzvk1 +S/RuHWW4FKGzBStTmstG3m0xzxTMnQkV3kPimMim3I3VsxxeGEdq +-----END RSA PRIVATE KEY----- diff --git a/tests/integration/test_jwt_auth/helpers/private_key_2 b/tests/integration/test_jwt_auth/helpers/private_key_2 new file mode 100644 index 000000000000..d0d1576f2017 --- /dev/null +++ b/tests/integration/test_jwt_auth/helpers/private_key_2 @@ -0,0 +1,27 @@ +-----BEGIN RSA PRIVATE KEY----- +MIIEowIBAAKCAQEA0RRsKcZ5j9UckjioG4Phvav3dkg2sXP6tQ7ug0yowAo/u2Hf +fB+1OjKuhWTpA3E3YkMKj0RrT+tuUpmZEXqCAipEV7XcfCv3o7Poa7HTq1ti/abV +wT/KyfGjoNBBSJH4LTNAyo2J8ySKSDtpAEU52iL7s40Ra6I0vqp7/aRuPF5M4zcH +zN3zarG5EfSVSG1+gTkaRv8XJbra0IeIINmKv0F4++ww8ZxXTR6cvI+MsArUiAPw +zf7s5dMR4DNRG6YNTrPA0pTOqQE9sRPd62XsfU08plYm27naOUZO5avIPl1YO5I6 +Gi4kPdTvv3WFIy+QvoKoPhPCaD6EbdBpe8BbTQIDAQABAoIBABghJsCFfucKHdOE +RWZziHx22cblW6aML41wzTcLBFixdhx+lafCEwzF551OgZPbn5wwB4p0R3xAPAm9 +X0yEmnd8gEmtG+aavmg+rZ6sNbULhXenpvi4D4PR5uP61OX2rrEsvpgB0L9mYq0m +ah5VXvFdYzYcHDwTSsoMa+XgcbZ2qCW6Si3jnbBA1TPIJS5GjfPUQlu9g2FKQL5H +tlJ7L4Wq39zkueS6LH7kEXOoM+jHgA8F4f7MIrajmilYqnuXanVcMV3+K/6FvH2B +VBiLggG3CerhB3QyEvZBshvEvvcyRff2NK64CGr/xrAElj4cPHk/E499M1uvUXjE +boCrD+ECgYEA9LvLljf59h8WWF4bKQZGNKprgFdQIZ2iCEf+VGdGWt/mNg+LyXyn +3gS/vReON1eaMuEGklZM4Guh/ZPhsPaNmlu16PjmeYTIW1vQTHiO3KR7tAmWep70 +w+gVxDDzuRvBkuDF5oQsZnD3Ri9I7r+J5y9OhyZUsDXe/LJARivF3x0CgYEA2rRx 
+wl4mfuYmikvcO8I4vuKXcK1UyYmZQLhp6EHKfhSVgrt7XsstZX9AP2OxUUAocRks +e6vU/sKUSni7TQrZzAZHc8JXonDgmCqoMPBXIuUncvysGR1kmgVIbN8ISPKJuZoV +8Dbj3fQfHZ0g0R+mUcuZ+xBO5CKcjPWHZXZoxfECgYAQ/5o8bNbnyXD74k1wpAbs +UYn1+BqQuyot+RIpOqMgXLzYtGu5Kvdd7GaE88XlAiirsAWM1IGydMdjnYnniLh9 +KDGSZPddKWPhNJdbOGRz3tjYwHG7Qp8tnEkmv1+uU8c2NHaKdFPBKceDEHW4X4Vs +kVSa/oaTVqqOUrM0LIYp4QKBgQCW1aIriiGEnZhxAvbGJCJczAvkAzcZtBOFBmrM +ayuLnwiqXEEu1HPfr06RKWFuhxAdSF5cgNrqRSpe3jtXXCdvxdjbpmooNy8+4xSS +g/+kqmR1snvC6nmqnAAiTgP5w4RnBDUjMcggGLCpDOhIMkrT2Na+x7WRM6nCsceK +m4qREQKBgEWqdb/QkOMvvKAz2DPDeSrwlTyisrZu1G/86uE3ESb97DisPK+TF2Ts +r4RGUlKL79W3j5xjvIvqGEEDLC+8QKpay9OYXk3lbViPGB8akWMSP6Tw/8AedhVu +sjFqcBEFGOELwm7VjAcDeP6bXeXibFe+rysBrfFHUGllytCmNoAV +-----END RSA PRIVATE KEY----- diff --git a/tests/integration/test_jwt_auth/jwks_server/server.py b/tests/integration/test_jwt_auth/jwks_server/server.py new file mode 100644 index 000000000000..96e07f02335e --- /dev/null +++ b/tests/integration/test_jwt_auth/jwks_server/server.py @@ -0,0 +1,33 @@ +import sys + +from bottle import response, route, run + + +@route("/.well-known/jwks.json") +def server(): + result = { + "keys": [ + { + "kty": "RSA", + "alg": "RS512", + "kid": "mykid", + "n": "0RRsKcZ5j9UckjioG4Phvav3dkg2sXP6tQ7ug0yowAo_u2HffB-1OjKuhWTpA3E3YkMKj0RrT-tuUpmZEXqCAipEV7XcfCv3o" + "7Poa7HTq1ti_abVwT_KyfGjoNBBSJH4LTNAyo2J8ySKSDtpAEU52iL7s40Ra6I0vqp7_aRuPF5M4zcHzN3zarG5EfSVSG1-gT" + "kaRv8XJbra0IeIINmKv0F4--ww8ZxXTR6cvI-MsArUiAPwzf7s5dMR4DNRG6YNTrPA0pTOqQE9sRPd62XsfU08plYm27naOUZ" + "O5avIPl1YO5I6Gi4kPdTvv3WFIy-QvoKoPhPCaD6EbdBpe8BbTQ", + "e": "AQAB"}, + ] + } + response.status = 200 + response.content_type = "application/json" + return result + + +@route("/") +def ping(): + response.content_type = "text/plain" + response.set_header("Content-Length", 2) + return "OK" + + +run(host="0.0.0.0", port=int(sys.argv[1])) diff --git a/tests/integration/test_jwt_auth/test.py b/tests/integration/test_jwt_auth/test.py new file mode 100644 index 
000000000000..14d42ae08bde --- /dev/null +++ b/tests/integration/test_jwt_auth/test.py @@ -0,0 +1,82 @@ +import os +import pytest + +from helpers.cluster import ClickHouseCluster +from helpers.mock_servers import start_mock_servers + +SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__)) + +cluster = ClickHouseCluster(__file__) +instance = cluster.add_instance( + "instance", + main_configs=["configs/validators.xml"], + user_configs=["configs/users.xml"], + with_minio=True, + # We actually don't need minio, but we need to run dummy resolver + # (a shortcut not to change cluster.py in a more unclear way, TBC later). +) +client = cluster.add_instance( + "client", +) + + +def run_jwks_server(): + script_dir = os.path.join(os.path.dirname(__file__), "jwks_server") + start_mock_servers( + cluster, + script_dir, + [ + ("server.py", "resolver", "8080"), + ], + ) + + +@pytest.fixture(scope="module") +def started_cluster(): + try: + cluster.start() + run_jwks_server() + yield cluster + finally: + cluster.shutdown() + + +def curl_with_jwt(token, ip, https=False): + http_prefix = "https" if https else "http" + curl = f'curl -H "Authorization: Bearer {token}" "{http_prefix}://{ip}:8123/?query=SELECT%20currentUser()"' + return curl + + +# See helpers/ directory if you need to re-create tokens (or understand how they are created) +def test_static_key(started_cluster): + res = client.exec_in_container( + [ + "bash", + "-c", + curl_with_jwt( + token="eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJqd3RfdXNlciJ9." + "kfivQ8qD_oY0UvihydeadD7xvuiO3zSmhFOc_SGbEPQ", + ip=cluster.get_instance_ip(instance.name), + ), + ] + ) + assert res == "jwt_user\n" + + +def test_jwks_server(started_cluster): + res = client.exec_in_container( + [ + "bash", + "-c", + curl_with_jwt( + token="eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzUxMiIsImtpZCI6Im15a2lkIn0." 
+ "eyJzdWIiOiJqd3RfdXNlciIsImlzcyI6InRlc3RfaXNzIn0.MjegqrrVyrMMpkxIM-J_q-" + "Sw68Vk5xZuFpxecLLMFs5qzvnh0jslWtyRfi-ANJeJTONPZM5m0yP1ITt8BExoHWobkkR11bXz0ylYEIOgwxqw" + "36XhL2GkE17p-wMvfhCPhGOVL3b7msDRUKXNN48aAJA-NxRbQFhMr-eEx3HsrZXy17Qc7z-" + "0dINe355kzAInGp6gMk3uksAlJ3vMODK8jE-WYFqXusr5GFhXubZXdE2mK0mIbMUGisOZhZLc4QVwvUsYDLBCgJ2RHr5vm" + "jp17j_ZArIedUJkjeC4o72ZMC97kLVnVw94QJwNvd4YisxL6A_mWLTRq9FqNLD4HmbcOQ", + ip=cluster.get_instance_ip(instance.name), + ), + ] + ) + assert res == "jwt_user\n" From 6d11c08f24144f3b5ee3ca6bbaeed77e2a548730 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Sun, 7 Jun 2026 17:37:16 +0200 Subject: [PATCH 02/12] Resolve conflicts in cherry-pick of #1078 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapted `Authentication::areCredentialsValid` return statements from `bool` to `CredentialsCheckResult` (antalya-26.4 changed the return type of this function). Adapted `session->authenticate` call in `TCPHandler` to pass `socket().peerAddress()` as the connection_address argument (antalya-26.4 added this extra parameter). The antalya-26.4 branch refactored the inline user config parser from `UsersConfigAccessStorage.cpp` into a separate `UsersConfigParser.cpp`. The source PR's JWT user config additions (6 lines) were applied to `UsersConfigParser.cpp` instead of the now-empty inline section. 
Adapted: `Authentication::areCredentialsValid` return type `bool` → `CredentialsCheckResult` for TokenCredentials block Adapted: `TCPHandler::receiveHello` `session->authenticate` call added `socket().peerAddress()` argument Adapted: JWT user config support applied to `UsersConfigParser` instead of the refactored-out inline parser in `UsersConfigAccessStorage` Source-PR: #1078 (https://github.com/Altinity/ClickHouse/pull/1078) --- .../aspell-ignore/en/aspell-dict.txt | 8 +- src/Access/AccessControl.h | 3 - src/Access/Authentication.cpp | 11 +- src/Access/AuthenticationData.cpp | 10 +- src/Access/IAccessStorage.cpp | 3 - src/Access/UsersConfigAccessStorage.cpp | 807 ------------------ src/Access/UsersConfigParser.cpp | 12 +- src/Server/TCPHandler.cpp | 8 +- src/configure_config.cmake | 3 - 9 files changed, 19 insertions(+), 846 deletions(-) diff --git a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt index f0c6fbaf3ba7..cafba00da5c2 100644 --- a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt +++ b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt @@ -653,12 +653,9 @@ JoinStrictness Jpan JumpConsistentHash Jupyter -<<<<<<< HEAD Jurc -======= jwks JWKS ->>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) KDevelop KafkaAssignedPartitions KafkaBackgroundReads @@ -3809,11 +3806,8 @@ uuid uuids uuidv vCPU -<<<<<<< HEAD -vLLM -======= validators ->>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) +vLLM varPop varPopStable varSamp diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index a7a84e5d34a5..71c37d243359 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -312,11 +312,8 @@ class AccessControl : public MultipleAccessStorage std::atomic_bool allow_beta_tier_settings = true; std::atomic_bool enable_user_name_access_type = true; std::atomic_bool enable_read_write_grants = 
false; -<<<<<<< HEAD std::atomic_bool allow_impersonate_user = false; -======= std::atomic_bool enable_token_auth = true; ->>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) }; } diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index 0a4d1f978b91..a91d243dca2c 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -329,16 +329,11 @@ Authentication::CredentialsCheckResult Authentication::areCredentialsValid( const ClientInfo & client_info, SettingsChanges & settings) { -<<<<<<< HEAD - if (!credentials.isReady()) - return CredentialsCheckResult::Fail; -======= /// It is OK for TokenCredentials to be not ready: /// When auth request happens, we do not even know the username. /// Token is resolved a bit later and the user information will be put in credentials if (!typeid_cast(&credentials) && !credentials.isReady()) - return false; ->>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) + return CredentialsCheckResult::Fail; if (const auto * gss_acceptor_context = typeid_cast(&credentials)) { @@ -388,9 +383,9 @@ Authentication::CredentialsCheckResult Authentication::areCredentialsValid( if (const auto * token_credentials = typeid_cast(&credentials)) { if (authentication_method.getType() != AuthenticationType::JWT) - return false; + return CredentialsCheckResult::Fail; - return external_authenticators.checkTokenCredentials(*token_credentials); + return external_authenticators.checkTokenCredentials(*token_credentials) ? 
CredentialsCheckResult::Success : CredentialsCheckResult::Fail; } if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast(&credentials)) diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index e9834f0ef80f..cf1aa4ad265b 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -28,18 +28,16 @@ # include #endif -<<<<<<< HEAD +#if USE_JWT_CPP +#include +#endif + namespace CurrentMetrics { extern const Metric BcryptCacheBytes; extern const Metric BcryptCacheSize; } -======= -#if USE_JWT_CPP -#include -#endif ->>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) namespace DB { diff --git a/src/Access/IAccessStorage.cpp b/src/Access/IAccessStorage.cpp index dec29187ee54..a776ff71036c 100644 --- a/src/Access/IAccessStorage.cpp +++ b/src/Access/IAccessStorage.cpp @@ -34,11 +34,8 @@ namespace ErrorCodes extern const int ACCESS_ENTITY_NOT_FOUND; extern const int ACCESS_STORAGE_READONLY; extern const int ACCESS_STORAGE_DOESNT_ALLOW_BACKUP; -<<<<<<< HEAD extern const int REQUIRED_SECOND_FACTOR; -======= extern const int AUTHENTICATION_FAILED; ->>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) extern const int WRONG_PASSWORD; extern const int IP_ADDRESS_NOT_ALLOWED; extern const int LOGICAL_ERROR; diff --git a/src/Access/UsersConfigAccessStorage.cpp b/src/Access/UsersConfigAccessStorage.cpp index 1d32fef8dacd..894fbd4f0aec 100644 --- a/src/Access/UsersConfigAccessStorage.cpp +++ b/src/Access/UsersConfigAccessStorage.cpp @@ -5,11 +5,6 @@ #include #include #include -<<<<<<< HEAD -======= -#include -#include "Access/Credentials.h" ->>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) #include #include #include @@ -20,808 +15,6 @@ namespace DB { -<<<<<<< HEAD -======= -namespace ErrorCodes -{ - extern const int BAD_ARGUMENTS; - extern const int UNKNOWN_ADDRESS_PATTERN_TYPE; - extern const int THERE_IS_NO_PROFILE; - 
extern const int NOT_IMPLEMENTED; - extern const int SUPPORT_IS_DISABLED; -} - -namespace -{ - - UUID generateID(AccessEntityType type, const String & name) - { - Poco::MD5Engine md5; - md5.update(name); - char type_storage_chars[] = " USRSXML"; - type_storage_chars[0] = AccessEntityTypeInfo::get(type).unique_char; - md5.update(type_storage_chars, strlen(type_storage_chars)); - UUID result; - memcpy(&result, md5.digest().data(), md5.digestLength()); - transformEndianness(result); - return result; - } - - UUID generateID(const IAccessEntity & entity) { return generateID(entity.getType(), entity.getName()); } - - template - void parseGrant(T & entity, const String & string_query, const std::unordered_set & allowed_role_ids) - { - ParserGrantQuery parser; - parser.setParseWithoutGrantees(); - - String error_message; - const char * pos = string_query.data(); - auto ast = tryParseQuery(parser, pos, pos + string_query.size(), error_message, false, "", false, 0, DBMS_DEFAULT_MAX_PARSER_DEPTH, DBMS_DEFAULT_MAX_PARSER_BACKTRACKS, true); - - if (!ast) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse grant query. 
Error: {}", error_message); - - auto & query = ast->as(); - - if (query.roles && query.is_revoke) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Roles can't be revoked in config file"); - - if (!query.cluster.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Can't grant on cluster using config file"); - - if (query.grantees) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "You can't specify grantees in query using config file"); - - for (auto & element : query.access_rights_elements) - { - if (query.is_revoke) - entity.access.revoke(element); - else - entity.access.grant(element); - } - - if (query.roles) - { - std::vector roles_to_grant; - roles_to_grant.reserve(query.roles->size()); - - for (const auto & role_name : query.roles->names) - { - auto role_id = generateID(AccessEntityType::ROLE, role_name); - if (!allowed_role_ids.contains(role_id)) - throw Exception(ErrorCodes::THERE_IS_NO_PROFILE, "Role {} was not found", role_name); - - roles_to_grant.push_back(role_id); - } - - if (query.admin_option) - entity.granted_roles.grantWithAdminOption(roles_to_grant); - else - entity.granted_roles.grant(roles_to_grant); - } - } - - UserPtr parseUser( - const Poco::Util::AbstractConfiguration & config, - const String & user_name, - const std::unordered_set & allowed_profile_ids, - const std::unordered_set & allowed_role_ids, - bool allow_no_password, - bool allow_plaintext_password) - { - const bool validate = true; - auto user = std::make_shared(); - user->setName(user_name); - String user_config = "users." 
+ user_name; - bool has_no_password = config.has(user_config + ".no_password"); - bool has_password_plaintext = config.has(user_config + ".password"); - bool has_password_sha256_hex = config.has(user_config + ".password_sha256_hex"); - bool has_scram_password_sha256_hex = config.has(user_config + ".password_scram_sha256_hex"); - bool has_password_double_sha1_hex = config.has(user_config + ".password_double_sha1_hex"); - bool has_ldap = config.has(user_config + ".ldap"); - bool has_kerberos = config.has(user_config + ".kerberos"); - bool has_jwt = config.has(user_config + ".jwt"); - - const auto certificates_config = user_config + ".ssl_certificates"; - bool has_certificates = config.has(certificates_config); - - const auto ssh_keys_config = user_config + ".ssh_keys"; - bool has_ssh_keys = config.has(ssh_keys_config); - - const auto http_auth_config = user_config + ".http_authentication"; - bool has_http_auth = config.has(http_auth_config); - - size_t num_password_fields = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex - + has_ldap + has_kerberos + has_certificates + has_ssh_keys + has_http_auth + has_scram_password_sha256_hex + has_jwt; - - if (num_password_fields > 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "More than one field of 'password', 'password_sha256_hex', " - "'password_double_sha1_hex', 'no_password', 'ldap', 'kerberos', 'ssl_certificates', 'ssh_keys', " - "'http_authentication', 'jwt' are used to specify authentication info for user {}. 
" - "Must be only one of them.", user_name); - - if (num_password_fields < 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'password' or 'password_sha256_hex' " - "or 'password_double_sha1_hex' or 'no_password' or 'ldap' or 'kerberos " - "or 'ssl_certificates' or 'ssh_keys' or 'http_authentication' or 'jwt' must be specified for user {}.", user_name); - - if (has_password_plaintext) - { - user->authentication_methods.emplace_back(AuthenticationType::PLAINTEXT_PASSWORD); - user->authentication_methods.back().setPassword(config.getString(user_config + ".password"), validate); - } - else if (has_password_sha256_hex) - { - user->authentication_methods.emplace_back(AuthenticationType::SHA256_PASSWORD); - user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_sha256_hex"), validate); - } - else if (has_scram_password_sha256_hex) - { - user->authentication_methods.emplace_back(AuthenticationType::SCRAM_SHA256_PASSWORD); - user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_scram_sha256_hex"), validate); - } - else if (has_password_double_sha1_hex) - { - user->authentication_methods.emplace_back(AuthenticationType::DOUBLE_SHA1_PASSWORD); - user->authentication_methods.back().setPasswordHashHex(config.getString(user_config + ".password_double_sha1_hex"), validate); - } - else if (has_ldap) - { - bool has_ldap_server = config.has(user_config + ".ldap.server"); - if (!has_ldap_server) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Missing mandatory 'server' in 'ldap', with LDAP server name, for user {}.", user_name); - - const auto ldap_server_name = config.getString(user_config + ".ldap.server"); - if (ldap_server_name.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "LDAP server name cannot be empty for user {}.", user_name); - - user->authentication_methods.emplace_back(AuthenticationType::LDAP); - user->authentication_methods.back().setLDAPServerName(ldap_server_name); - } - 
else if (has_kerberos) - { - const auto realm = config.getString(user_config + ".kerberos.realm", ""); - - user->authentication_methods.emplace_back(AuthenticationType::KERBEROS); - user->authentication_methods.back().setKerberosRealm(realm); - } - else if (has_certificates) - { -#if USE_SSL - user->authentication_methods.emplace_back(AuthenticationType::SSL_CERTIFICATE); - - /// Fill list of allowed certificates. - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(certificates_config, keys); - for (const String & key : keys) - { - if (key.starts_with("common_name")) - { - String value = config.getString(certificates_config + "." + key); - user->authentication_methods.back().addSSLCertificateSubject(X509Certificate::Subjects::Type::CN, std::move(value)); - } - else if (key.starts_with("subject_alt_name")) - { - String value = config.getString(certificates_config + "." + key); - if (value.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected ssl_certificates.subject_alt_name to not be empty"); - user->authentication_methods.back().addSSLCertificateSubject(X509Certificate::Subjects::Type::SAN, std::move(value)); - } - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown certificate pattern type: {}", key); - } -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSL certificates support is disabled, because ClickHouse was built without SSL library"); -#endif - } - else if (has_ssh_keys) - { -#if USE_SSH - user->authentication_methods.emplace_back(AuthenticationType::SSH_KEY); - - Poco::Util::AbstractConfiguration::Keys entries; - config.keys(ssh_keys_config, entries); - std::vector keys; - for (const String& entry : entries) - { - const auto conf_pref = ssh_keys_config + "." 
+ entry + "."; - if (entry.starts_with("ssh_key")) - { - String type; - String base64_key; - if (config.has(conf_pref + "type")) - { - type = config.getString(conf_pref + "type"); - } - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected type field in {} entry", entry); - if (config.has(conf_pref + "base64_key")) - { - base64_key = config.getString(conf_pref + "base64_key"); - } - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Expected base64_key field in {} entry", entry); - - - try - { - keys.emplace_back(SSHKeyFactory::makePublicKeyFromBase64(base64_key, type)); - } - catch (const std::invalid_argument &) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Bad SSH key in entry: {}", entry); - } - } - else - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Unknown ssh_key entry pattern type: {}", entry); - } - user->authentication_methods.back().setSSHKeys(std::move(keys)); -#else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "SSH is disabled, because ClickHouse is built without libssh"); -#endif - } - else if (has_http_auth) - { - user->authentication_methods.emplace_back(AuthenticationType::HTTP); - user->authentication_methods.back().setHTTPAuthenticationServerName(config.getString(http_auth_config + ".server")); - auto scheme = config.getString(http_auth_config + ".scheme"); - user->authentication_methods.back().setHTTPAuthenticationScheme(parseHTTPAuthenticationScheme(scheme)); - } - else if (has_jwt) - { - user->authentication_methods.emplace_back(AuthenticationType::JWT); - } - else - { - user->authentication_methods.emplace_back(); - } - - for (const auto & authentication_method : user->authentication_methods) - { - auto auth_type = authentication_method.getType(); - if (((auth_type == AuthenticationType::NO_PASSWORD) && !allow_no_password) || - ((auth_type == AuthenticationType::PLAINTEXT_PASSWORD) && !allow_plaintext_password)) - { - throw Exception(ErrorCodes::BAD_ARGUMENTS, - "Authentication type {} is not allowed, check the setting allow_{} 
in the server configuration", - toString(auth_type), AuthenticationTypeInfo::get(auth_type).name); - } - } - - const auto profile_name_config = user_config + ".profile"; - if (config.has(profile_name_config)) - { - auto profile_name = config.getString(profile_name_config); - auto profile_id = generateID(AccessEntityType::SETTINGS_PROFILE, profile_name); - if (!allowed_profile_ids.contains(profile_id)) - throw Exception(ErrorCodes::THERE_IS_NO_PROFILE, "Profile {} was not found", profile_name); - SettingsProfileElement profile_element; - profile_element.parent_profile = profile_id; - user->settings.push_back(std::move(profile_element)); - } - - /// Fill list of allowed hosts. - const auto networks_config = user_config + ".networks"; - if (config.has(networks_config)) - { - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(networks_config, keys); - user->allowed_client_hosts.clear(); - for (const String & key : keys) - { - String value = config.getString(networks_config + "." + key); - if (key.starts_with("ip")) - user->allowed_client_hosts.addSubnet(value); - else if (key.starts_with("host_regexp")) - user->allowed_client_hosts.addNameRegexp(value); - else if (key.starts_with("host")) - user->allowed_client_hosts.addName(value); - else - throw Exception(ErrorCodes::UNKNOWN_ADDRESS_PATTERN_TYPE, "Unknown address pattern type: {}", key); - } - } - - /// Fill list of allowed databases. - const auto databases_config = user_config + ".allow_databases"; - std::optional databases; - if (config.has(databases_config)) - { - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(databases_config, keys); - databases.emplace(); - databases->reserve(keys.size()); - for (const auto & key : keys) - { - const auto database_name = config.getString(databases_config + "." + key); - databases->push_back(database_name); - } - } - - /// Fill list of allowed dictionaries. 
- const auto dictionaries_config = user_config + ".allow_dictionaries"; - std::optional dictionaries; - if (config.has(dictionaries_config)) - { - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(dictionaries_config, keys); - dictionaries.emplace(); - dictionaries->reserve(keys.size()); - for (const auto & key : keys) - { - const auto dictionary_name = config.getString(dictionaries_config + "." + key); - dictionaries->push_back(dictionary_name); - } - } - - const auto grants_config = user_config + ".grants"; - std::optional grant_queries; - if (config.has(grants_config)) - { - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(grants_config, keys); - grant_queries.emplace(); - grant_queries->reserve(keys.size()); - for (const auto & key : keys) - { - const auto query = config.getString(grants_config + "." + key); - grant_queries->push_back(query); - } - } - - bool access_management = config.getBool(user_config + ".access_management", false); - bool named_collection_control = config.getBool(user_config + ".named_collection_control", false) || config.getBool(user_config + ".named_collection_admin", false); - bool show_named_collections_secrets = config.getBool(user_config + ".show_named_collections_secrets", false); - - if (grant_queries) - if (databases || dictionaries || access_management || named_collection_control || show_named_collections_secrets) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Any other access control settings can't be specified with `grants`"); - - if (grant_queries) - { - for (const auto & string_query : *grant_queries) - parseGrant(*user, string_query, allowed_role_ids); - } - else - { - /// By default all databases are accessible - /// and the user can grant everything he has. 
- user->access.grantWithGrantOption(AccessType::ALL); - - if (databases) - { - user->access.revoke(AccessFlags::allFlags() - AccessFlags::allGlobalFlags()); - user->access.grantWithGrantOption(AccessType::TABLE_ENGINE); - user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG); - for (const String & database : *databases) - user->access.grantWithGrantOption(AccessFlags::allFlags(), database); - } - - if (dictionaries) - { - user->access.revoke(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG); - for (const String & dictionary : *dictionaries) - user->access.grantWithGrantOption(AccessFlags::allDictionaryFlags(), IDictionary::NO_DATABASE_TAG, dictionary); - } - - if (!access_management) - { - user->access.revoke(AccessType::ACCESS_MANAGEMENT); - user->access.revokeGrantOption(AccessType::ALL); - } - - if (!named_collection_control) - { - user->access.revoke(AccessType::NAMED_COLLECTION_ADMIN); - } - - if (!show_named_collections_secrets) - { - user->access.revoke(AccessType::SHOW_NAMED_COLLECTIONS_SECRETS); - } - } - - String default_database = config.getString(user_config + ".default_database", ""); - user->default_database = default_database; - - return user; - } - - - std::vector parseUsers( - const Poco::Util::AbstractConfiguration & config, - const std::unordered_set & allowed_profile_ids, - const std::unordered_set & allowed_role_ids, - bool allow_no_password, - bool allow_plaintext_password) - { - Poco::Util::AbstractConfiguration::Keys user_names; - config.keys("users", user_names); - - std::vector users; - users.reserve(user_names.size()); - for (const auto & user_name : user_names) - { - try - { - users.push_back(parseUser(config, user_name, allowed_profile_ids, allowed_role_ids, allow_no_password, allow_plaintext_password)); - } - catch (Exception & e) - { - e.addMessage(fmt::format("while parsing user '{}' in users configuration file", user_name)); - throw; - } - } - - return users; - } - - 
RolePtr parseRole( - const Poco::Util::AbstractConfiguration & config, - const String & role_name, - const std::unordered_set & allowed_role_ids) - { - auto role = std::make_shared(); - role->setName(role_name); - String role_config = "roles." + role_name; - - const auto grants_config = role_config + ".grants"; - if (config.has(grants_config)) - { - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(grants_config, keys); - for (const auto & key : keys) - { - const auto query = config.getString(grants_config + "." + key); - parseGrant(*role, query, allowed_role_ids); - } - } - - return role; - } - - std::vector parseRoles( - const Poco::Util::AbstractConfiguration & config, - const std::unordered_set & allowed_role_ids) - { - Poco::Util::AbstractConfiguration::Keys role_names; - config.keys("roles", role_names); - - std::vector roles; - roles.reserve(role_names.size()); - for (const auto & role_name : role_names) - { - try - { - roles.push_back(parseRole(config, role_name, allowed_role_ids)); - } - catch (Exception & e) - { - e.addMessage(fmt::format("while parsing roles '{}' in users configuration file", role_name)); - throw; - } - } - - return roles; - } - - - QuotaPtr parseQuota(const Poco::Util::AbstractConfiguration & config, const String & quota_name, const std::vector & user_ids) - { - auto quota = std::make_shared(); - quota->setName(quota_name); - - String quota_config = "quotas." 
+ quota_name; - if (config.has(quota_config + ".keyed_by_ip")) - quota->key_type = QuotaKeyType::IP_ADDRESS; - else if (config.has(quota_config + ".keyed_by_forwarded_ip")) - quota->key_type = QuotaKeyType::FORWARDED_IP_ADDRESS; - else if (config.has(quota_config + ".keyed")) - quota->key_type = QuotaKeyType::CLIENT_KEY_OR_USER_NAME; - else - quota->key_type = QuotaKeyType::USER_NAME; - - Poco::Util::AbstractConfiguration::Keys interval_keys; - config.keys(quota_config, interval_keys); - - for (const String & interval_key : interval_keys) - { - if (!startsWith(interval_key, "interval")) - continue; - - String interval_config = quota_config + "." + interval_key; - std::chrono::seconds duration{config.getInt(interval_config + ".duration", 0)}; - if (duration.count() <= 0) /// Skip quotas with non-positive duration. - continue; - - quota->all_limits.emplace_back(); - auto & limits = quota->all_limits.back(); - limits.duration = duration; - limits.randomize_interval = config.getBool(interval_config + ".randomize", false); - - for (auto quota_type : collections::range(QuotaType::MAX)) - { - const auto & type_info = QuotaTypeInfo::get(quota_type); - auto value = config.getString(interval_config + "." + type_info.name, "0"); - if (value != "0") - limits.max[static_cast(quota_type)] = type_info.stringToValue(value); - } - } - - quota->to_roles.add(user_ids); - return quota; - } - - - std::vector parseQuotas(const Poco::Util::AbstractConfiguration & config) - { - Poco::Util::AbstractConfiguration::Keys user_names; - config.keys("users", user_names); - std::unordered_map> quota_to_user_ids; - for (const auto & user_name : user_names) - { - if (config.has("users." + user_name + ".quota")) - quota_to_user_ids[config.getString("users." 
+ user_name + ".quota")].push_back(generateID(AccessEntityType::USER, user_name)); - } - - Poco::Util::AbstractConfiguration::Keys quota_names; - config.keys("quotas", quota_names); - - std::vector quotas; - quotas.reserve(quota_names.size()); - - for (const auto & quota_name : quota_names) - { - try - { - auto it = quota_to_user_ids.find(quota_name); - const std::vector & quota_users = (it != quota_to_user_ids.end()) ? std::move(it->second) : std::vector{}; - quotas.push_back(parseQuota(config, quota_name, quota_users)); - } - catch (Exception & e) - { - e.addMessage(fmt::format("while parsing quota '{}' in users configuration file", quota_name)); - throw; - } - } - - return quotas; - } - - - std::vector parseRowPolicies(const Poco::Util::AbstractConfiguration & config, bool users_without_row_policies_can_read_rows) - { - std::map, std::unordered_map> all_filters_map; - - Poco::Util::AbstractConfiguration::Keys user_names; - config.keys("users", user_names); - - for (const String & user_name : user_names) - { - const String databases_config = "users." + user_name + ".databases"; - if (config.has(databases_config)) - { - Poco::Util::AbstractConfiguration::Keys database_keys; - config.keys(databases_config, database_keys); - - /// Read tables within databases - for (const String & database_key : database_keys) - { - const String database_config = databases_config + "." 
+ database_key; - - String database_name; - if (((database_key == "database") || (database_key.starts_with("database["))) && config.has(database_config + "[@name]")) - database_name = config.getString(database_config + "[@name]"); - else if (size_t bracket_pos = database_key.find('['); bracket_pos != std::string::npos) - database_name = database_key.substr(0, bracket_pos); - else - database_name = database_key; - - Poco::Util::AbstractConfiguration::Keys table_keys; - config.keys(database_config, table_keys); - - /// Read table properties - for (const String & table_key : table_keys) - { - String table_config = database_config + "." + table_key; - String table_name; - if (((table_key == "table") || (table_key.starts_with("table["))) && config.has(table_config + "[@name]")) - table_name = config.getString(table_config + "[@name]"); - else if (size_t bracket_pos = table_key.find('['); bracket_pos != std::string::npos) - table_name = table_key.substr(0, bracket_pos); - else - table_name = table_key; - - String filter_config = table_config + ".filter"; - all_filters_map[{database_name, table_name}][user_name] = config.getString(filter_config); - } - } - } - } - - std::vector policies; - for (auto & [database_and_table_name, user_to_filters] : all_filters_map) - { - const auto & [database, table_name] = database_and_table_name; - for (const String & user_name : user_names) - { - String filter; - auto it = user_to_filters.find(user_name); - if (it != user_to_filters.end()) - { - filter = it->second; - } - else - { - if (users_without_row_policies_can_read_rows) - continue; - filter = "1"; - } - - auto policy = std::make_shared(); - policy->setFullName(user_name, database, table_name); - policy->filters[static_cast(RowPolicyFilterType::SELECT_FILTER)] = filter; - policy->to_roles.add(generateID(AccessEntityType::USER, user_name)); - policies.push_back(policy); - } - } - return policies; - } - - - SettingsProfileElements parseSettingsConstraints(const 
Poco::Util::AbstractConfiguration & config, - const String & path_to_constraints, - const AccessControl & access_control) - { - SettingsProfileElements profile_elements; - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(path_to_constraints, keys); - - for (const String & setting_name : keys) - { - access_control.checkSettingNameIsAllowed(setting_name); - - SettingsProfileElement profile_element; - profile_element.setting_name = setting_name; - Poco::Util::AbstractConfiguration::Keys constraint_types; - String path_to_name = path_to_constraints + "." + setting_name; - config.keys(path_to_name, constraint_types); - - size_t writability_count = 0; - for (const String & constraint_type : constraint_types) - { - if (constraint_type == "min") - profile_element.min_value = settingStringToValueUtil(setting_name, config.getString(path_to_name + "." + constraint_type)); - else if (constraint_type == "max") - profile_element.max_value = settingStringToValueUtil(setting_name, config.getString(path_to_name + "." + constraint_type)); - /// When the xml config is parsed, the first constraint_type is parsed as `disallowed` and the subsequent ones are parsed as - /// disallowed[1], disallowed[2] and so on. So, both `disallowed` and `disallowed[` should be considered as valid constraint types. - /// Example: - /// - /// - /// 50 - /// 3 - /// 4 - /// 5 - /// - /// - else if (constraint_type == "disallowed" || constraint_type.starts_with("disallowed[")) - profile_element.disallowed_values.push_back(settingStringToValueUtil(setting_name, config.getString(path_to_name + "." 
+ constraint_type))); - else if (constraint_type == "readonly" || constraint_type == "const") - { - writability_count++; - profile_element.writability = SettingConstraintWritability::CONST; - } - else if (constraint_type == "changeable_in_readonly") - { - writability_count++; - if (access_control.doesSettingsConstraintsReplacePrevious()) - profile_element.writability = SettingConstraintWritability::CHANGEABLE_IN_READONLY; - else - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Setting changeable_in_readonly for {} is not allowed " - "unless settings_constraints_replace_previous is enabled", setting_name); - } - else - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Setting {} value for {} isn't supported", constraint_type, setting_name); - } - if (writability_count > 1) - throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not more than one constraint writability specifier " - "(const/readonly/changeable_in_readonly) is allowed for {}", setting_name); - - profile_elements.push_back(std::move(profile_element)); - } - - return profile_elements; - } - - std::shared_ptr parseSettingsProfile( - const Poco::Util::AbstractConfiguration & config, - const String & profile_name, - const std::unordered_set & allowed_parent_profile_ids, - const AccessControl & access_control) - { - auto profile = std::make_shared(); - profile->setName(profile_name); - String profile_config = "profiles." + profile_name; - - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(profile_config, keys); - - for (const std::string & key : keys) - { - if (key == "profile" || key.starts_with("profile[")) - { - String parent_profile_name = config.getString(profile_config + "." 
+ key); - auto parent_profile_id = generateID(AccessEntityType::SETTINGS_PROFILE, parent_profile_name); - if (!allowed_parent_profile_ids.contains(parent_profile_id)) - throw Exception(ErrorCodes::THERE_IS_NO_PROFILE, "Parent profile '{}' was not found", parent_profile_name); - SettingsProfileElement profile_element; - profile_element.parent_profile = parent_profile_id; - profile->elements.emplace_back(std::move(profile_element)); - continue; - } - - if (key == "constraints" || key.starts_with("constraints[")) - { - profile->elements.merge(parseSettingsConstraints(config, profile_config + "." + key, access_control)); - continue; - } - - const auto & setting_name = key; - access_control.checkSettingNameIsAllowed(setting_name); - - SettingsProfileElement profile_element; - profile_element.setting_name = setting_name; - profile_element.value = settingStringToValueUtil(setting_name, config.getString(profile_config + "." + key)); - profile->elements.emplace_back(std::move(profile_element)); - } - - return profile; - } - - - std::vector parseSettingsProfiles( - const Poco::Util::AbstractConfiguration & config, - const std::unordered_set & allowed_parent_profile_ids, - const AccessControl & access_control) - { - Poco::Util::AbstractConfiguration::Keys profile_names; - config.keys("profiles", profile_names); - - std::vector profiles; - profiles.reserve(profile_names.size()); - - for (const auto & profile_name : profile_names) - { - try - { - profiles.push_back(parseSettingsProfile(config, profile_name, allowed_parent_profile_ids, access_control)); - } - catch (Exception & e) - { - e.addMessage(fmt::format("while parsing profile '{}' in users configuration file", profile_name)); - throw; - } - } - - return profiles; - } - - std::unordered_set getAllowedIDs( - const Poco::Util::AbstractConfiguration & config, - const String & configuration_key, - const AccessEntityType type) - { - Poco::Util::AbstractConfiguration::Keys keys; - config.keys(configuration_key, keys); - 
std::unordered_set ids; - for (const auto & key : keys) - ids.emplace(generateID(type, key)); - return ids; - } -} - ->>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) UsersConfigAccessStorage::UsersConfigAccessStorage(const String & storage_name_, AccessControl & access_control_, bool allow_backup_) : IAccessStorage(storage_name_) , access_control(access_control_) diff --git a/src/Access/UsersConfigParser.cpp b/src/Access/UsersConfigParser.cpp index 08646897a54d..e0d1a021a966 100644 --- a/src/Access/UsersConfigParser.cpp +++ b/src/Access/UsersConfigParser.cpp @@ -117,7 +117,7 @@ namespace static const std::unordered_set auth_type_keys = { "no_password", "password", "password_sha256_hex", "password_scram_sha256_hex", "password_double_sha1_hex", "ldap", "kerberos", "ssl_certificates", - "ssh_keys", "http_authentication" + "ssh_keys", "http_authentication", "jwt" }; Poco::Util::AbstractConfiguration::Keys user_keys; config.keys(config_path, user_keys); @@ -182,8 +182,10 @@ namespace const auto http_auth_config = auth_method_path + ".http_authentication"; bool has_http_auth = config.has(http_auth_config); + bool has_jwt = config.has(auth_method_path + ".jwt"); + size_t num_authentication_types = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex - + has_ldap + has_kerberos + has_certificates + has_ssh_keys + has_http_auth + has_scram_password_sha256_hex; + + has_ldap + has_kerberos + has_certificates + has_ssh_keys + has_http_auth + has_scram_password_sha256_hex + has_jwt; if (num_authentication_types > 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, @@ -193,7 +195,7 @@ namespace if (num_authentication_types < 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "At least one authentication type (one of 'password', " "'password_sha256_hex', 'password_scram_sha256_hex', 'password_double_sha1_hex', 'no_password', 'ldap', 'kerberos', " - "'ssl_certificates', 'ssh_keys', 'http_authentication') must be 
specified for user {} in path {}.", user_name, auth_method_path); + "'ssl_certificates', 'ssh_keys', 'http_authentication', 'jwt') must be specified for user {} in path {}.", user_name, auth_method_path); AuthenticationData auth_data; @@ -351,6 +353,10 @@ namespace "Missing mandatory 'server' and 'scheme' in 'http_authentication' for user {}.", user_name); } } + else if (has_jwt) + { + auth_data = AuthenticationData(AuthenticationType::JWT); + } return auth_data; } diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 940c3679411b..6aabb6f558bc 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -2054,9 +2054,6 @@ void TCPHandler::receiveHello() } #endif -<<<<<<< HEAD - session->authenticate(user, password, getClientAddress(client_info), socket().peerAddress()); -======= if (is_jwt_based_auth) { const auto & access_control = server.context()->getAccessControl(); @@ -2070,12 +2067,11 @@ void TCPHandler::receiveHello() if (!external_authenticators.checkTokenCredentials(credentials)) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Token is invalid"); - session->authenticate(credentials, getClientAddress(client_info)); + session->authenticate(credentials, getClientAddress(client_info), socket().peerAddress()); return; } - session->authenticate(user, password, getClientAddress(client_info)); ->>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) + session->authenticate(user, password, getClientAddress(client_info), socket().peerAddress()); } diff --git a/src/configure_config.cmake b/src/configure_config.cmake index 199514053329..cd949b0b473a 100644 --- a/src/configure_config.cmake +++ b/src/configure_config.cmake @@ -235,11 +235,8 @@ if (TARGET ch_rust::wasmtime) endif() set (USE_YTSAURUS 1) -<<<<<<< HEAD -======= if (TARGET ch_contrib::jwt-cpp) set(USE_JWT_CPP 1) endif() ->>>>>>> 58b404dc6a5 (Merge pull request #1078 from Altinity/oauth-antalya-25.8) set(SOURCE_DIR ${PROJECT_SOURCE_DIR}) From 
dd04a9dcefad6848f60f7532c16b4701932fbc8e Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Fri, 24 Apr 2026 22:24:29 +0200 Subject: [PATCH 03/12] Cherry-pick of https://github.com/Altinity/ClickHouse/pull/1606 with unresolved conflict markers (resolution in next commit) --- Original cherry-pick message follows: Merge pull request #1606 from Altinity/feature/client-IdP Add OAuth2 login to clickhouse-client (--login / --login=device) # Conflicts: # docs/en/interfaces/cli.md # programs/client/Client.cpp --- docs/en/interfaces/cli.md | 214 ++++++ programs/client/Client.cpp | 94 ++- src/Access/TokenProcessorsOpaque.cpp | 17 +- src/CMakeLists.txt | 7 - src/Client/OAuthFlowRunner.cpp | 694 ++++++++++++++++++ src/Client/OAuthFlowRunner.h | 22 + src/Client/OAuthJWTProvider.cpp | 64 ++ src/Client/OAuthLogin.cpp | 451 ++++++++++++ src/Client/OAuthLogin.h | 44 ++ src/Client/OAuthProviderPolicy.cpp | 127 ++++ src/Client/OAuthProviderPolicy.h | 58 ++ src/Client/tests/gtest_oauth_login.cpp | 331 +++++++++ .../compose/docker_compose_keycloak.yml | 21 + tests/integration/helpers/cluster.py | 62 ++ .../test_keycloak_auth/__init__.py | 0 .../test_keycloak_auth/configs/users.xml | 13 + .../test_keycloak_auth/configs/validators.xml | 20 + .../keycloak/realm-export.json | 48 ++ tests/integration/test_keycloak_auth/test.py | 420 +++++++++++ ...9_cloud_endpoint_auth_precedence.reference | 6 + .../03749_cloud_endpoint_auth_precedence.sh | 29 + 21 files changed, 2724 insertions(+), 18 deletions(-) create mode 100644 src/Client/OAuthFlowRunner.cpp create mode 100644 src/Client/OAuthFlowRunner.h create mode 100644 src/Client/OAuthJWTProvider.cpp create mode 100644 src/Client/OAuthLogin.cpp create mode 100644 src/Client/OAuthLogin.h create mode 100644 src/Client/OAuthProviderPolicy.cpp create mode 100644 src/Client/OAuthProviderPolicy.h create mode 100644 src/Client/tests/gtest_oauth_login.cpp create mode 100644 
tests/integration/compose/docker_compose_keycloak.yml create mode 100644 tests/integration/test_keycloak_auth/__init__.py create mode 100644 tests/integration/test_keycloak_auth/configs/users.xml create mode 100644 tests/integration/test_keycloak_auth/configs/validators.xml create mode 100644 tests/integration/test_keycloak_auth/keycloak/realm-export.json create mode 100644 tests/integration/test_keycloak_auth/test.py diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 28543d790627..f75fefe811a7 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -405,9 +405,223 @@ clickhousectl skills --agent claude --agent codex ### Non-interactive flags {#non-interactive-flags} +<<<<<<< HEAD | Flag | Description | |------|-------------| | `--agent ` | Install Skills for a specific agent (can be repeated) | | `--global` | Use global scope; if omitted, project scope is used | | `--all` | Install Skills for all supported agents | | `--detected-only` | Install Skills for supported agents that were detected on the system | +======= +```bash +clickhouse-client clickhouse://localhost/my_database?s + +# equivalent to: +clickhouse-client clickhouse://localhost/my_database -s +``` + +Connect to the default host using the default port, the default user, and the default database. + +```bash +clickhouse-client clickhouse: +``` + +Connect to the default host using the default port, as the user `my_user` and no password. + +```bash +clickhouse-client clickhouse://my_user@ + +# Using a blank password between : and @ means to asking the user to enter the password before starting the connection. +clickhouse-client clickhouse://my_user:@ +``` + +Connect to `localhost` using the email as the user name. `@` symbol is percent encoded to `%40`. + +```bash +clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000 +``` + +Connect to one of two hosts: `192.168.1.15`, `192.168.1.25`. 
+ +```bash +clickhouse-client clickhouse://192.168.1.15,192.168.1.25 +``` + +## Query ID format {#query-id-format} + +In interactive mode ClickHouse Client shows the query ID for every query. By default, the ID is formatted like this: + +```sql +Query id: 927f137d-00f1-4175-8914-0dd066365e96 +``` + +A custom format may be specified in a configuration file inside a `query_id_formats` tag. The `{query_id}` placeholder in the format string is replaced with the query ID. Several format strings are allowed inside the tag. +This feature can be used to generate URLs to facilitate profiling of queries. + +**Example** + +```xml + + + http://speedscope-host/#profileURL=qp%3Fid%3D{query_id} + + +``` + +With the configuration above, the ID of a query is shown in the following format: + +```response +speedscope:http://speedscope-host/#profileURL=qp%3Fid%3Dc8ecc783-e753-4b38-97f1-42cddfb98b7d +``` + +## Configuration files {#configuration_files} + +ClickHouse Client uses the first existing file of the following: + +- A file that is defined with the `-c [ -C, --config, --config-file ]` parameter. +- `./clickhouse-client.[xml|yaml|yml]` +- `$XDG_CONFIG_HOME/clickhouse/config.[xml|yaml|yml]` (or `~/.config/clickhouse/config.[xml|yaml|yml]` if `XDG_CONFIG_HOME` is not set) +- `~/.clickhouse-client/config.[xml|yaml|yml]` +- `/etc/clickhouse-client/config.[xml|yaml|yml]` + +See the sample configuration file in the ClickHouse repository: [`clickhouse-client.xml`](https://github.com/ClickHouse/ClickHouse/blob/master/programs/client/clickhouse-client.xml) + + + + ```xml + + username + password + true + + + /etc/ssl/cert.pem + + + + ``` + + + ```yaml + user: username + password: 'password' + secure: true + openSSL: + client: + caConfig: '/etc/ssl/cert.pem' + ``` + + + +## Environment variable options {#environment-variable-options} + +The user name, password and host can be set via environment variables `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD` and `CLICKHOUSE_HOST`. 
+Command line arguments `--user`, `--password` or `--host`, or a [connection string](#connection_string) (if specified) take precedence over environment variables. + +## Command-line options {#command-line-options} + +All command-line options can be specified directly on the command line or as defaults in the [configuration file](#configuration_files). + +### General options {#command-line-options-general} + +| Option | Description | Default | +|-----------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------|------------------------------| +| `-c [ -C, --config, --config-file ] ` | The location of the configuration file for the client, if it is not at one of the default locations. See [Configuration Files](#configuration_files). | - | +| `--help` | Print usage summary and exit. Combine with `--verbose` to display all possible options including query settings. | - | +| `--history_file ` | Path to a file containing the command history. | - | +| `--history_max_entries` | Maximum number of entries in the history file. | `1000000` (1 million) | +| `--prompt ` | Specify a custom prompt. | The `display_name` of the server | +| `--verbose` | Increase output verbosity. | - | +| `-V [ --version ]` | Print version and exit. 
| - | + +### Connection options {#command-line-options-connection} + +| Option | Description | Default | +|----------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------| +| `--connection ` | The name of preconfigured connection details from the configuration file. See [Connection credentials](#connection-credentials). | - | +| `-d [ --database ] ` | Select the database to default to for this connection. | The current database from the server settings (`default` by default) | +| `-h [ --host ] ` | The hostname of the ClickHouse server to connect to. Can either be a hostname or an IPv4 or IPv6 address. Multiple hosts can be passed via multiple arguments. | `localhost` | +| `--jwt ` | Use JSON Web Token (JWT) for authentication.

Server JWT authorization is only available in ClickHouse Cloud. | - | +| `--login[=]` | Authenticate via OAuth2. Bare `--login` (no `=`) triggers ClickHouse Cloud automatic login — the provider is inferred from the server. To authenticate against a custom OpenID Connect provider, supply a `mode` and `--oauth-credentials`: `--login=browser` runs the Authorization Code + PKCE flow (opens a browser), `--login=device` runs the Device Authorization flow (prints a URL and short code — no browser needed). | - | +| `--oauth-credentials ` | Path to an OAuth2 credentials JSON file (Google Cloud Console format). Required when using `--login=browser` or `--login=device` with a custom OpenID Connect provider. See [OAuth credentials file format](#oauth-credentials-file) below. Refresh tokens are cached in `~/.clickhouse-client/oauth_cache.json` (mode `0600`). | `~/.clickhouse-client/oauth_client.json` | +| `--no-warnings` | Disable showing warnings from `system.warnings` when the client connects to the server. | - | +| `--no-server-client-version-message` | Suppress server-client version mismatch message when the client connects to the server. | - | +| `--password ` | The password of the database user. You can also specify the password for a connection in the configuration file. If you do not specify the password, the client will ask for it. | - | +| `--port ` | The port the server is accepting connections on. The default ports are 9440 (TLS) and 9000 (no TLS).

Note: The client uses the native protocol and not HTTP(S). | `9440` if `--secure` is specified, `9000` otherwise. Always defaults to `9440` if the hostname ends in `.clickhouse.cloud`. | +| `-s [ --secure ]` | Whether to use TLS.

Enabled automatically when connecting to port 9440 (the default secure port) or ClickHouse Cloud.

You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#openssl). | Auto-enabled when connecting to port 9440 or ClickHouse Cloud | +| `--ssh-key-file ` | File containing the SSH private key for authenticating with the server. | - | +| `--ssh-key-passphrase ` | Passphrase for the SSH private key specified in `--ssh-key-file`. | - | +| `--tls-sni-override ` | If using TLS, the server name (SNI) to pass in the handshake. | The host provided via `-h` or `--host`. | +| `-u [ --user ] ` | The database user to connect as. | `default` | + +:::note +Instead of the `--host`, `--port`, `--user` and `--password` options, the client also supports [connection strings](#connection_string). +::: + +### OAuth credentials file {#oauth-credentials-file} + +When using `--login=browser` or `--login=device` with a custom OpenID Connect provider, the client reads a credentials JSON file. The file uses the same format produced by the Google Cloud Console ("OAuth 2.0 Client IDs" → "Download JSON"): + +```json +{ + "installed": { + "client_id": "YOUR_CLIENT_ID", + "client_secret": "YOUR_CLIENT_SECRET", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "redirect_uris": ["http://127.0.0.1"] + } +} +``` + +The top-level key can be `installed` (desktop/CLI apps) or `web`. Required fields: `client_id`, `auth_uri`, `token_uri`. Optional fields: + +| Field | Description | +|---|---| +| `client_secret` | Confidential-client secret. Omit (or leave empty) for OIDC public clients — the auth-code flow is always protected by PKCE and the device flow by the device code, so a secret is not required by the protocol. 
When the field is absent the client never sends a `client_secret` form parameter, which is the form public-client registrations require (Auth0, Microsoft Entra ID, Keycloak, Okta and others reject empty secrets with `invalid_client`). | +| `device_authorization_uri` | Device authorization endpoint. Discovered automatically via OIDC Discovery if absent. | +| `issuer` | OIDC issuer URL (e.g. `https://accounts.google.com`). Used to locate the discovery document when `device_authorization_uri` is not set. | + +The default path is `~/.clickhouse-client/oauth_client.json`. Override it with `--oauth-credentials `. + +After a successful login the obtained refresh token is cached in `~/.clickhouse-client/oauth_cache.json` (file mode `0600`). Subsequent runs reuse the cached token silently and only open the browser or print a device code when the refresh token has expired. + +### Query options {#command-line-options-query} + +| Option | Description | +|---------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `--param_=` | Substitution value for a parameter of a [query with parameters](#cli-queries-with-parameters). | +| `-q [ --query ] ` | The query to run in batch mode. Can be specified multiple times (`--query "SELECT 1" --query "SELECT 2"`) or once with multiple semicolon-separated queries (`--query "SELECT 1; SELECT 2;"`). In the latter case, `INSERT` queries with formats other than `VALUES` must be separated by empty lines.

A single query can also be specified without a parameter: `clickhouse-client "SELECT 1"`

Cannot be used together with `--queries-file`. | +| `--queries-file ` | Path to a file containing queries. `--queries-file` can be specified multiple times, e.g. `--queries-file queries1.sql --queries-file queries2.sql`.

Cannot be used together with `--query`. | +| `-m [ --multiline ]` | If specified, allow multiline queries (do not send the query on Enter). Queries will be sent only when they are ended with a semicolon. | + +### Query settings {#command-line-options-query-settings} + +Query settings can be specified as command-line options in the client, for example: +```bash +$ clickhouse-client --max_threads 1 +``` + +See [Settings](../operations/settings/settings.md) for a list of settings. + +### Formatting options {#command-line-options-formatting} + +| Option | Description | Default | +|---------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------| +| `-f [ --format ] ` | Use the specified format to output the result.

See [Formats for Input and Output Data](formats.md) for a list of supported formats. | `TabSeparated` | +| `--pager ` | Pipe all output into this command. Typically `less` (e.g., `less -S` to display wide result sets) or similar. | - | +| `-E [ --vertical ]` | Use the [Vertical format](/interfaces/formats/Vertical) to output the result. This is the same as `–-format Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables. | - | + +### Execution details {#command-line-options-execution-details} + +| Option | Description | Default | +|-----------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------| +| `--enable-progress-table-toggle` | Enable toggling of the progress table by pressing the control key (Space). Only applicable in interactive mode with progress table printing enabled. | `enabled` | +| `--hardware-utilization` | Print hardware utilization information in progress bar. | - | +| `--memory-usage` | If specified, print memory usage to `stderr` in non-interactive mode.

Possible values:
• `none` - do not print memory usage
• `default` - print number of bytes
• `readable` - print memory usage in human-readable format | - | +| `--print-profile-events` | Print `ProfileEvents` packets. | - | +| `--progress` | Print progress of query execution.

Possible values:
• `tty\|on\|1\|true\|yes` - outputs to the terminal in interactive mode
• `err` - outputs to `stderr` in non-interactive mode
• `off\|0\|false\|no` - disables progress printing | `tty` in interactive mode, `off` in non-interactive (batch) mode | +| `--progress-table` | Print a progress table with changing metrics during query execution.

Possible values:
• `tty\|on\|1\|true\|yes` - outputs to the terminal in interactive mode
• `err` - outputs to `stderr` in non-interactive mode
• `off\|0\|false\|no` - disables the progress table | `tty` in interactive mode, `off` in non-interactive (batch) mode | +| `--stacktrace` | Print stack traces of exceptions. | - | +| `-t [ --time ]` | Print query execution time to `stderr` in non-interactive mode (for benchmarks). | - | +>>>>>>> 67683cd1b46 (Merge pull request #1606 from Altinity/feature/client-IdP) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index d37218c596e4..ec82250b80c8 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -27,6 +27,7 @@ #include #include +#include #include #include @@ -67,6 +68,7 @@ namespace ErrorCodes extern const int AUTHENTICATION_FAILED; extern const int REQUIRED_SECOND_FACTOR; extern const int REQUIRED_PASSWORD; + extern const int SUPPORT_IS_DISABLED; extern const int USER_EXPIRED; } @@ -282,7 +284,7 @@ void Client::initialize(Poco::Util::Application & self) (loaded_config.configuration->has("user") || loaded_config.configuration->has("password"))) { /// Config file has auth credentials, so disable the auto-added login flag - config().setBool("login", false); + config().setBool("cloud_oauth_pending", false); } #endif } @@ -372,7 +374,7 @@ try } #if USE_JWT_CPP && USE_SSL - if (config().getBool("login", false)) + if (config().getBool("cloud_oauth_pending", false) && !config().has("jwt")) { login(); } @@ -754,9 +756,19 @@ void Client::addExtraOptions(OptionsDescription & options_description) ("ssh-key-passphrase", po::value(), "Passphrase for the SSH private key specified by --ssh-key-file.") ("quota_key", po::value(), "A string to differentiate quotas when the user have keyed quotas configured on server") ("jwt", po::value(), "Use JWT for authentication") +<<<<<<< HEAD ("one-time-password", po::value(), "Time-based one-time password (TOTP) for two-factor authentication") +======= + ("login", po::value()->implicit_value(""), + "Authenticate via OAuth2. 
Optional mode: 'browser' (auth-code + PKCE, opens browser) " + "or 'device' (device flow, prints URL + code). " + "Example: --login=browser or --login=device. " + "Bare --login uses the ClickHouse Cloud auto-login path.") + ("oauth-credentials", po::value(), + "Path to OAuth credentials JSON file " + "(default: ~/.clickhouse-client/oauth_client.json)") +>>>>>>> 67683cd1b46 (Merge pull request #1606 from Altinity/feature/client-IdP) #if USE_JWT_CPP && USE_SSL - ("login", po::bool_switch(), "Use OAuth 2.0 to login") ("oauth-url", po::value(), "The base URL for the OAuth 2.0 authorization server") ("oauth-client-id", po::value(), "The client ID for the OAuth 2.0 application") ("oauth-audience", po::value(), "The audience for the OAuth 2.0 token") @@ -922,16 +934,77 @@ void Client::processOptions( config().setString("jwt", options["jwt"].as()); config().setString("user", ""); } -#if USE_JWT_CPP && USE_SSL - if (options["login"].as()) + if (options.count("oauth-credentials") && !options.count("login")) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "--oauth-credentials requires --login=browser or --login=device"); + + if (options.count("login")) { - if (!options["user"].defaulted()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "User and login flags can't be specified together"); + /// Reject mixed JWT + --login from any source. The --login branch below + /// ends up calling config().setString("jwt", jwt_provider->getJWT()), + /// which would silently overwrite a JWT supplied via --jwt or via the + /// XML config file. config().has("jwt") covers both: CLI --jwt was + /// already copied into config() above, and a element in + /// ~/.clickhouse-client/config.xml is loaded into config() at startup. 
if (config().has("jwt")) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "JWT and login flags can't be specified together"); - config().setBool("login", true); - config().setString("user", ""); + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "--login cannot be combined with a JWT (provided via --jwt or in the config file)"); + + const std::string login_mode = options["login"].as(); + if (!login_mode.empty() && login_mode != "browser" && login_mode != "device") + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "--login value must be 'browser' or 'device', got '{}'", + login_mode); + +#if USE_JWT_CPP && USE_SSL + if (!options["user"].defaulted()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "--user and --login cannot both be specified"); + + // Bare --login (empty mode, including auto-added for *.clickhouse.cloud) → cloud path. + // Explicit --login=browser or --login=device (or --oauth-credentials) → credentials-file + // OIDC path. This prevents the credentials file from hijacking the cloud auto-login. + const bool use_credentials_file + = !login_mode.empty() + || options.count("oauth-credentials"); + + if (use_credentials_file) + { + const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) + const std::string default_creds_path = home_path_cstr + ? std::string(home_path_cstr) + "/.clickhouse-client/oauth_client.json" + : ""; + + const std::string creds_path = options.count("oauth-credentials") + ? options["oauth-credentials"].as() + : default_creds_path; + + auto creds = loadOAuthCredentials(creds_path); + const auto mode = (login_mode == "device") ? OAuthFlowMode::Device : OAuthFlowMode::AuthCode; + + // createOAuthJWTProvider runs the initial flow (trying the cached + // refresh token first) and returns a provider that Connection can + // call to refresh the id_token transparently during long sessions. 
+ jwt_provider = createOAuthJWTProvider(creds, mode); + config().setString("jwt", jwt_provider->getJWT()); + config().setString("user", ""); + } + else + { + // Cloud-specific login path — bare --login, including auto-added for + // *.clickhouse.cloud endpoints. Use a separate config key so that + // argsToConfig() overwriting config["login"] with the raw string value + // cannot cause getBool("login") to throw in main(). + config().setBool("cloud_oauth_pending", true); + config().setString("user", ""); + } +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "OAuth login requires a build with JWT and SSL support"); +#endif } +#if USE_JWT_CPP && USE_SSL if (options.contains("oauth-url")) config().setString("oauth-url", options["oauth-url"].as()); if (options.contains("oauth-client-id")) @@ -1105,6 +1178,7 @@ void Client::readArguments( std::string_view arg(argv[i]); if (arg.starts_with("--user") || arg.starts_with("--password") || arg.starts_with("--jwt") || arg.starts_with("--ssh-key-file") || + arg == "--login" || arg.starts_with("--login=") || arg == "-u") { has_auth_in_cmdline = true; diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp index 6a8ced064c96..d3c8614f3799 100644 --- a/src/Access/TokenProcessorsOpaque.cpp +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -7,6 +7,9 @@ #include #include +#include +#include + namespace DB { namespace ErrorCodes @@ -114,7 +117,19 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co auto token_info = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/tokeninfo"), token); if (token_info.contains("exp")) - credentials.setExpiresAt(std::chrono::system_clock::from_time_t((getValueByKey(token_info, "exp").value()))); + { + /// picojson stores all numerics as double; we need to validate the + /// value is a finite, positive Unix timestamp that fits in time_t + /// before casting. 
+ const double exp = getValueByKey(token_info, "exp").value(); + if (!std::isfinite(exp) || exp <= 0.0 + || exp > static_cast(std::numeric_limits::max())) + throw Exception( + ErrorCodes::AUTHENTICATION_FAILED, + "{}: tokeninfo response contains an out-of-range 'exp' value: {}", + processor_name, exp); + credentials.setExpiresAt(std::chrono::system_clock::from_time_t(static_cast(exp))); + } /// Groups info can only be retrieved if user email is known. /// If no email found in user info, we skip this step and there are no external roles for the user. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e62f7777d164..684c1e396930 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -555,10 +555,6 @@ target_link_libraries( Poco::Redis ) -if (TARGET ch_contrib::jwt-cpp) - target_link_libraries(clickhouse_common_io PUBLIC ch_contrib::jwt-cpp) -endif() - if (TARGET ch_contrib::mongocxx) target_link_libraries( dbms @@ -859,6 +855,3 @@ if (ENABLE_TESTS) endif() endif () -if (TARGET ch_contrib::jwt-cpp) - add_object_library(clickhouse_client_jwt Client/jwt) -endif() diff --git a/src/Client/OAuthFlowRunner.cpp b/src/Client/OAuthFlowRunner.cpp new file mode 100644 index 000000000000..ec9d673c2004 --- /dev/null +++ b/src/Client/OAuthFlowRunner.cpp @@ -0,0 +1,694 @@ +#include +#include + +#if USE_JWT_CPP && USE_SSL + +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#if defined(__APPLE__) || defined(__linux__) +# include +# include +#endif + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int AUTHENTICATION_FAILED; +} + +void writeCachedRefreshToken(const std::string & client_id, const std::string & refresh_token); + +namespace +{ + +constexpr int HTTP_TIMEOUT_SECONDS = 30; + +/// Hard cap on the 
size of an OAuth2 token / device-authorization response +/// body. Real responses are a few hundred bytes; we accept up to 1 MiB so a +/// hostile or compromised endpoint cannot stream gigabytes into a std::string +/// (memory-exhaustion DoS of clickhouse-client). Anything larger is treated +/// as a protocol error. +constexpr size_t OAUTH_MAX_RESPONSE_BYTES = 1 * 1024 * 1024; + +/// Bounds for RFC 8628 device-flow timing values. The client treats the device +/// authorization endpoint as untrusted: a hostile or misconfigured server must +/// not be able to push the client into a tight poll loop (interval <= 0), an +/// uninterruptible multi-hour sleep (interval huge), or an effectively +/// unbounded polling window (expires_in huge). Out-of-range values are treated +/// as a protocol error; in-range values are additionally clamped so that a +/// single sleep_for() never exceeds DEVICE_FLOW_MAX_INTERVAL_SECONDS, which +/// also caps how long Ctrl-C remains unresponsive. +constexpr int DEVICE_FLOW_MIN_INTERVAL_SECONDS = 1; +constexpr int DEVICE_FLOW_MAX_INTERVAL_SECONDS = 60; +constexpr int DEVICE_FLOW_INTERVAL_HARD_LIMIT_SECONDS = 3600; +constexpr int DEVICE_FLOW_DEFAULT_INTERVAL_SECONDS = 5; + +constexpr int DEVICE_FLOW_MIN_EXPIRES_IN_SECONDS = 60; +constexpr int DEVICE_FLOW_MAX_EXPIRES_IN_SECONDS = 1800; +constexpr int DEVICE_FLOW_EXPIRES_IN_HARD_LIMIT_SECONDS = 86400; +constexpr int DEVICE_FLOW_DEFAULT_EXPIRES_IN_SECONDS = 300; + +constexpr int DEVICE_FLOW_SLOW_DOWN_INCREMENT_SECONDS = 5; + +/// Cadence at which the device-flow polling loop emits a heartbeat to stderr. +/// Without this the client prints the URL/user_code once and then stays +/// completely silent (sometimes for the full 1800s expires_in clamp) while it +/// internally polls the token endpoint, leaving the user unable to tell +/// "still waiting for me to approve in the browser" apart from "the process +/// is wedged on a network call". 
30s is short enough to give a perceptible +/// pulse for the default 300s window, and long enough that a 1800s window +/// produces ~60 lines of scrollback, not 360. The cadence is real-time, not +/// per-poll, so a server-driven slow_down ratchet doesn't silently stretch +/// the perceived gap between updates. +constexpr int DEVICE_FLOW_STATUS_INTERVAL_SECONDS = 30; + +int extractDeviceFlowInt(const Poco::JSON::Object::Ptr & resp, const std::string & key, int default_value) +{ + if (!resp->has(key)) + return default_value; + try + { + return resp->getValue(key); + } + catch (const Poco::Exception &) + { + throw Exception( + ErrorCodes::AUTHENTICATION_FAILED, + "Device authorization response value '{}' is not a valid integer", + key); + } +} + +/// Read up to max_bytes from `in` into `out`. Throws AUTHENTICATION_FAILED if +/// the stream contains more than max_bytes. Used to bound response sizes from +/// untrusted OAuth endpoints. +void copyStreamWithLimit(std::istream & in, std::string & out, size_t max_bytes) +{ + constexpr size_t buf_size = 8192; + char buffer[buf_size]; + out.clear(); + while (in) + { + in.read(buffer, static_cast(buf_size)); + const auto got = static_cast(in.gcount()); + if (got == 0) + break; + if (out.size() + got > max_bytes) + throw Exception( + ErrorCodes::AUTHENTICATION_FAILED, + "OAuth2 endpoint response exceeds size limit of {} bytes", + max_bytes); + out.append(buffer, got); + } +} + +std::string htmlEscape(const std::string & s) +{ + std::string out; + out.reserve(s.size()); + for (char c : s) + { + switch (c) + { + case '&': out += "&"; break; + case '<': out += "<"; break; + case '>': out += ">"; break; + case '"': out += """; break; + case '\'': out += "'"; break; + default: out += c; break; + } + } + return out; +} + +struct PKCEPair +{ + std::string verifier; + std::string challenge; +}; + +PKCEPair generatePKCE() +{ + unsigned char raw[32]; + if (RAND_bytes(raw, sizeof(raw)) != 1) + throw 
Exception(ErrorCodes::AUTHENTICATION_FAILED, "RAND_bytes failed for PKCE verifier"); + + std::string verifier = base64Encode( + std::string(reinterpret_cast(raw), sizeof(raw)), + /*url_encoding=*/true, + /*no_padding=*/true); + + std::string sha = encodeSHA256(verifier); + std::string challenge = base64Encode(sha, /*url_encoding=*/true, /*no_padding=*/true); + return {verifier, challenge}; +} + +void openBrowser(const std::string & url) +{ + std::cerr << "Opening browser for authentication.\n" + << "If the browser does not open, visit:\n " << url << "\n"; + +#if defined(__APPLE__) || defined(__linux__) + const char * cmd = +# if defined(__APPLE__) + "open"; +# else + "xdg-open"; +# endif + const char * argv[] = {cmd, url.c_str(), nullptr}; + pid_t pid; + /// posix_spawnp returns the error number directly (not via errno); a + /// nonzero return means we never got to exec the helper at all (e.g. + /// xdg-open is not installed on a headless host). A zero return followed + /// by a nonzero waitpid exit status means the helper ran but failed to + /// launch a browser (xdg-open exits 3 when no handler is registered). + /// Without the diagnostic below, the caller would silently block in the + /// 120s callback wait, which is the L2 hazard. 
+ if (posix_spawnp(&pid, cmd, nullptr, nullptr, const_cast(argv), nullptr) != 0) + { + std::cerr << "Unable to launch '" << cmd << "'; please copy the URL above into a browser manually.\n"; + return; + } + int status = 0; + if (waitpid(pid, &status, 0) < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0) + std::cerr << "Unable to launch a browser via '" << cmd + << "'; please copy the URL above into a browser manually.\n"; +#else + std::cerr << "Automatic browser launch is not supported on this platform; " + "please copy the URL above into a browser manually.\n"; +#endif +} + +struct AuthCodeState +{ + std::mutex mtx; + std::condition_variable cv; + std::string code; + std::string error; + std::string received_state; + bool done = false; + + /// Pre-loaded before the server starts so that the loopback server can + /// serve the auth URL via a 302 redirect on /start. The browser helper is + /// then launched with only the loopback URL on its argv, keeping the CSRF + /// state and PKCE challenge out of /proc//cmdline of the helper. + /// Read-only after server.start(); never mutated by the handler. + std::string auth_url; +}; + +class AuthCodeHandler : public Poco::Net::HTTPRequestHandler +{ +public: + explicit AuthCodeHandler(AuthCodeState & state_) : state(state_) { } + + void handleRequest(Poco::Net::HTTPServerRequest & request, Poco::Net::HTTPServerResponse & response) override + { + Poco::URI uri("http://localhost" + request.getURI()); + + /// RFC 8252 §7.3: native-app loopback redirects must be accepted only + /// at the registered redirect URI, and the OAuth2 redirect is always a + /// GET. Any other method or path is either a stray request from the + /// browser (e.g. 
/favicon.ico) or a local attacker probing the + /// ephemeral port; in both cases we must respond with an error and + /// must not unblock the main thread, otherwise the legitimate IdP + /// redirect can be pre-empted (causing either a DoS of the flow or, + /// if the attacker has obtained the CSRF state via /proc//cmdline + /// of the spawned browser helper, a code-injection race). + if (request.getMethod() != Poco::Net::HTTPRequest::HTTP_GET) + { + response.setStatus(Poco::Net::HTTPResponse::HTTP_METHOD_NOT_ALLOWED); + response.setContentType("text/plain"); + response.send() << "Method Not Allowed"; + return; + } + + const std::string & path = uri.getPath(); + + /// The browser helper is launched against /start instead of the full + /// auth URL so the CSRF state and PKCE challenge do not appear in any + /// process argv. We hand the URL to the browser via a same-origin 302 + /// served by this loopback server. /start does not mutate auth state, + /// so it is safe to serve it more than once. 
+ if (path == "/start") + { + std::string target; + { + std::lock_guard lock(state.mtx); + target = state.auth_url; + } + if (target.empty()) + { + response.setStatus(Poco::Net::HTTPResponse::HTTP_NOT_FOUND); + response.setContentType("text/plain"); + response.send() << "Not Found"; + return; + } + response.redirect(target, Poco::Net::HTTPResponse::HTTP_FOUND); + return; + } + + if (path != "/callback") + { + response.setStatus(Poco::Net::HTTPResponse::HTTP_NOT_FOUND); + response.setContentType("text/plain"); + response.send() << "Not Found"; + return; + } + + const auto params = uri.getQueryParameters(); + + std::string code; + std::string error; + std::string received_state; + for (const auto & [k, v] : params) + { + if (k == "code") + code = v; + else if (k == "error") + error = v; + else if (k == "state") + received_state = v; + } + + response.setStatus(Poco::Net::HTTPResponse::HTTP_OK); + response.setContentType("text/html"); + auto & out = response.send(); + if (!code.empty()) + out << "Authentication successful. You may close this tab."; + else + out << "Authentication failed: " << htmlEscape(error) << ""; + out.flush(); + + std::lock_guard lock(state.mtx); + /// Only the first valid /callback delivery wins; subsequent requests + /// (e.g. an attacker racing the IdP after the legitimate redirect has + /// already been recorded) are ignored so they cannot overwrite a + /// previously-validated code/state pair. 
+ if (state.done) + return; + state.code = code; + state.error = error; + state.received_state = received_state; + state.done = true; + state.cv.notify_one(); + } + +private: + AuthCodeState & state; +}; + +class AuthCodeHandlerFactory : public Poco::Net::HTTPRequestHandlerFactory +{ +public: + explicit AuthCodeHandlerFactory(AuthCodeState & state_) : state(state_) { } + + Poco::Net::HTTPRequestHandler * createRequestHandler(const Poco::Net::HTTPServerRequest &) override + { + return new AuthCodeHandler(state); + } + +private: + AuthCodeState & state; +}; + +} + +std::string urlEncodeOAuth(const std::string & value) +{ + std::string result; + Poco::URI::encode(value, "", result); + return result; +} + +Poco::JSON::Object::Ptr postOAuthForm(const std::string & url, const std::string & body) +{ + Poco::URI uri(url); + Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_POST, uri.getPathAndQuery()); + request.setContentType("application/x-www-form-urlencoded"); + request.setContentLength(static_cast(body.size())); + + Poco::Net::HTTPResponse response; + std::string response_body; + + if (uri.getScheme() == "https") + { + Poco::Net::Context::Ptr ctx = Poco::Net::SSLManager::instance().defaultClientContext(); + Poco::Net::HTTPSClientSession session(uri.getHost(), uri.getPort(), ctx); + session.setTimeout(Poco::Timespan(HTTP_TIMEOUT_SECONDS, 0)); + auto & req_stream = session.sendRequest(request); + req_stream << body; + auto & resp_stream = session.receiveResponse(response); + copyStreamWithLimit(resp_stream, response_body, OAUTH_MAX_RESPONSE_BYTES); + } + else + { + Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort()); + session.setTimeout(Poco::Timespan(HTTP_TIMEOUT_SECONDS, 0)); + auto & req_stream = session.sendRequest(request); + req_stream << body; + auto & resp_stream = session.receiveResponse(response); + copyStreamWithLimit(resp_stream, response_body, OAUTH_MAX_RESPONSE_BYTES); + } + + Poco::Dynamic::Var parsed; + try + { + 
Poco::JSON::Parser parser; + parsed = parser.parse(response_body); + } + catch (...) + { + throw Exception( + ErrorCodes::AUTHENTICATION_FAILED, + "OAuth2 endpoint '{}' returned HTTP {} with non-JSON body: {}", + url, + static_cast(response.getStatus()), + response_body.substr(0, 512)); + } + + auto obj = parsed.extract(); + if (!obj) + throw Exception( + ErrorCodes::AUTHENTICATION_FAILED, + "OAuth2 endpoint '{}' returned HTTP {} with non-object JSON response: {}", + url, + static_cast(response.getStatus()), + response_body.substr(0, 512)); + return obj; +} + +std::string runOAuthAuthCodeFlow(const OAuthCredentials & creds) +{ + auto provider_policy = IOAuthProviderPolicy::create(creds); + auto pkce = generatePKCE(); + + unsigned char state_bytes[16]; + if (RAND_bytes(state_bytes, sizeof(state_bytes)) != 1) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "RAND_bytes failed for OAuth state"); + + std::string csrf_state; + csrf_state.reserve(32); + for (unsigned char b : state_bytes) + { + constexpr char digits[] = "0123456789abcdef"; + csrf_state += digits[(b >> 4) & 0xF]; + csrf_state += digits[b & 0xF]; + } + + Poco::Net::ServerSocket server_socket; + server_socket.bind(Poco::Net::SocketAddress("127.0.0.1", 0), /*reuse_address=*/true); + server_socket.listen(1); + const uint16_t port = server_socket.address().port(); + + /// RFC 8252 §7.3 recommends the loopback IP literal over the hostname + /// "localhost" for native-app redirect URIs. We bind only to 127.0.0.1, but + /// "localhost" can resolve to ::1 first on dual-stack hosts (RFC 6724 + /// default ordering, /etc/hosts, or NSS/AAAA preference): in that case the + /// browser's GET on the redirect lands on the IPv6 loopback where nothing + /// is listening, the auth code is silently dropped, and the main thread + /// hits the 120s wait_for() timeout even though the user successfully + /// completed the login. 
Using the IP literal keeps the redirect target + /// aligned with the bound socket regardless of resolver behaviour, and + /// matches the host already used for the /start browser entry URL below. + const std::string redirect_uri = "http://127.0.0.1:" + std::to_string(port) + "/callback"; + + std::string auth_url + = creds.auth_uri + + "?response_type=code" + "&client_id=" + urlEncodeOAuth(creds.client_id) + + "&redirect_uri=" + urlEncodeOAuth(redirect_uri) + + "&code_challenge=" + pkce.challenge + + "&code_challenge_method=S256" + + "&scope=" + urlEncodeOAuth(provider_policy->getAuthCodeScope()) + + "&state=" + csrf_state; + if (provider_policy->useAccessTypeOfflineForAuthCode()) + auth_url += "&access_type=offline"; + + AuthCodeState state; + /// Publish the auth URL to the loopback server before it starts so /start + /// can immediately redirect to it. This is set under the mutex for + /// happens-before with the handler thread; in practice it is only ever + /// read after server.start() but we keep the synchronization explicit. + { + std::lock_guard lock(state.mtx); + state.auth_url = auth_url; + } + auto params = Poco::AutoPtr(new Poco::Net::HTTPServerParams()); + params->setMaxQueued(1); + params->setMaxThreads(1); + Poco::Net::HTTPServer server(new AuthCodeHandlerFactory(state), server_socket, params); + server.start(); + + /// Launch the browser against the loopback /start endpoint instead of the + /// real auth URL. The full auth URL (including CSRF state and PKCE + /// challenge) therefore never appears in any process's argv, closing the + /// /proc//cmdline disclosure path that local same-UID attackers + /// previously had against the spawned xdg-open / open helper. 
+ const std::string browser_entry_url = "http://127.0.0.1:" + std::to_string(port) + "/start"; + openBrowser(browser_entry_url); + + bool timed_out = false; + std::string received_code; + std::string received_error; + std::string received_state; + { + std::unique_lock lock(state.mtx); + timed_out = !state.cv.wait_for(lock, std::chrono::seconds(120), [&] { return state.done; }); + received_code = state.code; + received_error = state.error; + received_state = state.received_state; + } + server.stop(); + + if (timed_out) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "OAuth2 login timed out waiting for browser callback"); + if (!received_error.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "OAuth2 authorization error: {}", received_error); + if (received_code.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "OAuth2 callback did not contain an authorization code"); + if (received_state != csrf_state) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "OAuth2 CSRF check failed: unexpected state in callback"); + + std::string body + = "grant_type=authorization_code" + "&code=" + urlEncodeOAuth(received_code) + + "&redirect_uri=" + urlEncodeOAuth(redirect_uri) + + "&client_id=" + urlEncodeOAuth(creds.client_id) + + "&code_verifier=" + urlEncodeOAuth(pkce.verifier); + /// Confidential clients append the registered secret; public clients + /// (PKCE-only) must omit the parameter entirely. An empty value is not + /// equivalent to omission and is rejected by several IdPs as invalid_client. + if (!creds.client_secret.empty()) + body += "&client_secret=" + urlEncodeOAuth(creds.client_secret); + + auto resp = postOAuthForm(creds.token_uri, body); + if (resp->has("error")) + { + const std::string desc = resp->has("error_description") + ? 
resp->getValue("error_description") + : resp->getValue("error"); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "OAuth2 token exchange failed: {}", desc); + } + + if (!resp->has("id_token")) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "OAuth2 token response did not contain id_token"); + + if (resp->has("refresh_token")) + writeCachedRefreshToken(creds.client_id, resp->getValue("refresh_token")); + + return resp->getValue("id_token"); +} + +std::string runOAuthDeviceFlow(OAuthCredentials creds) +{ + auto provider_policy = IOAuthProviderPolicy::create(creds); + if (creds.device_auth_uri.empty()) + creds.device_auth_uri = provider_policy->resolveDeviceAuthorizationEndpoint(creds); + + const std::string device_scope = provider_policy->getDeviceScope(); + const std::string device_body + = "client_id=" + urlEncodeOAuth(creds.client_id) + + "&scope=" + urlEncodeOAuth(device_scope); + + auto device_resp = postOAuthForm(creds.device_auth_uri, device_body); + + if (device_resp->has("error")) + { + const std::string desc = device_resp->has("error_description") + ? device_resp->getValue("error_description") + : device_resp->getValue("error"); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Device authorization request failed: {}", desc); + } + + if (!device_resp->has("device_code") || !device_resp->has("user_code")) + throw Exception( + ErrorCodes::AUTHENTICATION_FAILED, + "Device authorization response from '{}' is missing required fields " + "(device_code / user_code). Response: {}", + creds.device_auth_uri, + [&] + { + std::ostringstream ss; + device_resp->stringify(ss); + return ss.str(); + }()); + + const std::string device_code = device_resp->getValue("device_code"); + const std::string user_code = device_resp->getValue("user_code"); + const std::string verification_uri = device_resp->has("verification_uri_complete") + ? device_resp->getValue("verification_uri_complete") + : device_resp->has("verification_uri") + ? 
device_resp->getValue("verification_uri") + : device_resp->has("verification_url") + ? device_resp->getValue("verification_url") + : throw Exception( + ErrorCodes::AUTHENTICATION_FAILED, + "Device authorization response from '{}' is missing verification_uri / " + "verification_uri_complete / verification_url. Response: {}", + creds.device_auth_uri, + [&] + { + std::ostringstream ss; + device_resp->stringify(ss); + return ss.str(); + }()); + + int interval = extractDeviceFlowInt(device_resp, "interval", DEVICE_FLOW_DEFAULT_INTERVAL_SECONDS); + int expires_in = extractDeviceFlowInt(device_resp, "expires_in", DEVICE_FLOW_DEFAULT_EXPIRES_IN_SECONDS); + + /// Reject values that are non-positive or wildly out of spec: a hostile or + /// misconfigured device endpoint must not be able to coerce the client + /// into a tight poll loop, a multi-hour uninterruptible sleep, or an + /// effectively unbounded polling window. + if (interval <= 0 || interval > DEVICE_FLOW_INTERVAL_HARD_LIMIT_SECONDS) + throw Exception( + ErrorCodes::AUTHENTICATION_FAILED, + "Device authorization response specified an out-of-range polling interval: {} seconds", + interval); + if (expires_in <= 0 || expires_in > DEVICE_FLOW_EXPIRES_IN_HARD_LIMIT_SECONDS) + throw Exception( + ErrorCodes::AUTHENTICATION_FAILED, + "Device authorization response specified an out-of-range expires_in: {} seconds", + expires_in); + + /// Clamp into a sensible operational window. The interval upper bound also + /// bounds how long a single sleep_for() blocks, which is the time window + /// during which Ctrl-C cannot interrupt the flow. 
+ interval = std::clamp(interval, DEVICE_FLOW_MIN_INTERVAL_SECONDS, DEVICE_FLOW_MAX_INTERVAL_SECONDS); + expires_in = std::clamp(expires_in, DEVICE_FLOW_MIN_EXPIRES_IN_SECONDS, DEVICE_FLOW_MAX_EXPIRES_IN_SECONDS); + + std::cerr << "\nTo authenticate, visit:\n " << verification_uri << "\nAnd enter code: " << user_code << "\n\n"; + std::cerr << "Waiting for authorization (this code expires in " << expires_in << " seconds)...\n"; + + const auto start = std::chrono::steady_clock::now(); + const auto deadline = start + std::chrono::seconds(expires_in); + /// Real-time gate for the heartbeat below. Initialised to `start` rather + /// than to "after the first poll" so the first heartbeat fires ~30s after + /// the URL/code line, regardless of how long the first network round-trip + /// takes. + auto last_status = start; + while (std::chrono::steady_clock::now() < deadline) + { + std::this_thread::sleep_for(std::chrono::seconds(interval)); + + std::string poll_body + = "grant_type=urn:ietf:params:oauth:grant-type:device_code" + "&device_code=" + urlEncodeOAuth(device_code) + + "&client_id=" + urlEncodeOAuth(creds.client_id); + /// See runOAuthAuthCodeFlow() above: omit, do not send empty. + if (!creds.client_secret.empty()) + poll_body += "&client_secret=" + urlEncodeOAuth(creds.client_secret); + + auto resp = postOAuthForm(creds.token_uri, poll_body); + if (resp->has("error")) + { + const std::string err = resp->getValue("error"); + if (err == "authorization_pending" || err == "slow_down") + { + if (err == "slow_down") + { + /// Per RFC 8628 the client must increase its polling + /// interval, but the new value still has to stay inside + /// our operational bound so a server cannot ratchet the + /// interval up indefinitely. Surface the change as a + /// one-shot line — slow_down is rare in practice, and a + /// silent ratchet would be confusing if the user is + /// timing the flow against the deadline they were just + /// shown. 
Reset last_status so the next heartbeat doesn't + /// fire immediately afterwards. + interval = std::min( + interval + DEVICE_FLOW_SLOW_DOWN_INCREMENT_SECONDS, + DEVICE_FLOW_MAX_INTERVAL_SECONDS); + std::cerr << "Server requested slower polling; new interval is " << interval << "s.\n"; + last_status = std::chrono::steady_clock::now(); + } + + /// Heartbeat: keep the user oriented during the (potentially + /// very long) wait between issuing the user_code and the user + /// completing approval in their browser. Gated on real time + /// rather than poll count so the cadence is stable across + /// interval changes (default, slow_down, clamping). + const auto now = std::chrono::steady_clock::now(); + if (now - last_status >= std::chrono::seconds(DEVICE_FLOW_STATUS_INTERVAL_SECONDS)) + { + const auto remaining = std::chrono::duration_cast(deadline - now).count(); + std::cerr << "Still waiting for authorization... (" << remaining << "s remaining)\n"; + last_status = now; + } + continue; + } + const std::string desc = resp->has("error_description") ? 
resp->getValue("error_description") : err; + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Device flow error: {}", desc); + } + + if (!resp->has("id_token")) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Device flow token response did not contain id_token"); + + if (resp->has("refresh_token")) + writeCachedRefreshToken(creds.client_id, resp->getValue("refresh_token")); + + std::cerr << "Authentication successful.\n"; + return resp->getValue("id_token"); + } + + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Device flow timed out"); +} + +} // namespace DB + +#endif // USE_JWT_CPP && USE_SSL diff --git a/src/Client/OAuthFlowRunner.h b/src/Client/OAuthFlowRunner.h new file mode 100644 index 000000000000..de5ba06dc31e --- /dev/null +++ b/src/Client/OAuthFlowRunner.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include + +#if USE_JWT_CPP && USE_SSL + +#include + +#include + +namespace DB +{ + +std::string urlEncodeOAuth(const std::string & value); +Poco::JSON::Object::Ptr postOAuthForm(const std::string & url, const std::string & body); +std::string runOAuthAuthCodeFlow(const OAuthCredentials & creds); +std::string runOAuthDeviceFlow(OAuthCredentials creds); + +} + +#endif // USE_JWT_CPP && USE_SSL diff --git a/src/Client/OAuthJWTProvider.cpp b/src/Client/OAuthJWTProvider.cpp new file mode 100644 index 000000000000..1daeca08681a --- /dev/null +++ b/src/Client/OAuthJWTProvider.cpp @@ -0,0 +1,64 @@ +#include + +#if USE_JWT_CPP && USE_SSL + +#include +#include + +#include +#include + +#include +#include + +namespace DB +{ + +/// JWTProvider subclass for the credentials-file OIDC path (--login=browser / +/// --login=device). Extends JWTProvider so that Connection::sendQuery can call +/// getJWT() transparently to refresh the id_token before it expires, eliminating +/// the 1-hour session limit that arises when the token is obtained only once at +/// startup. +/// +/// getJWT() delegates to obtainIDToken() which already handles the full lifecycle: +/// 1. 
try cached refresh token from disk +/// 2. run interactive flow (browser or device) if the refresh token is absent +/// or rejected +class OAuthJWTProvider : public JWTProvider +{ +public: + OAuthJWTProvider(OAuthCredentials creds, OAuthFlowMode mode) + : JWTProvider("", creds.client_id, "", std::cerr, std::cerr) + , creds_(std::move(creds)) + , mode_(mode) + {} + + std::string getJWT() override + { + constexpr int EXPIRY_BUFFER_SECONDS = 30; + + if (!idp_access_token.empty() + && Poco::Timestamp() < idp_access_token_expires_at - Poco::Timespan(EXPIRY_BUFFER_SECONDS, 0)) + return idp_access_token; + + // obtainIDToken tries the disk-cached refresh token first and falls back + // to an interactive flow only when necessary. + idp_access_token = obtainIDToken(creds_, mode_); + idp_access_token_expires_at = getJwtExpiry(idp_access_token); + return idp_access_token; + } + +private: + OAuthCredentials creds_; + OAuthFlowMode mode_; +}; + +std::shared_ptr createOAuthJWTProvider( + const OAuthCredentials & creds, OAuthFlowMode mode) +{ + return std::make_shared(creds, mode); +} + +} // namespace DB + +#endif // USE_JWT_CPP && USE_SSL diff --git a/src/Client/OAuthLogin.cpp b/src/Client/OAuthLogin.cpp new file mode 100644 index 000000000000..d95dc0c00917 --- /dev/null +++ b/src/Client/OAuthLogin.cpp @@ -0,0 +1,451 @@ +#include +#include + +#if USE_JWT_CPP && USE_SSL + +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +} + +namespace +{ + +std::string cacheKey(const std::string & client_id) +{ + std::string hash = encodeSHA256(client_id); + std::string hex; + hex.reserve(32); + for (unsigned char c : hash) + { + constexpr char digits[] = "0123456789abcdef"; + hex += digits[(c >> 4) & 0xF]; + hex += digits[c & 0xF]; + } + return hex.substr(0, 16); +} + +std::string cacheFilePath() +{ 
+ const char * home = std::getenv("HOME"); // NOLINT(concurrency-mt-unsafe) + if (!home) + return ""; + return std::string(home) + "/.clickhouse-client/oauth_cache.json"; +} + +std::string readCachedRefreshTokenImpl(const std::string & client_id) +{ + const std::string path = cacheFilePath(); + if (path.empty()) + return ""; + + std::ifstream f(path); + if (!f.is_open()) + return ""; + + std::string content((std::istreambuf_iterator(f)), std::istreambuf_iterator()); + try + { + Poco::JSON::Parser parser; + auto result = parser.parse(content); + const auto & obj = result.extract(); + const std::string key = cacheKey(client_id); + if (obj->has(key)) + return obj->getValue(key); + } + catch (...) + { + std::cerr << "Note: OAuth token cache at '" << cacheFilePath() + << "' could not be parsed and will be ignored.\n"; + } + return ""; +} + +} + +namespace +{ + +/// RAII close + unlock for a POSIX fd used as an advisory lock. close(2) +/// implicitly releases the lock; we unlock first only to keep the intent +/// explicit at the close site. +class ScopedFlockFd +{ +public: + explicit ScopedFlockFd(int fd_) : fd(fd_) {} + ~ScopedFlockFd() + { + if (fd >= 0) + { + ::flock(fd, LOCK_UN); + ::close(fd); + } + } + ScopedFlockFd(const ScopedFlockFd &) = delete; + ScopedFlockFd & operator=(const ScopedFlockFd &) = delete; + int get() const { return fd; } + +private: + int fd; +}; + +} + +namespace +{ + +/// Run `mutator` against the on-disk OAuth refresh-token cache as a single +/// atomic read-modify-write under an advisory exclusive flock. Both the writer +/// (cache a new refresh token) and the evictor (drop a rejected one) go +/// through this helper so they share the crash-safe rename and the +/// concurrency lock — using two different paths for the two operations would +/// reintroduce the lost-update race that M1 fixed. 
+///
+/// `warn_on_failure` controls the user-facing diagnostic policy:
+///   - true (writers): a failed FS operation triggers a single warning that
+///     names both the underlying cause (syscall + errno or fs error) AND the
+///     user-visible consequence ("you will be prompted to log in again on the
+///     next invocation"). Without naming the consequence, the syscall warning
+///     alone is too cryptic for users to connect to the symptom they later
+///     observe (unexpected re-auth on the next run), so they cannot act on
+///     it; this is the bug we are fixing here.
+///   - false (evictors): failures are silent. Eviction is best-effort cleanup
+///     after the IdP has rejected a cached refresh token; if the cache file
+///     is unwritable, the next interactive auth's writer attempt will produce
+///     exactly the same warning anyway, and that is the message users
+///     actually need (because it tells them caching is broken going forward,
+///     not just that an already-dead token couldn't be deleted).
+///
+/// `mutator` is called as `mutator(Poco::JSON::Object &)` on the cache object
+/// loaded from disk, while the lock file is held and before the result is
+/// written back.
+template <typename Mutator>
+void mutateRefreshTokenCache(Mutator && mutator, bool warn_on_failure)
+{
+    /// Two-part diagnostic: first a one-line headline that names the
+    /// user-visible consequence (so the message is actionable even for users
+    /// who don't recognise the underlying syscall), then a second line with
+    /// the technical cause for operators / bug reports. Captured by reference
+    /// so each error path is one line at the call site.
+    auto fail = [&](const std::string & cause)
+    {
+        if (!warn_on_failure)
+            return;
+        std::cerr
+            << "Warning: OAuth refresh token will NOT be persisted to disk; "
+               "you may be prompted to log in again on the next invocation.\n"
+            << " Cause: " << cause << "\n";
+    };
+    /// errno-flavoured variant. Snapshot errno immediately on entry — the
+    /// global is fragile across any intervening allocation/IO.
+    auto fail_errno = [&](const char * what, const std::string & arg)
+    {
+        const int e = errno;
+        if (!warn_on_failure)
+            return;
+        /// generic_category().message is the thread-safe equivalent of
+        /// std::strerror, which is flagged by clang-tidy.
+        fail(std::string(what) + " '" + arg + "' failed: " + std::generic_category().message(e));
+    };
+
+    const std::string path = cacheFilePath();
+    if (path.empty())
+    {
+        /// Pre-fix this branch returned silently, so users running without
+        /// $HOME (cron, systemd units without `User=`, sandboxed containers)
+        /// would re-auth on every invocation with no diagnostic at all. Now
+        /// we surface the cause on writes; reads still no-op silently because
+        /// "no HOME" on first run is indistinguishable from "no cache yet".
+        fail("cannot determine cache file path: HOME environment variable is unset");
+        return;
+    }
+
+    namespace fs = std::filesystem;
+    const fs::path cache_path(path);
+    const fs::path cache_dir = cache_path.parent_path();
+
+    std::error_code ec;
+    fs::create_directories(cache_dir, ec);
+    if (ec)
+    {
+        fail("failed to create directory '" + cache_dir.string() + "': " + ec.message());
+        return;
+    }
+
+    /// Serialize concurrent writers via an advisory exclusive lock on a
+    /// dedicated sibling file. cache_path itself cannot be locked because the
+    /// rename(2) below swaps its inode; the .lock file is never renamed, so
+    /// the lock survives the whole read-modify-write. Readers don't take this
+    /// lock — rename(2) is atomic on POSIX, so a concurrent reader observes
+    /// either the previous or the new cache, never a torn file.
+    const fs::path lock_path = cache_dir / ".oauth_cache.lock";
+    int raw_lock_fd = ::open(lock_path.c_str(), O_RDWR | O_CREAT | O_CLOEXEC, 0600);
+    if (raw_lock_fd < 0)
+    {
+        fail_errno("open lock file", lock_path.string());
+        return;
+    }
+    ScopedFlockFd lock_fd(raw_lock_fd);
+    /// flock(2) fails with EINTR when a signal handler runs while we are
+    /// blocked waiting for another writer. That is not a cache problem, so
+    /// retry instead of reporting that the token cannot be persisted.
+    int flock_rc;
+    do
+        flock_rc = ::flock(lock_fd.get(), LOCK_EX);
+    while (flock_rc != 0 && errno == EINTR);
+    if (flock_rc != 0)
+    {
+        fail_errno("flock", lock_path.string());
+        return;
+    }
+
+    /// Read existing entries under the lock so the read-modify-write is
+    /// atomic with respect to other concurrent writers (lost-update fix).
+    Poco::JSON::Object obj;
+    {
+        std::ifstream f(path);
+        if (f.is_open())
+        {
+            std::string content((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
+            try
+            {
+                Poco::JSON::Parser parser;
+                auto result = parser.parse(content);
+                const auto & existing = result.extract<Poco::JSON::Object::Ptr>();
+                for (const auto & [key, value] : *existing)
+                    obj.set(key, value);
+            }
+            catch (...)
+            {
+                std::cerr << "Note: OAuth token cache at '" << path
+                          << "' could not be parsed; existing entries will be lost.\n";
+            }
+        }
+    }
+
+    mutator(obj);
+
+    /// mkstemp gives a process- and thread-unique path with O_EXCL semantics,
+    /// so concurrent invocations can no longer race on a fixed `.tmp` name and
+    /// corrupt each other's writes. POSIX requires mkstemp to create the file
+    /// 0600, so the refresh token is never on disk in a wider mode. The
+    /// template lives in the cache directory so rename(2) is same-FS and
+    /// atomic. We close the fd immediately and reopen via std::ofstream so
+    /// the existing serialization path stays unchanged; the random suffix
+    /// plus the held flock guarantee no other writer can interfere with the
+    /// reopen.
+    std::string tmpl = (cache_dir / "oauth_cache.XXXXXX").string();
+    int tmp_fd = ::mkstemp(tmpl.data());
+    if (tmp_fd < 0)
+    {
+        fail_errno("mkstemp", tmpl);
+        return;
+    }
+    ::close(tmp_fd);
+
+    {
+        std::ofstream out(tmpl, std::ios::trunc | std::ios::binary);
+        if (!out.is_open())
+        {
+            fail_errno("open for write", tmpl);
+            ::unlink(tmpl.c_str());
+            return;
+        }
+        Poco::JSON::Stringifier::stringify(obj, out);
+        out.close();
+        if (out.fail())
+        {
+            /// iostreams don't reliably surface errno through fail(); we
+            /// can't blame a specific syscall, but the consequence message
+            /// is still the actionable part for the user.
+            fail("failed to serialize JSON to '" + tmpl + "'");
+            ::unlink(tmpl.c_str());
+            return;
+        }
+    }
+
+    if (::rename(tmpl.c_str(), path.c_str()) != 0)
+    {
+        fail_errno("rename to", path);
+        ::unlink(tmpl.c_str());
+        return;
+    }
+}
+
+void removeCachedRefreshToken(const std::string & client_id)
+{
+    mutateRefreshTokenCache(
+        [&](Poco::JSON::Object & obj) { obj.remove(cacheKey(client_id)); },
+        /*warn_on_failure=*/false);
+}
+
+}
+
+void writeCachedRefreshToken(const std::string & client_id, const std::string & refresh_token)
+{
+    mutateRefreshTokenCache(
+        [&](Poco::JSON::Object & obj) { obj.set(cacheKey(client_id), refresh_token); },
+        /*warn_on_failure=*/true);
+}
+
+namespace
+{
+
+std::string tryRefreshToken(const OAuthCredentials & creds, const std::string & refresh_token)
+{
+    try
+    {
+        std::string body
+            = "grant_type=refresh_token"
+              "&client_id=" + urlEncodeOAuth(creds.client_id)
+            + "&refresh_token=" + urlEncodeOAuth(refresh_token);
+        /// Public clients (no registered secret) must omit the parameter
+        /// entirely; see loadOAuthCredentials() for the rationale.
+        if (!creds.client_secret.empty())
+            body += "&client_secret=" + urlEncodeOAuth(creds.client_secret);
+
+        auto resp = postOAuthForm(creds.token_uri, body);
+        if (resp->has("error"))
+        {
+            const std::string err = resp->getValue<std::string>("error");
+            std::cerr << "Note: cached refresh token was rejected (" << err << "); re-authenticating.\n";
+            /// RFC 6749 §5.2: invalid_grant means the refresh token itself is
+            /// no longer usable (revoked / expired / mismatched redirect).
+            /// Evict it so subsequent invocations skip the doomed round-trip.
+            /// Other error codes (invalid_client, invalid_request, ...) mean
+            /// our request was wrong, not the token, so we keep the cache.
+            if (err == "invalid_grant")
+                removeCachedRefreshToken(creds.client_id);
+            return "";
+        }
+        if (resp->has("refresh_token"))
+            writeCachedRefreshToken(creds.client_id, resp->getValue<std::string>("refresh_token"));
+        if (resp->has("id_token"))
+            return resp->getValue<std::string>("id_token");
+    }
+    catch (const std::exception & e)
+    {
+        std::cerr << "Note: refresh token exchange failed (" << e.what()
+                  << "); re-authenticating.\n";
+    }
+    return "";
+}
+
+}
+
+/// Load OAuth client credentials from a Google-format JSON file whose root
+/// object has an "installed" or "web" object. Throws BAD_ARGUMENTS when the
+/// file cannot be opened, is not a JSON object of that shape, or lacks one of
+/// the required fields (client_id, auth_uri, token_uri).
+OAuthCredentials loadOAuthCredentials(const std::string & path)
+{
+    std::ifstream f(path);
+    if (!f.is_open())
+        throw Exception(
+            ErrorCodes::BAD_ARGUMENTS,
+            "OAuth credentials file not found: '{}'\n"
+            "Place a Google-format credentials JSON at that path, or specify "
+            "--oauth-credentials /path/to/file.json",
+            path);
+
+    std::string content((std::istreambuf_iterator<char>(f)), std::istreambuf_iterator<char>());
+
+    /// extract() throws a Poco exception when the root is not a JSON object
+    /// (array, string, null, ...). Keep it inside the try block so that case
+    /// is reported as BAD_ARGUMENTS like any other malformed file.
+    Poco::JSON::Parser parser;
+    Poco::JSON::Object::Ptr root;
+    try
+    {
+        root = parser.parse(content).extract<Poco::JSON::Object::Ptr>();
+    }
+    catch (const std::exception & e)
+    {
+        throw Exception(ErrorCodes::BAD_ARGUMENTS, "Failed to parse OAuth credentials file '{}': {}", path, e.what());
+    }
+
+    Poco::JSON::Object::Ptr app;
+    if (root->has("installed"))
+        app = root->getObject("installed");
+    else if (root->has("web"))
+        app = root->getObject("web");
+    else
+        throw Exception(
+            ErrorCodes::BAD_ARGUMENTS,
+            "OAuth credentials file '{}' must have an 'installed' or 'web' top-level key",
+            path);
+
+    /// getObject() returns a null pointer when the key exists but does not
+    /// hold a JSON object (e.g. `"installed": 42`). Reject that here rather
+    /// than dereferencing null in the lookups below.
+    if (!app)
+        throw Exception(
+            ErrorCodes::BAD_ARGUMENTS,
+            "OAuth credentials file '{}': the 'installed' / 'web' top-level key must be a JSON object",
+            path);
+
+    auto require = [&](const std::string & key) -> std::string
+    {
+        if (!app->has(key))
+            throw Exception(
+                ErrorCodes::BAD_ARGUMENTS,
+                "OAuth credentials file '{}' is missing required field '{}'",
+                path,
+                key);
+        return app->getValue<std::string>(key);
+    };
+
+    OAuthCredentials creds;
+    creds.client_id = require("client_id");
+    creds.auth_uri = require("auth_uri");
+    creds.token_uri = require("token_uri");
+
+    /// client_secret is optional: per RFC 6749 §2.1 / RFC 8252 §8.4, native
+    /// OIDC clients are typically registered as "public" and have no secret;
+    /// PKCE (always used in the auth-code flow here) and the device_code
+    /// itself (in the device flow) provide the client-binding guarantee that a
+    /// secret would otherwise carry. An absent or empty value here causes the
+    /// downstream POST bodies to omit the client_secret form parameter
+    /// entirely — sending it with an empty value is treated by Auth0, Entra
+    /// ID, Keycloak and others as a malformed confidential-client credential
+    /// and rejected with invalid_client, so omission is required, not just
+    /// preferred.
+    if (app->has("client_secret"))
+        creds.client_secret = app->getValue<std::string>("client_secret");
+
+    if (app->has("device_authorization_uri"))
+        creds.device_auth_uri = app->getValue<std::string>("device_authorization_uri");
+    if (app->has("issuer"))
+        creds.issuer = app->getValue<std::string>("issuer");
+
+    auto warn_if_http = [&](const std::string & field, const std::string & uri)
+    {
+        if (uri.starts_with("http://"))
+            std::cerr << "Warning: OAuth credentials field '" << field << "' uses plain HTTP ('"
+                      << uri << "'). Token exchanges over HTTP expose client credentials.\n";
+    };
+    warn_if_http("token_uri", creds.token_uri);
+    warn_if_http("auth_uri", creds.auth_uri);
+    if (!creds.device_auth_uri.empty())
+        warn_if_http("device_authorization_uri", creds.device_auth_uri);
+
+    return creds;
+}
+
+std::string obtainIDToken(const OAuthCredentials & creds, OAuthFlowMode mode)
+{
+    const std::string cached_refresh = readCachedRefreshTokenImpl(creds.client_id);
+    if (!cached_refresh.empty())
+    {
+        const std::string id_token = tryRefreshToken(creds, cached_refresh);
+        if (!id_token.empty())
+            return id_token;
+    }
+
+    if (mode == OAuthFlowMode::Device)
+        return runOAuthDeviceFlow(creds);
+    return runOAuthAuthCodeFlow(creds);
+}
+
+} // namespace DB
+
+#endif // USE_JWT_CPP && USE_SSL
diff --git a/src/Client/OAuthLogin.h b/src/Client/OAuthLogin.h
new file mode 100644
index 000000000000..600e577fa8a4
--- /dev/null
+++ b/src/Client/OAuthLogin.h
@@ -0,0 +1,44 @@
+#pragma once
+
+#include
+#include
+#include
+
+namespace DB
+{
+
+class JWTProvider; // forward declaration — full type available with USE_JWT_CPP && USE_SSL
+
+enum class OAuthFlowMode
+{
+    AuthCode,
+    Device,
+};
+
+struct OAuthCredentials
+{
+    std::string client_id;
+    std::string client_secret;
+    std::string auth_uri; // authorization_endpoint
+    std::string token_uri; // token_endpoint
+    std::string device_auth_uri; // device_authorization_endpoint (discovered if empty)
+    std::string issuer; // OIDC issuer URL (optional; used to locate discovery document)
+};
+
+/// Load from Google-format JSON credentials file.
+/// Throws if file not found or malformed.
+OAuthCredentials loadOAuthCredentials(const std::string & path);
+
+/// Run OAuth flow, return ID token. Throws on failure.
+std::string obtainIDToken(const OAuthCredentials & creds, OAuthFlowMode mode);
+
+#if USE_JWT_CPP && USE_SSL
+/// Create a JWTProvider that runs the initial OAuth flow and then silently
+/// refreshes the id_token via the cached refresh token for the lifetime
+/// of the session. Assign the result to Client::jwt_provider so that
+/// Connection::sendQuery can call getJWT() on each query.
+std::shared_ptr<JWTProvider> createOAuthJWTProvider(
+    const OAuthCredentials & creds, OAuthFlowMode mode);
+#endif
+
+}
diff --git a/src/Client/OAuthProviderPolicy.cpp b/src/Client/OAuthProviderPolicy.cpp
new file mode 100644
index 000000000000..1bf589e95912
--- /dev/null
+++ b/src/Client/OAuthProviderPolicy.cpp
@@ -0,0 +1,139 @@
+#include
+#include
+
+#if USE_JWT_CPP && USE_SSL
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+extern const int AUTHENTICATION_FAILED;
+}
+
+namespace
+{
+
+constexpr int HTTP_TIMEOUT_SECONDS = 30;
+
+/// Fetch the OIDC discovery document published by `issuer` and return its
+/// `device_authorization_endpoint`. Throws AUTHENTICATION_FAILED on a non-200
+/// response or when the document lacks that field.
+std::string fetchDeviceEndpointFromIssuer(const std::string & issuer)
+{
+    /// OpenID Connect Discovery 1.0 §4: a terminating '/' on the issuer must be
+    /// removed before appending the well-known path, otherwise an issuer given
+    /// as "https://idp.example.com/" produces a "//" in the request path.
+    std::string base = issuer;
+    while (!base.empty() && base.back() == '/')
+        base.pop_back();
+    const std::string discovery_url = base + "/.well-known/openid-configuration";
+    Poco::URI disc_uri(discovery_url);
+
+    Poco::Net::HTTPRequest request(Poco::Net::HTTPRequest::HTTP_GET, disc_uri.getPathAndQuery());
+    Poco::Net::HTTPResponse response;
+    std::string body;
+
+    if (disc_uri.getScheme() == "https")
+    {
+        Poco::Net::Context::Ptr ctx = Poco::Net::SSLManager::instance().defaultClientContext();
+        Poco::Net::HTTPSClientSession session(disc_uri.getHost(), disc_uri.getPort(), ctx);
+        session.setTimeout(Poco::Timespan(HTTP_TIMEOUT_SECONDS, 0));
+        session.sendRequest(request);
+        auto & stream = session.receiveResponse(response);
+        Poco::StreamCopier::copyToString(stream, body);
+    }
+    else
+    {
+        Poco::Net::HTTPClientSession session(disc_uri.getHost(), disc_uri.getPort());
+        session.setTimeout(Poco::Timespan(HTTP_TIMEOUT_SECONDS, 0));
+        session.sendRequest(request);
+        auto & stream = session.receiveResponse(response);
+        Poco::StreamCopier::copyToString(stream, body);
+    }
+
+    if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK)
+        throw Exception(
+            ErrorCodes::AUTHENTICATION_FAILED,
+            "OIDC discovery failed for '{}': {} {}",
+            discovery_url,
+            static_cast<int>(response.getStatus()),
+            response.getReason());
+
+    Poco::JSON::Parser parser;
+    auto result = parser.parse(body);
+    const auto & obj = result.extract<Poco::JSON::Object::Ptr>();
+
+    if (!obj->has("device_authorization_endpoint"))
+        throw Exception(
+            ErrorCodes::AUTHENTICATION_FAILED,
+            "OIDC discovery document at '{}' does not contain device_authorization_endpoint",
+            discovery_url);
+
+    return obj->getValue<std::string>("device_authorization_endpoint");
+}
+
+/// Guess the issuer from the token endpoint: scheme://host, then the port
+/// unless it is 0 or the default for http/https, then the endpoint path up to
+/// (not including) its last '/', provided that '/' is not the leading one.
+std::string inferIssuerFromTokenUri(const std::string & token_uri)
+{
+    Poco::URI uri(token_uri);
+
+    std::string issuer = uri.getScheme() + "://" + uri.getHost();
+    if (uri.getPort() != 0
+        && !((uri.getScheme() == "https" && uri.getPort() == 443)
+             || (uri.getScheme() == "http" && uri.getPort() == 80)))
+        issuer += ":" + std::to_string(uri.getPort());
+
+    const auto & path = uri.getPath();
+    const auto last_slash = path.rfind('/');
+    if (last_slash != std::string::npos && last_slash != 0)
+        issuer += path.substr(0, last_slash);
+
+    return issuer;
+}
+
+}
+
+std::unique_ptr<IOAuthProviderPolicy> IOAuthProviderPolicy::create(const OAuthCredentials & creds)
+{
+    if (GoogleOAuthProviderPolicy::matches(creds))
+        return std::make_unique<GoogleOAuthProviderPolicy>();
+    return std::make_unique<GenericOAuthProviderPolicy>();
+}
+
+std::string GoogleOAuthProviderPolicy::resolveDeviceAuthorizationEndpoint(const OAuthCredentials & creds) const
+{
+    if (!creds.device_auth_uri.empty())
+        return creds.device_auth_uri;
+
+    const std::string issuer = creds.issuer.empty() ? "https://accounts.google.com" : creds.issuer;
+    return fetchDeviceEndpointFromIssuer(issuer);
+}
+
+std::string GenericOAuthProviderPolicy::resolveDeviceAuthorizationEndpoint(const OAuthCredentials & creds) const
+{
+    if (!creds.device_auth_uri.empty())
+        return creds.device_auth_uri;
+
+    const std::string issuer = creds.issuer.empty() ? inferIssuerFromTokenUri(creds.token_uri) : creds.issuer;
+    return fetchDeviceEndpointFromIssuer(issuer);
+}
+
+} // namespace DB
+
+#endif // USE_JWT_CPP && USE_SSL
diff --git a/src/Client/OAuthProviderPolicy.h b/src/Client/OAuthProviderPolicy.h
new file mode 100644
index 000000000000..cbfb04e802c1
--- /dev/null
+++ b/src/Client/OAuthProviderPolicy.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include
+#include
+
+#if USE_JWT_CPP && USE_SSL
+
+#include
+
+#include
+#include
+
+namespace DB
+{
+
+/// Provider-specific behavior for OAuth/OIDC flows.
+/// To add a new provider: subclass, implement all virtuals, add matches() check,
+/// and register in IOAuthProviderPolicy::create().
+class IOAuthProviderPolicy
+{
+public:
+    virtual ~IOAuthProviderPolicy() = default;
+
+    virtual std::string getAuthCodeScope() const = 0;
+    virtual bool useAccessTypeOfflineForAuthCode() const = 0;
+    virtual std::string getDeviceScope() const = 0;
+    virtual std::string resolveDeviceAuthorizationEndpoint(const OAuthCredentials & creds) const = 0;
+
+    static std::unique_ptr<IOAuthProviderPolicy> create(const OAuthCredentials & creds);
+};
+
+class GoogleOAuthProviderPolicy final : public IOAuthProviderPolicy
+{
+public:
+    /// True when the host of `creds.token_uri` is one of the two Google
+    /// endpoints compared against below.
+    static bool matches(const OAuthCredentials & creds)
+    {
+        /// Copy the host: getHost() returns a reference into the temporary
+        /// Poco::URI, which is destroyed at the end of this statement, so
+        /// binding a reference here would dangle on the next line.
+        const std::string host = Poco::URI(creds.token_uri).getHost();
+        return host == "oauth2.googleapis.com" || host == "accounts.google.com";
+    }
+
+    std::string getAuthCodeScope() const override { return "openid email profile"; }
+    bool useAccessTypeOfflineForAuthCode() const override { return true; }
+    std::string getDeviceScope() const override { return "openid email profile"; }
+    std::string resolveDeviceAuthorizationEndpoint(const OAuthCredentials & creds) const override;
+};
+
+class GenericOAuthProviderPolicy final : public IOAuthProviderPolicy
+{
+public:
+    std::string getAuthCodeScope() const override { return "openid email profile offline_access"; }
+    bool useAccessTypeOfflineForAuthCode() const override { return false; }
+    std::string getDeviceScope() const override { return "openid email profile offline_access"; }
+    std::string resolveDeviceAuthorizationEndpoint(const OAuthCredentials & creds) const override;
+};
+
+}
+
+#endif // USE_JWT_CPP && USE_SSL
diff --git a/src/Client/tests/gtest_oauth_login.cpp b/src/Client/tests/gtest_oauth_login.cpp
new file mode 100644
index 000000000000..2efcbc17aff8
--- /dev/null
+++ b/src/Client/tests/gtest_oauth_login.cpp
@@ -0,0 +1,331 @@
+#include
+
+#if USE_JWT_CPP && USE_SSL
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+using namespace DB;
+
+namespace
+{
+
+namespace fs = std::filesystem;
+
+/// Write content to a
temp file and return its path. The caller owns the file. +std::string writeTempFile(const std::string & content) +{ + const fs::path tmp = fs::temp_directory_path() / fs::path("gtest_oauth_XXXXXX"); + // std::tmpnam is deprecated — build a unique name with mkstemp. + std::string tmpl = tmp.string(); + int fd = mkstemp(tmpl.data()); + if (fd < 0) + throw std::runtime_error("mkstemp failed"); + close(fd); + + std::ofstream f(tmpl, std::ios::trunc); + f << content; + return tmpl; +} + +} // anonymous namespace + +// --------------------------------------------------------------------------- +// loadOAuthCredentials — valid "installed" format +// --------------------------------------------------------------------------- + +TEST(OAuthLogin, LoadInstalledFormat) +{ + const std::string json = R"({ + "installed": { + "client_id": "test-client-id", + "client_secret": "test-secret", + "auth_uri": "https://auth.example.com/auth", + "token_uri": "https://auth.example.com/token", + "redirect_uris": ["http://localhost"] + } + })"; + + auto path = writeTempFile(json); + SCOPE_EXIT({ fs::remove(path); }); + + auto creds = loadOAuthCredentials(path); + EXPECT_EQ(creds.client_id, "test-client-id"); + EXPECT_EQ(creds.client_secret, "test-secret"); + EXPECT_EQ(creds.auth_uri, "https://auth.example.com/auth"); + EXPECT_EQ(creds.token_uri, "https://auth.example.com/token"); + EXPECT_TRUE(creds.device_auth_uri.empty()); +} + +// --------------------------------------------------------------------------- +// loadOAuthCredentials — valid "web" format +// --------------------------------------------------------------------------- + +TEST(OAuthLogin, LoadWebFormat) +{ + const std::string json = R"({ + "web": { + "client_id": "web-client", + "client_secret": "web-secret", + "auth_uri": "https://web.example.com/auth", + "token_uri": "https://web.example.com/token" + } + })"; + + auto path = writeTempFile(json); + SCOPE_EXIT({ fs::remove(path); }); + + auto creds = loadOAuthCredentials(path); 
+ EXPECT_EQ(creds.client_id, "web-client"); + EXPECT_EQ(creds.client_secret, "web-secret"); +} + +// --------------------------------------------------------------------------- +// loadOAuthCredentials — optional device_authorization_uri is loaded +// --------------------------------------------------------------------------- + +TEST(OAuthLogin, LoadDeviceAuthUri) +{ + const std::string json = R"({ + "installed": { + "client_id": "x", + "client_secret": "y", + "auth_uri": "https://a.example.com/auth", + "token_uri": "https://a.example.com/token", + "device_authorization_uri": "https://a.example.com/device" + } + })"; + + auto path = writeTempFile(json); + SCOPE_EXIT({ fs::remove(path); }); + + auto creds = loadOAuthCredentials(path); + EXPECT_EQ(creds.device_auth_uri, "https://a.example.com/device"); +} + +// --------------------------------------------------------------------------- +// loadOAuthCredentials — missing top-level key throws BAD_ARGUMENTS +// --------------------------------------------------------------------------- + +TEST(OAuthLogin, MissingTopLevelKey) +{ + const std::string json = R"({ "other_key": {} })"; + + auto path = writeTempFile(json); + SCOPE_EXIT({ fs::remove(path); }); + + EXPECT_THROW(loadOAuthCredentials(path), Exception); +} + +// --------------------------------------------------------------------------- +// loadOAuthCredentials — public-client config (no client_secret) loads OK +// +// Per RFC 6749 §2.1 / RFC 8252 §8.4 native OIDC clients are typically +// registered as public clients with no secret; the flow is protected by PKCE +// (auth-code) or the device_code (device flow). The credential loader must +// not hard-require client_secret, otherwise valid public-client registrations +// cannot be used. 
This is the regression guard for that policy: the absence +// of the field is silently accepted, and the in-memory secret stays empty so +// the downstream POST builders omit the parameter rather than sending an +// empty value (which several IdPs reject as invalid_client). +// --------------------------------------------------------------------------- + +TEST(OAuthLogin, LoadPublicClientNoSecret) +{ + const std::string json = R"({ + "installed": { + "client_id": "public-client-id", + "auth_uri": "https://auth.example.com/auth", + "token_uri": "https://auth.example.com/token" + } + })"; + + auto path = writeTempFile(json); + SCOPE_EXIT({ fs::remove(path); }); + + auto creds = loadOAuthCredentials(path); + EXPECT_EQ(creds.client_id, "public-client-id"); + EXPECT_TRUE(creds.client_secret.empty()); + EXPECT_EQ(creds.auth_uri, "https://auth.example.com/auth"); + EXPECT_EQ(creds.token_uri, "https://auth.example.com/token"); +} + +// Empty-string client_secret is treated identically to an absent field: load +// succeeds and the in-memory value is empty, so the downstream POST bodies +// omit the form parameter. Without this property a credential file written +// by a tool that defaults the field to "" would produce invalid_client at +// the IdP rather than a working public-client request. 
+TEST(OAuthLogin, LoadPublicClientEmptySecret) +{ + const std::string json = R"({ + "installed": { + "client_id": "public-client-id", + "client_secret": "", + "auth_uri": "https://auth.example.com/auth", + "token_uri": "https://auth.example.com/token" + } + })"; + + auto path = writeTempFile(json); + SCOPE_EXIT({ fs::remove(path); }); + + auto creds = loadOAuthCredentials(path); + EXPECT_TRUE(creds.client_secret.empty()); +} + +// --------------------------------------------------------------------------- +// loadOAuthCredentials — missing required field throws BAD_ARGUMENTS +// --------------------------------------------------------------------------- + +TEST(OAuthLogin, MissingClientId) +{ + const std::string json = R"({ + "installed": { + "client_secret": "s", + "auth_uri": "https://a.example.com/auth", + "token_uri": "https://a.example.com/token" + } + })"; + + auto path = writeTempFile(json); + SCOPE_EXIT({ fs::remove(path); }); + + EXPECT_THROW(loadOAuthCredentials(path), Exception); +} + +TEST(OAuthLogin, MissingTokenUri) +{ + const std::string json = R"({ + "installed": { + "client_id": "c", + "client_secret": "s", + "auth_uri": "https://a.example.com/auth" + } + })"; + + auto path = writeTempFile(json); + SCOPE_EXIT({ fs::remove(path); }); + + EXPECT_THROW(loadOAuthCredentials(path), Exception); +} + +// --------------------------------------------------------------------------- +// loadOAuthCredentials — file not found throws BAD_ARGUMENTS +// --------------------------------------------------------------------------- + +TEST(OAuthLogin, FileNotFound) +{ + EXPECT_THROW(loadOAuthCredentials("/nonexistent/path/oauth_client.json"), Exception); +} + +// --------------------------------------------------------------------------- +// loadOAuthCredentials — invalid JSON throws BAD_ARGUMENTS +// --------------------------------------------------------------------------- + +TEST(OAuthLogin, InvalidJson) +{ + auto path = writeTempFile("not valid json {{{"); + 
SCOPE_EXIT({ fs::remove(path); }); + + EXPECT_THROW(loadOAuthCredentials(path), Exception); +} + +// --------------------------------------------------------------------------- +// loadOAuthCredentials — optional "issuer" field is loaded +// --------------------------------------------------------------------------- + +TEST(OAuthLogin, LoadIssuerField) +{ + const std::string json = R"({ + "installed": { + "client_id": "x", + "client_secret": "y", + "auth_uri": "https://a.example.com/auth", + "token_uri": "https://a.example.com/token", + "issuer": "https://a.example.com" + } + })"; + + auto path = writeTempFile(json); + SCOPE_EXIT({ fs::remove(path); }); + + auto creds = loadOAuthCredentials(path); + EXPECT_EQ(creds.issuer, "https://a.example.com"); +} + +TEST(OAuthLogin, IssuerFieldAbsent) +{ + const std::string json = R"({ + "installed": { + "client_id": "x", + "client_secret": "y", + "auth_uri": "https://a.example.com/auth", + "token_uri": "https://a.example.com/token" + } + })"; + + auto path = writeTempFile(json); + SCOPE_EXIT({ fs::remove(path); }); + + auto creds = loadOAuthCredentials(path); + EXPECT_TRUE(creds.issuer.empty()); +} + +// --------------------------------------------------------------------------- +// PKCE building blocks +// +// generatePKCE() is in the anonymous namespace so we test its constituent +// operations (base64url encoding and SHA-256) directly. This verifies the +// exact properties that RFC 7636 §4 requires of the verifier and challenge. +// --------------------------------------------------------------------------- + +TEST(OAuthLogin, Base64UrlEncodingProperties) +{ + // 32 bytes → 43 base64url chars (no padding, RFC 7636 §4.1 requires 43-128). 
+ const std::string raw(32, '\xAB'); + const std::string encoded = base64Encode(raw, /*url_encoding=*/true, /*no_padding=*/true); + + EXPECT_EQ(encoded.size(), 43u); + + // Must contain only URL-safe base64 chars: A-Z a-z 0-9 - _ + const bool all_safe = std::all_of(encoded.begin(), encoded.end(), [](unsigned char c) { + return std::isalnum(c) || c == '-' || c == '_'; + }); + EXPECT_TRUE(all_safe) << "base64url output contains non-URL-safe characters: " << encoded; + + // Must NOT contain padding or standard base64 symbols. + EXPECT_EQ(encoded.find('='), std::string::npos); + EXPECT_EQ(encoded.find('+'), std::string::npos); + EXPECT_EQ(encoded.find('/'), std::string::npos); +} + +TEST(OAuthLogin, PKCEChallengeDerivation) +{ + // SHA256(verifier) encodes to 32 bytes; base64url(32 bytes) = 43 chars. + const std::string verifier = base64Encode(std::string(32, '\x01'), true, true); + const std::string sha = encodeSHA256(verifier); + EXPECT_EQ(sha.size(), 32u); + + const std::string challenge = base64Encode(sha, true, true); + EXPECT_EQ(challenge.size(), 43u); + + // Challenge must differ from verifier. + EXPECT_NE(challenge, verifier); + + // Challenge must be deterministic for the same verifier. + EXPECT_EQ(base64Encode(encodeSHA256(verifier), true, true), challenge); + + // Different verifiers must produce different challenges. 
+ const std::string verifier2 = base64Encode(std::string(32, '\x02'), true, true); + EXPECT_NE(base64Encode(encodeSHA256(verifier2), true, true), challenge); +} + +#endif // USE_JWT_CPP && USE_SSL diff --git a/tests/integration/compose/docker_compose_keycloak.yml b/tests/integration/compose/docker_compose_keycloak.yml new file mode 100644 index 000000000000..dd2a66ab8107 --- /dev/null +++ b/tests/integration/compose/docker_compose_keycloak.yml @@ -0,0 +1,21 @@ +services: + keycloak: + image: quay.io/keycloak/keycloak:26.0 + command: start-dev --import-realm + environment: + KEYCLOAK_ADMIN: admin + KEYCLOAK_ADMIN_PASSWORD: admin + volumes: + - ${KEYCLOAK_REALM_FILE}:/opt/keycloak/data/import/realm.json:ro + ports: + - "${KEYCLOAK_EXTERNAL_PORT:-18080}:8080" + healthcheck: + test: + - CMD-SHELL + - > + curl -sf + http://localhost:8080/realms/clickhouse-test/.well-known/openid-configuration + || exit 1 + interval: 10s + timeout: 5s + retries: 15 diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 6965b51e6684..8a0f3adfb6d8 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -669,6 +669,7 @@ def __init__( self.with_redis = False self.with_cassandra = False self.with_ldap = False + self.with_keycloak = False self.with_jdbc_bridge = False self.with_nginx = False self.with_hive = False @@ -770,6 +771,11 @@ def __init__( self._ldap_external_port = 0 self.ldap_id = self.get_instance_docker_id(self.ldap_host) + # available when with_keycloak == True + self.keycloak_host = "keycloak" + self.keycloak_port = 18080 + self.base_keycloak_cmd = None + # available when with_rabbitmq == True self.rabbitmq_host = "rabbitmq1" self.rabbitmq_ip = None @@ -1883,6 +1889,25 @@ def setup_ldap_cmd(self, instance, env_variables, docker_compose_yml_dir): ) return self.base_ldap_cmd + def setup_keycloak_cmd(self, instance, env_variables, docker_compose_yml_dir): + self.with_keycloak = True + 
env_variables["KEYCLOAK_EXTERNAL_PORT"] = str(self.keycloak_port) + env_variables["KEYCLOAK_REALM_FILE"] = p.join( + self.base_dir, + "keycloak", + "realm-export.json", + ) + self.base_cmd.extend( + ["--file", p.join(docker_compose_yml_dir, "docker_compose_keycloak.yml")] + ) + self.base_keycloak_cmd = self.compose_cmd( + "--env-file", + instance.env_file, + "--file", + p.join(docker_compose_yml_dir, "docker_compose_keycloak.yml"), + ) + return self.base_keycloak_cmd + def setup_jdbc_bridge_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_jdbc_bridge = True env_variables["JDBC_DRIVER_LOGS"] = self.jdbc_driver_logs_dir @@ -2048,6 +2073,7 @@ def add_instance( with_azurite=False, with_cassandra=False, with_ldap=False, + with_keycloak=False, with_jdbc_bridge=False, with_hive=False, with_coredns=False, @@ -2190,6 +2216,7 @@ def add_instance( with_coredns=with_coredns, with_cassandra=with_cassandra, with_ldap=with_ldap, + with_keycloak=with_keycloak, with_iceberg_catalog=with_iceberg_catalog, with_glue_catalog=with_glue_catalog, with_hms_catalog=with_hms_catalog, @@ -2449,6 +2476,11 @@ def add_instance( self.setup_ldap_cmd(instance, env_variables, docker_compose_yml_dir) ) + if with_keycloak and not self.with_keycloak: + cmds.append( + self.setup_keycloak_cmd(instance, env_variables, docker_compose_yml_dir) + ) + if with_jdbc_bridge and not self.with_jdbc_bridge: cmds.append( self.setup_jdbc_bridge_cmd( @@ -3421,6 +3453,26 @@ def wait_ldap_to_start(self, timeout=180): raise Exception("Can't wait LDAP to start") + def wait_keycloak_to_start(self, timeout=120): + discovery_url = ( + f"http://localhost:{self.keycloak_port}" + f"/realms/clickhouse-test/.well-known/openid-configuration" + ) + start = time.time() + while time.time() - start < timeout: + try: + resp = requests.get(discovery_url, timeout=5) + if resp.status_code == 200: + logging.info("Keycloak is online") + return + except Exception as ex: + logging.warning("Waiting for Keycloak: %s", ex) + 
time.sleep(3) + raise Exception("Keycloak did not start in time") + + def get_keycloak_url(self): + return f"http://localhost:{self.keycloak_port}" + def wait_prometheus_to_start(self): if "writer" in self.prometheus_servers: self.prometheus_writer_ip = self.get_instance_ip(self.prometheus_writer_host) @@ -3961,6 +4013,11 @@ def logging_azurite_initialization(exception, retry_number, sleep_time): self.up_called = True self.wait_ldap_to_start() + if self.with_keycloak and self.base_keycloak_cmd: + subprocess_check_call(self.base_keycloak_cmd + ["up", "-d"]) + self.up_called = True + self.wait_keycloak_to_start() + if self.with_jdbc_bridge and self.base_jdbc_bridge_cmd: os.makedirs(self.jdbc_driver_logs_dir) os.chmod(self.jdbc_driver_logs_dir, stat.S_IRWXU | stat.S_IRWXO) @@ -4476,6 +4533,7 @@ def __init__( with_coredns, with_cassandra, with_ldap, + with_keycloak, with_iceberg_catalog, with_glue_catalog, with_hms_catalog, @@ -4599,6 +4657,7 @@ def __init__( self.with_azurite = with_azurite self.with_cassandra = with_cassandra self.with_ldap = with_ldap + self.with_keycloak = with_keycloak self.with_jdbc_bridge = with_jdbc_bridge self.with_hive = with_hive self.with_coredns = with_coredns @@ -5992,6 +6051,9 @@ def write_embedded_config(name, dest_dir, fix_log_level=False): if self.with_ldap: depends_on.append("openldap") + if self.with_keycloak: + depends_on.append("keycloak") + if self.with_rabbitmq: depends_on.append("rabbitmq1") diff --git a/tests/integration/test_keycloak_auth/__init__.py b/tests/integration/test_keycloak_auth/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_keycloak_auth/configs/users.xml b/tests/integration/test_keycloak_auth/configs/users.xml new file mode 100644 index 000000000000..3c621c1506bc --- /dev/null +++ b/tests/integration/test_keycloak_auth/configs/users.xml @@ -0,0 +1,13 @@ + + + + 1 + 1 + + + + default + default + + + diff --git 
a/tests/integration/test_keycloak_auth/configs/validators.xml b/tests/integration/test_keycloak_auth/configs/validators.xml new file mode 100644 index 000000000000..f7e13a6c6784 --- /dev/null +++ b/tests/integration/test_keycloak_auth/configs/validators.xml @@ -0,0 +1,20 @@ + + + + + jwt_dynamic_jwks + http://keycloak:8080/realms/clickhouse-test/protocol/openid-connect/certs + http://keycloak:8080/realms/clickhouse-test + preferred_username + 60 + + + + + openid + http://keycloak:8080/realms/clickhouse-test/.well-known/openid-configuration + preferred_username + 60 + + + diff --git a/tests/integration/test_keycloak_auth/keycloak/realm-export.json b/tests/integration/test_keycloak_auth/keycloak/realm-export.json new file mode 100644 index 000000000000..c3067b6f65ec --- /dev/null +++ b/tests/integration/test_keycloak_auth/keycloak/realm-export.json @@ -0,0 +1,48 @@ +{ + "realm": "clickhouse-test", + "enabled": true, + "sslRequired": "none", + "registrationAllowed": false, + "clients": [ + { + "clientId": "clickhouse", + "enabled": true, + "secret": "test-secret", + "publicClient": false, + "directAccessGrantsEnabled": true, + "serviceAccountsEnabled": false, + "standardFlowEnabled": true, + "attributes": { + "oauth2.device.authorization.grant.enabled": "true" + }, + "redirectUris": ["*"], + "webOrigins": ["*"], + "protocol": "openid-connect" + } + ], + "users": [ + { + "username": "alice", + "enabled": true, + "emailVerified": true, + "email": "alice@example.com", + "firstName": "Alice", + "lastName": "Tester", + "requiredActions": [], + "credentials": [ + { + "type": "password", + "value": "secret", + "temporary": false + } + ], + "realmRoles": ["offline_access", "uma_authorization", "default-roles-clickhouse-test"], + "groups": ["analysts"] + } + ], + "groups": [ + { + "name": "analysts" + } + ] +} diff --git a/tests/integration/test_keycloak_auth/test.py b/tests/integration/test_keycloak_auth/test.py new file mode 100644 index 000000000000..46a92071adb4 --- 
/dev/null +++ b/tests/integration/test_keycloak_auth/test.py @@ -0,0 +1,420 @@ +""" +Integration tests for Keycloak-based JWT authentication in ClickHouse. + +Layer 2 of the OAuth2 test plan. Requires: + - A running Keycloak container (started via `with_keycloak=True` on the cluster) + - ClickHouse configured with `jwt_dynamic_jwks` and `openid` token processors + +Run: + pytest tests/integration/test_keycloak_auth/test.py -v +""" + +import base64 +import json +import logging +import re +import time +from html import unescape as html_unescape + +import pytest +import requests + +from helpers.cluster import ClickHouseCluster + +KEYCLOAK_REALM = "clickhouse-test" +KEYCLOAK_CLIENT_ID = "clickhouse" +KEYCLOAK_CLIENT_SECRET = "test-secret" + +cluster = ClickHouseCluster(__file__) + +node = cluster.add_instance( + "node", + main_configs=["configs/validators.xml"], + user_configs=["configs/users.xml"], + with_keycloak=True, + stay_alive=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + yield cluster + finally: + cluster.shutdown() + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def keycloak_url(started_cluster): + return started_cluster.get_keycloak_url() + + +def get_keycloak_token(started_cluster, username="alice", password="secret"): + """Obtain an id_token from Keycloak using the resource-owner password grant.""" + url = f"{keycloak_url(started_cluster)}/realms/{KEYCLOAK_REALM}/protocol/openid-connect/token" + data = { + "grant_type": "password", + "client_id": KEYCLOAK_CLIENT_ID, + "client_secret": KEYCLOAK_CLIENT_SECRET, + "username": username, + "password": password, + "scope": "openid profile email", + } + resp = requests.post(url, data=data, timeout=30) + resp.raise_for_status() + token_data = resp.json() + assert "id_token" in token_data, f"No id_token in response: 
{token_data}" + return token_data["id_token"] + + +def query_with_token(node_instance, token, query): + """Execute a ClickHouse query using a JWT Bearer token via the HTTP interface.""" + resp = node_instance.http_request( + "", + method="POST", + data=query, + headers={"Authorization": f"Bearer {token}"}, + ) + resp.raise_for_status() + return resp.text + + +def decode_jwt_payload(token): + """Decode JWT payload without signature verification.""" + parts = token.split(".") + if len(parts) < 2: + return {} + payload_b64 = parts[1] + # Add padding + padding = 4 - len(payload_b64) % 4 + if padding != 4: + payload_b64 += "=" * padding + # Convert URL-safe base64 + payload_b64 = payload_b64.replace("-", "+").replace("_", "/") + return json.loads(base64.b64decode(payload_b64)) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +def test_jwt_dynamic_jwks(started_cluster): + """Token validated via explicit JWKS URI (keycloak_jwks processor).""" + token = get_keycloak_token(started_cluster) + result = query_with_token(node, token, "SELECT 1") + assert result.strip() == "1" + + +def test_openid_discovery(started_cluster): + """Token validated via OIDC discovery document (keycloak_discovery processor).""" + token = get_keycloak_token(started_cluster) + result = query_with_token(node, token, "SELECT 1") + assert result.strip() == "1" + + +def test_username_claim(started_cluster): + """The `preferred_username` claim is mapped to the ClickHouse session user.""" + token = get_keycloak_token(started_cluster, username="alice") + result = query_with_token(node, token, "SELECT currentUser()") + assert result.strip() == "alice" + + +def test_token_refresh(started_cluster): + """Obtain a new id_token via the refresh_token grant and authenticate.""" + url = f"{keycloak_url(started_cluster)}/realms/{KEYCLOAK_REALM}/protocol/openid-connect/token" + + # Initial grant 
+ data = { + "grant_type": "password", + "client_id": KEYCLOAK_CLIENT_ID, + "client_secret": KEYCLOAK_CLIENT_SECRET, + "username": "alice", + "password": "secret", + "scope": "openid profile email offline_access", + } + resp = requests.post(url, data=data, timeout=30) + resp.raise_for_status() + tokens = resp.json() + assert "refresh_token" in tokens, "Expected refresh_token in password grant response" + + # Refresh + refresh_data = { + "grant_type": "refresh_token", + "client_id": KEYCLOAK_CLIENT_ID, + "client_secret": KEYCLOAK_CLIENT_SECRET, + "refresh_token": tokens["refresh_token"], + } + refresh_resp = requests.post(url, data=refresh_data, timeout=30) + refresh_resp.raise_for_status() + refreshed = refresh_resp.json() + assert "id_token" in refreshed + + result = query_with_token(node, refreshed["id_token"], "SELECT 1") + assert result.strip() == "1" + + +def test_wrong_issuer_rejected(started_cluster): + """A token with a tampered issuer claim must be rejected.""" + token = get_keycloak_token(started_cluster) + payload = decode_jwt_payload(token) + + # Modify the issuer + payload["iss"] = "https://evil.example.com" + tampered_payload = ( + base64.urlsafe_b64encode(json.dumps(payload).encode()).rstrip(b"=").decode() + ) + + parts = token.split(".") + parts[1] = tampered_payload + tampered_token = ".".join(parts) + + # Authentication must fail + try: + query_with_token(node, tampered_token, "SELECT 1") + pytest.fail("Expected authentication failure for tampered token") + except Exception: + pass # Expected + + +def test_expired_token_rejected(started_cluster): + """A token with an expired `exp` claim must be rejected.""" + token = get_keycloak_token(started_cluster) + payload = decode_jwt_payload(token) + + # Set exp to a past timestamp + payload["exp"] = int(time.time()) - 3600 + expired_payload = ( + base64.urlsafe_b64encode(json.dumps(payload).encode()).rstrip(b"=").decode() + ) + + parts = token.split(".") + parts[1] = expired_payload + expired_token = 
".".join(parts) + + try: + query_with_token(node, expired_token, "SELECT 1") + pytest.fail("Expected authentication failure for expired token") + except Exception: + pass # Expected + + +def _approve_device_code_via_browser( + keycloak_base_url, realm, user_code, username="alice", password="secret" +): + """ + Simulate a browser approving a Keycloak device authorization request. + + Keycloak's device flow requires a user to visit a verification URI, log in, + and confirm access. This helper drives that multi-step HTML form sequence + using a `requests.Session` so that session cookies are maintained across + the redirects. + """ + + s = requests.Session() + + def _strip_secure_flag(session): + """Keycloak >= 25 emits Set-Cookie with Secure;SameSite=None on every + response, but the integration tests reach Keycloak over plain HTTP. + ``requests`` honors the Secure flag and refuses to resend those cookies + on the next HTTP hop, which causes Keycloak to lose its session and + return ``cookie_not_found``. 
Clear the flag after every response so the + cookies are sent on subsequent HTTP requests.""" + for cookie in session.cookies: + if getattr(cookie, "secure", False): + cookie.secure = False + + def _follow(method, url, **kw): + """Manually walk redirects so we can strip the Secure flag between + hops; ``requests`` follows redirects internally before our hook can + run, which is too late once the chain has dropped a Secure cookie.""" + kw.setdefault("timeout", 30) + kw["allow_redirects"] = False + for _ in range(20): + r = s.request(method, url, **kw) + _strip_secure_flag(s) + if r.status_code not in (301, 302, 303, 307, 308): + return r + loc = r.headers.get("Location") + if not loc: + return r + if loc.startswith("/"): + from urllib.parse import urlparse + parsed = urlparse(url) + url = f"{parsed.scheme}://{parsed.netloc}{loc}" + else: + url = loc + method = "GET" + kw.pop("data", None) + kw.pop("json", None) + kw.pop("params", None) + raise RuntimeError("Too many redirects") + + def get(url, **kw): + return _follow("GET", url, **kw) + + def post(url, **kw): + return _follow("POST", url, **kw) + + def get_form(html, base_url=None): + """Return (action_url, field_dict) for the first
<form> in *html*. + + Resolves relative ``action`` URLs against *base_url* when provided.""" + m = re.search(r'<form[^>]*\baction="([^"]+)"', html) + if not m: + return None, {} + action_url = html_unescape(m.group(1)) + if base_url and not re.match(r"^https?://", action_url): + from urllib.parse import urljoin + action_url = urljoin(base_url, action_url) + fields = {} + for inp in re.findall(r"<input[^>]+>", html): + n = re.search(r'\bname="([^"]+)"', inp) + v = re.search(r'\bvalue="([^"]*)"', inp) + t = re.search(r'\btype="([^"]+)"', inp) + if n and (not t or t.group(1).lower() not in ("checkbox", "radio")): + fields[n.group(1)] = v.group(1) if v else "" + return action_url, fields + + # Step 1: Navigate to the device endpoint. Keycloak redirects to a login + # page when the user_code query parameter is provided and valid. + r = get( + f"{keycloak_base_url}/realms/{realm}/device", + params={"user_code": user_code}, + ) + r.raise_for_status() + + # Step 1a: If Keycloak shows a user-code entry form first (no user_code + # in the redirect), fill it in and submit. + if 'name="device_user_code"' in r.text or 'name="user_code"' in r.text: + action, fields = get_form(r.text, base_url=r.url) + fields["device_user_code"] = user_code + fields["user_code"] = user_code + r = post(action, data=fields) + r.raise_for_status() + + # Step 2: We should now be on the login page. Submit credentials. + assert 'type="password"' in r.text, ( + f"Expected Keycloak login page, got:\n{r.text[:800]}" + ) + action, fields = get_form(r.text, base_url=r.url) + fields["username"] = username + fields["password"] = password + r = post(action, data=fields) + r.raise_for_status() + + # Step 3: Submit the device consent / grant form. Keycloak renders a + # "Do you want to grant access?" page with an `accept` submit button. 
+ action, fields = get_form(r.text, base_url=r.url) + if action: + if "accept" not in fields: + fields["accept"] = "" + post(action, data=fields) + + +def test_device_flow_initiation(started_cluster): + """ + Verify that Keycloak responds correctly to the device authorization request. + The polling / approval mechanics are covered by the Layer 1 unit tests. + """ + url = f"{keycloak_url(started_cluster)}/realms/{KEYCLOAK_REALM}/protocol/openid-connect/auth/device" + data = { + "client_id": KEYCLOAK_CLIENT_ID, + "client_secret": KEYCLOAK_CLIENT_SECRET, + "scope": "openid profile email", + } + resp = requests.post(url, data=data, timeout=30) + resp.raise_for_status() + + device_data = resp.json() + assert "device_code" in device_data, f"Missing device_code: {device_data}" + assert "user_code" in device_data, f"Missing user_code: {device_data}" + assert ( + "verification_uri" in device_data or "verification_uri_complete" in device_data + ), f"Missing verification_uri: {device_data}" + logging.info( + "Device flow initiated: user_code=%s verification_uri=%s", + device_data.get("user_code"), + device_data.get("verification_uri", device_data.get("verification_uri_complete")), + ) + + +def test_device_flow_round_trip(started_cluster): + """ + Full device-authorization-grant round-trip (RFC 8628). + + 1. Client initiates device flow → Keycloak returns `device_code` / `user_code`. + 2. User (simulated via `_approve_device_code_via_browser`) visits the + verification URI, logs in, and grants access. + 3. Client polls the token endpoint until an `id_token` is returned. + 4. `id_token` is used to authenticate a ClickHouse query — must return `1`. + """ + base_url = keycloak_url(started_cluster) + device_endpoint = ( + f"{base_url}/realms/{KEYCLOAK_REALM}/protocol/openid-connect/auth/device" + ) + token_endpoint = ( + f"{base_url}/realms/{KEYCLOAK_REALM}/protocol/openid-connect/token" + ) + + # --- 1. 
Initiate device authorization --- + init_resp = requests.post( + device_endpoint, + data={ + "client_id": KEYCLOAK_CLIENT_ID, + "client_secret": KEYCLOAK_CLIENT_SECRET, + "scope": "openid profile email", + }, + timeout=30, + ) + init_resp.raise_for_status() + device_data = init_resp.json() + device_code = device_data["device_code"] + user_code = device_data["user_code"] + interval = max(device_data.get("interval", 5), 1) + + logging.info( + "Device flow round-trip: user_code=%s device_code=%.8s…", user_code, device_code + ) + + # --- 2. Simulate user approving the request in a browser --- + _approve_device_code_via_browser(base_url, KEYCLOAK_REALM, user_code) + + # --- 3. Poll until the token arrives (or a 60-second deadline) --- + deadline = time.time() + 60 + id_token = None + while time.time() < deadline: + time.sleep(interval) + poll_resp = requests.post( + token_endpoint, + data={ + "grant_type": "urn:ietf:params:oauth:grant-type:device_code", + "client_id": KEYCLOAK_CLIENT_ID, + "client_secret": KEYCLOAK_CLIENT_SECRET, + "device_code": device_code, + }, + timeout=30, + ) + poll_data = poll_resp.json() + if "id_token" in poll_data: + id_token = poll_data["id_token"] + break + error = poll_data.get("error", "") + assert error in ("authorization_pending", "slow_down"), ( + f"Unexpected polling error: {poll_data}" + ) + if error == "slow_down": + interval += 5 + + assert id_token is not None, ( + "Device flow timed out: Keycloak never returned an id_token after approval" + ) + + # --- 4. 
Use the token to authenticate a ClickHouse query --- + result = query_with_token(node, id_token, "SELECT 1") + assert result.strip() == "1" diff --git a/tests/queries/0_stateless/03749_cloud_endpoint_auth_precedence.reference b/tests/queries/0_stateless/03749_cloud_endpoint_auth_precedence.reference index 0dcbc1ba33aa..322a4d1b25ad 100644 --- a/tests/queries/0_stateless/03749_cloud_endpoint_auth_precedence.reference +++ b/tests/queries/0_stateless/03749_cloud_endpoint_auth_precedence.reference @@ -14,4 +14,10 @@ Test 7: Connection string with user:password@ should not trigger OAuth OK Test 8: Multiple host/port format variations OK +Test 9: --login=device with missing credentials file gives clear error +OK +Test 10: --login=invalid should give BAD_ARGUMENTS +OK +Test 11: --jwt and --login together should give BAD_ARGUMENTS +OK All tests completed diff --git a/tests/queries/0_stateless/03749_cloud_endpoint_auth_precedence.sh b/tests/queries/0_stateless/03749_cloud_endpoint_auth_precedence.sh index ddd4632854bc..5bd5ff300449 100755 --- a/tests/queries/0_stateless/03749_cloud_endpoint_auth_precedence.sh +++ b/tests/queries/0_stateless/03749_cloud_endpoint_auth_precedence.sh @@ -101,4 +101,33 @@ else echo "FAILED: $failed commands failed" fi +# Test 9: --login=device with no credentials file should fail with a clear file-not-found error +# (not a crash or confusing message) +echo "Test 9: --login=device with missing credentials file gives clear error" +MISSING_CREDS="/tmp/nonexistent_oauth_creds_$$.json" +output=$($CLICKHOUSE_CLIENT_BINARY --login=device --oauth-credentials "$MISSING_CREDS" --query "SELECT 1" 2>&1) +if echo "$output" | grep -qi "not found\|No such file\|cannot open\|BAD_ARGUMENTS"; then + echo "OK" +else + echo "FAILED: expected file-not-found error, got: $output" +fi + +# Test 10: --login=invalid should give BAD_ARGUMENTS with descriptive message +echo "Test 10: --login=invalid should give BAD_ARGUMENTS" +output=$($CLICKHOUSE_CLIENT_BINARY 
--login=invalid --host="${CLICKHOUSE_HOST}" --port="${CLICKHOUSE_PORT_TCP}" --query "SELECT 1" 2>&1) +if echo "$output" | grep -qi "must be.*browser.*device\|BAD_ARGUMENTS"; then + echo "OK" +else + echo "FAILED: expected BAD_ARGUMENTS for invalid mode, got: $output" +fi + +# Test 11: --jwt and --login together should give BAD_ARGUMENTS +echo "Test 11: --jwt and --login together should give BAD_ARGUMENTS" +output=$($CLICKHOUSE_CLIENT_BINARY --jwt "sometoken" --login=browser --host="${CLICKHOUSE_HOST}" --port="${CLICKHOUSE_PORT_TCP}" --query "SELECT 1" 2>&1) +if echo "$output" | grep -qi "cannot both be specified\|BAD_ARGUMENTS"; then + echo "OK" +else + echo "FAILED: expected BAD_ARGUMENTS for --jwt + --login, got: $output" +fi + echo "All tests completed" From 8ffed1956ccdc15f6f4242ef707f016e318441f4 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Sun, 7 Jun 2026 17:44:36 +0200 Subject: [PATCH 04/12] Resolve conflicts in cherry-pick of #1606 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `cli.md` on `antalya-26.4` was refactored by commit `266165bf2c9` (Add `clickhousectl` docs page and move ClickHouse Client to `/interfaces/client`) into two files: `cli.md` became the `clickhousectl` reference, while the `clickhouse-client` documentation moved to `client.md`. The cherry-pick applied the source PR's `cli.md` edits to the new `clickhousectl` file and produced a conflict at the `### Non-interactive flags` section. Resolution: - `cli.md`: kept "ours" (the `clickhousectl` skills non-interactive flags table); dropped "theirs" (old `clickhouse-client` content now in `client.md`). - `client.md`: applied the source PR's changes there — updated the `login` option row to `--login[=]`, added the `--oauth-credentials` option row, and inserted the `### OAuth credentials file` section. Bucket-2 adaptation: the source PR's target file `cli.md` was renamed to `client.md` on `antalya-26.4`. 
- `Client.cpp`: kept both `("one-time-password", ...)` from "ours" (a separate `antalya-26.4` feature) and `("login", ...)` / `("oauth-credentials", ...)` from the source PR. Source-PR: #1606 (https://github.com/Altinity/ClickHouse/pull/1606) --- docs/en/interfaces/cli.md | 214 --------------------------- docs/en/interfaces/client.md | 31 +++- programs/client/Client.cpp | 3 - src/Access/AuthenticationData.cpp | 2 +- src/Access/Common/JWKSProvider.cpp | 2 +- src/Access/TokenAccessStorage.cpp | 2 +- src/Access/TokenProcessors.h | 1 - src/Access/TokenProcessorsOpaque.cpp | 4 +- 8 files changed, 34 insertions(+), 225 deletions(-) diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index f75fefe811a7..28543d790627 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -405,223 +405,9 @@ clickhousectl skills --agent claude --agent codex ### Non-interactive flags {#non-interactive-flags} -<<<<<<< HEAD | Flag | Description | |------|-------------| | `--agent ` | Install Skills for a specific agent (can be repeated) | | `--global` | Use global scope; if omitted, project scope is used | | `--all` | Install Skills for all supported agents | | `--detected-only` | Install Skills for supported agents that were detected on the system | -======= -```bash -clickhouse-client clickhouse://localhost/my_database?s - -# equivalent to: -clickhouse-client clickhouse://localhost/my_database -s -``` - -Connect to the default host using the default port, the default user, and the default database. - -```bash -clickhouse-client clickhouse: -``` - -Connect to the default host using the default port, as the user `my_user` and no password. - -```bash -clickhouse-client clickhouse://my_user@ - -# Using a blank password between : and @ means to asking the user to enter the password before starting the connection. -clickhouse-client clickhouse://my_user:@ -``` - -Connect to `localhost` using the email as the user name. `@` symbol is percent encoded to `%40`. 
- -```bash -clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000 -``` - -Connect to one of two hosts: `192.168.1.15`, `192.168.1.25`. - -```bash -clickhouse-client clickhouse://192.168.1.15,192.168.1.25 -``` - -## Query ID format {#query-id-format} - -In interactive mode ClickHouse Client shows the query ID for every query. By default, the ID is formatted like this: - -```sql -Query id: 927f137d-00f1-4175-8914-0dd066365e96 -``` - -A custom format may be specified in a configuration file inside a `query_id_formats` tag. The `{query_id}` placeholder in the format string is replaced with the query ID. Several format strings are allowed inside the tag. -This feature can be used to generate URLs to facilitate profiling of queries. - -**Example** - -```xml - - - http://speedscope-host/#profileURL=qp%3Fid%3D{query_id} - - -``` - -With the configuration above, the ID of a query is shown in the following format: - -```response -speedscope:http://speedscope-host/#profileURL=qp%3Fid%3Dc8ecc783-e753-4b38-97f1-42cddfb98b7d -``` - -## Configuration files {#configuration_files} - -ClickHouse Client uses the first existing file of the following: - -- A file that is defined with the `-c [ -C, --config, --config-file ]` parameter. 
-- `./clickhouse-client.[xml|yaml|yml]` -- `$XDG_CONFIG_HOME/clickhouse/config.[xml|yaml|yml]` (or `~/.config/clickhouse/config.[xml|yaml|yml]` if `XDG_CONFIG_HOME` is not set) -- `~/.clickhouse-client/config.[xml|yaml|yml]` -- `/etc/clickhouse-client/config.[xml|yaml|yml]` - -See the sample configuration file in the ClickHouse repository: [`clickhouse-client.xml`](https://github.com/ClickHouse/ClickHouse/blob/master/programs/client/clickhouse-client.xml) - - - - ```xml - - username - password - true - - - /etc/ssl/cert.pem - - - - ``` - - - ```yaml - user: username - password: 'password' - secure: true - openSSL: - client: - caConfig: '/etc/ssl/cert.pem' - ``` - - - -## Environment variable options {#environment-variable-options} - -The user name, password and host can be set via environment variables `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD` and `CLICKHOUSE_HOST`. -Command line arguments `--user`, `--password` or `--host`, or a [connection string](#connection_string) (if specified) take precedence over environment variables. - -## Command-line options {#command-line-options} - -All command-line options can be specified directly on the command line or as defaults in the [configuration file](#configuration_files). - -### General options {#command-line-options-general} - -| Option | Description | Default | -|-----------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------|------------------------------| -| `-c [ -C, --config, --config-file ] ` | The location of the configuration file for the client, if it is not at one of the default locations. See [Configuration Files](#configuration_files). | - | -| `--help` | Print usage summary and exit. Combine with `--verbose` to display all possible options including query settings. | - | -| `--history_file ` | Path to a file containing the command history. 
| - | -| `--history_max_entries` | Maximum number of entries in the history file. | `1000000` (1 million) | -| `--prompt ` | Specify a custom prompt. | The `display_name` of the server | -| `--verbose` | Increase output verbosity. | - | -| `-V [ --version ]` | Print version and exit. | - | - -### Connection options {#command-line-options-connection} - -| Option | Description | Default | -|----------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------| -| `--connection ` | The name of preconfigured connection details from the configuration file. See [Connection credentials](#connection-credentials). | - | -| `-d [ --database ] ` | Select the database to default to for this connection. | The current database from the server settings (`default` by default) | -| `-h [ --host ] ` | The hostname of the ClickHouse server to connect to. Can either be a hostname or an IPv4 or IPv6 address. Multiple hosts can be passed via multiple arguments. | `localhost` | -| `--jwt ` | Use JSON Web Token (JWT) for authentication.

Server JWT authorization is only available in ClickHouse Cloud. | - | -| `--login[=]` | Authenticate via OAuth2. Bare `--login` (no `=`) triggers ClickHouse Cloud automatic login — the provider is inferred from the server. To authenticate against a custom OpenID Connect provider, supply a `mode` and `--oauth-credentials`: `--login=browser` runs the Authorization Code + PKCE flow (opens a browser), `--login=device` runs the Device Authorization flow (prints a URL and short code — no browser needed). | - | -| `--oauth-credentials ` | Path to an OAuth2 credentials JSON file (Google Cloud Console format). Required when using `--login=browser` or `--login=device` with a custom OpenID Connect provider. See [OAuth credentials file format](#oauth-credentials-file) below. Refresh tokens are cached in `~/.clickhouse-client/oauth_cache.json` (mode `0600`). | `~/.clickhouse-client/oauth_client.json` | -| `--no-warnings` | Disable showing warnings from `system.warnings` when the client connects to the server. | - | -| `--no-server-client-version-message` | Suppress server-client version mismatch message when the client connects to the server. | - | -| `--password ` | The password of the database user. You can also specify the password for a connection in the configuration file. If you do not specify the password, the client will ask for it. | - | -| `--port ` | The port the server is accepting connections on. The default ports are 9440 (TLS) and 9000 (no TLS).

Note: The client uses the native protocol and not HTTP(S). | `9440` if `--secure` is specified, `9000` otherwise. Always defaults to `9440` if the hostname ends in `.clickhouse.cloud`. | -| `-s [ --secure ]` | Whether to use TLS.

Enabled automatically when connecting to port 9440 (the default secure port) or ClickHouse Cloud.

You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#openssl). | Auto-enabled when connecting to port 9440 or ClickHouse Cloud | -| `--ssh-key-file ` | File containing the SSH private key for authenticate with the server. | - | -| `--ssh-key-passphrase ` | Passphrase for the SSH private key specified in `--ssh-key-file`. | - | -| `--tls-sni-override ` | If using TLS, the server name (SNI) to pass in the handshake. | The host provided via `-h` or `--host`. | -| `-u [ --user ] ` | The database user to connect as. | `default` | - -:::note -Instead of the `--host`, `--port`, `--user` and `--password` options, the client also supports [connection strings](#connection_string). -::: - -### OAuth credentials file {#oauth-credentials-file} - -When using `--login=browser` or `--login=device` with a custom OpenID Connect provider, the client reads a credentials JSON file. The file uses the same format produced by the Google Cloud Console ("OAuth 2.0 Client IDs" → "Download JSON"): - -```json -{ - "installed": { - "client_id": "YOUR_CLIENT_ID", - "client_secret": "YOUR_CLIENT_SECRET", - "auth_uri": "https://accounts.google.com/o/oauth2/auth", - "token_uri": "https://oauth2.googleapis.com/token", - "redirect_uris": ["http://127.0.0.1"] - } -} -``` - -The top-level key can be `installed` (desktop/CLI apps) or `web`. Required fields: `client_id`, `auth_uri`, `token_uri`. Optional fields: - -| Field | Description | -|---|---| -| `client_secret` | Confidential-client secret. Omit (or leave empty) for OIDC public clients — the auth-code flow is always protected by PKCE and the device flow by the device code, so a secret is not required by the protocol. 
When the field is absent the client never sends a `client_secret` form parameter, which is the form public-client registrations require (Auth0, Microsoft Entra ID, Keycloak, Okta and others reject empty secrets with `invalid_client`). | -| `device_authorization_uri` | Device authorization endpoint. Discovered automatically via OIDC Discovery if absent. | -| `issuer` | OIDC issuer URL (e.g. `https://accounts.google.com`). Used to locate the discovery document when `device_authorization_uri` is not set. | - -The default path is `~/.clickhouse-client/oauth_client.json`. Override it with `--oauth-credentials `. - -After a successful login the obtained refresh token is cached in `~/.clickhouse-client/oauth_cache.json` (file mode `0600`). Subsequent runs reuse the cached token silently and only open the browser or print a device code when the refresh token has expired. - -### Query options {#command-line-options-query} - -| Option | Description | -|---------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `--param_=` | Substitution value for a parameter of a [query with parameters](#cli-queries-with-parameters). | -| `-q [ --query ] ` | The query to run in batch mode. Can be specified multiple times (`--query "SELECT 1" --query "SELECT 2"`) or once with multiple semicolon-separated queries (`--query "SELECT 1; SELECT 2;"`). In the latter case, `INSERT` queries with formats other than `VALUES` must be separated by empty lines.

A single query can also be specified without a parameter: `clickhouse-client "SELECT 1"`

Cannot be used together with `--queries-file`. | -| `--queries-file ` | Path to a file containing queries. `--queries-file` can be specified multiple times, e.g. `--queries-file queries1.sql --queries-file queries2.sql`.

Cannot be used together with `--query`. | -| `-m [ --multiline ]` | If specified, allow multiline queries (do not send the query on Enter). Queries will be sent only when they are ended with a semicolon. | - -### Query settings {#command-line-options-query-settings} - -Query settings can be specified as command-line options in the client, for example: -```bash -$ clickhouse-client --max_threads 1 -``` - -See [Settings](../operations/settings/settings.md) for a list of settings. - -### Formatting options {#command-line-options-formatting} - -| Option | Description | Default | -|---------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------| -| `-f [ --format ] ` | Use the specified format to output the result.

See [Formats for Input and Output Data](formats.md) for a list of supported formats. | `TabSeparated` | -| `--pager ` | Pipe all output into this command. Typically `less` (e.g., `less -S` to display wide result sets) or similar. | - | -| `-E [ --vertical ]` | Use the [Vertical format](/interfaces/formats/Vertical) to output the result. This is the same as `–-format Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables. | - | - -### Execution details {#command-line-options-execution-details} - -| Option | Description | Default | -|-----------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------| -| `--enable-progress-table-toggle` | Enable toggling of the progress table by pressing the control key (Space). Only applicable in interactive mode with progress table printing enabled. | `enabled` | -| `--hardware-utilization` | Print hardware utilization information in progress bar. | - | -| `--memory-usage` | If specified, print memory usage to `stderr` in non-interactive mode.

Possible values:
• `none` - do not print memory usage
• `default` - print number of bytes
• `readable` - print memory usage in human-readable format | - | -| `--print-profile-events` | Print `ProfileEvents` packets. | - | -| `--progress` | Print progress of query execution.

Possible values:
• `tty\|on\|1\|true\|yes` - outputs to the terminal in interactive mode
• `err` - outputs to `stderr` in non-interactive mode
• `off\|0\|false\|no` - disables progress printing | `tty` in interactive mode, `off` in non-interactive (batch) mode | -| `--progress-table` | Print a progress table with changing metrics during query execution.

Possible values:
• `tty\|on\|1\|true\|yes` - outputs to the terminal in interactive mode
• `err` - outputs to `stderr` in non-interactive mode
• `off\|0\|false\|no` - disables the progress table | `tty` in interactive mode, `off` in non-interactive (batch) mode | -| `--stacktrace` | Print stack traces of exceptions. | - | -| `-t [ --time ]` | Print query execution time to `stderr` in non-interactive mode (for benchmarks). | - | ->>>>>>> 67683cd1b46 (Merge pull request #1606 from Altinity/feature/client-IdP) diff --git a/docs/en/interfaces/client.md b/docs/en/interfaces/client.md index 9cca5b32dc10..7a236a877d80 100644 --- a/docs/en/interfaces/client.md +++ b/docs/en/interfaces/client.md @@ -836,7 +836,8 @@ All command-line options can be specified directly on the command line or as def | `-d [ --database ] ` | Select the database to default to for this connection. | The current database from the server settings (`default` by default) | | `-h [ --host ] ` | The hostname of the ClickHouse server to connect to. Can either be a hostname or an IPv4 or IPv6 address. Multiple hosts can be passed via multiple arguments. | `localhost` | | `--jwt ` | Use JSON Web Token (JWT) for authentication.

Server JWT authorization is only available in ClickHouse Cloud. | - | -| `login` | Invokes the device grant OAuth flow in order to authenticate via an IDP.

For ClickHouse Cloud hosts, the OAuth variables are inferred otherwise they must be provided with `--oauth-url`, `--oauth-client-id` and `--oauth-audience`. | - | +| `--login[=]` | Authenticate via OAuth2. Bare `--login` (no `=`) triggers ClickHouse Cloud automatic login — the provider is inferred from the server. To authenticate against a custom OpenID Connect provider, supply a `mode` and `--oauth-credentials`: `--login=browser` runs the Authorization Code + PKCE flow (opens a browser), `--login=device` runs the Device Authorization flow (prints a URL and short code — no browser needed). | - | +| `--oauth-credentials ` | Path to an OAuth2 credentials JSON file (Google Cloud Console format). Required when using `--login=browser` or `--login=device` with a custom OpenID Connect provider. See [OAuth credentials file format](#oauth-credentials-file) below. Refresh tokens are cached in `~/.clickhouse-client/oauth_cache.json` (mode `0600`). | `~/.clickhouse-client/oauth_client.json` | | `--no-warnings` | Disable showing warnings from `system.warnings` when the client connects to the server. | - | | `--no-server-client-version-message` | Suppress server-client version mismatch message when the client connects to the server. | - | | `--password ` | The password of the database user. You can also specify the password for a connection in the configuration file. If you do not specify the password, the client will ask for it. | - | @@ -851,6 +852,34 @@ All command-line options can be specified directly on the command line or as def Instead of the `--host`, `--port`, `--user` and `--password` options, the client also supports [connection strings](#connection_string). ::: +### OAuth credentials file {#oauth-credentials-file} + +When using `--login=browser` or `--login=device` with a custom OpenID Connect provider, the client reads a credentials JSON file. 
The file uses the same format produced by the Google Cloud Console ("OAuth 2.0 Client IDs" → "Download JSON"): + +```json +{ + "installed": { + "client_id": "YOUR_CLIENT_ID", + "client_secret": "YOUR_CLIENT_SECRET", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "redirect_uris": ["http://127.0.0.1"] + } +} +``` + +The top-level key can be `installed` (desktop/CLI apps) or `web`. Required fields: `client_id`, `auth_uri`, `token_uri`. Optional fields: + +| Field | Description | +|---|---| +| `client_secret` | Confidential-client secret. Omit (or leave empty) for OIDC public clients — the auth-code flow is always protected by PKCE and the device flow by the device code, so a secret is not required by the protocol. When the field is absent the client never sends a `client_secret` form parameter, which is the form public-client registrations require (Auth0, Microsoft Entra ID, Keycloak, Okta and others reject empty secrets with `invalid_client`). | +| `device_authorization_uri` | Device authorization endpoint. Discovered automatically via OIDC Discovery if absent. | +| `issuer` | OIDC issuer URL (e.g. `https://accounts.google.com`). Used to locate the discovery document when `device_authorization_uri` is not set. | + +The default path is `~/.clickhouse-client/oauth_client.json`. Override it with `--oauth-credentials `. + +After a successful login the obtained refresh token is cached in `~/.clickhouse-client/oauth_cache.json` (file mode `0600`). Subsequent runs reuse the cached token silently and only open the browser or print a device code when the refresh token has expired. 
+ ### Query options {#command-line-options-query} | Option | Description | diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index ec82250b80c8..ff0abe34d655 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -756,9 +756,7 @@ void Client::addExtraOptions(OptionsDescription & options_description) ("ssh-key-passphrase", po::value(), "Passphrase for the SSH private key specified by --ssh-key-file.") ("quota_key", po::value(), "A string to differentiate quotas when the user have keyed quotas configured on server") ("jwt", po::value(), "Use JWT for authentication") -<<<<<<< HEAD ("one-time-password", po::value(), "Time-based one-time password (TOTP) for two-factor authentication") -======= ("login", po::value()->implicit_value(""), "Authenticate via OAuth2. Optional mode: 'browser' (auth-code + PKCE, opens browser) " "or 'device' (device flow, prints URL + code). " @@ -767,7 +765,6 @@ void Client::addExtraOptions(OptionsDescription & options_description) ("oauth-credentials", po::value(), "Path to OAuth credentials JSON file " "(default: ~/.clickhouse-client/oauth_client.json)") ->>>>>>> 67683cd1b46 (Merge pull request #1606 from Altinity/feature/client-IdP) #if USE_JWT_CPP && USE_SSL ("oauth-url", po::value(), "The base URL for the OAuth 2.0 authorization server") ("oauth-client-id", po::value(), "The client ID for the OAuth 2.0 application") diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index cf1aa4ad265b..d88339ef6699 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -417,7 +417,7 @@ boost::intrusive_ptr AuthenticationData::toAST() const { const auto & claims = getJWTClaims(); if (!claims.empty()) - node->children.push_back(std::make_shared(claims)); + node->children.push_back(make_intrusive(claims)); break; } case AuthenticationType::KERBEROS: diff --git a/src/Access/Common/JWKSProvider.cpp b/src/Access/Common/JWKSProvider.cpp index 
40814ea5eb86..6eb3cb571343 100644 --- a/src/Access/Common/JWKSProvider.cpp +++ b/src/Access/Common/JWKSProvider.cpp @@ -25,7 +25,7 @@ JWKSType JWKSClient::getJWKS() auto now = std::chrono::high_resolution_clock::now(); auto diff = std::chrono::duration(now - last_request_send).count(); - if (diff < refresh_timeout && cached_jwks.has_value()) + if (diff < static_cast(refresh_timeout) && cached_jwks.has_value()) return cached_jwks.value(); Poco::Net::HTTPResponse response; diff --git a/src/Access/TokenAccessStorage.cpp b/src/Access/TokenAccessStorage.cpp index e17bc7159cef..9ea1e0895675 100644 --- a/src/Access/TokenAccessStorage.cpp +++ b/src/Access/TokenAccessStorage.cpp @@ -583,7 +583,7 @@ std::optional TokenAccessStorage::authenticateImpl( } if (id) - return AuthResult{ .user_id = *id, .authentication_data = AuthenticationData(AuthenticationType::JWT) }; + return AuthResult{ .user_id = *id, .authentication_data = AuthenticationData(AuthenticationType::JWT), .user_name = credentials.getUserName() }; return std::nullopt; } diff --git a/src/Access/TokenProcessors.h b/src/Access/TokenProcessors.h index c898bfff15d4..1b94fbe790b4 100644 --- a/src/Access/TokenProcessors.h +++ b/src/Access/TokenProcessors.h @@ -215,7 +215,6 @@ class OpenIdTokenProcessor : public ITokenProcessor private: const String expected_issuer; const String expected_audience; - const bool allow_no_expiration; Poco::URI userinfo_endpoint; Poco::URI token_introspection_endpoint; diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp index d3c8614f3799..f6397b143884 100644 --- a/src/Access/TokenProcessorsOpaque.cpp +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -281,7 +281,6 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, UInt64 jwks_cache_lifetime_) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), expected_issuer(expected_issuer_), expected_audience(expected_audience_), - 
allow_no_expiration(allow_no_expiration_), userinfo_endpoint(userinfo_endpoint_), token_introspection_endpoint(token_introspection_endpoint_) { if (!jwks_uri_.empty()) @@ -312,8 +311,7 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, UInt64 verifier_leeway_, UInt64 jwks_cache_lifetime_) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), - expected_issuer(expected_issuer_), expected_audience(expected_audience_), - allow_no_expiration(allow_no_expiration_) + expected_issuer(expected_issuer_), expected_audience(expected_audience_) { const picojson::object openid_config = getObjectFromURI(Poco::URI(openid_config_endpoint_)); From aef36464b7173c9318237d22eff1acbb59666113 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:36:18 +0200 Subject: [PATCH 05/12] Cherry-pick of https://github.com/Altinity/ClickHouse/pull/1658 with unresolved conflict markers (resolution in next commit) --- Original cherry-pick message follows: Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596 Antalya 26.3 forward-port: Token Authentication and Authorization # Conflicts: # ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt # docs/en/operations/external-authenticators/tokens.md # src/Access/Authentication.cpp # src/Access/Common/JWKSProvider.cpp # src/Access/ExternalAuthenticators.cpp # src/Access/ExternalAuthenticators.h # src/Access/TokenAccessStorage.cpp # src/Access/TokenProcessors.h # src/Access/TokenProcessorsJWT.cpp # src/Access/TokenProcessorsOpaque.cpp # src/Access/UsersConfigParser.cpp # tests/integration/test_jwt_auth/jwks_server/server.py # tests/integration/test_jwt_auth/test.py --- .../aspell-ignore/en/aspell-dict.txt | 6 + .../external-authenticators/tokens.md | 8 + src/Access/Authentication.cpp | 8 + src/Access/AuthenticationData.cpp | 1 - src/Access/Common/JWKSProvider.cpp | 18 +++ 
src/Access/ExternalAuthenticators.cpp | 61 +++++++ src/Access/ExternalAuthenticators.h | 4 + src/Access/TokenAccessStorage.cpp | 4 + src/Access/TokenProcessors.h | 20 +++ src/Access/TokenProcessorsJWT.cpp | 153 ++++++++++++++++++ src/Access/TokenProcessorsOpaque.cpp | 20 +++ src/Access/UsersConfigParser.cpp | 39 +++++ .../test_jwt_auth/jwks_server/server.py | 11 ++ tests/integration/test_jwt_auth/test.py | 20 +++ 14 files changed, 372 insertions(+), 1 deletion(-) diff --git a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt index cafba00da5c2..fc1c00e4ee8b 100644 --- a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt +++ b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt @@ -653,7 +653,10 @@ JoinStrictness Jpan JumpConsistentHash Jupyter +<<<<<<< HEAD Jurc +======= +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) jwks JWKS KDevelop @@ -3807,7 +3810,10 @@ uuids uuidv vCPU validators +<<<<<<< HEAD vLLM +======= +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) varPop varPopStable varSamp diff --git a/docs/en/operations/external-authenticators/tokens.md b/docs/en/operations/external-authenticators/tokens.md index ed9ebee2bd09..acca18436e42 100644 --- a/docs/en/operations/external-authenticators/tokens.md +++ b/docs/en/operations/external-authenticators/tokens.md @@ -102,7 +102,11 @@ Only one of `static_jwks` or `static_jwks_file` keys must be present in one veri ::: :::note +<<<<<<< HEAD Only RS* family algorithms are supported! +======= +For JWKS-based validators (`jwt_static_jwks` and `jwt_dynamic_jwks`), RS* and ES* family algorithms are supported. 
+>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) ::: ### JWT with remote JWKS @@ -212,7 +216,11 @@ Example (goes into `users.xml`): Here, the JWT payload must contain `["view-profile"]` on path `resource_access.account.roles`, otherwise authentication will not succeed even with a valid JWT. :::note +<<<<<<< HEAD If `claims` is defined, this user will not be able to authenticate using opaque tokens, so, only JWT-based authentication will be available. +======= +Per-user `claims` are enforced only when the token is a JWT (validated by a JWT processor such as `jwt_static_key` or `jwt_dynamic_jwks`). When the user authenticates with an opaque (access) token (e.g. via Azure, OpenID, or Google token processors), claims are not checked and authentication succeeds if the token is otherwise valid. +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) ::: ``` diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index a91d243dca2c..fd7e6d66a8cf 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -385,7 +385,15 @@ Authentication::CredentialsCheckResult Authentication::areCredentialsValid( if (authentication_method.getType() != AuthenticationType::JWT) return CredentialsCheckResult::Fail; +<<<<<<< HEAD return external_authenticators.checkTokenCredentials(*token_credentials) ? CredentialsCheckResult::Success : CredentialsCheckResult::Fail; +======= + return external_authenticators.checkTokenCredentials( + *token_credentials, + authentication_method.getTokenProcessorName(), + authentication_method.getJWTClaims()) ? 
+ CredentialsCheckResult::Success : CredentialsCheckResult::Fail; +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) } if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast(&credentials)) diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index d88339ef6699..5434cc8712df 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -38,7 +38,6 @@ namespace CurrentMetrics extern const Metric BcryptCacheSize; } - namespace DB { diff --git a/src/Access/Common/JWKSProvider.cpp b/src/Access/Common/JWKSProvider.cpp index 6eb3cb571343..e799c74630e1 100644 --- a/src/Access/Common/JWKSProvider.cpp +++ b/src/Access/Common/JWKSProvider.cpp @@ -2,6 +2,10 @@ #if USE_JWT_CPP #include +<<<<<<< HEAD +======= +#include +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) #include #include #include @@ -20,11 +24,25 @@ namespace ErrorCodes JWKSType JWKSClient::getJWKS() { +<<<<<<< HEAD std::shared_lock lock(mutex); auto now = std::chrono::high_resolution_clock::now(); auto diff = std::chrono::duration(now - last_request_send).count(); +======= + { + std::shared_lock lock(mutex); + auto now = std::chrono::high_resolution_clock::now(); + auto diff = std::chrono::duration(now - last_request_send).count(); + if (diff < static_cast(refresh_timeout) && cached_jwks.has_value()) + return cached_jwks.value(); + } + + std::unique_lock lock(mutex); + auto now = std::chrono::high_resolution_clock::now(); + auto diff = std::chrono::duration(now - last_request_send).count(); +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) if (diff < static_cast(refresh_timeout) && cached_jwks.has_value()) return cached_jwks.value(); diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index f045165479b8..8a9188dc666f 100644 --- 
a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -626,15 +626,26 @@ bool ExternalAuthenticators::checkCredentialsAgainstProcessor(const ITokenProces cache_entry.external_roles = credentials.getGroups(); auto default_expiration_ts = std::chrono::system_clock::now() +<<<<<<< HEAD + std::chrono::minutes(processor.getTokenCacheLifetime()); +======= + + std::chrono::seconds(processor.getTokenCacheLifetime()); +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) if (credentials.getExpiresAt().has_value()) { if (credentials.getExpiresAt().value() < default_expiration_ts) cache_entry.expires_at = credentials.getExpiresAt().value(); else +<<<<<<< HEAD LOG_TRACE(getLogger("AccessTokenAuthentication"), "Attempt to authenticate user {} with expired access token by {}", credentials.getUserName(), processor.getProcessorName()); +======= + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Token for user {} expires after default cache lifetime; using default TTL by {}", credentials.getUserName(), processor.getProcessorName()); + cache_entry.expires_at = default_expiration_ts; + } +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) } else { @@ -662,7 +673,11 @@ bool ExternalAuthenticators::checkCredentialsAgainstProcessor(const ITokenProces return false; } +<<<<<<< HEAD bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name) const +======= +bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name, const String & jwt_claims) const +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) { std::lock_guard lock{mutex}; @@ -672,15 +687,35 @@ bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & cred if (token_processors.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, 
"Token authentication is not configured"); +<<<<<<< HEAD +======= + /// Per-user claims restriction applies only to JWT processors; opaque/access token processors ignore it. + auto check_claims_if_required = [&](const ITokenProcessor & processor) -> bool + { + if (jwt_claims.empty()) + return true; + if (!processor.supportsJwtClaimsRestriction()) + return true; + return processor.checkClaims(credentials, jwt_claims); + }; + +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) /// lookup token in local cache if not expired. auto cached_entry_iter = access_token_to_username_cache.find(credentials.getToken()); if (cached_entry_iter != access_token_to_username_cache.end()) { if (cached_entry_iter->second.expires_at <= std::chrono::system_clock::now()) // Token found in cache, but already outdated -- need to remove it. { +<<<<<<< HEAD LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} expired, removing", cached_entry_iter->second.user_name); access_token_to_username_cache.erase(cached_entry_iter); username_to_access_token_cache.erase(cached_entry_iter->second.user_name); +======= + const auto expired_user_name = cached_entry_iter->second.user_name; + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} expired, removing", expired_user_name); + access_token_to_username_cache.erase(cached_entry_iter); + username_to_access_token_cache.erase(expired_user_name); +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) } else { @@ -688,12 +723,24 @@ bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & cred const_cast(credentials).setUserName(user_data.user_name); const_cast(credentials).setGroups(user_data.external_roles); LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} found, using it to authenticate", cached_entry_iter->second.user_name); +<<<<<<< HEAD +======= + if (!jwt_claims.empty()) + { + if 
(processor_name.empty()) + return false; + const auto it = token_processors.find(processor_name); + if (it == token_processors.end() || !check_claims_if_required(*it->second)) + return false; + } +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) return true; } } if (processor_name.empty()) { +<<<<<<< HEAD for (const auto & it: token_processors) { if (checkCredentialsAgainstProcessor(*it.second, const_cast(credentials))) @@ -702,6 +749,20 @@ bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & cred } else return token_processors.contains(processor_name) && checkCredentialsAgainstProcessor(*token_processors[processor_name], const_cast(credentials)); +======= + for (const auto & it : token_processors) + { + if (checkCredentialsAgainstProcessor(*it.second, const_cast(credentials))) + return check_claims_if_required(*it.second); + } + } + else + { + const auto it = token_processors.find(processor_name); + if (it != token_processors.end() && checkCredentialsAgainstProcessor(*it->second, const_cast(credentials))) + return check_claims_if_required(*it->second); + } +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) return false; } diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index 1601903c83a1..b8638e2e4342 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -49,7 +49,11 @@ class ExternalAuthenticators bool checkKerberosCredentials(const String & realm, const GSSAcceptorContext & credentials) const; bool checkHTTPBasicCredentials(const String & server, const BasicCredentials & credentials, const ClientInfo & client_info, SettingsChanges & settings) const; +<<<<<<< HEAD bool checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name = "") const; +======= + bool checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name 
= "", const String & jwt_claims = "") const; +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) GSSAcceptorContext::Params getKerberosParams() const; diff --git a/src/Access/TokenAccessStorage.cpp b/src/Access/TokenAccessStorage.cpp index 9ea1e0895675..5622039f3454 100644 --- a/src/Access/TokenAccessStorage.cpp +++ b/src/Access/TokenAccessStorage.cpp @@ -583,7 +583,11 @@ std::optional TokenAccessStorage::authenticateImpl( } if (id) +<<<<<<< HEAD return AuthResult{ .user_id = *id, .authentication_data = AuthenticationData(AuthenticationType::JWT), .user_name = credentials.getUserName() }; +======= + return AuthResult{ .user_id = *id, .authentication_data = AuthenticationData(AuthenticationType::JWT), .user_name = user->getName() }; +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) return std::nullopt; } diff --git a/src/Access/TokenProcessors.h b/src/Access/TokenProcessors.h index 1b94fbe790b4..afeeb6ba41ba 100644 --- a/src/Access/TokenProcessors.h +++ b/src/Access/TokenProcessors.h @@ -32,7 +32,14 @@ class ITokenProcessor throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for ITokenProcessor interface"); } +<<<<<<< HEAD virtual bool checkClaims(const TokenCredentials &, const String &) { return true; } +======= + virtual bool checkClaims(const TokenCredentials &, const String &) const { return true; } + + /// True only for JWT processors (static-key/JWKS). Opaque/access token processors do not use per-user claims. 
+ virtual bool supportsJwtClaimsRestriction() const { return false; } +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) UInt64 getTokenCacheLifetime() const { return token_cache_lifetime; } String getProcessorName() const { return processor_name; } @@ -93,7 +100,12 @@ class StaticKeyJwtProcessor : public ITokenProcessor const StaticKeyJwtParams & params); bool resolveAndValidate(TokenCredentials & credentials) const override; +<<<<<<< HEAD bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) override; +======= + bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const override; + bool supportsJwtClaimsRestriction() const override { return true; } +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) private: const String claims; @@ -144,7 +156,12 @@ class JwksJwtProcessor : public ITokenProcessor std::make_shared(jwks_uri_, jwks_cache_lifetime_)) {} bool resolveAndValidate(TokenCredentials & credentials) const override; +<<<<<<< HEAD bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) override; +======= + bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const override; + bool supportsJwtClaimsRestriction() const override { return true; } +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) private: const String claims; @@ -213,8 +230,11 @@ class OpenIdTokenProcessor : public ITokenProcessor bool resolveAndValidate(TokenCredentials & credentials) const override; private: +<<<<<<< HEAD const String expected_issuer; const String expected_audience; +======= +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) Poco::URI userinfo_endpoint; Poco::URI token_introspection_endpoint; diff --git a/src/Access/TokenProcessorsJWT.cpp b/src/Access/TokenProcessorsJWT.cpp 
index e041e4329b24..52dcc5c41e14 100644 --- a/src/Access/TokenProcessorsJWT.cpp +++ b/src/Access/TokenProcessorsJWT.cpp @@ -4,6 +4,15 @@ #include #include #include +<<<<<<< HEAD +======= +#include +#include +#include +#include +#include +#include +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) namespace DB { @@ -156,6 +165,80 @@ bool check_claims(const String & claims, const picojson::value::object & payload return check_claims(json.get(), payload, ""); } +<<<<<<< HEAD +======= +std::string create_public_key_from_ec_components(const std::string & x, const std::string & y, int curve_nid) +{ + auto decode_base64url = [](const std::string & value) + { + return jwt::base::decode(jwt::base::pad(value)); + }; + + auto decoded_x = decode_base64url(x); + auto decoded_y = decode_base64url(y); + + size_t coordinate_size = 0; + if (curve_nid == NID_X9_62_prime256v1) + coordinate_size = 32; + else if (curve_nid == NID_secp384r1) + coordinate_size = 48; + else if (curve_nid == NID_secp521r1) + coordinate_size = 66; + else + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unsupported EC curve"); + + if (decoded_x.size() > coordinate_size || decoded_y.size() > coordinate_size) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: invalid EC key coordinates length"); + + std::vector public_key_octets(1 + 2 * coordinate_size, 0); + public_key_octets[0] = 0x04; // Uncompressed point format. 
+ std::memcpy(public_key_octets.data() + 1 + (coordinate_size - decoded_x.size()), decoded_x.data(), decoded_x.size()); + std::memcpy(public_key_octets.data() + 1 + coordinate_size + (coordinate_size - decoded_y.size()), decoded_y.data(), decoded_y.size()); + + const char * group_name = OBJ_nid2sn(curve_nid); + if (!group_name) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unsupported EC curve"); + + std::unique_ptr params_bld(OSSL_PARAM_BLD_new(), OSSL_PARAM_BLD_free); + if (!params_bld) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to allocate OpenSSL parameter builder"); + + if (OSSL_PARAM_BLD_push_utf8_string(params_bld.get(), OSSL_PKEY_PARAM_GROUP_NAME, group_name, 0) != 1) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to set EC group parameter"); + + if (OSSL_PARAM_BLD_push_octet_string(params_bld.get(), OSSL_PKEY_PARAM_PUB_KEY, public_key_octets.data(), public_key_octets.size()) != 1) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to set EC public key parameter"); + + std::unique_ptr params(OSSL_PARAM_BLD_to_param(params_bld.get()), OSSL_PARAM_free); + if (!params) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to build OpenSSL parameters"); + + std::unique_ptr key_ctx(EVP_PKEY_CTX_new_from_name(nullptr, "EC", nullptr), EVP_PKEY_CTX_free); + if (!key_ctx) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to create EVP key context"); + + if (EVP_PKEY_fromdata_init(key_ctx.get()) <= 0) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to initialize EVP key import"); + + EVP_PKEY * raw_evp_key = nullptr; + if (EVP_PKEY_fromdata(key_ctx.get(), &raw_evp_key, EVP_PKEY_PUBLIC_KEY, params.get()) <= 0) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to import EC public 
key"); + + std::unique_ptr evp_key(raw_evp_key, EVP_PKEY_free); + + std::unique_ptr bio(BIO_new(BIO_s_mem()), BIO_free); + if (!bio) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to allocate BIO"); + + if (PEM_write_bio_PUBKEY(bio.get(), evp_key.get()) != 1) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: failed to encode EC public key"); + + char * data = nullptr; + auto len = BIO_get_mem_data(bio.get(), &data); + return std::string(data, len); +} + +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) } namespace @@ -285,12 +368,20 @@ bool checkUserClaims(const TokenCredentials & credentials, const String & claims } } +<<<<<<< HEAD bool StaticKeyJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) +======= +bool StaticKeyJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) { return checkUserClaims(credentials, claims_to_check); } +<<<<<<< HEAD bool JwksJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) +======= +bool JwksJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) { return checkUserClaims(credentials, claims_to_check); } @@ -392,12 +483,65 @@ bool JwksJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const if (public_key.empty()) { +<<<<<<< HEAD if (!(jwk.has_jwk_claim("n") && jwk.has_jwk_claim("e"))) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK: 'n' or 'e' not found", processor_name); LOG_TRACE(getLogger("TokenAuthentication"), "{}: `issuer` or `x5c` not present, verifying {} with RSA components", processor_name, username); const auto 
modulus = jwk.get_jwk_claim("n").as_string(); const auto exponent = jwk.get_jwk_claim("e").as_string(); public_key = jwt::helper::create_public_key_from_rsa_components(modulus, exponent); +======= + const auto key_type = jwk.get_key_type(); + if (key_type == "EC") + { + if (!(jwk.has_jwk_claim("x") && jwk.has_jwk_claim("y"))) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK: missing 'x'/'y' claims for EC key type", processor_name); + + int curve_nid = NID_undef; + std::optional expected_crv; + if (algo == "es256") + { + curve_nid = NID_X9_62_prime256v1; + expected_crv = "P-256"; + } + else if (algo == "es384") + { + curve_nid = NID_secp384r1; + expected_crv = "P-384"; + } + else if (algo == "es512") + { + curve_nid = NID_secp521r1; + expected_crv = "P-521"; + } + + if (curve_nid == NID_undef) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unknown algorithm {}", algo); + + if (jwk.has_jwk_claim("crv")) + { + const auto crv = jwk.get_jwk_claim("crv").as_string(); + if (expected_crv.has_value() && crv != expected_crv.value()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: `crv` in JWK does not match JWT algorithm"); + } + + LOG_TRACE(getLogger("TokenAuthentication"), "{}: `x5c` not present, verifying {} with EC components", processor_name, username); + const auto x = jwk.get_jwk_claim("x").as_string(); + const auto y = jwk.get_jwk_claim("y").as_string(); + public_key = create_public_key_from_ec_components(x, y, curve_nid); + } + else if (key_type == "RSA") + { + if (!(jwk.has_jwk_claim("n") && jwk.has_jwk_claim("e"))) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK: missing 'n'/'e' claims for RSA key type", processor_name); + LOG_TRACE(getLogger("TokenAuthentication"), "{}: `issuer` or `x5c` not present, verifying {} with RSA components", processor_name, username); + const auto modulus = jwk.get_jwk_claim("n").as_string(); + const auto exponent = 
jwk.get_jwk_claim("e").as_string(); + public_key = jwt::helper::create_public_key_from_rsa_components(modulus, exponent); + } + else + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK key type '{}'", processor_name, key_type); +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) } if (jwk.has_algorithm() && Poco::toLower(jwk.get_algorithm()) != algo) @@ -409,6 +553,15 @@ bool JwksJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const verifier = verifier.allow_algorithm(jwt::algorithm::rs384(public_key, "", "", "")); else if (algo == "rs512") verifier = verifier.allow_algorithm(jwt::algorithm::rs512(public_key, "", "", "")); +<<<<<<< HEAD +======= + else if (algo == "es256") + verifier = verifier.allow_algorithm(jwt::algorithm::es256(public_key, "", "", "")); + else if (algo == "es384") + verifier = verifier.allow_algorithm(jwt::algorithm::es384(public_key, "", "", "")); + else if (algo == "es512") + verifier = verifier.allow_algorithm(jwt::algorithm::es512(public_key, "", "", "")); +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) else throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unknown algorithm {}", algo); diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp index f6397b143884..cd0b4219927c 100644 --- a/src/Access/TokenProcessorsOpaque.cpp +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -7,9 +7,12 @@ #include #include +<<<<<<< HEAD #include #include +======= +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) namespace DB { namespace ErrorCodes @@ -117,6 +120,7 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co auto token_info = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/tokeninfo"), token); if (token_info.contains("exp")) +<<<<<<< HEAD { /// picojson stores all numerics as 
double; we need to validate the /// value is a finite, positive Unix timestamp that fits in time_t @@ -130,6 +134,9 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co processor_name, exp); credentials.setExpiresAt(std::chrono::system_clock::from_time_t(static_cast(exp))); } +======= + credentials.setExpiresAt(std::chrono::system_clock::from_time_t(static_cast(getValueByKey(token_info, "exp").value()))); +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) /// Groups info can only be retrieved if user email is known. /// If no email found in user info, we skip this step and there are no external roles for the user. @@ -280,7 +287,10 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, const String & jwks_uri_, UInt64 jwks_cache_lifetime_) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), +<<<<<<< HEAD expected_issuer(expected_issuer_), expected_audience(expected_audience_), +======= +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) userinfo_endpoint(userinfo_endpoint_), token_introspection_endpoint(token_introspection_endpoint_) { if (!jwks_uri_.empty()) @@ -310,14 +320,24 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, const String & openid_config_endpoint_, UInt64 verifier_leeway_, UInt64 jwks_cache_lifetime_) +<<<<<<< HEAD : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), expected_issuer(expected_issuer_), expected_audience(expected_audience_) +======= + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) { const picojson::object openid_config = getObjectFromURI(Poco::URI(openid_config_endpoint_)); if (!openid_config.contains("userinfo_endpoint") || 
!openid_config.contains("introspection_endpoint")) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: Cannot extract userinfo_endpoint or introspection_endpoint from OIDC configuration, consider manual configuration.", processor_name); +<<<<<<< HEAD +======= + userinfo_endpoint = Poco::URI(getValueByKey(openid_config, "userinfo_endpoint").value()); + token_introspection_endpoint = Poco::URI(getValueByKey(openid_config, "introspection_endpoint").value()); + +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) if (openid_config.contains("jwks_uri")) { LOG_TRACE(getLogger("TokenAuthentication"), "{}: JWKS URI set, local JWT processing will be attempted", processor_name_); diff --git a/src/Access/UsersConfigParser.cpp b/src/Access/UsersConfigParser.cpp index e0d1a021a966..6179cb9b10d8 100644 --- a/src/Access/UsersConfigParser.cpp +++ b/src/Access/UsersConfigParser.cpp @@ -155,8 +155,19 @@ namespace bool has_no_password = config.has(auth_method_path + ".no_password"); +<<<<<<< HEAD const auto password_plaintext_config = auth_method_path + ".password"; bool has_password_plaintext = config.has(password_plaintext_config); +======= + bool has_no_password = config.has(user_config + ".no_password"); + bool has_password_plaintext = config.has(user_config + ".password"); + bool has_password_sha256_hex = config.has(user_config + ".password_sha256_hex"); + bool has_scram_password_sha256_hex = config.has(user_config + ".password_scram_sha256_hex"); + bool has_password_double_sha1_hex = config.has(user_config + ".password_double_sha1_hex"); + bool has_ldap = config.has(user_config + ".ldap"); + bool has_kerberos = config.has(user_config + ".kerberos"); + bool has_jwt = config.has(user_config + ".jwt"); +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) const auto password_sha256_hex_config = auth_method_path + ".password_sha256_hex"; bool has_password_sha256_hex = 
config.has(password_sha256_hex_config); @@ -182,6 +193,7 @@ namespace const auto http_auth_config = auth_method_path + ".http_authentication"; bool has_http_auth = config.has(http_auth_config); +<<<<<<< HEAD bool has_jwt = config.has(auth_method_path + ".jwt"); size_t num_authentication_types = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex @@ -191,6 +203,21 @@ namespace throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify multiple authentication methods for user {} at {}. " "Specify only one authentication method.", user_name, auth_method_path); +======= + size_t num_password_fields = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex + + has_ldap + has_kerberos + has_certificates + has_ssh_keys + has_http_auth + has_scram_password_sha256_hex + has_jwt; + + if (num_password_fields > 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "More than one field of 'password', 'password_sha256_hex', " + "'password_double_sha1_hex', 'no_password', 'ldap', 'kerberos', 'ssl_certificates', 'ssh_keys', " + "'http_authentication', 'jwt' are used to specify authentication info for user {}. 
" + "Must be only one of them.", user_name); + + if (num_password_fields < 1) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'password' or 'password_sha256_hex' " + "or 'password_double_sha1_hex' or 'no_password' or 'ldap' or 'kerberos " + "or 'ssl_certificates' or 'ssh_keys' or 'http_authentication' or 'jwt' must be specified for user {}.", user_name); +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) if (num_authentication_types < 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "At least one authentication type (one of 'password', " @@ -354,6 +381,18 @@ namespace } } else if (has_jwt) +<<<<<<< HEAD +======= + { + user->authentication_methods.emplace_back(AuthenticationType::JWT); + const auto jwt_config = user_config + ".jwt"; + if (config.has(jwt_config + ".processor")) + user->authentication_methods.back().setTokenProcessorName(config.getString(jwt_config + ".processor")); + if (config.has(jwt_config + ".claims")) + user->authentication_methods.back().setJWTClaims(config.getString(jwt_config + ".claims")); + } + else +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) { auth_data = AuthenticationData(AuthenticationType::JWT); } diff --git a/tests/integration/test_jwt_auth/jwks_server/server.py b/tests/integration/test_jwt_auth/jwks_server/server.py index 96e07f02335e..7ed0c31aaf3d 100644 --- a/tests/integration/test_jwt_auth/jwks_server/server.py +++ b/tests/integration/test_jwt_auth/jwks_server/server.py @@ -16,6 +16,17 @@ def server(): "kaRv8XJbra0IeIINmKv0F4--ww8ZxXTR6cvI-MsArUiAPwzf7s5dMR4DNRG6YNTrPA0pTOqQE9sRPd62XsfU08plYm27naOUZ" "O5avIPl1YO5I6Gi4kPdTvv3WFIy-QvoKoPhPCaD6EbdBpe8BbTQ", "e": "AQAB"}, +<<<<<<< HEAD +======= + { + "kty": "EC", + "alg": "ES384", + "kid": "ecmykid", + "crv": "P-384", + "x": "ewdB5ypKwp641N5cYmKJvTiwWLIc_IJduJwur2mit1SgQpPZdUwpDV3aNIAmry4Y", + "y": "Jajx21k25o2K-ik86kaaawu6O84awaSmvSirJn8WCeEuotu3O-4Gn-ryOMuDsH76", + }, +>>>>>>> 
ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) ] } response.status = 200 diff --git a/tests/integration/test_jwt_auth/test.py b/tests/integration/test_jwt_auth/test.py index 14d42ae08bde..0dd2add37b08 100644 --- a/tests/integration/test_jwt_auth/test.py +++ b/tests/integration/test_jwt_auth/test.py @@ -80,3 +80,23 @@ def test_jwks_server(started_cluster): ] ) assert res == "jwt_user\n" +<<<<<<< HEAD +======= + + +def test_jwks_server_ec_es384(started_cluster): + res = client.exec_in_container( + [ + "bash", + "-c", + curl_with_jwt( + token="eyJhbGciOiJFUzM4NCIsImtpZCI6ImVjbXlraWQiLCJ0eXAiOiJKV1QifQ." + "eyJzdWIiOiJqd3RfdXNlciIsImlzcyI6InRlc3RfaXNzIn0." + "3iGUcKfc07oLN4XmBA6BJSGSfu7cBsdQ6KAFh1sV64rWYkVL5VzYlAskHaWZ4R9hR3QK0Bv0EPjia8Vo-xdN9jS7-fVB7RF0" + "rGvbTOIuxE-yDumCyji3MYoLpcbOVasU", + ip=cluster.get_instance_ip(instance.name), + ), + ] + ) + assert res == "jwt_user\n" +>>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) From f7f5f63cea05da40cd52295bb7ce9fc2a5cae1b3 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Sun, 7 Jun 2026 18:02:44 +0200 Subject: [PATCH 06/12] Resolve conflicts in cherry-pick of #1658 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adapted `ExternalAuthenticators::checkTokenCredentials` call in `Authentication.cpp` to include the new `processor_name` and `jwt_claims` parameters added by the source PR. Adapted `UsersConfigParser.cpp` JWT handling block: antalya-26.4 refactored from direct `user->authentication_methods.emplace_back` mutation to the `auth_data` return style and `auth_method_path` config prefix. The source PR's new processor_name and claims config reading was translated accordingly (`user->authentication_methods.back().setXxx` → `auth_data.setXxx`, `user_config + ".jwt"` → `auth_method_path + ".jwt"`). 
Source-PR: #1658 (https://github.com/Altinity/ClickHouse/pull/1658) --- .../aspell-ignore/en/aspell-dict.txt | 6 --- .../external-authenticators/tokens.md | 8 ---- src/Access/Authentication.cpp | 4 -- src/Access/Common/JWKSProvider.cpp | 11 ----- src/Access/ExternalAuthenticators.cpp | 37 +--------------- src/Access/ExternalAuthenticators.h | 4 -- src/Access/TokenAccessStorage.cpp | 4 -- src/Access/TokenProcessors.h | 17 -------- src/Access/TokenProcessorsJWT.cpp | 26 ------------ src/Access/TokenProcessorsOpaque.cpp | 34 --------------- src/Access/UsersConfigParser.cpp | 42 ++----------------- .../test_jwt_auth/jwks_server/server.py | 3 -- tests/integration/test_jwt_auth/test.py | 3 -- 13 files changed, 5 insertions(+), 194 deletions(-) diff --git a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt index fc1c00e4ee8b..cafba00da5c2 100644 --- a/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt +++ b/ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt @@ -653,10 +653,7 @@ JoinStrictness Jpan JumpConsistentHash Jupyter -<<<<<<< HEAD Jurc -======= ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) jwks JWKS KDevelop @@ -3810,10 +3807,7 @@ uuids uuidv vCPU validators -<<<<<<< HEAD vLLM -======= ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) varPop varPopStable varSamp diff --git a/docs/en/operations/external-authenticators/tokens.md b/docs/en/operations/external-authenticators/tokens.md index acca18436e42..74c02a56900b 100644 --- a/docs/en/operations/external-authenticators/tokens.md +++ b/docs/en/operations/external-authenticators/tokens.md @@ -102,11 +102,7 @@ Only one of `static_jwks` or `static_jwks_file` keys must be present in one veri ::: :::note -<<<<<<< HEAD -Only RS* family algorithms are supported! 
-======= For JWKS-based validators (`jwt_static_jwks` and `jwt_dynamic_jwks`), RS* and ES* family algorithms are supported. ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) ::: ### JWT with remote JWKS @@ -216,11 +212,7 @@ Example (goes into `users.xml`): Here, the JWT payload must contain `["view-profile"]` on path `resource_access.account.roles`, otherwise authentication will not succeed even with a valid JWT. :::note -<<<<<<< HEAD -If `claims` is defined, this user will not be able to authenticate using opaque tokens, so, only JWT-based authentication will be available. -======= Per-user `claims` are enforced only when the token is a JWT (validated by a JWT processor such as `jwt_static_key` or `jwt_dynamic_jwks`). When the user authenticates with an opaque (access) token (e.g. via Azure, OpenID, or Google token processors), claims are not checked and authentication succeeds if the token is otherwise valid. ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) ::: ``` diff --git a/src/Access/Authentication.cpp b/src/Access/Authentication.cpp index fd7e6d66a8cf..786744df45ed 100644 --- a/src/Access/Authentication.cpp +++ b/src/Access/Authentication.cpp @@ -385,15 +385,11 @@ Authentication::CredentialsCheckResult Authentication::areCredentialsValid( if (authentication_method.getType() != AuthenticationType::JWT) return CredentialsCheckResult::Fail; -<<<<<<< HEAD - return external_authenticators.checkTokenCredentials(*token_credentials) ? CredentialsCheckResult::Success : CredentialsCheckResult::Fail; -======= return external_authenticators.checkTokenCredentials( *token_credentials, authentication_method.getTokenProcessorName(), authentication_method.getJWTClaims()) ? 
CredentialsCheckResult::Success : CredentialsCheckResult::Fail; ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) } if ([[maybe_unused]] const auto * always_allow_credentials = typeid_cast(&credentials)) diff --git a/src/Access/Common/JWKSProvider.cpp b/src/Access/Common/JWKSProvider.cpp index e799c74630e1..16c0dbba2423 100644 --- a/src/Access/Common/JWKSProvider.cpp +++ b/src/Access/Common/JWKSProvider.cpp @@ -2,10 +2,7 @@ #if USE_JWT_CPP #include -<<<<<<< HEAD -======= #include ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) #include #include #include @@ -24,13 +21,6 @@ namespace ErrorCodes JWKSType JWKSClient::getJWKS() { -<<<<<<< HEAD - std::shared_lock lock(mutex); - - auto now = std::chrono::high_resolution_clock::now(); - auto diff = std::chrono::duration(now - last_request_send).count(); - -======= { std::shared_lock lock(mutex); auto now = std::chrono::high_resolution_clock::now(); @@ -42,7 +32,6 @@ JWKSType JWKSClient::getJWKS() std::unique_lock lock(mutex); auto now = std::chrono::high_resolution_clock::now(); auto diff = std::chrono::duration(now - last_request_send).count(); ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) if (diff < static_cast(refresh_timeout) && cached_jwks.has_value()) return cached_jwks.value(); diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 8a9188dc666f..e2df9693b76a 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -626,26 +626,17 @@ bool ExternalAuthenticators::checkCredentialsAgainstProcessor(const ITokenProces cache_entry.external_roles = credentials.getGroups(); auto default_expiration_ts = std::chrono::system_clock::now() -<<<<<<< HEAD - + std::chrono::minutes(processor.getTokenCacheLifetime()); -======= + std::chrono::seconds(processor.getTokenCacheLifetime()); ->>>>>>> ad5b91c853d (Merge 
pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) if (credentials.getExpiresAt().has_value()) { if (credentials.getExpiresAt().value() < default_expiration_ts) cache_entry.expires_at = credentials.getExpiresAt().value(); else -<<<<<<< HEAD - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Attempt to authenticate user {} with expired access token by {}", credentials.getUserName(), processor.getProcessorName()); - -======= { LOG_TRACE(getLogger("AccessTokenAuthentication"), "Token for user {} expires after default cache lifetime; using default TTL by {}", credentials.getUserName(), processor.getProcessorName()); cache_entry.expires_at = default_expiration_ts; } ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) } else { @@ -673,11 +664,7 @@ bool ExternalAuthenticators::checkCredentialsAgainstProcessor(const ITokenProces return false; } -<<<<<<< HEAD -bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name) const -======= bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name, const String & jwt_claims) const ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) { std::lock_guard lock{mutex}; @@ -687,8 +674,6 @@ bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & cred if (token_processors.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is not configured"); -<<<<<<< HEAD -======= /// Per-user claims restriction applies only to JWT processors; opaque/access token processors ignore it. 
auto check_claims_if_required = [&](const ITokenProcessor & processor) -> bool { @@ -699,23 +684,17 @@ bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & cred return processor.checkClaims(credentials, jwt_claims); }; ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) + /// lookup token in local cache if not expired. auto cached_entry_iter = access_token_to_username_cache.find(credentials.getToken()); if (cached_entry_iter != access_token_to_username_cache.end()) { if (cached_entry_iter->second.expires_at <= std::chrono::system_clock::now()) // Token found in cache, but already outdated -- need to remove it. { -<<<<<<< HEAD - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} expired, removing", cached_entry_iter->second.user_name); - access_token_to_username_cache.erase(cached_entry_iter); - username_to_access_token_cache.erase(cached_entry_iter->second.user_name); -======= const auto expired_user_name = cached_entry_iter->second.user_name; LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} expired, removing", expired_user_name); access_token_to_username_cache.erase(cached_entry_iter); username_to_access_token_cache.erase(expired_user_name); ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) } else { @@ -723,8 +702,6 @@ bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & cred const_cast(credentials).setUserName(user_data.user_name); const_cast(credentials).setGroups(user_data.external_roles); LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} found, using it to authenticate", cached_entry_iter->second.user_name); -<<<<<<< HEAD -======= if (!jwt_claims.empty()) { if (processor_name.empty()) @@ -733,23 +710,12 @@ bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & cred if (it == token_processors.end() || 
!check_claims_if_required(*it->second)) return false; } ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) return true; } } if (processor_name.empty()) { -<<<<<<< HEAD - for (const auto & it: token_processors) - { - if (checkCredentialsAgainstProcessor(*it.second, const_cast(credentials))) - return true; - } - } - else - return token_processors.contains(processor_name) && checkCredentialsAgainstProcessor(*token_processors[processor_name], const_cast(credentials)); -======= for (const auto & it : token_processors) { if (checkCredentialsAgainstProcessor(*it.second, const_cast(credentials))) @@ -762,7 +728,6 @@ bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & cred if (it != token_processors.end() && checkCredentialsAgainstProcessor(*it->second, const_cast(credentials))) return check_claims_if_required(*it->second); } ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) return false; } diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index b8638e2e4342..1c539fa1917c 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -49,11 +49,7 @@ class ExternalAuthenticators bool checkKerberosCredentials(const String & realm, const GSSAcceptorContext & credentials) const; bool checkHTTPBasicCredentials(const String & server, const BasicCredentials & credentials, const ClientInfo & client_info, SettingsChanges & settings) const; -<<<<<<< HEAD - bool checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name = "") const; -======= bool checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name = "", const String & jwt_claims = "") const; ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) GSSAcceptorContext::Params getKerberosParams() const; diff --git a/src/Access/TokenAccessStorage.cpp 
b/src/Access/TokenAccessStorage.cpp index 5622039f3454..fed2e86e1e0a 100644 --- a/src/Access/TokenAccessStorage.cpp +++ b/src/Access/TokenAccessStorage.cpp @@ -583,11 +583,7 @@ std::optional TokenAccessStorage::authenticateImpl( } if (id) -<<<<<<< HEAD - return AuthResult{ .user_id = *id, .authentication_data = AuthenticationData(AuthenticationType::JWT), .user_name = credentials.getUserName() }; -======= return AuthResult{ .user_id = *id, .authentication_data = AuthenticationData(AuthenticationType::JWT), .user_name = user->getName() }; ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) return std::nullopt; } diff --git a/src/Access/TokenProcessors.h b/src/Access/TokenProcessors.h index afeeb6ba41ba..f33f0300662d 100644 --- a/src/Access/TokenProcessors.h +++ b/src/Access/TokenProcessors.h @@ -32,14 +32,10 @@ class ITokenProcessor throw Exception(ErrorCodes::NOT_IMPLEMENTED, "Not implemented for ITokenProcessor interface"); } -<<<<<<< HEAD - virtual bool checkClaims(const TokenCredentials &, const String &) { return true; } -======= virtual bool checkClaims(const TokenCredentials &, const String &) const { return true; } /// True only for JWT processors (static-key/JWKS). Opaque/access token processors do not use per-user claims. 
virtual bool supportsJwtClaimsRestriction() const { return false; } ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) UInt64 getTokenCacheLifetime() const { return token_cache_lifetime; } String getProcessorName() const { return processor_name; } @@ -100,12 +96,8 @@ class StaticKeyJwtProcessor : public ITokenProcessor const StaticKeyJwtParams & params); bool resolveAndValidate(TokenCredentials & credentials) const override; -<<<<<<< HEAD - bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) override; -======= bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const override; bool supportsJwtClaimsRestriction() const override { return true; } ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) private: const String claims; @@ -156,12 +148,8 @@ class JwksJwtProcessor : public ITokenProcessor std::make_shared(jwks_uri_, jwks_cache_lifetime_)) {} bool resolveAndValidate(TokenCredentials & credentials) const override; -<<<<<<< HEAD - bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) override; -======= bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const override; bool supportsJwtClaimsRestriction() const override { return true; } ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) private: const String claims; @@ -230,11 +218,6 @@ class OpenIdTokenProcessor : public ITokenProcessor bool resolveAndValidate(TokenCredentials & credentials) const override; private: -<<<<<<< HEAD - const String expected_issuer; - const String expected_audience; -======= ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) Poco::URI userinfo_endpoint; Poco::URI token_introspection_endpoint; diff --git a/src/Access/TokenProcessorsJWT.cpp b/src/Access/TokenProcessorsJWT.cpp 
index 52dcc5c41e14..182556f24b6a 100644 --- a/src/Access/TokenProcessorsJWT.cpp +++ b/src/Access/TokenProcessorsJWT.cpp @@ -4,15 +4,12 @@ #include #include #include -<<<<<<< HEAD -======= #include #include #include #include #include #include ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) namespace DB { @@ -165,8 +162,6 @@ bool check_claims(const String & claims, const picojson::value::object & payload return check_claims(json.get(), payload, ""); } -<<<<<<< HEAD -======= std::string create_public_key_from_ec_components(const std::string & x, const std::string & y, int curve_nid) { auto decode_base64url = [](const std::string & value) @@ -238,7 +233,6 @@ std::string create_public_key_from_ec_components(const std::string & x, const st return std::string(data, len); } ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) } namespace @@ -368,20 +362,12 @@ bool checkUserClaims(const TokenCredentials & credentials, const String & claims } } -<<<<<<< HEAD -bool StaticKeyJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) -======= bool StaticKeyJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) { return checkUserClaims(credentials, claims_to_check); } -<<<<<<< HEAD -bool JwksJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) -======= bool JwksJwtProcessor::checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) { return checkUserClaims(credentials, claims_to_check); } @@ -483,14 +469,6 @@ bool JwksJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const if (public_key.empty()) { -<<<<<<< HEAD - if 
(!(jwk.has_jwk_claim("n") && jwk.has_jwk_claim("e"))) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK: 'n' or 'e' not found", processor_name); - LOG_TRACE(getLogger("TokenAuthentication"), "{}: `issuer` or `x5c` not present, verifying {} with RSA components", processor_name, username); - const auto modulus = jwk.get_jwk_claim("n").as_string(); - const auto exponent = jwk.get_jwk_claim("e").as_string(); - public_key = jwt::helper::create_public_key_from_rsa_components(modulus, exponent); -======= const auto key_type = jwk.get_key_type(); if (key_type == "EC") { @@ -541,7 +519,6 @@ bool JwksJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const } else throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK key type '{}'", processor_name, key_type); ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) } if (jwk.has_algorithm() && Poco::toLower(jwk.get_algorithm()) != algo) @@ -553,15 +530,12 @@ bool JwksJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const verifier = verifier.allow_algorithm(jwt::algorithm::rs384(public_key, "", "", "")); else if (algo == "rs512") verifier = verifier.allow_algorithm(jwt::algorithm::rs512(public_key, "", "", "")); -<<<<<<< HEAD -======= else if (algo == "es256") verifier = verifier.allow_algorithm(jwt::algorithm::es256(public_key, "", "", "")); else if (algo == "es384") verifier = verifier.allow_algorithm(jwt::algorithm::es384(public_key, "", "", "")); else if (algo == "es512") verifier = verifier.allow_algorithm(jwt::algorithm::es512(public_key, "", "", "")); ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) else throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unknown algorithm {}", algo); diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp index cd0b4219927c..b56113061fcb 100644 --- 
a/src/Access/TokenProcessorsOpaque.cpp +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -7,12 +7,6 @@ #include #include -<<<<<<< HEAD -#include -#include - -======= ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) namespace DB { namespace ErrorCodes @@ -120,23 +114,7 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co auto token_info = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/tokeninfo"), token); if (token_info.contains("exp")) -<<<<<<< HEAD - { - /// picojson stores all numerics as double; we need to validate the - /// value is a finite, positive Unix timestamp that fits in time_t - /// before casting. - const double exp = getValueByKey(token_info, "exp").value(); - if (!std::isfinite(exp) || exp <= 0.0 - || exp > static_cast(std::numeric_limits::max())) - throw Exception( - ErrorCodes::AUTHENTICATION_FAILED, - "{}: tokeninfo response contains an out-of-range 'exp' value: {}", - processor_name, exp); - credentials.setExpiresAt(std::chrono::system_clock::from_time_t(static_cast(exp))); - } -======= credentials.setExpiresAt(std::chrono::system_clock::from_time_t(static_cast(getValueByKey(token_info, "exp").value()))); ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) /// Groups info can only be retrieved if user email is known. /// If no email found in user info, we skip this step and there are no external roles for the user. 
@@ -287,10 +265,6 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, const String & jwks_uri_, UInt64 jwks_cache_lifetime_) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), -<<<<<<< HEAD - expected_issuer(expected_issuer_), expected_audience(expected_audience_), -======= ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) userinfo_endpoint(userinfo_endpoint_), token_introspection_endpoint(token_introspection_endpoint_) { if (!jwks_uri_.empty()) @@ -320,24 +294,16 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, const String & openid_config_endpoint_, UInt64 verifier_leeway_, UInt64 jwks_cache_lifetime_) -<<<<<<< HEAD - : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), - expected_issuer(expected_issuer_), expected_audience(expected_audience_) -======= : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) { const picojson::object openid_config = getObjectFromURI(Poco::URI(openid_config_endpoint_)); if (!openid_config.contains("userinfo_endpoint") || !openid_config.contains("introspection_endpoint")) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: Cannot extract userinfo_endpoint or introspection_endpoint from OIDC configuration, consider manual configuration.", processor_name); -<<<<<<< HEAD -======= userinfo_endpoint = Poco::URI(getValueByKey(openid_config, "userinfo_endpoint").value()); token_introspection_endpoint = Poco::URI(getValueByKey(openid_config, "introspection_endpoint").value()); ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) if (openid_config.contains("jwks_uri")) { LOG_TRACE(getLogger("TokenAuthentication"), "{}: JWKS URI set, local JWT processing will be attempted", processor_name_); 
diff --git a/src/Access/UsersConfigParser.cpp b/src/Access/UsersConfigParser.cpp index 6179cb9b10d8..e9ab1485f9ed 100644 --- a/src/Access/UsersConfigParser.cpp +++ b/src/Access/UsersConfigParser.cpp @@ -155,19 +155,8 @@ namespace bool has_no_password = config.has(auth_method_path + ".no_password"); -<<<<<<< HEAD const auto password_plaintext_config = auth_method_path + ".password"; bool has_password_plaintext = config.has(password_plaintext_config); -======= - bool has_no_password = config.has(user_config + ".no_password"); - bool has_password_plaintext = config.has(user_config + ".password"); - bool has_password_sha256_hex = config.has(user_config + ".password_sha256_hex"); - bool has_scram_password_sha256_hex = config.has(user_config + ".password_scram_sha256_hex"); - bool has_password_double_sha1_hex = config.has(user_config + ".password_double_sha1_hex"); - bool has_ldap = config.has(user_config + ".ldap"); - bool has_kerberos = config.has(user_config + ".kerberos"); - bool has_jwt = config.has(user_config + ".jwt"); ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) const auto password_sha256_hex_config = auth_method_path + ".password_sha256_hex"; bool has_password_sha256_hex = config.has(password_sha256_hex_config); @@ -193,7 +182,6 @@ namespace const auto http_auth_config = auth_method_path + ".http_authentication"; bool has_http_auth = config.has(http_auth_config); -<<<<<<< HEAD bool has_jwt = config.has(auth_method_path + ".jwt"); size_t num_authentication_types = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex @@ -203,21 +191,6 @@ namespace throw Exception(ErrorCodes::BAD_ARGUMENTS, "Cannot specify multiple authentication methods for user {} at {}. 
" "Specify only one authentication method.", user_name, auth_method_path); -======= - size_t num_password_fields = has_no_password + has_password_plaintext + has_password_sha256_hex + has_password_double_sha1_hex - + has_ldap + has_kerberos + has_certificates + has_ssh_keys + has_http_auth + has_scram_password_sha256_hex + has_jwt; - - if (num_password_fields > 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "More than one field of 'password', 'password_sha256_hex', " - "'password_double_sha1_hex', 'no_password', 'ldap', 'kerberos', 'ssl_certificates', 'ssh_keys', " - "'http_authentication', 'jwt' are used to specify authentication info for user {}. " - "Must be only one of them.", user_name); - - if (num_password_fields < 1) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Either 'password' or 'password_sha256_hex' " - "or 'password_double_sha1_hex' or 'no_password' or 'ldap' or 'kerberos " - "or 'ssl_certificates' or 'ssh_keys' or 'http_authentication' or 'jwt' must be specified for user {}.", user_name); ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) if (num_authentication_types < 1) throw Exception(ErrorCodes::BAD_ARGUMENTS, "At least one authentication type (one of 'password', " @@ -381,20 +354,13 @@ namespace } } else if (has_jwt) -<<<<<<< HEAD -======= { - user->authentication_methods.emplace_back(AuthenticationType::JWT); - const auto jwt_config = user_config + ".jwt"; + auth_data = AuthenticationData(AuthenticationType::JWT); + const auto jwt_config = auth_method_path + ".jwt"; if (config.has(jwt_config + ".processor")) - user->authentication_methods.back().setTokenProcessorName(config.getString(jwt_config + ".processor")); + auth_data.setTokenProcessorName(config.getString(jwt_config + ".processor")); if (config.has(jwt_config + ".claims")) - user->authentication_methods.back().setJWTClaims(config.getString(jwt_config + ".claims")); - } - else ->>>>>>> ad5b91c853d (Merge pull request #1658 from 
Altinity/feature/antalya-26.3/pr-1430-1596) - { - auth_data = AuthenticationData(AuthenticationType::JWT); + auth_data.setJWTClaims(config.getString(jwt_config + ".claims")); } return auth_data; diff --git a/tests/integration/test_jwt_auth/jwks_server/server.py b/tests/integration/test_jwt_auth/jwks_server/server.py index 7ed0c31aaf3d..67c4d6a4cf8d 100644 --- a/tests/integration/test_jwt_auth/jwks_server/server.py +++ b/tests/integration/test_jwt_auth/jwks_server/server.py @@ -16,8 +16,6 @@ def server(): "kaRv8XJbra0IeIINmKv0F4--ww8ZxXTR6cvI-MsArUiAPwzf7s5dMR4DNRG6YNTrPA0pTOqQE9sRPd62XsfU08plYm27naOUZ" "O5avIPl1YO5I6Gi4kPdTvv3WFIy-QvoKoPhPCaD6EbdBpe8BbTQ", "e": "AQAB"}, -<<<<<<< HEAD -======= { "kty": "EC", "alg": "ES384", @@ -26,7 +24,6 @@ def server(): "x": "ewdB5ypKwp641N5cYmKJvTiwWLIc_IJduJwur2mit1SgQpPZdUwpDV3aNIAmry4Y", "y": "Jajx21k25o2K-ik86kaaawu6O84awaSmvSirJn8WCeEuotu3O-4Gn-ryOMuDsH76", }, ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) ] } response.status = 200 diff --git a/tests/integration/test_jwt_auth/test.py b/tests/integration/test_jwt_auth/test.py index 0dd2add37b08..481c8117a73e 100644 --- a/tests/integration/test_jwt_auth/test.py +++ b/tests/integration/test_jwt_auth/test.py @@ -80,8 +80,6 @@ def test_jwks_server(started_cluster): ] ) assert res == "jwt_user\n" -<<<<<<< HEAD -======= def test_jwks_server_ec_es384(started_cluster): @@ -99,4 +97,3 @@ def test_jwks_server_ec_es384(started_cluster): ] ) assert res == "jwt_user\n" ->>>>>>> ad5b91c853d (Merge pull request #1658 from Altinity/feature/antalya-26.3/pr-1430-1596) From e4bcd5646eae4c12ba1e5ac8655c84686942e759 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Thu, 14 May 2026 10:32:37 +0200 Subject: [PATCH 07/12] Cherry-pick of https://github.com/Altinity/ClickHouse/pull/1777 with unresolved conflict markers (resolution in next commit) --- Original cherry-pick message follows: Merge pull request 
#1777 from Altinity/fix/antalya-26.3/oauth-address-audit Antalya 26.3: address OAuth security audit # Conflicts: # src/Access/ExternalAuthenticators.cpp --- src/Access/AccessControl.cpp | 6 + src/Access/AuthenticationData.cpp | 39 +- src/Access/Common/JWKSProvider.cpp | 143 +++++- src/Access/Common/JWKSProvider.h | 31 +- src/Access/ExternalAuthenticators.cpp | 311 +++++++++--- src/Access/ExternalAuthenticators.h | 58 ++- src/Access/TokenAccessStorage.cpp | 254 +++++++--- src/Access/TokenAccessStorage.h | 17 +- src/Access/TokenProcessors.h | 44 +- src/Access/TokenProcessorsJWT.cpp | 474 +++++++++++++----- src/Access/TokenProcessorsOpaque.cpp | 447 ++++++++++++++++- src/Access/TokenProcessorsParse.cpp | 102 +++- src/Interpreters/ClientInfo.cpp | 35 +- src/Interpreters/Session.cpp | 34 +- src/Interpreters/Session.h | 4 + src/Parsers/Access/ASTAuthenticationData.cpp | 25 +- src/Parsers/Access/ASTAuthenticationData.h | 10 + src/Parsers/Access/ParserCreateUserQuery.cpp | 41 +- src/Parsers/CommonParsers.h | 1 + src/Server/HTTP/authenticateUserByHTTP.cpp | 10 +- src/Server/TCPHandler.cpp | 7 +- tests/integration/test_jwt_auth/test.py | 110 ++++ .../test_keycloak_auth/configs/validators.xml | 4 +- .../0_stateless/01292_create_user.reference | 6 + .../queries/0_stateless/01292_create_user.sql | 19 +- 25 files changed, 1871 insertions(+), 361 deletions(-) diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index 10dfc1c29459..84ef237014a2 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -697,6 +697,12 @@ void AccessControl::restoreFromBackup(RestorerFromBackup & restorer, const Strin void AccessControl::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) { + /// Re-read `enable_token_auth` on every config reload. 
`setupFromMainConfig` + /// runs only once at startup, so without this re-sync flipping the flag in + /// the config and triggering a reload would silently leave the previous + /// value in place -- operators who toggle token auth off in response to an + /// IdP outage or a credential leak would see no effect until restart. + setTokenAuthEnabled(config.getBool("enable_token_auth", true)); external_authenticators->setConfiguration(config, getLogger(), isTokenAuthEnabled()); } diff --git a/src/Access/AuthenticationData.cpp b/src/Access/AuthenticationData.cpp index 5434cc8712df..d8c13e154844 100644 --- a/src/Access/AuthenticationData.cpp +++ b/src/Access/AuthenticationData.cpp @@ -150,6 +150,13 @@ bool AuthenticationData::Util::checkPasswordBcrypt(std::string_view password [[m bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs) { + /// `MemoryAccessStorage::updateNoLock` short-circuits when the existing + /// entity equals the new one, so any field omitted from this comparator + /// becomes invisible to ALTER USER -- same-type ALTER would silently + /// no-op. JWT users carry two extra fields (`token_processor_name` and + /// `jwt_claims`) and they MUST take part in equality, otherwise re-pinning + /// a JWT user via ALTER USER is a no-op (CREATE USER OR REPLACE works + /// only by accident, via storage->insertOrReplace). 
return (lhs.type == rhs.type) && (lhs.password_hash == rhs.password_hash) && (lhs.ldap_server_name == rhs.ldap_server_name) && (lhs.kerberos_realm == rhs.kerberos_realm) #if USE_SSL @@ -160,6 +167,8 @@ bool operator ==(const AuthenticationData & lhs, const AuthenticationData & rhs) #endif && (lhs.http_auth_scheme == rhs.http_auth_scheme) && (lhs.http_auth_server_name == rhs.http_auth_server_name) + && (lhs.token_processor_name == rhs.token_processor_name) + && (lhs.jwt_claims == rhs.jwt_claims) && (lhs.valid_until == rhs.valid_until); } @@ -414,9 +423,22 @@ boost::intrusive_ptr AuthenticationData::toAST() const } case AuthenticationType::JWT: { + /// Round-trip into the same shape the parser produces: PROCESSOR + /// child first (when set), CLAIMS child after (when set), with the + /// AST flags telling the formatter which slot is which. + const auto & processor_name = getTokenProcessorName(); + if (!processor_name.empty()) + { + node->has_jwt_processor = true; + node->children.push_back(make_intrusive(processor_name)); + } + const auto & claims = getJWTClaims(); if (!claims.empty()) + { + node->has_jwt_claims = true; node->children.push_back(make_intrusive(claims)); + } break; } case AuthenticationType::KERBEROS: @@ -698,9 +720,22 @@ AuthenticationData AuthenticationData::fromAST(const ASTAuthenticationData & que #if USE_JWT_CPP else if (query.type == AuthenticationType::JWT) { - if (!args.empty()) + /// `query.has_jwt_processor` and `query.has_jwt_claims` describe which + /// of the two optional clauses the parser saw. Children are pushed in + /// PROCESSOR-then-CLAIMS order, so we walk them in that order. 
+ size_t arg_idx = 0; + + if (query.has_jwt_processor) + { + String processor_name = checkAndGetLiteralArgument(args[arg_idx++], "processor"); + if (processor_name.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "JWT 'PROCESSOR' name must not be empty"); + auth_data.setTokenProcessorName(processor_name); + } + + if (query.has_jwt_claims) { - String value = checkAndGetLiteralArgument(args[0], "claims"); + String value = checkAndGetLiteralArgument(args[arg_idx++], "claims"); picojson::value json_obj; auto error = picojson::parse(json_obj, value); if (!error.empty()) diff --git a/src/Access/Common/JWKSProvider.cpp b/src/Access/Common/JWKSProvider.cpp index 16c0dbba2423..2b656df3a3a4 100644 --- a/src/Access/Common/JWKSProvider.cpp +++ b/src/Access/Common/JWKSProvider.cpp @@ -2,7 +2,11 @@ #if USE_JWT_CPP #include +#include +#include #include +#include +#include #include #include #include @@ -21,28 +25,72 @@ namespace ErrorCodes JWKSType JWKSClient::getJWKS() { + /// `last_request_send` semantics: timestamp of the most recent fetch + /// *attempt*, success or failure. Updated unconditionally before the + /// HTTP call so a failed fetch doesn't leave the timestamp stale and + /// invite every concurrent thread to re-hammer a failing endpoint + /// (L-02). Within `refresh_timeout` of an attempt: + /// - if a previously-successful JWKS is cached, serve it. + /// - otherwise, throw a "fetch in cooldown" exception so callers + /// don't queue up new attempts during the back-off window. 
+ { std::shared_lock lock(mutex); - auto now = std::chrono::high_resolution_clock::now(); - auto diff = std::chrono::duration(now - last_request_send).count(); - if (diff < static_cast(refresh_timeout) && cached_jwks.has_value()) - return cached_jwks.value(); + auto now = std::chrono::steady_clock::now(); + if (last_request_send.has_value()) + { + auto diff = std::chrono::duration(now - *last_request_send).count(); + if (diff < static_cast(refresh_timeout)) + { + if (cached_jwks.has_value()) + return cached_jwks.value(); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "JWKS endpoint at '{}' is in cooldown after a recent failed fetch; will retry after the cache lifetime elapses", + jwks_uri.toString()); + } + } } std::unique_lock lock(mutex); - auto now = std::chrono::high_resolution_clock::now(); - auto diff = std::chrono::duration(now - last_request_send).count(); - if (diff < static_cast(refresh_timeout) && cached_jwks.has_value()) - return cached_jwks.value(); + auto now = std::chrono::steady_clock::now(); + if (last_request_send.has_value()) + { + auto diff = std::chrono::duration(now - *last_request_send).count(); + if (diff < static_cast(refresh_timeout)) + { + if (cached_jwks.has_value()) + return cached_jwks.value(); + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "JWKS endpoint at '{}' is in cooldown after a recent failed fetch; will retry after the cache lifetime elapses", + jwks_uri.toString()); + } + } + + /// Mark the attempt before issuing the network call so that even if the + /// fetch throws, subsequent waiters on this mutex see an updated + /// `last_request_send` and short-circuit via the cooldown branches above + /// instead of repeating the failing fetch back-to-back. + last_request_send = now; Poco::Net::HTTPResponse response; std::string response_string; Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, jwks_uri.getPathAndQuery()}; + /// Bound every JWKS fetch to a known limit. 
Without this, Poco's default + /// `HTTPSession` timeout of 60 seconds applies, and because the JWKS fetch + /// runs while `ExternalAuthenticators::mutex` is held by the outer + /// `checkTokenCredentials` call, a single slow or hung JWKS endpoint would + /// stall the whole auth subsystem (LDAP, Kerberos, HTTP basic, all other + /// token auth paths) for up to a full minute per request. 10 seconds is a + /// conservative cap: well above any healthy provider latency, well below + /// the default. + const Poco::Timespan jwks_http_timeout(/*seconds=*/10, 0); + if (jwks_uri.getScheme() == "https") { Poco::Net::HTTPSClientSession session = Poco::Net::HTTPSClientSession(jwks_uri.getHost(), jwks_uri.getPort()); + session.setTimeout(jwks_http_timeout, jwks_http_timeout, jwks_http_timeout); session.sendRequest(request); std::istream & response_stream = session.receiveResponse(response); if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK || !response_stream) @@ -53,6 +101,7 @@ JWKSType JWKSClient::getJWKS() else { Poco::Net::HTTPClientSession session = Poco::Net::HTTPClientSession(jwks_uri.getHost(), jwks_uri.getPort()); + session.setTimeout(jwks_http_timeout, jwks_http_timeout, jwks_http_timeout); session.sendRequest(request); std::istream & response_stream = session.receiveResponse(response); if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK || !response_stream) @@ -60,8 +109,6 @@ JWKSType JWKSClient::getJWKS() Poco::StreamCopier::copyToString(response_stream, response_string); } - last_request_send = std::chrono::high_resolution_clock::now(); - JWKSType parsed_jwks; try @@ -92,11 +139,18 @@ StaticJWKSParams::StaticJWKSParams(const std::string & static_jwks_, const std:: StaticJWKS::StaticJWKS(const StaticJWKSParams & params) { + static_jwks_file = params.static_jwks_file; + String content = String(params.static_jwks); - if (!params.static_jwks_file.empty()) + if (!static_jwks_file.empty()) { - std::ifstream ifs(params.static_jwks_file); + 
std::ifstream ifs(static_jwks_file); Poco::StreamCopier::copyToString(ifs, content); + /// Record the mtime so subsequent `getJWKS()` calls can notice rotation. + std::error_code ec; + const auto write_time = std::filesystem::last_write_time(static_jwks_file, ec); + if (!ec) + last_loaded_mtime = write_time; } try { @@ -109,5 +163,70 @@ StaticJWKS::StaticJWKS(const StaticJWKSParams & params) } } +void StaticJWKS::reloadFromFileIfChangedNoLock() +{ + /// Inline `static_jwks` source: nothing to refresh from disk. + if (static_jwks_file.empty()) + return; + + std::error_code ec; + const auto mtime = std::filesystem::last_write_time(static_jwks_file, ec); + if (ec) + { + /// File disappeared or became unreadable. Keep the previously-loaded + /// keys -- failing closed here would lock everyone out on a transient + /// filesystem hiccup. The operator gets a log signal. + LOG_WARNING(getLogger("TokenAuthentication"), + "StaticJWKS: failed to stat '{}' for refresh ({}); keeping previously-loaded keys.", + static_jwks_file, ec.message()); + return; + } + if (mtime <= last_loaded_mtime) + return; + + /// File has been rotated. Read + parse + swap. + String content; + try + { + std::ifstream ifs(static_jwks_file); + Poco::StreamCopier::copyToString(ifs, content); + auto new_keys = jwt::parse_jwks(content); + jwks = std::move(new_keys); + last_loaded_mtime = mtime; + LOG_INFO(getLogger("TokenAuthentication"), + "StaticJWKS: reloaded keys from '{}' after detecting mtime change.", static_jwks_file); + } + catch (const std::exception & e) + { + /// Malformed new JWKS: keep the old one. Loud signal so the operator + /// knows the rotation didn't take. + LOG_ERROR(getLogger("TokenAuthentication"), + "StaticJWKS: failed to parse '{}' on refresh: {}; keeping previously-loaded keys.", + static_jwks_file, e.what()); + } +} + +JWKSType StaticJWKS::getJWKS() +{ + /// Fast path: shared lock + mtime check. Refresh under exclusive lock only + /// when the file actually changed. 
+ { + std::shared_lock lock(mutex); + if (static_jwks_file.empty()) + return jwks; + + std::error_code ec; + const auto mtime = std::filesystem::last_write_time(static_jwks_file, ec); + if (ec) + return jwks; + if (mtime <= last_loaded_mtime) + return jwks; + } + + std::unique_lock lock(mutex); + reloadFromFileIfChangedNoLock(); + return jwks; +} + } #endif diff --git a/src/Access/Common/JWKSProvider.h b/src/Access/Common/JWKSProvider.h index 566effd6e21e..c59266d9b40c 100644 --- a/src/Access/Common/JWKSProvider.h +++ b/src/Access/Common/JWKSProvider.h @@ -4,6 +4,7 @@ #include #include #include +#include #include #include @@ -44,7 +45,14 @@ class JWKSClient : public IJWKSProvider std::shared_mutex mutex; std::optional cached_jwks; - std::chrono::time_point last_request_send; + /// `steady_clock` (not `system_clock`): refresh-cooldown is an elapsed-time + /// measurement; a wall-clock jump must not skip or freeze it. + /// `std::nullopt` means "no fetch has ever been attempted" -- needed to + /// distinguish a never-attempted state from a recently-failed one, because + /// the steady-clock epoch may sit only a short distance in the past on + /// freshly-booted hosts / containers with isolated CLOCK_MONOTONIC, making + /// a zero-initialized time_point look like a "recent" attempt. + std::optional> last_request_send; }; struct StaticJWKSParams @@ -60,12 +68,25 @@ class StaticJWKS : public IJWKSProvider public: explicit StaticJWKS(const StaticJWKSParams ¶ms); + /// Reload the JWKS from disk if `static_jwks_file` was specified and the + /// file's mtime has advanced since the last load. Inline `static_jwks` + /// (no file path) is returned from the in-memory copy without I/O. + /// Without this, rotating the underlying file did NOT refresh the + /// in-memory keys -- admins had to trigger a full + /// `setExternalAuthenticatorsConfig` reload to pick up the new file. 
+ JWKSType getJWKS() override; + private: - JWKSType getJWKS() override - { - return jwks; - } + void reloadFromFileIfChangedNoLock(); + + /// Source path -- empty when JWKS came from inline `` config. + String static_jwks_file; + /// `mtime` of the file at the most recent successful load. Used to detect + /// rotation. `file_time_type::min()` means "not loaded from a file" or + /// "never seen the file yet". + std::filesystem::file_time_type last_loaded_mtime = std::filesystem::file_time_type::min(); + mutable std::shared_mutex mutex; JWKSType jwks; }; diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index e2df9693b76a..003aef420d6d 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -300,7 +300,11 @@ void ExternalAuthenticators::reset() resetImpl(); } -void parseTokenProcessors(std::unordered_map> & token_processors, +/// Parse all token processors as an all-or-nothing operation. +/// +/// Throws if ANY processor fails to parse. The caller is expected to react by +/// disabling token authentication for this configuration cycle (fail-closed). +void parseTokenProcessors(std::map> & token_processors, const Poco::Util::AbstractConfiguration & config, const String & token_processors_config, LoggerPtr log) @@ -308,20 +312,26 @@ void parseTokenProcessors(std::unordered_map> parsed; for (const auto & processor : token_processors_keys) { String prefix = fmt::format("{}.{}", token_processors_config, processor); try { - token_processors[processor] = ITokenProcessor::parseTokenProcessor(config, prefix, processor); + parsed[processor] = ITokenProcessor::parseTokenProcessor(config, prefix, processor); } catch (...) { - tryLogCurrentException(log, "Could not parse token processor" + backQuote(processor)); + tryLogCurrentException(log, "Could not parse token processor " + backQuote(processor)); + /// Re-throw so the caller fails. 
+ throw; } } + + token_processors = std::move(parsed); } bool ExternalAuthenticators::isTokenAuthEnabled() const @@ -330,6 +340,16 @@ bool ExternalAuthenticators::isTokenAuthEnabled() const return token_auth_enabled; } +bool ExternalAuthenticators::hasTokenProcessor(const String & name) const +{ + std::lock_guard lock(mutex); + if (!token_auth_enabled) + return false; + if (name.empty()) + return true; + return token_processors.contains(name); +} + void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log, bool token_auth_enabled_) { std::lock_guard lock(mutex); @@ -435,7 +455,22 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur } if (token_auth_enabled) - parseTokenProcessors(token_processors, config, token_processors_config, log); + { + try + { + parseTokenProcessors(token_processors, config, token_processors_config, log); + } + catch (...) + { + /// Fail closed: if any token processor failed to parse, refuse to + /// activate token auth at all for this config cycle. 
+ tryLogCurrentException(log, + "One or more token processors failed to parse; " + "disabling token authentication entirely until the configuration is fixed"); + token_processors.clear(); + token_auth_enabled = false; + } + } else LOG_INFO(log, "Token authentication is disabled, skipping token processors configuration"); } @@ -619,114 +654,240 @@ HTTPAuthClientParams ExternalAuthenticators::getHTTPAuthenticationParams(const S bool ExternalAuthenticators::checkCredentialsAgainstProcessor(const ITokenProcessor & processor, TokenCredentials & credentials) const { - if (processor.resolveAndValidate(credentials)) + if (!processor.resolveAndValidate(credentials)) { - TokenCacheEntry cache_entry; - cache_entry.user_name = credentials.getUserName(); - cache_entry.external_roles = credentials.getGroups(); - - auto default_expiration_ts = std::chrono::system_clock::now() - + std::chrono::seconds(processor.getTokenCacheLifetime()); - - if (credentials.getExpiresAt().has_value()) - { - if (credentials.getExpiresAt().value() < default_expiration_ts) - cache_entry.expires_at = credentials.getExpiresAt().value(); - else - { - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Token for user {} expires after default cache lifetime; using default TTL by {}", credentials.getUserName(), processor.getProcessorName()); - cache_entry.expires_at = default_expiration_ts; - } - } - else - { - cache_entry.expires_at = default_expiration_ts; - } - - LOG_DEBUG(getLogger("AccessTokenAuthentication"), "Authenticated user {} with access token by {}", credentials.getUserName(), processor.getProcessorName()); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Failed authentication with access token by {}", processor.getProcessorName()); + return false; + } - // CHeck if a cache entry for the same user but with another token exists -- old cache entry is considered outdated and removed - auto old_token_iter = username_to_access_token_cache.find(cache_entry.user_name); - if (old_token_iter != 
username_to_access_token_cache.end()) + /// Clamp the credentials' expires_at to the processor's cache lifetime so + /// upper layers (notably `Session`) bind their lifetime to whichever is + /// shorter -- the token's own expiry or the operator-configured TTL. This + /// is a *post-validation finalization* of the credentials, not a cache + /// write; the actual token-cache entry is written by `primeTokenCache`, + /// and only after any per-user `jwt_claims` policy has also accepted the + /// token (see `checkTokenCredentials`). + auto default_expiration_ts = std::chrono::system_clock::now() + + std::chrono::seconds(processor.getTokenCacheLifetime()); + + if (credentials.getExpiresAt().has_value()) + { + if (credentials.getExpiresAt().value() >= default_expiration_ts) { - access_token_to_username_cache.erase(old_token_iter->second); - username_to_access_token_cache.erase(old_token_iter); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Token for user {} expires after default cache lifetime; using default TTL by {}", credentials.getUserName(), processor.getProcessorName()); + credentials.setExpiresAt(default_expiration_ts); } - - access_token_to_username_cache[credentials.getToken()] = cache_entry; - username_to_access_token_cache[cache_entry.user_name] = credentials.getToken(); - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} added", cache_entry.user_name); - - return true; } - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Failed authentication with access token by {}", processor.getProcessorName()); + else + { + credentials.setExpiresAt(default_expiration_ts); + } - return false; + LOG_DEBUG(getLogger("AccessTokenAuthentication"), "Authenticated user {} with access token by {}", quoteString(credentials.getUserName()), processor.getProcessorName()); + return true; } -bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name, const String & jwt_claims) const +void 
ExternalAuthenticators::primeTokenCache(const ITokenProcessor & processor, + const TokenCredentials & credentials) const { - std::lock_guard lock{mutex}; + /// Build a cache entry from the credentials state that + /// `checkCredentialsAgainstProcessor` finalized. The caller is responsible + /// for invoking this only after both processor validation AND the per-user + /// `jwt_claims` policy have accepted the token -- caching before claims + /// have been evaluated would let later unconstrained lookups (e.g. the + /// HTTP/TCP pre-user-lookup call which passes empty `jwt_claims`) hit a + /// cache entry that never actually satisfied the user's policy. + TokenCacheEntry cache_entry; + cache_entry.user_name = credentials.getUserName(); + cache_entry.external_roles = credentials.getGroups(); + cache_entry.processor_name = processor.getProcessorName(); + cache_entry.expires_at = credentials.getExpiresAt().value_or( + std::chrono::system_clock::now() + std::chrono::seconds(processor.getTokenCacheLifetime())); + + /// If the same token already has a forward entry that maps to a DIFFERENT + /// user_name, clean up the stale reverse entry for that other user before + /// we overwrite the forward entry. This happens when two processors extract + /// different `username_claim` values from the same token (e.g. processor X + /// uses `sub`, processor Y uses `email`): without this, the rotation step + /// below would not see the old user's entry in the reverse map and the + /// bi-map would diverge -- forward saying token -> new_user while a stale + /// reverse says old_user -> token, surfacing later as a dangling reverse + /// pointer that breaks the single-token-per-user invariant. 
+ auto existing_forward = access_token_to_username_cache.find(credentials.getToken()); + if (existing_forward != access_token_to_username_cache.end() + && existing_forward->second.user_name != cache_entry.user_name) + { + auto stale_reverse = username_to_access_token_cache.find(existing_forward->second.user_name); + if (stale_reverse != username_to_access_token_cache.end() + && stale_reverse->second == credentials.getToken()) + username_to_access_token_cache.erase(stale_reverse); + } - if (!token_auth_enabled) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is disabled"); + /// If a previous entry exists for the same user under a different token, + /// drop it -- the user has rotated tokens and the old one is now stale. + auto old_token_iter = username_to_access_token_cache.find(cache_entry.user_name); + if (old_token_iter != username_to_access_token_cache.end()) + { + access_token_to_username_cache.erase(old_token_iter->second); + username_to_access_token_cache.erase(old_token_iter); + } - if (token_processors.empty()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is not configured"); + access_token_to_username_cache[credentials.getToken()] = cache_entry; + username_to_access_token_cache[cache_entry.user_name] = credentials.getToken(); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} added", quoteString(cache_entry.user_name)); +} - /// Per-user claims restriction applies only to JWT processors; opaque/access token processors ignore it. +bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & credentials, + const String & processor_name, + const String & jwt_claims, + bool prime_cache_on_success) const +{ + /// Per-user claims restriction is binding: when a user is configured with `jwt_claims`, + /// authentication is only allowed via processors that can actually evaluate those claims + /// (i.e. JWT processors). 
If the resolving processor cannot enforce the restriction we + /// must deny -- silently treating it as "no restriction" would let an opaque/access-token + /// processor authenticate a token that fails the user's per-user policy. auto check_claims_if_required = [&](const ITokenProcessor & processor) -> bool { if (jwt_claims.empty()) return true; if (!processor.supportsJwtClaimsRestriction()) - return true; + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Processor {} does not support per-user JWT claims restriction; " + "denying authentication that requires claims to be checked", + processor.getProcessorName()); + return false; + } return processor.checkClaims(credentials, jwt_claims); }; +<<<<<<< HEAD /// lookup token in local cache if not expired. auto cached_entry_iter = access_token_to_username_cache.find(credentials.getToken()); if (cached_entry_iter != access_token_to_username_cache.end()) +======= + /// Snapshot the processor set under the mutex, then run the expensive + /// crypto verify WITHOUT the mutex (M-20). `shared_ptr` keeps each + /// processor alive even if a config reload swaps `token_processors` in + /// the middle of validation. Cache lookup stays under the mutex. + std::map> processors_snapshot; + +>>>>>>> 52e87d75685 (Merge pull request #1777 from Altinity/fix/antalya-26.3/oauth-address-audit) { - if (cached_entry_iter->second.expires_at <= std::chrono::system_clock::now()) // Token found in cache, but already outdated -- need to remove it. 
- { - const auto expired_user_name = cached_entry_iter->second.user_name; - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} expired, removing", expired_user_name); - access_token_to_username_cache.erase(cached_entry_iter); - username_to_access_token_cache.erase(expired_user_name); - } - else + std::lock_guard lock{mutex}; + + if (!token_auth_enabled) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is disabled"); + + if (token_processors.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "Token authentication is not configured"); + + /// lookup token in local cache if not expired. + auto cached_entry_iter = access_token_to_username_cache.find(credentials.getToken()); + if (cached_entry_iter != access_token_to_username_cache.end()) { - const auto & user_data = cached_entry_iter->second; - const_cast(credentials).setUserName(user_data.user_name); - const_cast(credentials).setGroups(user_data.external_roles); - LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} found, using it to authenticate", cached_entry_iter->second.user_name); - if (!jwt_claims.empty()) + if (cached_entry_iter->second.expires_at <= std::chrono::system_clock::now()) // Token found in cache, but already outdated -- need to remove it. { - if (processor_name.empty()) - return false; - const auto it = token_processors.find(processor_name); - if (it == token_processors.end() || !check_claims_if_required(*it->second)) - return false; + const auto expired_user_name = cached_entry_iter->second.user_name; + const auto expired_token = cached_entry_iter->first; + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} expired, removing", quoteString(expired_user_name)); + access_token_to_username_cache.erase(cached_entry_iter); + + /// Only unlink the reverse mapping if it currently points at the token + /// we just evicted. 
The bi-map invariant is maintained by + /// `primeTokenCache`, but if a reverse entry is somehow stale (or if a + /// concurrent rotation under the same mutex hold has already pointed + /// the user's reverse mapping at a fresh, still-valid token), erasing + /// blindly here would unlink that fresh token's reverse entry -- + /// silently breaking the single-token-per-user invariant and extending + /// the stale token's effective retention. + auto reverse_it = username_to_access_token_cache.find(expired_user_name); + if (reverse_it != username_to_access_token_cache.end() && reverse_it->second == expired_token) + username_to_access_token_cache.erase(reverse_it); + } + /// Enforce the per-user processor pin even on cache hit. A cache entry produced by + /// processor A must NOT be used to satisfy an authentication request that is pinned + /// to a different processor B. When the caller did not pin a processor (processor_name is + /// empty) any cached entry is acceptable. + else if (processor_name.empty() || processor_name == cached_entry_iter->second.processor_name) + { + /// Evaluate per-user claims FIRST, before mutating the outer + /// `TokenCredentials`. The `const_cast`-ed `setUserName`/`setGroups`/ + /// `setExpiresAt` writes below would otherwise leak the cached + /// identity into the caller's credentials object even on rejection. 
+ if (!jwt_claims.empty()) + { + const auto it = token_processors.find(cached_entry_iter->second.processor_name); + if (it == token_processors.end() || !check_claims_if_required(*it->second)) + return false; + } + + const auto & user_data = cached_entry_iter->second; + const_cast(credentials).setUserName(user_data.user_name); + const_cast(credentials).setGroups(user_data.external_roles); + const_cast(credentials).setExpiresAt(user_data.expires_at); + LOG_TRACE(getLogger("AccessTokenAuthentication"), "Cache entry for user {} found, using it to authenticate", quoteString(user_data.user_name)); + return true; + } + else + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Cached token entry was produced by processor {}, but authentication is pinned to {}; " + "ignoring cache and re-authenticating via the pinned processor", + cached_entry_iter->second.processor_name, processor_name); } - return true; } + + processors_snapshot = token_processors; } + /// Validation path runs WITHOUT the mutex. RSA/ECDSA verifies and any + /// expensive claim matching no longer serialize the auth subsystem. 
+ auto try_processor = [&](const std::shared_ptr & proc) -> std::optional + { + if (!checkCredentialsAgainstProcessor(*proc, const_cast(credentials))) + return std::nullopt; + if (!check_claims_if_required(*proc)) + return false; + if (prime_cache_on_success) + { + std::lock_guard lock{mutex}; + primeTokenCache(*proc, credentials); + } + return true; + }; + if (processor_name.empty()) { - for (const auto & it : token_processors) + for (const auto & [name, proc] : processors_snapshot) { - if (checkCredentialsAgainstProcessor(*it.second, const_cast(credentials))) - return check_claims_if_required(*it.second); + if (!jwt_claims.empty() && !proc->supportsJwtClaimsRestriction()) + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Skipping processor {} during auto-discovery: it cannot enforce per-user JWT claims", + proc->getProcessorName()); + continue; + } + if (auto result = try_processor(proc); result.has_value()) + return *result; } } else { - const auto it = token_processors.find(processor_name); - if (it != token_processors.end() && checkCredentialsAgainstProcessor(*it->second, const_cast(credentials))) - return check_claims_if_required(*it->second); + const auto it = processors_snapshot.find(processor_name); + if (it == processors_snapshot.end()) + return false; + if (!jwt_claims.empty() && !it->second->supportsJwtClaimsRestriction()) + { + LOG_TRACE(getLogger("AccessTokenAuthentication"), + "Pinned processor {} cannot enforce per-user JWT claims; denying authentication", + it->second->getProcessorName()); + return false; + } + if (auto result = try_processor(it->second); result.has_value()) + return *result; } return false; diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index 1c539fa1917c..1486226f5bd0 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -43,13 +43,34 @@ class ExternalAuthenticators bool isTokenAuthEnabled() const; + /// Returns true if a token processor with 
the given name is currently + /// configured. Used by `Session::checkIfUserIsStillValid` to terminate + /// active sessions whose authenticating processor was removed by config + /// reload (M-28). Empty `name` is treated as "no specific pin" and + /// returns true (token auth must still be enabled, of course). + bool hasTokenProcessor(const String & name) const; + // The name and readiness of the credentials must be verified before calling these. bool checkLDAPCredentials(const String & server, const BasicCredentials & credentials, const LDAPClient::RoleSearchParamsList * role_search_params = nullptr, LDAPClient::SearchResultsList * role_search_results = nullptr) const; bool checkKerberosCredentials(const String & realm, const GSSAcceptorContext & credentials) const; bool checkHTTPBasicCredentials(const String & server, const BasicCredentials & credentials, const ClientInfo & client_info, SettingsChanges & settings) const; - bool checkTokenCredentials(const TokenCredentials & credentials, const String & processor_name = "", const String & jwt_claims = "") const; + /// `prime_cache_on_success` controls whether a successful validation populates the + /// token cache. Per-user authentication paths (the chain reached from + /// `Session::authenticate`) leave this at the default `true` -- their result is + /// gated by the user's pinned processor and per-user JWT claims, so the cache + /// entry it produces is safe to consult on subsequent requests. The HTTP and TCP + /// bearer entry points authenticate the token *before* the user is known + /// (they need the username from the token to drive user lookup) and so call + /// this with `false`: their decision is made under no processor pin and no + /// claims constraint, and a cache entry written from that context would be + /// trusted by a later per-user call whose `processor_name` is empty -- bypassing + /// the per-user processor and claim selection that would otherwise occur. 
+ bool checkTokenCredentials(const TokenCredentials & credentials, + const String & processor_name = "", + const String & jwt_claims = "", + bool prime_cache_on_success = true) const; GSSAcceptorContext::Params getKerberosParams() const; @@ -72,13 +93,31 @@ class ExternalAuthenticators mutable LDAPCaches ldap_caches TSA_GUARDED_BY(mutex) ; std::optional kerberos_params TSA_GUARDED_BY(mutex) ; std::unordered_map http_auth_servers TSA_GUARDED_BY(mutex) ; - mutable std::unordered_map> token_processors TSA_GUARDED_BY(mutex) ; + /// Ordered (std::map, not unordered_map) so that the auto-discovery + /// dispatch order in `checkTokenCredentials` is deterministic across + /// process runs. Without an ordering, the iteration order of + /// `unordered_map` is implementation-defined and may differ run-to-run + /// or after rehashing -- which means the same unpinned token can be + /// validated by processor A in one run and processor B in another, + /// producing different cached identities, different role mappings (each + /// processor has its own `groups_claim`), and surprising debugging + /// outcomes. Alphabetical-by-name order makes "first to succeed wins" + /// stable and predictable from configuration alone. + /// + /// `shared_ptr` so callers can snapshot the relevant processor pointer + /// (or the whole map) under the mutex, RELEASE the mutex, and run the + /// expensive crypto verify without serializing the entire auth + /// subsystem behind a single attacker-driven RSA verify (M-20). Cheap: + /// processor count is tiny, snapshot is shared_ptr copies. + mutable std::map> token_processors TSA_GUARDED_BY(mutex) ; struct TokenCacheEntry { std::chrono::system_clock::time_point expires_at; String user_name; std::set external_roles; + /// Name of the token processor that produced this cache entry. + String processor_name; }; /// Home-made simple bi-mapping, needed to effectively clean up cache from old tokens. 
@@ -90,8 +129,21 @@ class ExternalAuthenticators bool token_auth_enabled TSA_GUARDED_BY(mutex) = true; + /// Validates the credentials with the given processor. On success, mutates + /// `credentials` (user name, groups, effective expires_at) and returns true. + /// Does NOT write the token cache -- caching is the responsibility of the + /// caller, after the per-user `jwt_claims` policy has been evaluated. + /// + /// MUST be called WITHOUT holding `mutex`: this is the expensive crypto + /// path (M-20). The processor must be passed by `shared_ptr` so it + /// outlives a concurrent config reload that resets `token_processors`. bool checkCredentialsAgainstProcessor(const ITokenProcessor & processor, - TokenCredentials & credentials) const TSA_REQUIRES(mutex); + TokenCredentials & credentials) const; + + /// Writes the per-token cache entry. Must be called only after both processor + /// validation AND any per-user `jwt_claims` policy have accepted the token. + void primeTokenCache(const ITokenProcessor & processor, + const TokenCredentials & credentials) const TSA_REQUIRES(mutex); void resetImpl() TSA_REQUIRES(mutex); }; diff --git a/src/Access/TokenAccessStorage.cpp b/src/Access/TokenAccessStorage.cpp index fed2e86e1e0a..72dc3ed394d1 100644 --- a/src/Access/TokenAccessStorage.cpp +++ b/src/Access/TokenAccessStorage.cpp @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -106,24 +107,17 @@ namespace return result; } - String applyTransform(const String & input, const String & pattern, const String & replacement, bool global) + String applyTransform(const String & input, const re2::RE2 & re, const String & replacement, bool global) { - if (pattern.empty()) - return input; - - re2::RE2 re(pattern); - if (!re.ok()) - return input; - + /// `re` is precompiled at storage construction (the constructor refuses + /// to load with an invalid pattern, so by the time we get here the + /// regex is guaranteed to be `ok()`). 
No per-call recompilation; no + /// silent no-op on a bad pattern. String result = input; if (global) - { RE2::GlobalReplace(&result, re, replacement); - } else - { RE2::Replace(&result, re, replacement); - } return result; } } @@ -137,13 +131,51 @@ TokenAccessStorage::TokenAccessStorage(const String & storage_name_, AccessContr const String prefix_str = (prefix.empty() ? "" : prefix + "."); if (config.has(prefix_str + "roles_filter")) - roles_filter.emplace(config.getString(prefix_str + "roles_filter")); + { + const String filter_pattern = config.getString(prefix_str + "roles_filter"); + roles_filter.emplace(filter_pattern); + + /// Fail closed on invalid regex. RE2 does not throw on bad patterns -- it + /// constructs an object with ok()==false and silently fails every match. + /// Reject the configuration up front so the + /// storage cannot be instantiated in a permissive state. + if (!roles_filter->ok()) + { + const String error = roles_filter->error(); + roles_filter.reset(); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Invalid 'roles_filter' regex for Token user directory '{}': {}. " + "Refusing to start with a misconfigured filter to avoid granting " + "all token groups as roles.", + storage_name_, error); + } + } if (config.has(prefix_str + "roles_transform")) { String transform = config.getString(prefix_str + "roles_transform"); ParsedTransform parsed = parseSedTransform(transform); - roles_transform_pattern = parsed.pattern; + + /// Compile and validate the regex up front. If we deferred compilation + /// to runtime (the previous behavior), an invalid regex would silently + /// return the input unchanged on every call -- meaning every role name + /// from the IdP would flow into role-mapping ungroomed, defeating the + /// purpose of `roles_transform`. Fail loudly at construction so the + /// misconfiguration is visible at startup. 
+ if (!parsed.pattern.empty()) + { + roles_transform_pattern.emplace(parsed.pattern); + if (!roles_transform_pattern->ok()) + { + const String error = roles_transform_pattern->error(); + roles_transform_pattern.reset(); + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Invalid 'roles_transform' regex for Token user directory '{}': {}. " + "Refusing to start with a misconfigured transform to avoid admitting " + "ungroomed role names from the IdP.", + storage_name_, error); + } + } roles_transform_replacement = parsed.replacement; roles_transform_global = parsed.global; } @@ -165,6 +197,35 @@ TokenAccessStorage::TokenAccessStorage(const String & storage_name_, AccessContr if (config.has(prefix_str + "default_profile")) default_profile_name = config.getString(prefix_str + "default_profile"); + /// Optional IP allowlist for auto-provisioned users. Mirrors the + /// `users.xml` `<networks>` shape: `<ip>SUBNET</ip>` / + /// `<host>NAME</host>` / `<host_regexp>REGEX</host_regexp>` children. + /// Without this, every auto-created token user defaults to `AnyHost` and + /// admins have no way to restrict token-auth by network through standard + /// access-control config. + const auto networks_config_path = prefix_str + "networks"; + if (config.has(networks_config_path)) + { + AllowedClientHosts hosts; + Poco::Util::AbstractConfiguration::Keys network_keys; + config.keys(networks_config_path, network_keys); + for (const String & key : network_keys) + { + const String value = config.getString(networks_config_path + "." 
+ key); + if (key.starts_with("ip")) + hosts.addSubnet(value); + else if (key.starts_with("host_regexp")) + hosts.addNameRegexp(value); + else if (key.starts_with("host")) + hosts.addName(value); + else + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "Token user directory '{}': unknown entry '{}'; expected 'ip', 'host', or 'host_regexp'.", + storage_name_, key); + } + auto_user_allowed_hosts = std::move(hosts); + } + user_external_roles.clear(); users_per_roles.clear(); roles_per_users.clear(); @@ -466,28 +527,6 @@ void TokenAccessStorage::assignProfileNoLock(User & user) const } } -void TokenAccessStorage::updateAssignedRolesNoLock(const UUID & id, const String & user_name, const std::set & external_roles) const -{ - // Map and grant the roles from scratch only if the list of external role has changed. - const auto it = user_external_roles.find(user_name); - if (it != user_external_roles.end() && it->second == external_roles) - return; - - auto update_func = [this, &external_roles] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr - { - if (auto user = typeid_cast>(entity_)) - { - auto changed_user = typeid_cast>(user->clone()); - assignRolesNoLock(*changed_user, external_roles); - return changed_user; - } - return entity_; - }; - - memory_storage.update(id, update_func); -} - - std::optional TokenAccessStorage::authenticateImpl( const Credentials & credentials, const Poco::Net::IPAddress & address, @@ -498,10 +537,28 @@ std::optional TokenAccessStorage::authenticateImpl( bool /* allow_plaintext_password */) const { std::lock_guard lock(mutex); + + /// Reject mismatched credential types BEFORE the typeid_cast that would + /// throw a `LOGICAL_ERROR`. 
The reference-form `typeid_cast` is fatal on + /// mismatch, and `MultipleAccessStorage::authenticateImpl` does not catch + /// per-storage exceptions -- so a single Basic / SSL-cert / Kerberos / SSH + /// login attempt would propagate that exception out of the chain and abort + /// authentication for every later storage in `user_directories`. Concretely, + /// listing `` ahead of `` would lock out every Basic-auth + /// user. Return nullopt cleanly, matching the LDAP-side idiom in + /// `LDAPAccessStorage::areLDAPCredentialsValidNoLock`. + const auto * token_credentials_ptr = dynamic_cast(&credentials); + if (!token_credentials_ptr) + { + if (throw_if_user_not_exists) + throwNotFound(AccessEntityType::USER, credentials.getUserName(), getStorageName()); + return {}; + } + auto id = memory_storage.find(credentials.getUserName()); UserPtr user = id ? memory_storage.read(*id) : nullptr; - const auto & token_credentials = typeid_cast(credentials); + const auto & token_credentials = *token_credentials_ptr; if (!external_authenticators.checkTokenCredentials(token_credentials, provider_name)) { @@ -519,6 +576,17 @@ std::optional TokenAccessStorage::authenticateImpl( new_user = std::make_shared(); new_user->setName(credentials.getUserName()); new_user->authentication_methods.emplace_back(AuthenticationType::JWT); + /// Stamp the storage's pinned processor onto the auth method so the + /// per-request validity check (`Session::checkIfUserIsStillValid`) + /// can detect when an admin removes that processor and terminate + /// active sessions whose tokens were issued through it (M-28). + new_user->authentication_methods.back().setTokenProcessorName(provider_name); + /// If the operator configured a network allowlist for this storage, + /// stamp it onto the auto-created user so `isAddressAllowed` checks it + /// below. Without this, every auto-provisioned token user inherits + /// `AnyHostTag` and there is no way to restrict token auth by network. 
+ if (auto_user_allowed_hosts.has_value()) + new_user->allowed_client_hosts = *auto_user_allowed_hosts; user = new_user; } @@ -526,20 +594,35 @@ std::optional TokenAccessStorage::authenticateImpl( throwAddressNotAllowed(address); std::set external_roles; - if (roles_filter.has_value() && roles_filter.value().ok()) + if (roles_filter.has_value()) { - LOG_TRACE(getLogger(), "{}: External role filter found, applying only matching groups", getStorageName()); - for (const auto & group: token_credentials.getGroups()) { - if (RE2::FullMatch(group, roles_filter.value())) - { - String transformed_group = group; - if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) + /// Defensive: a broken regex must NEVER cause a fall-through to the + /// permissive "grant all groups" branch. Parse-time validation in the + /// constructor already rejects invalid patterns; this guard ensures the + /// invariant still holds if any future code path constructs the filter + /// without the parse-time check (e.g. config reload). 
+ if (!roles_filter->ok()) + { + LOG_ERROR(getLogger(), + "{}: Configured 'roles_filter' is invalid ('{}'); refusing to map any " + "external roles for user '{}' to avoid granting all token groups.", + getStorageName(), roles_filter->error(), credentials.getUserName()); + } + else + { + LOG_TRACE(getLogger(), "{}: External role filter found, applying only matching groups", getStorageName()); + for (const auto & group: token_credentials.getGroups()) { + if (RE2::FullMatch(group, roles_filter.value())) { - transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); - LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); + String transformed_group = group; + if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) + { + transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); + LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); + } + external_roles.insert(transformed_group); + LOG_TRACE(getLogger(), "{}: Granted role (group) {} to user", getStorageName(), transformed_group); } - external_roles.insert(transformed_group); - LOG_TRACE(getLogger(), "{}: Granted role (group) {} to user", getStorageName(), transformed_group); } } } @@ -566,22 +649,75 @@ std::optional TokenAccessStorage::authenticateImpl( } else { - // Just in case external_roles are changed. - updateAssignedRolesNoLock(*id, user->getName(), external_roles); + /// Apply role-set and profile changes atomically under a single + /// `memory_storage.update`. 
Splitting them into two separate updates + /// (the prior shape) opened a reader-observable window between + /// "new roles, old profile" and "new roles, new profile" -- a query + /// from another thread that read the user via `AccessControl::read` + /// would observe a mid-state, since `MemoryAccessStorage`'s lock is + /// independent of `TokenAccessStorage::mutex` (M-31). + /// + /// Preserve the existing early-return optimization: skip the update + /// when external_roles haven't changed AND the profile is already + /// assigned. The `assignRolesNoLock` cleanup still has to run if + /// the role set changes, so it lives inside the update lambda. + const bool roles_changed = [&] + { + const auto it = user_external_roles.find(user->getName()); + return it == user_external_roles.end() || it->second != external_roles; + }(); - // Also update profile if needed - memory_storage.update(*id, [this] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr + if (roles_changed) { - if (auto user_entity = typeid_cast>(entity_)) + memory_storage.update(*id, [this, &external_roles] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr { - auto changed_user = typeid_cast>(user_entity->clone()); - assignProfileNoLock(*changed_user); - return changed_user; - } - return entity_; - }); + if (auto user_entity = typeid_cast>(entity_)) + { + auto changed_user = typeid_cast>(user_entity->clone()); + assignRolesNoLock(*changed_user, external_roles); + assignProfileNoLock(*changed_user); + return changed_user; + } + return entity_; + }); + } + else + { + /// Roles are stable; just refresh the profile in case it was + /// added/changed in config since the last auth. 
+ memory_storage.update(*id, [this] (const AccessEntityPtr & entity_, const UUID &) -> AccessEntityPtr + { + if (auto user_entity = typeid_cast>(entity_)) + { + auto changed_user = typeid_cast>(user_entity->clone()); + assignProfileNoLock(*changed_user); + return changed_user; + } + return entity_; + }); + } } + /// Flush queued user-entity events from this storage's `memory_storage` so + /// subscribers observe the freshly-resolved roles and profile right away. + /// + /// `memory_storage.insert` / `update` only enqueue `onEntityAdded` / + /// `onEntityUpdated` on the shared `AccessChangesNotifier`; without an + /// explicit `sendNotifications` they sit on the queue until some unrelated + /// access mutation (a SQL DDL on access entities, a config reload, a + /// replicated-storage sync) happens to trigger a drain. During that window + /// any existing `ContextAccess` bound to this user UUID keeps serving its + /// previously-cached authorization state -- a freshly-revoked role appears + /// "still granted" until the next unrelated trigger. + /// + /// Note: `applyRoleChangeNoLock` (the storage's other mutation site) does + /// NOT need an explicit flush -- it only runs inside `processRoleChange`, + /// which is itself dispatched from a `sendNotifications` drain; the events + /// it queues are picked up by the very loop that called it. Only + /// `authenticateImpl` runs outside of any drain and so is the one site + /// that has to flush explicitly. 
+ access_control.getChangesNotifier().sendNotifications(); + if (id) return AuthResult{ .user_id = *id, .authentication_data = AuthenticationData(AuthenticationType::JWT), .user_name = user->getName() }; return std::nullopt; diff --git a/src/Access/TokenAccessStorage.h b/src/Access/TokenAccessStorage.h index aedf8843f2b9..9f15319a0d82 100644 --- a/src/Access/TokenAccessStorage.h +++ b/src/Access/TokenAccessStorage.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -49,12 +50,25 @@ class TokenAccessStorage : public IAccessStorage String provider_name; std::optional roles_filter = std::nullopt; - std::optional roles_transform_pattern = std::nullopt; + /// `roles_transform` regex compiled once at construction. Storing the + /// compiled `re2::RE2` (instead of the pattern string) avoids per-call + /// recompilation and -- more importantly -- makes parse-time validation + /// possible: an invalid regex now fails the storage construction loudly + /// rather than silently no-op'ing every transform at runtime (which would + /// admit ungroomed role names; symmetric with the `roles_filter` fail- + /// closed handling). + std::optional roles_transform_pattern = std::nullopt; std::optional roles_transform_replacement = std::nullopt; bool roles_transform_global = false; std::set common_role_names; // role name that should be granted to all users at all times String default_profile_name; // settings profile name that should be assigned to all users + /// Optional IP allowlist applied to auto-provisioned users at creation + /// time. When unset, auto-created users inherit the default `AnyHostTag` + /// (current behavior, no breakage). When set, only clients whose source + /// address matches this allowlist can authenticate as a token-auto-created + /// user, regardless of the IdP's verdict on the token. 
+ std::optional auto_user_allowed_hosts; mutable std::map> user_external_roles; mutable std::map> users_per_roles; // role name -> user names (...it should be granted to; may but don't have to exist for common roles) mutable std::map> roles_per_users; // user name -> role names (...that should be granted to it; may but don't have to include common roles) @@ -70,7 +84,6 @@ class TokenAccessStorage : public IAccessStorage void applyRoleChangeNoLock(bool grant, const UUID & role_id, const String & role_name); void assignRolesNoLock(User & user, const std::set & external_roles) const; void assignProfileNoLock(User & user) const; - void updateAssignedRolesNoLock(const UUID & id, const String & user_name, const std::set & external_roles) const; protected: std::optional findImpl(AccessEntityType type, const String & name) const override; diff --git a/src/Access/TokenProcessors.h b/src/Access/TokenProcessors.h index f33f0300662d..f2ee32d90887 100644 --- a/src/Access/TokenProcessors.h +++ b/src/Access/TokenProcessors.h @@ -12,6 +12,8 @@ namespace DB { +class RemoteHostFilter; + namespace ErrorCodes { extern const int NOT_IMPLEMENTED; @@ -81,6 +83,12 @@ struct StaticKeyJwtParams /// JWT claims to validate (optional) String claims; + + /// Clock-drift tolerance for `exp`/`nbf`/`iat` checks, in seconds. + /// jwt-cpp's default is 0, which rejects tokens on any client/server skew. + /// 60 seconds matches the OpenID processor's default and standard + /// industry practice (RFC 7519 §4.1.4 hints at "small leeway"). 
+ UInt64 verifier_leeway = 60; }; class StaticKeyJwtProcessor : public ITokenProcessor @@ -92,6 +100,7 @@ class StaticKeyJwtProcessor : public ITokenProcessor const String & groups_claim_, const String & expected_issuer_, const String & expected_audience_, + const String & expected_typ_, bool allow_no_expiration_, const StaticKeyJwtParams & params); @@ -103,6 +112,8 @@ class StaticKeyJwtProcessor : public ITokenProcessor const String claims; const String expected_issuer; const String expected_audience; + /// Required JWT `typ` header (RFC 8725 §3.11). Empty = no enforcement. + const String expected_typ; const bool allow_no_expiration; jwt::verifier verifier = jwt::verify(); }; @@ -117,13 +128,11 @@ class JwksJwtProcessor : public ITokenProcessor const String & groups_claim_, const String & expected_issuer_, const String & expected_audience_, + const String & expected_typ_, bool allow_no_expiration_, const String & claims_, size_t verifier_leeway_, - std::shared_ptr provider_) - : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), - claims(claims_), expected_issuer(expected_issuer_), expected_audience(expected_audience_), - allow_no_expiration(allow_no_expiration_), provider(provider_), verifier_leeway(verifier_leeway_) {} + std::shared_ptr provider_); explicit JwksJwtProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, @@ -131,6 +140,7 @@ class JwksJwtProcessor : public ITokenProcessor const String & groups_claim_, const String & expected_issuer_, const String & expected_audience_, + const String & expected_typ_, bool allow_no_expiration_, const String & claims_, size_t verifier_leeway_, @@ -142,6 +152,7 @@ class JwksJwtProcessor : public ITokenProcessor groups_claim_, expected_issuer_, expected_audience_, + expected_typ_, allow_no_expiration_, claims_, verifier_leeway_, @@ -155,8 +166,13 @@ class JwksJwtProcessor : public ITokenProcessor const String claims; const String expected_issuer; const String 
expected_audience; + /// Required JWT `typ` header (RFC 8725 §3.11). Empty = no enforcement. + const String expected_typ; const bool allow_no_expiration; - mutable jwt::verifier verifier = jwt::verify(); + /// Verifier is built fresh per call inside `resolveAndValidate` (it depends + /// on the current JWT's `kid` -> JWKS-resolved key, which can rotate). A + /// local-per-call verifier also makes the function thread-safe so callers + /// can invoke it without holding the global `ExternalAuthenticators::mutex`. std::shared_ptr provider; const size_t verifier_leeway; }; @@ -169,10 +185,13 @@ class GoogleTokenProcessor : public ITokenProcessor GoogleTokenProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, const String & username_claim_, - const String & groups_claim_) - : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) {} + const String & groups_claim_, + const String & expected_audience_); bool resolveAndValidate(TokenCredentials & credentials) const override; + +private: + const String expected_audience; }; class AzureTokenProcessor : public ITokenProcessor @@ -181,10 +200,13 @@ class AzureTokenProcessor : public ITokenProcessor AzureTokenProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, const String & username_claim_, - const String & groups_claim_) - : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) {} + const String & groups_claim_, + const String & expected_audience_); bool resolveAndValidate(TokenCredentials & credentials) const override; + +private: + const String expected_audience; }; class OpenIdTokenProcessor : public ITokenProcessor @@ -214,7 +236,9 @@ class OpenIdTokenProcessor : public ITokenProcessor bool allow_no_expiration_, const String & openid_config_endpoint_, UInt64 verifier_leeway_, - UInt64 jwks_cache_lifetime_); + UInt64 jwks_cache_lifetime_, + const RemoteHostFilter & remote_host_filter_, + bool 
allow_http_discovery_urls_); bool resolveAndValidate(TokenCredentials & credentials) const override; private: diff --git a/src/Access/TokenProcessorsJWT.cpp b/src/Access/TokenProcessorsJWT.cpp index 182556f24b6a..21c3a641aeb6 100644 --- a/src/Access/TokenProcessorsJWT.cpp +++ b/src/Access/TokenProcessorsJWT.cpp @@ -3,6 +3,7 @@ #if USE_JWT_CPP #include #include +#include #include #include #include @@ -11,6 +12,11 @@ #include #include +/// Ensure picojson's parse-depth ceiling stays in line with H-28's claims-recursion bound. +/// If a future picojson bump removes or raises this, we'd silently re-expose stack-exhaustion. +static_assert(picojson::DEFAULT_MAX_DEPTHS <= 100, + "picojson::DEFAULT_MAX_DEPTHS bumped above 100; revisit JWT parse-depth safety"); + namespace DB { namespace ErrorCodes @@ -22,8 +28,18 @@ namespace ErrorCodes namespace { -bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path); -bool check_claims(const picojson::value::object & claims, const picojson::value::object & payload, const String & path) +/// Depth budget for `check_claims` recursion. +/// +/// `picojson::DEFAULT_MAX_DEPTHS = 100` rejects deeply-nested JSON at parse +/// time, which already prevents the stack-exhaustion variant. We carry a +/// smaller budget here as defense in depth: if a future contrib bump or +/// PICOJSON_USE_RVALUE change widens the parse-time limit, this bound still +/// caps recursion. 32 is well above any realistic operator-configured claim +/// shape but keeps the worst-case stack frame count modest. 
+constexpr int kMaxClaimsRecursionDepth = 32; + +bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path, int depth_remaining); +bool check_claims(const picojson::value::object & claims, const picojson::value::object & payload, const String & path, int depth_remaining) { for (const auto & it : claims) { @@ -33,7 +49,7 @@ bool check_claims(const picojson::value::object & claims, const picojson::value: LOG_TRACE(getLogger("TokenAuthentication"), "Key '{}.{}' not found in JWT payload", path, it.first); return false; } - if (!check_claims(it.second, payload_it->second, path + "." + it.first)) + if (!check_claims(it.second, payload_it->second, path + "." + it.first, depth_remaining)) { return false; } @@ -41,7 +57,7 @@ bool check_claims(const picojson::value::object & claims, const picojson::value: return true; } -bool check_claims(const picojson::value::array & claims, const picojson::value::array & payload, const String & path) +bool check_claims(const picojson::value::array & claims, const picojson::value::array & payload, const String & path, int depth_remaining) { if (claims.size() > payload.size()) { @@ -54,9 +70,18 @@ bool check_claims(const picojson::value::array & claims, const picojson::value:: const auto & claims_val = claims.at(claims_i); for (const auto & payload_val : payload) { - if (!check_claims(claims_val, payload_val, path + "[" + std::to_string(claims_i) + "]")) - continue; - found = true; + /// Break on the first match. Without this, the inner loop kept + /// scanning the rest of the payload even after finding a match, + /// turning the worst case into O(|claims_array| * |payload_array|) + /// even when matches are easy. Combined with `kMaxClaimsRecursionDepth`, + /// this caps CPU per `check_claims` call so a crafted token cannot + /// stall the global `ExternalAuthenticators::mutex` (H-19) for an + /// unbounded time. 
+ if (check_claims(claims_val, payload_val, path + "[" + std::to_string(claims_i) + "]", depth_remaining)) + { + found = true; + break; + } } if (!found) { @@ -67,8 +92,18 @@ bool check_claims(const picojson::value::array & claims, const picojson::value:: return true; } -bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path) +bool check_claims(const picojson::value & claims, const picojson::value & payload, const String & path, int depth_remaining) { + if (depth_remaining <= 0) + { + LOG_ERROR(getLogger("TokenAuthentication"), + "JWT claims comparison exceeded the maximum recursion depth ({}) at '{}'; " + "rejecting to bound CPU under the auth mutex.", + kMaxClaimsRecursionDepth, path); + return false; + } + --depth_remaining; + if (claims.is()) { if (!payload.is()) @@ -76,7 +111,7 @@ bool check_claims(const picojson::value & claims, const picojson::value & payloa LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'array' in claims '{}'", path); return false; } - return check_claims(claims.get(), payload.get(), path); + return check_claims(claims.get(), payload.get(), path, depth_remaining); } if (claims.is()) { @@ -85,7 +120,7 @@ bool check_claims(const picojson::value & claims, const picojson::value & payloa LOG_TRACE(getLogger("TokenAuthentication"), "JWT payload does not match key type 'object' in claims '{}'", path); return false; } - return check_claims(claims.get(), payload.get(), path); + return check_claims(claims.get(), payload.get(), path, depth_remaining); } if (claims.is()) { @@ -159,7 +194,7 @@ bool check_claims(const String & claims, const picojson::value::object & payload throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Bad JWT claims: {}", errors); if (!json.is()) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Bad JWT claims: is not an object"); - return check_claims(json.get(), payload, ""); + return check_claims(json.get(), payload, "", 
kMaxClaimsRecursionDepth); } std::string create_public_key_from_ec_components(const std::string & x, const std::string & y, int curve_nid) @@ -251,18 +286,90 @@ std::set parseGroupsFromJsonArray(picojson::array groups_array) } } +namespace +{ +/// Warn at construction time when a JWT processor is left without an +/// `expected_audience` (and/or `expected_issuer`) pin. Without `aud`, +/// the same token is replayable on any other deployment that trusts +/// the same IdP -- a token minted for cluster X authenticates on +/// cluster Y as well, because nothing ties a JWT to "this specific +/// relying party". Same shape as the Google/Azure warnings (H-10); +/// the warning is the only signal operators get since the verifier +/// just silently skips the check when the pin is empty. +void warnIfBindingsNotPinned(const String & processor_name, + const String & expected_issuer, + const String & expected_audience, + const String & expected_typ) +{ + if (expected_audience.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_audience' is not configured. Tokens issued by the same IdP for " + "any other relying party will be accepted here, including tokens minted for a " + "different ClickHouse deployment. Set 'expected_audience' to this deployment's " + "audience to prevent cross-cluster replay.", + processor_name); + if (expected_issuer.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_issuer' is not configured. The JWT 'iss' claim will not be enforced; " + "any token signed by a key in this processor's JWKS will be accepted regardless of " + "issuer. Set 'expected_issuer' to bind tokens to a specific IdP.", + processor_name); + if (expected_typ.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_typ' is not configured. The JWT 'typ' header will not be enforced; " + "ID tokens / refresh JWTs / internal-profile JWTs from the same IdP can be presented " + "as access tokens. 
RFC 8725 §3.11 / RFC 9068 recommend setting 'expected_typ' " + "(commonly 'at+jwt' for OAuth 2.0 access tokens) to prevent cross-token-class substitution.", + processor_name); +} + +/// Verify the JWT header `typ` matches the operator-configured pin. +/// Returns false (with a TRACE log) on mismatch; true if no pin or match. +/// Comparison is case-insensitive per RFC 7519 §5.1 ("JWT" and "jwt" both valid). +bool checkJwtTyp(const String & processor_name, + const String & expected_typ, + const jwt::decoded_jwt & decoded) +{ + if (expected_typ.empty()) + return true; + + if (!decoded.has_type()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Token has no 'typ' header but 'expected_typ' is configured to '{}'; rejecting.", + processor_name, expected_typ); + return false; + } + + const String actual_typ = decoded.get_type(); + if (Poco::toLower(actual_typ) != Poco::toLower(expected_typ)) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Token 'typ' header '{}' does not match 'expected_typ' '{}'; rejecting.", + processor_name, actual_typ, expected_typ); + return false; + } + + return true; +} +} + StaticKeyJwtProcessor::StaticKeyJwtProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, const String & username_claim_, const String & groups_claim_, const String & expected_issuer_, const String & expected_audience_, + const String & expected_typ_, bool allow_no_expiration_, const StaticKeyJwtParams & params) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), claims(params.claims), expected_issuer(expected_issuer_), expected_audience(expected_audience_), + expected_typ(expected_typ_), allow_no_expiration(allow_no_expiration_) { + warnIfBindingsNotPinned(processor_name, expected_issuer, expected_audience, expected_typ); + const String & algo = params.algo; const String & static_key = params.static_key; bool static_key_in_base64 = params.static_key_in_base64; @@ -340,6 +447,12 @@ 
StaticKeyJwtProcessor::StaticKeyJwtProcessor(const String & processor_name_, else throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "{}: Invalid token processor definition, unknown algorithm {}", processor_name, algo); + /// Apply clock-drift tolerance. jwt-cpp's default is 0, which rejects + /// tokens whose `exp`/`nbf` straddles even sub-second client/server skew. + /// Operators who set `verifier_leeway` in config get that value; + /// otherwise the parser-side default (60s) kicks in. + verifier = verifier.leeway(params.verifier_leeway); + if (!expected_issuer.empty()) verifier = verifier.with_issuer(expected_issuer); @@ -377,8 +490,16 @@ bool StaticKeyJwtProcessor::resolveAndValidate(TokenCredentials & credentials) c try { auto decoded_jwt = jwt::decode(credentials.getToken()); + + /// RFC 7515 §4.1.11: an unrecognized `crit` extension MUST cause rejection. + if (decoded_jwt.has_header_claim("crit")) + return false; + verifier.verify(decoded_jwt); + if (!checkJwtTyp(processor_name, expected_typ, decoded_jwt)) + return false; + if (!allow_no_expiration && !decoded_jwt.has_expires_at()) { LOG_TRACE(getLogger("TokenAuthentication"), "{}: Token missing 'exp' claim, rejecting", processor_name); @@ -394,7 +515,18 @@ bool StaticKeyJwtProcessor::resolveAndValidate(TokenCredentials & credentials) c return false; } - credentials.setUserName(decoded_jwt.get_payload_claim(username_claim).as_string()); + /// Reject empty `username_claim` value (M-27): a present-but-empty + /// claim would set user_name="" with `is_ready=false`, which the + /// cache would happily accept and collapse every empty-username + /// token into one dynamic user. 
+ const auto user_name = decoded_jwt.get_payload_claim(username_claim).as_string(); + if (user_name.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from claim '{}' is empty; rejecting", processor_name, username_claim); + return false; + } + credentials.setUserName(user_name); if (decoded_jwt.has_payload_claim(groups_claim)) credentials.setGroups(parseGroupsFromJsonArray(decoded_jwt.get_payload_claim(groups_claim).as_array())); @@ -410,156 +542,246 @@ bool StaticKeyJwtProcessor::resolveAndValidate(TokenCredentials & credentials) c } } -bool JwksJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const +JwksJwtProcessor::JwksJwtProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_issuer_, + const String & expected_audience_, + const String & expected_typ_, + bool allow_no_expiration_, + const String & claims_, + size_t verifier_leeway_, + std::shared_ptr provider_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), + claims(claims_), expected_issuer(expected_issuer_), expected_audience(expected_audience_), + expected_typ(expected_typ_), + allow_no_expiration(allow_no_expiration_), provider(provider_), verifier_leeway(verifier_leeway_) { - auto decoded_jwt = jwt::decode(credentials.getToken()); - - if (!allow_no_expiration && !decoded_jwt.has_expires_at()) - { - LOG_TRACE(getLogger("TokenAuthentication"), "{}: Token missing 'exp' claim, rejecting", processor_name); - return false; - } + warnIfBindingsNotPinned(processor_name, expected_issuer, expected_audience, expected_typ); +} - if (!decoded_jwt.has_payload_claim(username_claim)) +bool JwksJwtProcessor::resolveAndValidate(TokenCredentials & credentials) const +{ + /// Whole-body try/catch mirrors `StaticKeyJwtProcessor::resolveAndValidate`. 
+ /// + /// In the auto-discovery path inside `ExternalAuthenticators::checkTokenCredentials`, + /// processors are tried in turn and any exception out of one aborts the entire + /// loop -- later processors are never consulted. That is fine for "the token is + /// definitively bad", but the failures in this body are also raised when the + /// token simply belongs to a different processor (e.g. its `kid` is not in + /// THIS processor's JWKS, or its `alg` is one this JWKS does not know about, + /// or the JWK lacks the components this code path needs). In a multi-processor + /// deployment, raising in those cases denies a perfectly good token just + /// because a sibling processor happened to be iterated first. Convert every + /// such failure into a `false` return so the iterator can move on -- consistent + /// with how `StaticKeyJwtProcessor` already handles its own validation errors. + try { - LOG_ERROR(getLogger("TokenAuthentication"), "{}: Specified username_claim not found in token", processor_name); - return false; - } + auto decoded_jwt = jwt::decode(credentials.getToken()); - if (!decoded_jwt.has_key_id()) - { - LOG_ERROR(getLogger("TokenAuthentication"), "{}: 'kid' (key ID) claim not found in token", processor_name); - return false; - } + /// RFC 7515 §4.1.11: an unrecognized `crit` extension MUST cause rejection. 
+ if (decoded_jwt.has_header_claim("crit")) + return false; - if (!provider->getJWKS().has_jwk(decoded_jwt.get_key_id())) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWKS error: no JWK found for JWT"); + if (!checkJwtTyp(processor_name, expected_typ, decoded_jwt)) + return false; - auto jwk = provider->getJWKS().get_jwk(decoded_jwt.get_key_id()); - auto username = decoded_jwt.get_payload_claim(username_claim).as_string(); + if (!allow_no_expiration && !decoded_jwt.has_expires_at()) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Token missing 'exp' claim, rejecting", processor_name); + return false; + } - if (!decoded_jwt.has_algorithm()) - { - LOG_ERROR(getLogger("TokenAuthentication"), "{}: Algorithm not specified in token", processor_name); - return false; - } - auto algo = Poco::toLower(decoded_jwt.get_algorithm()); + if (!decoded_jwt.has_payload_claim(username_claim)) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: Specified username_claim not found in token", processor_name); + return false; + } + if (!decoded_jwt.has_key_id()) + { + LOG_ERROR(getLogger("TokenAuthentication"), "{}: 'kid' (key ID) claim not found in token", processor_name); + return false; + } - String public_key; + if (!provider->getJWKS().has_jwk(decoded_jwt.get_key_id())) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: No JWK matching token 'kid' {} in this processor's JWKS; rejecting (a sibling processor may still accept it).", + processor_name, quoteString(decoded_jwt.get_key_id())); + return false; + } - try - { - auto x5c = jwk.get_x5c_key_value(); + auto jwk = provider->getJWKS().get_jwk(decoded_jwt.get_key_id()); + auto username = decoded_jwt.get_payload_claim(username_claim).as_string(); - if (!x5c.empty()) + if (!decoded_jwt.has_algorithm()) { - LOG_TRACE(getLogger("TokenAuthentication"), "{}: Verifying {} with 'x5c' key", processor_name, username); - public_key = jwt::helper::convert_base64_der_to_pem(x5c); + 
LOG_ERROR(getLogger("TokenAuthentication"), "{}: Algorithm not specified in token", processor_name); + return false; } - } - catch (const jwt::error::claim_not_present_exception &) - { - LOG_TRACE(getLogger("TokenAuthentication"), "{}: x5c was not specified in JWK, will try RSA components", processor_name); - } - catch (const std::bad_cast &) - { - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: invalid claim value type found, claims must be strings"); - } + auto algo = Poco::toLower(decoded_jwt.get_algorithm()); - if (public_key.empty()) - { - const auto key_type = jwk.get_key_type(); - if (key_type == "EC") + + String public_key; + + try { - if (!(jwk.has_jwk_claim("x") && jwk.has_jwk_claim("y"))) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK: missing 'x'/'y' claims for EC key type", processor_name); + auto x5c = jwk.get_x5c_key_value(); - int curve_nid = NID_undef; - std::optional expected_crv; - if (algo == "es256") + if (!x5c.empty()) { - curve_nid = NID_X9_62_prime256v1; - expected_crv = "P-256"; + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Verifying {} with 'x5c' key", processor_name, quoteString(username)); + public_key = jwt::helper::convert_base64_der_to_pem(x5c); } - else if (algo == "es384") + } + catch (const jwt::error::claim_not_present_exception &) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: x5c was not specified in JWK, will try RSA components", processor_name); + } + + if (public_key.empty()) + { + const auto key_type = jwk.get_key_type(); + if (key_type == "EC") { - curve_nid = NID_secp384r1; - expected_crv = "P-384"; + if (!(jwk.has_jwk_claim("x") && jwk.has_jwk_claim("y"))) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK for token 'kid' is missing 'x'/'y' for EC key type; rejecting.", processor_name); + return false; + } + + int curve_nid = NID_undef; + std::optional expected_crv; + if (algo == "es256") + { + curve_nid = NID_X9_62_prime256v1; + expected_crv = 
"P-256"; + } + else if (algo == "es384") + { + curve_nid = NID_secp384r1; + expected_crv = "P-384"; + } + else if (algo == "es512") + { + curve_nid = NID_secp521r1; + expected_crv = "P-521"; + } + + if (curve_nid == NID_undef) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Unknown algorithm {} for EC key; rejecting.", processor_name, quoteString(algo)); + return false; + } + + if (jwk.has_jwk_claim("crv")) + { + const auto crv = jwk.get_jwk_claim("crv").as_string(); + if (expected_crv.has_value() && crv != expected_crv.value()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK 'crv' {} does not match JWT algorithm {}; rejecting.", + processor_name, quoteString(crv), quoteString(algo)); + return false; + } + } + + LOG_TRACE(getLogger("TokenAuthentication"), "{}: `x5c` not present, verifying {} with EC components", processor_name, quoteString(username)); + const auto x = jwk.get_jwk_claim("x").as_string(); + const auto y = jwk.get_jwk_claim("y").as_string(); + public_key = create_public_key_from_ec_components(x, y, curve_nid); } - else if (algo == "es512") + else if (key_type == "RSA") { - curve_nid = NID_secp521r1; - expected_crv = "P-521"; + if (!(jwk.has_jwk_claim("n") && jwk.has_jwk_claim("e"))) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK is missing 'n'/'e' for RSA key type; rejecting.", processor_name); + return false; + } + LOG_TRACE(getLogger("TokenAuthentication"), "{}: `issuer` or `x5c` not present, verifying {} with RSA components", processor_name, quoteString(username)); + const auto modulus = jwk.get_jwk_claim("n").as_string(); + const auto exponent = jwk.get_jwk_claim("e").as_string(); + public_key = jwt::helper::create_public_key_from_rsa_components(modulus, exponent); } - - if (curve_nid == NID_undef) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unknown algorithm {}", algo); - - if (jwk.has_jwk_claim("crv")) + else { - const auto crv = jwk.get_jwk_claim("crv").as_string(); - if 
(expected_crv.has_value() && crv != expected_crv.value()) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: `crv` in JWK does not match JWT algorithm"); + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Unsupported JWK key type {}; rejecting.", processor_name, quoteString(key_type)); + return false; } - - LOG_TRACE(getLogger("TokenAuthentication"), "{}: `x5c` not present, verifying {} with EC components", processor_name, username); - const auto x = jwk.get_jwk_claim("x").as_string(); - const auto y = jwk.get_jwk_claim("y").as_string(); - public_key = create_public_key_from_ec_components(x, y, curve_nid); } - else if (key_type == "RSA") + + if (jwk.has_algorithm() && Poco::toLower(jwk.get_algorithm()) != algo) { - if (!(jwk.has_jwk_claim("n") && jwk.has_jwk_claim("e"))) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK: missing 'n'/'e' claims for RSA key type", processor_name); - LOG_TRACE(getLogger("TokenAuthentication"), "{}: `issuer` or `x5c` not present, verifying {} with RSA components", processor_name, username); - const auto modulus = jwk.get_jwk_claim("n").as_string(); - const auto exponent = jwk.get_jwk_claim("e").as_string(); - public_key = jwt::helper::create_public_key_from_rsa_components(modulus, exponent); + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: JWK 'alg' does not match JWT algorithm {}; rejecting.", processor_name, quoteString(algo)); + return false; } - else - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: invalid JWK key type '{}'", processor_name, key_type); - } - if (jwk.has_algorithm() && Poco::toLower(jwk.get_algorithm()) != algo) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT validation error: `alg` in JWK does not match the algorithm used in JWT"); - - if (algo == "rs256") - verifier = verifier.allow_algorithm(jwt::algorithm::rs256(public_key, "", "", "")); - else if (algo == "rs384") - verifier = verifier.allow_algorithm(jwt::algorithm::rs384(public_key, "", 
"", "")); - else if (algo == "rs512") - verifier = verifier.allow_algorithm(jwt::algorithm::rs512(public_key, "", "", "")); - else if (algo == "es256") - verifier = verifier.allow_algorithm(jwt::algorithm::es256(public_key, "", "", "")); - else if (algo == "es384") - verifier = verifier.allow_algorithm(jwt::algorithm::es384(public_key, "", "", "")); - else if (algo == "es512") - verifier = verifier.allow_algorithm(jwt::algorithm::es512(public_key, "", "", "")); - else - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "JWT cannot be validated: unknown algorithm {}", algo); + /// Build the verifier locally (was a `mutable` member; making it local + /// makes `resolveAndValidate` thread-safe so the caller can drop the + /// global auth mutex around the expensive crypto verify). + auto verifier = jwt::verify(); + if (algo == "rs256") + verifier = verifier.allow_algorithm(jwt::algorithm::rs256(public_key, "", "", "")); + else if (algo == "rs384") + verifier = verifier.allow_algorithm(jwt::algorithm::rs384(public_key, "", "", "")); + else if (algo == "rs512") + verifier = verifier.allow_algorithm(jwt::algorithm::rs512(public_key, "", "", "")); + else if (algo == "es256") + verifier = verifier.allow_algorithm(jwt::algorithm::es256(public_key, "", "", "")); + else if (algo == "es384") + verifier = verifier.allow_algorithm(jwt::algorithm::es384(public_key, "", "", "")); + else if (algo == "es512") + verifier = verifier.allow_algorithm(jwt::algorithm::es512(public_key, "", "", "")); + else + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Unknown JWT algorithm {}; rejecting.", processor_name, quoteString(algo)); + return false; + } - verifier = verifier.leeway(verifier_leeway); + verifier = verifier.leeway(verifier_leeway); - if (!expected_issuer.empty()) - verifier = verifier.with_issuer(expected_issuer); + if (!expected_issuer.empty()) + verifier = verifier.with_issuer(expected_issuer); - if (!expected_audience.empty()) - verifier = 
verifier.with_audience(expected_audience); + if (!expected_audience.empty()) + verifier = verifier.with_audience(expected_audience); - verifier.verify(decoded_jwt); + verifier.verify(decoded_jwt); - if (!claims.empty() && !check_claims(claims, decoded_jwt.get_payload_json())) - return false; + if (!claims.empty() && !check_claims(claims, decoded_jwt.get_payload_json())) + return false; - credentials.setUserName(decoded_jwt.get_payload_claim(username_claim).as_string()); + /// Reject empty resolved username (M-27); see the + /// `StaticKeyJwtProcessor` peer for rationale. + const auto user_name = decoded_jwt.get_payload_claim(username_claim).as_string(); + if (user_name.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from claim '{}' is empty; rejecting", processor_name, username_claim); + return false; + } + credentials.setUserName(user_name); - if (decoded_jwt.has_payload_claim(groups_claim)) - credentials.setGroups(parseGroupsFromJsonArray(decoded_jwt.get_payload_claim(groups_claim).as_array())); - else - LOG_TRACE(getLogger("TokenAuthentication"), "{}: Specified groups_claim {} not found in token, no external roles will be mapped", processor_name, groups_claim); + if (decoded_jwt.has_payload_claim(groups_claim)) + credentials.setGroups(parseGroupsFromJsonArray(decoded_jwt.get_payload_claim(groups_claim).as_array())); + else + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Specified groups_claim {} not found in token, no external roles will be mapped", processor_name, groups_claim); - return true; + return true; + } + catch (const std::exception & ex) + { + LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to validate JWT: {}", processor_name, ex.what()); + return false; + } } } diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp index b56113061fcb..5cd68e60276f 100644 --- a/src/Access/TokenProcessorsOpaque.cpp +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -1,7 +1,9 @@ #include 
"TokenProcessors.h" #if USE_JWT_CPP +#include #include +#include #include #include #include @@ -56,6 +58,26 @@ namespace return value.get(); } + /// Bound every IdP-bound HTTP call (OIDC discovery, userinfo, introspection) + /// to a known limit. Without this, Poco's default `HTTPSession` timeout of + /// 60 seconds applies, and because `ExternalAuthenticators::mutex` is held + /// for the entire duration of `checkTokenCredentials` -- including the + /// outbound call this function makes -- a single slow or hung IdP would + /// stall the whole auth subsystem (LDAP, Kerberos, HTTP basic, every other + /// token auth) for up to a full minute per request. + /// + /// 10 seconds is a deliberately conservative cap: well above any healthy + /// IdP latency, well below the default. Operators who need a different + /// value would have to expose this via per-processor config; for now it + /// is hard-coded so deployments inherit the bounded behavior automatically. + constexpr int kIdpHttpTimeoutSeconds = 10; + + void applyIdpSessionTimeouts(Poco::Net::HTTPClientSession & session) + { + const Poco::Timespan timeout(kIdpHttpTimeoutSeconds, 0); + session.setTimeout(timeout, timeout, timeout); + } + picojson::object getObjectFromURI(const Poco::URI & uri, const String & token = "") { Poco::Net::HTTPResponse response; @@ -67,12 +89,14 @@ namespace if (uri.getScheme() == "https") { Poco::Net::HTTPSClientSession session(uri.getHost(), uri.getPort()); + applyIdpSessionTimeouts(session); session.sendRequest(request); Poco::StreamCopier::copyStream(session.receiveResponse(response), responseString); } else { Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort()); + applyIdpSessionTimeouts(session); session.sendRequest(request); Poco::StreamCopier::copyStream(session.receiveResponse(response), responseString); } @@ -93,6 +117,28 @@ namespace } } +GoogleTokenProcessor::GoogleTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & 
username_claim_, + const String & groups_claim_, + const String & expected_audience_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) + , expected_audience(expected_audience_) +{ + /// Without an audience pin, this processor accepts any Google access token + /// that authenticates the user against Google -- including tokens minted for + /// completely unrelated OAuth clients (a classic confused-deputy scenario). + /// Operators who actually want token-based auth almost always want it bound + /// to their own client_id; surface this gap loudly at startup so it can't + /// stay silently un-enforced. + if (expected_audience.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_audience' is not configured for Google token processor. " + "Any valid Google access token (regardless of issuing client) will be accepted; " + "set 'expected_audience' to the OAuth client_id this processor should accept.", + processor_name); +} + bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const { const String & token = credentials.getToken(); @@ -110,9 +156,41 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co String user_name = user_info[username_claim]; + auto token_info = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/tokeninfo"), token); + + /// Audience binding (H-10): the Google /tokeninfo endpoint authoritatively + /// reports the OAuth client_id the access token was issued for in its 'aud' + /// field. Without this check, a token minted for any other Google OAuth + /// client (the user's mobile app, a third-party tool) would authenticate + /// here too -- because Google /userinfo will happily honor any valid token. + /// Refusing tokens whose 'aud' does not match the configured client pin is + /// what makes the binding strict. 
+ if (!expected_audience.empty()) + { + const auto aud = getValueByKey(token_info, "aud").value_or(""); + if (aud != expected_audience) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Google access token audience '{}' does not match configured 'expected_audience' '{}'; rejecting", + processor_name, aud, expected_audience); + return false; + } + } + + /// Reject empty resolved username (M-27). `TokenCredentials::setUserName` + /// leaves `is_ready=false` for empty input but the function would still + /// return true; the cache would then accept an entry under user_name "", + /// collapsing every empty-username token across all IdPs into the same + /// dynamic ClickHouse user. + if (user_name.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from token is empty; rejecting", processor_name); + return false; + } + credentials.setUserName(user_name); - auto token_info = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/tokeninfo"), token); if (token_info.contains("exp")) credentials.setExpiresAt(std::chrono::system_clock::from_time_t(static_cast(getValueByKey(token_info, "exp").value()))); @@ -144,20 +222,43 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co } auto group_data = group.get(); - String group_name = getValueByKey(group_data["groupKey"].get(), "id").value_or(""); + + /// Guard against a missing or non-object `groupKey`. Without + /// these checks `group_data["groupKey"].get()` + /// would auto-insert a null `picojson::value` (because picojson + /// objects are `std::map` and `[]` + /// default-constructs on a missing key) and then throw + /// `std::bad_cast` on the `.get()` call -- + /// which the `catch (const Exception &)` below does NOT + /// catch (`std::bad_cast` is `std::exception`-derived, not + /// `DB::Exception`-derived). The uncaught exception used to + /// propagate out of `resolveAndValidate` and abort auth. 
+ auto group_key_it = group_data.find("groupKey"); + if (group_key_it == group_data.end() || !group_key_it->second.is()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Group entry without a 'groupKey' object; skipping", processor_name); + continue; + } + + String group_name = getValueByKey(group_key_it->second.get(), "id").value_or(""); if (!group_name.empty()) { external_groups_names.insert(group_name); LOG_TRACE(getLogger("TokenAuthentication"), - "{}: User {}: new external group {}", processor_name, user_name, group_name); + "{}: User {}: new external group {}", + processor_name, quoteString(user_name), quoteString(group_name)); } } credentials.setGroups(external_groups_names); } - catch (const Exception & e) + catch (const std::exception & e) { - /// Could not get groups info. Log it and skip it. + /// Defense in depth: catch `std::exception` (not just `DB::Exception`) + /// so picojson's `std::bad_cast` and `std::runtime_error` -- and any + /// other future deviation -- degrade to "no roles mapped" rather + /// than aborting the whole authentication. LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to get Google groups, no external roles will be mapped. reason: {}", processor_name, e.what()); return true; @@ -167,6 +268,27 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co return true; } +AzureTokenProcessor::AzureTokenProcessor(const String & processor_name_, + UInt64 token_cache_lifetime_, + const String & username_claim_, + const String & groups_claim_, + const String & expected_audience_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) + , expected_audience(expected_audience_) +{ + /// Without an audience pin, this processor accepts any Azure AD access token + /// that Microsoft Graph happens to honor -- which includes tokens minted for + /// other applications inside the same tenant. 
Surface the gap so operators + /// can lock the processor to their own application's audience. + if (expected_audience.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_audience' is not configured for Azure token processor. " + "Any Azure access token Microsoft Graph accepts will authenticate here, " + "regardless of which application it was issued for; set 'expected_audience' " + "to the audience this processor should accept.", + processor_name); +} + bool AzureTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const { /// Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS. @@ -177,22 +299,66 @@ bool AzureTokenProcessor::resolveAndValidate(TokenCredentials & credentials) con const String & token = credentials.getToken(); + String username; try { picojson::object user_info_json = getObjectFromURI(Poco::URI("https://graph.microsoft.com/oidc/userinfo"), token); - String username = getValueByKey(user_info_json, username_claim).value(); - - if (!username.empty()) - credentials.setUserName(username); - else - LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to get username with token", processor_name); - + username = getValueByKey(user_info_json, username_claim).value(); } catch (...) { return false; } + /// Audience binding (H-10): only after Microsoft Graph has accepted the + /// token (proving it is a real, signed Azure AD token) do we trust its + /// claims. We then enforce that the 'aud' claim matches the operator-pinned + /// audience -- without this check, *any* token issued for *any* application + /// in the tenant that has Graph access would authenticate. With the check, + /// tokens minted for other applications are rejected even though Graph + /// itself would honor them. 
+ if (!expected_audience.empty()) + { + try + { + auto decoded_token = jwt::decode(token); + if (!decoded_token.has_audience()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Azure access token has no 'aud' claim; cannot enforce 'expected_audience' '{}'; rejecting", + processor_name, expected_audience); + return false; + } + const auto auds = decoded_token.get_audience(); + if (auds.find(expected_audience) == auds.end()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Azure access token audience does not contain configured 'expected_audience' '{}'; rejecting", + processor_name, expected_audience); + return false; + } + } + catch (const std::exception & e) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Failed to decode Azure access token while enforcing 'expected_audience': {}; rejecting", + processor_name, e.what()); + return false; + } + } + + /// Reject empty resolved username (M-27). Previously this branch only + /// logged the gap and proceeded to return true at the end of the function, + /// which would cache an entry under user_name "" and collapse every + /// empty-username token across all IdPs into the same dynamic user. + if (username.empty()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Resolved username from token is empty; rejecting", processor_name); + return false; + } + credentials.setUserName(username); + try { credentials.setExpiresAt(jwt::decode(token).get_expires_at()); @@ -229,20 +395,43 @@ bool AzureTokenProcessor::resolveAndValidate(TokenCredentials & credentials) con } auto group_data = group.get(); - if (!group_data.contains("displayName")) + + /// Use the immutable `id` (GUID), not the mutable `displayName`, + /// for role-mapping. `displayName` can be renamed by an Azure AD + /// admin -- and on rename, every ClickHouse role-mapping regex + /// that referenced the old name silently stops matching, while + /// every regex that matches the new name silently starts. 
Two + /// distinct AAD groups can also share a display name and merge + /// into a single ClickHouse group; deleting and recreating a + /// group with the same name silently inherits the old grants. + /// `id` is a GUID assigned by AAD at group creation; it never + /// changes, never collides, and is never reused. + /// + /// Operators upgrading from a build that emitted `displayName` + /// must update their `roles_filter` / `roles_transform` regex + /// to reference the GUIDs Azure AD assigns to the groups they + /// want to map. The role identifier is not human-friendly -- + /// that is the cost of using an immutable handle. + if (!group_data.contains("id")) continue; - String group_name = getValueByKey(group_data, "displayName").value_or(""); + String group_name = getValueByKey(group_data, "id").value_or(""); if (!group_name.empty()) { external_groups_names.insert(group_name); - LOG_TRACE(getLogger("TokenAuthentication"), "{}: User {}: new external group {}", processor_name, credentials.getUserName(), group_name); + String display_name = getValueByKey(group_data, "displayName").value_or(""); + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: User {}: new external group id={} (displayName={})", + processor_name, quoteString(credentials.getUserName()), + quoteString(group_name), quoteString(display_name)); } } } - catch (const Exception & e) + catch (const std::exception & e) { - /// Could not get groups info. Log it and skip it. + /// Defense in depth (M-10 sibling): broadened to `std::exception` so a + /// picojson `std::bad_cast` from a malformed response degrades to "no + /// roles mapped" rather than aborting the whole authentication. LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to get Azure groups, no external roles will be mapped. 
reason: {}", processor_name, e.what()); return true; @@ -267,15 +456,35 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), userinfo_endpoint(userinfo_endpoint_), token_introspection_endpoint(token_introspection_endpoint_) { + /// Without `jwks_uri`, no `jwt_validator` is created and so `expected_issuer` + /// / `expected_audience` cannot be enforced anywhere on the validation path + /// -- the runtime falls straight to the userinfo endpoint, which only + /// answers "the IdP describes this user", not "the token's `iss`/`aud` + /// match what this deployment pinned". Refuse to load with that combination + /// rather than silently dropping the operator's bindings. + if (jwks_uri_.empty() && (!expected_issuer_.empty() || !expected_audience_.empty())) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: 'expected_issuer' / 'expected_audience' are configured but no 'jwks_uri' is provided. " + "These bindings can only be enforced via local JWT validation against a JWKS; the userinfo " + "fallback alone cannot enforce them. Configure 'jwks_uri' (or, if you intentionally want " + "userinfo-only validation, clear 'expected_issuer'/'expected_audience').", + processor_name); + if (!jwks_uri_.empty()) { LOG_TRACE(getLogger("TokenAuthentication"), "{}: JWKS URI set, local JWT processing will be attempted", processor_name_); + /// `expected_typ` is left empty here: OpenID's JWT-fastpath inherits no + /// `typ` enforcement from the operator config (the parser doesn't surface + /// `expected_typ` for the `openid` processor type yet). Operators who + /// want strict `typ` enforcement should use `jwt_static_jwks` / + /// `jwt_dynamic_jwks` directly instead of `openid`. 
jwt_validator.emplace(processor_name_ + "jwks_val", token_cache_lifetime_, username_claim_, groups_claim_, expected_issuer_, expected_audience_, + /*expected_typ=*/"", allow_no_expiration_, "", verifier_leeway_, @@ -293,26 +502,169 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, bool allow_no_expiration_, const String & openid_config_endpoint_, UInt64 verifier_leeway_, - UInt64 jwks_cache_lifetime_) + UInt64 jwks_cache_lifetime_, + const RemoteHostFilter & remote_host_filter_, + bool allow_http_discovery_urls_) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) { + /// Defense in depth: the discovery endpoint itself was already validated by + /// the parser, but re-check here in case this constructor is reached via a + /// future code path that bypasses parseTokenProcessor. + try + { + remote_host_filter_.checkURL(Poco::URI(openid_config_endpoint_)); + } + catch (const Exception & e) + { + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: 'configuration_endpoint' URL '{}' is not in : {}", + processor_name, openid_config_endpoint_, e.message()); + } + const picojson::object openid_config = getObjectFromURI(Poco::URI(openid_config_endpoint_)); - if (!openid_config.contains("userinfo_endpoint") || !openid_config.contains("introspection_endpoint")) - throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: Cannot extract userinfo_endpoint or introspection_endpoint from OIDC configuration, consider manual configuration.", processor_name); + /// Only `userinfo_endpoint` is mandatory: it backs the runtime userinfo + /// fallback (and is the sole user-info source when no JWKS is configured). + /// `introspection_endpoint` is currently unused at runtime -- it's plumbed + /// for a future RFC 7662 introspection feature -- so a discovery document + /// that omits it should not block processor construction. 
+ if (!openid_config.contains("userinfo_endpoint")) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "{}: Cannot extract userinfo_endpoint from OIDC configuration at '{}'; consider manual configuration.", + processor_name, openid_config_endpoint_); + + /// The discovery document is untrusted: even with the issuer-anchor check + /// below (H-08), a poisoned or misdirected response can still try to point + /// trust-chain endpoints (jwks_uri, userinfo_endpoint, introspection_endpoint) + /// at hosts the operator never approved. Refuse to load the processor when + /// any returned URL is outside <remote_url_allow_hosts>; this prevents the + /// server from reaching out to attacker-controlled endpoints during token + /// validation. + /// + /// Additionally, refuse non-HTTPS schemes on discovery-returned URLs. + /// Without this, an attacker who can MITM the discovery fetch (operator + /// typed an `http://` configuration_endpoint, or any TLS interception path) + /// can substitute a discovery doc whose `jwks_uri` is `http://169.254.169.254/...` + /// (cloud metadata), `http://127.0.0.1:...` (local admin ports), or + /// `http://kubernetes.default.svc:...` -- and the server issues a one-shot + /// HTTP GET under its own process identity. `<remote_url_allow_hosts>` is + /// the primary defense, but not every deployment configures it; an + /// HTTPS-only rule on returned URLs is a cheap, orthogonal layer that + /// blocks all three of those targets independently. Operators who run an + /// IdP over plain HTTP intentionally can wire the trust chain manually + /// (`userinfo_endpoint`/`token_introspection_endpoint`/`jwks_uri` directly) + /// instead of relying on discovery, or opt out of this check by setting + /// `<allow_http_discovery_urls>true</allow_http_discovery_urls>` on the + /// processor (false by default; <remote_url_allow_hosts> still applies). + if (allow_http_discovery_urls_) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'allow_http_discovery_urls' is enabled; HTTPS check on URLs returned by OIDC discovery " + "is suppressed. 
Make sure restricts which targets the server may " + "be redirected to via a poisoned discovery document.", + processor_name); + auto require_allowed_discovery_url = [&](const std::string & url, const char * field) + { + Poco::URI parsed_uri(url); + if (!allow_http_discovery_urls_ && parsed_uri.getScheme() != "https") + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery at '{}' returned non-HTTPS '{}' URL '{}' (scheme '{}'). " + "The trust-chain URLs from discovery must use HTTPS so a poisoned discovery " + "document cannot redirect token validation through internal endpoints " + "(cloud metadata, localhost, in-cluster service IPs). If the IdP genuinely " + "runs over plain HTTP, either configure the trust chain manually instead of " + "using 'configuration_endpoint', or set " + "'true' on this processor.", + processor_name, openid_config_endpoint_, field, url, parsed_uri.getScheme()); + + try + { + remote_host_filter_.checkURL(parsed_uri); + } + catch (const Exception & e) + { + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery at '{}' returned '{}' URL '{}' which is not in " + ": {}", + processor_name, openid_config_endpoint_, field, url, e.message()); + } + }; + + require_allowed_discovery_url(getValueByKey(openid_config, "userinfo_endpoint").value(), "userinfo_endpoint"); + if (openid_config.contains("introspection_endpoint")) + require_allowed_discovery_url(getValueByKey(openid_config, "introspection_endpoint").value(), "introspection_endpoint"); + if (openid_config.contains("jwks_uri")) + require_allowed_discovery_url(getValueByKey(openid_config, "jwks_uri").value(), "jwks_uri"); + + /// Anchor the discovery document to a known issuer when one is configured. + /// + /// OIDC Discovery 1.0 §4.3 / RFC 8414 §3.3 require the metadata's "issuer" + /// to be tied to the URL used to fetch it. 
Without this anchor a poisoned + /// or misdirected discovery response can redirect the entire trust chain + /// (jwks_uri, userinfo_endpoint, introspection_endpoint) to URLs the + /// operator never approved -- and because the embedded JWT verifier only + /// enforces the `iss` claim when expected_issuer is non-empty, JWTs signed + /// by the attacker's keys would be silently accepted at runtime. + /// + /// Policy: + /// - expected_issuer configured => discovery's "issuer" MUST match it + /// (refuse to construct on mismatch or + /// absence). Verifier is pinned to it. + /// - expected_issuer empty => log a warning so the gap is visible + /// in operator logs, then proceed with + /// the historical (lax) behavior. The + /// verifier is left without an issuer + /// pin to preserve compatibility. + const auto issuer_from_discovery = getValueByKey(openid_config, "issuer").value_or(""); + + if (!expected_issuer_.empty()) + { + if (issuer_from_discovery.empty()) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery document at '{}' does not advertise an 'issuer'; " + "cannot verify it against the configured 'expected_issuer' '{}'.", + processor_name, openid_config_endpoint_, expected_issuer_); + + if (issuer_from_discovery != expected_issuer_) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery 'issuer' mismatch: configured 'expected_issuer' is '{}' " + "but discovery document at '{}' returned issuer '{}'. Refusing to load the " + "processor to avoid trusting metadata that belongs to a different issuer.", + processor_name, expected_issuer_, openid_config_endpoint_, issuer_from_discovery); + } + else + { + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_issuer' is not configured for OIDC discovery at '{}'. 
" + "The JWT 'iss' claim will NOT be enforced.", processor_name, openid_config_endpoint_); + } userinfo_endpoint = Poco::URI(getValueByKey(openid_config, "userinfo_endpoint").value()); - token_introspection_endpoint = Poco::URI(getValueByKey(openid_config, "introspection_endpoint").value()); + if (openid_config.contains("introspection_endpoint")) + token_introspection_endpoint = Poco::URI(getValueByKey(openid_config, "introspection_endpoint").value()); + + /// See manual-constructor comment: `expected_issuer` / `expected_audience` + /// can only be enforced via local JWT validation. If the discovery document + /// does not advertise a `jwks_uri`, no `jwt_validator` will be created and + /// the userinfo fallback alone cannot enforce these bindings. Refuse the + /// configuration rather than silently dropping them. + if (!openid_config.contains("jwks_uri") && (!expected_issuer_.empty() || !expected_audience_.empty())) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: OIDC discovery at '{}' did not advertise a 'jwks_uri', but 'expected_issuer' / " + "'expected_audience' are configured. These bindings can only be enforced via local JWT " + "validation against a JWKS; userinfo cannot enforce them. Refusing to load.", + processor_name, openid_config_endpoint_); if (openid_config.contains("jwks_uri")) { LOG_TRACE(getLogger("TokenAuthentication"), "{}: JWKS URI set, local JWT processing will be attempted", processor_name_); + /// `expected_typ` empty for the same reason as the manual constructor. 
jwt_validator.emplace(processor_name_ + "jwks_val", token_cache_lifetime_, username_claim_, groups_claim_, expected_issuer_, expected_audience_, + /*expected_typ=*/"", allow_no_expiration_, "", verifier_leeway_, @@ -327,8 +679,34 @@ bool OpenIdTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co String username; picojson::object user_info_json; - if (jwt_validator.has_value() && jwt_validator.value().resolveAndValidate(credentials)) + if (jwt_validator.has_value()) { + /// When a `jwt_validator` is configured, it owns the operator's + /// `expected_issuer` / `expected_audience` / `allow_no_expiration` + /// bindings. If it rejects the token we MUST NOT fall back to the + /// userinfo endpoint: userinfo only confirms "the IdP describes this + /// user", it has no notion of the operator-pinned audience or issuer + /// and does not enforce the local expiration policy. Falling back here + /// would silently bypass exactly the bindings the operator opted into, + /// e.g. a JWT with the wrong `aud` would still authenticate because + /// the IdP's own userinfo accepts it for itself. + if (!jwt_validator.value().resolveAndValidate(credentials)) + { + /// DEBUG, not TRACE: this is the binding-rejection path. Operators + /// running with DEBUG enabled will see a clear signal that the + /// JWT-fastpath (which enforces `expected_issuer` / `expected_audience` + /// / `allow_no_expiration`) rejected a token. The auth failure itself + /// is also visible to the client, but the log line tells the operator + /// *why* it was rejected on the local side. + LOG_DEBUG(getLogger("TokenAuthentication"), + "{}: Local JWT validation rejected the token. 
Refusing to fall back to " + "userinfo: the operator-configured bindings (expected_issuer / expected_audience / " + "allow_no_expiration) cannot be enforced by userinfo, and a fallback would silently " + "bypass them.", + processor_name); + return false; + } + try { auto decoded_token = jwt::decode(token); @@ -341,11 +719,32 @@ bool OpenIdTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co } catch (const std::exception & ex) { - LOG_TRACE(getLogger("TokenAuthentication"), "{}: Failed to process token as JWT: {}", processor_name, ex.what()); + /// WARNING: validation passed but extracting the payload locally + /// failed -- a genuinely rare condition (the same token was just + /// successfully verified, so its bytes ARE a valid JWT). The + /// processor is about to fall back to userinfo for username + /// extraction. Bindings were already enforced by `jwt_validator`, + /// so this fallback is safe -- but the underlying mismatch + /// (decode failure on a verified token) usually means an IdP + /// behavioral change, a clock skew, or a payload-format drift, + /// and operators should know about it loudly. + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: JWT validation succeeded but payload extraction failed: {}. 
" + "Falling back to userinfo for username; the operator-configured " + "bindings have ALREADY been enforced by JWT validation, so this " + "fallback is safe -- but the decode failure indicates an unexpected " + "JWT shape from the IdP.", + processor_name, ex.what()); } } - /// If username or user info is empty -- local validation failed, trying introspection via provider + /// Userinfo path: only reachable when no `jwt_validator` is configured + /// (the constructor guarantees that combination is incompatible with any + /// `expected_issuer` / `expected_audience` pin), or when local JWT validation + /// passed but extracting the username/payload from the decoded token failed + /// for an unrelated reason -- in which case the bindings have already been + /// enforced by `jwt_validator` and userinfo is just being asked for the user + /// identity. if (username.empty() || user_info_json.empty()) { try diff --git a/src/Access/TokenProcessorsParse.cpp b/src/Access/TokenProcessorsParse.cpp index fa83c5fa6a34..62602b1efd81 100644 --- a/src/Access/TokenProcessorsParse.cpp +++ b/src/Access/TokenProcessorsParse.cpp @@ -1,7 +1,9 @@ #include "TokenProcessors.h" +#include #include #include +#include namespace DB { @@ -27,44 +29,109 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( auto groups_claim = config.getString(prefix + ".groups_claim", "groups"); auto expected_issuer = config.getString(prefix + ".expected_issuer", ""); auto expected_audience = config.getString(prefix + ".expected_audience", ""); + /// `expected_typ` is the JWT header `typ` to require. RFC 8725 §3.11 and + /// RFC 9068 recommend type discrimination to prevent cross-token-class + /// substitution -- e.g. accepting an ID token (intended for client login) + /// where an access token (intended for resource access) is expected. + /// Common values: "at+jwt" (RFC 9068 access tokens), "JWT" (generic). 
+ /// Empty (the default) means no `typ` enforcement; the JWT processors warn + /// at startup when this is left empty so the gap is visible. + auto expected_typ = config.getString(prefix + ".expected_typ", ""); auto allow_no_expiration = config.getBool(prefix + ".allow_no_expiration", false); + /// Constrain every OIDC/JWT trust-chain fetch (discovery, userinfo, + /// introspection, JWKS) to the operator-approved <remote_url_allow_hosts>. + /// + /// Without this gate, any URL the operator pastes into the processor config + /// -- and any URL returned by an OIDC discovery document -- is fetched + /// blindly. A misconfigured or attacker-influenced discovery response can + /// then redirect token validation through hosts the operator never approved. + /// + /// We pre-validate every URL the operator typed into the processor config + /// here, at parse time, so a bad config fails fast at startup rather than + /// at first authentication. Discovery-derived URLs (jwks_uri etc.) are + /// validated separately, after the discovery fetch, inside the processor. + /// + /// If <remote_url_allow_hosts> is absent the filter degrades to its + /// historical permissive behavior: this matches every other ClickHouse + /// outbound URL site and avoids breaking existing deployments. 
+ RemoteHostFilter remote_host_filter; + remote_host_filter.setValuesFromConfig(config); + + auto require_allowed_url = [&](const String & raw_url, const char * param_name) + { + if (raw_url.empty()) + return; + try + { + remote_host_filter.checkURL(Poco::URI(raw_url)); + } + catch (const Exception & e) + { + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Token processor '{}': '{}' URL '{}' is not in : {}", + processor_name, param_name, raw_url, e.message()); + } + }; + if (provider_type == "google") { - return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim); + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience); } else if (provider_type == "azure") { - return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim); + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience); } else if (provider_type == "openid") { auto verifier_leeway = config.getUInt64(prefix + ".verifier_leeway", 60); auto jwks_cache_lifetime = config.getUInt64(prefix + ".jwks_cache_lifetime", 3600); + /// `token_introspection_endpoint` is currently unused at runtime: the + /// processor relies on JWT-local validation (when JWKS is configured) + /// or on userinfo, never on RFC 7662 introspection. Don't require it + /// for "locally configured" mode -- forcing operators to set a value + /// that does nothing is a footgun. If introspection is wired up later, + /// the field is already plumbed and can become required at that point. bool externally_configured = config.hasProperty(prefix + ".configuration_endpoint") && !config.hasProperty(prefix + ".jwks_uri"); - bool locally_configured = config.hasProperty(prefix + ".userinfo_endpoint") && config.hasProperty(prefix + ".token_introspection_endpoint"); + bool locally_configured = config.hasProperty(prefix + ".userinfo_endpoint"); if (externally_configured && ! 
locally_configured) { + const auto configuration_endpoint = config.getString(prefix + ".configuration_endpoint"); + require_allowed_url(configuration_endpoint, "configuration_endpoint"); + /// Opt-out for the HTTPS-on-discovery-returned-URLs check. False by + /// default; operators who knowingly run an IdP over plain HTTP can + /// enable it without falling back to manual trust-chain config. + const auto allow_http_discovery_urls = config.getBool(prefix + ".allow_http_discovery_urls", false); return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, allow_no_expiration, - config.getString(prefix + ".configuration_endpoint"), + configuration_endpoint, verifier_leeway, - jwks_cache_lifetime); + jwks_cache_lifetime, + remote_host_filter, + allow_http_discovery_urls); } else if (locally_configured && !externally_configured) { + const auto userinfo_endpoint = config.getString(prefix + ".userinfo_endpoint"); + const auto token_introspection_endpoint = config.getString(prefix + ".token_introspection_endpoint", ""); + const auto jwks_uri = config.getString(prefix + ".jwks_uri", ""); + require_allowed_url(userinfo_endpoint, "userinfo_endpoint"); + require_allowed_url(token_introspection_endpoint, "token_introspection_endpoint"); + require_allowed_url(jwks_uri, "jwks_uri"); return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, allow_no_expiration, - config.getString(prefix + ".userinfo_endpoint"), - config.getString(prefix + ".token_introspection_endpoint"), + userinfo_endpoint, + token_introspection_endpoint, verifier_leeway, - config.getString(prefix + ".jwks_uri", ""), + jwks_uri, jwks_cache_lifetime); } - throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Either 'configuration_endpoint' or both 'userinfo_endpoint' and 'token_introspection_endpoint' (and, optionally, 'jwks_uri') must be specified for 'openid' processor"); 
+ throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Either 'configuration_endpoint' or 'userinfo_endpoint' " + "(and, optionally, 'token_introspection_endpoint' / 'jwks_uri') must be specified for 'openid' processor"); } else if (provider_type == "jwt_static_key") { @@ -81,8 +148,9 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( config.getString(prefix + ".private_key", ""), config.getString(prefix + ".public_key_password", ""), config.getString(prefix + ".private_key_password", ""), - config.getString(prefix + ".claims", "")}; - return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, allow_no_expiration, params); + config.getString(prefix + ".claims", ""), + config.getUInt64(prefix + ".verifier_leeway", 60)}; + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, expected_typ, allow_no_expiration, params); } else if (provider_type == "jwt_static_jwks") { @@ -101,9 +169,9 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( config.getString(prefix + ".static_jwks_file", "") }; return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, - expected_issuer, expected_audience, allow_no_expiration, + expected_issuer, expected_audience, expected_typ, allow_no_expiration, config.getString(prefix + ".claims", ""), - config.getUInt64(prefix + ".verifier_leeway", 0), + config.getUInt64(prefix + ".verifier_leeway", 60), std::make_shared(params)); } if (provider_type == "jwt_dynamic_jwks") @@ -113,11 +181,13 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( if (!config.hasProperty(prefix + ".jwks_uri")) throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'jwks_uri' must be specified for 'jwt_dynamic_jwks' processor"); + const auto jwks_uri = config.getString(prefix + ".jwks_uri"); + require_allowed_url(jwks_uri, "jwks_uri"); return std::make_unique(processor_name, 
token_cache_lifetime, username_claim, groups_claim, - expected_issuer, expected_audience, allow_no_expiration, + expected_issuer, expected_audience, expected_typ, allow_no_expiration, config.getString(prefix + ".claims", ""), - config.getUInt64(prefix + ".verifier_leeway", 0), - config.getString(prefix + ".jwks_uri"), + config.getUInt64(prefix + ".verifier_leeway", 60), + jwks_uri, config.getUInt(prefix + ".jwks_cache_lifetime", 3600)); } else diff --git a/src/Interpreters/ClientInfo.cpp b/src/Interpreters/ClientInfo.cpp index 39bc2e0ec362..e86583295995 100644 --- a/src/Interpreters/ClientInfo.cpp +++ b/src/Interpreters/ClientInfo.cpp @@ -147,13 +147,23 @@ void ClientInfo::write(WriteBuffer & out, UInt64 server_protocol_revision) const if (server_protocol_revision >= DBMS_MIN_REVISON_WITH_JWT_IN_INTERSERVER) { - if (!jwt.empty()) - { - writeBinary(static_cast(1), out); - writeBinary(jwt, out); - } - else - writeBinary(static_cast(0), out); + /// Never serialize the bearer token over the interserver wire. + /// + /// Distributed queries use this `ClientInfo` to fan out to remote shards + /// and replicas. Interserver transport is plaintext by default + /// (`interserver_http_port` vs `interserver_https_port`), so writing the + /// raw JWT here exposes session credentials on the internal network for + /// every distributed query whenever the operator hasn't opted into TLS + /// for interserver -- and no code on the receiving side currently reads + /// `client_info.jwt`, so the transmission is pure leakage with no + /// functional benefit. + /// + /// The JWT-presence flag byte is still emitted (always `0` = "no JWT") + /// to preserve wire compatibility with peers that expect this field at + /// this offset; receivers will read it as "no JWT present" and skip + /// the body. The `jwt` member of `ClientInfo` is retained for any + /// in-process use within the same node.
+ writeBinary(static_cast(0), out); } } @@ -346,8 +356,17 @@ void ClientInfo::setFromHTTPRequest(const Poco::Net::HTTPRequest & request) for (const auto & header : request) { /// These headers can contain authentication info and shouldn't be accessible by the user. + /// + /// The standard HTTP authorization header is `Authorization` (RFC 7235 §4.2); + /// `Authentication` is not a registered header (RFC 7615 defines `Authentication-Info`), and ClickHouse does not use it + /// for credentials. Filter both: `Authorization` is the actual credential header + /// (Basic, Bearer, etc.) and must not be exposed via `getClientHTTPHeader` or + /// relayed through `` on HTTP auth servers; `Authentication` is + /// filtered defensively to preserve prior behavior. String key_lowercase = Poco::toLower(header.first); - if (key_lowercase.starts_with("x-clickhouse") || key_lowercase == "authentication") + if (key_lowercase.starts_with("x-clickhouse") + || key_lowercase == "authorization" + || key_lowercase == "authentication") continue; http_headers[header.first] = header.second; } diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 7bf6d4ebfb1d..21b07e3ebc9b 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -385,6 +386,13 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So user_id = auth_result.user_id; user_authenticated_with = auth_result.authentication_data; settings_from_auth_server = auth_result.settings; + + /// Bind the session lifetime to the access-token lifetime when applicable.
+ if (const auto * token_credentials = typeid_cast(&credentials_)) + auth_token_expires_at = token_credentials->getExpiresAt(); + else + auth_token_expires_at.reset(); + LOG_DEBUG(log, "{} Authenticated with global context as user {}", toString(auth_id), toString(*user_id)); @@ -411,13 +419,33 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So void Session::checkIfUserIsStillValid() { + const auto now = std::chrono::system_clock::now(); + if (const auto valid_until = user_authenticated_with.getValidUntil()) { - const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - - if (now > valid_until) + if (std::chrono::system_clock::to_time_t(now) > valid_until) throw Exception(ErrorCodes::USER_EXPIRED, "Authentication method used has expired"); } + + /// For sessions established via a bearer/access token (JWT or opaque), enforce token expiry. + if (auth_token_expires_at.has_value() && now >= *auth_token_expires_at) + throw Exception(ErrorCodes::USER_EXPIRED, "Access token used to authenticate the session has expired"); + + /// For JWT/token sessions, also re-validate that the authenticating + /// processor is still configured. Without this, an admin removing a + /// processor (or disabling token auth entirely) would NOT terminate + /// active sessions until each session's token expired naturally -- a + /// gap of up to one token TTL (~1h for typical IdPs) between the + /// admin's "stop accepting tokens from this IdP" intent and actual + /// session termination (M-28). + if (user_authenticated_with.getType() == AuthenticationType::JWT) + { + const auto & processor_name = user_authenticated_with.getTokenProcessorName(); + if (!global_context->getAccessControl().getExternalAuthenticators().hasTokenProcessor(processor_name)) + throw Exception(ErrorCodes::USER_EXPIRED, + "Token processor '{}' that authenticated this session is no longer configured", + processor_name.empty() ? 
"" : processor_name); + } } void Session::onAuthenticationFailure(const std::optional & user_name, const Poco::Net::SocketAddress & address_, const Exception & e) diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index babd16a9975a..f62113f83452 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -121,6 +121,10 @@ class Session std::vector external_roles; AuthenticationData user_authenticated_with; + /// When the user was authenticated with a bearer/access token, this holds the + /// effective token expiry captured at authentication time. + std::optional auth_token_expires_at; + ContextMutablePtr session_context; mutable bool query_context_created = false; diff --git a/src/Parsers/Access/ASTAuthenticationData.cpp b/src/Parsers/Access/ASTAuthenticationData.cpp index 7fe8de9bdb5b..ec1cfc02908a 100644 --- a/src/Parsers/Access/ASTAuthenticationData.cpp +++ b/src/Parsers/Access/ASTAuthenticationData.cpp @@ -116,12 +116,29 @@ void ASTAuthenticationData::formatImpl(WriteBuffer & ostr, const FormatSettings } case AuthenticationType::JWT: { - if (!children.empty()) + /// JWT carries two independent optional clauses (PROCESSOR and + /// CLAIMS), so it does not fit the single-prefix/single-parameter + /// shape the rest of this function uses. Emit directly here and + /// short-circuit the prefix/parameter pipeline by returning at + /// the end of this case. 
+ ostr << " " << auth_type_name; + + size_t child_idx = 0; + if (has_jwt_processor) { - prefix = "CLAIMS"; - parameter = true; + ostr << " PROCESSOR "; + children[child_idx++]->format(ostr, settings); } - break; + if (has_jwt_claims) + { + ostr << " CLAIMS "; + children[child_idx++]->format(ostr, settings); + } + + if (valid_until) + formatValidUntil(*valid_until, ostr, settings); + + return; } case AuthenticationType::LDAP: { diff --git a/src/Parsers/Access/ASTAuthenticationData.h b/src/Parsers/Access/ASTAuthenticationData.h index ab2da84fcaf2..742c7f5e39d3 100644 --- a/src/Parsers/Access/ASTAuthenticationData.h +++ b/src/Parsers/Access/ASTAuthenticationData.h @@ -41,6 +41,16 @@ class ASTAuthenticationData : public IAST bool contains_password = false; bool contains_hash = false; + /// IDENTIFIED WITH jwt accepts two optional clauses: + /// PROCESSOR '' + /// CLAIMS '' + /// Both are stored in `children` in this order; flags below tell which slots + /// are populated (children layout depends on which were specified). The + /// processor pin is what protects against the H-14 / H-17 cache-priming + /// bypass for SQL-declared JWT users; without it the per-user lookup goes + /// through the iterate-all-processors auto-discovery path with empty pin. 
+ bool has_jwt_processor = false; + bool has_jwt_claims = false; ASTPtr valid_until; protected: diff --git a/src/Parsers/Access/ParserCreateUserQuery.cpp b/src/Parsers/Access/ParserCreateUserQuery.cpp index 5e520151e4f6..372b964d2ce9 100644 --- a/src/Parsers/Access/ParserCreateUserQuery.cpp +++ b/src/Parsers/Access/ParserCreateUserQuery.cpp @@ -83,7 +83,7 @@ namespace bool expect_ssl_cert_subjects = false; bool expect_public_ssh_key = false; bool expect_http_auth_server = false; - bool expect_claims = false; // NOLINT + bool expect_jwt_args = false; auto parse_non_password_based_type = [&](auto check_type) { @@ -105,8 +105,7 @@ namespace else if (check_type == AuthenticationType::HTTP) expect_http_auth_server = true; else if (check_type == AuthenticationType::JWT) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "CREATE USER is not supported for JWT"); - // expect_claims = true; + expect_jwt_args = true; else if (check_type != AuthenticationType::NO_PASSWORD) expect_password = true; @@ -167,6 +166,7 @@ namespace ASTPtr http_auth_scheme; ASTPtr ssl_cert_subjects; std::optional ssl_cert_subject_type; + ASTPtr jwt_processor; ASTPtr jwt_claims; if (expect_password || expect_hash) @@ -232,12 +232,30 @@ namespace return false; } } - else if (expect_claims) + else if (expect_jwt_args) { - if (ParserKeyword{Keyword::CLAIMS}.ignore(pos, expected)) + /// IDENTIFIED WITH jwt accepts two optional clauses, in either order: + /// PROCESSOR '' -- pin to a specific token_processor + /// CLAIMS '' -- per-user claims requirement + /// Either, both, or neither may appear. Pinning a processor is what + /// gates SQL-declared JWT users out of the iterate-all-processors + /// auto-discovery path. 
+ for (size_t i = 0; i < 2; ++i) { - if (!ParserStringAndSubstitution{}.parse(pos, jwt_claims, expected)) - return false; + if (!jwt_processor && ParserKeyword{Keyword::PROCESSOR}.ignore(pos, expected)) + { + if (!ParserStringAndSubstitution{}.parse(pos, jwt_processor, expected)) + return false; + } + else if (!jwt_claims && ParserKeyword{Keyword::CLAIMS}.ignore(pos, expected)) + { + if (!ParserStringAndSubstitution{}.parse(pos, jwt_claims, expected)) + return false; + } + else + { + break; + } } } @@ -265,8 +283,17 @@ namespace if (http_auth_scheme) auth_data->children.push_back(std::move(http_auth_scheme)); + if (jwt_processor) + { + auth_data->has_jwt_processor = true; + auth_data->children.push_back(std::move(jwt_processor)); + } + if (jwt_claims) + { + auth_data->has_jwt_claims = true; auth_data->children.push_back(std::move(jwt_claims)); + } parseValidUntil(pos, expected, auth_data->valid_until); diff --git a/src/Parsers/CommonParsers.h b/src/Parsers/CommonParsers.h index 9376f6110e99..cbaa289d82ae 100644 --- a/src/Parsers/CommonParsers.h +++ b/src/Parsers/CommonParsers.h @@ -422,6 +422,7 @@ namespace DB MR_MACROS(PRIMARY_KEY, "PRIMARY KEY") \ MR_MACROS(PRIORITY, "PRIORITY") \ MR_MACROS(PRIMARY, "PRIMARY") \ + MR_MACROS(PROCESSOR, "PROCESSOR") \ MR_MACROS(PROFILE, "PROFILE") \ MR_MACROS(PROFILES, "PROFILES") \ MR_MACROS(PROJECTION, "PROJECTION") \ diff --git a/src/Server/HTTP/authenticateUserByHTTP.cpp b/src/Server/HTTP/authenticateUserByHTTP.cpp index 670f9c60694a..b1cbb03195da 100644 --- a/src/Server/HTTP/authenticateUserByHTTP.cpp +++ b/src/Server/HTTP/authenticateUserByHTTP.cpp @@ -238,7 +238,15 @@ bool authenticateUserByHTTP( const auto token_credentials = TokenCredentials(bearer_token); const auto & external_authenticators = access_control.getExternalAuthenticators(); - if (!external_authenticators.checkTokenCredentials(token_credentials)) + /// Pre-user-lookup token validation. 
Pass `prime_cache_on_success=false` + /// so this unconstrained call (no processor pin, no JWT claims) does not + /// populate the token cache. The cache is reserved for entries produced + /// by the per-user authentication path (`Authentication::areCredentialsValid`), + /// which applies the user's pinned processor and per-user claims. + /// Without this, a user whose `` block omits `` would + /// satisfy a later cache lookup with empty `processor_name` -- silently + /// inheriting whichever processor happened to win this auto-discovery race. + if (!external_authenticators.checkTokenCredentials(token_credentials, /*processor_name=*/"", /*jwt_claims=*/"", /*prime_cache_on_success=*/false)) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Invalid authentication: Token could not be verified."); current_credentials = std::make_unique(token_credentials); diff --git a/src/Server/TCPHandler.cpp b/src/Server/TCPHandler.cpp index 6aabb6f558bc..09768fca4784 100644 --- a/src/Server/TCPHandler.cpp +++ b/src/Server/TCPHandler.cpp @@ -2064,7 +2064,12 @@ void TCPHandler::receiveHello() const auto & external_authenticators = access_control.getExternalAuthenticators(); - if (!external_authenticators.checkTokenCredentials(credentials)) + /// Pre-user-lookup token validation. Pass `prime_cache_on_success=false` + /// so this unconstrained call (no processor pin, no JWT claims) does not + /// populate the token cache; the per-user authentication path is the only + /// site allowed to populate it, after applying the user's pinned processor + /// and per-user claims. See `ExternalAuthenticators::checkTokenCredentials`. 
+ if (!external_authenticators.checkTokenCredentials(credentials, /*processor_name=*/"", /*jwt_claims=*/"", /*prime_cache_on_success=*/false)) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Token is invalid"); session->authenticate(credentials, getClientAddress(client_info), socket().peerAddress()); diff --git a/tests/integration/test_jwt_auth/test.py b/tests/integration/test_jwt_auth/test.py index 481c8117a73e..f7b00246cdde 100644 --- a/tests/integration/test_jwt_auth/test.py +++ b/tests/integration/test_jwt_auth/test.py @@ -97,3 +97,113 @@ def test_jwks_server_ec_es384(started_cluster): ] ) assert res == "jwt_user\n" + + +# Helper: request `SELECT currentUser()` over HTTP with the given bearer token +# and return the body. Caller decides whether to assert on the username or on +# rejection (rejected requests return a non-username error body). +def http_select_current_user(token: str) -> str: + return client.exec_in_container( + [ + "bash", + "-c", + curl_with_jwt(token=token, ip=cluster.get_instance_ip(instance.name)), + ] + ) + + +def make_token(payload: dict, secret: str) -> str: + """Sign an HS256 JWT with the given secret. Matches the secrets configured + for `single_key_processor` (`my_secret`) and `another_single_key_processor` + (`other_secret`) in `configs/validators.xml`.""" + import jwt + return jwt.encode(payload, secret, algorithm="HS256") + + +def test_sql_create_jwt_user_with_processor_pin(started_cluster): + """SQL `CREATE USER ... IDENTIFIED WITH jwt PROCESSOR ''` actually + pins the auth path: a token that validates against a different processor + in the same chain must NOT authenticate the SQL-pinned user. 
Without the + pin the iterate-all-processors auto-discovery branch would happily accept + either token (this is the H-22 / H-14 bypass surface).""" + + instance.query( + "CREATE USER OR REPLACE sql_jwt_user IDENTIFIED WITH jwt PROCESSOR 'single_key_processor'" + ) + + # Round-trip: SHOW CREATE USER must emit the PROCESSOR clause we just set. + # `TSVRaw` is used so single quotes in the SQL literal are not TSV-escaped. + show = instance.query("SHOW CREATE USER sql_jwt_user FORMAT TSVRaw").strip() + assert "PROCESSOR 'single_key_processor'" in show, show + assert "CLAIMS" not in show, show + + token_my = make_token({"sub": "sql_jwt_user"}, "my_secret") + token_other = make_token({"sub": "sql_jwt_user"}, "other_secret") + + # Pinned processor accepts the my_secret-signed token. + assert http_select_current_user(token_my) == "sql_jwt_user\n" + + # The other_secret-signed token validates fine against + # `another_single_key_processor`, but the user is pinned to + # `single_key_processor` -- the pin must reject it. + rejected = http_select_current_user(token_other) + assert "AUTHENTICATION_FAILED" in rejected, rejected + + # Re-pin via ALTER and the relationship inverts. 
+ instance.query( + "ALTER USER sql_jwt_user IDENTIFIED WITH jwt PROCESSOR 'another_single_key_processor'" + ) + assert http_select_current_user(token_other) == "sql_jwt_user\n" + rejected = http_select_current_user(token_my) + assert "AUTHENTICATION_FAILED" in rejected, rejected + + instance.query("DROP USER sql_jwt_user") + + +def test_sql_create_jwt_user_with_claims(started_cluster): + """`CLAIMS ''` must be enforced for SQL-declared JWT users: a token + that is valid against the pinned processor but lacks the required claim + must be rejected, and a token that has the claim must be accepted.""" + + instance.query( + "CREATE USER OR REPLACE sql_jwt_claims_user " + "IDENTIFIED WITH jwt PROCESSOR 'single_key_processor' " + "CLAIMS '{\"role\":\"admin\"}'" + ) + + show = instance.query("SHOW CREATE USER sql_jwt_claims_user FORMAT TSVRaw").strip() + assert "PROCESSOR 'single_key_processor'" in show, show + assert "CLAIMS '{\"role\":\"admin\"}'" in show, show + + # Token signed with the pinned processor's secret but no `role` claim: + # processor accepts, per-user CLAIMS rejects. + token_no_claim = make_token({"sub": "sql_jwt_claims_user"}, "my_secret") + rejected = http_select_current_user(token_no_claim) + assert "AUTHENTICATION_FAILED" in rejected, rejected + + # Token with the required claim: both gates pass. + token_with_claim = make_token( + {"sub": "sql_jwt_claims_user", "role": "admin"}, "my_secret" + ) + assert http_select_current_user(token_with_claim) == "sql_jwt_claims_user\n" + + instance.query("DROP USER sql_jwt_claims_user") + + +def test_sql_jwt_user_no_pin_uses_auto_discovery(started_cluster): + """Without `PROCESSOR`, the SQL JWT user falls back to auto-discovery: any + configured processor that validates the token will be accepted. 
This is + the documented behavior for users who explicitly chose not to pin.""" + + instance.query("CREATE USER OR REPLACE sql_jwt_unpinned IDENTIFIED WITH jwt") + + show = instance.query("SHOW CREATE USER sql_jwt_unpinned FORMAT TSVRaw").strip() + assert "PROCESSOR" not in show, show + + # Both tokens (each valid against a different processor) authenticate the + # same unpinned SQL user. + for secret in ("my_secret", "other_secret"): + token = make_token({"sub": "sql_jwt_unpinned"}, secret) + assert http_select_current_user(token) == "sql_jwt_unpinned\n" + + instance.query("DROP USER sql_jwt_unpinned") diff --git a/tests/integration/test_keycloak_auth/configs/validators.xml b/tests/integration/test_keycloak_auth/configs/validators.xml index f7e13a6c6784..c98805fc7be8 100644 --- a/tests/integration/test_keycloak_auth/configs/validators.xml +++ b/tests/integration/test_keycloak_auth/configs/validators.xml @@ -9,10 +9,12 @@ 60 - + openid http://keycloak:8080/realms/clickhouse-test/.well-known/openid-configuration + true preferred_username 60 diff --git a/tests/queries/0_stateless/01292_create_user.reference b/tests/queries/0_stateless/01292_create_user.reference index b93bddafb6d5..0aa4225e4394 100644 --- a/tests/queries/0_stateless/01292_create_user.reference +++ b/tests/queries/0_stateless/01292_create_user.reference @@ -125,3 +125,9 @@ CREATE USER u1_01292 IDENTIFIED WITH no_password CREATE USER `u1_01292@192.168.%.%` IDENTIFIED WITH no_password HOST LIKE \'192.168.%.%\' CREATE USER `u2_01292@192.168.%.%` IDENTIFIED WITH no_password HOST LIKE \'192.168.%.%\' -- creating user identified with JWT +CREATE USER user1 IDENTIFIED WITH jwt +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR \'my_processor\' +CREATE USER user1 IDENTIFIED WITH jwt CLAIMS \'{"role":"admin"}\' +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR \'my_processor\' CLAIMS \'{"role":"admin"}\' +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR \'my_processor\' CLAIMS \'{"role":"admin"}\' +CREATE USER 
user1 IDENTIFIED WITH jwt PROCESSOR \'other_processor\' diff --git a/tests/queries/0_stateless/01292_create_user.sql b/tests/queries/0_stateless/01292_create_user.sql index 830163d7345d..3b6176a6a063 100644 --- a/tests/queries/0_stateless/01292_create_user.sql +++ b/tests/queries/0_stateless/01292_create_user.sql @@ -263,5 +263,20 @@ SHOW CREATE USER u2_01292@'192.168.%.%'; DROP USER u1_01292, u1_01292@'192.168.%.%', u2_01292@'192.168.%.%'; SELECT '-- creating user identified with JWT'; -CREATE USER user1 IDENTIFIED WITH jwt BY '1'; -- { clientError BAD_ARGUMENTS } -CREATE USER user1 IDENTIFIED WITH jwt; -- { clientError BAD_ARGUMENTS } +CREATE USER user1 IDENTIFIED WITH jwt BY '1'; -- { clientError SYNTAX_ERROR } +CREATE USER user1 IDENTIFIED WITH jwt; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt PROCESSOR 'my_processor'; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt CLAIMS '{"role":"admin"}'; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt PROCESSOR 'my_processor' CLAIMS '{"role":"admin"}'; +SHOW CREATE USER user1; +CREATE USER OR REPLACE user1 IDENTIFIED WITH jwt CLAIMS '{"role":"admin"}' PROCESSOR 'my_processor'; +SHOW CREATE USER user1; +ALTER USER user1 IDENTIFIED WITH jwt PROCESSOR 'other_processor'; +SHOW CREATE USER user1; +DROP USER user1; +CREATE USER user1 IDENTIFIED WITH jwt PROCESSOR ''; -- { serverError BAD_ARGUMENTS } +CREATE USER user1 IDENTIFIED WITH jwt CLAIMS 'not-json'; -- { serverError BAD_ARGUMENTS } +CREATE USER user1 IDENTIFIED WITH jwt CLAIMS '[]'; -- { serverError BAD_ARGUMENTS } From cc0f0a87c1d061d9ef03a04b53287aa5d08e5407 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Sun, 7 Jun 2026 18:06:41 +0200 Subject: [PATCH 08/12] Resolve conflicts in cherry-pick of #1777 Source-PR: #1777 (https://github.com/Altinity/ClickHouse/pull/1777) --- src/Access/ExternalAuthenticators.cpp | 7 ------- 1 file changed, 7 
deletions(-) diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 003aef420d6d..9a444a6c2388 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -762,19 +762,12 @@ bool ExternalAuthenticators::checkTokenCredentials(const TokenCredentials & cred return processor.checkClaims(credentials, jwt_claims); }; -<<<<<<< HEAD - - /// lookup token in local cache if not expired. - auto cached_entry_iter = access_token_to_username_cache.find(credentials.getToken()); - if (cached_entry_iter != access_token_to_username_cache.end()) -======= /// Snapshot the processor set under the mutex, then run the expensive /// crypto verify WITHOUT the mutex (M-20). `shared_ptr` keeps each /// processor alive even if a config reload swaps `token_processors` in /// the middle of validation. Cache lookup stays under the mutex. std::map> processors_snapshot; ->>>>>>> 52e87d75685 (Merge pull request #1777 from Altinity/fix/antalya-26.3/oauth-address-audit) { std::lock_guard lock{mutex}; From 0f166983a529275ca25b985eedec130a56a4e657 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Fri, 15 May 2026 12:19:31 +0200 Subject: [PATCH 09/12] Merge pull request #1784 from Altinity/fix/antalya-26.3/oauth-fix-azure [WiP] Antalya 26.3: OAuth -- rework Entra ID workflow Source-PR: #1784 (https://github.com/Altinity/ClickHouse/pull/1784) --- .../external-authenticators/tokens.md | 74 ++++++-- src/Access/TokenAccessStorage.cpp | 114 ++++++++---- src/Access/TokenAccessStorage.h | 4 + src/Access/TokenProcessors.h | 15 -- src/Access/TokenProcessorsOpaque.cpp | 173 ------------------ src/Access/TokenProcessorsParse.cpp | 72 +++++++- .../test_token_roles_mapping/__init__.py | 0 .../configs/users.xml | 11 ++ .../configs/validators.xml | 29 +++ .../test_token_roles_mapping/test.py | 89 +++++++++ 10 files changed, 341 insertions(+), 240 deletions(-) create mode 100644 
tests/integration/test_token_roles_mapping/__init__.py create mode 100644 tests/integration/test_token_roles_mapping/configs/users.xml create mode 100644 tests/integration/test_token_roles_mapping/configs/validators.xml create mode 100644 tests/integration/test_token_roles_mapping/test.py diff --git a/docs/en/operations/external-authenticators/tokens.md b/docs/en/operations/external-authenticators/tokens.md index 74c02a56900b..aefa7e6eb549 100644 --- a/docs/en/operations/external-authenticators/tokens.md +++ b/docs/en/operations/external-authenticators/tokens.md @@ -29,7 +29,7 @@ To use token-based authentication, add `token_processors` section to `config.xml Its contents are different for different token processor types. **Common parameters** -- `type` -- type of token processor. Supported values: "jwt_static_key", "jwt_static_jwks", "jwt_dynamic_jwks", "azure", "openid". Mandatory. Case-insensitive. +- `type` -- type of token processor. Supported values: `jwt_static_key`, `jwt_static_jwks`, `jwt_dynamic_jwks`, `entra` (`azure` is accepted as a back-compat alias and resolves to the same `entra` processor — see the [Entra](#entra) section), `openid`. Mandatory. Case-insensitive. - `token_cache_lifetime` -- maximum lifetime of cached token (in seconds). Optional, default: 3600. - `username_claim` -- name of claim (field) that will be treated as ClickHouse username. Optional, default: "sub". - `groups_claim` -- name of claim (field) that contains list of groups user belongs to. This claim will be looked up in the token itself (in case token is a valid JWT, e.g. in Keycloak) or in response from `/userinfo`. Optional, default: "groups". @@ -129,22 +129,61 @@ For JWKS-based validators (`jwt_static_jwks` and `jwt_dynamic_jwks`), RS* and ES - `allow_no_expiration` - If `true`, tokens without the `exp` (expiration) claim are accepted. Otherwise they are rejected. Optional, default: `false`. 
-## Processors with external providers +## IdP-specific presets and generic external providers -Some tokens cannot be decoded and validated locally. External service is needed in this case. "Azure" and "OpenID" (a generic type) are supported now. +This section covers two related kinds of processor: per-IdP convenience presets built on top of the generic JWT processors (currently `entra`), and the generic `openid` processor that talks to an arbitrary OIDC-compliant identity provider. + +### Entra (Microsoft Entra ID, pure OIDC) {#entra} + +`entra` is a preset for Microsoft Entra ID built on top of `jwt_dynamic_jwks`. Tokens are validated **locally** against Entra's per-tenant JWKS — no Microsoft Graph call, no userinfo round trip, no OIDC discovery fetch. `username_claim` and `groups_claim` are read directly from the JWT payload. Use this when the access token's `aud` is your own app (registered via Entra's *Expose an API* blade), not `https://graph.microsoft.com`. + +:::note Migrating from the legacy `azure` processor +`azure` is now an **alias** for `entra` — at config-parse time the type string is rewritten and the rest of the pipeline is identical. The previous `azure` implementation (which round-tripped every token through Microsoft Graph's `/oidc/userinfo` and `/v1.0/me/memberOf` endpoints) has been removed entirely. + +For operators upgrading: an `azure` block that previously had no other parameters will now fail to load with `'tenant_id' must be specified for 'entra' processor`. To migrate, add `` (and ideally ``) and make sure your application is configured to mint tokens whose `aud` is your own app, not Microsoft Graph. The setup recipe lives in `docs/entra-setup-draft.md`. +::: + +Minimum configuration — only `tenant_id` is required; all other parameters have sensible defaults: -### Azure ```xml - - azure - + + entra + aaaabbbb-0000-cccc-1111-dddd2222eeee + ``` -No additional parameters are required. 
+Example with common overrides (audience binding to a specific app, Entra-flavored username/groups claims): + +```xml + + entra + aaaabbbb-0000-cccc-1111-dddd2222eeee + api://clickhouse + preferred_username + roles + +``` + +**Parameters:** + +- `tenant_id` — Microsoft Entra tenant identifier (a GUID, or an `*.onmicrosoft.com` domain). **Mandatory.** Multi-tenant aliases (`common`, `organizations`, `consumers`) are rejected because `JwksJwtProcessor` does exact-match issuer validation. + +All remaining parameters are optional: + +- `jwks_uri` — Override for the JWKS endpoint. Default: `https://login.microsoftonline.com/{tenant_id}/discovery/v2.0/keys`. Override only for sovereign clouds (`login.microsoftonline.us`, `login.partner.microsoftonline.cn`). +- `expected_issuer` — Expected value of the `iss` claim. Default: `https://login.microsoftonline.com/{tenant_id}/v2.0` (derived from `tenant_id`). Override for v1.0 tokens (`https://sts.windows.net/{tenant_id}/`) or sovereign clouds. +- `expected_audience` — Expected value of the `aud` claim, normally your app's Application ID URI (e.g. `api://clickhouse`) or client ID. If unset, no audience check is performed (any signature-valid token from the tenant will authenticate); a warning is logged at startup so the gap is visible. +- `username_claim` — JWT claim to use as the ClickHouse username. Default: `sub`. Common Entra alternatives: `preferred_username`, `upn`, `oid`. +- `groups_claim` — JWT claim that carries the array of group identifiers. Default: `groups`. Set to `roles` if you use App Roles in Entra instead of security-group claims. +- `expected_typ`, `verifier_leeway`, `jwks_cache_lifetime`, `claims`, `allow_no_expiration`, `token_cache_lifetime` — Same as for `jwt_dynamic_jwks`. + +:::note +The `groups` claim must be enabled in the app registration's manifest (`"groupMembershipClaims": "ApplicationGroup"` is recommended) and exposed in access tokens via `optionalClaims.accessToken`. 
Group identifiers in the token are object IDs (GUIDs) by default; map them to ClickHouse roles via the user-directory's `roles_mapping` block (see [Identity Provider as an External User Directory](#idp-external-user-directory)). +::: ### OpenID ```xml @@ -212,7 +251,7 @@ Example (goes into `users.xml`): Here, the JWT payload must contain `["view-profile"]` on path `resource_access.account.roles`, otherwise authentication will not succeed even with a valid JWT. :::note -Per-user `claims` are enforced only when the token is a JWT (validated by a JWT processor such as `jwt_static_key` or `jwt_dynamic_jwks`). When the user authenticates with an opaque (access) token (e.g. via Azure, OpenID, or Google token processors), claims are not checked and authentication succeeds if the token is otherwise valid. +Per-user `claims` are enforced only when the token is a JWT (validated by a JWT processor such as `jwt_static_key`, `jwt_dynamic_jwks`, or `entra`). When the user authenticates with an opaque (access) token (e.g. via OpenID or Google token processors), claims are not checked and authentication succeeds if the token is otherwise valid. ::: ``` @@ -256,6 +295,16 @@ All this implies that the SQL-driven [Access Control and Account Management](/do my_profile + + + 8a1b2c3d-4e5f-6789-abcd-ef0123456789 + ch_admin + + + 9f8e7d6c-5b4a-3210-fedc-ba0987654321 + ch_analyst + + \bclickhouse-[a-zA-Z0-9]+\b @@ -274,5 +323,8 @@ For now, no more than one `token` section can be defined inside `user_directorie - `processor` — Name of one of processors defined in `token_processors` config section described above. This parameter is mandatory and cannot be empty. - `common_roles` — Section with a list of locally defined roles that will be assigned to each user retrieved from the IdP. Optional. - `default_profile` — Name of a locally defined settings profile that will be assigned to each user retrieved from the IdP. 
If the profile does not exist, a warning will be logged and the user will be created without a profile. Optional. -- `roles_filter` — Regex string for groups filtering. Only groups matching this regex will be mapped to roles. Optional. -- `roles_transform` — Sed-style transform pattern to apply to group names before mapping to roles. Format: `s/pattern/replacement/flags`. The `g` flag applies the replacement globally (all occurrences). Example: `s/-/_/g` converts `clickhouse-grp-dba` to `clickhouse_grp_dba`. Optional. +- `roles_mapping` — Explicit map from incoming group identifier (e.g. an Entra security-group object ID) to a ClickHouse role name. Each entry is a `` element with `` and `` children. Applied **before** `roles_filter` and `roles_transform`; groups absent from the map pass through unchanged, so the filter stage can be used to drop unmapped entries. Optional. +- `roles_filter` — Regex string for groups filtering. Only groups (after `roles_mapping` is applied) that match this regex will be considered. Optional. +- `roles_transform` — Sed-style transform pattern applied to group names (after `roles_mapping` and `roles_filter`) before mapping to roles. Format: `s/pattern/replacement/flags`. The `g` flag applies the replacement globally (all occurrences). Example: `s/-/_/g` converts `clickhouse-grp-dba` to `clickhouse_grp_dba`. Optional. + +The three stages run in this order: `roles_mapping` → `roles_filter` → `roles_transform`. Stages are independent and any of them may be omitted. diff --git a/src/Access/TokenAccessStorage.cpp b/src/Access/TokenAccessStorage.cpp index 72dc3ed394d1..9b2a92a54999 100644 --- a/src/Access/TokenAccessStorage.cpp +++ b/src/Access/TokenAccessStorage.cpp @@ -180,6 +180,38 @@ TokenAccessStorage::TokenAccessStorage(const String & storage_name_, AccessContr roles_transform_global = parsed.global; } + /// Explicit `roles_mapping` entries are read as a list of XY + /// children. 
The mapping rewrites incoming group names BEFORE `roles_filter` / `roles_transform`, + /// so each subsequent stage operates on the mapped value. Groups not listed here pass through + /// to filter/transform unchanged. + if (config.has(prefix_str + "roles_mapping")) + { + Poco::Util::AbstractConfiguration::Keys map_keys; + config.keys(prefix_str + "roles_mapping", map_keys); + + for (const auto & key : map_keys) + { + const String entry_prefix = prefix_str + "roles_mapping." + key; + if (!config.has(entry_prefix + ".from") || !config.has(entry_prefix + ".to")) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "roles_mapping entry '{}' must contain both 'from' and 'to' subelements", key); + + const String from = config.getString(entry_prefix + ".from"); + const String to = config.getString(entry_prefix + ".to"); + + if (from.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "roles_mapping entry '{}': 'from' must not be empty", key); + if (to.empty()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "roles_mapping entry '{}': 'to' must not be empty", key); + + auto [it, inserted] = roles_mapping.emplace(from, to); + if (!inserted) + throw Exception(ErrorCodes::BAD_ARGUMENTS, + "roles_mapping has duplicate 'from' value '{}' (already mapped to '{}', cannot remap to '{}')", + from, it->second, to); + } + } + provider_name = config.getString(prefix_str + "processor"); if (provider_name.empty()) throw Exception(ErrorCodes::BAD_ARGUMENTS, "'processor' must be specified for Token user directory"); @@ -593,51 +625,59 @@ std::optional TokenAccessStorage::authenticateImpl( if (!isAddressAllowed(*user, address)) throwAddressNotAllowed(address); + /// Pipeline: incoming group --(roles_mapping)--> mapped name --(roles_filter)--> kept/dropped --(roles_transform)--> CH role name. + /// Each stage is independent and optional; groups absent from `roles_mapping` pass through unchanged. 
std::set external_roles; - if (roles_filter.has_value()) + + /// Defensive: a broken filter regex must NEVER fall through to the permissive + /// "grant everything that survives the rest of the pipeline" branch. Parse-time + /// validation in the constructor already rejects invalid patterns; this guard + /// preserves the invariant in case any future code path constructs the filter + /// without the parse-time check (e.g. config reload). + if (roles_filter.has_value() && !roles_filter->ok()) { - /// Defensive: a broken regex must NEVER cause a fall-through to the - /// permissive "grant all groups" branch. Parse-time validation in the - /// constructor already rejects invalid patterns; this guard ensures the - /// invariant still holds if any future code path constructs the filter - /// without the parse-time check (e.g. config reload). - if (!roles_filter->ok()) - { - LOG_ERROR(getLogger(), - "{}: Configured 'roles_filter' is invalid ('{}'); refusing to map any " - "external roles for user '{}' to avoid granting all token groups.", - getStorageName(), roles_filter->error(), credentials.getUserName()); - } - else - { - LOG_TRACE(getLogger(), "{}: External role filter found, applying only matching groups", getStorageName()); - for (const auto & group: token_credentials.getGroups()) { - if (RE2::FullMatch(group, roles_filter.value())) - { - String transformed_group = group; - if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) - { - transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); - LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); - } - external_roles.insert(transformed_group); - LOG_TRACE(getLogger(), "{}: Granted role (group) {} to user", getStorageName(), transformed_group); - } - } - } + LOG_ERROR(getLogger(), + "{}: Configured 'roles_filter' is invalid ('{}'); refusing to map any " + 
"external roles for user '{}' to avoid granting all token groups.", + getStorageName(), roles_filter->error(), credentials.getUserName()); } else { - LOG_TRACE(getLogger(), "{}: No external role filtering set, applying all available groups", getStorageName()); - for (const auto & group: token_credentials.getGroups()) + const bool has_filter = roles_filter.has_value(); + const bool has_transform = roles_transform_pattern.has_value() && roles_transform_replacement.has_value(); + + for (const auto & group : token_credentials.getGroups()) { - String transformed_group = group; - if (roles_transform_pattern.has_value() && roles_transform_replacement.has_value()) + String name = group; + + if (!roles_mapping.empty()) { - transformed_group = applyTransform(group, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); - LOG_TRACE(getLogger(), "{}: Transformed group '{}' to '{}'", getStorageName(), group, transformed_group); + const auto it = roles_mapping.find(group); + if (it != roles_mapping.end()) + { + name = it->second; + LOG_TRACE(getLogger(), "{}: Mapped group '{}' to '{}'", getStorageName(), group, name); + } } - external_roles.insert(transformed_group); + + if (has_filter && !RE2::FullMatch(name, roles_filter.value())) + { + LOG_TRACE(getLogger(), "{}: Group '{}' (after mapping) did not match roles_filter, skipping", getStorageName(), name); + continue; + } + + if (has_transform) + { + String transformed = applyTransform(name, roles_transform_pattern.value(), roles_transform_replacement.value(), roles_transform_global); + if (transformed != name) + { + LOG_TRACE(getLogger(), "{}: Transformed '{}' to '{}'", getStorageName(), name, transformed); + name = std::move(transformed); + } + } + + external_roles.insert(name); + LOG_TRACE(getLogger(), "{}: Granted role (group) {} to user", getStorageName(), name); } } diff --git a/src/Access/TokenAccessStorage.h b/src/Access/TokenAccessStorage.h index 9f15319a0d82..fb2ee458d63b 100644 
--- a/src/Access/TokenAccessStorage.h +++ b/src/Access/TokenAccessStorage.h @@ -49,6 +49,10 @@ class TokenAccessStorage : public IAccessStorage const String & prefix; String provider_name; + /// Explicit mapping from incoming group (e.g. Entra group object ID) to a ClickHouse role name. + /// Applied BEFORE `roles_filter` and `roles_transform`. Groups absent from this map pass through + /// unchanged, so the filter stage can be used to drop unmapped entries. + std::map roles_mapping; std::optional roles_filter = std::nullopt; /// `roles_transform` regex compiled once at construction. Storing the /// compiled `re2::RE2` (instead of the pattern string) avoids per-call diff --git a/src/Access/TokenProcessors.h b/src/Access/TokenProcessors.h index f2ee32d90887..35c6a218b5b1 100644 --- a/src/Access/TokenProcessors.h +++ b/src/Access/TokenProcessors.h @@ -194,21 +194,6 @@ class GoogleTokenProcessor : public ITokenProcessor const String expected_audience; }; -class AzureTokenProcessor : public ITokenProcessor -{ -public: - AzureTokenProcessor(const String & processor_name_, - UInt64 token_cache_lifetime_, - const String & username_claim_, - const String & groups_claim_, - const String & expected_audience_); - - bool resolveAndValidate(TokenCredentials & credentials) const override; - -private: - const String expected_audience; -}; - class OpenIdTokenProcessor : public ITokenProcessor { public: diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp index 5cd68e60276f..aba8efa7ccfc 100644 --- a/src/Access/TokenProcessorsOpaque.cpp +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -268,179 +268,6 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co return true; } -AzureTokenProcessor::AzureTokenProcessor(const String & processor_name_, - UInt64 token_cache_lifetime_, - const String & username_claim_, - const String & groups_claim_, - const String & expected_audience_) - : ITokenProcessor(processor_name_, 
token_cache_lifetime_, username_claim_, groups_claim_) - , expected_audience(expected_audience_) -{ - /// Without an audience pin, this processor accepts any Azure AD access token - /// that Microsoft Graph happens to honor -- which includes tokens minted for - /// other applications inside the same tenant. Surface the gap so operators - /// can lock the processor to their own application's audience. - if (expected_audience.empty()) - LOG_WARNING(getLogger("TokenAuthentication"), - "{}: 'expected_audience' is not configured for Azure token processor. " - "Any Azure access token Microsoft Graph accepts will authenticate here, " - "regardless of which application it was issued for; set 'expected_audience' " - "to the audience this processor should accept.", - processor_name); -} - -bool AzureTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const -{ - /// Token is a JWT in this case, but we cannot directly verify it against Azure AD JWKS. - /// We will not trust user data in this token except for 'exp' value to determine caching duration. - /// Explanation here: https://stackoverflow.com/questions/60778634/failing-signature-validation-of-jwt-tokens-from-azure-ad - /// Let Azure validate it: only valid tokens will be accepted. - /// Use GET https://graph.microsoft.com/oidc/userinfo to verify token and get user info at the same time - - const String & token = credentials.getToken(); - - String username; - try - { - picojson::object user_info_json = getObjectFromURI(Poco::URI("https://graph.microsoft.com/oidc/userinfo"), token); - username = getValueByKey(user_info_json, username_claim).value(); - } - catch (...) - { - return false; - } - - /// Audience binding (H-10): only after Microsoft Graph has accepted the - /// token (proving it is a real, signed Azure AD token) do we trust its - /// claims. 
We then enforce that the 'aud' claim matches the operator-pinned - /// audience -- without this check, *any* token issued for *any* application - /// in the tenant that has Graph access would authenticate. With the check, - /// tokens minted for other applications are rejected even though Graph - /// itself would honor them. - if (!expected_audience.empty()) - { - try - { - auto decoded_token = jwt::decode(token); - if (!decoded_token.has_audience()) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Azure access token has no 'aud' claim; cannot enforce 'expected_audience' '{}'; rejecting", - processor_name, expected_audience); - return false; - } - const auto auds = decoded_token.get_audience(); - if (auds.find(expected_audience) == auds.end()) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Azure access token audience does not contain configured 'expected_audience' '{}'; rejecting", - processor_name, expected_audience); - return false; - } - } - catch (const std::exception & e) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Failed to decode Azure access token while enforcing 'expected_audience': {}; rejecting", - processor_name, e.what()); - return false; - } - } - - /// Reject empty resolved username (M-27). Previously this branch only - /// logged the gap and proceeded to return true at the end of the function, - /// which would cache an entry under user_name "" and collapse every - /// empty-username token across all IdPs into the same dynamic user. - if (username.empty()) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Resolved username from token is empty; rejecting", processor_name); - return false; - } - credentials.setUserName(username); - - try - { - credentials.setExpiresAt(jwt::decode(token).get_expires_at()); - } - catch (...) 
{ - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: No expiration data found in a valid token, will use default cache lifetime", processor_name); - } - - std::set external_groups_names; - const Poco::URI get_groups_uri = Poco::URI("https://graph.microsoft.com/v1.0/me/memberOf"); - - try - { - auto groups_response = getObjectFromURI(get_groups_uri, token); - - if (!groups_response.contains("value") || !groups_response["value"].is()) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Failed to get Azure groups: invalid content in response from server", processor_name); - return true; - } - - picojson::array groups_array = groups_response["value"].get(); - - for (const auto & group: groups_array) - { - /// Got some invalid response. Ignore this, log this. - if (!group.is()) - { - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Failed to get Azure groups: invalid content in response from server", processor_name); - continue; - } - - auto group_data = group.get(); - - /// Use the immutable `id` (GUID), not the mutable `displayName`, - /// for role-mapping. `displayName` can be renamed by an Azure AD - /// admin -- and on rename, every ClickHouse role-mapping regex - /// that referenced the old name silently stops matching, while - /// every regex that matches the new name silently starts. Two - /// distinct AAD groups can also share a display name and merge - /// into a single ClickHouse group; deleting and recreating a - /// group with the same name silently inherits the old grants. - /// `id` is a GUID assigned by AAD at group creation; it never - /// changes, never collides, and is never reused. - /// - /// Operators upgrading from a build that emitted `displayName` - /// must update their `roles_filter` / `roles_transform` regex - /// to reference the GUIDs Azure AD assigns to the groups they - /// want to map. The role identifier is not human-friendly -- - /// that is the cost of using an immutable handle. 
- if (!group_data.contains("id")) - continue; - - String group_name = getValueByKey(group_data, "id").value_or(""); - if (!group_name.empty()) - { - external_groups_names.insert(group_name); - String display_name = getValueByKey(group_data, "displayName").value_or(""); - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: User {}: new external group id={} (displayName={})", - processor_name, quoteString(credentials.getUserName()), - quoteString(group_name), quoteString(display_name)); - } - } - } - catch (const std::exception & e) - { - /// Defense in depth (M-10 sibling): broadened to `std::exception` so a - /// picojson `std::bad_cast` from a malformed response degrades to "no - /// roles mapped" rather than aborting the whole authentication. - LOG_TRACE(getLogger("TokenAuthentication"), - "{}: Failed to get Azure groups, no external roles will be mapped. reason: {}", processor_name, e.what()); - return true; - } - - credentials.setGroups(external_groups_names); - return true; -} - OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, const String & username_claim_, diff --git a/src/Access/TokenProcessorsParse.cpp b/src/Access/TokenProcessorsParse.cpp index 62602b1efd81..f48ee3fddd07 100644 --- a/src/Access/TokenProcessorsParse.cpp +++ b/src/Access/TokenProcessorsParse.cpp @@ -24,6 +24,14 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( auto provider_type = Poco::toLower(config.getString(prefix + ".type")); + /// `azure` is a back-compat alias for `entra`. The legacy `azure` processor + /// validated tokens by round-tripping through Microsoft Graph; the `entra` + /// processor does pure local JWKS validation, which is what every operator + /// actually wants. Treat both names as the same processor type so existing + /// configs continue to parse, just under stricter validation rules. 
+ if (provider_type == "azure") + provider_type = "entra"; + auto token_cache_lifetime = config.getUInt64(prefix + ".token_cache_lifetime", 3600); auto username_claim = config.getString(prefix + ".username_claim", "sub"); auto groups_claim = config.getString(prefix + ".groups_claim", "groups"); @@ -78,10 +86,6 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( { return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience); } - else if (provider_type == "azure") - { - return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience); - } else if (provider_type == "openid") { auto verifier_leeway = config.getUInt64(prefix + ".verifier_leeway", 60); @@ -133,6 +137,66 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( "Either 'configuration_endpoint' or 'userinfo_endpoint' " "(and, optionally, 'token_introspection_endpoint' / 'jwks_uri') must be specified for 'openid' processor"); } + else if (provider_type == "entra") + { + /// Preset for Microsoft Entra ID built on top of the pure-JWKS JWT processor. + /// Validation is fully local: signature against Entra's published JWKS plus the + /// operator-chosen iss/aud/typ/claims pins. No OIDC discovery fetch, no userinfo + /// endpoint, no Microsoft Graph URL stored on the processor. `groups_claim` and + /// `username_claim` are read directly from the JWT payload -- which requires the + /// access token's audience to be the operator's own app, not Microsoft Graph + /// (Graph-audience tokens are not JWKS-verifiable -- their signing keys are not + /// in the tenant JWKS and their headers carry a `nonce` that breaks third-party + /// validation; see `docs/entra-setup-draft.md` for how to mint app-audience tokens). 
+ if (!config.hasProperty(prefix + ".tenant_id")) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'tenant_id' must be specified for 'entra' processor"); + + const String tenant_id = config.getString(prefix + ".tenant_id"); + + if (tenant_id.empty()) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'tenant_id' must not be empty for 'entra' processor"); + + for (char c : tenant_id) + { + if (!std::isalnum(static_cast(c)) && c != '-' && c != '.' && c != '_') + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "'tenant_id' {} contains invalid characters", tenant_id); + } + + /// Multi-tenant aliases require templated-issuer validation that JwksJwtProcessor does not + /// implement (it does exact-match on `iss`). Reject explicitly rather than silently failing + /// issuer checks at token-validation time. + const String lower_tenant_id = Poco::toLower(tenant_id); + if (lower_tenant_id == "common" || lower_tenant_id == "organizations" || lower_tenant_id == "consumers") + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Multi-tenant 'tenant_id' '{}' is not supported for 'entra' processor type: " + "exact issuer validation requires a single tenant identifier (GUID or onmicrosoft.com domain).", + tenant_id); + + const String default_jwks_uri = "https://login.microsoftonline.com/" + tenant_id + "/discovery/v2.0/keys"; + const String jwks_uri = config.getString(prefix + ".jwks_uri", default_jwks_uri); + require_allowed_url(jwks_uri, "jwks_uri"); + + /// `expected_issuer` is auto-derived from `tenant_id` since the v2.0 issuer URL is fully + /// determined by the tenant. Users can still override -- typically for v1.0 tokens + /// ('https://sts.windows.net/{tenant_id}/') or for sovereign-cloud authorities + /// ('https://login.microsoftonline.us/{tenant_id}/v2.0' etc.). 
+ const String default_issuer = "https://login.microsoftonline.com/" + tenant_id + "/v2.0"; + const String issuer = config.getString(prefix + ".expected_issuer", default_issuer); + + if (expected_audience.empty()) + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: 'expected_audience' is not set for 'entra' processor: the 'aud' claim will not be validated, " + "so tokens issued for any application will be accepted as long as the signature is valid.", + processor_name); + + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, + issuer, expected_audience, expected_typ, allow_no_expiration, + config.getString(prefix + ".claims", ""), + config.getUInt64(prefix + ".verifier_leeway", 60), + jwks_uri, + config.getUInt64(prefix + ".jwks_cache_lifetime", 3600)); + } else if (provider_type == "jwt_static_key") { if (!config.hasProperty(prefix + ".static_key")) diff --git a/tests/integration/test_token_roles_mapping/__init__.py b/tests/integration/test_token_roles_mapping/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/integration/test_token_roles_mapping/configs/users.xml b/tests/integration/test_token_roles_mapping/configs/users.xml new file mode 100644 index 000000000000..7e7e96a262b5 --- /dev/null +++ b/tests/integration/test_token_roles_mapping/configs/users.xml @@ -0,0 +1,11 @@ + + + + + + + 1 + 1 + + + diff --git a/tests/integration/test_token_roles_mapping/configs/validators.xml b/tests/integration/test_token_roles_mapping/configs/validators.xml new file mode 100644 index 000000000000..e52b92574d49 --- /dev/null +++ b/tests/integration/test_token_roles_mapping/configs/validators.xml @@ -0,0 +1,29 @@ + + + + jwt_static_key + HS256 + roles_mapping_test_secret + false + true + + + + + + hs256_groups + default + + + 8a1b2c3d-4e5f-6789-abcd-ef0123456789 + ch_admin + + + 9f8e7d6c-5b4a-3210-fedc-ba0987654321 + ch_analyst + + + ^ch_[a-z_]+$ + + + diff --git 
a/tests/integration/test_token_roles_mapping/test.py b/tests/integration/test_token_roles_mapping/test.py new file mode 100644 index 000000000000..60c18d138c47 --- /dev/null +++ b/tests/integration/test_token_roles_mapping/test.py @@ -0,0 +1,89 @@ +""" +Smoke tests for the `<roles_mapping>` stage in TokenAccessStorage. + +The mapping rewrites incoming group identifiers (e.g. Entra security-group object IDs) +to ClickHouse role names BEFORE `roles_filter` and `roles_transform` run. The processor +under test is `jwt_static_key` with HS256 so tokens can be crafted inline without an IdP. + +Run: + pytest tests/integration/test_token_roles_mapping/test.py -v +""" + +import jwt +import pytest + +from helpers.cluster import ClickHouseCluster + +SECRET = "roles_mapping_test_secret" + +GUID_ADMIN = "8a1b2c3d-4e5f-6789-abcd-ef0123456789" +GUID_ANALYST = "9f8e7d6c-5b4a-3210-fedc-ba0987654321" +GUID_UNMAPPED = "11111111-2222-3333-4444-555555555555" + +cluster = ClickHouseCluster(__file__) +node = cluster.add_instance( + "node", + main_configs=["configs/validators.xml"], + user_configs=["configs/users.xml"], + stay_alive=True, +) + + +@pytest.fixture(scope="module", autouse=True) +def started_cluster(): + try: + cluster.start() + node.query("DROP ROLE IF EXISTS ch_admin") + node.query("DROP ROLE IF EXISTS ch_analyst") + node.query("CREATE ROLE ch_admin") + node.query("CREATE ROLE ch_analyst") + yield cluster + finally: + cluster.shutdown() + + +def make_jwt(sub, groups): + return jwt.encode({"sub": sub, "groups": groups}, SECRET, algorithm="HS256") + + +def query_with_token(token, sql): + resp = node.http_request( + "", + method="POST", + data=sql, + headers={"Authorization": f"Bearer {token}"}, + ) + resp.raise_for_status() + return resp.text + + +def current_roles(sub, groups): + token = make_jwt(sub, groups) + raw = query_with_token( + token, + "SELECT role_name FROM system.current_roles ORDER BY role_name FORMAT TabSeparated", + ) + return [line for line in raw.splitlines() if line] + +
+def test_mapped_guid_grants_mapped_role(): + """A GUID listed in <roles_mapping> resolves to the mapped ClickHouse role.""" + assert current_roles("alice", [GUID_ADMIN]) == ["ch_admin"] + + +def test_multiple_guids_grant_multiple_roles(): + assert current_roles("bob", [GUID_ADMIN, GUID_ANALYST]) == ["ch_admin", "ch_analyst"] + + +def test_unmapped_guid_is_dropped_by_filter(): + """An unmapped GUID passes through `roles_mapping` unchanged and is then rejected by + `roles_filter` (^ch_[a-z_]+$ doesn't match a raw GUID), so only the mapped role survives.""" + assert current_roles("charlie", [GUID_ADMIN, GUID_UNMAPPED]) == ["ch_admin"] + + +def test_only_unmapped_guids_yield_no_roles(): + """No GUID is in the mapping and the filter rejects all of them: no roles are granted, + but authentication itself still succeeds and the user is created from the token.""" + assert current_roles("dave", [GUID_UNMAPPED]) == [] + token = make_jwt("dave", [GUID_UNMAPPED]) + assert query_with_token(token, "SELECT currentUser() FORMAT TabSeparated").strip() == "dave" From 12d0ec333396ec3a13734198b4450ad0e6ebd5e3 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Thu, 21 May 2026 11:05:17 +0200 Subject: [PATCH 10/12] Merge pull request #1799 from Altinity/improvement/antalya-26.3/oauth-polish Antalya 26.3: OAuth improvements Source-PR: #1799 (https://github.com/Altinity/ClickHouse/pull/1799) --- .../external-authenticators/tokens.md | 124 ++++++-- src/Access/TokenAccessStorage.cpp | 7 +- src/Access/TokenProcessors.h | 20 +- src/Access/TokenProcessorsOpaque.cpp | 265 +++++++++++++----- src/Access/TokenProcessorsParse.cpp | 91 ++++-- .../compose/docker_compose_mock_oidc.yml | 15 + tests/integration/helpers/cluster.py | 62 ++++ .../validators_discovery_introspect.xml | 17 ++ .../configs/validators_manual_introspect.xml | 15 + ...alidators_manual_introspect_bad_secret.xml | 16 ++ .../mock_oidc/openid-configuration | 5 + tests/integration/test_keycloak_auth/test.py
| 144 ++++++++++ 12 files changed, 658 insertions(+), 123 deletions(-) create mode 100644 tests/integration/compose/docker_compose_mock_oidc.yml create mode 100644 tests/integration/test_keycloak_auth/configs/validators_discovery_introspect.xml create mode 100644 tests/integration/test_keycloak_auth/configs/validators_manual_introspect.xml create mode 100644 tests/integration/test_keycloak_auth/configs/validators_manual_introspect_bad_secret.xml create mode 100644 tests/integration/test_keycloak_auth/mock_oidc/openid-configuration diff --git a/docs/en/operations/external-authenticators/tokens.md b/docs/en/operations/external-authenticators/tokens.md index aefa7e6eb549..4bc2151c2498 100644 --- a/docs/en/operations/external-authenticators/tokens.md +++ b/docs/en/operations/external-authenticators/tokens.md @@ -133,6 +133,10 @@ For JWKS-based validators (`jwt_static_jwks` and `jwt_dynamic_jwks`), RS* and ES This section covers two related kinds of processor: per-IdP convenience presets built on top of the generic JWT processors (currently `entra`), and the generic `openid` processor that talks to an arbitrary OIDC-compliant identity provider. +:::note +If the IdP issues access tokens that follow [RFC 9068](https://datatracker.ietf.org/doc/html/rfc9068) (the *JSON Web Token Profile for OAuth 2.0 Access Tokens*), the access token is itself a verifiable JWT and is best handled by one of the JWT processors above (typically `jwt_dynamic_jwks`) — no `/userinfo` or `/tokeninfo` round-trip is needed. The processors in this section exist for IdPs whose access tokens are opaque (e.g. Google), or whose JWT access tokens you prefer to validate by asking the IdP rather than locally. +::: + ### Entra (Microsoft Entra ID, pure OIDC) {#entra} `entra` is a preset for Microsoft Entra ID built on top of `jwt_dynamic_jwks`. Tokens are validated **locally** against Entra's per-tenant JWKS — no Microsoft Graph call, no userinfo round trip, no OIDC discovery fetch. 
`username_claim` and `groups_claim` are read directly from the JWT payload. Use this when the access token's `aud` is your own app (registered via Entra's *Expose an API* blade), not `https://graph.microsoft.com`. @@ -178,52 +182,118 @@ All remaining parameters are optional: - `expected_issuer` — Expected value of the `iss` claim. Default: `https://login.microsoftonline.com/{tenant_id}/v2.0` (derived from `tenant_id`). Override for v1.0 tokens (`https://sts.windows.net/{tenant_id}/`) or sovereign clouds. - `expected_audience` — Expected value of the `aud` claim, normally your app's Application ID URI (e.g. `api://clickhouse`) or client ID. If unset, no audience check is performed (any signature-valid token from the tenant will authenticate); a warning is logged at startup so the gap is visible. - `username_claim` — JWT claim to use as the ClickHouse username. Default: `sub`. Common Entra alternatives: `preferred_username`, `upn`, `oid`. -- `groups_claim` — JWT claim that carries the array of group identifiers. Default: `groups`. Set to `roles` if you use App Roles in Entra instead of security-group claims. +- `groups_claim` — JWT claim that carries the array of group identifiers. Default: `groups`. Set to `roles` when using App Roles. See [Mapping groups to ClickHouse roles](#entra-group-mapping) for how to get human-readable values instead of GUIDs. - `expected_typ`, `verifier_leeway`, `jwks_cache_lifetime`, `claims`, `allow_no_expiration`, `token_cache_lifetime` — Same as for `jwt_dynamic_jwks`. +#### Mapping groups to ClickHouse roles {#entra-group-mapping} + +By default the `groups` claim contains group **object IDs (GUIDs)**, not names. Three ways to surface human-readable identifiers, in order of preference: + +**Option A — App Roles** (recommended) + +Operator-chosen role strings in a separate `roles` claim. Compact even for users in many groups (no `hasgroups` overage indicator), and immune to Entra-side group renames. + +1. 
App registration → **App roles** → **Create app role**. Set `Value` to the string ClickHouse should receive (e.g. `ch_admin`); `Allowed member types` = `Users/Groups`. +2. Enterprise application → **Properties** → `Assignment required` = **Yes**. +3. Enterprise application → **Users and groups** → assign each user or security group to a role. Group assignment requires Entra ID P1/P2; free-tier tenants can only assign individual users here. +4. On the processor: `<groups_claim>roles</groups_claim>`. + +**Option B — Format the `groups` claim** + +Names emitted in the existing `groups` claim. Works on free tier; useful when group membership is already maintained in Entra and a separate role-assignment surface is not wanted. + +Prerequisites in the app registration: + +- `"groupMembershipClaims": "ApplicationGroup"` (or `"SecurityGroup"` for tenant-wide). +- `optionalClaims.accessToken` entry for `groups` with `additionalProperties` set to one or more of: + +| Value | Effect | +|---|---| +| `sam_account_name` | On-prem-synced groups emit as `sAMAccountName`. | +| `dns_domain_and_sam_account_name` | On-prem-synced groups emit as `DOMAIN\sAMAccountName`. | +| `cloud_displayname` | Cloud-only groups emit their `displayName`. | + +Entra picks per group; groups not covered by a chosen format still emit as GUIDs. Display names are mutable — a rename in Entra silently breaks the mapping until config is updated. + +Leave `<groups_claim>groups</groups_claim>` (the default). + +**Option C — `roles_mapping`** + +Keep GUIDs in the token and translate them in the user-directory config (see [Identity Provider as an External User Directory](#idp-external-user-directory)). Always works, including on free tier. Tedious for many groups but immune to renames. + :::note -The `groups` claim must be enabled in the app registration's manifest (`"groupMembershipClaims": "ApplicationGroup"` is recommended) and exposed in access tokens via `optionalClaims.accessToken`.
Group identifiers in the token are object IDs (GUIDs) by default; map them to ClickHouse roles via the user-directory's `roles_mapping` block (see [Identity Provider as an External User Directory](#idp-external-user-directory)). +When switching from GUIDs to names, retune any `roles_filter` regex — for example `\bclickhouse-[a-zA-Z0-9]+\b` will not match strings like `ch_admin`. ::: ### OpenID + +The `openid` processor speaks the OIDC protocol surface — `/userinfo` for identity, plus (when discovered or configured) the local JWT fast-path against the IdP's JWKS and RFC 7662 token introspection. Two mutually-exclusive configuration shapes: + +- **Discovery** — point `configuration_endpoint` at `.well-known/openid-configuration`. Endpoints and the issuer are resolved from the doc. When it advertises `jwks_uri`, JWT access tokens (RFC 9068) are validated locally. When it advertises `introspection_endpoint` and you supply `introspection_client_id`/`introspection_client_secret`, RFC 7662 introspection runs on each authentication — alongside the JWT fast-path if both are available, since JWT validates signature/`exp` while introspection adds the revocation check. + +- **Manual** — `userinfo_endpoint` is mandatory. For RFC 9068 JWT access tokens prefer `jwt_dynamic_jwks`. Add `token_introspection_endpoint` + `introspection_client_id` + `introspection_client_secret` for RFC 7662 liveness, expiry, and `iss`/`aud` enforcement; without them, manual mode is `/userinfo` only. + ```xml - + openid - url/.well-known/openid-configuration - 60 - 3600 - - + https://idp.example.com/.well-known/openid-configuration + my-clickhouse-client-id + clickhouse-rs + ... + + + openid - url/userinfo - url/tokeninfo - url/.well-known/jwks.json - 60 - 3600 - + https://idp.example.com/userinfo + https://idp.example.com/introspect + clickhouse-rs + ... 
+ https://idp.example.com + clickhouse-rs + ``` -:::note -Either `configuration_endpoint` or both `userinfo_endpoint` and `token_introspection_endpoint` (and, optionally, `jwks_uri`) shall be set. If none of them are set or all three are set, this is an invalid configuration that will not be parsed. +:::note Parser rules +- `configuration_endpoint` and `userinfo_endpoint` are mutually exclusive. +- `jwks_uri` is rejected in both shapes — use `jwt_dynamic_jwks` for an explicit JWKS URL. +- `introspection_client_id` and `introspection_client_secret` must be set together; both honor `from_env=` / `from_zk=` for secrets handling. +- In manual mode, `expected_issuer` / `expected_audience` are accepted only when introspection is wired (`/userinfo` carries neither claim and so cannot enforce them). ::: -**Parameters:** +#### Setting up the introspection client at your IdP -- `configuration_endpoint` - URI of OpenID configuration (often ends with `.well-known/openid-configuration`); -- `userinfo_endpoint` - URI of endpoint that returns user information in exchange for a valid token; -- `token_introspection_endpoint` - URI of token introspection endpoint (returns information about a valid token); -- `jwks_uri` - URI of OpenID configuration (often ends with `.well-known/jwks.json`) -- `jwks_cache_lifetime` - Period for resend request for refreshing JWKS. Optional, default: 3600. -- `verifier_leeway` - Clock skew tolerance (seconds). Useful for handling small differences in system clocks between ClickHouse and the token issuer. Optional, default: 60 -- `expected_issuer` - Expected value of the `iss` (issuer) claim in the JWT. If specified, tokens with a different issuer will be rejected. Optional. -- `expected_audience` - Expected value of the `aud` (audience) claim in the JWT. If specified, tokens with a different audience will be rejected. Optional. -- `allow_no_expiration` - If `true`, tokens without the `exp` (expiration) claim are accepted. Otherwise they are rejected. 
Optional, default: `false`. +Introspection needs an OAuth client representing ClickHouse-as-resource-server — separate from any user-facing client app, with no redirect URIs. + +| IdP | RFC 7662 introspection | How to create the introspection client | +|---|---|---| +| **Keycloak** | Yes | Realm → Clients → confidential client with *Service Accounts* enabled; copy `client_id` and the secret from the *Credentials* tab | +| **Okta** | Yes (Org AS + Custom AS) | Admin → Applications → Create App Integration → *API Services* | +| **Auth0** | Not for opaque user tokens | Auth0 does not provide `/introspect` for the opaque tokens issued at the `/userinfo` audience; for custom-API JWT access tokens use `jwt_dynamic_jwks` instead | +| **Google**, **GitHub**, **Microsoft Entra ID** (MS Graph) | No | No RFC 7662 endpoint — use the provider-specific processor (`google`) or JWT validation against your own API's tokens (`entra`, `jwt_dynamic_jwks`) | + +#### Parameters + +*Discovery mode:* +- `configuration_endpoint` — URI of the OIDC configuration document. Mandatory. +- `expected_issuer` — Expected `iss`. Enforced via the JWT fast-path or RFC 7662 introspection (whichever the discovery doc surfaces); also anchors the discovery doc's own `issuer` field. Optional. +- `expected_audience` — Expected `aud`. Same enforcement scope as `expected_issuer`. Optional. +- `introspection_client_id`, `introspection_client_secret` — `client_secret_basic` credentials for the introspection endpoint. Both must be set together. Optional; required only if you want introspection enabled. +- `allow_no_expiration` — Accept JWTs without `exp` on the JWT fast-path. Optional, default `false`. +- `verifier_leeway` — Clock-skew tolerance (seconds) for the JWT fast-path. Optional, default 60. +- `jwks_cache_lifetime` — JWKS refresh interval. Optional, default 3600. +- `allow_http_discovery_urls` — Allow non-HTTPS URLs returned by the discovery document. Optional, default `false`. 
+ +*Manual mode:* +- `userinfo_endpoint` — URI of the OIDC userinfo endpoint. Mandatory. +- `token_introspection_endpoint` — URI of an RFC 7662 introspection endpoint. Optional; when set together with introspection credentials, enables liveness, `exp`, and `iss`/`aud` enforcement. +- `introspection_client_id`, `introspection_client_secret` — As above. Required iff `token_introspection_endpoint` is set. +- `expected_issuer`, `expected_audience` — Accepted only when introspection is wired; enforced against the introspection response. Optional. -Sometimes a token is a valid JWT. In that case token will be decoded and validated locally if configuration endpoint returns JWKS URI (or `jwks_uri` is specified alongside `userinfo_endpoint` and `token_introspection_endpoint`). +If the IdP issues access tokens that follow [RFC 9068](https://datatracker.ietf.org/doc/html/rfc9068), prefer `jwt_dynamic_jwks` for direct local validation. The `openid` processor is for opaque tokens (via userinfo and/or introspection) and for cases where you want to consult the IdP rather than validate locally. ### Tokens cache To reduce number of requests to IdP, tokens are cached internally for a maximum period of `token_cache_lifetime` seconds. diff --git a/src/Access/TokenAccessStorage.cpp b/src/Access/TokenAccessStorage.cpp index 9b2a92a54999..5723d2d9d783 100644 --- a/src/Access/TokenAccessStorage.cpp +++ b/src/Access/TokenAccessStorage.cpp @@ -568,7 +568,7 @@ std::optional TokenAccessStorage::authenticateImpl( bool /* allow_no_password */, bool /* allow_plaintext_password */) const { - std::lock_guard lock(mutex); + std::unique_lock lock(mutex); /// Reject mismatched credential types BEFORE the typeid_cast that would /// throw a `LOGICAL_ERROR`. The reference-form `typeid_cast` is fatal on @@ -756,6 +756,11 @@ std::optional TokenAccessStorage::authenticateImpl( /// it queues are picked up by the very loop that called it. 
Only /// `authenticateImpl` runs outside of any drain and so is the one site /// that has to flush explicitly. + /// Release `mutex` first: the notifier drain re-enters this storage via + /// `processRoleChange` (subscribed for Role changes) while holding the + /// notifier's `sending_notifications`, so holding both in opposite order + /// here would deadlock (tsan lock-order-inversion vs. CREATE ROLE). + lock.unlock(); access_control.getChangesNotifier().sendNotifications(); if (id) diff --git a/src/Access/TokenProcessors.h b/src/Access/TokenProcessors.h index 35c6a218b5b1..7f2fea416980 100644 --- a/src/Access/TokenProcessors.h +++ b/src/Access/TokenProcessors.h @@ -197,19 +197,18 @@ class GoogleTokenProcessor : public ITokenProcessor class OpenIdTokenProcessor : public ITokenProcessor { public: - /// Specify endpoints manually + /// Manual mode: `/userinfo` for identity, plus RFC 7662 introspection + /// before it when an introspection endpoint and client credentials are set. OpenIdTokenProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, const String & username_claim_, const String & groups_claim_, const String & expected_issuer_, const String & expected_audience_, - bool allow_no_expiration_, const String & userinfo_endpoint_, const String & token_introspection_endpoint_, - UInt64 verifier_leeway_, - const String & jwks_uri_, - UInt64 jwks_cache_lifetime_); + const String & introspection_client_id_, + const String & introspection_client_secret_); /// Obtain endpoints from openid-configuration URL OpenIdTokenProcessor(const String & processor_name_, @@ -222,15 +221,24 @@ class OpenIdTokenProcessor : public ITokenProcessor const String & openid_config_endpoint_, UInt64 verifier_leeway_, UInt64 jwks_cache_lifetime_, + const String & introspection_client_id_, + const String & introspection_client_secret_, const RemoteHostFilter & remote_host_filter_, bool allow_http_discovery_urls_); bool resolveAndValidate(TokenCredentials & credentials) const 
override; private: + /// True on `active=true`; populates `expires_at` from `exp` if present. + bool runIntrospection(const String & token, std::chrono::system_clock::time_point & expires_at) const; + Poco::URI userinfo_endpoint; Poco::URI token_introspection_endpoint; + String expected_issuer; + String expected_audience; + String introspection_client_id; + String introspection_client_secret; - /// Access token is often a valid JWT, so we can validate it locally to avoid unnecesary network requests. + /// Populated only by the discovery constructor when the doc advertises a `jwks_uri`. std::optional jwt_validator = std::nullopt; }; diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp index aba8efa7ccfc..16ff14897481 100644 --- a/src/Access/TokenProcessorsOpaque.cpp +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include #include #include @@ -84,6 +86,7 @@ namespace std::ostringstream responseString; Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, uri.getPathAndQuery()}; + request.add("Accept", "application/json"); if (!token.empty()) request.add("Authorization", "Bearer " + token); @@ -115,6 +118,74 @@ namespace throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "Failed to parse server response: {}", e.what()); } } + + /// RFC 7662 form-POST with `client_secret_basic` auth. Returns parsed JSON; + /// non-200 throws so callers can distinguish "inactive" (200+active:false) + /// from "client auth or transport failure". 
+ picojson::object postFormToURI(const Poco::URI & uri, + const std::vector> & form, + const String & basic_user, + const String & basic_password) + { + Poco::Net::HTTPResponse response; + std::ostringstream responseString; + + String body; + for (const auto & [key, value] : form) + { + if (!body.empty()) + body += '&'; + String encoded_key; + String encoded_value; + Poco::URI::encode(key, "", encoded_key); + Poco::URI::encode(value, "", encoded_value); + body += encoded_key + "=" + encoded_value; + } + + Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_POST, uri.getPathAndQuery(), + Poco::Net::HTTPMessage::HTTP_1_1}; + request.setContentType("application/x-www-form-urlencoded"); + request.setContentLength(body.size()); + request.add("Accept", "application/json"); + if (!basic_user.empty()) + { + Poco::Net::HTTPBasicCredentials creds(basic_user, basic_password); + creds.authenticate(request); + } + + auto send_and_receive = [&](Poco::Net::HTTPClientSession & session) + { + applyIdpSessionTimeouts(session); + session.sendRequest(request) << body; + Poco::StreamCopier::copyStream(session.receiveResponse(response), responseString); + }; + + if (uri.getScheme() == "https") + { + Poco::Net::HTTPSClientSession session(uri.getHost(), uri.getPort()); + send_and_receive(session); + } + else + { + Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort()); + send_and_receive(session); + } + + if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "POST to '{}' returned HTTP {} ({})", + uri.toString(), static_cast(response.getStatus()), response.getReason()); + + try + { + return parseJSON(responseString.str()); + } + catch (const std::runtime_error & e) + { + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "Failed to parse JSON response from '{}': {}", uri.toString(), e.what()); + } + } } GoogleTokenProcessor::GoogleTokenProcessor(const String & processor_name_, @@ -274,50 +345,18 @@ 
OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, const String & groups_claim_, const String & expected_issuer_, const String & expected_audience_, - bool allow_no_expiration_, const String & userinfo_endpoint_, const String & token_introspection_endpoint_, - UInt64 verifier_leeway_, - const String & jwks_uri_, - UInt64 jwks_cache_lifetime_) + const String & introspection_client_id_, + const String & introspection_client_secret_) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), - userinfo_endpoint(userinfo_endpoint_), token_introspection_endpoint(token_introspection_endpoint_) + userinfo_endpoint(userinfo_endpoint_), + token_introspection_endpoint(token_introspection_endpoint_), + expected_issuer(expected_issuer_), + expected_audience(expected_audience_), + introspection_client_id(introspection_client_id_), + introspection_client_secret(introspection_client_secret_) { - /// Without `jwks_uri`, no `jwt_validator` is created and so `expected_issuer` - /// / `expected_audience` cannot be enforced anywhere on the validation path - /// -- the runtime falls straight to the userinfo endpoint, which only - /// answers "the IdP describes this user", not "the token's `iss`/`aud` - /// match what this deployment pinned". Refuse to load with that combination - /// rather than silently dropping the operator's bindings. - if (jwks_uri_.empty() && (!expected_issuer_.empty() || !expected_audience_.empty())) - throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, - "{}: 'expected_issuer' / 'expected_audience' are configured but no 'jwks_uri' is provided. " - "These bindings can only be enforced via local JWT validation against a JWKS; the userinfo " - "fallback alone cannot enforce them. 
Configure 'jwks_uri' (or, if you intentionally want " - "userinfo-only validation, clear 'expected_issuer'/'expected_audience').", - processor_name); - - if (!jwks_uri_.empty()) - { - LOG_TRACE(getLogger("TokenAuthentication"), "{}: JWKS URI set, local JWT processing will be attempted", processor_name_); - /// `expected_typ` is left empty here: OpenID's JWT-fastpath inherits no - /// `typ` enforcement from the operator config (the parser doesn't surface - /// `expected_typ` for the `openid` processor type yet). Operators who - /// want strict `typ` enforcement should use `jwt_static_jwks` / - /// `jwt_dynamic_jwks` directly instead of `openid`. - jwt_validator.emplace(processor_name_ + "jwks_val", - token_cache_lifetime_, - username_claim_, - groups_claim_, - expected_issuer_, - expected_audience_, - /*expected_typ=*/"", - allow_no_expiration_, - "", - verifier_leeway_, - jwks_uri_, - jwks_cache_lifetime_); - } } OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, @@ -330,9 +369,15 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, const String & openid_config_endpoint_, UInt64 verifier_leeway_, UInt64 jwks_cache_lifetime_, + const String & introspection_client_id_, + const String & introspection_client_secret_, const RemoteHostFilter & remote_host_filter_, bool allow_http_discovery_urls_) - : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) + : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), + expected_issuer(expected_issuer_), + expected_audience(expected_audience_), + introspection_client_id(introspection_client_id_), + introspection_client_secret(introspection_client_secret_) { /// Defense in depth: the discovery endpoint itself was already validated by /// the parser, but re-check here in case this constructor is reached via a @@ -350,11 +395,6 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, const 
picojson::object openid_config = getObjectFromURI(Poco::URI(openid_config_endpoint_)); - /// Only `userinfo_endpoint` is mandatory: it backs the runtime userinfo - /// fallback (and is the sole user-info source when no JWKS is configured). - /// `introspection_endpoint` is currently unused at runtime -- it's plumbed - /// for a future RFC 7662 introspection feature -- so a discovery document - /// that omits it should not block processor construction. if (!openid_config.contains("userinfo_endpoint")) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "{}: Cannot extract userinfo_endpoint from OIDC configuration at '{}'; consider manual configuration.", @@ -469,16 +509,24 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, if (openid_config.contains("introspection_endpoint")) token_introspection_endpoint = Poco::URI(getValueByKey(openid_config, "introspection_endpoint").value()); - /// See manual-constructor comment: `expected_issuer` / `expected_audience` - /// can only be enforced via local JWT validation. If the discovery document - /// does not advertise a `jwks_uri`, no `jwt_validator` will be created and - /// the userinfo fallback alone cannot enforce these bindings. Refuse the - /// configuration rather than silently dropping them. - if (!openid_config.contains("jwks_uri") && (!expected_issuer_.empty() || !expected_audience_.empty())) + const bool can_enforce_via_jwks = openid_config.contains("jwks_uri"); + const bool can_enforce_via_introspection = + openid_config.contains("introspection_endpoint") && !introspection_client_id_.empty(); + + /// Catch creds configured for a discovery doc that does not advertise an + /// introspection endpoint -- otherwise the credentials would be silently + /// ignored at runtime. 
+ if (!introspection_client_id_.empty() && !openid_config.contains("introspection_endpoint")) throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, - "{}: OIDC discovery at '{}' did not advertise a 'jwks_uri', but 'expected_issuer' / " - "'expected_audience' are configured. These bindings can only be enforced via local JWT " - "validation against a JWKS; userinfo cannot enforce them. Refusing to load.", + "{}: 'introspection_client_id' / 'introspection_client_secret' are set but the OIDC " + "discovery at '{}' does not advertise an 'introspection_endpoint'.", + processor_name, openid_config_endpoint_); + + if (!can_enforce_via_jwks && !can_enforce_via_introspection + && (!expected_issuer_.empty() || !expected_audience_.empty())) + throw Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "{}: 'expected_issuer' / 'expected_audience' need either a 'jwks_uri' or an " + "'introspection_endpoint' (with operator credentials) in the discovery doc at '{}'.", processor_name, openid_config_endpoint_); if (openid_config.contains("jwks_uri")) @@ -500,6 +548,93 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, } } +bool OpenIdTokenProcessor::runIntrospection(const String & token, + std::chrono::system_clock::time_point & expires_at) const +{ + expires_at = {}; + + picojson::object response; + try + { + response = postFormToURI(token_introspection_endpoint, + {{"token", token}, {"token_type_hint", "access_token"}}, + introspection_client_id, + introspection_client_secret); + } + catch (const Exception & e) + { + /// LOG_WARNING (not TRACE): a non-200 from the introspection endpoint + /// almost always means the operator's `introspection_client_*` is + /// wrong or the IdP is unreachable -- worth surfacing by default. + LOG_WARNING(getLogger("TokenAuthentication"), + "{}: Token introspection request failed: {}", processor_name, e.message()); + return false; + } + + /// active=true is authoritative per RFC 7662 §2.2. 
+ const auto active_opt = getValueByKey(response, "active"); + if (!active_opt.has_value() || !active_opt.value()) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Token introspection reported active=false (or missing); rejecting", processor_name); + return false; + } + + if (!expected_issuer.empty()) + { + const auto iss = getValueByKey(response, "iss").value_or(""); + if (iss != expected_issuer) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Token introspection 'iss' '{}' does not match expected_issuer '{}'; rejecting", + processor_name, iss, expected_issuer); + return false; + } + } + + /// `aud` may be a string or an array (RFC 7519 §4.1.3). + if (!expected_audience.empty()) + { + auto aud_it = response.find("aud"); + bool ok = false; + if (aud_it != response.end()) + { + const picojson::value & aud_val = aud_it->second; + if (aud_val.is()) + ok = (aud_val.get() == expected_audience); + else if (aud_val.is()) + for (const auto & v : aud_val.get()) + if (v.is() && v.get() == expected_audience) + ok = true; + } + if (!ok) + { + LOG_TRACE(getLogger("TokenAuthentication"), + "{}: Token introspection 'aud' does not contain expected_audience '{}'; rejecting", + processor_name, expected_audience); + return false; + } + } + + if (response.contains("exp")) + { + const auto exp_opt = getValueByKey(response, "exp"); + const double exp = exp_opt.value_or(0.0); + if (exp_opt.has_value() && std::isfinite(exp) && exp > 0.0 + && exp <= static_cast(std::numeric_limits::max())) + expires_at = std::chrono::system_clock::from_time_t(static_cast(exp)); + else + /// IdP advertised an `exp` we cannot use. Authentication still + /// succeeds (the token IS active), but the cache loses its tighter + /// upper bound; surface so operators see IdP drift. 
+ LOG_WARNING(getLogger("TokenAuthentication"), + "{}: Token introspection returned malformed 'exp'; cache TTL falls back to token_cache_lifetime", + processor_name); + } + + return true; +} + bool OpenIdTokenProcessor::resolveAndValidate(TokenCredentials & credentials) const { const String & token = credentials.getToken(); @@ -540,7 +675,6 @@ bool OpenIdTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co user_info_json = decoded_token.get_payload_json(); username = getValueByKey(user_info_json, username_claim).value(); - /// TODO: Now we work only with Keycloak -- and it provides expires_at in token itself. Need to add actual token introspection logic for other OIDC providers. if (decoded_token.has_expires_at()) credentials.setExpiresAt(decoded_token.get_expires_at()); } @@ -565,13 +699,18 @@ bool OpenIdTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co } } - /// Userinfo path: only reachable when no `jwt_validator` is configured - /// (the constructor guarantees that combination is incompatible with any - /// `expected_issuer` / `expected_audience` pin), or when local JWT validation - /// passed but extracting the username/payload from the decoded token failed - /// for an unrelated reason -- in which case the bindings have already been - /// enforced by `jwt_validator` and userinfo is just being asked for the user - /// identity. + /// Run introspection whenever the operator configured it -- the JWT + /// fast-path validates signature/exp but cannot detect server-side + /// revocation, which is the whole reason to add introspection. 
+ if (!token_introspection_endpoint.empty() && !introspection_client_id.empty()) + { + std::chrono::system_clock::time_point introspection_expires_at; + if (!runIntrospection(token, introspection_expires_at)) + return false; + if (introspection_expires_at != std::chrono::system_clock::time_point{}) + credentials.setExpiresAt(introspection_expires_at); + } + if (username.empty() || user_info_json.empty()) { try diff --git a/src/Access/TokenProcessorsParse.cpp b/src/Access/TokenProcessorsParse.cpp index f48ee3fddd07..015060cffb75 100644 --- a/src/Access/TokenProcessorsParse.cpp +++ b/src/Access/TokenProcessorsParse.cpp @@ -88,54 +88,95 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( } else if (provider_type == "openid") { - auto verifier_leeway = config.getUInt64(prefix + ".verifier_leeway", 60); - auto jwks_cache_lifetime = config.getUInt64(prefix + ".jwks_cache_lifetime", 3600); - - /// `token_introspection_endpoint` is currently unused at runtime: the - /// processor relies on JWT-local validation (when JWKS is configured) - /// or on userinfo, never on RFC 7662 introspection. Don't require it - /// for "locally configured" mode -- forcing operators to set a value - /// that does nothing is a footgun. If introspection is wired up later, - /// the field is already plumbed and can become required at that point. - bool externally_configured = config.hasProperty(prefix + ".configuration_endpoint") && !config.hasProperty(prefix + ".jwks_uri"); + bool externally_configured = config.hasProperty(prefix + ".configuration_endpoint"); bool locally_configured = config.hasProperty(prefix + ".userinfo_endpoint"); - if (externally_configured && ! 
locally_configured) + if (externally_configured && locally_configured) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Token processor '{}': 'configuration_endpoint' and 'userinfo_endpoint' are mutually exclusive.", + processor_name); + + const auto introspection_client_id = config.getString(prefix + ".introspection_client_id", ""); + const auto introspection_client_secret = config.getString(prefix + ".introspection_client_secret", ""); + if (introspection_client_id.empty() != introspection_client_secret.empty()) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Token processor '{}': 'introspection_client_id' and 'introspection_client_secret' " + "must be configured together.", + processor_name); + + auto reject_unsupported_key = [&](const char * key, const char * hint) { + if (config.hasProperty(prefix + "." + key)) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Token processor '{}': '{}' is not supported in this mode. {}", + processor_name, key, hint); + }; + + if (externally_configured) + { + reject_unsupported_key("jwks_uri", + "In discovery mode the JWKS URL is resolved from the discovery document; " + "for an explicit JWKS URL use a 'jwt_dynamic_jwks' processor."); + + auto verifier_leeway = config.getUInt64(prefix + ".verifier_leeway", 60); + auto jwks_cache_lifetime = config.getUInt64(prefix + ".jwks_cache_lifetime", 3600); const auto configuration_endpoint = config.getString(prefix + ".configuration_endpoint"); require_allowed_url(configuration_endpoint, "configuration_endpoint"); - /// Opt-out for the HTTPS-on-discovery-returned-URLs check. False by - /// default; operators who knowingly run an IdP over plain HTTP can - /// enable it without falling back to manual trust-chain config. 
const auto allow_http_discovery_urls = config.getBool(prefix + ".allow_http_discovery_urls", false); return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_issuer, expected_audience, allow_no_expiration, configuration_endpoint, verifier_leeway, jwks_cache_lifetime, + introspection_client_id, + introspection_client_secret, remote_host_filter, allow_http_discovery_urls); } - else if (locally_configured && !externally_configured) + + if (locally_configured) { - const auto userinfo_endpoint = config.getString(prefix + ".userinfo_endpoint"); + reject_unsupported_key("jwks_uri", + "For local JWT validation against a JWKS use a 'jwt_dynamic_jwks' processor."); + reject_unsupported_key("allow_no_expiration", "It applies only to JWT validation."); + reject_unsupported_key("verifier_leeway", "It applies only to JWT validation."); + reject_unsupported_key("jwks_cache_lifetime", "It applies only to JWKS-backed processors."); + const auto token_introspection_endpoint = config.getString(prefix + ".token_introspection_endpoint", ""); - const auto jwks_uri = config.getString(prefix + ".jwks_uri", ""); + const bool has_introspection = !token_introspection_endpoint.empty(); + + if (has_introspection && introspection_client_id.empty()) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Token processor '{}': 'token_introspection_endpoint' is set but " + "'introspection_client_id' / 'introspection_client_secret' are not.", + processor_name); + if (!has_introspection && !introspection_client_id.empty()) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Token processor '{}': 'introspection_client_id' / 'introspection_client_secret' " + "are set but no 'token_introspection_endpoint' is configured.", + processor_name); + + if ((config.hasProperty(prefix + ".expected_issuer") || config.hasProperty(prefix + ".expected_audience")) + && !has_introspection) + throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, + "Token 
processor '{}': 'expected_issuer' / 'expected_audience' need either a " + "'token_introspection_endpoint' (RFC 7662) or a 'jwt_dynamic_jwks' processor.", + processor_name); + + const auto userinfo_endpoint = config.getString(prefix + ".userinfo_endpoint"); require_allowed_url(userinfo_endpoint, "userinfo_endpoint"); require_allowed_url(token_introspection_endpoint, "token_introspection_endpoint"); - require_allowed_url(jwks_uri, "jwks_uri"); return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, - expected_issuer, expected_audience, allow_no_expiration, + expected_issuer, expected_audience, userinfo_endpoint, token_introspection_endpoint, - verifier_leeway, - jwks_uri, - jwks_cache_lifetime); + introspection_client_id, + introspection_client_secret); } throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, - "Either 'configuration_endpoint' or 'userinfo_endpoint' " - "(and, optionally, 'token_introspection_endpoint' / 'jwks_uri') must be specified for 'openid' processor"); + "Either 'configuration_endpoint' (discovery) or 'userinfo_endpoint' (manual) " + "must be specified for 'openid' processor"); } else if (provider_type == "entra") { @@ -256,8 +297,6 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( } else throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Invalid type: {}", provider_type); - - // throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Failed to parse token processor: {}", processor_name); } #else diff --git a/tests/integration/compose/docker_compose_mock_oidc.yml b/tests/integration/compose/docker_compose_mock_oidc.yml new file mode 100644 index 000000000000..a17532905c0d --- /dev/null +++ b/tests/integration/compose/docker_compose_mock_oidc.yml @@ -0,0 +1,15 @@ +services: + mock-oidc: + image: nginx:alpine + volumes: + - ${MOCK_OIDC_CONFIG_FILE}:/usr/share/nginx/html/.well-known/openid-configuration:ro + ports: + - "${MOCK_OIDC_EXTERNAL_PORT:-18091}:80" + healthcheck: + test: + - 
CMD-SHELL + - > + wget -qO- http://localhost/.well-known/openid-configuration > /dev/null || exit 1 + interval: 5s + timeout: 3s + retries: 15 diff --git a/tests/integration/helpers/cluster.py b/tests/integration/helpers/cluster.py index 8a0f3adfb6d8..324c6ce3a216 100644 --- a/tests/integration/helpers/cluster.py +++ b/tests/integration/helpers/cluster.py @@ -670,6 +670,7 @@ def __init__( self.with_cassandra = False self.with_ldap = False self.with_keycloak = False + self.with_mock_oidc = False self.with_jdbc_bridge = False self.with_nginx = False self.with_hive = False @@ -776,6 +777,11 @@ def __init__( self.keycloak_port = 18080 self.base_keycloak_cmd = None + # available when with_mock_oidc == True + self.mock_oidc_host = "mock-oidc" + self.mock_oidc_port = 18091 + self.base_mock_oidc_cmd = None + # available when with_rabbitmq == True self.rabbitmq_host = "rabbitmq1" self.rabbitmq_ip = None @@ -1908,6 +1914,25 @@ def setup_keycloak_cmd(self, instance, env_variables, docker_compose_yml_dir): ) return self.base_keycloak_cmd + def setup_mock_oidc_cmd(self, instance, env_variables, docker_compose_yml_dir): + self.with_mock_oidc = True + env_variables["MOCK_OIDC_EXTERNAL_PORT"] = str(self.mock_oidc_port) + env_variables["MOCK_OIDC_CONFIG_FILE"] = p.join( + self.base_dir, + "mock_oidc", + "openid-configuration", + ) + self.base_cmd.extend( + ["--file", p.join(docker_compose_yml_dir, "docker_compose_mock_oidc.yml")] + ) + self.base_mock_oidc_cmd = self.compose_cmd( + "--env-file", + instance.env_file, + "--file", + p.join(docker_compose_yml_dir, "docker_compose_mock_oidc.yml"), + ) + return self.base_mock_oidc_cmd + def setup_jdbc_bridge_cmd(self, instance, env_variables, docker_compose_yml_dir): self.with_jdbc_bridge = True env_variables["JDBC_DRIVER_LOGS"] = self.jdbc_driver_logs_dir @@ -2074,6 +2099,7 @@ def add_instance( with_cassandra=False, with_ldap=False, with_keycloak=False, + with_mock_oidc=False, with_jdbc_bridge=False, with_hive=False, with_coredns=False, 
@@ -2217,6 +2243,7 @@ def add_instance( with_cassandra=with_cassandra, with_ldap=with_ldap, with_keycloak=with_keycloak, + with_mock_oidc=with_mock_oidc, with_iceberg_catalog=with_iceberg_catalog, with_glue_catalog=with_glue_catalog, with_hms_catalog=with_hms_catalog, @@ -2481,6 +2508,11 @@ def add_instance( self.setup_keycloak_cmd(instance, env_variables, docker_compose_yml_dir) ) + if with_mock_oidc and not self.with_mock_oidc: + cmds.append( + self.setup_mock_oidc_cmd(instance, env_variables, docker_compose_yml_dir) + ) + if with_jdbc_bridge and not self.with_jdbc_bridge: cmds.append( self.setup_jdbc_bridge_cmd( @@ -3473,6 +3505,26 @@ def wait_keycloak_to_start(self, timeout=120): def get_keycloak_url(self): return f"http://localhost:{self.keycloak_port}" + def wait_mock_oidc_to_start(self, timeout=60): + url = ( + f"http://localhost:{self.mock_oidc_port}" + f"/.well-known/openid-configuration" + ) + start = time.time() + while time.time() - start < timeout: + try: + resp = requests.get(url, timeout=5) + if resp.status_code == 200: + logging.info("mock-oidc is online") + return + except Exception as ex: + logging.warning("Waiting for mock-oidc: %s", ex) + time.sleep(2) + raise Exception("mock-oidc did not start in time") + + def get_mock_oidc_url(self): + return f"http://localhost:{self.mock_oidc_port}" + def wait_prometheus_to_start(self): if "writer" in self.prometheus_servers: self.prometheus_writer_ip = self.get_instance_ip(self.prometheus_writer_host) @@ -4018,6 +4070,11 @@ def logging_azurite_initialization(exception, retry_number, sleep_time): self.up_called = True self.wait_keycloak_to_start() + if self.with_mock_oidc and self.base_mock_oidc_cmd: + subprocess_check_call(self.base_mock_oidc_cmd + ["up", "-d"]) + self.up_called = True + self.wait_mock_oidc_to_start() + if self.with_jdbc_bridge and self.base_jdbc_bridge_cmd: os.makedirs(self.jdbc_driver_logs_dir) os.chmod(self.jdbc_driver_logs_dir, stat.S_IRWXU | stat.S_IRWXO) @@ -4534,6 +4591,7 @@ def 
__init__( with_cassandra, with_ldap, with_keycloak, + with_mock_oidc, with_iceberg_catalog, with_glue_catalog, with_hms_catalog, @@ -4658,6 +4716,7 @@ def __init__( self.with_cassandra = with_cassandra self.with_ldap = with_ldap self.with_keycloak = with_keycloak + self.with_mock_oidc = with_mock_oidc self.with_jdbc_bridge = with_jdbc_bridge self.with_hive = with_hive self.with_coredns = with_coredns @@ -6054,6 +6113,9 @@ def write_embedded_config(name, dest_dir, fix_log_level=False): if self.with_keycloak: depends_on.append("keycloak") + if self.with_mock_oidc: + depends_on.append("mock-oidc") + if self.with_rabbitmq: depends_on.append("rabbitmq1") diff --git a/tests/integration/test_keycloak_auth/configs/validators_discovery_introspect.xml b/tests/integration/test_keycloak_auth/configs/validators_discovery_introspect.xml new file mode 100644 index 000000000000..5397e9b7a6f2 --- /dev/null +++ b/tests/integration/test_keycloak_auth/configs/validators_discovery_introspect.xml @@ -0,0 +1,17 @@ + + + + + openid + http://mock-oidc/.well-known/openid-configuration + clickhouse + test-secret + preferred_username + 3 + true + + + diff --git a/tests/integration/test_keycloak_auth/configs/validators_manual_introspect.xml b/tests/integration/test_keycloak_auth/configs/validators_manual_introspect.xml new file mode 100644 index 000000000000..c14800ed9243 --- /dev/null +++ b/tests/integration/test_keycloak_auth/configs/validators_manual_introspect.xml @@ -0,0 +1,15 @@ + + + + + openid + http://keycloak:8080/realms/clickhouse-test/protocol/openid-connect/userinfo + http://keycloak:8080/realms/clickhouse-test/protocol/openid-connect/token/introspect + clickhouse + test-secret + preferred_username + 3 + + + diff --git a/tests/integration/test_keycloak_auth/configs/validators_manual_introspect_bad_secret.xml b/tests/integration/test_keycloak_auth/configs/validators_manual_introspect_bad_secret.xml new file mode 100644 index 000000000000..702761d20a83 --- /dev/null +++ 
b/tests/integration/test_keycloak_auth/configs/validators_manual_introspect_bad_secret.xml @@ -0,0 +1,16 @@ + + + + + openid + http://keycloak:8080/realms/clickhouse-test/protocol/openid-connect/userinfo + http://keycloak:8080/realms/clickhouse-test/protocol/openid-connect/token/introspect + clickhouse + wrong-secret + preferred_username + 3 + + + diff --git a/tests/integration/test_keycloak_auth/mock_oidc/openid-configuration b/tests/integration/test_keycloak_auth/mock_oidc/openid-configuration new file mode 100644 index 000000000000..fdd3b314b35f --- /dev/null +++ b/tests/integration/test_keycloak_auth/mock_oidc/openid-configuration @@ -0,0 +1,5 @@ +{ + "issuer": "http://keycloak:8080/realms/clickhouse-test", + "userinfo_endpoint": "http://keycloak:8080/realms/clickhouse-test/protocol/openid-connect/userinfo", + "introspection_endpoint": "http://keycloak:8080/realms/clickhouse-test/protocol/openid-connect/token/introspect" +} diff --git a/tests/integration/test_keycloak_auth/test.py b/tests/integration/test_keycloak_auth/test.py index 46a92071adb4..cb03a14e8de8 100644 --- a/tests/integration/test_keycloak_auth/test.py +++ b/tests/integration/test_keycloak_auth/test.py @@ -35,6 +35,34 @@ stay_alive=True, ) +# Each introspection scenario gets its own node so the targeted processor is the +# only one that can authenticate a token -- otherwise we cannot tell which +# processor handled a successful auth. 
+node_manual_introspect = cluster.add_instance( + "node_manual_introspect", + main_configs=["configs/validators_manual_introspect.xml"], + user_configs=["configs/users.xml"], + with_keycloak=True, + stay_alive=True, +) + +node_discovery_introspect = cluster.add_instance( + "node_discovery_introspect", + main_configs=["configs/validators_discovery_introspect.xml"], + user_configs=["configs/users.xml"], + with_keycloak=True, + with_mock_oidc=True, + stay_alive=True, +) + +node_manual_introspect_bad = cluster.add_instance( + "node_manual_introspect_bad", + main_configs=["configs/validators_manual_introspect_bad_secret.xml"], + user_configs=["configs/users.xml"], + with_keycloak=True, + stay_alive=True, +) + @pytest.fixture(scope="module", autouse=True) def started_cluster(): @@ -418,3 +446,119 @@ def test_device_flow_round_trip(started_cluster): # --- 4. Use the token to authenticate a ClickHouse query --- result = query_with_token(node, id_token, "SELECT 1") assert result.strip() == "1" + + +KEYCLOAK_INTROSPECT_PATH = ( + f"/realms/{KEYCLOAK_REALM}/protocol/openid-connect/token/introspect" +) +KEYCLOAK_REVOKE_PATH = ( + f"/realms/{KEYCLOAK_REALM}/protocol/openid-connect/revoke" +) + +# Pin Host header on backchannel calls so tokens have the same `iss` as the URL +# ClickHouse uses to introspect them (the existing helpers used by other tests +# keep the host-mapped URL so device-flow HTML redirects stay reachable). 
+KEYCLOAK_BACKCHANNEL_HOST = "keycloak:8080" + + +def _keycloak_backchannel_headers(): + return {"Host": KEYCLOAK_BACKCHANNEL_HOST} + + +def get_keycloak_access_token(started_cluster, username="alice", password="secret"): + url = f"{keycloak_url(started_cluster)}/realms/{KEYCLOAK_REALM}/protocol/openid-connect/token" + data = { + "grant_type": "password", + "client_id": KEYCLOAK_CLIENT_ID, + "client_secret": KEYCLOAK_CLIENT_SECRET, + "username": username, + "password": password, + "scope": "openid profile email", + } + resp = requests.post(url, data=data, headers=_keycloak_backchannel_headers(), timeout=30) + resp.raise_for_status() + body = resp.json() + assert "access_token" in body, f"No access_token in response: {body}" + return body["access_token"] + + +def introspect_directly(started_cluster, token): + """POST to Keycloak's introspection endpoint with the client credentials + we use in the ClickHouse config. Returns the parsed JSON body.""" + url = f"{keycloak_url(started_cluster)}{KEYCLOAK_INTROSPECT_PATH}" + resp = requests.post( + url, + data={"token": token, "token_type_hint": "access_token"}, + auth=(KEYCLOAK_CLIENT_ID, KEYCLOAK_CLIENT_SECRET), + headers=_keycloak_backchannel_headers(), + timeout=10, + ) + resp.raise_for_status() + return resp.json() + + +def revoke_keycloak_token(started_cluster, token): + url = f"{keycloak_url(started_cluster)}{KEYCLOAK_REVOKE_PATH}" + resp = requests.post( + url, + data={"token": token, "token_type_hint": "access_token"}, + auth=(KEYCLOAK_CLIENT_ID, KEYCLOAK_CLIENT_SECRET), + headers=_keycloak_backchannel_headers(), + timeout=10, + ) + resp.raise_for_status() + + +def _expect_auth_failure(node_instance, token): + """Assert that ClickHouse rejects the token with a 401/403, not some + other 5xx that would falsely satisfy a blanket-catch test.""" + try: + query_with_token(node_instance, token, "SELECT 1") + except requests.HTTPError as ex: + assert ex.response.status_code in (401, 403), \ + f"expected 401/403, got 
{ex.response.status_code}: {ex.response.text}" + return + pytest.fail("Expected authentication failure but query succeeded") + + +# token_cache_lifetime in the introspection validator configs. +INTROSPECT_CACHE_TTL_SECONDS = 3 + + +def _assert_revocation_detected(started_cluster, node_instance): + """Fresh token works, then is revoked, then is rejected after the cache TTL + elapses. Only passes when introspection runs on the second request.""" + token = get_keycloak_access_token(started_cluster) + + assert introspect_directly(started_cluster, token)["active"] is True + assert query_with_token(node_instance, token, "SELECT 1").strip() == "1" + + revoke_keycloak_token(started_cluster, token) + + # Wait past the cache TTL with a margin generous enough for slow CI. + time.sleep(INTROSPECT_CACHE_TTL_SECONDS + 3) + + assert introspect_directly(started_cluster, token)["active"] is False + _expect_auth_failure(node_instance, token) + + +def test_manual_introspect_detects_revocation(started_cluster): + """Manual mode: opaque-flow introspection rejects a token after revocation.""" + _assert_revocation_detected(started_cluster, node_manual_introspect) + + +def test_discovery_introspect_detects_revocation(started_cluster): + """Discovery mode against a mock OIDC doc that omits jwks_uri: the only + available validation path is RFC 7662, exercised end-to-end here.""" + _assert_revocation_detected(started_cluster, node_discovery_introspect) + + +def test_manual_introspect_rejects_on_bad_client_secret(started_cluster): + """When the resource server cannot authenticate to the introspection + endpoint (Keycloak returns 401), ClickHouse must reject the bearer token + rather than fall through to /userinfo.""" + token = get_keycloak_access_token(started_cluster) + # Sanity: the token itself is fine -- the failure must come from the + # ClickHouse-side introspection auth, not from the token being invalid. 
+ assert introspect_directly(started_cluster, token)["active"] is True + _expect_auth_failure(node_manual_introspect_bad, token) From c552968978ecce490b618d70b31e0d8097a19689 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Sun, 31 May 2026 15:07:41 +0200 Subject: [PATCH 11/12] Cherry-pick of https://github.com/Altinity/ClickHouse/pull/1809 with unresolved conflict markers (resolution in next commit) --- Original cherry-pick message follows: Merge pull request #1809 from Altinity/feature/antalya-26.3/oauth-executable-token-in-client Antalya 26.3: add `--jwt-command` to client # Conflicts: # docs/en/interfaces/cli.md --- docs/en/interfaces/cli.md | 228 ++++++++++++++++++ programs/client/Client.cpp | 181 +++++++++----- src/Access/AccessControl.cpp | 4 +- src/Access/AccessControl.h | 3 +- src/Access/Common/JWKSProvider.cpp | 14 +- src/Access/Common/JWKSProvider.h | 5 +- src/Access/ExternalAuthenticators.cpp | 17 +- src/Access/ExternalAuthenticators.h | 9 +- src/Access/TokenProcessors.h | 20 +- src/Access/TokenProcessorsOpaque.cpp | 63 ++--- src/Access/TokenProcessorsParse.cpp | 20 +- src/Client/CommandJWTProvider.cpp | 129 ++++++++++ src/Client/CommandJWTProvider.h | 33 +++ src/Client/Connection.cpp | 59 +++-- src/Client/JWTProvider.cpp | 15 ++ src/Client/JWTProvider.h | 2 + src/Common/ShellCommand.cpp | 6 + src/Common/ShellCommand.h | 4 + src/Interpreters/Context.cpp | 6 +- src/Interpreters/Session.cpp | 33 +-- src/Interpreters/Session.h | 4 - .../0_stateless/04206_jwt_command.reference | 25 ++ .../queries/0_stateless/04206_jwt_command.sh | 136 +++++++++++ 23 files changed, 820 insertions(+), 196 deletions(-) create mode 100644 src/Client/CommandJWTProvider.cpp create mode 100644 src/Client/CommandJWTProvider.h create mode 100644 tests/queries/0_stateless/04206_jwt_command.reference create mode 100755 tests/queries/0_stateless/04206_jwt_command.sh diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 
28543d790627..229e31df2926 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -405,9 +405,237 @@ clickhousectl skills --agent claude --agent codex ### Non-interactive flags {#non-interactive-flags} +<<<<<<< HEAD | Flag | Description | |------|-------------| | `--agent ` | Install Skills for a specific agent (can be repeated) | | `--global` | Use global scope; if omitted, project scope is used | | `--all` | Install Skills for all supported agents | | `--detected-only` | Install Skills for supported agents that were detected on the system | +======= +```bash +clickhouse-client clickhouse://localhost/my_database?s + +# equivalent to: +clickhouse-client clickhouse://localhost/my_database -s +``` + +Connect to the default host using the default port, the default user, and the default database. + +```bash +clickhouse-client clickhouse: +``` + +Connect to the default host using the default port, as the user `my_user` and no password. + +```bash +clickhouse-client clickhouse://my_user@ + +# Using a blank password between : and @ means asking the user to enter the password before starting the connection. +clickhouse-client clickhouse://my_user:@ +``` + +Connect to `localhost` using the email as the user name. `@` symbol is percent encoded to `%40`. + +```bash +clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000 +``` + +Connect to one of two hosts: `192.168.1.15`, `192.168.1.25`. + +```bash +clickhouse-client clickhouse://192.168.1.15,192.168.1.25 +``` + +## Query ID format {#query-id-format} + +In interactive mode ClickHouse Client shows the query ID for every query. By default, the ID is formatted like this: + +```sql +Query id: 927f137d-00f1-4175-8914-0dd066365e96 +``` + +A custom format may be specified in a configuration file inside a `query_id_formats` tag. The `{query_id}` placeholder in the format string is replaced with the query ID. Several format strings are allowed inside the tag. 
+This feature can be used to generate URLs to facilitate profiling of queries. + +**Example** + +```xml + + + http://speedscope-host/#profileURL=qp%3Fid%3D{query_id} + + +``` + +With the configuration above, the ID of a query is shown in the following format: + +```response +speedscope:http://speedscope-host/#profileURL=qp%3Fid%3Dc8ecc783-e753-4b38-97f1-42cddfb98b7d +``` + +## Configuration files {#configuration_files} + +ClickHouse Client uses the first existing file of the following: + +- A file that is defined with the `-c [ -C, --config, --config-file ]` parameter. +- `./clickhouse-client.[xml|yaml|yml]` +- `$XDG_CONFIG_HOME/clickhouse/config.[xml|yaml|yml]` (or `~/.config/clickhouse/config.[xml|yaml|yml]` if `XDG_CONFIG_HOME` is not set) +- `~/.clickhouse-client/config.[xml|yaml|yml]` +- `/etc/clickhouse-client/config.[xml|yaml|yml]` + +See the sample configuration file in the ClickHouse repository: [`clickhouse-client.xml`](https://github.com/ClickHouse/ClickHouse/blob/master/programs/client/clickhouse-client.xml) + + + + ```xml + + username + password + true + + + /etc/ssl/cert.pem + + + + ``` + + + ```yaml + user: username + password: 'password' + secure: true + openSSL: + client: + caConfig: '/etc/ssl/cert.pem' + ``` + + + +## Environment variable options {#environment-variable-options} + +The user name, password and host can be set via environment variables `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD` and `CLICKHOUSE_HOST`. +Command line arguments `--user`, `--password` or `--host`, or a [connection string](#connection_string) (if specified) take precedence over environment variables. + +## Command-line options {#command-line-options} + +All command-line options can be specified directly on the command line or as defaults in the [configuration file](#configuration_files). 
+ +### General options {#command-line-options-general} + +| Option | Description | Default | +|-----------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------|------------------------------| +| `-c [ -C, --config, --config-file ] ` | The location of the configuration file for the client, if it is not at one of the default locations. See [Configuration Files](#configuration_files). | - | +| `--help` | Print usage summary and exit. Combine with `--verbose` to display all possible options including query settings. | - | +| `--history_file ` | Path to a file containing the command history. | - | +| `--history_max_entries` | Maximum number of entries in the history file. | `1000000` (1 million) | +| `--prompt ` | Specify a custom prompt. | The `display_name` of the server | +| `--verbose` | Increase output verbosity. | - | +| `-V [ --version ]` | Print version and exit. | - | + +### Connection options {#command-line-options-connection} + +| Option | Description | Default | +|----------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------| +| `--connection ` | The name of preconfigured connection details from the configuration file. See [Connection credentials](#connection-credentials). | - | +| `-d [ --database ] ` | Select the database to default to for this connection. | The current database from the server settings (`default` by default) | +| `-h [ --host ] ` | The hostname of the ClickHouse server to connect to. 
Can either be a hostname or an IPv4 or IPv6 address. Multiple hosts can be passed via multiple arguments. | `localhost` | +| `--jwt ` | Use JSON Web Token (JWT) for authentication.

Server JWT authorization is only available in ClickHouse Cloud. | - | +| `--jwt-command ` | Shell command whose stdout is used as the JWT. Invoked on the first connect, before reconnects when the cached JWT is near expiry, and after the server rejects the cached token. See [`--jwt-command` details](#jwt-command-details) below. | - | +| `--jwt-command-timeout ` | Timeout in seconds for `--jwt-command`. Also settable as `<jwt-command-timeout>` in the config file; the command-line value takes precedence. | `30` | +| `--login[=]` | Authenticate via OAuth2. Bare `--login` (no `=`) triggers ClickHouse Cloud automatic login — the provider is inferred from the server. To authenticate against a custom OpenID Connect provider, supply a `mode` and `--oauth-credentials`: `--login=browser` runs the Authorization Code + PKCE flow (opens a browser), `--login=device` runs the Device Authorization flow (prints a URL and short code — no browser needed). | - | +| `--oauth-credentials ` | Path to an OAuth2 credentials JSON file (Google Cloud Console format). Required when using `--login=browser` or `--login=device` with a custom OpenID Connect provider. See [OAuth credentials file format](#oauth-credentials-file) below. Refresh tokens are cached in `~/.clickhouse-client/oauth_cache.json` (mode `0600`). | `~/.clickhouse-client/oauth_client.json` | +| `--no-warnings` | Disable showing warnings from `system.warnings` when the client connects to the server. | - | +| `--no-server-client-version-message` | Suppress server-client version mismatch message when the client connects to the server. | - | +| `--password ` | The password of the database user. You can also specify the password for a connection in the configuration file. If you do not specify the password, the client will ask for it. | - | +| `--port ` | The port the server is accepting connections on. The default ports are 9440 (TLS) and 9000 (no TLS).

Note: The client uses the native protocol and not HTTP(S). | `9440` if `--secure` is specified, `9000` otherwise. Always defaults to `9440` if the hostname ends in `.clickhouse.cloud`. | +| `-s [ --secure ]` | Whether to use TLS.

Enabled automatically when connecting to port 9440 (the default secure port) or ClickHouse Cloud.

You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#openssl). | Auto-enabled when connecting to port 9440 or ClickHouse Cloud | +| `--ssh-key-file ` | File containing the SSH private key for authenticating with the server. | - | +| `--ssh-key-passphrase ` | Passphrase for the SSH private key specified in `--ssh-key-file`. | - | +| `--tls-sni-override ` | If using TLS, the server name (SNI) to pass in the handshake. | The host provided via `-h` or `--host`. | +| `-u [ --user ] ` | The database user to connect as. | `default` | + +:::note +Instead of the `--host`, `--port`, `--user` and `--password` options, the client also supports [connection strings](#connection_string). +::: + +### OAuth credentials file {#oauth-credentials-file} + +When using `--login=browser` or `--login=device` with a custom OpenID Connect provider, the client reads a credentials JSON file. The file uses the same format produced by the Google Cloud Console ("OAuth 2.0 Client IDs" → "Download JSON"): + +```json +{ + "installed": { + "client_id": "YOUR_CLIENT_ID", + "client_secret": "YOUR_CLIENT_SECRET", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "redirect_uris": ["http://127.0.0.1"] + } +} +``` + +The top-level key can be `installed` (desktop/CLI apps) or `web`. Required fields: `client_id`, `auth_uri`, `token_uri`. Optional fields: + +| Field | Description | +|---|---| +| `client_secret` | Confidential-client secret. Omit (or leave empty) for OIDC public clients — the auth-code flow is always protected by PKCE and the device flow by the device code, so a secret is not required by the protocol. 
When the field is absent the client never sends a `client_secret` form parameter, which is the form public-client registrations require (Auth0, Microsoft Entra ID, Keycloak, Okta and others reject empty secrets with `invalid_client`). | +| `device_authorization_uri` | Device authorization endpoint. Discovered automatically via OIDC Discovery if absent. | +| `issuer` | OIDC issuer URL (e.g. `https://accounts.google.com`). Used to locate the discovery document when `device_authorization_uri` is not set. | + +The default path is `~/.clickhouse-client/oauth_client.json`. Override it with `--oauth-credentials `. + +After a successful login the obtained refresh token is cached in `~/.clickhouse-client/oauth_cache.json` (file mode `0600`). Subsequent runs reuse the cached token silently and only open the browser or print a device code when the refresh token has expired. + +### `--jwt-command` details {#jwt-command-details} + +The command is executed via `/bin/sh -c`. Stdout is taken as the JWT (one trailing newline stripped); any human-facing output (prompts, URLs, device codes) must go to stderr — it is forwarded unbuffered to the client's stderr. Stdin is closed. + +The command runs on the first connect to obtain the initial token. On subsequent (re)connects the client reuses the cached token; it re-invokes the command only when (a) the cached token parses as a JWT whose `exp` claim is within 30 seconds, or (b) the server rejects the cached token with an authentication failure, in which case the client refetches the token and retries the handshake once. Opaque tokens (anything that does not parse as a JWT) and JWTs without a usable `exp` claim are reused until the server rejects them — caching/refresh in those cases is the script's responsibility. + +```bash +clickhouse-client --jwt-command "curl -sS https://idp.example/token | jq -r .access_token" +``` + +Cannot be combined with `--jwt`, `--login`, or a non-default `--user`. 
Non-zero exit, empty output, or exceeding `--jwt-command-timeout` (default `30`s, overridable via `` in `~/.clickhouse-client/config.xml`) fails authentication. On timeout the entire helper subprocess tree is terminated. + +### Query options {#command-line-options-query} + +| Option | Description | +|---------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `--param_=` | Substitution value for a parameter of a [query with parameters](#cli-queries-with-parameters). | +| `-q [ --query ] ` | The query to run in batch mode. Can be specified multiple times (`--query "SELECT 1" --query "SELECT 2"`) or once with multiple semicolon-separated queries (`--query "SELECT 1; SELECT 2;"`). In the latter case, `INSERT` queries with formats other than `VALUES` must be separated by empty lines.

A single query can also be specified without a parameter: `clickhouse-client "SELECT 1"`

Cannot be used together with `--queries-file`. | +| `--queries-file ` | Path to a file containing queries. `--queries-file` can be specified multiple times, e.g. `--queries-file queries1.sql --queries-file queries2.sql`.

Cannot be used together with `--query`. | +| `-m [ --multiline ]` | If specified, allow multiline queries (do not send the query on Enter). Queries will be sent only when they are ended with a semicolon. | + +### Query settings {#command-line-options-query-settings} + +Query settings can be specified as command-line options in the client, for example: +```bash +$ clickhouse-client --max_threads 1 +``` + +See [Settings](../operations/settings/settings.md) for a list of settings. + +### Formatting options {#command-line-options-formatting} + +| Option | Description | Default | +|---------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------| +| `-f [ --format ] ` | Use the specified format to output the result.

See [Formats for Input and Output Data](formats.md) for a list of supported formats. | `TabSeparated` | +| `--pager ` | Pipe all output into this command. Typically `less` (e.g., `less -S` to display wide result sets) or similar. | - | +| `-E [ --vertical ]` | Use the [Vertical format](/interfaces/formats/Vertical) to output the result. This is the same as `–-format Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables. | - | + +### Execution details {#command-line-options-execution-details} + +| Option | Description | Default | +|-----------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------| +| `--enable-progress-table-toggle` | Enable toggling of the progress table by pressing the control key (Space). Only applicable in interactive mode with progress table printing enabled. | `enabled` | +| `--hardware-utilization` | Print hardware utilization information in progress bar. | - | +| `--memory-usage` | If specified, print memory usage to `stderr` in non-interactive mode.

Possible values:
• `none` - do not print memory usage
• `default` - print number of bytes
• `readable` - print memory usage in human-readable format | - | +| `--print-profile-events` | Print `ProfileEvents` packets. | - | +| `--progress` | Print progress of query execution.

Possible values:
• `tty\|on\|1\|true\|yes` - outputs to the terminal in interactive mode
• `err` - outputs to `stderr` in non-interactive mode
• `off\|0\|false\|no` - disables progress printing | `tty` in interactive mode, `off` in non-interactive (batch) mode | +| `--progress-table` | Print a progress table with changing metrics during query execution.

Possible values:
• `tty\|on\|1\|true\|yes` - outputs to the terminal in interactive mode
• `err` - outputs to `stderr` in non-interactive mode
• `off\|0\|false\|no` - disables the progress table | `tty` in interactive mode, `off` in non-interactive (batch) mode | +| `--stacktrace` | Print stack traces of exceptions. | - | +| `-t [ --time ]` | Print query execution time to `stderr` in non-interactive mode (for benchmarks). | - | +>>>>>>> 40a2b77fcc6 (Merge pull request #1809 from Altinity/feature/antalya-26.3/oauth-executable-token-in-client) diff --git a/programs/client/Client.cpp b/programs/client/Client.cpp index ff0abe34d655..39784d1d3237 100644 --- a/programs/client/Client.cpp +++ b/programs/client/Client.cpp @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -374,10 +375,30 @@ try } #if USE_JWT_CPP && USE_SSL - if (config().getBool("cloud_oauth_pending", false) && !config().has("jwt")) + /// Empty-value check; `config().has(k)` returns true for empty XML elements too. + const bool has_jwt_command_value = !config().getString("jwt-command", "").empty(); + const bool has_jwt_value = !config().getString("jwt", "").empty(); + + if (has_jwt_command_value && has_jwt_value) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "jwt-command and jwt cannot both be specified"); + + if (has_jwt_command_value) + { + int timeout = config().getInt("jwt-command-timeout", DEFAULT_JWT_COMMAND_TIMEOUT_SECONDS); + if (timeout <= 0) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "jwt-command-timeout must be positive, got {}", timeout); + + jwt_provider = std::make_shared(config().getString("jwt-command"), timeout); + config().setString("jwt", ""); + } + + if (config().getBool("cloud_oauth_pending", false) && !has_jwt_value && !has_jwt_command_value) { login(); } +#else + if (!config().getString("jwt-command", "").empty() || !config().getString("jwt", "").empty()) + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is disabled, because ClickHouse is built without JWT or SSL support"); #endif bool asked_password = false; @@ -393,10 +414,16 @@ try { auto code = e.code(); + /// Don't prompt for a password on a JWT 
auth failure. + const bool jwt_auth_in_use = + !config().getString("jwt", "").empty() + || !config().getString("jwt-command", "").empty(); + bool should_ask_password = !asked_password && is_interactive && (code == ErrorCodes::AUTHENTICATION_FAILED || code == ErrorCodes::REQUIRED_PASSWORD) && !config().has("password") && !config().getBool("ask-password", false) && - !config().has("ssh-key-file"); + !config().has("ssh-key-file") && + !jwt_auth_in_use; if (should_ask_password) { @@ -742,6 +769,10 @@ void Client::printHelpMessage(const OptionsDescription & options_description) void Client::addExtraOptions(OptionsDescription & options_description) { + static const std::string jwt_command_timeout_help = + "Timeout in seconds for --jwt-command. Default: " + std::to_string(DEFAULT_JWT_COMMAND_TIMEOUT_SECONDS) + + ". Also configurable as in the client config file."; + /// Main commandline options related to client functionality and all parameters from Settings. options_description.main_description->add_options() ("config,c", po::value(), "config-file path (another shorthand)") @@ -756,6 +787,11 @@ void Client::addExtraOptions(OptionsDescription & options_description) ("ssh-key-passphrase", po::value(), "Passphrase for the SSH private key specified by --ssh-key-file.") ("quota_key", po::value(), "A string to differentiate quotas when the user have keyed quotas configured on server") ("jwt", po::value(), "Use JWT for authentication") + ("jwt-command", po::value(), + "Shell command whose stdout is used as the JWT. Invoked on the first connect, " + "before reconnects when the cached JWT is near expiry, and after the server " + "rejects the cached token with an authentication failure.") + ("jwt-command-timeout", po::value(), jwt_command_timeout_help.c_str()) ("one-time-password", po::value(), "Time-based one-time password (TOTP) for two-factor authentication") ("login", po::value()->implicit_value(""), "Authenticate via OAuth2. 
Optional mode: 'browser' (auth-code + PKCE, opens browser) " @@ -931,6 +967,27 @@ void Client::processOptions( config().setString("jwt", options["jwt"].as()); config().setString("user", ""); } + if (options.contains("jwt-command-timeout")) + config().setInt("jwt-command-timeout", options["jwt-command-timeout"].as()); + + if (options.contains("jwt-command")) + { +#if USE_JWT_CPP && USE_SSL + if (options.contains("jwt")) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "--jwt-command and --jwt cannot both be specified"); + if (options.contains("login")) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "--jwt-command and --login cannot both be specified"); + if (!options["user"].defaulted()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "User and JWT flags can't be specified together"); + + /// Defer execution to Client::main, after processConfig has loaded the XML config. + /// Reading config().getInt("jwt-command-timeout", ...) here would miss the XML value. + config().setString("jwt-command", options["jwt-command"].as()); + config().setString("user", ""); +#else + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "JWT is disabled, because ClickHouse is built without JWT or SSL support"); +#endif + } if (options.count("oauth-credentials") && !options.count("login")) throw Exception( ErrorCodes::BAD_ARGUMENTS, @@ -938,68 +995,80 @@ void Client::processOptions( if (options.count("login")) { - /// Reject mixed JWT + --login from any source. The --login branch below - /// ends up calling config().setString("jwt", jwt_provider->getJWT()), - /// which would silently overwrite a JWT supplied via --jwt or via the - /// XML config file. config().has("jwt") covers both: CLI --jwt was - /// already copied into config() above, and a element in - /// ~/.clickhouse-client/config.xml is loaded into config() at startup. 
- if (config().has("jwt")) - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "--login cannot be combined with a JWT (provided via --jwt or in the config file)"); - - const std::string login_mode = options["login"].as(); - if (!login_mode.empty() && login_mode != "browser" && login_mode != "device") - throw Exception( - ErrorCodes::BAD_ARGUMENTS, - "--login value must be 'browser' or 'device', got '{}'", - login_mode); + bool defer_to_existing_jwt = false; #if USE_JWT_CPP && USE_SSL - if (!options["user"].defaulted()) - throw Exception(ErrorCodes::BAD_ARGUMENTS, "--user and --login cannot both be specified"); - - // Bare --login (empty mode, including auto-added for *.clickhouse.cloud) → cloud path. - // Explicit --login=browser or --login=device (or --oauth-credentials) → credentials-file - // OIDC path. This prevents the credentials file from hijacking the cloud auto-login. - const bool use_credentials_file - = !login_mode.empty() - || options.count("oauth-credentials"); + /// --login would overwrite config["jwt"]; reject if a JWT is already configured. + /// Auto-added --login (cloud endpoint, no CLI auth) defers silently to it instead. + const bool jwt_already_configured + = !config().getString("jwt", "").empty() + || !config().getString("jwt-command", "").empty(); - if (use_credentials_file) + if (jwt_already_configured) { - const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) - const std::string default_creds_path = home_path_cstr - ? std::string(home_path_cstr) + "/.clickhouse-client/oauth_client.json" - : ""; - - const std::string creds_path = options.count("oauth-credentials") - ? options["oauth-credentials"].as() - : default_creds_path; - - auto creds = loadOAuthCredentials(creds_path); - const auto mode = (login_mode == "device") ? 
OAuthFlowMode::Device : OAuthFlowMode::AuthCode; - - // createOAuthJWTProvider runs the initial flow (trying the cached - // refresh token first) and returns a provider that Connection can - // call to refresh the id_token transparently during long sessions. - jwt_provider = createOAuthJWTProvider(creds, mode); - config().setString("jwt", jwt_provider->getJWT()); - config().setString("user", ""); + if (!login_was_auto_added) + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "--login cannot be combined with a JWT (provided via --jwt, --jwt-command, or in the config file)"); + login_was_auto_added = false; + defer_to_existing_jwt = true; } - else +#endif + + if (!defer_to_existing_jwt) { - // Cloud-specific login path — bare --login, including auto-added for - // *.clickhouse.cloud endpoints. Use a separate config key so that - // argsToConfig() overwriting config["login"] with the raw string value - // cannot cause getBool("login") to throw in main(). - config().setBool("cloud_oauth_pending", true); - config().setString("user", ""); - } + const std::string login_mode = options["login"].as(); + if (!login_mode.empty() && login_mode != "browser" && login_mode != "device") + throw Exception( + ErrorCodes::BAD_ARGUMENTS, + "--login value must be 'browser' or 'device', got '{}'", + login_mode); + +#if USE_JWT_CPP && USE_SSL + if (!options["user"].defaulted()) + throw Exception(ErrorCodes::BAD_ARGUMENTS, "--user and --login cannot both be specified"); + + // Bare --login (empty mode, including auto-added for *.clickhouse.cloud) → cloud path. + // Explicit --login=browser or --login=device (or --oauth-credentials) → credentials-file + // OIDC path. This prevents the credentials file from hijacking the cloud auto-login. 
+ const bool use_credentials_file + = !login_mode.empty() + || options.count("oauth-credentials"); + + if (use_credentials_file) + { + const char * home_path_cstr = getenv("HOME"); // NOLINT(concurrency-mt-unsafe) + const std::string default_creds_path = home_path_cstr + ? std::string(home_path_cstr) + "/.clickhouse-client/oauth_client.json" + : ""; + + const std::string creds_path = options.count("oauth-credentials") + ? options["oauth-credentials"].as() + : default_creds_path; + + auto creds = loadOAuthCredentials(creds_path); + const auto mode = (login_mode == "device") ? OAuthFlowMode::Device : OAuthFlowMode::AuthCode; + + // createOAuthJWTProvider runs the initial flow (trying the cached + // refresh token first) and returns a provider that Connection can + // call to refresh the id_token transparently during long sessions. + jwt_provider = createOAuthJWTProvider(creds, mode); + config().setString("jwt", jwt_provider->getJWT()); + config().setString("user", ""); + } + else + { + // Cloud-specific login path — bare --login, including auto-added for + // *.clickhouse.cloud endpoints. Use a separate config key so that + // argsToConfig() overwriting config["login"] with the raw string value + // cannot cause getBool("login") to throw in main(). 
+ config().setBool("cloud_oauth_pending", true); + config().setString("user", ""); + } #else - throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "OAuth login requires a build with JWT and SSL support"); + throw Exception(ErrorCodes::SUPPORT_IS_DISABLED, "OAuth login requires a build with JWT and SSL support"); #endif + } } #if USE_JWT_CPP && USE_SSL if (options.contains("oauth-url")) diff --git a/src/Access/AccessControl.cpp b/src/Access/AccessControl.cpp index 84ef237014a2..546c1f238526 100644 --- a/src/Access/AccessControl.cpp +++ b/src/Access/AccessControl.cpp @@ -695,7 +695,7 @@ void AccessControl::restoreFromBackup(RestorerFromBackup & restorer, const Strin changes_notifier->sendNotifications(); } -void AccessControl::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) +void AccessControl::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config, const ConnectionTimeouts & token_http_timeouts) { /// Re-read `enable_token_auth` on every config reload. `setupFromMainConfig` /// runs only once at startup, so without this re-sync flipping the flag in @@ -703,7 +703,7 @@ void AccessControl::setExternalAuthenticatorsConfig(const Poco::Util::AbstractCo /// value in place -- operators who toggle token auth off in response to an /// IdP outage or a credential leak would see no effect until restart. 
setTokenAuthEnabled(config.getBool("enable_token_auth", true)); - external_authenticators->setConfiguration(config, getLogger(), isTokenAuthEnabled()); + external_authenticators->setConfiguration(config, getLogger(), token_http_timeouts, isTokenAuthEnabled()); } diff --git a/src/Access/AccessControl.h b/src/Access/AccessControl.h index 71c37d243359..9e14921b6691 100644 --- a/src/Access/AccessControl.h +++ b/src/Access/AccessControl.h @@ -50,6 +50,7 @@ class SettingsProfileElements; class ClientInfo; class ExternalAuthenticators; class AccessChangesNotifier; +struct ConnectionTimeouts; struct Settings; @@ -135,7 +136,7 @@ class AccessControl : public MultipleAccessStorage /// Makes a backup of access entities. void restoreFromBackup(RestorerFromBackup & restorer, const String & data_path_in_backup) override; - void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config); + void setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config, const ConnectionTimeouts & token_http_timeouts); /// Sets the default profile's name. /// The default profile's settings are always applied before any other profile's. diff --git a/src/Access/Common/JWKSProvider.cpp b/src/Access/Common/JWKSProvider.cpp index 2b656df3a3a4..5daa09abb54e 100644 --- a/src/Access/Common/JWKSProvider.cpp +++ b/src/Access/Common/JWKSProvider.cpp @@ -77,20 +77,10 @@ JWKSType JWKSClient::getJWKS() Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, jwks_uri.getPathAndQuery()}; - /// Bound every JWKS fetch to a known limit. Without this, Poco's default - /// `HTTPSession` timeout of 60 seconds applies, and because the JWKS fetch - /// runs while `ExternalAuthenticators::mutex` is held by the outer - /// `checkTokenCredentials` call, a single slow or hung JWKS endpoint would - /// stall the whole auth subsystem (LDAP, Kerberos, HTTP basic, all other - /// token auth paths) for up to a full minute per request. 
10 seconds is a - /// conservative cap: well above any healthy provider latency, well below - /// the default. - const Poco::Timespan jwks_http_timeout(/*seconds=*/10, 0); - if (jwks_uri.getScheme() == "https") { Poco::Net::HTTPSClientSession session = Poco::Net::HTTPSClientSession(jwks_uri.getHost(), jwks_uri.getPort()); - session.setTimeout(jwks_http_timeout, jwks_http_timeout, jwks_http_timeout); + setTimeouts(session, timeouts); session.sendRequest(request); std::istream & response_stream = session.receiveResponse(response); if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK || !response_stream) @@ -101,7 +91,7 @@ JWKSType JWKSClient::getJWKS() else { Poco::Net::HTTPClientSession session = Poco::Net::HTTPClientSession(jwks_uri.getHost(), jwks_uri.getPort()); - session.setTimeout(jwks_http_timeout, jwks_http_timeout, jwks_http_timeout); + setTimeouts(session, timeouts); session.sendRequest(request); std::istream & response_stream = session.receiveResponse(response); if (response.getStatus() != Poco::Net::HTTPResponse::HTTP_OK || !response_stream) diff --git a/src/Access/Common/JWKSProvider.h b/src/Access/Common/JWKSProvider.h index c59266d9b40c..91045a0bd4b7 100644 --- a/src/Access/Common/JWKSProvider.h +++ b/src/Access/Common/JWKSProvider.h @@ -1,6 +1,7 @@ #include #if USE_JWT_CPP +#include #include #include #include @@ -29,7 +30,8 @@ class IJWKSProvider class JWKSClient : public IJWKSProvider { public: - explicit JWKSClient(const String & uri, const size_t refresh_ms_): refresh_timeout(refresh_ms_), jwks_uri(uri) {} + explicit JWKSClient(const String & uri, const size_t refresh_ms_, const ConnectionTimeouts & timeouts_) + : refresh_timeout(refresh_ms_), jwks_uri(uri), timeouts(timeouts_) {} ~JWKSClient() override = default; JWKSClient(const JWKSClient &) = delete; @@ -42,6 +44,7 @@ class JWKSClient : public IJWKSProvider private: size_t refresh_timeout; Poco::URI jwks_uri; + ConnectionTimeouts timeouts; std::shared_mutex mutex; std::optional 
cached_jwks; diff --git a/src/Access/ExternalAuthenticators.cpp b/src/Access/ExternalAuthenticators.cpp index 9a444a6c2388..47092ece598b 100644 --- a/src/Access/ExternalAuthenticators.cpp +++ b/src/Access/ExternalAuthenticators.cpp @@ -307,6 +307,7 @@ void ExternalAuthenticators::reset() void parseTokenProcessors(std::map> & token_processors, const Poco::Util::AbstractConfiguration & config, const String & token_processors_config, + const ConnectionTimeouts & timeouts, LoggerPtr log) { Poco::Util::AbstractConfiguration::Keys token_processors_keys; @@ -321,7 +322,7 @@ void parseTokenProcessors(std::map> & t String prefix = fmt::format("{}.{}", token_processors_config, processor); try { - parsed[processor] = ITokenProcessor::parseTokenProcessor(config, prefix, processor); + parsed[processor] = ITokenProcessor::parseTokenProcessor(config, prefix, processor, timeouts); } catch (...) { @@ -340,17 +341,7 @@ bool ExternalAuthenticators::isTokenAuthEnabled() const return token_auth_enabled; } -bool ExternalAuthenticators::hasTokenProcessor(const String & name) const -{ - std::lock_guard lock(mutex); - if (!token_auth_enabled) - return false; - if (name.empty()) - return true; - return token_processors.contains(name); -} - -void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log, bool token_auth_enabled_) +void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log, const ConnectionTimeouts & token_http_timeouts, bool token_auth_enabled_) { std::lock_guard lock(mutex); resetImpl(); @@ -458,7 +449,7 @@ void ExternalAuthenticators::setConfiguration(const Poco::Util::AbstractConfigur { try { - parseTokenProcessors(token_processors, config, token_processors_config, log); + parseTokenProcessors(token_processors, config, token_processors_config, token_http_timeouts, log); } catch (...) 
{ diff --git a/src/Access/ExternalAuthenticators.h b/src/Access/ExternalAuthenticators.h index 1486226f5bd0..1957b6d6d563 100644 --- a/src/Access/ExternalAuthenticators.h +++ b/src/Access/ExternalAuthenticators.h @@ -39,17 +39,10 @@ class ExternalAuthenticators { public: void reset(); - void setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log, bool token_auth_enabled = true); + void setConfiguration(const Poco::Util::AbstractConfiguration & config, LoggerPtr log, const ConnectionTimeouts & token_http_timeouts, bool token_auth_enabled = true); bool isTokenAuthEnabled() const; - /// Returns true if a token processor with the given name is currently - /// configured. Used by `Session::checkIfUserIsStillValid` to terminate - /// active sessions whose authenticating processor was removed by config - /// reload (M-28). Empty `name` is treated as "no specific pin" and - /// returns true (token auth must still be enabled, of course). - bool hasTokenProcessor(const String & name) const; - // The name and readiness of the credentials must be verified before calling these. 
bool checkLDAPCredentials(const String & server, const BasicCredentials & credentials, const LDAPClient::RoleSearchParamsList * role_search_params = nullptr, LDAPClient::SearchResultsList * role_search_results = nullptr) const; diff --git a/src/Access/TokenProcessors.h b/src/Access/TokenProcessors.h index 7f2fea416980..24e7006b4bfa 100644 --- a/src/Access/TokenProcessors.h +++ b/src/Access/TokenProcessors.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #if USE_JWT_CPP @@ -45,7 +46,8 @@ class ITokenProcessor static std::unique_ptr parseTokenProcessor( const Poco::Util::AbstractConfiguration & config, const String & prefix, - const String & processor_name); + const String & processor_name, + const ConnectionTimeouts & timeouts); protected: const String processor_name; @@ -145,7 +147,8 @@ class JwksJwtProcessor : public ITokenProcessor const String & claims_, size_t verifier_leeway_, const String & jwks_uri_, - size_t jwks_cache_lifetime_) + size_t jwks_cache_lifetime_, + const ConnectionTimeouts & timeouts_) : JwksJwtProcessor(processor_name_, token_cache_lifetime_, username_claim_, @@ -156,7 +159,7 @@ class JwksJwtProcessor : public ITokenProcessor allow_no_expiration_, claims_, verifier_leeway_, - std::make_shared(jwks_uri_, jwks_cache_lifetime_)) {} + std::make_shared(jwks_uri_, jwks_cache_lifetime_, timeouts_)) {} bool resolveAndValidate(TokenCredentials & credentials) const override; bool checkClaims(const TokenCredentials & credentials, const String & claims_to_check) const override; @@ -186,12 +189,14 @@ class GoogleTokenProcessor : public ITokenProcessor UInt64 token_cache_lifetime_, const String & username_claim_, const String & groups_claim_, - const String & expected_audience_); + const String & expected_audience_, + const ConnectionTimeouts & timeouts_); bool resolveAndValidate(TokenCredentials & credentials) const override; private: const String expected_audience; + const ConnectionTimeouts timeouts; }; class OpenIdTokenProcessor : public 
ITokenProcessor @@ -208,7 +213,8 @@ class OpenIdTokenProcessor : public ITokenProcessor const String & userinfo_endpoint_, const String & token_introspection_endpoint_, const String & introspection_client_id_, - const String & introspection_client_secret_); + const String & introspection_client_secret_, + const ConnectionTimeouts & timeouts_); /// Obtain endpoints from openid-configuration URL OpenIdTokenProcessor(const String & processor_name_, @@ -224,7 +230,8 @@ class OpenIdTokenProcessor : public ITokenProcessor const String & introspection_client_id_, const String & introspection_client_secret_, const RemoteHostFilter & remote_host_filter_, - bool allow_http_discovery_urls_); + bool allow_http_discovery_urls_, + const ConnectionTimeouts & timeouts_); bool resolveAndValidate(TokenCredentials & credentials) const override; private: @@ -237,6 +244,7 @@ class OpenIdTokenProcessor : public ITokenProcessor String expected_audience; String introspection_client_id; String introspection_client_secret; + ConnectionTimeouts timeouts; /// Populated only by the discovery constructor when the doc advertises a `jwks_uri`. std::optional jwt_validator = std::nullopt; diff --git a/src/Access/TokenProcessorsOpaque.cpp b/src/Access/TokenProcessorsOpaque.cpp index 16ff14897481..1bba0747fb44 100644 --- a/src/Access/TokenProcessorsOpaque.cpp +++ b/src/Access/TokenProcessorsOpaque.cpp @@ -60,27 +60,7 @@ namespace return value.get(); } - /// Bound every IdP-bound HTTP call (OIDC discovery, userinfo, introspection) - /// to a known limit. Without this, Poco's default `HTTPSession` timeout of - /// 60 seconds applies, and because `ExternalAuthenticators::mutex` is held - /// for the entire duration of `checkTokenCredentials` -- including the - /// outbound call this function makes -- a single slow or hung IdP would - /// stall the whole auth subsystem (LDAP, Kerberos, HTTP basic, every other - /// token auth) for up to a full minute per request. 
- /// - /// 10 seconds is a deliberately conservative cap: well above any healthy - /// IdP latency, well below the default. Operators who need a different - /// value would have to expose this via per-processor config; for now it - /// is hard-coded so deployments inherit the bounded behavior automatically. - constexpr int kIdpHttpTimeoutSeconds = 10; - - void applyIdpSessionTimeouts(Poco::Net::HTTPClientSession & session) - { - const Poco::Timespan timeout(kIdpHttpTimeoutSeconds, 0); - session.setTimeout(timeout, timeout, timeout); - } - - picojson::object getObjectFromURI(const Poco::URI & uri, const String & token = "") + picojson::object getObjectFromURI(const Poco::URI & uri, const ConnectionTimeouts & timeouts, const String & token = "") { Poco::Net::HTTPResponse response; std::ostringstream responseString; @@ -92,14 +72,14 @@ namespace if (uri.getScheme() == "https") { Poco::Net::HTTPSClientSession session(uri.getHost(), uri.getPort()); - applyIdpSessionTimeouts(session); + setTimeouts(session, timeouts); session.sendRequest(request); Poco::StreamCopier::copyStream(session.receiveResponse(response), responseString); } else { Poco::Net::HTTPClientSession session(uri.getHost(), uri.getPort()); - applyIdpSessionTimeouts(session); + setTimeouts(session, timeouts); session.sendRequest(request); Poco::StreamCopier::copyStream(session.receiveResponse(response), responseString); } @@ -125,7 +105,8 @@ namespace picojson::object postFormToURI(const Poco::URI & uri, const std::vector> & form, const String & basic_user, - const String & basic_password) + const String & basic_password, + const ConnectionTimeouts & timeouts) { Poco::Net::HTTPResponse response; std::ostringstream responseString; @@ -155,7 +136,7 @@ namespace auto send_and_receive = [&](Poco::Net::HTTPClientSession & session) { - applyIdpSessionTimeouts(session); + setTimeouts(session, timeouts); session.sendRequest(request) << body; Poco::StreamCopier::copyStream(session.receiveResponse(response), 
responseString); }; @@ -192,9 +173,11 @@ GoogleTokenProcessor::GoogleTokenProcessor(const String & processor_name_, UInt64 token_cache_lifetime_, const String & username_claim_, const String & groups_claim_, - const String & expected_audience_) + const String & expected_audience_, + const ConnectionTimeouts & timeouts_) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_) , expected_audience(expected_audience_) + , timeouts(timeouts_) { /// Without an audience pin, this processor accepts any Google access token /// that authenticates the user against Google -- including tokens minted for @@ -215,7 +198,7 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co const String & token = credentials.getToken(); std::unordered_map user_info; - picojson::object user_info_json = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/userinfo"), token); + picojson::object user_info_json = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/userinfo"), timeouts, token); if (!user_info_json.contains("email")) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, @@ -227,7 +210,7 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co String user_name = user_info[username_claim]; - auto token_info = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/tokeninfo"), token); + auto token_info = getObjectFromURI(Poco::URI("https://www.googleapis.com/oauth2/v3/tokeninfo"), timeouts, token); /// Audience binding (H-10): the Google /tokeninfo endpoint authoritatively /// reports the OAuth client_id the access token was issued for in its 'aud' @@ -274,7 +257,7 @@ bool GoogleTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co try { - auto groups_response = getObjectFromURI(get_groups_uri, token); + auto groups_response = getObjectFromURI(get_groups_uri, timeouts, token); if (!groups_response.contains("memberships") || 
!groups_response["memberships"].is()) { @@ -348,14 +331,16 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, const String & userinfo_endpoint_, const String & token_introspection_endpoint_, const String & introspection_client_id_, - const String & introspection_client_secret_) + const String & introspection_client_secret_, + const ConnectionTimeouts & timeouts_) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), userinfo_endpoint(userinfo_endpoint_), token_introspection_endpoint(token_introspection_endpoint_), expected_issuer(expected_issuer_), expected_audience(expected_audience_), introspection_client_id(introspection_client_id_), - introspection_client_secret(introspection_client_secret_) + introspection_client_secret(introspection_client_secret_), + timeouts(timeouts_) { } @@ -372,12 +357,14 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, const String & introspection_client_id_, const String & introspection_client_secret_, const RemoteHostFilter & remote_host_filter_, - bool allow_http_discovery_urls_) + bool allow_http_discovery_urls_, + const ConnectionTimeouts & timeouts_) : ITokenProcessor(processor_name_, token_cache_lifetime_, username_claim_, groups_claim_), expected_issuer(expected_issuer_), expected_audience(expected_audience_), introspection_client_id(introspection_client_id_), - introspection_client_secret(introspection_client_secret_) + introspection_client_secret(introspection_client_secret_), + timeouts(timeouts_) { /// Defense in depth: the discovery endpoint itself was already validated by /// the parser, but re-check here in case this constructor is reached via a @@ -393,7 +380,7 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, processor_name, openid_config_endpoint_, e.message()); } - const picojson::object openid_config = getObjectFromURI(Poco::URI(openid_config_endpoint_)); + const picojson::object openid_config = 
getObjectFromURI(Poco::URI(openid_config_endpoint_), timeouts); if (!openid_config.contains("userinfo_endpoint")) throw Exception(ErrorCodes::AUTHENTICATION_FAILED, @@ -544,7 +531,8 @@ OpenIdTokenProcessor::OpenIdTokenProcessor(const String & processor_name_, "", verifier_leeway_, getValueByKey(openid_config, "jwks_uri").value(), - jwks_cache_lifetime_); + jwks_cache_lifetime_, + timeouts); } } @@ -559,7 +547,8 @@ bool OpenIdTokenProcessor::runIntrospection(const String & token, response = postFormToURI(token_introspection_endpoint, {{"token", token}, {"token_type_hint", "access_token"}}, introspection_client_id, - introspection_client_secret); + introspection_client_secret, + timeouts); } catch (const Exception & e) { @@ -715,7 +704,7 @@ bool OpenIdTokenProcessor::resolveAndValidate(TokenCredentials & credentials) co { try { - user_info_json = getObjectFromURI(userinfo_endpoint, token); + user_info_json = getObjectFromURI(userinfo_endpoint, timeouts, token); username = getValueByKey(user_info_json, username_claim).value(); } catch (...) 
diff --git a/src/Access/TokenProcessorsParse.cpp b/src/Access/TokenProcessorsParse.cpp index 015060cffb75..0b0273e8433c 100644 --- a/src/Access/TokenProcessorsParse.cpp +++ b/src/Access/TokenProcessorsParse.cpp @@ -17,7 +17,8 @@ namespace ErrorCodes std::unique_ptr ITokenProcessor::parseTokenProcessor( const Poco::Util::AbstractConfiguration & config, const String & prefix, - const String & processor_name) + const String & processor_name, + const ConnectionTimeouts & timeouts) { if (!config.hasProperty(prefix + ".type")) throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "'type' parameter shall be specified in token_processor configuration.'"); @@ -84,7 +85,7 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( if (provider_type == "google") { - return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience); + return std::make_unique(processor_name, token_cache_lifetime, username_claim, groups_claim, expected_audience, timeouts); } else if (provider_type == "openid") { @@ -131,7 +132,8 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( introspection_client_id, introspection_client_secret, remote_host_filter, - allow_http_discovery_urls); + allow_http_discovery_urls, + timeouts); } if (locally_configured) @@ -171,7 +173,8 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( userinfo_endpoint, token_introspection_endpoint, introspection_client_id, - introspection_client_secret); + introspection_client_secret, + timeouts); } throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, @@ -236,7 +239,8 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( config.getString(prefix + ".claims", ""), config.getUInt64(prefix + ".verifier_leeway", 60), jwks_uri, - config.getUInt64(prefix + ".jwks_cache_lifetime", 3600)); + config.getUInt64(prefix + ".jwks_cache_lifetime", 3600), + timeouts); } else if (provider_type == "jwt_static_key") { @@ -293,7 +297,8 @@ std::unique_ptr 
ITokenProcessor::parseTokenProcessor( config.getString(prefix + ".claims", ""), config.getUInt64(prefix + ".verifier_leeway", 60), jwks_uri, - config.getUInt(prefix + ".jwks_cache_lifetime", 3600)); + config.getUInt(prefix + ".jwks_cache_lifetime", 3600), + timeouts); } else throw DB::Exception(ErrorCodes::INVALID_CONFIG_PARAMETER, "Invalid type: {}", provider_type); @@ -303,7 +308,8 @@ std::unique_ptr ITokenProcessor::parseTokenProcessor( std::unique_ptr ITokenProcessor::parseTokenProcessor( const Poco::Util::AbstractConfiguration &, const String &, - const String &) + const String &, + const ConnectionTimeouts &) { throw DB::Exception(ErrorCodes::SUPPORT_IS_DISABLED, "Failed to parse token_processor, ClickHouse was built without JWT support."); } diff --git a/src/Client/CommandJWTProvider.cpp b/src/Client/CommandJWTProvider.cpp new file mode 100644 index 000000000000..40b7dd92830f --- /dev/null +++ b/src/Client/CommandJWTProvider.cpp @@ -0,0 +1,129 @@ +#include + +#if USE_JWT_CPP && USE_SSL +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int AUTHENTICATION_FAILED; +} + +CommandJWTProvider::CommandJWTProvider(std::string command_, int timeout_seconds_) + : JWTProvider(/*auth_url=*/"", /*client_id=*/"", /*audience=*/"", std::cout, std::cerr) + , command(std::move(command_)) + , timeout_seconds(timeout_seconds_) +{ +} + +std::string CommandJWTProvider::getJWT() +{ + ShellCommand::Config config(command); + config.new_process_group = true; // so the watchdog can kill the whole tree, not just /bin/sh + auto child = ShellCommand::execute(config); + child->in.close(); // we don't write to the script's stdin; close so reads see EOF + const pid_t pid = child->getPid(); + + std::mutex mutex; + std::condition_variable cv; + bool finished = false; + std::atomic timed_out{false}; + + /// Default-construct the threads first 
and install the cleanup guard before assigning, + /// so a thread-constructor failure mid-assignment cannot leave a joinable thread that + /// would call std::terminate on destruction. + std::thread watchdog; + std::thread stderr_forwarder; + SCOPE_EXIT({ + { + std::lock_guard lock(mutex); + finished = true; + } + cv.notify_all(); + if (stderr_forwarder.joinable()) stderr_forwarder.join(); + if (watchdog.joinable()) watchdog.join(); + }); + + watchdog = std::thread([&, pid]() + { + std::unique_lock lock(mutex); + if (!cv.wait_for(lock, std::chrono::seconds(timeout_seconds), [&]{ return finished; })) + { + timed_out = true; + ::kill(-pid, SIGKILL); + } + }); + + /// Drain stderr on a separate thread so the child doesn't block on a full pipe. + /// 1-byte buffer flushes each byte so interactive prompts (e.g. device-flow URL) surface live. + stderr_forwarder = std::thread([&child]() + { + try + { + WriteBufferFromOStream wb(std::cerr, /*size=*/1); + copyData(child->err, wb); + wb.finalize(); + } + catch (...) {} + }); + + std::string token; + readStringUntilEOF(token, child->out); + + /// Drain stderr fully before tryWait, since tryWait closes child->err and reading + /// a buffer whose fd has just been closed from another thread is UB. + stderr_forwarder.join(); + + /// Cancel the watchdog before tryWait. After tryWait reaps the child, the kernel + /// may recycle the pid; if the watchdog then fires kill(-pid, ...) it could hit an + /// unrelated process group. + { + std::lock_guard lock(mutex); + finished = true; + } + cv.notify_all(); + watchdog.join(); + + /// Reap with a catch: on timeout the child is signaled, and we want our own + /// error message rather than the noisy CHILD_WAS_NOT_EXITED_NORMALLY one. + int retcode = 0; + try { retcode = child->tryWait(); } + catch (...) 
{ if (!timed_out.load()) throw; } + + if (timed_out.load()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "--jwt-command timed out after {} seconds", timeout_seconds); + + if (retcode != 0) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, + "--jwt-command exited with non-zero status {}", retcode); + + if (!token.empty() && token.back() == '\n') + token.pop_back(); + + if (token.empty()) + throw Exception(ErrorCodes::AUTHENTICATION_FAILED, "--jwt-command produced empty output"); + + return token; +} + +} + +#endif diff --git a/src/Client/CommandJWTProvider.h b/src/Client/CommandJWTProvider.h new file mode 100644 index 000000000000..0e8805415063 --- /dev/null +++ b/src/Client/CommandJWTProvider.h @@ -0,0 +1,33 @@ +#pragma once + +#include + +namespace DB +{ + +inline constexpr int DEFAULT_JWT_COMMAND_TIMEOUT_SECONDS = 30; + +} + +#if USE_JWT_CPP && USE_SSL + +#include + +namespace DB +{ + +class CommandJWTProvider : public JWTProvider +{ +public: + CommandJWTProvider(std::string command_, int timeout_seconds_); + + std::string getJWT() override; + +private: + std::string command; + int timeout_seconds; +}; + +} + +#endif diff --git a/src/Client/Connection.cpp b/src/Client/Connection.cpp index dd336dd8e3c0..e2bf73e37d6a 100644 --- a/src/Client/Connection.cpp +++ b/src/Client/Connection.cpp @@ -86,6 +86,9 @@ namespace ErrorCodes extern const int BAD_ARGUMENTS; extern const int EMPTY_DATA_PASSED; extern const int LOGICAL_ERROR; +#if USE_JWT_CPP && USE_SSL + extern const int AUTHENTICATION_FAILED; +#endif } Connection::~Connection() @@ -146,7 +149,29 @@ void Connection::connect(const ConnectionTimeouts & timeouts) /// if connection was broken it is necessary to cancel it before reconnecting disconnect(); +#if USE_JWT_CPP && USE_SSL + /// Skip fetch when current JWT is still usable; opaque tokens and JWTs without + /// a usable `exp` claim are refreshed reactively on rejection below. 
+ if (jwt_provider && jwt.empty()) + { + jwt = jwt_provider->getJWT(); + } + else if (jwt_provider && JWTProvider::isJWT(jwt)) + { + const Poco::Timestamp expiry = JWTProvider::getJwtExpiry(jwt); + const Poco::Timestamp refresh_threshold = Poco::Timestamp() + Poco::Timespan(30, 0); + if (expiry > Poco::Timestamp(0) && expiry < refresh_threshold) + jwt = jwt_provider->getJWT(); + } + + bool jwt_retried_on_rejection = false; +#endif + ProfileEvents::increment(ProfileEvents::DistributedConnectionConnectCount); + +#if USE_JWT_CPP && USE_SSL +retry_handshake_with_fresh_jwt: +#endif try { LOG_TRACE(log_wrapper.get(), "Connecting. Database: {}. User: {}{}{}. Bind_Host: {}", @@ -359,6 +384,23 @@ void Connection::connect(const ConnectionTimeouts & timeouts) e.addMessage("({})", getDescription(/*with_extra*/ true)); throw; } +#if USE_JWT_CPP && USE_SSL + catch (DB::Exception & e) + { + disconnect(); + + if (e.code() == ErrorCodes::AUTHENTICATION_FAILED && jwt_provider && !jwt_retried_on_rejection) + { + LOG_DEBUG(log_wrapper.get(), + "Server rejected JWT during handshake, fetching a fresh token and retrying once"); + jwt_retried_on_rejection = true; + jwt = jwt_provider->getJWT(); + goto retry_handshake_with_fresh_jwt; + } + + throw; + } +#endif catch (Poco::Net::NetException & e) { disconnect(); @@ -848,23 +890,6 @@ void Connection::sendQuery( client_info = &new_client_info; } -#if USE_JWT_CPP && USE_SSL - if (jwt_provider && !jwt.empty()) - { - if (JWTProvider::getJwtExpiry(jwt) < (Poco::Timestamp() + Poco::Timespan(30, 0))) - { - String new_jwt = jwt_provider->getJWT(); - if (!new_jwt.empty()) - { - jwt = new_jwt; - // We have a new token, so we need to reconnect. - // The current connection is still using the old token. 
- disconnect(); - } - } - } -#endif - if (!connected) connect(timeouts); diff --git a/src/Client/JWTProvider.cpp b/src/Client/JWTProvider.cpp index a7dca94f8403..7f804bcc2ab4 100644 --- a/src/Client/JWTProvider.cpp +++ b/src/Client/JWTProvider.cpp @@ -252,6 +252,21 @@ Poco::Timestamp JWTProvider::getJwtExpiry(const std::string & token) } } +bool JWTProvider::isJWT(const std::string & token) +{ + if (token.empty()) + return false; + try + { + jwt::decode(token); + return true; + } + catch (const std::exception &) + { + return false; + } +} + std::unique_ptr createJwtProvider( const std::string & auth_url, const std::string & client_id, diff --git a/src/Client/JWTProvider.h b/src/Client/JWTProvider.h index 3d18a73a854c..fa9b87504675 100644 --- a/src/Client/JWTProvider.h +++ b/src/Client/JWTProvider.h @@ -27,6 +27,8 @@ class JWTProvider /// including initial login and subsequent refreshes. virtual std::string getJWT(); static Poco::Timestamp getJwtExpiry(const std::string & token); + /// Returns true iff `token` decodes as a JWT (distinguishes JWTs from opaque tokens). + static bool isJWT(const std::string & token); protected: virtual std::string getAudience() const { return oauth_audience; } diff --git a/src/Common/ShellCommand.cpp b/src/Common/ShellCommand.cpp index ccee2943fcd6..ce76781237d0 100644 --- a/src/Common/ShellCommand.cpp +++ b/src/Common/ShellCommand.cpp @@ -27,6 +27,7 @@ namespace CANNOT_EXEC = 0x55555558, CANNOT_DUP_READ_DESCRIPTOR = 0x55555559, CANNOT_DUP_WRITE_DESCRIPTOR = 0x55555560, + CANNOT_SETPGID = 0x55555561, }; } @@ -218,6 +219,9 @@ std::unique_ptr ShellCommand::executeImpl( sigprocmask(0, nullptr, &mask); // NOLINT(concurrency-mt-unsafe) sigprocmask(SIG_UNBLOCK, &mask, nullptr); // NOLINT(concurrency-mt-unsafe) + if (config.new_process_group && setpgid(0, 0) != 0) + _exit(static_cast(ReturnCodes::CANNOT_SETPGID)); + execv(filename, argv); /// If the process is running, then `execv` does not return here. 
@@ -385,6 +389,8 @@ void ShellCommand::handleProcessRetcode(int retcode) const throw Exception(ErrorCodes::CANNOT_CREATE_CHILD_PROCESS, "Cannot dup2 read descriptor of child process"); case static_cast(ReturnCodes::CANNOT_DUP_WRITE_DESCRIPTOR): throw Exception(ErrorCodes::CANNOT_CREATE_CHILD_PROCESS, "Cannot dup2 write descriptor of child process"); + case static_cast(ReturnCodes::CANNOT_SETPGID): + throw Exception(ErrorCodes::CANNOT_CREATE_CHILD_PROCESS, "Cannot setpgid in child process"); default: throw Exception(ErrorCodes::CHILD_WAS_NOT_EXITED_NORMALLY, "Child process was exited with return code {}", toString(retcode)); } diff --git a/src/Common/ShellCommand.h b/src/Common/ShellCommand.h index 7d8d12bb3131..e39703e47f76 100644 --- a/src/Common/ShellCommand.h +++ b/src/Common/ShellCommand.h @@ -65,6 +65,10 @@ class ShellCommand final bool pipe_stdin_only = false; + /// Put the child in its own process group, so that a single `kill(-pid, ...)` + /// from the parent terminates the entire subprocess tree. 
+ bool new_process_group = false; + DestructorStrategy terminate_in_destructor_strategy = DestructorStrategy(false, 0); }; diff --git a/src/Interpreters/Context.cpp b/src/Interpreters/Context.cpp index 6c02bd513cde..e6829d9c9138 100644 --- a/src/Interpreters/Context.cpp +++ b/src/Interpreters/Context.cpp @@ -56,6 +56,7 @@ #include #include #include +#include #include #include #include @@ -853,7 +854,7 @@ struct ContextSharedPart : boost::noncopyable std::lock_guard lock(mutex); config = config_value; - access_control->setExternalAuthenticatorsConfig(*config_value); + access_control->setExternalAuthenticatorsConfig(*config_value, ConnectionTimeouts::getHTTPTimeouts(Settings(), server_settings)); } const Poco::Util::AbstractConfiguration & getConfigRefWithLock(const std::lock_guard &) const TSA_REQUIRES(this->mutex) @@ -1872,8 +1873,9 @@ const AccessControl & Context::getAccessControl() const void Context::setExternalAuthenticatorsConfig(const Poco::Util::AbstractConfiguration & config) { + auto token_http_timeouts = ConnectionTimeouts::getHTTPTimeouts(getSettingsRef(), getServerSettings()); std::lock_guard lock(shared->mutex); - shared->access_control->setExternalAuthenticatorsConfig(config); + shared->access_control->setExternalAuthenticatorsConfig(config, token_http_timeouts); } std::unique_ptr Context::makeGSSAcceptorContext() const diff --git a/src/Interpreters/Session.cpp b/src/Interpreters/Session.cpp index 21b07e3ebc9b..a63dc42c8327 100644 --- a/src/Interpreters/Session.cpp +++ b/src/Interpreters/Session.cpp @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -387,12 +386,6 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So user_authenticated_with = auth_result.authentication_data; settings_from_auth_server = auth_result.settings; - /// Bind the session lifetime to the access-token lifetime when applicable. 
- if (const auto * token_credentials = typeid_cast(&credentials_)) - auth_token_expires_at = token_credentials->getExpiresAt(); - else - auth_token_expires_at.reset(); - LOG_DEBUG(log, "{} Authenticated with global context as user {}", toString(auth_id), toString(*user_id)); @@ -419,32 +412,12 @@ void Session::authenticate(const Credentials & credentials_, const Poco::Net::So void Session::checkIfUserIsStillValid() { - const auto now = std::chrono::system_clock::now(); - if (const auto valid_until = user_authenticated_with.getValidUntil()) { - if (std::chrono::system_clock::to_time_t(now) > valid_until) - throw Exception(ErrorCodes::USER_EXPIRED, "Authentication method used has expired"); - } + const time_t now = std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - /// For sessions established via a bearer/access token (JWT or opaque), enforce token expiry. - if (auth_token_expires_at.has_value() && now >= *auth_token_expires_at) - throw Exception(ErrorCodes::USER_EXPIRED, "Access token used to authenticate the session has expired"); - - /// For JWT/token sessions, also re-validate that the authenticating - /// processor is still configured. Without this, an admin removing a - /// processor (or disabling token auth entirely) would NOT terminate - /// active sessions until each session's token expired naturally -- a - /// gap of up to one token TTL (~1h for typical IdPs) between the - /// admin's "stop accepting tokens from this IdP" intent and actual - /// session termination (M-28). - if (user_authenticated_with.getType() == AuthenticationType::JWT) - { - const auto & processor_name = user_authenticated_with.getTokenProcessorName(); - if (!global_context->getAccessControl().getExternalAuthenticators().hasTokenProcessor(processor_name)) - throw Exception(ErrorCodes::USER_EXPIRED, - "Token processor '{}' that authenticated this session is no longer configured", - processor_name.empty() ? 
"" : processor_name); + if (now > valid_until) + throw Exception(ErrorCodes::USER_EXPIRED, "Authentication method used has expired"); } } diff --git a/src/Interpreters/Session.h b/src/Interpreters/Session.h index f62113f83452..babd16a9975a 100644 --- a/src/Interpreters/Session.h +++ b/src/Interpreters/Session.h @@ -121,10 +121,6 @@ class Session std::vector external_roles; AuthenticationData user_authenticated_with; - /// When the user was authenticated with a bearer/access token, this holds the - /// effective token expiry captured at authentication time. - std::optional auth_token_expires_at; - ContextMutablePtr session_context; mutable bool query_context_created = false; diff --git a/tests/queries/0_stateless/04206_jwt_command.reference b/tests/queries/0_stateless/04206_jwt_command.reference new file mode 100644 index 000000000000..031c6306cebd --- /dev/null +++ b/tests/queries/0_stateless/04206_jwt_command.reference @@ -0,0 +1,25 @@ +Test 1: --jwt and --jwt-command together should give BAD_ARGUMENTS +OK +Test 2: --jwt-command with non-default --user should give BAD_ARGUMENTS +OK +Test 3: --jwt-command with --login should give BAD_ARGUMENTS +OK +Test 4: --jwt-command with empty stdout should fail with AUTHENTICATION_FAILED +OK +Test 5: --jwt-command exiting with non-zero status should fail with AUTHENTICATION_FAILED +OK +Test 6: --jwt-command stderr should be forwarded to client stderr +OK +Test 7: --jwt-command-timeout kills a hanging script +OK +Test 8: --jwt-command-timeout=0 should be rejected +OK +Test 9: --jwt-command is actually executed +OK +Test 10: --jwt-command-timeout from XML config file takes effect +OK +Test 11: stdin-reading script completes promptly (stdin is closed) +OK +Test 12: CLI --jwt-command-timeout overrides XML config +OK +All tests completed diff --git a/tests/queries/0_stateless/04206_jwt_command.sh b/tests/queries/0_stateless/04206_jwt_command.sh new file mode 100755 index 000000000000..d4aeed201391 --- /dev/null +++ 
b/tests/queries/0_stateless/04206_jwt_command.sh @@ -0,0 +1,136 @@ +#!/usr/bin/env bash +# Tags: no-fasttest +# Tag no-fasttest: --jwt-command requires a build with JWT and SSL support + +CUR_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd) +# shellcheck source=../shell_config.sh +. "$CUR_DIR"/../shell_config.sh + +# Well-formed JWT with far-future exp; server will reject, but the client must accept its shape. +SAMPLE_JWT="eyJhbGciOiJIUzI1NiJ9.eyJleHAiOjk5OTk5OTk5OTksInN1YiI6InRlc3QifQ.fake" + +echo "Test 1: --jwt and --jwt-command together should give BAD_ARGUMENTS" +output=$($CLICKHOUSE_CLIENT_BINARY --jwt "$SAMPLE_JWT" --jwt-command "echo $SAMPLE_JWT" --query "SELECT 1" 2>&1) +if echo "$output" | grep -qi "cannot both be specified\|BAD_ARGUMENTS"; then + echo "OK" +else + echo "FAILED: expected BAD_ARGUMENTS, got: $output" +fi + +echo "Test 2: --jwt-command with non-default --user should give BAD_ARGUMENTS" +output=$($CLICKHOUSE_CLIENT_BINARY --user alice --jwt-command "echo $SAMPLE_JWT" --query "SELECT 1" 2>&1) +if echo "$output" | grep -qi "User and JWT flags\|BAD_ARGUMENTS"; then + echo "OK" +else + echo "FAILED: expected BAD_ARGUMENTS, got: $output" +fi + +echo "Test 3: --jwt-command with --login should give BAD_ARGUMENTS" +output=$($CLICKHOUSE_CLIENT_BINARY --login=device --jwt-command "echo $SAMPLE_JWT" --query "SELECT 1" 2>&1) +if echo "$output" | grep -qi "cannot both be specified\|BAD_ARGUMENTS"; then + echo "OK" +else + echo "FAILED: expected BAD_ARGUMENTS, got: $output" +fi + +echo "Test 4: --jwt-command with empty stdout should fail with AUTHENTICATION_FAILED" +output=$($CLICKHOUSE_CLIENT_BINARY --jwt-command "true" --query "SELECT 1" 2>&1) +if echo "$output" | grep -qi "empty output.*AUTHENTICATION_FAILED\|AUTHENTICATION_FAILED.*empty output"; then + echo "OK" +else + echo "FAILED: expected AUTHENTICATION_FAILED for empty output, got: $output" +fi + +echo "Test 5: --jwt-command exiting with non-zero status should fail with AUTHENTICATION_FAILED" 
+output=$($CLICKHOUSE_CLIENT_BINARY --jwt-command "exit 42" --query "SELECT 1" 2>&1) +if echo "$output" | grep -qi "non-zero status 42.*AUTHENTICATION_FAILED\|AUTHENTICATION_FAILED.*non-zero status 42"; then + echo "OK" +else + echo "FAILED: expected AUTHENTICATION_FAILED with retcode 42, got: $output" +fi + +echo "Test 6: --jwt-command stderr should be forwarded to client stderr" +MARKER="forwarded-from-script-stderr" +output=$($CLICKHOUSE_CLIENT_BINARY --jwt-command "echo $MARKER 1>&2; echo $SAMPLE_JWT" --host localhost --port 1 --query "SELECT 1" 2>&1) +if echo "$output" | grep -q "$MARKER"; then + echo "OK" +else + echo "FAILED: expected stderr marker '$MARKER' in output, got: $output" +fi + +echo "Test 7: --jwt-command-timeout kills a hanging script" +start=$SECONDS +output=$($CLICKHOUSE_CLIENT_BINARY --jwt-command "sleep 30; echo $SAMPLE_JWT" --jwt-command-timeout 1 --query "SELECT 1" 2>&1) +elapsed=$((SECONDS - start)) +if echo "$output" | grep -qi "timed out after 1 seconds.*AUTHENTICATION_FAILED\|AUTHENTICATION_FAILED.*timed out after 1 seconds" && [ "$elapsed" -lt 10 ]; then + echo "OK" +else + echo "FAILED: expected AUTHENTICATION_FAILED timeout under 10s, elapsed=${elapsed}s, got: $output" +fi + +echo "Test 8: --jwt-command-timeout=0 should be rejected" +output=$($CLICKHOUSE_CLIENT_BINARY --jwt-command "echo $SAMPLE_JWT" --jwt-command-timeout 0 --query "SELECT 1" 2>&1) +if echo "$output" | grep -qi "must be positive.*BAD_ARGUMENTS\|BAD_ARGUMENTS.*must be positive"; then + echo "OK" +else + echo "FAILED: expected BAD_ARGUMENTS for non-positive timeout, got: $output" +fi + +echo "Test 9: --jwt-command is actually executed" +MARKER_FILE="${CLICKHOUSE_TMP}/04206_jwt_command_marker_$$" +rm -f "$MARKER_FILE" +$CLICKHOUSE_CLIENT_BINARY --jwt-command "echo ran > '$MARKER_FILE'; echo $SAMPLE_JWT" --host localhost --port 1 --query "SELECT 1" > /dev/null 2>&1 +if [ -f "$MARKER_FILE" ]; then + echo "OK" +else + echo "FAILED: marker file not created, command did not 
run" +fi +rm -f "$MARKER_FILE" + +echo "Test 10: --jwt-command-timeout from XML config file takes effect" +CFG="${CLICKHOUSE_TMP}/04206_jwt_command_cfg_$$.xml" +cat > "$CFG" < + 1 + +EOF +start=$SECONDS +output=$($CLICKHOUSE_CLIENT_BINARY --config-file "$CFG" --jwt-command "sleep 30; echo $SAMPLE_JWT" --query "SELECT 1" 2>&1) +elapsed=$((SECONDS - start)) +if echo "$output" | grep -qi "timed out after 1 seconds" && [ "$elapsed" -lt 10 ]; then + echo "OK" +else + echo "FAILED: expected timeout under 10s from XML config, elapsed=${elapsed}s, got: $output" +fi +rm -f "$CFG" + +echo "Test 11: stdin-reading script completes promptly (stdin is closed)" +# If the child's stdin is closed by the parent, 'read X' returns immediately on EOF and +# the JWT is echoed before the 1s watchdog fires. If stdin were left open, 'read X' would +# block and the watchdog would surface 'timed out after 1 seconds'. We assert on that +# message rather than wall-clock time so the test is not flaky under loaded CI runs. 
+output=$($CLICKHOUSE_CLIENT_BINARY --jwt-command "read X; echo $SAMPLE_JWT" --jwt-command-timeout 1 --host localhost --port 1 --query "SELECT 1" 2>&1) +if echo "$output" | grep -qi "timed out"; then + echo "FAILED: jwt-command child's stdin was not closed (got: $output)" +else + echo "OK" +fi + +echo "Test 12: CLI --jwt-command-timeout overrides XML config" +CFG="${CLICKHOUSE_TMP}/04206_jwt_command_cfg_override_$$.xml" +cat > "$CFG" < + 30 + +EOF +start=$SECONDS +output=$($CLICKHOUSE_CLIENT_BINARY --config-file "$CFG" --jwt-command "sleep 30; echo $SAMPLE_JWT" --jwt-command-timeout 1 --query "SELECT 1" 2>&1) +elapsed=$((SECONDS - start)) +if echo "$output" | grep -qi "timed out after 1 seconds" && [ "$elapsed" -lt 10 ]; then + echo "OK" +else + echo "FAILED: expected CLI(1) to override XML(30), elapsed=${elapsed}s, got: $output" +fi +rm -f "$CFG" + +echo "All tests completed" From 2a06b35191ae0b102455c280824ce3ba0be3bb23 Mon Sep 17 00:00:00 2001 From: Andrey Zvonov <32552679+zvonand@users.noreply.github.com> Date: Sun, 7 Jun 2026 18:13:10 +0200 Subject: [PATCH 12/12] Resolve conflicts in cherry-pick of #1809 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In antalya-26.4, `docs/en/interfaces/cli.md` was repurposed for `clickhousectl` documentation; `clickhouse-client` docs moved to `docs/en/interfaces/client.md`. The conflict in `cli.md` arose because antalya-26.3 still used that file for `clickhouse-client` docs. Resolution: - `cli.md`: keep "ours" (the `clickhousectl` `Non-interactive flags` table), drop "theirs" (the old `clickhouse-client` content). - `client.md`: add `--jwt-command` and `--jwt-command-timeout` rows to the Connection options table, and add the `--jwt-command details` section — verbatim from PR #1809, routed to the correct file on antalya-26.4. 
Adapted: `--jwt-command`/`--jwt-command-timeout` doc added to `docs/en/interfaces/client.md` instead of `docs/en/interfaces/cli.md`, because `cli.md` on antalya-26.4 documents `clickhousectl`, not `clickhouse-client`. Source-PR: #1809 (https://github.com/Altinity/ClickHouse/pull/1809) --- docs/en/interfaces/cli.md | 228 ----------------------------------- docs/en/interfaces/client.md | 14 +++ 2 files changed, 14 insertions(+), 228 deletions(-) diff --git a/docs/en/interfaces/cli.md b/docs/en/interfaces/cli.md index 229e31df2926..28543d790627 100644 --- a/docs/en/interfaces/cli.md +++ b/docs/en/interfaces/cli.md @@ -405,237 +405,9 @@ clickhousectl skills --agent claude --agent codex ### Non-interactive flags {#non-interactive-flags} -<<<<<<< HEAD | Flag | Description | |------|-------------| | `--agent ` | Install Skills for a specific agent (can be repeated) | | `--global` | Use global scope; if omitted, project scope is used | | `--all` | Install Skills for all supported agents | | `--detected-only` | Install Skills for supported agents that were detected on the system | -======= -```bash -clickhouse-client clickhouse://localhost/my_database?s - -# equivalent to: -clickhouse-client clickhouse://localhost/my_database -s -``` - -Connect to the default host using the default port, the default user, and the default database. - -```bash -clickhouse-client clickhouse: -``` - -Connect to the default host using the default port, as the user `my_user` and no password. - -```bash -clickhouse-client clickhouse://my_user@ - -# Using a blank password between : and @ means to asking the user to enter the password before starting the connection. -clickhouse-client clickhouse://my_user:@ -``` - -Connect to `localhost` using the email as the user name. `@` symbol is percent encoded to `%40`. - -```bash -clickhouse-client clickhouse://some_user%40some_mail.com@localhost:9000 -``` - -Connect to one of two hosts: `192.168.1.15`, `192.168.1.25`. 
- -```bash -clickhouse-client clickhouse://192.168.1.15,192.168.1.25 -``` - -## Query ID format {#query-id-format} - -In interactive mode ClickHouse Client shows the query ID for every query. By default, the ID is formatted like this: - -```sql -Query id: 927f137d-00f1-4175-8914-0dd066365e96 -``` - -A custom format may be specified in a configuration file inside a `query_id_formats` tag. The `{query_id}` placeholder in the format string is replaced with the query ID. Several format strings are allowed inside the tag. -This feature can be used to generate URLs to facilitate profiling of queries. - -**Example** - -```xml - - - http://speedscope-host/#profileURL=qp%3Fid%3D{query_id} - - -``` - -With the configuration above, the ID of a query is shown in the following format: - -```response -speedscope:http://speedscope-host/#profileURL=qp%3Fid%3Dc8ecc783-e753-4b38-97f1-42cddfb98b7d -``` - -## Configuration files {#configuration_files} - -ClickHouse Client uses the first existing file of the following: - -- A file that is defined with the `-c [ -C, --config, --config-file ]` parameter. -- `./clickhouse-client.[xml|yaml|yml]` -- `$XDG_CONFIG_HOME/clickhouse/config.[xml|yaml|yml]` (or `~/.config/clickhouse/config.[xml|yaml|yml]` if `XDG_CONFIG_HOME` is not set) -- `~/.clickhouse-client/config.[xml|yaml|yml]` -- `/etc/clickhouse-client/config.[xml|yaml|yml]` - -See the sample configuration file in the ClickHouse repository: [`clickhouse-client.xml`](https://github.com/ClickHouse/ClickHouse/blob/master/programs/client/clickhouse-client.xml) - - - - ```xml - - username - password - true - - - /etc/ssl/cert.pem - - - - ``` - - - ```yaml - user: username - password: 'password' - secure: true - openSSL: - client: - caConfig: '/etc/ssl/cert.pem' - ``` - - - -## Environment variable options {#environment-variable-options} - -The user name, password and host can be set via environment variables `CLICKHOUSE_USER`, `CLICKHOUSE_PASSWORD` and `CLICKHOUSE_HOST`. 
-Command line arguments `--user`, `--password` or `--host`, or a [connection string](#connection_string) (if specified) take precedence over environment variables. - -## Command-line options {#command-line-options} - -All command-line options can be specified directly on the command line or as defaults in the [configuration file](#configuration_files). - -### General options {#command-line-options-general} - -| Option | Description | Default | -|-----------------------------------------------------|------------------------------------------------------------------------------------------------------------------------------------|------------------------------| -| `-c [ -C, --config, --config-file ] ` | The location of the configuration file for the client, if it is not at one of the default locations. See [Configuration Files](#configuration_files). | - | -| `--help` | Print usage summary and exit. Combine with `--verbose` to display all possible options including query settings. | - | -| `--history_file ` | Path to a file containing the command history. | - | -| `--history_max_entries` | Maximum number of entries in the history file. | `1000000` (1 million) | -| `--prompt ` | Specify a custom prompt. | The `display_name` of the server | -| `--verbose` | Increase output verbosity. | - | -| `-V [ --version ]` | Print version and exit. 
| - | - -### Connection options {#command-line-options-connection} - -| Option | Description | Default | -|----------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------------------------------------------| -| `--connection ` | The name of preconfigured connection details from the configuration file. See [Connection credentials](#connection-credentials). | - | -| `-d [ --database ] ` | Select the database to default to for this connection. | The current database from the server settings (`default` by default) | -| `-h [ --host ] ` | The hostname of the ClickHouse server to connect to. Can either be a hostname or an IPv4 or IPv6 address. Multiple hosts can be passed via multiple arguments. | `localhost` | -| `--jwt ` | Use JSON Web Token (JWT) for authentication.

Server JWT authorization is only available in ClickHouse Cloud. | - | -| `--jwt-command ` | Shell command whose stdout is used as the JWT. Invoked on the first connect, before reconnects when the cached JWT is near expiry, and after the server rejects the cached token. See [`--jwt-command` details](#jwt-command-details) below. | - | -| `--jwt-command-timeout ` | Timeout for `--jwt-command`. Also settable as `` in the config file; CLI wins. | `30` | -| `--login[=]` | Authenticate via OAuth2. Bare `--login` (no `=`) triggers ClickHouse Cloud automatic login — the provider is inferred from the server. To authenticate against a custom OpenID Connect provider, supply a `mode` and `--oauth-credentials`: `--login=browser` runs the Authorization Code + PKCE flow (opens a browser), `--login=device` runs the Device Authorization flow (prints a URL and short code — no browser needed). | - | -| `--oauth-credentials ` | Path to an OAuth2 credentials JSON file (Google Cloud Console format). Required when using `--login=browser` or `--login=device` with a custom OpenID Connect provider. See [OAuth credentials file format](#oauth-credentials-file) below. Refresh tokens are cached in `~/.clickhouse-client/oauth_cache.json` (mode `0600`). | `~/.clickhouse-client/oauth_client.json` | -| `--no-warnings` | Disable showing warnings from `system.warnings` when the client connects to the server. | - | -| `--no-server-client-version-message` | Suppress server-client version mismatch message when the client connects to the server. | - | -| `--password ` | The password of the database user. You can also specify the password for a connection in the configuration file. If you do not specify the password, the client will ask for it. | - | -| `--port ` | The port the server is accepting connections on. The default ports are 9440 (TLS) and 9000 (no TLS).

Note: The client uses the native protocol and not HTTP(S). | `9440` if `--secure` is specified, `9000` otherwise. Always defaults to `9440` if the hostname ends in `.clickhouse.cloud`. | -| `-s [ --secure ]` | Whether to use TLS.

Enabled automatically when connecting to port 9440 (the default secure port) or ClickHouse Cloud.

You might need to configure your CA certificates in the [configuration file](#configuration_files). The available configuration settings are the same as for [server-side TLS configuration](../operations/server-configuration-parameters/settings.md#openssl). | Auto-enabled when connecting to port 9440 or ClickHouse Cloud | -| `--ssh-key-file ` | File containing the SSH private key for authenticate with the server. | - | -| `--ssh-key-passphrase ` | Passphrase for the SSH private key specified in `--ssh-key-file`. | - | -| `--tls-sni-override ` | If using TLS, the server name (SNI) to pass in the handshake. | The host provided via `-h` or `--host`. | -| `-u [ --user ] ` | The database user to connect as. | `default` | - -:::note -Instead of the `--host`, `--port`, `--user` and `--password` options, the client also supports [connection strings](#connection_string). -::: - -### OAuth credentials file {#oauth-credentials-file} - -When using `--login=browser` or `--login=device` with a custom OpenID Connect provider, the client reads a credentials JSON file. The file uses the same format produced by the Google Cloud Console ("OAuth 2.0 Client IDs" → "Download JSON"): - -```json -{ - "installed": { - "client_id": "YOUR_CLIENT_ID", - "client_secret": "YOUR_CLIENT_SECRET", - "auth_uri": "https://accounts.google.com/o/oauth2/auth", - "token_uri": "https://oauth2.googleapis.com/token", - "redirect_uris": ["http://127.0.0.1"] - } -} -``` - -The top-level key can be `installed` (desktop/CLI apps) or `web`. Required fields: `client_id`, `auth_uri`, `token_uri`. Optional fields: - -| Field | Description | -|---|---| -| `client_secret` | Confidential-client secret. Omit (or leave empty) for OIDC public clients — the auth-code flow is always protected by PKCE and the device flow by the device code, so a secret is not required by the protocol. 
When the field is absent the client never sends a `client_secret` form parameter, which is the form public-client registrations require (Auth0, Microsoft Entra ID, Keycloak, Okta and others reject empty secrets with `invalid_client`). | -| `device_authorization_uri` | Device authorization endpoint. Discovered automatically via OIDC Discovery if absent. | -| `issuer` | OIDC issuer URL (e.g. `https://accounts.google.com`). Used to locate the discovery document when `device_authorization_uri` is not set. | - -The default path is `~/.clickhouse-client/oauth_client.json`. Override it with `--oauth-credentials `. - -After a successful login the obtained refresh token is cached in `~/.clickhouse-client/oauth_cache.json` (file mode `0600`). Subsequent runs reuse the cached token silently and only open the browser or print a device code when the refresh token has expired. - -### `--jwt-command` details {#jwt-command-details} - -The command is executed via `/bin/sh -c`. Stdout is taken as the JWT (one trailing newline stripped); any human-facing output (prompts, URLs, device codes) must go to stderr — it is forwarded unbuffered to the client's stderr. Stdin is closed. - -The command runs on the first connect to obtain the initial token. On subsequent (re)connects the client reuses the cached token; it re-invokes the command only when (a) the cached token parses as a JWT whose `exp` claim is within 30 seconds, or (b) the server rejects the cached token with an authentication failure, in which case the client refetches the token and retries the handshake once. Opaque tokens (anything that does not parse as a JWT) and JWTs without a usable `exp` claim are reused until the server rejects them — caching/refresh in those cases is the script's responsibility. - -```bash -clickhouse-client --jwt-command "curl -sS https://idp.example/token | jq -r .access_token" -``` - -Cannot be combined with `--jwt`, `--login`, or a non-default `--user`. 
Non-zero exit, empty output, or exceeding `--jwt-command-timeout` (default `30`s, overridable via `` in `~/.clickhouse-client/config.xml`) fails authentication. On timeout the entire helper subprocess tree is terminated. - -### Query options {#command-line-options-query} - -| Option | Description | -|---------------------------------|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `--param_=` | Substitution value for a parameter of a [query with parameters](#cli-queries-with-parameters). | -| `-q [ --query ] ` | The query to run in batch mode. Can be specified multiple times (`--query "SELECT 1" --query "SELECT 2"`) or once with multiple semicolon-separated queries (`--query "SELECT 1; SELECT 2;"`). In the latter case, `INSERT` queries with formats other than `VALUES` must be separated by empty lines.

A single query can also be specified without a parameter: `clickhouse-client "SELECT 1"`

Cannot be used together with `--queries-file`. | -| `--queries-file ` | Path to a file containing queries. `--queries-file` can be specified multiple times, e.g. `--queries-file queries1.sql --queries-file queries2.sql`.

Cannot be used together with `--query`. | -| `-m [ --multiline ]` | If specified, allow multiline queries (do not send the query on Enter). Queries will be sent only when they are ended with a semicolon. | - -### Query settings {#command-line-options-query-settings} - -Query settings can be specified as command-line options in the client, for example: -```bash -$ clickhouse-client --max_threads 1 -``` - -See [Settings](../operations/settings/settings.md) for a list of settings. - -### Formatting options {#command-line-options-formatting} - -| Option | Description | Default | -|---------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|----------------| -| `-f [ --format ] ` | Use the specified format to output the result.

See [Formats for Input and Output Data](formats.md) for a list of supported formats. | `TabSeparated` | -| `--pager ` | Pipe all output into this command. Typically `less` (e.g., `less -S` to display wide result sets) or similar. | - | -| `-E [ --vertical ]` | Use the [Vertical format](/interfaces/formats/Vertical) to output the result. This is the same as `–-format Vertical`. In this format, each value is printed on a separate line, which is helpful when displaying wide tables. | - | - -### Execution details {#command-line-options-execution-details} - -| Option | Description | Default | -|-----------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|---------------------------------------------------------------------| -| `--enable-progress-table-toggle` | Enable toggling of the progress table by pressing the control key (Space). Only applicable in interactive mode with progress table printing enabled. | `enabled` | -| `--hardware-utilization` | Print hardware utilization information in progress bar. | - | -| `--memory-usage` | If specified, print memory usage to `stderr` in non-interactive mode.

Possible values:
• `none` - do not print memory usage
• `default` - print number of bytes
• `readable` - print memory usage in human-readable format | - | -| `--print-profile-events` | Print `ProfileEvents` packets. | - | -| `--progress` | Print progress of query execution.

Possible values:
• `tty\|on\|1\|true\|yes` - outputs to the terminal in interactive mode
• `err` - outputs to `stderr` in non-interactive mode
• `off\|0\|false\|no` - disables progress printing | `tty` in interactive mode, `off` in non-interactive (batch) mode | -| `--progress-table` | Print a progress table with changing metrics during query execution.

Possible values:
• `tty\|on\|1\|true\|yes` - outputs to the terminal in interactive mode
• `err` - outputs to `stderr` in non-interactive mode
• `off\|0\|false\|no` - disables the progress table | `tty` in interactive mode, `off` in non-interactive (batch) mode | -| `--stacktrace` | Print stack traces of exceptions. | - | -| `-t [ --time ]` | Print query execution time to `stderr` in non-interactive mode (for benchmarks). | - | ->>>>>>> 40a2b77fcc6 (Merge pull request #1809 from Altinity/feature/antalya-26.3/oauth-executable-token-in-client) diff --git a/docs/en/interfaces/client.md b/docs/en/interfaces/client.md index 7a236a877d80..4710ef4bf85f 100644 --- a/docs/en/interfaces/client.md +++ b/docs/en/interfaces/client.md @@ -836,6 +836,8 @@ All command-line options can be specified directly on the command line or as def | `-d [ --database ] ` | Select the database to default to for this connection. | The current database from the server settings (`default` by default) | | `-h [ --host ] ` | The hostname of the ClickHouse server to connect to. Can either be a hostname or an IPv4 or IPv6 address. Multiple hosts can be passed via multiple arguments. | `localhost` | | `--jwt ` | Use JSON Web Token (JWT) for authentication.

Server JWT authorization is only available in ClickHouse Cloud. | - | +| `--jwt-command ` | Shell command whose stdout is used as the JWT. Invoked on the first connect, before reconnects when the cached JWT is near expiry, and after the server rejects the cached token. See [`--jwt-command` details](#jwt-command-details) below. | - | +| `--jwt-command-timeout ` | Timeout for `--jwt-command`, in seconds. Also settable as `` in the config file; the command-line value takes precedence. | `30` | | `--login[=]` | Authenticate via OAuth2. Bare `--login` (no `=`) triggers ClickHouse Cloud automatic login — the provider is inferred from the server. To authenticate against a custom OpenID Connect provider, supply a `mode` and `--oauth-credentials`: `--login=browser` runs the Authorization Code + PKCE flow (opens a browser), `--login=device` runs the Device Authorization flow (prints a URL and short code — no browser needed). | - | | `--oauth-credentials ` | Path to an OAuth2 credentials JSON file (Google Cloud Console format). Required when using `--login=browser` or `--login=device` with a custom OpenID Connect provider. See [OAuth credentials file format](#oauth-credentials-file) below. Refresh tokens are cached in `~/.clickhouse-client/oauth_cache.json` (mode `0600`). | `~/.clickhouse-client/oauth_client.json` | | `--no-warnings` | Disable showing warnings from `system.warnings` when the client connects to the server. | - | @@ -880,6 +882,18 @@ The default path is `~/.clickhouse-client/oauth_client.json`. Override it with ` After a successful login the obtained refresh token is cached in `~/.clickhouse-client/oauth_cache.json` (file mode `0600`). Subsequent runs reuse the cached token silently and only open the browser or print a device code when the refresh token has expired. +### `--jwt-command` details {#jwt-command-details} + +The command is executed via `/bin/sh -c`. 
Stdout is taken as the JWT (one trailing newline stripped); any human-facing output (prompts, URLs, device codes) must go to stderr — it is forwarded unbuffered to the client's stderr. Stdin is closed. + +The command runs on the first connect to obtain the initial token. On subsequent (re)connects the client reuses the cached token; it re-invokes the command only when (a) the cached token parses as a JWT whose `exp` claim is within 30 seconds, or (b) the server rejects the cached token with an authentication failure, in which case the client refetches the token and retries the handshake once. Opaque tokens (anything that does not parse as a JWT) and JWTs without a usable `exp` claim are reused until the server rejects them — caching/refresh in those cases is the script's responsibility. + +```bash +clickhouse-client --jwt-command "curl -sS https://idp.example/token | jq -r .access_token" +``` + +Cannot be combined with `--jwt`, `--login`, or a non-default `--user`. Non-zero exit, empty output, or exceeding `--jwt-command-timeout` (default `30`s, overridable via `` in `~/.clickhouse-client/config.xml`) fails authentication. On timeout the entire helper subprocess tree is terminated. + ### Query options {#command-line-options-query} | Option | Description |