apache · JingsongLi · May 3, 2026 · Apr 29, 2026 · Apr 29, 2026 · Apr 30, 2026
diff --git a/paimon-python/pypaimon/catalog/rest/rest_catalog.py b/paimon-python/pypaimon/catalog/rest/rest_catalog.py
@@ -483,10 +483,6 @@ def to_table_metadata(self, db: str, response: GetTableResponse) -> TableMetadat
         options[CoreOptions.PATH.key()] = response.get_path()
         response.put_audit_options_to(options)
 
-        identifier = Identifier.create(db, response.get_name())
-        if identifier.get_branch_name() is not None:
-            options[CoreOptions.BRANCH.key()] = identifier.get_branch_name()
-
         return TableMetadata(
             schema=schema.copy(options),
             is_external=response.get_is_external(),

diff --git a/paimon-python/pypaimon/catalog/rest/rest_token_file_io.py b/paimon-python/pypaimon/catalog/rest/rest_token_file_io.py
@@ -29,7 +29,7 @@
 from pypaimon.filesystem.pyarrow_file_io import PyArrowFileIO
 from pypaimon.api.auth.bearer import BearTokenAuthProvider
 from pypaimon.api.auth.dlf_provider import DLFAuthProvider
-from pypaimon.common.identifier import Identifier, SYSTEM_TABLE_SPLITTER
+from pypaimon.common.identifier import Identifier
 from pypaimon.common.options import Options
 from pypaimon.common.options.config import CatalogOptions, OssOptions
 from pypaimon.common.uri_reader import UriReaderFactory
@@ -273,12 +273,14 @@ def refresh_token(self):
             self.api_instance = RESTApi(self.properties, False)
 
         table_identifier = self.identifier
-        if SYSTEM_TABLE_SPLITTER in self.identifier.get_object_name():
-            base_table = self.identifier.get_object_name().split(SYSTEM_TABLE_SPLITTER)[0]
-            table_identifier = Identifier(
-                database=self.identifier.get_database_name(),
-                object=base_table,
-                branch=self.identifier.get_branch_name())
+        if self.identifier.is_system_table():
+            # Strip the system-table suffix but keep the branch, so the token
+            # request is resolved against the same branch of the base table.
+            table_identifier = Identifier.create(
+                self.identifier.get_database_name(),
+                self.identifier.get_table_name(),
+                branch=self.identifier.get_branch_name(),
+            )
 
         response = self.api_instance.load_table_token(table_identifier)
         self.log.info(

diff --git a/paimon-python/pypaimon/common/identifier.py b/paimon-python/pypaimon/common/identifier.py
@@ -21,37 +21,93 @@
 from pypaimon.common.json_util import json_field
 
 SYSTEM_TABLE_SPLITTER = '$'
-SYSTEM_BRANCH_PREFIX = 'branch-'
+SYSTEM_BRANCH_PREFIX = 'branch_'
 DEFAULT_MAIN_BRANCH = 'main'
+UNKNOWN_DATABASE = 'unknown'
 
 
-@dataclass
+@dataclass(init=False)
 class Identifier:
+    """Identifies a database object (table, view, etc.).
+
+    1:1 port of ``org.apache.paimon.catalog.Identifier``: the on-the-wire
+    shape is exactly two fields, ``database`` and ``object``. Any branch /
+    system-table portion is encoded into the ``object`` field using the
+    ``$`` separator and the ``branch_`` prefix, so JSON written by Python
+    is round-trippable through the Java REST server (and vice versa).
+
+    Mirrors Java's three public constructors via a single signature:
+      * ``Identifier(database, object)`` — JSON-create form. ``object``
+        is the final, possibly-encoded string.
+      * ``Identifier(database, table, branch=...)`` — encodes ``branch``
+        into ``object``.
+      * ``Identifier(database, table, branch=..., system_table=...)`` —
+        encodes both.
+
+    ``branch == "main"`` (case-insensitive) is treated as the default
+    branch and is not encoded into the object name, matching Java.
+    """
 
     database: str = json_field("database", default=None)
     object: str = json_field("object", default=None)
-    branch: Optional[str] = json_field("branch", default=None)
+
+    def __init__(self, database: str, object: Optional[str] = None,
+                 branch: Optional[str] = None,
+                 system_table: Optional[str] = None):
+        self.database = database
+        if branch is None and system_table is None:
+            # @JsonCreator form: ``object`` is already the final, encoded
+            # string. Components are decoded lazily by _split_object_name().
+            self.object = object
+            self._table: Optional[str] = None
+            self._branch: Optional[str] = None
+            self._system_table: Optional[str] = None
+        else:
+            # Encoding form: ``object`` is the bare table name; encode
+            # branch / system_table into the on-wire ``object``.
+            builder = object
+            if branch is not None and branch.lower() != DEFAULT_MAIN_BRANCH:
+                builder = (builder + SYSTEM_TABLE_SPLITTER
+                           + SYSTEM_BRANCH_PREFIX + branch)
+            if system_table is not None:
+                builder = builder + SYSTEM_TABLE_SPLITTER + system_table
+            self.object = builder
+            self._table = object
+            self._branch = branch
+            self._system_table = system_table
 
     @classmethod
-    def create(cls, database: str, object: str) -> "Identifier":
-        return cls(database, object)
+    def create(cls, database: str, table: str,
+               branch: Optional[str] = None,
+               system_table: Optional[str] = None) -> "Identifier":
+        """Create an Identifier.
+
+        Two-arg form ``create(database, object)`` mirrors Java's
+        ``Identifier.create``: the second argument is treated as the final
+        ``object`` string (may already carry encoded branch / system_table
+        segments).
+
+        Multi-arg form ``create(database, table, branch=..., system_table=...)``
+        is a Python convenience that encodes the components into ``object``
+        for you, equivalent to ``Identifier(database, table, branch=...,
+        system_table=...)``.
+        """
+        return cls(database, table, branch=branch, system_table=system_table)
 
     @classmethod
     def from_string(cls, full_name: str) -> "Identifier":
-        """Parse a 'database.object' identifier, with optional backtick quoting."""
+        """Parse a ``database.object`` identifier, with optional backtick quoting."""
         if not full_name or not full_name.strip():
             raise ValueError("fullName cannot be null or empty")
 
-        # Check if backticks are used - if so, parse with backtick support
         if '`' in full_name:
             return cls._parse_with_backticks(full_name)
 
-        # Otherwise, use Java-compatible split on first period only
         parts = full_name.split(".", 1)
 
         if len(parts) != 2:
             raise ValueError(
-                f"Cannot get splits from '{full_name}' to get database and object"
+                "Cannot get splits from '{}' to get database and object".format(full_name)
             )
 
         return cls(parts[0], parts[1])
@@ -75,41 +131,103 @@ def _parse_with_backticks(cls, full_name: str) -> "Identifier":
             parts.append(current)
 
         if in_backticks:
-            raise ValueError(f"Unclosed backtick in identifier: {full_name}")
+            raise ValueError("Unclosed backtick in identifier: {}".format(full_name))
 
         if len(parts) != 2:
-            raise ValueError(f"Invalid identifier format: {full_name}")
+            raise ValueError("Invalid identifier format: {}".format(full_name))
 
         return cls(parts[0], parts[1])
 
+    def _split_object_name(self) -> None:
+        if self._table is not None:
+            return
+
+        splits = self.object.split(SYSTEM_TABLE_SPLITTER)
+        if len(splits) == 1:
+            self._table = self.object
+            self._branch = None
+            self._system_table = None
+        elif len(splits) == 2:
+            self._table = splits[0]
+            if splits[1].startswith(SYSTEM_BRANCH_PREFIX):
+                self._branch = splits[1][len(SYSTEM_BRANCH_PREFIX):]
+                self._system_table = None
+            else:
+                self._branch = None
+                self._system_table = splits[1]
+        elif len(splits) == 3:
+            if not splits[1].startswith(SYSTEM_BRANCH_PREFIX):
+                raise ValueError(
+                    "System table can only contain one '$' separator, "
+                    "but this is: " + self.object
+                )
+            self._table = splits[0]
+            self._branch = splits[1][len(SYSTEM_BRANCH_PREFIX):]
+            self._system_table = splits[2]
+        else:
+            raise ValueError("Invalid object name: " + self.object)
+
     def get_full_name(self) -> str:
-        if self.branch:
-            return "{}.{}.{}".format(self.database, self.object, self.branch)
+        # Match Java: tables created without an explicit database (e.g. some
+        # ad-hoc query paths) land in the special "unknown" database, in which
+        # case the database segment is dropped from the rendered name.
+        if UNKNOWN_DATABASE == self.database:
+            return self.object
         return "{}.{}".format(self.database, self.object)
 
     def get_database_name(self) -> str:
         return self.database
 
     def get_table_name(self) -> str:
-        return self.object
+        self._split_object_name()
+        return self._table
 
     def get_object_name(self) -> str:
         return self.object
 
     def get_branch_name(self) -> Optional[str]:
-        return self.branch
+        self._split_object_name()
+        return self._branch
 
     def get_branch_name_or_default(self) -> str:
-        """Get branch name or return default 'main' if branch is None."""
-        return self.branch if self.branch else "main"
+        """Get branch name or return ``DEFAULT_MAIN_BRANCH`` if no branch is encoded."""
+        branch = self.get_branch_name()
+        return branch if branch is not None else DEFAULT_MAIN_BRANCH
 
-    def __hash__(self):
-        return hash((self.database, self.object, self.branch))
+    def get_system_table_name(self) -> Optional[str]:
+        self._split_object_name()
+        return self._system_table
 
     def is_system_table(self) -> bool:
-        if SYSTEM_TABLE_SPLITTER not in self.object:
-            return False
-        parts = self.object.split(SYSTEM_TABLE_SPLITTER)
-        if len(parts) == 2:
-            return not parts[1].startswith(SYSTEM_BRANCH_PREFIX)
-        return len(parts) == 3
+        return self.get_system_table_name() is not None
+
+    @property
+    def branch(self) -> Optional[str]:
+        # Read/write alias for callers that previously accessed the
+        # ``Identifier.branch`` dataclass field directly. Java's
+        # ``branch`` is transient/private and not exposed; Python kept
+        # it public, so this property tides external code over.
+        return self.get_branch_name()
+
+    @branch.setter
+    def branch(self, value: Optional[str]) -> None:
+        # Re-encode ``object`` so the wire shape stays consistent with
+        # the new value (equivalent to Identifier(db, table, branch=value,
+        # system_table=current_system_table)).
+        table = self.get_table_name()
+        system_table = self.get_system_table_name()
+        rebuilt = Identifier(
+            self.database, table, branch=value, system_table=system_table
+        )
+        self.object = rebuilt.object
+        self._table = table
+        self._branch = value
+        self._system_table = system_table
+
+    def __hash__(self):
+        return hash((self.database, self.object))
+
+    def __eq__(self, other):
+        if not isinstance(other, Identifier):
+            return NotImplemented
+        return self.database == other.database and self.object == other.object
diff --git a/paimon-python/pypaimon/snapshot/catalog_snapshot_commit.py b/paimon-python/pypaimon/snapshot/catalog_snapshot_commit.py
@@ -20,13 +20,13 @@
 from typing import List
 
 from pypaimon.catalog.catalog import Catalog
-
-logger = logging.getLogger(__name__)
 from pypaimon.common.identifier import Identifier
 from pypaimon.snapshot.snapshot import Snapshot
 from pypaimon.snapshot.snapshot_commit import (PartitionStatistics,
                                                SnapshotCommit)
 
+logger = logging.getLogger(__name__)
+
 
 class CatalogSnapshotCommit(SnapshotCommit):
     """A SnapshotCommit using Catalog to commit."""
@@ -37,20 +37,19 @@ def __init__(self, catalog: Catalog, identifier: Identifier, uuid: str):
 
         Args:
             catalog: The catalog instance to use for committing
-            identifier: The table identifier
+            identifier: The table identifier (already encodes branch in object name)
             uuid: Optional table UUID for verification
         """
         self.catalog = catalog
         self.identifier = identifier
         self.uuid = uuid
 
-    def commit(self, snapshot: Snapshot, branch: str, statistics: List[PartitionStatistics]) -> bool:
+    def commit(self, snapshot: Snapshot, statistics: List[PartitionStatistics]) -> bool:
         """
         Commit the snapshot using the catalog.
 
         Args:
             snapshot: The snapshot to commit
-            branch: The branch name to commit to
             statistics: List of partition statistics
 
         Returns:
@@ -59,17 +58,11 @@ def commit(self, snapshot: Snapshot, branch: str, statistics: List[PartitionStat
         Raises:
             Exception: If commit fails
         """
-        new_identifier = Identifier(
-            database=self.identifier.get_database_name(),
-            object=self.identifier.get_table_name(),
-            branch=branch
-        )
-
         # Call catalog's commit_snapshot method
         if hasattr(self.catalog, 'commit_snapshot'):
-            success = self.catalog.commit_snapshot(new_identifier, self.uuid, snapshot, statistics)
+            success = self.catalog.commit_snapshot(self.identifier, self.uuid, snapshot, statistics)
             if success:
-                logger.info("Catalog snapshot commit succeeded for %s, snapshot id %d", new_identifier, snapshot.id)
+                logger.info("Catalog snapshot commit succeeded for %s, snapshot id %d", self.identifier, snapshot.id)
             return success
         else:
             # Fallback for catalogs that don't support snapshot commits

diff --git a/paimon-python/pypaimon/snapshot/renaming_snapshot_commit.py b/paimon-python/pypaimon/snapshot/renaming_snapshot_commit.py
@@ -48,13 +48,12 @@ def __init__(self, snapshot_manager: SnapshotManager):
         self.snapshot_manager = snapshot_manager
         self.file_io: FileIO = snapshot_manager.file_io
 
-    def commit(self, snapshot: Snapshot, branch: str, statistics: List[PartitionStatistics]) -> bool:
+    def commit(self, snapshot: Snapshot, statistics: List[PartitionStatistics]) -> bool:
         """
         Commit the snapshot using file renaming.
 
         Args:
             snapshot: The snapshot to commit
-            branch: The branch name to commit to
             statistics: List of partition statistics (currently unused but kept for interface compatibility)
 
         Returns:

diff --git a/paimon-python/pypaimon/snapshot/snapshot_commit.py b/paimon-python/pypaimon/snapshot/snapshot_commit.py
@@ -73,13 +73,12 @@ class SnapshotCommit(ABC):
     """Interface to commit snapshot atomically."""
 
     @abstractmethod
-    def commit(self, snapshot: Snapshot, branch: str, statistics: List[PartitionStatistics]) -> bool:
+    def commit(self, snapshot: Snapshot, statistics: List[PartitionStatistics]) -> bool:
         """
         Commit the given snapshot.
 
         Args:
             snapshot: The snapshot to commit
-            branch: The branch name to commit to
             statistics: List of partition statistics
 
         Returns:

diff --git a/paimon-python/pypaimon/table/file_store_table.py b/paimon-python/pypaimon/table/file_store_table.py
@@ -83,8 +83,8 @@ def from_path(cls, table_path: str) -> 'FileStoreTable':
         return cls(file_io, identifier, table_path, table_schema)
 
     def current_branch(self) -> str:
-        """Get the current branch name from options."""
-        return self.options.branch()
+        """Get the current branch name from the identifier."""
+        return self.identifier.get_branch_name_or_default()
 
     def comment(self) -> Optional[str]:
         """Get the table comment."""
@@ -406,8 +406,23 @@ def copy(self, options: dict) -> 'FileStoreTable':
         if time_travel_schema is not None:
             new_table_schema = time_travel_schema
 
-        return FileStoreTable(self.file_io, self.identifier, self.table_path, new_table_schema,
-                              self.catalog_environment)
+        # Re-encode the branch into the identifier whenever the branch option
+        # is passed, so current_branch() and any catalog-routed snapshot commit
+        # see the branched object name without an extra side channel.
+        new_identifier = self.identifier
+        catalog_env = self.catalog_environment
+        branch_key = CoreOptions.BRANCH.key()
+        if branch_key in options:
+            new_branch = options[branch_key]
+            new_identifier = Identifier.create(
+                self.identifier.get_database_name(),
+                self.identifier.get_table_name(),
+                branch=new_branch,
+            )
+            catalog_env = self.catalog_environment.copy(new_identifier)
+
+        return FileStoreTable(self.file_io, new_identifier, self.table_path, new_table_schema,
+                              catalog_env)
 
     def _try_time_travel(self, options: Options) -> Optional[TableSchema]:
         """