Extractor¶

claude_conversation_extractor.extractor ¶

Core conversation extraction logic.

ConversationExtractor ¶

Extracts conversations from Claude export files using streaming JSON parsing.

Source code in src/claude_conversation_extractor/extractor.py

class ConversationExtractor:
    """Extracts conversations from Claude export files using streaming JSON parsing.

    Every helper on this class is built on :meth:`stream_conversations`, so
    each call re-reads the export file from the beginning and only ever sees
    conversations that passed validation.
    """

    def __init__(self, export_file_path: str | Path):
        """Initialize with path to Claude export file.

        The path is only stored here; the file is not opened or checked
        until one of the streaming methods is iterated.

        Args:
            export_file_path: Path to the JSON export file
        """
        self.export_file_path = Path(export_file_path)

    def stream_conversations(self) -> Iterator[Conversation]:
        """Stream conversations from the export file without loading everything into memory.

        This is a generator: nothing happens (including the existence check)
        until the first item is requested.

        Yields:
            Conversation objects one at a time. Entries that fail validation
            are reported with a printed warning and skipped.

        Raises:
            FileNotFoundError: If export file doesn't exist (raised when
                iteration starts, not when this method is called)
            ijson.JSONError: If file is not valid JSON
        """
        if not self.export_file_path.exists():
            raise FileNotFoundError(f"Export file not found: {self.export_file_path}")

        with open(self.export_file_path, "rb") as f:
            # Parse conversations array items directly
            conversations = ijson.items(f, "item")

            for conversation_data in conversations:
                # Keep the try body to the validation call only. If the yield
                # were inside it, an exception thrown into this generator by
                # the consumer would be swallowed and misreported as an
                # invalid conversation.
                try:
                    conversation = Conversation.model_validate(conversation_data)
                except Exception as e:
                    # Report validation errors but continue processing
                    print(f"Warning: Skipping invalid conversation: {e}")
                    continue

                yield conversation

    def find_conversation(self, uuid: str) -> Conversation | None:
        """Find a conversation by its UUID using streaming.

        Scanning stops at the first conversation whose ``uuid`` attribute
        compares equal to the argument.

        Args:
            uuid: The conversation UUID to search for

        Returns:
            The conversation if found, None otherwise
        """
        for conversation in self.stream_conversations():
            if conversation.uuid == uuid:
                return conversation

        return None

    def extract_conversation(self, uuid: str) -> Conversation | None:
        """Extract conversation by UUID using streaming.

        Thin alias that delegates to :meth:`find_conversation`.

        Args:
            uuid: The conversation UUID to extract

        Returns:
            The conversation if found, None otherwise
        """
        return self.find_conversation(uuid)

    def get_conversation_count(self) -> int:
        """Get the number of conversations in the export file.

        The count is obtained by streaming the whole file, so entries that
        are skipped by :meth:`stream_conversations` are not counted.

        Returns:
            Number of conversations yielded by the stream
        """
        return sum(1 for _ in self.stream_conversations())

    def list_conversations(self, limit: int = 10) -> list[Conversation]:
        """List conversations up to a limit using streaming.

        Args:
            limit: Maximum number of conversations to return. Zero or a
                negative value yields an empty list.

        Returns:
            List of at most ``limit`` conversations, in stream order
        """
        # Guard first: the loop below checks the limit only after appending,
        # so without this a limit of 0 (or less) would still return one item.
        if limit <= 0:
            return []

        conversations = []
        for conversation in self.stream_conversations():
            conversations.append(conversation)
            if len(conversations) >= limit:
                # Stop pulling from the stream as soon as the limit is met.
                break
        return conversations

__init__ ¶

__init__(export_file_path)

Initialize with path to Claude export file.

Parameters:

Name	Type	Description	Default
`export_file_path`	`str \| Path`	Path to the JSON export file	required

Source code in src/claude_conversation_extractor/extractor.py

def __init__(self, export_file_path: str | Path):
    """Remember where the Claude export file lives.

    Args:
        export_file_path: Location of the JSON export file, as a string or
            a ``Path``; it is normalized to a ``Path`` before being stored.
    """
    normalized = Path(export_file_path)
    self.export_file_path = normalized

extract_conversation ¶

extract_conversation(uuid)

Extract conversation by UUID using streaming.

Parameters:

Name	Type	Description	Default
`uuid`	`str`	The conversation UUID to extract	required

Returns:

Type	Description
`Conversation \| None`	The conversation if found, None otherwise

Source code in src/claude_conversation_extractor/extractor.py

def extract_conversation(self, uuid: str) -> Conversation | None:
    """Return the conversation with the given UUID, streaming the export.

    This is an alias: the lookup itself is performed by
    ``find_conversation``.

    Args:
        uuid: UUID of the conversation to extract

    Returns:
        The matching conversation, or None when there is no match
    """
    match = self.find_conversation(uuid)
    return match

find_conversation ¶

find_conversation(uuid)

Find a conversation by its UUID using streaming.

Parameters:

Name	Type	Description	Default
`uuid`	`str`	The conversation UUID to search for	required

Returns:

Type	Description
`Conversation \| None`	The conversation if found, None otherwise

Source code in src/claude_conversation_extractor/extractor.py

def find_conversation(self, uuid: str) -> Conversation | None:
    """Look up a single conversation by UUID while streaming the export.

    The stream is consumed lazily and abandoned at the first conversation
    whose ``uuid`` attribute equals the argument.

    Args:
        uuid: UUID of the conversation to look for

    Returns:
        The first matching conversation, or None when nothing matches
    """
    matches = (
        candidate
        for candidate in self.stream_conversations()
        if candidate.uuid == uuid
    )
    return next(matches, None)

get_conversation_count ¶

get_conversation_count()

Get the number of conversations in the export file. The count is taken by streaming the file, so entries skipped as invalid are not included.

Returns:

Type	Description
`int`	Number of conversations

Source code in src/claude_conversation_extractor/extractor.py

def get_conversation_count(self) -> int:
    """Count the conversations produced by streaming the export file.

    The whole stream is consumed once; only items that
    ``stream_conversations`` actually yields contribute to the result.

    Returns:
        Number of conversations yielded by the stream
    """
    return sum(1 for _ in self.stream_conversations())

list_conversations ¶

list_conversations(limit=10)

List conversations up to a limit using streaming.

Parameters:

Name	Type	Description	Default
`limit`	`int`	Maximum number of conversations to return	`10`

Returns:

Type	Description
`list[Conversation]`	List of conversations

Source code in src/claude_conversation_extractor/extractor.py

def list_conversations(self, limit: int = 10) -> list[Conversation]:
    """List conversations up to a limit using streaming.

    Args:
        limit: Maximum number of conversations to return. Zero or a
            negative value yields an empty list.

    Returns:
        List of at most ``limit`` conversations, in stream order
    """
    # Guard first: the loop below checks the limit only after appending, so
    # without this a limit of 0 (or less) would still return one item.
    if limit <= 0:
        return []

    conversations = []
    for conversation in self.stream_conversations():
        conversations.append(conversation)
        if len(conversations) >= limit:
            # Stop pulling from the stream as soon as the limit is met.
            break
    return conversations

stream_conversations ¶

stream_conversations()

Stream conversations from the export file without loading everything into memory.

Yields:

Type	Description
`Conversation`	Conversation objects one at a time

Raises:

Type	Description
`FileNotFoundError`	If export file doesn't exist
`ijson.JSONError`	If file is not valid JSON

Source code in src/claude_conversation_extractor/extractor.py

def stream_conversations(self) -> Iterator[Conversation]:
    """Stream conversations from the export file without loading everything into memory.

    This is a generator: nothing happens (including the existence check)
    until the first item is requested.

    Yields:
        Conversation objects one at a time. Entries that fail validation
        are reported with a printed warning and skipped.

    Raises:
        FileNotFoundError: If export file doesn't exist (raised when
            iteration starts, not when this function is called)
        ijson.JSONError: If file is not valid JSON
    """
    if not self.export_file_path.exists():
        raise FileNotFoundError(f"Export file not found: {self.export_file_path}")

    with open(self.export_file_path, "rb") as f:
        # Parse conversations array items directly
        conversations = ijson.items(f, "item")

        for conversation_data in conversations:
            # Keep the try body to the validation call only. If the yield
            # were inside it, an exception thrown into this generator by the
            # consumer would be swallowed and misreported as an invalid
            # conversation.
            try:
                conversation = Conversation.model_validate(conversation_data)
            except Exception as e:
                # Report validation errors but continue processing
                print(f"Warning: Skipping invalid conversation: {e}")
                continue

            yield conversation

Extractor¶

claude_conversation_extractor.extractor ¶

ConversationExtractor ¶

__init__ ¶

extract_conversation ¶

find_conversation ¶

get_conversation_count ¶

list_conversations ¶

stream_conversations ¶

__init__ ¶