Skip to content

Extractor

claude_conversation_extractor.extractor

Core conversation extraction logic.

ConversationExtractor

Extracts conversations from Claude export files using streaming JSON parsing.

Source code in src/claude_conversation_extractor/extractor.py
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
class ConversationExtractor:
    """Extracts conversations from Claude export files using streaming JSON parsing.

    Every lookup helper on this class is built on top of
    :meth:`stream_conversations`, so entries that fail validation are skipped
    (with a printed warning) by all of them.
    """

    def __init__(self, export_file_path: str | Path):
        """Initialize with path to Claude export file.

        Args:
            export_file_path: Path to the JSON export file
        """
        self.export_file_path = Path(export_file_path)

    def stream_conversations(self) -> Iterator[Conversation]:
        """Stream conversations from the export file without loading everything into memory.

        This is a generator, so nothing (including the existence check) runs
        until the first item is requested.

        Yields:
            Conversation objects one at a time. Entries that fail validation
            are skipped after printing a warning.

        Raises:
            FileNotFoundError: If export file doesn't exist
            ijson.JSONError: If file is not valid JSON
        """
        if not self.export_file_path.exists():
            raise FileNotFoundError(f"Export file not found: {self.export_file_path}")

        with open(self.export_file_path, "rb") as f:
            # The "item" prefix selects each element of the top-level array.
            for conversation_data in ijson.items(f, "item"):
                try:
                    conversation = Conversation.model_validate(conversation_data)
                except Exception as e:
                    # Best effort: report the bad entry and keep going.
                    print(f"Warning: Skipping invalid conversation: {e}")
                    continue
                # Yield outside the try block so that an exception thrown into
                # the generator by its consumer is not mistaken for a
                # validation failure and silently swallowed.
                yield conversation

    def find_conversation(self, uuid: str) -> Conversation | None:
        """Find a conversation by its UUID using streaming.

        Streaming stops at the first match.

        Args:
            uuid: The conversation UUID to search for

        Returns:
            The conversation if found, None otherwise
        """
        for conversation in self.stream_conversations():
            if conversation.uuid == uuid:
                return conversation

        return None

    def extract_conversation(self, uuid: str) -> Conversation | None:
        """Extract conversation by UUID using streaming.

        Thin alias for :meth:`find_conversation`.

        Args:
            uuid: The conversation UUID to extract

        Returns:
            The conversation if found, None otherwise
        """
        return self.find_conversation(uuid)

    def get_conversation_count(self) -> int:
        """Get the number of conversations in the export file.

        The whole file is streamed once. Only conversations yielded by
        :meth:`stream_conversations` are counted, so entries that fail
        validation are not included.

        Returns:
            Number of conversations
        """
        return sum(1 for _ in self.stream_conversations())

    def list_conversations(self, limit: int = 10) -> list[Conversation]:
        """List conversations up to a limit using streaming.

        Args:
            limit: Maximum number of conversations to return. Zero or a
                negative value yields an empty list.

        Returns:
            List of conversations, at most ``limit`` long
        """
        # Guard first: the loop below appends before it checks the limit, so
        # without this a limit of 0 (or less) would still return one item.
        if limit <= 0:
            return []

        conversations = []
        for conversation in self.stream_conversations():
            conversations.append(conversation)
            if len(conversations) >= limit:
                break
        return conversations

__init__

__init__(export_file_path)

Initialize with path to Claude export file.

Parameters:

Name Type Description Default
export_file_path str | Path

Path to the JSON export file

required
Source code in src/claude_conversation_extractor/extractor.py
14
15
16
17
18
19
20
def __init__(self, export_file_path: str | Path):
    """Remember where the Claude export file lives.

    Args:
        export_file_path: Location of the JSON export file; stored as a
            ``Path`` on ``self.export_file_path``.
    """
    resolved_location = Path(export_file_path)
    self.export_file_path = resolved_location

extract_conversation

extract_conversation(uuid)

Extract conversation by UUID using streaming.

Parameters:

Name Type Description Default
uuid str

The conversation UUID to extract

required

Returns:

Type Description
Conversation | None

The conversation if found, None otherwise

Source code in src/claude_conversation_extractor/extractor.py
64
65
66
67
68
69
70
71
72
73
def extract_conversation(self, uuid: str) -> Conversation | None:
    """Return the conversation with the given UUID, if the export has one.

    Delegates entirely to ``find_conversation``.

    Args:
        uuid: UUID of the conversation to extract

    Returns:
        The matching conversation, or None when there is no match
    """
    match = self.find_conversation(uuid)
    return match

find_conversation

find_conversation(uuid)

Find a conversation by its UUID using streaming.

Parameters:

Name Type Description Default
uuid str

The conversation UUID to search for

required

Returns:

Type Description
Conversation | None

The conversation if found, None otherwise

Source code in src/claude_conversation_extractor/extractor.py
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def find_conversation(self, uuid: str) -> Conversation | None:
    """Look up a single conversation by UUID while streaming the export.

    Iteration stops as soon as the first match is produced.

    Args:
        uuid: UUID of the conversation being searched for

    Returns:
        The first conversation whose ``uuid`` equals the argument, or None
        when the stream is exhausted without a match
    """
    matches = (
        candidate
        for candidate in self.stream_conversations()
        if candidate.uuid == uuid
    )
    return next(matches, None)

get_conversation_count

get_conversation_count()

Get the number of conversations in the export file that pass validation (entries skipped as invalid are not counted).

Returns:

Type Description
int

Number of conversations

Source code in src/claude_conversation_extractor/extractor.py
75
76
77
78
79
80
81
82
83
84
def get_conversation_count(self) -> int:
    """Count the conversations produced by streaming the export file.

    The stream is consumed to the end; only items yielded by
    ``stream_conversations`` contribute to the total.

    Returns:
        Number of conversations
    """
    return sum(1 for _ in self.stream_conversations())

list_conversations

list_conversations(limit=10)

List conversations up to a limit using streaming.

Parameters:

Name Type Description Default
limit int

Maximum number of conversations to return

10

Returns:

Type Description
list[Conversation]

List of conversations

Source code in src/claude_conversation_extractor/extractor.py
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
def list_conversations(self, limit: int = 10) -> list[Conversation]:
    """List conversations up to a limit using streaming.

    Args:
        limit: Maximum number of conversations to return. Zero or a negative
            value yields an empty list.

    Returns:
        List of conversations, at most ``limit`` long
    """
    # Guard first: the loop below appends before it checks the limit, so
    # without this a limit of 0 (or less) would still return one item.
    if limit <= 0:
        return []

    conversations = []
    for conversation in self.stream_conversations():
        conversations.append(conversation)
        if len(conversations) >= limit:
            break
    return conversations

stream_conversations

stream_conversations()

Stream conversations from the export file without loading everything into memory.

Yields:

Type Description
Conversation

Conversation objects one at a time

Raises:

Type Description
FileNotFoundError

If export file doesn't exist

JSONError

If file is not valid JSON

Source code in src/claude_conversation_extractor/extractor.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
def stream_conversations(self) -> Iterator[Conversation]:
    """Stream conversations from the export file without loading everything into memory.

    This is a generator, so nothing (including the existence check) runs
    until the first item is requested.

    Yields:
        Conversation objects one at a time. Entries that fail validation are
        skipped after printing a warning.

    Raises:
        FileNotFoundError: If export file doesn't exist
        ijson.JSONError: If file is not valid JSON
    """
    if not self.export_file_path.exists():
        raise FileNotFoundError(f"Export file not found: {self.export_file_path}")

    with open(self.export_file_path, "rb") as f:
        # The "item" prefix selects each element of the top-level array.
        for conversation_data in ijson.items(f, "item"):
            try:
                conversation = Conversation.model_validate(conversation_data)
            except Exception as e:
                # Best effort: report the bad entry and keep going.
                print(f"Warning: Skipping invalid conversation: {e}")
                continue
            # Yield outside the try block so that an exception thrown into
            # the generator by its consumer is not mistaken for a validation
            # failure and silently swallowed.
            yield conversation