Basic Usage

from compressed_lists import CompressedIntegerList, CompressedStringList

# Create a CompressedIntegerList from three sub-lists, naming them "A", "B", "C"
int_data = [[1, 2, 3], [4, 5], [6, 7, 8, 9]]
names = ["A", "B", "C"]
int_list = CompressedIntegerList.from_list(int_data, names)

# Access elements by position, by name, or by slice
print(int_list[0])      # prints: [1 2 3]
print(int_list["B"])    # prints: [4 5]
print(int_list[1:3])    # prints a 2-element CompressedIntegerList holding 4..9

# Apply a function to each element; the result is indexable like the original
squared = int_list.lapply(lambda x: [i**2 for i in x])
print(squared[0])       # prints: [1 4 9]

# Convert to a regular Python list
regular_list = int_list.to_list()

# Create a CompressedStringList (no names are passed here)
char_data = [["apple", "banana"], ["cherry", "date", "elderberry"], ["fig"]]
char_list = CompressedStringList.from_list(char_data)
[1 2 3]
[4 5]
class: CompressedIntegerList
number of elements: (2) of type: integer
unlist_data: [np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9)]
partitioning: [(np.int64(0), np.int64(2)), (np.int64(2), np.int64(6))]
element_metadata(0): 
metadata(0): 

[1 4 9]

Partitioning

The Partitioning class handles the information about where each element begins and ends in the concatenated data. It allows for efficient extraction of elements without storing each element separately.

from compressed_lists import Partitioning

# Create partitioning from end positions: one end position and one name
# per element
ends = [3, 5, 10]
names = ["A", "B", "C"]
part = Partitioning(ends, names)

# Get partition range for an element.
# Index 1 is the second entry; this prints `3 5`, i.e. the range begins at
# the previous end position (3) and stops at this entry's end position (5).
start, end = part[1]
print(start, end)
3 5

Creating Custom CompressedList Subclasses

CompressedList can easily be extended to support custom data types. Here’s a step-by-step guide to creating your own CompressedList subclass:

1. Subclass CompressedList

Create a new class that inherits from CompressedList with appropriate type annotations:

from typing import Any, Generic, List, TypeVar

import numpy as np
from compressed_lists import CompressedList, Partitioning

# Type variable for the items stored in each sub-list. The method
# annotations used throughout this guide (List[T], List[List[T]]) refer
# to it, so it has to be defined before those methods are declared.
T = TypeVar("T")


class CustomCompressedList(CompressedList):
    """A custom CompressedList for your data type.

    This is the skeleton of the subclass: the constructor,
    ``_extract_range`` and ``from_list`` described in the following steps
    are meant to be added to this class body.
    """

2. Implement the Constructor

The constructor should initialize the superclass with the appropriate data:

def __init__(
    self,
    unlist_data: Any,  # Replace with your data type
    partitioning: Partitioning,
    element_metadata: dict = None,
    metadata: dict = None,
):
    """Initialise the list by delegating to the CompressedList constructor.

    Args:
        unlist_data: The flat (concatenated) data for all elements.
        partitioning: Partitioning object passed through to the base class.
        element_metadata: Passed through unchanged to the base class.
        metadata: Passed through unchanged to the base class.
    """
    super().__init__(
        unlist_data,
        partitioning,
        element_type="custom_type",  # Set your element type
        element_metadata=element_metadata,
        metadata=metadata,
    )

3. Implement _extract_range Method

This method defines how to extract a range of elements from your unlisted data:

def _extract_range(self, start: int, end: int) -> List[T]:
    """Return the slice ``[start:end]`` of the unlisted data as a list.

    Args:
        start: Index of the first item to include.
        end: Index one past the last item to include.

    Returns:
        The selected items, converted with ``.tolist()``.
    """
    # This version is written for numpy-backed storage: slice first, then
    # convert the slice to a plain Python list.
    #
    # For other data types, return the slice directly instead:
    #     return self.unlist_data[start:end]
    chunk = self.unlist_data[start:end]
    return chunk.tolist()

4. Implement from_list Class Method

This factory method creates a new instance from a list:

@classmethod
def from_list(
    cls,
    lst: List[List[T]],
    names: list = None,
    metadata: dict = None,
) -> 'CustomCompressedList':
    """Build a new CustomCompressedList from a list of sub-lists.

    Args:
        lst: The sub-lists to store, one per element.
        names: Passed to ``Partitioning.from_list`` together with ``lst``.
        metadata: Forwarded to the constructor as ``metadata``.

    Returns:
        A new instance created via ``cls(...)``.
    """
    # Concatenate every sub-list, in order, into one flat sequence.
    flattened = [item for sublist in lst for item in sublist]

    # Derive the partitioning from the original nested list.
    partitioning = Partitioning.from_list(lst, names)

    # Store the flat data in your preferred format; numpy is used here
    # as an example.
    unlist_data = np.array(flattened, dtype=np.float64)

    return cls(unlist_data, partitioning, metadata=metadata)

Complete Example: CompressedFloatList

Here’s a complete example of a custom CompressedList for floating-point numbers:

import numpy as np
from compressed_lists import CompressedList, Partitioning
from typing import List

class CompressedFloatList(CompressedList):
    """CompressedList specialisation for floating-point numbers.

    ``from_list`` flattens the input into a single float64 NumPy array and
    ``_extract_range`` slices that array back into plain Python lists.
    """

    def __init__(
        self,
        unlist_data: np.ndarray,
        partitioning: Partitioning,
        element_metadata: dict = None,
        metadata: dict = None,
    ):
        """Delegate to the CompressedList constructor with element type "float".

        Args:
            unlist_data: Flat array holding the values of every element.
            partitioning: Partitioning object passed through to the base class.
            element_metadata: Passed through unchanged to the base class.
            metadata: Passed through unchanged to the base class.
        """
        super().__init__(
            unlist_data,
            partitioning,
            element_type="float",
            element_metadata=element_metadata,
            metadata=metadata,
        )

    def _extract_range(self, start: int, end: int) -> List[float]:
        """Return the slice ``[start:end]`` of the flat data as a Python list."""
        segment = self.unlist_data[start:end]
        return segment.tolist()

    @classmethod
    def from_list(
        cls,
        lst: List[List[float]],
        names: list = None,
        metadata: dict = None,
    ) -> 'CompressedFloatList':
        """Build a CompressedFloatList from a list of float sub-lists.

        Args:
            lst: The sub-lists to store, one per element.
            names: Passed to ``Partitioning.from_list`` together with ``lst``.
            metadata: Forwarded to the constructor as ``metadata``.

        Returns:
            A new instance created via ``cls(...)``.
        """
        # Concatenate every sub-list, in order, into one flat sequence.
        values = [value for sublist in lst for value in sublist]

        # Derive the partitioning from the original nested list.
        partitioning = Partitioning.from_list(lst, names)

        # Store the flat values as a float64 array.
        unlist_data = np.array(values, dtype=np.float64)

        return cls(unlist_data, partitioning, metadata=metadata)

# Usage: build the list from three named sub-lists, then look one up by name
float_data = [[1.1, 2.2, 3.3], [4.4, 5.5], [6.6, 7.7, 8.8, 9.9]]
float_list = CompressedFloatList.from_list(float_data, names=["X", "Y", "Z"])
print(float_list["Y"])  # prints: [4.4, 5.5]
[4.4, 5.5]

For More Complex Data Types

For more complex data types, you would follow the same pattern but customize the storage and extraction methods to suit your data.

For example, with a custom object:

class MyObject:
    """Minimal example of a custom object: it wraps a single value."""

    def __init__(self, value):
        # Keep the given value on the instance as ``value``.
        self.value = value

class CompressedMyObjectList(CompressedList[List[MyObject]]):
    # Implementation details...
    
    def _extract_range(self, start: int, end: int) -> List[MyObject]:
        return self.unlist_data[start:end]
    
    @classmethod
    def from_list(cls, lst: List[List[MyObject]], ...):
        # Custom flattening and storage logic
        # ...