from PIL import Image, UnidentifiedImageError
import io

import os


def extract_images(prefix, byte_data):
    """Carve embedded JPEG and PNG images out of a byte blob and save them.

    The blob is scanned left to right for JPEG/PNG start signatures. For
    each start signature, the first matching end marker after it delimits
    a candidate, which is kept only if Pillow can open and verify it.
    Accepted images are then written to ``./output/`` as
    ``{prefix}_image_{n}.jpg`` or ``{prefix}_image_{n}.png`` (``n`` starts
    at 1 and counts accepted images only).

    Note: because the *first* end marker is used, an image that contains
    an earlier occurrence of its end marker (for example inside embedded
    data) is cut short at that point.

    Args:
        prefix: String used as the leading part of each output file name.
        byte_data: Bytes-like object (must support ``find`` and slicing)
            to scan for images.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    # JPEG and PNG start and end markers
    jpeg_start_marker = b'\xFF\xD8'
    jpeg_end_marker = b'\xFF\xD9'

    png_start_marker = b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A'
    png_end_marker = b'IEND\xAE\x42\x60\x82'

    images = []
    start = 0

    while start < len(byte_data):
        jpeg_start = byte_data.find(jpeg_start_marker, start)
        png_start = byte_data.find(png_start_marker, start)

        if jpeg_start == -1 and png_start == -1:
            break  # No more images found

        # Pick whichever signature occurs first in the remaining data.
        if png_start == -1 or (jpeg_start != -1 and jpeg_start < png_start):
            image_start = jpeg_start
            start_marker, end_marker = jpeg_start_marker, jpeg_end_marker
        else:
            image_start = png_start
            start_marker, end_marker = png_start_marker, png_end_marker

        # Test the raw find() result *before* adding the marker length;
        # otherwise a missing end marker (-1) turns into a small positive
        # offset, the check never fires and the scan can loop forever.
        end_pos = byte_data.find(end_marker, image_start + len(start_marker))
        if end_pos == -1:
            break  # No valid end found, likely corrupted
        image_end = end_pos + len(end_marker)
        image_data = byte_data[image_start:image_end]

        try:
            # Load the image using PIL to verify it's valid
            with Image.open(io.BytesIO(image_data)) as image:
                image.verify()  # Verify that it is indeed an image
        except (IOError, UnidentifiedImageError, SyntaxError):
            # SyntaxError is included because Pillow's PNG chunk checks
            # report broken data that way during verify().
            print(f"Skipping invalid image data at position {image_start}")
            # Resume right after the false start signature so a real image
            # located before this candidate's end marker is not skipped.
            start = image_start + len(start_marker)
            continue

        images.append(image_data)
        start = image_end  # Move start to the end of the current image

    if images:
        # Make sure the destination exists before the first save.
        os.makedirs("./output", exist_ok=True)

    # Process all extracted images
    for idx, image_data in enumerate(images, start=1):
        # Reload the image (since `verify()` puts the file in an unusable state)
        with Image.open(io.BytesIO(image_data)) as image:
            # Save the image (you can also display it using image.show())
            extension = 'jpg' if image.format == 'JPEG' else 'png'
            image.save(f"./output/{prefix}_image_{idx}.{extension}")
            print(f"Extracted {image.format} image {idx}")


# Run the extractor over every regular file in ./input/, using each file's
# name without its extension as the prefix for the generated image names.
files = os.listdir("./input/")

for entry in files:
    path = os.path.join("./input/", entry)
    if not os.path.isfile(path):
        # os.listdir also returns subdirectories, which cannot be opened
        # for reading; skip them instead of aborting the whole run.
        continue

    filename, _ = os.path.splitext(entry)

    # Use a distinct name for the handle so it does not shadow the
    # directory entry being processed.
    with open(path, "rb") as handle:
        extract_images(filename, handle.read())