"""Carve embedded JPEG and PNG images out of binary files (image_cutter).

Every regular file in ``./input/`` is scanned for JPEG / PNG start and end
markers; each candidate byte range that Pillow accepts as an image is saved
to ``./output/`` as ``<input name>_image_<n>.<jpg|png>``.

Setup and usage, as recorded in the originating patch (2bd1393a, "Unit",
which also adds the README "Run" section, ``input/.gitkeep``,
``output/.gitkeep``, ``requirements.txt`` and ``run.bat``):

    pip install -r requirements.txt    # requirements.txt lists: pillow
    python main.py

``run.bat`` runs exactly these two commands; the README says to open it.
"""
import io
import os

from PIL import Image, UnidentifiedImageError

# (start marker, end marker) of every container format that is searched for.
_SIGNATURES = (
    # JPEG: start-of-image ... end-of-image
    (b'\xFF\xD8', b'\xFF\xD9'),
    # PNG: file signature ... IEND chunk type followed by its CRC
    (b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A', b'IEND\xAE\x42\x60\x82'),
)


def _is_valid_image(image_data):
    """Return True when Pillow can open and verify *image_data*."""
    try:
        Image.open(io.BytesIO(image_data)).verify()
    except (IOError, UnidentifiedImageError, SyntaxError):
        # Pillow's PNG verification reports broken chunks / checksums as
        # SyntaxError, so it has to be caught next to the I/O errors.
        return False
    return True


def _save_images(prefix, images):
    """Write every verified image to ./output/ and report it on stdout."""
    if not images:
        return

    os.makedirs("./output", exist_ok=True)
    for idx, image_data in enumerate(images, start=1):
        # Re-open from the raw bytes: verify() leaves an image unusable.
        # NOTE: save() re-encodes the picture; it is not a byte-for-byte copy.
        with Image.open(io.BytesIO(image_data)) as image:
            extension = 'jpg' if image.format == 'JPEG' else 'png'
            image.save(f"./output/{prefix}_image_{idx}.{extension}")
            print(f"Extracted {image.format} image {idx}")


def extract_images(prefix, byte_data):
    """Find, validate and save all JPEG / PNG images embedded in *byte_data*.

    Args:
        prefix: File-name prefix for the saved images
            (``./output/<prefix>_image_<n>.<ext>``).
        byte_data: Raw bytes to scan.

    Returns:
        None. Images are written to ``./output/`` and progress is printed.
    """
    images = []
    # Formats that can still occur at or after `start`.
    pending = list(_SIGNATURES)
    start = 0

    while pending and start < len(byte_data):
        # Locate the nearest start marker among the formats still in play.
        hits = []
        for signature in list(pending):
            begin = byte_data.find(signature[0], start)
            if begin == -1:
                # `start` only moves forward, so this format is exhausted.
                pending.remove(signature)
            else:
                hits.append((begin, signature))
        if not hits:
            break  # No more images found

        begin, signature = min(hits, key=lambda hit: hit[0])
        start_marker, end_marker = signature

        # Test the raw find() result *before* adding the marker length;
        # otherwise a miss (-1) turns into a small positive offset, the
        # "not found" check never fires and the scan can loop forever.
        end_at = byte_data.find(end_marker, begin + len(start_marker))
        if end_at == -1:
            # No later start of this format can be terminated either; keep
            # scanning for the other format instead of giving up entirely.
            pending.remove(signature)
            continue

        end = end_at + len(end_marker)
        image_data = byte_data[begin:end]

        if _is_valid_image(image_data):
            images.append(image_data)
            start = end  # Move start to the end of the current image
        else:
            print(f"Skipping invalid image data at position {begin}")
            # Only step past the rejected start marker so that a real image
            # lying inside the rejected range is still discovered.
            start = begin + 1

    _save_images(prefix, images)


def main():
    """Run extract_images() on every regular file found in ./input/."""
    for entry in os.listdir("./input/"):
        path = f"./input/{entry}"
        if not os.path.isfile(path):
            continue  # sub-directories cannot be opened as files

        filename, _ = os.path.splitext(entry)
        with open(path, "rb") as handle:
            extract_images(filename, handle.read())


if __name__ == "__main__":
    main()