Unit
This commit is contained in:
0
input/.gitkeep
Normal file
0
input/.gitkeep
Normal file
68
main.py
Normal file
68
main.py
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
from PIL import Image, UnidentifiedImageError
|
||||||
|
import io
|
||||||
|
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
|
def extract_images(prefix, byte_data):
    """Carve embedded JPEG and PNG images out of a byte string and save them.

    The data is scanned front to back for JPEG (``FF D8`` ... ``FF D9``) and
    PNG (8-byte signature ... ``IEND`` chunk trailer) marker pairs. Every
    candidate span is checked with Pillow; spans that pass are written to
    ``./output/{prefix}_image_{n}.jpg`` or ``.png``, numbered from 1 in the
    order they were found.

    Args:
        prefix: Text placed at the start of every output file name.
        byte_data: Raw bytes to scan.

    Returns:
        None. Results are written to disk and progress is printed.
    """
    # (start marker, end marker) for each supported format.
    signatures = (
        (b'\xFF\xD8', b'\xFF\xD9'),  # JPEG
        (b'\x89\x50\x4E\x47\x0D\x0A\x1A\x0A', b'IEND\xAE\x42\x60\x82'),  # PNG
    )

    images = []
    start = 0

    while start < len(byte_data):
        # Collect the next complete start/end pair of each format.
        candidates = []
        for start_marker, end_marker in signatures:
            begin = byte_data.find(start_marker, start)
            if begin == -1:
                continue
            end_pos = byte_data.find(end_marker, begin + len(start_marker))
            # Test the raw find() result: adding the marker length before the
            # check would hide the -1 sentinel and send the scan backwards.
            if end_pos == -1:
                continue  # Truncated image; the other format may still match
            candidates.append(
                (begin, end_pos + len(end_marker), len(start_marker))
            )

        if not candidates:
            break  # No more complete images in the remaining data

        # The candidate that starts earliest is handled first.
        begin, end, marker_len = min(candidates)
        image_data = byte_data[begin:end]

        try:
            # Let Pillow confirm that the span really is an image.
            Image.open(io.BytesIO(image_data)).verify()
        except (IOError, SyntaxError, UnidentifiedImageError):
            # SyntaxError is included because some Pillow decoders (e.g. PNG)
            # appear to report corrupt data that way rather than as OSError.
            print(f"Skipping invalid image data at position {begin}")
            # Resume just past the rejected start marker so that a real image
            # beginning inside the rejected span is still discovered.
            start = begin + marker_len
            continue

        images.append(image_data)
        start = end  # Continue after the image that was just accepted

    if images:
        # Make sure the destination exists before the first save.
        os.makedirs("./output", exist_ok=True)

    for idx, image_data in enumerate(images, start=1):
        # Reopen from the raw bytes: verify() leaves an image object unusable.
        with Image.open(io.BytesIO(image_data)) as image:
            extension = 'jpg' if image.format == 'JPEG' else 'png'
            image.save(f"./output/{prefix}_image_{idx}.{extension}")
            print(f"Extracted {image.format} image {idx}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# Run the extractor over every regular file found in ./input, using the file
# name without its extension as the prefix for the generated images.
files = os.listdir("./input/")

for entry in files:
    path = os.path.join("./input/", entry)
    # Sub-directories cannot be opened for reading; skip them instead of
    # crashing the whole run.
    if not os.path.isfile(path):
        continue

    filename, _ = os.path.splitext(entry)

    # A distinct handle name avoids shadowing the loop variable.
    with open(path, "rb") as source:
        extract_images(filename, source.read())
|
||||||
0
output/.gitkeep
Normal file
0
output/.gitkeep
Normal file
1
requirements.txt
Normal file
1
requirements.txt
Normal file
@@ -0,0 +1 @@
|
|||||||
|
pillow
|
||||||
Reference in New Issue
Block a user