Python对照片进行文档扫描处理

Tags: /超级猛料/Picture.图形图像编程/ Date Created:

以下代码，对图片进行扫描处理，处理效果如下图

代码实现以下功能（保存为test.py）：

1. 如果命令行指定输入文件，则按指定文件_scanned.png输出，如果命令行指定输入和输出，则按指定的输出文件名输出。同时保存处理后的图片到剪切板。

2. 如果命令行不带任何参数，则读取剪切板，如果剪切板是文件名，则按文件名_scanned.png输出。同时保存处理后的图片到剪切板。

3. 如果剪切板是图片，则处理图片并重新保存到剪切板，同时保存为 scanned_image_from_clipboard.png

使用示例

python test.py input.jpg output.jpg

python test.py input.jpg

python test.py

test.py

from io import BytesIO
import cv2
import numpy as np
from PIL import ImageGrab, Image
import pyperclip
import sys
import os
import win32clipboard

# Step 1: 读取图片
def load_image(image_path, target_height=500):
    """Load an image and build a working copy scaled to a fixed height.

    Args:
        image_path: Path of the image file, passed to ``cv2.imread``.
        target_height: Height in pixels of the down-scaled working copy.
            Defaults to 500, the value that used to be hard-coded.

    Returns:
        A tuple ``(image, orig, ratio)``: the resized image, an untouched
        full-resolution copy, and ``original_height / target_height``.

    Raises:
        OSError: If ``cv2.imread`` could not read ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread reports failure by returning None instead of raising, so
    # fail here with a clear message rather than on the .copy() below.
    if image is None:
        raise OSError(f"cannot read image: {image_path}")

    orig = image.copy()
    ratio = image.shape[0] / float(target_height)
    # Keep at least one pixel of width so very tall, narrow inputs still resize.
    width = max(1, int(image.shape[1] / ratio))
    image = cv2.resize(image, (width, target_height))
    return image, orig, ratio

# Step 2: 边缘检测
def detect_edges(image):
    """Return a Canny edge map of a BGR image.

    The image is converted to grayscale and smoothed with a 5x5 Gaussian
    kernel, then Canny is run with thresholds 50 and 150.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    return cv2.Canny(cv2.GaussianBlur(grayscale, (5, 5), 0), 50, 150)

# Step 3: 找到文档的轮廓
def find_document_contour(edged_image):
    """Find a four-vertex polygon among the largest contours of an edge map.

    Only the five contours with the greatest area are examined, largest
    first. Each one is approximated with a tolerance of 2% of its closed
    perimeter, and the first approximation with exactly four vertices wins.

    Returns:
        The polygon produced by ``cv2.approxPolyDP``, or ``None`` when no
        candidate reduces to four vertices.
    """
    found, _hierarchy = cv2.findContours(
        edged_image, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE
    )
    candidates = sorted(found, key=cv2.contourArea, reverse=True)[:5]

    # Lazy generator: approximation stops as soon as a quadrilateral is found.
    polygons = (
        cv2.approxPolyDP(contour, 0.02 * cv2.arcLength(contour, True), True)
        for contour in candidates
    )
    return next((polygon for polygon in polygons if len(polygon) == 4), None)

# Step 4: 透视变换
def four_point_transform(image, points):
    """Warp the quadrilateral described by ``points`` onto an upright rectangle.

    Args:
        image: Source image handed to ``cv2.warpPerspective``.
        points: Four corner points; they are sorted with ``order_points``.

    Returns:
        The warped image. Its width is the longer of the top and bottom
        edges and its height the longer of the left and right edges, each
        truncated to an integer.
    """
    corners = order_points(points)
    top_left, top_right, bottom_right, bottom_left = corners

    def edge_length(p, q):
        # Euclidean distance between two corner points.
        return np.sqrt(((p[0] - q[0]) ** 2) + ((p[1] - q[1]) ** 2))

    out_width = max(
        int(edge_length(bottom_right, bottom_left)),
        int(edge_length(top_right, top_left)),
    )
    out_height = max(
        int(edge_length(top_right, bottom_right)),
        int(edge_length(top_left, bottom_left)),
    )

    # Destination corners in the same order as ``corners``.
    target = np.array(
        [
            [0, 0],
            [out_width - 1, 0],
            [out_width - 1, out_height - 1],
            [0, out_height - 1],
        ],
        dtype="float32",
    )

    matrix = cv2.getPerspectiveTransform(corners, target)
    return cv2.warpPerspective(image, matrix, (out_width, out_height))

# 辅助函数：整理轮廓顶点顺序
def order_points(pts):
    """Sort four points as top-left, top-right, bottom-right, bottom-left.

    The top-left point is the one with the smallest ``x + y`` and the
    bottom-right the one with the largest. The top-right point has the
    smallest ``y - x`` and the bottom-left the largest.

    Args:
        pts: Array of shape ``(4, 2)`` holding ``(x, y)`` coordinates.

    Returns:
        A ``float32`` array of shape ``(4, 2)`` in the order given above.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1)  # y - x for every point

    picks = [
        np.argmin(coord_sum),   # top-left
        np.argmin(coord_diff),  # top-right
        np.argmax(coord_sum),   # bottom-right
        np.argmax(coord_diff),  # bottom-left
    ]

    ordered = np.zeros((4, 2), dtype="float32")
    for slot, index in enumerate(picks):
        ordered[slot] = pts[index]
    return ordered

# Step 5: 图像增强（黑白效果）
def enhance_image(image):
    """Binarize a BGR image with Otsu's threshold.

    The image is converted to grayscale and smoothed with a 3x3 Gaussian
    kernel before thresholding. Only the thresholded image is returned; the
    threshold value chosen by Otsu's method is discarded.
    """
    smoothed = cv2.GaussianBlur(cv2.cvtColor(image, cv2.COLOR_BGR2GRAY), (3, 3), 0)
    _threshold, binarized = cv2.threshold(
        smoothed, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
    )
    return binarized

# Step 6: 更柔和的背景清理
def clean_background(image):
    """Apply an adaptive threshold followed by a morphological opening.

    The threshold uses the Gaussian-weighted method with block size 15 and
    constant 15. The opening uses a 2x2 kernel for a single iteration.

    Args:
        image: Image to clean; in this file it is the output of
            ``enhance_image``.
    """
    thresholded = cv2.adaptiveThreshold(
        image, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 15, 15
    )
    structuring_element = np.ones((2, 2), np.uint8)
    return cv2.morphologyEx(
        thresholded, cv2.MORPH_OPEN, structuring_element, iterations=1
    )

# Step 7: 保留文字，适度平滑
def final_smooth(image):
    """Return ``image`` after a median blur with aperture size 3."""
    return cv2.medianBlur(image, 3)

# 处理并保存图像的函数
def process_and_save_image(image, output_path):
    """Run the scan pipeline on ``image`` and write the result to disk.

    The pipeline is ``enhance_image`` -> ``clean_background`` ->
    ``final_smooth``.

    Args:
        image: BGR image to process.
        output_path: Destination file; its extension selects the encoder.

    Returns:
        The processed image that was written.

    Raises:
        OSError: If the image could not be encoded for ``output_path``.
    """
    # NOTE: contour detection and perspective correction (detect_edges,
    # find_document_contour, four_point_transform) are currently disabled;
    # the whole input image is processed as-is.
    enhanced_image = enhance_image(image)
    cleaned_background = clean_background(enhanced_image)
    final_image = final_smooth(cleaned_background)

    # Encode in memory and write the bytes ourselves. cv2.imwrite only
    # returns False on failure (which used to be ignored, so success was
    # printed even when nothing was written); this also keeps the write
    # independent of cv2.imwrite's handling of the path string.
    extension = os.path.splitext(output_path)[1]
    encoded_ok, encoded = cv2.imencode(extension, final_image)
    if not encoded_ok:
        raise OSError(f"cannot encode image for {output_path}")
    encoded.tofile(output_path)
    print(f"图像处理完成，已保存为 {output_path}")

    return final_image

def save_to_clipboard(image):
    """Place an OpenCV image on the Windows clipboard as a CF_DIB bitmap.

    Args:
        image: Either a 2-D single-channel array (such as the thresholded
            output of the scan pipeline) or a BGR/BGRA colour array.
    """
    if image.ndim == 2:
        # A single-channel image has no B/R channels to swap, and
        # cv2.COLOR_BGR2RGB rejects it, so build the RGB image through PIL.
        img = Image.fromarray(image).convert("RGB")
    else:
        img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    output = BytesIO()
    img.save(output, format='BMP')
    # CF_DIB expects the bitmap without the 14-byte BMP file header.
    data = output.getvalue()[14:]

    win32clipboard.OpenClipboard()
    try:
        win32clipboard.EmptyClipboard()
        win32clipboard.SetClipboardData(win32clipboard.CF_DIB, data)
    finally:
        # Always release the clipboard, even if setting the data fails.
        win32clipboard.CloseClipboard()

# 从剪切板读取图片
def read_image_from_clipboard():
    """Return the image on the clipboard as a BGR array.

    Returns:
        A 3-channel BGR ``numpy`` array, or ``None`` when
        ``ImageGrab.grabclipboard`` does not yield a PIL image.
    """
    img = ImageGrab.grabclipboard()
    if not isinstance(img, Image.Image):
        return None
    # PIL hands back RGB/RGBA/palette data, while every consumer in this
    # file converts from BGR; normalize to 3-channel BGR here.
    rgb = np.array(img.convert("RGB"))
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

# 从剪切板读取文件路径
def read_file_from_clipboard():
    """Return the first file path stored on the clipboard as CF_HDROP.

    Returns:
        The first entry of the CF_HDROP data, or ``None`` when the data is
        empty or a ``TypeError`` is raised while reading it (presumably how
        pywin32 reports that the format is not available; verify).
    """
    win32clipboard.OpenClipboard()
    try:
        dropped = win32clipboard.GetClipboardData(win32clipboard.CF_HDROP)
        first_path = dropped[0] if dropped else None
    except TypeError:
        first_path = None
    finally:
        # The clipboard is released on every path.
        win32clipboard.CloseClipboard()
    return first_path

# 主函数
def main():
    """Scan an image named on the command line or found on the clipboard.

    * One argument: process that file and write ``<name>_scanned.png``.
    * Two arguments: process the first file and write to the second path.
    * Otherwise: use the clipboard. A copied file takes precedence over a
      copied image; a copied image is written to
      ``scanned_image_from_clipboard.png``.

    In every successful case the processed image is also placed on the
    clipboard.
    """
    if len(sys.argv) in (2, 3):
        input_path = sys.argv[1]
        if len(sys.argv) == 3:
            output_path = sys.argv[2]
        else:
            output_path = os.path.splitext(input_path)[0] + "_scanned.png"

        print(f"处理命令行输入的文件：{input_path}")
        image = cv2.imread(input_path)
        if image is None:
            print(f"无法读取文件：{input_path}")
            return

        processed_image = process_and_save_image(image, output_path)
        save_to_clipboard(processed_image)
        return

    clipboard_image = read_image_from_clipboard()
    file_path = read_file_from_clipboard()

    if file_path and os.path.exists(file_path):
        print(f"从剪切板中检测到文件：{file_path}")
        # Image.open raises instead of returning None, so failures are
        # handled with an exception. Converting to RGB first lets
        # grayscale and palette files through the RGB->BGR conversion.
        try:
            with Image.open(file_path) as pil_image:
                rgb = np.array(pil_image.convert("RGB"))
        except OSError:
            print(f"无法读取文件：{file_path}")
            return
        image = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

        output_path = os.path.splitext(file_path)[0] + "_scanned.png"
        processed_image = process_and_save_image(image, output_path)
        save_to_clipboard(processed_image)

    elif clipboard_image is not None:
        print("从剪切板中读取到图片")
        processed_image = process_and_save_image(
            clipboard_image, "scanned_image_from_clipboard.png"
        )
        save_to_clipboard(processed_image)

    else:
        print("剪切板中没有有效的图片或文件，也未指定文件名。")

# Run the scanner only when this file is executed as a script.
if __name__ == "__main__":
    main()

上面的代码对有些图片的处理效果不理想。下面的版本会同时生成两种处理结果，并弹出窗口让用户选择其中一种：

from io import BytesIO
import cv2
import numpy as np
from PIL import ImageGrab, Image
import pyperclip
import sys
import os
import win32clipboard
import time

# 调整图片的显示大小，使其适应屏幕或窗口大小
def resize_image_to_fit(image, max_width=1920, max_height=1080):
    """Shrink ``image`` to fit within ``max_width`` x ``max_height``.

    The aspect ratio is kept. An image that already fits is returned
    unchanged (the same object, not a copy).

    Args:
        image: Array whose first two dimensions are height and width.
        max_width: Largest allowed width in pixels.
        max_height: Largest allowed height in pixels.
    """
    height, width = image.shape[:2]
    aspect_ratio = width / height

    if width <= max_width and height <= max_height:
        return image

    # Scale along whichever dimension overflows its limit the most.
    if width / max_width > height / max_height:
        target_width = max_width
        target_height = int(target_width / aspect_ratio)
    else:
        target_height = max_height
        target_width = int(target_height * aspect_ratio)
    return cv2.resize(image, (target_width, target_height))

def adjust_gamma(image, gamma=1.0):
    """Apply gamma correction to ``image`` through a 256-entry lookup table.

    Each level ``i`` in 0..255 maps to ``(i / 255) ** (1 / gamma) * 255``,
    cast to ``uint8``.
    """
    exponent = 1.0 / gamma
    levels = np.arange(0, 256)
    mapped = [(level / 255.0) ** exponent * 255 for level in levels]
    lookup = np.array(mapped).astype("uint8")
    return cv2.LUT(image, lookup)

def process_image_v1(image):
    """First processing variant: adaptive mean threshold, then gamma 1.5.

    Args:
        image: BGR image.

    Returns:
        The single-channel result of ``adjust_gamma``.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # Adaptive mean threshold with block size 15 and constant 15.
    thresholded = cv2.adaptiveThreshold(
        grayscale, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 15, 15
    )

    # Morphological closing with a 1x1 kernel.
    closed = cv2.morphologyEx(
        thresholded, cv2.MORPH_CLOSE, np.ones((1, 1), np.uint8)
    )

    # NOTE(review): the input here is a 0/255 binary image, and the gamma
    # lookup table maps 0 to 0 and 255 to 255, so this step appears to leave
    # it unchanged. Confirm whether gamma was meant to run before the
    # threshold.
    return adjust_gamma(closed, gamma=1.5)

def process_image_v2(image):
    """Second processing variant, based on a difference of Gaussian blurs.

    Args:
        image: BGR image.

    Returns:
        A single-channel image normalized with ``cv2.NORM_MINMAX``.
    """
    # 1. Difference of two 5x5 Gaussian blurs (sigma 50 minus sigma 0).
    blur_sigma_50 = cv2.GaussianBlur(image, (5, 5), 50)
    blur_sigma_0 = cv2.GaussianBlur(image, (5, 5), 0)
    difference = cv2.subtract(blur_sigma_50, blur_sigma_0)

    # 2. Invert the difference image.
    inverted = cv2.bitwise_not(difference)

    # 3. Threshold the inverted image at 30 and fill its external contours
    #    to build a mask.
    inverted_gray = cv2.cvtColor(inverted, cv2.COLOR_BGR2GRAY)
    _threshold, binary = cv2.threshold(inverted_gray, 30, 255, cv2.THRESH_BINARY)
    outlines, _hierarchy = cv2.findContours(
        binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    region_mask = np.zeros_like(inverted_gray)
    cv2.drawContours(region_mask, outlines, -1, 255, thickness=cv2.FILLED)

    # 4. Keep only the pixels of the inverted image inside the mask.
    masked = cv2.bitwise_and(inverted, inverted, mask=region_mask)

    # 5. Convert to grayscale and stretch with min-max normalization.
    masked_gray = cv2.cvtColor(masked, cv2.COLOR_BGR2GRAY)
    return cv2.normalize(masked_gray, None, 255, 0, cv2.NORM_MINMAX)

# 显示两种处理结果，让用户选择
def show_images_and_choose(image1, image2):
    """Show two images side by side and return the one the user picks.

    Pressing ``1`` selects ``image1`` (left) and ``2`` selects ``image2``
    (right). Any other key falls back to ``image1``.

    Args:
        image1: Image shown on the left.
        image2: Image shown on the right. After resizing it must have the
            same height as ``image1`` so the two can be stacked
            horizontally.

    Returns:
        The original, un-resized ``image1`` or ``image2``.
    """
    window_title = 'Select Left or Right Image: Press 1 for Left, 2 for Right'

    # Only the preview is scaled down; the full-size image is returned.
    combined_image = np.hstack(
        (resize_image_to_fit(image1), resize_image_to_fit(image2))
    )

    cv2.namedWindow(window_title, cv2.WINDOW_NORMAL)
    try:
        cv2.imshow(window_title, combined_image)
        # Keep only the low byte so extra bits in the key code do not
        # prevent a match against ord('1') / ord('2').
        key = cv2.waitKey(0) & 0xFF
    finally:
        # Close the preview window; it was previously left open until the
        # process exited.
        cv2.destroyAllWindows()

    if key == ord('2'):
        print("你选择了右边的图像")
        return image2
    if key == ord('1'):
        print("你选择了左边的图像")
    else:
        print("未做选择，默认选择左边的图像")
    return image1

def save_to_clipboard(image):
    """Place an OpenCV image on the Windows clipboard as a CF_DIB bitmap.

    Args:
        image: Either a 2-D single-channel array (such as the results of
            ``process_image_v1`` / ``process_image_v2``) or a BGR/BGRA
            colour array.
    """
    if image.ndim == 2:
        # A single-channel image has no B/R channels to swap, and
        # cv2.COLOR_BGR2RGB rejects it, so build the RGB image through PIL.
        img = Image.fromarray(image).convert("RGB")
    else:
        img = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))

    output = BytesIO()
    img.save(output, format='BMP')
    # CF_DIB expects the bitmap without the 14-byte BMP file header.
    data = output.getvalue()[14:]

    win32clipboard.OpenClipboard()
    try:
        win32clipboard.EmptyClipboard()
        win32clipboard.SetClipboardData(win32clipboard.CF_DIB, data)
    finally:
        # Always release the clipboard, even if setting the data fails.
        win32clipboard.CloseClipboard()

# 从剪切板读取图片
def read_image_from_clipboard():
    """Return the image on the clipboard as a BGR array.

    Returns:
        A 3-channel BGR ``numpy`` array, or ``None`` when
        ``ImageGrab.grabclipboard`` does not yield a PIL image.
    """
    img = ImageGrab.grabclipboard()
    if not isinstance(img, Image.Image):
        return None
    # PIL hands back RGB/RGBA/palette data, while the processing functions
    # in this file convert from BGR; normalize to 3-channel BGR here.
    rgb = np.array(img.convert("RGB"))
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)

# 从剪切板读取文件路径
def read_file_from_clipboard():
    """Return the first file path stored on the clipboard as CF_HDROP.

    Returns:
        The first entry of the CF_HDROP data, or ``None`` when the data is
        empty or a ``TypeError`` is raised while reading it (presumably how
        pywin32 reports that the format is not available; verify).
    """
    win32clipboard.OpenClipboard()
    try:
        dropped = win32clipboard.GetClipboardData(win32clipboard.CF_HDROP)
        first_path = dropped[0] if dropped else None
    except TypeError:
        first_path = None
    finally:
        # The clipboard is released on every path.
        win32clipboard.CloseClipboard()
    return first_path

# 保存图像的函数，支持中文路径
def save_image(image, output_path):
    """Save an OpenCV image through PIL.

    Args:
        image: Either a 2-D single-channel array or a BGR/BGRA colour array.
        output_path: Destination file path handed to ``PIL.Image.save``.
    """
    if image.ndim == 2:
        # Grayscale results (what process_image_v1/v2 return) have no
        # channels to reorder, and cv2.COLOR_BGR2RGB rejects them.
        pil_image = Image.fromarray(image)
    else:
        pil_image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    pil_image.save(output_path)

# 主函数
def _load_bgr_image(path):
    """Open ``path`` with PIL and return a 3-channel BGR array, or None on failure."""
    try:
        with Image.open(path) as pil_image:
            # Converting to RGB lets grayscale, palette and RGBA files
            # through the RGB->BGR conversion below.
            rgb = np.array(pil_image.convert("RGB"))
    except OSError:
        return None
    return cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)


def _process_and_choose(image):
    """Run both processing variants on ``image`` and return the user's pick."""
    processed_image_v1 = process_image_v1(image)
    processed_image_v2 = process_image_v2(image)
    return show_images_and_choose(processed_image_v1, processed_image_v2)


def main():
    """Process an image from the command line or the clipboard.

    * With arguments: ``argv[1]`` is the input file and a non-blank
      ``argv[2]`` is the output file (default ``<input>_scanned.png``).
      The chosen result is saved to that file.
    * Without arguments: a file copied to the clipboard takes precedence
      over a copied image. The chosen result is placed on the clipboard.
    """
    if len(sys.argv) >= 2:
        input_path = sys.argv[1]
        if len(sys.argv) > 2 and sys.argv[2].strip():
            output_path = sys.argv[2]
        else:
            output_path = os.path.splitext(input_path)[0] + "_scanned.png"
        print(f"处理文件：{input_path}")

        # PIL is used for loading so that non-ASCII paths work.
        image = _load_bgr_image(input_path)
        if image is None:
            print(f"无法读取文件：{input_path}")
            return

        save_image(_process_and_choose(image), output_path)
        print(f"已保存图像为 {output_path}")
        return

    clipboard_image = read_image_from_clipboard()
    file_path = read_file_from_clipboard()

    if file_path and os.path.exists(file_path):
        print(f"从剪切板中检测到文件：{file_path}")
        image = _load_bgr_image(file_path)
        if image is None:
            print(f"无法读取文件：{file_path}")
            return

        save_to_clipboard(_process_and_choose(image))
        print("已保存图像到剪切板")

    elif clipboard_image is not None:
        print("从剪切板中读取到图片")
        save_to_clipboard(_process_and_choose(clipboard_image))
        print("已保存图像到剪切板")

    else:
        print("剪切板中没有有效的图片或文件，也未指定文件名。")

# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
    time.sleep(3)  # pause for 3 seconds before the process exits

scan.py (2.4KB)