【Android代码】绘本翻页时通过AI识别,自动通过手机/pad朗读绘本

核心功能:

  • 打开摄像头(可支持外接摄像头)
  • 检测翻页(后续考虑添加图像差异算法)
  • 拍照后用 识图
  • 自动用 TextToSpeech 朗读文字内容

📌 说明:

  • 使用了 CameraX(Android Jetpack)处理摄像头输入

  • 使用 ML Kit 做文字识别

  • 使用 TextToSpeech 实现朗读

java 复制代码
// Project: StoryBookReaderApp
// MainActivity.java --- Android Studio 项目主类

package com.example.storybookreaderapp;

import android.Manifest;
import android.content.pm.PackageManager;
import android.os.Bundle;
import android.speech.tts.TextToSpeech;
import android.util.Log;
import android.view.SurfaceView;
import android.widget.Toast;

import androidx.annotation.NonNull;
import androidx.appcompat.app.AppCompatActivity;
import androidx.camera.core.CameraSelector;
import androidx.camera.core.ImageAnalysis;
import androidx.camera.core.ImageCapture;
import androidx.camera.core.ImageCaptureException;
import androidx.camera.core.ImageProxy;
import androidx.camera.lifecycle.ProcessCameraProvider;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;

import com.google.common.util.concurrent.ListenableFuture;
import com.google.mlkit.vision.common.InputImage;
import com.google.mlkit.vision.text.TextRecognition;
import com.google.mlkit.vision.text.TextRecognizer;

import java.util.Locale;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class MainActivity extends AppCompatActivity {
    private static final int REQUEST_CODE_PERMISSIONS = 10;
    private static final String[] REQUIRED_PERMISSIONS = new String[]{Manifest.permission.CAMERA};

    private ExecutorService cameraExecutor;
    private ImageCapture imageCapture;
    private TextToSpeech tts;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        SurfaceView view = new SurfaceView(this);
        setContentView(view);

        if (allPermissionsGranted()) {
            startCamera();
        } else {
            ActivityCompat.requestPermissions(this, REQUIRED_PERMISSIONS, REQUEST_CODE_PERMISSIONS);
        }

        cameraExecutor = Executors.newSingleThreadExecutor();

        tts = new TextToSpeech(this, status -> {
            if (status != TextToSpeech.ERROR) {
                tts.setLanguage(Locale.US);
            }
        });
    }

    private boolean allPermissionsGranted() {
        for (String permission : REQUIRED_PERMISSIONS) {
            if (ContextCompat.checkSelfPermission(this, permission) != PackageManager.PERMISSION_GRANTED) {
                return false;
            }
        }
        return true;
    }

    private void startCamera() {
        ListenableFuture<ProcessCameraProvider> cameraProviderFuture = ProcessCameraProvider.getInstance(this);
        cameraProviderFuture.addListener(() -> {
            try {
                ProcessCameraProvider cameraProvider = cameraProviderFuture.get();

                imageCapture = new ImageCapture.Builder().build();

                ImageAnalysis imageAnalysis = new ImageAnalysis.Builder().build();
                imageAnalysis.setAnalyzer(cameraExecutor, image -> {
                    detectPageTurn(image);
                    image.close();
                });

                CameraSelector cameraSelector = CameraSelector.DEFAULT_BACK_CAMERA;

                cameraProvider.unbindAll();
                cameraProvider.bindToLifecycle(this, cameraSelector, imageCapture, imageAnalysis);

            } catch (Exception e) {
                Log.e("CameraX", "Binding failed", e);
            }
        }, ContextCompat.getMainExecutor(this));
    }

    private void detectPageTurn(ImageProxy image) {
        takePhotoAndRead();
    }

    private void takePhotoAndRead() {
        imageCapture.takePicture(ContextCompat.getMainExecutor(this), new ImageCapture.OnImageCapturedCallback() {
            @Override
            public void onCaptureSuccess(@NonNull ImageProxy image) {
                InputImage inputImage = InputImage.fromMediaImage(image.getImage(), image.getImageInfo().getRotationDegrees());
                TextRecognizer recognizer = TextRecognition.getClient();

                recognizer.process(inputImage)
                        .addOnSuccessListener(result -> {
                            String text = result.getText();
                            tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, null);
                        })
                        .addOnFailureListener(e -> Toast.makeText(MainActivity.this, "Text recognition failed", Toast.LENGTH_SHORT).show());
                image.close();
            }

            @Override
            public void onError(@NonNull ImageCaptureException exception) {
                Log.e("CameraX", "Capture failed", exception);
            }
        });
    }

    @Override
    protected void onDestroy() {
        if (tts != null) {
            tts.stop();
            tts.shutdown();
        }
        cameraExecutor.shutdown();
        super.onDestroy();
    }

    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
        if (requestCode == REQUEST_CODE_PERMISSIONS) {
            if (allPermissionsGranted()) {
                startCamera();
            } else {
                Toast.makeText(this, "Permissions not granted", Toast.LENGTH_SHORT).show();
                finish();
            }
        }
    }
} // End of class
相关推荐
Shawn_Shawn3 小时前
mcp学习笔记(一)-mcp核心概念梳理
人工智能·llm·mcp
33三 三like5 小时前
《基于知识图谱和智能推荐的养老志愿服务系统》开发日志
人工智能·知识图谱
芝士爱知识a5 小时前
【工具推荐】2026公考App横向评测:粉笔、华图与智蛙面试App功能对比
人工智能·软件推荐·ai教育·结构化面试·公考app·智蛙面试app·公考上岸
百锦再5 小时前
React编程高级主题:测试代码
android·前端·javascript·react.js·前端框架·reactjs
腾讯云开发者6 小时前
港科大熊辉|AI时代的职场新坐标——为什么你应该去“数据稀疏“的地方?
人工智能
工程师老罗6 小时前
YoloV1数据集格式转换,VOC XML→YOLOv1张量
xml·人工智能·yolo
2501_916008896 小时前
全面介绍Fiddler、Wireshark、HttpWatch、SmartSniff和firebug抓包工具功能与使用
android·ios·小程序·https·uni-app·iphone·webview
Coder_Boy_7 小时前
技术让开发更轻松的底层矛盾
java·大数据·数据库·人工智能·深度学习
啊森要自信7 小时前
CANN ops-cv:面向计算机视觉的 AI 硬件端高效算子库核心架构与开发逻辑
人工智能·计算机视觉·架构·cann