【Android代码】绘本翻页时通过AI识别,自动通过手机/pad朗读绘本

核心功能:

  • 打开摄像头(可支持外接摄像头)
  • 检测翻页(后续考虑添加图像差异算法)
  • 拍照后用 识图
  • 自动用 TextToSpeech 朗读文字内容

📌 说明:

  • 使用了 CameraX(Android Jetpack)处理摄像头输入

  • 使用 ML Kit 做文字识别

  • 使用 TextToSpeech 实现朗读

java 复制代码
// Project: StoryBookReaderApp
// MainActivity.java --- Android Studio 项目主类

package com.example.storybookreaderapp;

import android.Manifest;
import android.content.pm.PackageManager;
import android.os.Bundle;
import android.speech.tts.TextToSpeech;
import android.util.Log;
import android.view.SurfaceView;
import android.widget.Toast;

import androidx.annotation.NonNull;
import androidx.appcompat.app.AppCompatActivity;
import androidx.camera.core.CameraSelector;
import androidx.camera.core.ImageAnalysis;
import androidx.camera.core.ImageCapture;
import androidx.camera.core.ImageCaptureException;
import androidx.camera.core.ImageProxy;
import androidx.camera.lifecycle.ProcessCameraProvider;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;

import com.google.common.util.concurrent.ListenableFuture;
import com.google.mlkit.vision.common.InputImage;
import com.google.mlkit.vision.text.TextRecognition;
import com.google.mlkit.vision.text.TextRecognizer;

import java.util.Locale;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class MainActivity extends AppCompatActivity {
    private static final int REQUEST_CODE_PERMISSIONS = 10;
    private static final String[] REQUIRED_PERMISSIONS = new String[]{Manifest.permission.CAMERA};

    private ExecutorService cameraExecutor;
    private ImageCapture imageCapture;
    private TextToSpeech tts;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        SurfaceView view = new SurfaceView(this);
        setContentView(view);

        if (allPermissionsGranted()) {
            startCamera();
        } else {
            ActivityCompat.requestPermissions(this, REQUIRED_PERMISSIONS, REQUEST_CODE_PERMISSIONS);
        }

        cameraExecutor = Executors.newSingleThreadExecutor();

        tts = new TextToSpeech(this, status -> {
            if (status != TextToSpeech.ERROR) {
                tts.setLanguage(Locale.US);
            }
        });
    }

    private boolean allPermissionsGranted() {
        for (String permission : REQUIRED_PERMISSIONS) {
            if (ContextCompat.checkSelfPermission(this, permission) != PackageManager.PERMISSION_GRANTED) {
                return false;
            }
        }
        return true;
    }

    private void startCamera() {
        ListenableFuture<ProcessCameraProvider> cameraProviderFuture = ProcessCameraProvider.getInstance(this);
        cameraProviderFuture.addListener(() -> {
            try {
                ProcessCameraProvider cameraProvider = cameraProviderFuture.get();

                imageCapture = new ImageCapture.Builder().build();

                ImageAnalysis imageAnalysis = new ImageAnalysis.Builder().build();
                imageAnalysis.setAnalyzer(cameraExecutor, image -> {
                    detectPageTurn(image);
                    image.close();
                });

                CameraSelector cameraSelector = CameraSelector.DEFAULT_BACK_CAMERA;

                cameraProvider.unbindAll();
                cameraProvider.bindToLifecycle(this, cameraSelector, imageCapture, imageAnalysis);

            } catch (Exception e) {
                Log.e("CameraX", "Binding failed", e);
            }
        }, ContextCompat.getMainExecutor(this));
    }

    private void detectPageTurn(ImageProxy image) {
        takePhotoAndRead();
    }

    private void takePhotoAndRead() {
        imageCapture.takePicture(ContextCompat.getMainExecutor(this), new ImageCapture.OnImageCapturedCallback() {
            @Override
            public void onCaptureSuccess(@NonNull ImageProxy image) {
                InputImage inputImage = InputImage.fromMediaImage(image.getImage(), image.getImageInfo().getRotationDegrees());
                TextRecognizer recognizer = TextRecognition.getClient();

                recognizer.process(inputImage)
                        .addOnSuccessListener(result -> {
                            String text = result.getText();
                            tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, null);
                        })
                        .addOnFailureListener(e -> Toast.makeText(MainActivity.this, "Text recognition failed", Toast.LENGTH_SHORT).show());
                image.close();
            }

            @Override
            public void onError(@NonNull ImageCaptureException exception) {
                Log.e("CameraX", "Capture failed", exception);
            }
        });
    }

    @Override
    protected void onDestroy() {
        if (tts != null) {
            tts.stop();
            tts.shutdown();
        }
        cameraExecutor.shutdown();
        super.onDestroy();
    }

    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
        if (requestCode == REQUEST_CODE_PERMISSIONS) {
            if (allPermissionsGranted()) {
                startCamera();
            } else {
                Toast.makeText(this, "Permissions not granted", Toast.LENGTH_SHORT).show();
                finish();
            }
        }
    }
} // End of class
相关推荐
一点一木20 小时前
深度体验TRAE SOLO移动端7天:作为独立开发者,我把工作流揣进了兜里
前端·人工智能·trae
赏金术士21 小时前
Compose 教学项目
android·kotlin·compose
Lee川21 小时前
mini-cursor 揭秘:从 Tool 定义到 Agent 循环的完整实现
前端·人工智能·后端
晓梦林21 小时前
ximai靶场学习笔记
android·笔记·学习
weelinking21 小时前
【产品】00_产品经理用Claude实现产品系列介绍
数据库·人工智能·sql·数据挖掘·github·产品经理
Agent产品评测局21 小时前
制造业模具管理AI系统,主流产品能力对比详解:2026年智能制造选型深度洞察
人工智能·ai·chatgpt·制造
研华科技Advantech1 天前
如何用一套实训设备,打通工业AI预测性维护技术全流程?
人工智能
Lab_AI1 天前
AI for Science: MaXFlow AI Agent+ 报告体验双升级,让AI智能体更高效易用!
人工智能·ai for science·ai agent·ai智能体
李坤1 天前
让 Codex 和 Claude 互相 Review:告别手动复制
人工智能·openai·claude
南屹川1 天前
【API设计】GraphQL实战:从REST到GraphQL的演进
人工智能