【Android代码】绘本翻页时通过AI识别,自动通过手机/pad朗读绘本

核心功能:

  • 打开摄像头(可支持外接摄像头)
  • 检测翻页(后续考虑添加图像差异算法)
  • 拍照后用 识图
  • 自动用 TextToSpeech 朗读文字内容

📌 说明:

  • 使用了 CameraX(Android Jetpack)处理摄像头输入

  • 使用 ML Kit 做文字识别

  • 使用 TextToSpeech 实现朗读

java 复制代码
// Project: StoryBookReaderApp
// MainActivity.java --- Android Studio 项目主类

package com.example.storybookreaderapp;

import android.Manifest;
import android.content.pm.PackageManager;
import android.os.Bundle;
import android.speech.tts.TextToSpeech;
import android.util.Log;
import android.view.SurfaceView;
import android.widget.Toast;

import androidx.annotation.NonNull;
import androidx.appcompat.app.AppCompatActivity;
import androidx.camera.core.CameraSelector;
import androidx.camera.core.ImageAnalysis;
import androidx.camera.core.ImageCapture;
import androidx.camera.core.ImageCaptureException;
import androidx.camera.core.ImageProxy;
import androidx.camera.lifecycle.ProcessCameraProvider;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;

import com.google.common.util.concurrent.ListenableFuture;
import com.google.mlkit.vision.common.InputImage;
import com.google.mlkit.vision.text.TextRecognition;
import com.google.mlkit.vision.text.TextRecognizer;

import java.util.Locale;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class MainActivity extends AppCompatActivity {
    private static final int REQUEST_CODE_PERMISSIONS = 10;
    private static final String[] REQUIRED_PERMISSIONS = new String[]{Manifest.permission.CAMERA};

    private ExecutorService cameraExecutor;
    private ImageCapture imageCapture;
    private TextToSpeech tts;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        SurfaceView view = new SurfaceView(this);
        setContentView(view);

        if (allPermissionsGranted()) {
            startCamera();
        } else {
            ActivityCompat.requestPermissions(this, REQUIRED_PERMISSIONS, REQUEST_CODE_PERMISSIONS);
        }

        cameraExecutor = Executors.newSingleThreadExecutor();

        tts = new TextToSpeech(this, status -> {
            if (status != TextToSpeech.ERROR) {
                tts.setLanguage(Locale.US);
            }
        });
    }

    private boolean allPermissionsGranted() {
        for (String permission : REQUIRED_PERMISSIONS) {
            if (ContextCompat.checkSelfPermission(this, permission) != PackageManager.PERMISSION_GRANTED) {
                return false;
            }
        }
        return true;
    }

    private void startCamera() {
        ListenableFuture<ProcessCameraProvider> cameraProviderFuture = ProcessCameraProvider.getInstance(this);
        cameraProviderFuture.addListener(() -> {
            try {
                ProcessCameraProvider cameraProvider = cameraProviderFuture.get();

                imageCapture = new ImageCapture.Builder().build();

                ImageAnalysis imageAnalysis = new ImageAnalysis.Builder().build();
                imageAnalysis.setAnalyzer(cameraExecutor, image -> {
                    detectPageTurn(image);
                    image.close();
                });

                CameraSelector cameraSelector = CameraSelector.DEFAULT_BACK_CAMERA;

                cameraProvider.unbindAll();
                cameraProvider.bindToLifecycle(this, cameraSelector, imageCapture, imageAnalysis);

            } catch (Exception e) {
                Log.e("CameraX", "Binding failed", e);
            }
        }, ContextCompat.getMainExecutor(this));
    }

    private void detectPageTurn(ImageProxy image) {
        takePhotoAndRead();
    }

    private void takePhotoAndRead() {
        imageCapture.takePicture(ContextCompat.getMainExecutor(this), new ImageCapture.OnImageCapturedCallback() {
            @Override
            public void onCaptureSuccess(@NonNull ImageProxy image) {
                InputImage inputImage = InputImage.fromMediaImage(image.getImage(), image.getImageInfo().getRotationDegrees());
                TextRecognizer recognizer = TextRecognition.getClient();

                recognizer.process(inputImage)
                        .addOnSuccessListener(result -> {
                            String text = result.getText();
                            tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, null);
                        })
                        .addOnFailureListener(e -> Toast.makeText(MainActivity.this, "Text recognition failed", Toast.LENGTH_SHORT).show());
                image.close();
            }

            @Override
            public void onError(@NonNull ImageCaptureException exception) {
                Log.e("CameraX", "Capture failed", exception);
            }
        });
    }

    @Override
    protected void onDestroy() {
        if (tts != null) {
            tts.stop();
            tts.shutdown();
        }
        cameraExecutor.shutdown();
        super.onDestroy();
    }

    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
        if (requestCode == REQUEST_CODE_PERMISSIONS) {
            if (allPermissionsGranted()) {
                startCamera();
            } else {
                Toast.makeText(this, "Permissions not granted", Toast.LENGTH_SHORT).show();
                finish();
            }
        }
    }
} // End of class
相关推荐
m0_650108242 小时前
【论文精读】CMD:迈向高效视频生成的新范式
人工智能·论文精读·视频扩散模型·高效生成·内容 - 运动分解·latent 空间
电鱼智能的电小鱼2 小时前
基于电鱼 AI 工控机的智慧工地视频智能分析方案——边缘端AI检测,实现无人值守下的实时安全预警
网络·人工智能·嵌入式硬件·算法·安全·音视频
年年测试2 小时前
AI驱动的测试:用Dify工作流实现智能缺陷分析与分类
人工智能·分类·数据挖掘
唐兴通个人3 小时前
人工智能Deepseek医药AI培训师培训讲师唐兴通讲课课程纲要
大数据·人工智能
共绩算力4 小时前
Llama 4 Maverick Scout 多模态MoE新里程碑
人工智能·llama·共绩算力
DashVector5 小时前
向量检索服务 DashVector产品计费
数据库·数据仓库·人工智能·算法·向量检索
AI纪元故事会5 小时前
【计算机视觉目标检测算法对比:R-CNN、YOLO与SSD全面解析】
人工智能·算法·目标检测·计算机视觉
音视频牛哥5 小时前
从协议规范和使用场景探讨为什么SmartMediaKit没有支持DASH
人工智能·音视频·大牛直播sdk·dash·dash还是rtmp·dash还是rtsp·dash还是hls
赞奇科技Xsuperzone5 小时前
DGX Spark 实战解析:模型选择与效率优化全指南
大数据·人工智能·gpt·spark·nvidia
音视频牛哥5 小时前
SmartMediaKit:如何让智能系统早人一步“跟上现实”的时间架构--从实时流媒体到系统智能的演进
人工智能·计算机视觉·音视频·音视频开发·具身智能·十五五规划具身智能·smartmediakit