【Android代码】绘本翻页时通过AI识别,自动通过手机/pad朗读绘本

核心功能:

  • 打开摄像头(可支持外接摄像头)
  • 检测翻页(后续考虑添加图像差异算法)
  • 拍照后用 识图
  • 自动用 TextToSpeech 朗读文字内容

📌 说明:

  • 使用了 CameraX(Android Jetpack)处理摄像头输入

  • 使用 ML Kit 做文字识别

  • 使用 TextToSpeech 实现朗读

java 复制代码
// Project: StoryBookReaderApp
// MainActivity.java --- Android Studio 项目主类

package com.example.storybookreaderapp;

import android.Manifest;
import android.content.pm.PackageManager;
import android.os.Bundle;
import android.speech.tts.TextToSpeech;
import android.util.Log;
import android.view.SurfaceView;
import android.widget.Toast;

import androidx.annotation.NonNull;
import androidx.appcompat.app.AppCompatActivity;
import androidx.camera.core.CameraSelector;
import androidx.camera.core.ImageAnalysis;
import androidx.camera.core.ImageCapture;
import androidx.camera.core.ImageCaptureException;
import androidx.camera.core.ImageProxy;
import androidx.camera.lifecycle.ProcessCameraProvider;
import androidx.core.app.ActivityCompat;
import androidx.core.content.ContextCompat;

import com.google.common.util.concurrent.ListenableFuture;
import com.google.mlkit.vision.common.InputImage;
import com.google.mlkit.vision.text.TextRecognition;
import com.google.mlkit.vision.text.TextRecognizer;

import java.util.Locale;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class MainActivity extends AppCompatActivity {
    private static final int REQUEST_CODE_PERMISSIONS = 10;
    private static final String[] REQUIRED_PERMISSIONS = new String[]{Manifest.permission.CAMERA};

    private ExecutorService cameraExecutor;
    private ImageCapture imageCapture;
    private TextToSpeech tts;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        SurfaceView view = new SurfaceView(this);
        setContentView(view);

        if (allPermissionsGranted()) {
            startCamera();
        } else {
            ActivityCompat.requestPermissions(this, REQUIRED_PERMISSIONS, REQUEST_CODE_PERMISSIONS);
        }

        cameraExecutor = Executors.newSingleThreadExecutor();

        tts = new TextToSpeech(this, status -> {
            if (status != TextToSpeech.ERROR) {
                tts.setLanguage(Locale.US);
            }
        });
    }

    private boolean allPermissionsGranted() {
        for (String permission : REQUIRED_PERMISSIONS) {
            if (ContextCompat.checkSelfPermission(this, permission) != PackageManager.PERMISSION_GRANTED) {
                return false;
            }
        }
        return true;
    }

    private void startCamera() {
        ListenableFuture<ProcessCameraProvider> cameraProviderFuture = ProcessCameraProvider.getInstance(this);
        cameraProviderFuture.addListener(() -> {
            try {
                ProcessCameraProvider cameraProvider = cameraProviderFuture.get();

                imageCapture = new ImageCapture.Builder().build();

                ImageAnalysis imageAnalysis = new ImageAnalysis.Builder().build();
                imageAnalysis.setAnalyzer(cameraExecutor, image -> {
                    detectPageTurn(image);
                    image.close();
                });

                CameraSelector cameraSelector = CameraSelector.DEFAULT_BACK_CAMERA;

                cameraProvider.unbindAll();
                cameraProvider.bindToLifecycle(this, cameraSelector, imageCapture, imageAnalysis);

            } catch (Exception e) {
                Log.e("CameraX", "Binding failed", e);
            }
        }, ContextCompat.getMainExecutor(this));
    }

    private void detectPageTurn(ImageProxy image) {
        takePhotoAndRead();
    }

    private void takePhotoAndRead() {
        imageCapture.takePicture(ContextCompat.getMainExecutor(this), new ImageCapture.OnImageCapturedCallback() {
            @Override
            public void onCaptureSuccess(@NonNull ImageProxy image) {
                InputImage inputImage = InputImage.fromMediaImage(image.getImage(), image.getImageInfo().getRotationDegrees());
                TextRecognizer recognizer = TextRecognition.getClient();

                recognizer.process(inputImage)
                        .addOnSuccessListener(result -> {
                            String text = result.getText();
                            tts.speak(text, TextToSpeech.QUEUE_FLUSH, null, null);
                        })
                        .addOnFailureListener(e -> Toast.makeText(MainActivity.this, "Text recognition failed", Toast.LENGTH_SHORT).show());
                image.close();
            }

            @Override
            public void onError(@NonNull ImageCaptureException exception) {
                Log.e("CameraX", "Capture failed", exception);
            }
        });
    }

    @Override
    protected void onDestroy() {
        if (tts != null) {
            tts.stop();
            tts.shutdown();
        }
        cameraExecutor.shutdown();
        super.onDestroy();
    }

    @Override
    public void onRequestPermissionsResult(int requestCode, @NonNull String[] permissions, @NonNull int[] grantResults) {
        if (requestCode == REQUEST_CODE_PERMISSIONS) {
            if (allPermissionsGranted()) {
                startCamera();
            } else {
                Toast.makeText(this, "Permissions not granted", Toast.LENGTH_SHORT).show();
                finish();
            }
        }
    }
} // End of class
相关推荐
说私域几秒前
AI智能名片链动2+1模式小程序在消费者商家全链路互动中的应用研究
大数据·人工智能·小程序·流量运营·私域运营
千流出海2 分钟前
谷歌和苹果应用商店发现数十款AI去衣应用
人工智能
SelectDB技术团队7 分钟前
上市大模型企业数据基础设施的选择:MiniMax 基于阿里云 SelectDB 版,打造全球统一AI可观测中台
数据库·数据仓库·人工智能·ai·apache
橘子师兄8 分钟前
C++AI大模型接入SDK—Genimi接入封装
c++·人工智能·后端
向上的车轮9 分钟前
飞桨(PaddlePaddle):OCR识别原理
人工智能·ocr·paddlepaddle
njsgcs13 分钟前
langchain框架怎么让 Skills 与 MCP(tools)协同使用
人工智能·langchain·skills
newsxun13 分钟前
申晨案例解析:解码猫王如何从情怀走向现象级品牌的“熊猫罗盘”重塑之路
大数据·人工智能
Yang-Never14 分钟前
Open GL ES -> 应用前后台、Recent切换,SurfaceView纹理贴图闪烁问题分析解决
android·开发语言·kotlin·android studio·贴图
拐爷16 分钟前
Vibe‑coding九阳神功之夯:Git 基础操作,AI 时代的刹车系统(附速查表)
人工智能·git
Ro Jace16 分钟前
Leveraging AI Algorithms for Electronic Warfare Data Processing
人工智能