Build a React Native Document Scanner

In the previous article, we built a React Native Vision Camera frame processor for document normalization based on the Dynamsoft Document Normalizer SDK. In this article, we are going to use it to build a React Native document scanning demo app.

Preview of the final result: the app automatically detects the document and runs perspective transformation to get the document image. The image can be saved in one of three color modes: binary, gray, and color.

Let’s do this in steps.

New Project

Create a new React Native project with TypeScript support.

npx react-native init DocumentScanner --template react-native-template-typescript

Add Dependencies

  • Install the following packages to use the camera with the document normalization frame processor plugin:

    npm install vision-camera-dynamsoft-document-normalizer react-native-vision-camera@3.9.2 react-native-worklets-core
    

We also need to update the babel.config.js file for the frame processor plugin:

     module.exports = {
       presets: ['module:metro-react-native-babel-preset'],
    +  plugins: [['react-native-worklets-core/plugin']],
     };
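
    After modifying babel.config.js, restart Metro with the cache cleared so the plugin takes effect:

    npm start -- --reset-cache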
    
  • Install @react-navigation/native and its dependent packages to add navigation.

    npm install @react-navigation/native @react-navigation/native-stack react-native-safe-area-context react-native-screens
    
  • Install react-native-svg to draw the overlay for detected documents.

    npm install react-native-svg
    
  • Install react-native-share to share the normalized image.

    npm install react-native-share
    
  • Install react-native-simple-radio-button to provide a radio button component.

    npm install react-native-simple-radio-button
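
For iOS, the native modules also need their pods installed, which is a standard React Native step:

cd ios && pod install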
    

Add Camera Permission

For Android, add the following to AndroidManifest.xml.

<uses-permission android:name="android.permission.CAMERA" />

For iOS, add the following to Info.plist.

<key>NSCameraUsageDescription</key>
<string>For document scanning</string>
Set up Navigation

  1. Create a src folder and move App.tsx into it. Replace its content with the following:

    import * as React from 'react';
    import { NavigationContainer } from '@react-navigation/native';
    import { createNativeStackNavigator } from '@react-navigation/native-stack';
    import ScannerScreen from './screens/Scanner';
    import HomeScreen from './screens/Home';
    import ResultViewerScreen from './screens/ResultViewer';
    
    const Stack = createNativeStackNavigator();
    
    export default function App() {
      return (
        <NavigationContainer>
          <Stack.Navigator>
            <Stack.Screen name="Home" component={HomeScreen} />
            <Stack.Screen name="Scanner" component={ScannerScreen} />
            <Stack.Screen name="ResultViewer" component={ResultViewerScreen} />
          </Stack.Navigator>
        </NavigationContainer>
      );
    }
    
  2. Create a screens folder with the following files:

    screens/Home.tsx
    screens/ResultViewer.tsx
    screens/Scanner.tsx
    

    A template of the files looks like this:

    import React from "react";
    import { SafeAreaView, StyleSheet } from "react-native";
    
    export default function HomeScreen({route, navigation}) {
      return (
        <SafeAreaView style={styles.container}>
        </SafeAreaView>
      );
    }
    
    const styles = StyleSheet.create({
      container: {
        flex:1,
      },
    });
    

Next, we are going to implement the three files.

Home Page

In the home page, add a button to navigate to the scanner page.

import React from "react";
import { SafeAreaView, StyleSheet, Text, TouchableOpacity } from "react-native";

export default function HomeScreen({route, navigation}) {
  const onPressed = () => {
    navigation.navigate(
      {
        name: "Scanner"
      }
    );
  }

  return (
    <SafeAreaView style={styles.container}>
      <TouchableOpacity
        style={styles.button}
        onPress={() => onPressed()}
      >
        <Text style={styles.buttonText}>Scan Document</Text>
      </TouchableOpacity>
    </SafeAreaView>
  );
}
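
The button styles are not listed in the article; here is a minimal sketch (the values are placeholders, loosely matching the button style used later on the result viewer page):

const styles = StyleSheet.create({
  container: {
    flex: 1,
    justifyContent: "center",
    alignItems: "center",
  },
  button: {
    backgroundColor: "ghostwhite",
    borderColor: "black",
    borderWidth: 2,
    borderRadius: 5,
    padding: 10,
    margin: 3,
  },
  buttonText: {
    fontSize: 15,
    color: "black",
    alignSelf: "center",
  },
});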

Scanner Page

  1. In the scanner page, add a camera component first to test if the camera works.

    import React, { useEffect, useRef, useState } from 'react';
    import { Alert, Platform, SafeAreaView, StyleSheet } from 'react-native';
    import { Camera, PhotoFile, useCameraDevice, useCameraFormat } from 'react-native-vision-camera';
    
    export default function ScannerScreen({route, navigation}) {
      const camera = useRef<Camera>(null)
      const [hasPermission, setHasPermission] = useState(false);
      const device = useCameraDevice("back");
      const cameraFormat = useCameraFormat(device, [
        { videoResolution: { width: 1280, height: 720 } },
        { fps: 60 }
      ])
      useEffect(() => {
        (async () => {
          const status = await Camera.requestCameraPermission();
          setHasPermission(status === 'granted');
        })();
      }, []);
    
      return (
         <SafeAreaView style={styles.container}>
           {device != null &&
           hasPermission && (
           <>
               <Camera
                 style={StyleSheet.absoluteFill}
                 ref={camera}
                 isActive={true}
                 device={device}
                 format={cameraFormat}
                 pixelFormat='yuv'
               />
           </>)}
         </SafeAreaView>
      );
    }
    
    const styles = StyleSheet.create({
      container: {
        flex: 1
      },
    });
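
    At this point, run the app to verify that the camera preview works:

    npx react-native run-android
    npx react-native run-ios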
    
  2. Use the document normalization frame processor to detect documents.

    Define the frame processor:

    import { useFrameProcessor } from 'react-native-vision-camera';
    import * as DDN from "vision-camera-dynamsoft-document-normalizer";
    const frameProcessor = useFrameProcessor((frame) => {
      'worklet'
      const results = DDN.detect(frame);
      console.log(results);
    }, [])
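
    Each element of results is a DetectedQuadResult. The authoritative definition is in the plugin's type declarations; based on how the results are used later in this article, the shape is roughly:

    interface Point {
      x: number;
      y: number;
    }
    
    interface DetectedQuadResult {
      location: { points: Point[] }; //the four corner points of the detected document
      confidenceAsDocumentBoundary: number;
    }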
    

    Then pass it to the camera component’s props.

    <Camera
      style={StyleSheet.absoluteFill}
      ref={camera}
      isActive={true}
      device={device}
      format={cameraFormat}
      pixelFormat='yuv'
      frameProcessor={frameProcessor}
    />
    

    Remember to initialize the license for Dynamsoft Document Normalizer (you can apply for a trial license).

    let result = await DDN.initLicense("DLS2eyJoYW5kc2hha2VDb2RlIjoiMjAwMDAxLTE2NDk4Mjk3OTI2MzUiLCJvcmdhbml6YXRpb25JRCI6IjIwMDAwMSIsInNlc3Npb25QYXNzd29yZCI6IndTcGR6Vm05WDJrcEQ5YUoifQ==");
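
    The license only needs to be initialized once before detection starts. A minimal sketch, assuming we do it when the scanner screen mounts (any startup point works; the boolean result and the error handling are assumptions, so check the plugin's docs):

    //Alert comes from react-native
    useEffect(() => {
      (async () => {
        const success = await DDN.initLicense("<your license key>"); //assumed to resolve to a success boolean
        if (!success) {
          Alert.alert("", "License invalid");
        }
      })();
    }, []);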
    
  3. Draw the overlay for the detected document.

    1. Add an SVG component above the camera component (Svg and Polygon are imported from react-native-svg).

      <Svg preserveAspectRatio='xMidYMid slice' style={StyleSheet.absoluteFill} viewBox={viewBox}>
        {pointsText != "default" && (
          <Polygon
            points={pointsText}
            fill="lime"
            stroke="green"
            opacity="0.5"
            strokeWidth="1"
          />
        )}
      </Svg>
      
    2. Create several shared values so that we can pass values from the frame processor (which runs in a worklet) to the JavaScript side.

      import { runAtTargetFps } from 'react-native-vision-camera';
      import { Worklets, useSharedValue } from 'react-native-worklets-core';
      import type { DetectedQuadResult } from 'vision-camera-dynamsoft-document-normalizer';
            
      const detectionResults = useSharedValue([] as DetectedQuadResult[]);
      const frameWidth = useSharedValue(0);
      const frameHeight = useSharedValue(0);
      const frameProcessor = useFrameProcessor((frame) => {
        'worklet'
        if (takenShared.value === false) { //stop detecting when a photo is taken
          runAtTargetFps(3, () => {
            'worklet'
            const results = DDN.detect(frame);
            frameWidth.value = frame.width;
            frameHeight.value = frame.height;
            detectionResults.value = results;
            updateViewBoxJS();
            updatePointsDataJS();
          })
        }
      }, [])
      
    3. Define several functions to update the states needed to draw the SVG overlay.

      viewBox for the SVG’s viewBox attribute (a sketch of the getFrameSize helper it calls follows the snippet):

      const updateViewBox = () => {
        const frameSize = getFrameSize();
        setViewBox("0 0 "+frameSize[0]+" "+frameSize[1]);
        console.log("viewBox"+viewBox);
      }
      
      const updateViewBoxJS = Worklets.createRunInJsFn(updateViewBox);
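
      The getFrameSize helper is not shown in the article. A minimal sketch, assuming it reads the frame shared values and swaps width and height when the frame is landscape but the screen is portrait (Dimensions comes from react-native):

      const getFrameSize = (): [number, number] => {
        //assumption: the frame is rotated 90 degrees relative to a portrait screen
        if (frameWidth.value > frameHeight.value && Dimensions.get('window').width < Dimensions.get('window').height) {
          return [frameHeight.value, frameWidth.value];
        }
        return [frameWidth.value, frameHeight.value];
      };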
      

      pointsText, which holds the points attribute of the SVG polygon element:

      const [pointsText, setPointsText] = useState("default");
      const updatePointsData = () => {
        if (detectionResults.value.length>0) {
          let result = detectionResults.value[0];
          if (result) {
            let location = result.location;
            let pointsData = location.points[0].x + "," + location.points[0].y + " ";
            pointsData = pointsData + location.points[1].x + "," + location.points[1].y +" ";
            pointsData = pointsData + location.points[2].x + "," + location.points[2].y +" ";
            pointsData = pointsData + location.points[3].x + "," + location.points[3].y;
            setPointsText(pointsData);
          }
        }
      }
      
      const updatePointsDataJS = Worklets.createRunInJsFn(updatePointsData);
      
  4. Take a photo when the IoU (Intersection over Union) values of three consecutive detection results are all over 0.9 (90%), which indicates the detected document is steady.

    const previousResults = useRef([] as DetectedQuadResult[]);
    useEffect(() => {
      if (pointsText != "default") {
        checkIfSteady();
      }
    }, [pointsText]);
       
    const checkIfSteady = async () => {
      let result = detectionResults.value[0];
      if (result) {
        if (previousResults.current.length >= 3) {
          if (steady() == true) {
            await takePhoto();
            console.log("steady");
          }else{
            console.log("shift and add result");
            previousResults.current.shift();
            previousResults.current.push(result);
          }
        }else{
          console.log("add result");
          previousResults.current.push(result);
        }
      }
    }
       
    const steady = () => {
      if (previousResults.current[0] && previousResults.current[1] && previousResults.current[2]) {
        let iou1 = intersectionOverUnion(previousResults.current[0].location.points,previousResults.current[1].location.points);
        let iou2 = intersectionOverUnion(previousResults.current[1].location.points,previousResults.current[2].location.points);
        let iou3 = intersectionOverUnion(previousResults.current[0].location.points,previousResults.current[2].location.points);
        if (iou1>0.9 && iou2>0.9 && iou3>0.9) {
          return true;
        }else{
          return false;
        }
      }
      return false;
    }
    

    The takePhoto function (a sketch of its sleep helper follows):

    const takenShared = useSharedValue(false);
    const [taken,setTaken] = useState(false);
    const photo = useRef<PhotoFile|null>(null);
    const takePhoto = async () => {
      if (camera.current) {
        setTaken(true);
        takenShared.value = true; //pause detection in the frame processor
        await sleep(1000); //give the preview a moment to settle
        photo.current = await camera.current.takePhoto();
        if (photo.current) {
          setIsActive(false); //isActive is a state bound to the Camera's isActive prop
          if (Platform.OS === "android") {
            if (photo.current.metadata && photo.current.metadata.Orientation === 6) {
              console.log("rotate bitmap for Android");
              await DDN.rotateFile(photo.current.path,90);
            }
          }
        }else{
          Alert.alert("","Failed to take a photo");
          setTaken(false);
          takenShared.value = false;
        }
      }
    }
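
    The sleep helper is not shown in the article; it can be a simple Promise wrapper:

    const sleep = (ms: number) => new Promise<void>((resolve) => setTimeout(resolve, ms));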
    

    A Utils.tsx file is created to store the functions for calculating the IoU between polygons.

    import type { Point, Rect } from "vision-camera-dynamsoft-document-normalizer";
    
    export function intersectionOverUnion(pts1:Point[] ,pts2:Point[]) : number {
      let rect1 = getRectFromPoints(pts1);
      let rect2 = getRectFromPoints(pts2);
      return rectIntersectionOverUnion(rect1, rect2);
    }
    
    function rectIntersectionOverUnion(rect1:Rect, rect2:Rect) : number {
      let leftColumnMax = Math.max(rect1.left, rect2.left);
      let rightColumnMin = Math.min(rect1.right,rect2.right);
      let upRowMax = Math.max(rect1.top, rect2.top);
      let downRowMin = Math.min(rect1.bottom,rect2.bottom);
    
      if (leftColumnMax>=rightColumnMin || downRowMin<=upRowMax){
        return 0;
      }
    
      let s1 = rect1.width*rect1.height;
      let s2 = rect2.width*rect2.height;
      let sCross = (downRowMin-upRowMax)*(rightColumnMin-leftColumnMax);
      return sCross/(s1+s2-sCross);
    }
    
    function getRectFromPoints(points:Point[]) : Rect {
      if (points[0]) {
        let left:number;
        let top:number;
        let right:number;
        let bottom:number;
           
        left = points[0].x;
        top = points[0].y;
        right = 0;
        bottom = 0;
    
        points.forEach(point => {
          left = Math.min(point.x,left);
          top = Math.min(point.y,top);
          right = Math.max(point.x,right);
          bottom = Math.max(point.y,bottom);
        });
    
        let r:Rect = {
          left: left,
          top: top,
          right: right,
          bottom: bottom,
          width: right - left,
          height: bottom - top
        };
           
        return r;
      }else{
        throw new Error("Invalid number of points");
      }
    }
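
    As a quick sanity check of the math (not part of the original demo): two 10×10 squares offset by half their width share an intersection of 50 and a union of 150, so the IoU should be about 0.33.

    const pts1 = [{x:0,y:0},{x:10,y:0},{x:10,y:10},{x:0,y:10}];
    const pts2 = [{x:5,y:0},{x:15,y:0},{x:15,y:10},{x:5,y:10}];
    console.log(intersectionOverUnion(pts1, pts2)); //0.3333...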
    

After the photo is taken, navigate to the result viewer page.

navigation.navigate(
  {
    params: {photoPath:photo.current.path},
    name: "ResultViewer"
  }
);

Result Viewer Page

The document in the photo is cropped and perspective-corrected into a normalized image, which is displayed on the result viewer page. The page uses an Image component to display the normalized image and three radio buttons to select the color mode for normalization. There is also a share button to share the normalized image.

import React, { useEffect, useState } from 'react';
import { Image, SafeAreaView, StyleSheet, Text, TouchableOpacity, View } from 'react-native';
import Share, { ShareOptions } from 'react-native-share';
import RadioForm from 'react-native-simple-radio-button';
import * as DDN from 'vision-camera-dynamsoft-document-normalizer';
import type { DetectedQuadResult } from 'vision-camera-dynamsoft-document-normalizer';

const radio_props = [
  {label: 'Binary', value: 0 },
  {label: 'Gray', value: 1 },
  {label: 'Color', value: 2 }
];

let normalizedResult:any = {};

export default function ResultViewerScreen({route, navigation}) {
  const [normalizedImagePath, setNormalizedImagePath] = useState<undefined|string>(undefined);

  useEffect(() => {
    normalizedResult = {};
    normalize(0);
  }, []);

  const share = () => {
    console.log("save");
    let options:ShareOptions = {};
    options.url = "file://"+normalizedImagePath;
    Share.open(options);
  }

  const normalize = async (value:number) => {
    if (normalizedResult[value]) {
      setNormalizedImagePath(normalizedResult[value]);
    }else{
      let templateName = "";
      if (value === 0) {
        templateName = "NormalizeDocument_Binary";
      } else if (value === 1) {
        templateName = "NormalizeDocument_Gray";
      } else {
        templateName = "NormalizeDocument_Color";
      }
      let photoPath = route.params.photoPath;
      let results = await DDN.detectFile(photoPath);
      let points = results[0].location.points;
      let detectionResult:DetectedQuadResult = {
        confidenceAsDocumentBoundary:90,
        location:{
          points:[points[0]!,points[1]!,points[2]!,points[3]!]
        }
      }
      let normalizedImageResult = await DDN.normalizeFile(photoPath, detectionResult.location,{saveNormalizationResultAsFile:true},templateName);
      if (normalizedImageResult.imageURL) {
        normalizedResult[value] = normalizedImageResult.imageURL;
        setNormalizedImagePath(normalizedImageResult.imageURL)
      }
    }
  }

  return (
    <SafeAreaView style={styles.container}>
      {normalizedImagePath && (
        <Image
          style={[StyleSheet.absoluteFill,styles.image]}
          source={{uri:"file://"+normalizedImagePath}}
        />
      )}
      <View style={styles.control}>
        <View style={styles.buttonContainer}>
          <TouchableOpacity onPress={share} style={styles.button}>
            <Text style={{fontSize: 15, color: "black", alignSelf: "center"}}>Share</Text>
          </TouchableOpacity>
        </View>
        <View style={styles.radioContainer}>
          <RadioForm
            radio_props={radio_props}
            initial={0}
            formHorizontal={true}
            labelHorizontal={false}
            onPress={(value) => {normalize(value)}}
          />
        </View>
      </View>
    </SafeAreaView>
  );
}

const styles = StyleSheet.create({
  container: {
    flex:1,
  },
  control:{
    flexDirection:"row",
    position: 'absolute',
    bottom: 0,
    height: "15%",
    width:"100%",
    alignSelf:"flex-start",
    alignItems: 'center',
  },
  radioContainer:{
    flex: 0.7,
    padding: 5,
    margin: 3,
  },
  buttonContainer:{
    flex: 0.3,
    padding: 5,
    margin: 3,
  },
  button: {
    backgroundColor: "ghostwhite",
    borderColor:"black", 
    borderWidth:2, 
    borderRadius:5,
    padding: 8,
    margin: 3,
  },
  image: {
    resizeMode:"contain",
  }
});

Source Code

We’ve now completed the demo. Get the source code and give it a try: https://github.com/tony-xlh/react-native-document-scanner