Fujitsu ScanSnap: Difference between revisions

From Wiki
No edit summary
 
(52 intermediate revisions by the same user not shown)
Line 1: Line 1:
== Linux Scan Server ==
== Prepare Raspberry OS ==
* Scan Button Daemon
<pre>
apt update
apt full-upgrade
apt install aptitude sane-utils img2pdf
</pre>
 
* set /tmp to ram in /etc/fstab:
<pre>
tmpfs    /tmp    tmpfs    defaults,noatime,nosuid 0 0
</pre>
 
* create folders
<pre>
mkdir /srv/scanfolder_combined
mkdir /srv/scanfolder_ocred
mkdir /srv/scanfolder_uploaded
</pre>
 
== Test Scanner Connection ==
<pre>
lsusb
sane-find-scanner
scanimage -L
</pre>
 
== Scan Button Daemon ==
* Install via apt:
<pre>
<pre>
apt install scanbd
apt install scanbd
</pre>
</pre>


* OCRmyPDF via apt
* change /etc/scanbd/dll.conf to fix kernel message:
<pre>
#canon_pp
fujitsu
plustek_pp
</pre>
 
* Test:
<pre>
systemctl -t service
tail -F /var/log/syslog
</pre>
 
* /etc/scanbd/scanbd.conf
<pre>
action scan {
                # Applies to scanner options whose name matches this regex.
                filter = "^scan.*"
                # Trigger values 1 -> 0; presumably the button release - confirm
                # against the scanbd documentation.
                numerical-trigger {
                        from-value = 1
                        to-value  = 0
                }
                desc  = "Scan to file"
                # Absolute path of the script to run when the action triggers.
                script = "/srv/scan.script"
}
 
include(scanner.d/fujitsu.conf)
</pre>
 
* /srv/scan.script
<pre>
#!/bin/bash
#
# /srv/scan.script - configured as the scanbd "scan" action script.
# Scans all sheets from the ADF (duplex, 300 dpi, color, A4) into a private
# scratch directory and merges the pages into one timestamped PDF in OUT_DIR.

OUT_DIR=/srv/scanfolder_combined
TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)

TMP_DIR=$(mktemp -d) || exit 1
# Remove the whole scratch directory on every exit path. Deleting only the
# .tiff files would leave one empty directory behind per scan.
trap 'rm -rf -- "$TMP_DIR"' EXIT
cd "$TMP_DIR" || exit 1

# The exit status of scanimage is deliberately not checked; whether the scan
# produced anything is decided by the page count below.
scanimage --batch="$TMP_DIR/scan_%03d.tiff" --format=tiff \
          --resolution 300 --mode color --source "ADF Duplex" \
          --brightness 25 --contrast 15 \
          --page-width 210 --page-height 297 -x 210 -y 297 \
          --device-name='fujitsu:ScanSnap S1500:74986'
# --mode gray | color

# With nullglob an empty feeder yields an empty list instead of the literal
# string "*.tiff" being handed to img2pdf.
shopt -s nullglob
pages=(*.tiff)
if (( ${#pages[@]} == 0 )); then
    echo "no pages scanned" >&2
    exit 1
fi

echo "convert to pdf..."
# Build the PDF under a name that the OCR daemon's *.pdf pattern does not
# match and rename it once complete, so a half-written file is never picked up.
PDF="${OUT_DIR}/${TIMESTAMP}.pdf"
if img2pdf --pagesize A4 "${pages[@]}" --output "${PDF}.part"; then
    mv -- "${PDF}.part" "$PDF"
else
    rm -f -- "${PDF}.part"
    exit 1
fi
</pre>
 
== Testing manual scans ==
* disable scanbd to test manual scanning
<pre>
systemctl stop scanbd
</pre>
* simple scan
<pre>
scanimage --batch="/srv/scan_%03d.pnm" --format=pnm --resolution 300 --mode Color --source "ADF Duplex"
</pre>
 
== OCRmyPDF via pip3 ==
* install
<pre>
sudo apt install ghostscript libxml2 tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu pngquant unpaper leptonica-progs libleptonica-dev automake libtool zlib1g-dev libjpeg-dev python3 python3-pip libxml2-dev libxslt1-dev libffi-dev git
 
git clone https://github.com/agl/jbig2enc
cd jbig2enc
./autogen.sh
./configure && make
sudo make install
cd ..
 
git clone https://github.com/qpdf/qpdf
cd qpdf
./configure && make
sudo make install
sudo ldconfig
cd ..
 
sudo pip3 install --upgrade pip
sudo pip3 install pybind11
sudo pip3 install ocrmypdf
</pre>
 
* processing pdf files that appear in folder
** /srv/daemon_ocr.sh:
<pre>
#!/bin/bash
#
# /srv/daemon_ocr.sh - polls IN_DIR for PDFs, runs ocrmypdf on each of them
# and writes the result under the same name into OUT_DIR. An input file is
# deleted only once its OCRed counterpart exists.

IN_DIR=/srv/scanfolder_combined
OUT_DIR=/srv/scanfolder_ocred

# Never continue in the wrong directory: the loop below deletes files.
cd "$IN_DIR" || exit 1

# An empty folder must yield an empty list, not the literal string "*.pdf".
shopt -s nullglob

while true
do
    pdf_files=(*.pdf)
    if (( ${#pdf_files[@]} == 0 )); then
        echo "no files found"
        sleep 15
        continue
    fi

    # Walk through every waiting file instead of only the newest one, so a
    # document that keeps failing cannot block the rest of the queue.
    for FILE_TO_PROCESS in "${pdf_files[@]}"
    do
        echo "-----> processing : $FILE_TO_PROCESS"
        # The "./" prefix keeps a file name starting with "-" from being
        # parsed as an option.
        ocrmypdf --output-type 'pdfa' \
                --rotate-pages --deskew --clean-final --optimize 3 \
                --language 'deu+eng' \
                "./$FILE_TO_PROCESS" "$OUT_DIR/$FILE_TO_PROCESS"

        # Success is judged by the presence of the output file.
        if [[ -f "$OUT_DIR/$FILE_TO_PROCESS" ]]; then
            echo "ocr file was successfully created, deleting input file"
            rm -- "$FILE_TO_PROCESS"
        else
            echo "ocr failed, keeping input file for the next pass: $FILE_TO_PROCESS" >&2
        fi
    done

    sleep 5
done
</pre>
 
== upload to nextcloud ==
* /srv/daemon_upload.sh
<pre>
<pre>
apt install ocrmypdf
#!/bin/bash
 
IN_DIR=/srv/scanfolder_ocred
OUT_DIR=/srv/scanfolder_uploaded
 
cd $IN_DIR


# ocrmypdf on ubuntu 20.04 installs also the following dependencies:
while true
apt install fontconfig fontconfig-config fonts-dejavu-core fonts-urw-base35 ghostscript icc-profiles-free libarchive13 libavahi-client3 libavahi-common-data libavahi-common3 libcairo2 libcups2 libdatrie1 libfontconfig1 libfreetype6 libfribidi0 libgif7 libgraphite2-3 libgs9 libgs9-common libharfbuzz0b libidn11 libijs-0.35 libimagequant0 libjbig0 libjbig2dec0 libjpeg-turbo8 libjpeg8 liblcms2-2 liblept5 libopenjp2-7 libpango-1.0-0 libpangocairo-1.0-0 libpangoft2-1.0-0 libpaper1 libpixman-1-0 libqpdf26 libtesseract4 libthai-data libthai0 libtiff5 libwebp6 libwebpdemux2 libwebpmux3 libx11-6 libx11-data libxau6 libxcb-render0 libxcb-shm0 libxcb1 libxdmcp6 libxext6 libxrender1 libxslt1.1 ocrmypdf poppler-data python3-cffi-backend python3-img2pdf python3-lxml python3-pdfminer python3-pikepdf python3-pil python3-reportlab python3-reportlab-accel python3-sortedcontainers python3-tqdm tesseract-ocr tesseract-ocr-eng tesseract-ocr-osd
do
    FILE_TO_PROCESS=$(ls -1 -t *.pdf | head -1)
    if [[ -z $FILE_TO_PROCESS ]]; then
        echo "no files found"
        sleep 15
        continue
    fi
 
    echo "-----> processing : "$FILE_TO_PROCESS
 
    curl -X PUT "https://nextcloud.domain.com/remote.php/webdav/00_Document Archive/00_New Scans/" -T $FILE_TO_PROCESS -u user:pass
    if [[ $? == "0" ]]; then
        echo "upload successful, moving file to out folder"
        mv $FILE_TO_PROCESS $OUT_DIR/$FILE_TO_PROCESS
    fi
 
    sleep 5
done
</pre>
</pre>


* OCRmyPDF via pip3
* autostart daemons via /etc/rc.local
<pre>
<pre>
python3 -m pip3 install --upgrade pip
sleep 30
apt install python3-pip python3-setuptools python3-wheel python3-lxml python3-dev
apt install ghostscript tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu tesseract-ocr-osd libqpdf26


# Absolute paths: "./srv/..." only resolves when the working directory is /.
/srv/daemon_ocr.sh &
/srv/daemon_upload.sh &
</pre>
</pre>


== Links ==
* https://ocrmypdf.readthedocs.io/en/latest/cookbook.html
* https://askubuntu.com/questions/246647/convert-a-directory-of-jpeg-files-to-a-single-pdf-document
* https://superuser.com/questions/104656/convert-a-pdf-to-greyscale-on-the-command-line-in-floss
* https://unix.stackexchange.com/questions/93959/how-to-convert-a-color-pdf-to-black-white
* https://superuser.com/questions/508472/how-to-recognize-black-and-white-images


== Windows: Deactivate ScanSnap folder ==
== Windows: Deactivate ScanSnap folder ==

Latest revision as of 10:45, 4 July 2020

Prepare Raspberry OS

apt update
apt full-upgrade
apt install aptitude sane-utils img2pdf
  • set /tmp to ram in /etc/fstab:
tmpfs     /tmp    tmpfs    defaults,noatime,nosuid 0 0
  • create folders
mkdir /srv/scanfolder_combined
mkdir /srv/scanfolder_ocred
mkdir /srv/scanfolder_uploaded

Test Scanner Connection

lsusb
sane-find-scanner
scanimage -L

Scan Button Daemon

  • Install via apt:
apt install scanbd
  • change /etc/scanbd/dll.conf to fix kernel message:
#canon_pp
fujitsu
plustek_pp
  • Test:
systemctl -t service
tail -F /var/log/syslog
  • /etc/scanbd/scanbd.conf
 action scan {
                # Applies to scanner options whose name matches this regex.
                filter = "^scan.*"
                # Trigger values 1 -> 0; presumably the button release - confirm
                # against the scanbd documentation.
                numerical-trigger {
                        from-value = 1
                        to-value   = 0
                }
                desc   = "Scan to file"
                # Absolute path of the script to run when the action triggers.
                script = "/srv/scan.script"
 }

include(scanner.d/fujitsu.conf)
  • /srv/scan.script
#!/bin/bash
#
# /srv/scan.script - configured as the scanbd "scan" action script.
# Scans all sheets from the ADF (duplex, 300 dpi, color, A4) into a private
# scratch directory and merges the pages into one timestamped PDF in OUT_DIR.

OUT_DIR=/srv/scanfolder_combined
TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)

TMP_DIR=$(mktemp -d) || exit 1
# Remove the whole scratch directory on every exit path. Deleting only the
# .tiff files would leave one empty directory behind per scan.
trap 'rm -rf -- "$TMP_DIR"' EXIT
cd "$TMP_DIR" || exit 1

# The exit status of scanimage is deliberately not checked; whether the scan
# produced anything is decided by the page count below.
scanimage --batch="$TMP_DIR/scan_%03d.tiff" --format=tiff \
          --resolution 300 --mode color --source "ADF Duplex" \
          --brightness 25 --contrast 15 \
          --page-width 210 --page-height 297 -x 210 -y 297 \
          --device-name='fujitsu:ScanSnap S1500:74986'
# --mode gray | color

# With nullglob an empty feeder yields an empty list instead of the literal
# string "*.tiff" being handed to img2pdf.
shopt -s nullglob
pages=(*.tiff)
if (( ${#pages[@]} == 0 )); then
    echo "no pages scanned" >&2
    exit 1
fi

echo "convert to pdf..."
# Build the PDF under a name that the OCR daemon's *.pdf pattern does not
# match and rename it once complete, so a half-written file is never picked up.
PDF="${OUT_DIR}/${TIMESTAMP}.pdf"
if img2pdf --pagesize A4 "${pages[@]}" --output "${PDF}.part"; then
    mv -- "${PDF}.part" "$PDF"
else
    rm -f -- "${PDF}.part"
    exit 1
fi

Testing manual scans

  • disable scanbd to test manual scanning
systemctl stop scanbd
  • simple scan
scanimage --batch="/srv/scan_%03d.pnm" --format=pnm --resolution 300 --mode Color --source "ADF Duplex"

OCRmyPDF via pip3

  • install
sudo apt install ghostscript libxml2 tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu pngquant unpaper leptonica-progs libleptonica-dev automake libtool zlib1g-dev libjpeg-dev python3 python3-pip libxml2-dev libxslt1-dev libffi-dev git

git clone https://github.com/agl/jbig2enc
cd jbig2enc
./autogen.sh
./configure && make
sudo make install
cd .. 

git clone https://github.com/qpdf/qpdf 
cd qpdf
./configure && make
sudo make install
sudo ldconfig
cd ..

sudo pip3 install --upgrade pip
sudo pip3 install pybind11
sudo pip3 install ocrmypdf
  • processing pdf files that appear in folder
    • /srv/daemon_ocr.sh:
#!/bin/bash
#
# /srv/daemon_ocr.sh - polls IN_DIR for PDFs, runs ocrmypdf on each of them
# and writes the result under the same name into OUT_DIR. An input file is
# deleted only once its OCRed counterpart exists.

IN_DIR=/srv/scanfolder_combined
OUT_DIR=/srv/scanfolder_ocred

# Never continue in the wrong directory: the loop below deletes files.
cd "$IN_DIR" || exit 1

# An empty folder must yield an empty list, not the literal string "*.pdf".
shopt -s nullglob

while true
do
    pdf_files=(*.pdf)
    if (( ${#pdf_files[@]} == 0 )); then
        echo "no files found"
        sleep 15
        continue
    fi

    # Walk through every waiting file instead of only the newest one, so a
    # document that keeps failing cannot block the rest of the queue.
    for FILE_TO_PROCESS in "${pdf_files[@]}"
    do
        echo "-----> processing : $FILE_TO_PROCESS"
        # The "./" prefix keeps a file name starting with "-" from being
        # parsed as an option.
        ocrmypdf --output-type 'pdfa' \
                 --rotate-pages --deskew --clean-final --optimize 3 \
                 --language 'deu+eng' \
                 "./$FILE_TO_PROCESS" "$OUT_DIR/$FILE_TO_PROCESS"

        # Success is judged by the presence of the output file.
        if [[ -f "$OUT_DIR/$FILE_TO_PROCESS" ]]; then
            echo "ocr file was successfully created, deleting input file"
            rm -- "$FILE_TO_PROCESS"
        else
            echo "ocr failed, keeping input file for the next pass: $FILE_TO_PROCESS" >&2
        fi
    done

    sleep 5
done

upload to nextcloud

  • /srv/daemon_upload.sh
#!/bin/bash
#
# /srv/daemon_upload.sh - polls IN_DIR for PDFs, uploads each one to the
# Nextcloud WebDAV folder via HTTP PUT and moves it to OUT_DIR once the
# server has accepted it.

IN_DIR=/srv/scanfolder_ocred
OUT_DIR=/srv/scanfolder_uploaded

# Target collection; the trailing "/" makes curl append the local file name.
# Spaces in the path are percent-encoded, a raw space is not valid in a URL.
UPLOAD_URL="https://nextcloud.domain.com/remote.php/webdav/00_Document%20Archive/00_New%20Scans/"

# Never continue in the wrong directory: the loop below moves files away.
cd "$IN_DIR" || exit 1

# An empty folder must yield an empty list, not the literal string "*.pdf".
shopt -s nullglob

while true
do
    pdf_files=(*.pdf)
    if (( ${#pdf_files[@]} == 0 )); then
        echo "no files found"
        sleep 15
        continue
    fi

    # Try every waiting file instead of only the newest one, so a single
    # rejected upload cannot block the rest of the queue.
    for FILE_TO_PROCESS in "${pdf_files[@]}"
    do
        echo "-----> processing : $FILE_TO_PROCESS"

        # --fail makes curl exit non-zero on HTTP errors (401, 404, 507, ...);
        # without it a rejected upload would still count as successful and
        # the file would be moved away. -T already implies PUT.
        # NOTE(review): user:pass is a placeholder and is visible in the
        # process list; consider a netrc file for the real credentials.
        if curl --fail -T "$FILE_TO_PROCESS" -u user:pass "$UPLOAD_URL"; then
            echo "upload successful, moving file to out folder"
            mv -- "$FILE_TO_PROCESS" "$OUT_DIR/$FILE_TO_PROCESS"
        else
            echo "upload failed, keeping file for the next pass: $FILE_TO_PROCESS" >&2
        fi
    done

    sleep 5
done
  • autostart daemons via /etc/rc.local
# Wait 30 seconds after boot, then start both watcher daemons in the background.
sleep 30

# Absolute paths: "./srv/..." only resolves when the working directory is /.
/srv/daemon_ocr.sh &
/srv/daemon_upload.sh &


Links

Windows: Deactivate ScanSnap folder

  • regsvr32 /u "C:\Program Files (x86)\PFU\ScanSnap\SSFolder\SSFolder.dll"