Fujitsu ScanSnap: Difference between revisions

From Wiki
 
(25 intermediate revisions by the same user not shown)
Line 3: Line 3:
apt update
apt update
apt full-upgrade
apt full-upgrade
apt install aptitude sane-utils
apt install aptitude sane-utils img2pdf
</pre>
 
* set /tmp to ram in /etc/fstab:
<pre>
tmpfs    /tmp    tmpfs    defaults,noatime,nosuid 0 0
</pre>
 
* create folders
<pre>
mkdir /srv/scanfolder_combined
mkdir /srv/scanfolder_ocred
mkdir /srv/scanfolder_uploaded
</pre>
</pre>


Line 14: Line 26:


== Scan Button Daemon ==
== Scan Button Daemon ==
* Install via apt
* Install via apt:
<pre>
<pre>
apt install scanbd
apt install scanbd
Line 24: Line 36:
fujitsu
fujitsu
plustek_pp
plustek_pp
</pre>
* Test:
<pre>
systemctl -t service
tail -F /var/log/syslog
</pre>
</pre>


Line 35: Line 53:
                 }
                 }
                 desc  = "Scan to file"
                 desc  = "Scan to file"
                 script = "/srv/scanbd/scanadf.script"
                 script = "/srv/scan.script"


include(scanner.d/fujitsu.conf)
include(scanner.d/fujitsu.conf)
</pre>
* /srv/scan.script
<pre>
#!/bin/bash
TMP_DIR=$(mktemp -d)
OUT_DIR=/srv/scanfolder_combined
TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)
cd $TMP_DIR
scanimage --batch="$TMP_DIR/scan_%03d.tiff" --format=tiff \
          --resolution 300 --mode color --source "ADF Duplex" \
          --brightness 25 --contrast 15 \
          --page-width 210 --page-height 297 -x 210 -y 297 \
          --device-name='fujitsu:ScanSnap S1500:74986'
# --mode gray | color
echo "convert to pdf..."
img2pdf --pagesize A4 *.tiff --output ${OUT_DIR}/${TIMESTAMP}.pdf
rm *.tiff
</pre>
== Testing manual scans ==
* disable scanbd to test manual scanning
<pre>
systemctl stop scanbd
</pre>
* simple scan
<pre>
scanimage --batch="/srv/scan_%03d.pnm" --format=pnm --resolution 300 --mode Color --source "ADF Duplex"
</pre>
</pre>


== OCRmyPDF via pip3 ==
== OCRmyPDF via pip3 ==
* install
<pre>
<pre>
sudo apt update
sudo apt install ghostscript libxml2 tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu pngquant unpaper leptonica-progs libleptonica-dev automake libtool zlib1g-dev libjpeg-dev python3 python3-pip libxml2-dev libxslt1-dev libffi-dev git
sudo apt full-upgrade
sudo apt install ghostscript libxml2 tesseract-ocr tesseract-ocr-deu pngquant unpaper leptonica-progs libleptonica-dev automake libtool zlib1g-dev libjpeg-dev python3 python3-pip libxml2-dev libxslt1-dev libffi-dev git


git clone https://github.com/agl/jbig2enc
git clone https://github.com/agl/jbig2enc
Line 65: Line 114:
</pre>
</pre>


* processing pdf files that appear in folder
** /srv/daemon_ocr.sh:
<pre>
#!/bin/bash


== Links ==
IN_DIR=/srv/scanfolder_combined
* https://ocrmypdf.readthedocs.io/en/latest/cookbook.html
OUT_DIR=/srv/scanfolder_ocred
 
 
cd $IN_DIR
 
while true
do
    FILE_TO_PROCESS=$(ls -1 -t *.pdf | head -1)
    if [[ -z $FILE_TO_PROCESS ]]; then
        echo "no files found"
        sleep 15
        continue
    fi
 
    echo "-----> processing : "$FILE_TO_PROCESS
    ocrmypdf --output-type 'pdfa' \
            --rotate-pages --deskew --clean-final --optimize 3 \
            --language 'deu+eng' \
            $FILE_TO_PROCESS $OUT_DIR/$FILE_TO_PROCESS
 
    if [[ -f "$OUT_DIR/$FILE_TO_PROCESS" ]]; then
        echo "ocr file was successfully created, deleting input file"
        rm $FILE_TO_PROCESS
    fi
 
    sleep 5
done
</pre>
 
== upload to nextcloud ==
* /srv/daemon_upload.sh
<pre>
#!/bin/bash
 
IN_DIR=/srv/scanfolder_ocred
OUT_DIR=/srv/scanfolder_uploaded
 
cd $IN_DIR
 
while true
do
    FILE_TO_PROCESS=$(ls -1 -t *.pdf | head -1)
    if [[ -z $FILE_TO_PROCESS ]]; then
        echo "no files found"
        sleep 15
        continue
    fi
 
    echo "-----> processing : "$FILE_TO_PROCESS
 
    curl -X PUT "https://nextcloud.domain.com/remote.php/webdav/00_Document Archive/00_New Scans/" -T $FILE_TO_PROCESS -u user:pass
    if [[ $? == "0" ]]; then
        echo "upload successful, moving file to out folder"
        mv $FILE_TO_PROCESS $OUT_DIR/$FILE_TO_PROCESS
    fi
 
    sleep 5
done
</pre>
 
* autostart daemons via /etc/rc.local
<pre>
sleep 30


== Troubleshooting Links ==
./srv/daemon_ocr.sh &
* https://bugs.launchpad.net/ubuntu/+source/scanbd/+bug/1747115
./srv/daemon_upload.sh &
** User A
</pre>
*** /etc/dbus-1/system.d/scanbd_dbus.conf (user -> root)
*** /lib/systemd/system/scanbm@.service (user -> root)
*** /etc/scanbd/scanbd.conf (user -> root)
** User B
*** /lib/udev/rules.d/99-saned.rules (ENV{libsane_matched}=="yes", GROUP="scanner")




== Links ==
* https://ocrmypdf.readthedocs.io/en/latest/cookbook.html
* https://askubuntu.com/questions/246647/convert-a-directory-of-jpeg-files-to-a-single-pdf-document
* https://superuser.com/questions/104656/convert-a-pdf-to-greyscale-on-the-command-line-in-floss
* https://unix.stackexchange.com/questions/93959/how-to-convert-a-color-pdf-to-black-white
* https://superuser.com/questions/508472/how-to-recognize-black-and-white-images


== Windows: Deactivate ScanSnap folder ==
== Windows: Deactivate ScanSnap folder ==

Latest revision as of 10:45, 4 July 2020

Prepare Raspberry OS

apt update
apt full-upgrade
apt install aptitude sane-utils img2pdf
  • mount /tmp in RAM (tmpfs) by adding this line to /etc/fstab:
tmpfs     /tmp    tmpfs    defaults,noatime,nosuid 0 0
  • create folders
mkdir /srv/scanfolder_combined
mkdir /srv/scanfolder_ocred
mkdir /srv/scanfolder_uploaded

Test Scanner Connection

lsusb
sane-find-scanner
scanimage -L

Scan Button Daemon

  • Install via apt:
apt install scanbd
  • edit /etc/scanbd/dll.conf (comment out canon_pp) to fix a kernel message:
#canon_pp
fujitsu
plustek_pp
  • Test:
systemctl -t service
tail -F /var/log/syslog
  • /etc/scanbd/scanbd.conf
 action scan {
                filter = "^scan.*"
                numerical-trigger {
                        from-value = 1
                        to-value   = 0
                }
                desc   = "Scan to file"
                script = "/srv/scan.script"

include(scanner.d/fujitsu.conf)
  • /srv/scan.script
#!/bin/bash
#
# scanbd action script: scans all sheets from the ADF (duplex, colour,
# 300 dpi, 210x297 mm) into a private temp directory and combines the pages
# into one timestamped PDF in OUT_DIR.

OUT_DIR=/srv/scanfolder_combined
TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)

TMP_DIR=$(mktemp -d) || { echo "mktemp failed" >&2; exit 1; }
# Remove the whole temp directory on every exit path. Deleting only the
# *.tiff files would leave one empty directory behind per scan.
trap 'rm -rf -- "$TMP_DIR"' EXIT

cd "$TMP_DIR" || { echo "cannot cd to $TMP_DIR" >&2; exit 1; }

# The exit status of scanimage is not used to decide success; the page files
# found afterwards decide whether there is anything to convert.
scanimage --batch="$TMP_DIR/scan_%03d.tiff" --format=tiff \
          --resolution 300 --mode color --source "ADF Duplex" \
          --brightness 25 --contrast 15 \
          --page-width 210 --page-height 297 -x 210 -y 297 \
          --device-name='fujitsu:ScanSnap S1500:74986'
# --mode gray | color

# With nullglob an unmatched pattern expands to nothing instead of the
# literal string "*.tiff", so an empty scan can be detected reliably.
shopt -s nullglob
pages=(*.tiff)
if (( ${#pages[@]} == 0 )); then
    echo "no pages scanned, nothing to convert" >&2
    exit 1
fi

echo "convert to pdf..."
# Write under a hidden name that a "*.pdf" poll of OUT_DIR does not match and
# rename afterwards (same directory, so the rename is atomic). This keeps a
# half-written PDF from being picked up while img2pdf is still running.
PARTIAL_PDF="${OUT_DIR}/.${TIMESTAMP}.pdf.part"
if img2pdf --pagesize A4 "${pages[@]}" --output "$PARTIAL_PDF"; then
    mv -- "$PARTIAL_PDF" "${OUT_DIR}/${TIMESTAMP}.pdf"
else
    echo "img2pdf failed" >&2
    rm -f -- "$PARTIAL_PDF"
    exit 1
fi

Testing manual scans

  • disable scanbd to test manual scanning
systemctl stop scanbd
  • simple scan
scanimage --batch="/srv/scan_%03d.pnm" --format=pnm --resolution 300 --mode Color --source "ADF Duplex"

OCRmyPDF via pip3

  • install
sudo apt install ghostscript libxml2 tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu pngquant unpaper leptonica-progs libleptonica-dev automake libtool zlib1g-dev libjpeg-dev python3 python3-pip libxml2-dev libxslt1-dev libffi-dev git

git clone https://github.com/agl/jbig2enc
cd jbig2enc
./autogen.sh
./configure && make
sudo make install
cd .. 

git clone https://github.com/qpdf/qpdf 
cd qpdf
./configure && make
sudo make install
sudo ldconfig
cd ..

sudo pip3 install --upgrade pip
sudo pip3 install pybind11
sudo pip3 install ocrmypdf
  • process PDF files as they appear in the scan folder
    • /srv/daemon_ocr.sh:
#!/bin/bash
#
# Polls IN_DIR for PDF files, runs OCRmyPDF on each one and writes the result
# under the same file name to OUT_DIR. An input file is deleted only after its
# OCRed counterpart exists in OUT_DIR.

IN_DIR=/srv/scanfolder_combined
OUT_DIR=/srv/scanfolder_ocred

cd "$IN_DIR" || { echo "cannot cd to $IN_DIR" >&2; exit 1; }

# Let "*.pdf" expand to nothing when the folder is empty, instead of parsing
# the output of ls (which also prints an error on every empty poll).
shopt -s nullglob

while true
do
    pending=(*.pdf)
    if (( ${#pending[@]} == 0 )); then
        echo "no files found"
        sleep 15
        continue
    fi

    # Work through every waiting file in one pass, so that a single file that
    # keeps failing cannot block all the others from being processed.
    for FILE_TO_PROCESS in "${pending[@]}"; do
        echo "-----> processing : $FILE_TO_PROCESS"
        ocrmypdf --output-type 'pdfa' \
                 --rotate-pages --deskew --clean-final --optimize 3 \
                 --language 'deu+eng' \
                 "$FILE_TO_PROCESS" "$OUT_DIR/$FILE_TO_PROCESS"

        if [[ -f "$OUT_DIR/$FILE_TO_PROCESS" ]]; then
            echo "ocr file was successfully created, deleting input file"
            rm -- "$FILE_TO_PROCESS"
        else
            # Input stays in place and is retried on the next pass.
            echo "ocr output missing for $FILE_TO_PROCESS, keeping input file" >&2
        fi
    done

    sleep 5
done

upload to nextcloud

  • /srv/daemon_upload.sh
#!/bin/bash
#
# Polls IN_DIR for PDF files, uploads each one to a Nextcloud WebDAV folder
# and moves it to OUT_DIR once the upload succeeded.

IN_DIR=/srv/scanfolder_ocred
OUT_DIR=/srv/scanfolder_uploaded
# Spaces in the folder names must be percent-encoded inside a URL. Because the
# URL ends in "/", curl appends the name of the uploaded file to it.
UPLOAD_URL="https://nextcloud.domain.com/remote.php/webdav/00_Document%20Archive/00_New%20Scans/"

cd "$IN_DIR" || { echo "cannot cd to $IN_DIR" >&2; exit 1; }

# Let "*.pdf" expand to nothing when the folder is empty, instead of parsing
# the output of ls (which also prints an error on every empty poll).
shopt -s nullglob

while true
do
    pending=(*.pdf)
    if (( ${#pending[@]} == 0 )); then
        echo "no files found"
        sleep 15
        continue
    fi

    # Try every waiting file in one pass, so that one file that keeps failing
    # cannot block the upload of all the others.
    for FILE_TO_PROCESS in "${pending[@]}"; do
        echo "-----> processing : $FILE_TO_PROCESS"

        # --fail makes curl exit non-zero on HTTP errors (401, 404, 507, ...).
        # Without it curl exits 0 on such responses and the file would be
        # moved to the "uploaded" folder although the server never stored it.
        # -T already selects PUT for HTTP, so no explicit -X PUT is needed.
        # Replace user:pass with the real account credentials.
        if curl --fail -T "$FILE_TO_PROCESS" -u user:pass "$UPLOAD_URL"; then
            echo "upload successful, moving file to out folder"
            mv -- "$FILE_TO_PROCESS" "$OUT_DIR/$FILE_TO_PROCESS"
        else
            # File stays in IN_DIR and is retried on the next pass.
            echo "upload failed for $FILE_TO_PROCESS, keeping file" >&2
        fi
    done

    sleep 5
done
  • autostart daemons via /etc/rc.local
# Wait before launching the daemons.
sleep 30

# Use absolute paths: "./srv/..." is relative and only resolves when the
# working directory happens to be "/".
/srv/daemon_ocr.sh &
/srv/daemon_upload.sh &


Links

Windows: Deactivate ScanSnap folder

  • regsvr32 /u "C:\Program Files (x86)\PFU\ScanSnap\SSFolder\SSFolder.dll"