Fujitsu ScanSnap: Difference between revisions
(20 intermediate revisions by the same user not shown) | |||
Line 3: | Line 3: | ||
apt update | apt update | ||
apt full-upgrade | apt full-upgrade | ||
apt install aptitude sane-utils | apt install aptitude sane-utils img2pdf | ||
</pre> | |||
* set /tmp to ram in /etc/fstab: | |||
<pre> | |||
tmpfs /tmp tmpfs defaults,noatime,nosuid 0 0 | |||
</pre> | |||
* create folders | |||
<pre> | |||
mkdir /srv/scanfolder_combined | |||
mkdir /srv/scanfolder_ocred | |||
mkdir /srv/scanfolder_uploaded | |||
</pre> | </pre> | ||
Line 41: | Line 53: | ||
} | } | ||
desc = "Scan to file" | desc = "Scan to file" | ||
script = "/srv | script = "/srv/scan.script" | ||
include(scanner.d/fujitsu.conf) | include(scanner.d/fujitsu.conf) | ||
</pre> | </pre> | ||
* disable to test manual scanning | * /srv/scan.script | ||
<pre> | |||
#!/bin/bash | |||
TMP_DIR=$(mktemp -d) | |||
OUT_DIR=/srv/scanfolder_combined | |||
TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S) | |||
cd $TMP_DIR | |||
scanimage --batch="$TMP_DIR/scan_%03d.tiff" --format=tiff \ | |||
--resolution 300 --mode color --source "ADF Duplex" \ | |||
--brightness 25 --contrast 15 \ | |||
--page-width 210 --page-height 297 -x 210 -y 297 \ | |||
--device-name='fujitsu:ScanSnap S1500:74986' | |||
# --mode gray | color | |||
echo "convert to pdf..." | |||
img2pdf --pagesize A4 *.tiff --output ${OUT_DIR}/${TIMESTAMP}.pdf | |||
rm *.tiff | |||
</pre> | |||
== Testing manual scans == | |||
* disable scanbd to test manual scanning | |||
<pre> | <pre> | ||
systemctl stop scanbd | systemctl stop scanbd | ||
</pre> | |||
* simple scan | |||
<pre> | |||
scanimage --batch="/srv/scan_%03d.pnm" --format=pnm --resolution 300 --mode Color --source "ADF Duplex" | |||
</pre> | </pre> | ||
== OCRmyPDF via pip3 == | == OCRmyPDF via pip3 == | ||
* install | |||
<pre> | <pre> | ||
sudo apt install ghostscript libxml2 tesseract-ocr tesseract-ocr-deu pngquant unpaper leptonica-progs libleptonica-dev automake libtool zlib1g-dev libjpeg-dev python3 python3-pip libxml2-dev libxslt1-dev libffi-dev git | sudo apt install ghostscript libxml2 tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu pngquant unpaper leptonica-progs libleptonica-dev automake libtool zlib1g-dev libjpeg-dev python3 python3-pip libxml2-dev libxslt1-dev libffi-dev git | ||
git clone https://github.com/agl/jbig2enc | git clone https://github.com/agl/jbig2enc | ||
Line 74: | Line 114: | ||
</pre> | </pre> | ||
== | * processing pdf files that appear in folder | ||
* https:// | ** /srv/daemon_ocr.sh: | ||
<pre> | |||
#!/bin/bash | |||
IN_DIR=/srv/scanfolder_combined | |||
OUT_DIR=/srv/scanfolder_ocred | |||
cd $IN_DIR | |||
while true | |||
do | |||
FILE_TO_PROCESS=$(ls -1 -t *.pdf | head -1) | |||
if [[ -z $FILE_TO_PROCESS ]]; then | |||
echo "no files found" | |||
sleep 15 | |||
continue | |||
fi | |||
echo "-----> processing : "$FILE_TO_PROCESS | |||
ocrmypdf --output-type 'pdfa' \ | |||
--rotate-pages --deskew --clean-final --optimize 3 \ | |||
--language 'deu+eng' \ | |||
$FILE_TO_PROCESS $OUT_DIR/$FILE_TO_PROCESS | |||
if [[ -f "$OUT_DIR/$FILE_TO_PROCESS" ]]; then | |||
echo "ocr file was successfully created, deleting input file" | |||
rm $FILE_TO_PROCESS | |||
fi | |||
sleep 5 | |||
done | |||
</pre> | |||
== upload to nextcloud == | |||
* /srv/daemon_upload.sh | |||
<pre> | |||
#!/bin/bash | |||
IN_DIR=/srv/scanfolder_ocred | |||
OUT_DIR=/srv/scanfolder_uploaded | |||
cd $IN_DIR | |||
while true | |||
do | |||
FILE_TO_PROCESS=$(ls -1 -t *.pdf | head -1) | |||
if [[ -z $FILE_TO_PROCESS ]]; then | |||
echo "no files found" | |||
sleep 15 | |||
continue | |||
fi | |||
echo "-----> processing : "$FILE_TO_PROCESS | |||
curl -X PUT "https://nextcloud.domain.com/remote.php/webdav/00_Document Archive/00_New Scans/" -T $FILE_TO_PROCESS -u user:pass | |||
if [[ $? == "0" ]]; then | |||
echo "upload successful, moving file to out folder" | |||
mv $FILE_TO_PROCESS $OUT_DIR/$FILE_TO_PROCESS | |||
fi | |||
sleep 5 | |||
done | |||
</pre> | |||
* autostart daemons via /etc/rc.local | |||
<pre> | |||
sleep 30 | |||
./srv/daemon_ocr.sh & | |||
./srv/daemon_upload.sh & | |||
</pre> | |||
== Links == | |||
* https://ocrmypdf.readthedocs.io/en/latest/cookbook.html | |||
* https://askubuntu.com/questions/246647/convert-a-directory-of-jpeg-files-to-a-single-pdf-document | |||
* https://superuser.com/questions/104656/convert-a-pdf-to-greyscale-on-the-command-line-in-floss | |||
* https://unix.stackexchange.com/questions/93959/how-to-convert-a-color-pdf-to-black-white | |||
* https://superuser.com/questions/508472/how-to-recognize-black-and-white-images | |||
== Windows: Deactivate ScanSnap folder == | == Windows: Deactivate ScanSnap folder == |
Latest revision as of 10:45, 4 July 2020
Prepare Raspberry Pi OS
# Bring the system up to date, then install the scanning tools
# (sane-utils provides scanimage; img2pdf merges scanned pages into a PDF).
apt update
apt full-upgrade
apt install aptitude sane-utils img2pdf
- mount /tmp as a RAM-backed tmpfs by adding this line to /etc/fstab:
tmpfs /tmp tmpfs defaults,noatime,nosuid 0 0
- create folders
# One folder per pipeline stage: merged scans -> OCRed PDFs -> uploaded PDFs.
# -p keeps the command harmless when a folder already exists.
mkdir -p /srv/scanfolder_combined
mkdir -p /srv/scanfolder_ocred
mkdir -p /srv/scanfolder_uploaded
Test Scanner Connection
# Check that the scanner is visible on USB and to SANE.
lsusb
sane-find-scanner
scanimage -L
Scan Button Daemon
- Install via apt:
apt install scanbd
- change /etc/scanbd/dll.conf to fix kernel message:
#canon_pp fujitsu plustek_pp
- Test:
# List the service units, then follow the syslog while pressing the scan button.
systemctl -t service
tail -F /var/log/syslog
- /etc/scanbd/scanbd.conf
action scan { filter = "^scan.*" numerical-trigger { from-value = 1 to-value = 0 } desc = "Scan to file" script = "/srv/scan.script" include(scanner.d/fujitsu.conf)
- /srv/scan.script
#!/bin/bash
# scanbd action script: scans every sheet in the document feeder to TIFF pages
# inside a private temp directory, then merges the pages into one timestamped
# PDF in OUT_DIR.

OUT_DIR=/srv/scanfolder_combined
TIMESTAMP=$(date +%Y-%m-%d_%H-%M-%S)

TMP_DIR=$(mktemp -d) || exit 1
# Remove the temp directory on every exit path. With /tmp mounted as tmpfs,
# leftover scan directories would otherwise pile up in RAM.
trap 'rm -rf -- "$TMP_DIR"' EXIT

cd "$TMP_DIR" || exit 1

scanimage --batch="$TMP_DIR/scan_%03d.tiff" --format=tiff \
  --resolution 300 --mode color --source "ADF Duplex" \
  --brightness 25 --contrast 15 \
  --page-width 210 --page-height 297 -x 210 -y 297 \
  --device-name='fujitsu:ScanSnap S1500:74986'
# --mode gray | color

# Collect the scanned pages; with nullglob an empty feeder yields an empty
# list instead of the literal string "*.tiff".
shopt -s nullglob
pages=(*.tiff)
if (( ${#pages[@]} == 0 )); then
  echo "no pages scanned, nothing to convert" >&2
  exit 1
fi

echo "convert to pdf..."
final_pdf="${OUT_DIR}/${TIMESTAMP}.pdf"
# Build the PDF under a hidden name that does not match *.pdf and rename it
# when complete, so a folder watcher never sees a half-written file.
partial_pdf="${OUT_DIR}/.${TIMESTAMP}.pdf.part"
if img2pdf --pagesize A4 "${pages[@]}" --output "$partial_pdf"; then
  mv -- "$partial_pdf" "$final_pdf"
else
  rm -f -- "$partial_pdf"
  exit 1
fi
Testing manual scans
- disable scanbd to test manual scanning
systemctl stop scanbd
- simple scan
scanimage --batch="/srv/scan_%03d.pnm" --format=pnm --resolution 300 --mode Color --source "ADF Duplex"
OCRmyPDF via pip3
- install
# Build prerequisites plus the tesseract language packs (English and German).
sudo apt install ghostscript libxml2 tesseract-ocr tesseract-ocr-eng tesseract-ocr-deu pngquant unpaper leptonica-progs libleptonica-dev automake libtool zlib1g-dev libjpeg-dev python3 python3-pip libxml2-dev libxslt1-dev libffi-dev git

# Build and install jbig2enc from source.
git clone https://github.com/agl/jbig2enc
cd jbig2enc
./autogen.sh
./configure && make
sudo make install
cd ..

# Build and install qpdf from source.
git clone https://github.com/qpdf/qpdf
cd qpdf
./configure && make
sudo make install
sudo ldconfig
cd ..

# Install OCRmyPDF itself via pip.
sudo pip3 install --upgrade pip
sudo pip3 install pybind11
sudo pip3 install ocrmypdf
- processing pdf files that appear in folder
- /srv/daemon_ocr.sh:
#!/bin/bash
# OCR daemon: polls IN_DIR for PDFs, runs ocrmypdf on each one and writes the
# result under the same file name into OUT_DIR. An input file is deleted only
# after its OCRed counterpart exists.
IN_DIR=/srv/scanfolder_combined
OUT_DIR=/srv/scanfolder_ocred

# Stop if the input folder is missing; otherwise the loop would operate on
# whatever directory the daemon happened to be started from.
cd "$IN_DIR" || exit 1

# An unmatched *.pdf expands to an empty list instead of the literal pattern.
shopt -s nullglob

while true; do
  pending=(*.pdf)
  if (( ${#pending[@]} == 0 )); then
    echo "no files found"
    sleep 15
    continue
  fi

  # Go through every waiting file in each pass, so that a single PDF which
  # keeps failing cannot block all the others.
  for FILE_TO_PROCESS in "${pending[@]}"; do
    echo "-----> processing : $FILE_TO_PROCESS"
    # The ./ prefix keeps a file name starting with "-" from being parsed as
    # an option.
    ocrmypdf --output-type 'pdfa' \
      --rotate-pages --deskew --clean-final --optimize 3 \
      --language 'deu+eng' \
      "./$FILE_TO_PROCESS" "$OUT_DIR/$FILE_TO_PROCESS"

    if [[ -f "$OUT_DIR/$FILE_TO_PROCESS" ]]; then
      echo "ocr file was successfully created, deleting input file"
      rm -- "$FILE_TO_PROCESS"
    fi
  done

  sleep 5
done
upload to nextcloud
- /srv/daemon_upload.sh
#!/bin/bash
# Upload daemon: polls IN_DIR for PDFs, uploads each one to the Nextcloud
# WebDAV folder and moves it to OUT_DIR once the server accepted it.
IN_DIR=/srv/scanfolder_ocred
OUT_DIR=/srv/scanfolder_uploaded

# The URL ends in "/", so curl appends the local file name. Spaces in the
# folder names are percent-encoded because a raw space is not valid in a URL.
UPLOAD_URL="https://nextcloud.domain.com/remote.php/webdav/00_Document%20Archive/00_New%20Scans/"

# Stop if the input folder is missing; otherwise the loop would operate on
# whatever directory the daemon happened to be started from.
cd "$IN_DIR" || exit 1

# An unmatched *.pdf expands to an empty list instead of the literal pattern.
shopt -s nullglob

while true; do
  pending=(*.pdf)
  if (( ${#pending[@]} == 0 )); then
    echo "no files found"
    sleep 15
    continue
  fi

  # Go through every waiting file in each pass, so that a single file which
  # keeps failing cannot block all the others.
  for FILE_TO_PROCESS in "${pending[@]}"; do
    echo "-----> processing : $FILE_TO_PROCESS"
    # --fail makes curl exit non-zero on HTTP errors (401, 404, 507, ...);
    # without it a rejected upload still exits 0 and the file would be moved
    # away as if it had been uploaded. -T already issues an HTTP PUT.
    # user:pass is a placeholder for the real credentials.
    if curl --fail -T "$FILE_TO_PROCESS" -u user:pass "$UPLOAD_URL"; then
      echo "upload successful, moving file to out folder"
      mv -- "$FILE_TO_PROCESS" "$OUT_DIR/$FILE_TO_PROCESS"
    fi
  done

  sleep 5
done
- autostart daemons via /etc/rc.local
# Give the system time to finish booting, then start both daemons in the
# background. Absolute paths are used so the lines work regardless of the
# directory rc.local is executed from.
sleep 30
/srv/daemon_ocr.sh &
/srv/daemon_upload.sh &
Links
- https://ocrmypdf.readthedocs.io/en/latest/cookbook.html
- https://askubuntu.com/questions/246647/convert-a-directory-of-jpeg-files-to-a-single-pdf-document
- https://superuser.com/questions/104656/convert-a-pdf-to-greyscale-on-the-command-line-in-floss
- https://unix.stackexchange.com/questions/93959/how-to-convert-a-color-pdf-to-black-white
- https://superuser.com/questions/508472/how-to-recognize-black-and-white-images
Windows: Deactivate ScanSnap folder
- regsvr32 /u "C:\Program Files (x86)\PFU\ScanSnap\SSFolder\SSFolder.dll"