From 1e58a461e59a372a4e49ad01f776159ba93970d4 Mon Sep 17 00:00:00 2001
From: Stegallo
Date: Thu, 27 Dec 2018 11:31:42 -0800
Subject: [PATCH 1/6] Add Dockerfile and README.md to help development
---
docker/Dockerfile | 26 +++++++++++++++++++++++
docker/README.md | 54 +++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 80 insertions(+)
create mode 100644 docker/Dockerfile
create mode 100644 docker/README.md
diff --git a/docker/Dockerfile b/docker/Dockerfile
new file mode 100644
index 0000000..1b5a6ce
--- /dev/null
+++ b/docker/Dockerfile
@@ -0,0 +1,26 @@
+FROM ubuntu
+
+RUN apt-get update \
+ && apt-get install -y \
+ apt-utils \
+ curl \
+ wget \
+ nano \
+ libsm6 \
+ libxrender1 \
+ libxext6 \
+ ghostscript \
+ python3-minimal \
+ python3-setuptools \
+ python3-pip \
+ && ln -s /usr/bin/python3 /usr/bin/python \
+ && ln -s /usr/bin/pip3 /usr/bin/pip
+
+RUN pip install excalibur-py[dev]
+
+EXPOSE 5000
+
+ENV LC_ALL=C.UTF-8
+ENV LANG=C.UTF-8
+
+WORKDIR /excalibur/
diff --git a/docker/README.md b/docker/README.md
new file mode 100644
index 0000000..043db38
--- /dev/null
+++ b/docker/README.md
@@ -0,0 +1,54 @@
+
+
+
+
+# Excalibur: Docker
+This is the Docker configuration which allows you to run Apache Spark without installing any dependencies on your machine!
+OK, any except `docker`.
+
+## Prerequisites
+
+As stated, the thing you need is `docker`.
+
+Follow the instructions on [Install Docker](https://docs.docker.com/engine/installation/) for your environment if you haven't got `docker` already.
+
+## Usage
+
+### Prepare the image
+
+Switch to `docker` directory here and run `docker build -t excalibur .` (don't forget the final `.`) to build your docker image. That may take some time but is only required once. Or perhaps a few times after you tweak something in a `Dockerfile`.
+
+After the process is finished you have a `excalibur` image, that will be the base for your experiments. You can confirm that looking on results of `docker images` command.
+
+### Run the container
+
+From your project folder, run `docker run -it -p 5000:5000 -v $(pwd):/excalibur/ excalibur /bin/bash`
+This will start the container and open up a bash console inside it.
+
+At this point you need to initialize the metadata database using:
+
+
+$ excalibur initdb
+
+
+Once initialized, you need to enable connectivity from outside the container:
+
+Use nano to open the config file ...
+
+
+$ nano /root/excalibur/excalibur.cfg
+
+
+... and modify the [webserver] section as:
+
+
+web_server_host = 0.0.0.0
+
+
+And then start the webserver using:
+
+
+$ excalibur webserver
+
+
+That's it! Now you can go to http://localhost:5000 and start extracting tabular data from your PDFs.
From cb2fe9ac54cf116dbb9b818d49633768f79d7043 Mon Sep 17 00:00:00 2001
From: Stegallo
Date: Fri, 1 Mar 2019 07:46:46 -0800
Subject: [PATCH 2/6] Update README.md
---
docker/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docker/README.md b/docker/README.md
index 043db38..7b6f367 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -3,7 +3,7 @@
# Excalibur: Docker
-This is the Docker configuration which allows you to run Apache Spark without installing any dependencies on your machine!
+This is the Docker configuration which allows you to run Excalibur without installing any dependencies on your machine!
OK, any except `docker`.
## Prerequisites
From 5e0187903da10ae248ffc20cd43b3b7b194d05b3 Mon Sep 17 00:00:00 2001
From: Stefano Gallotti
Date: Sat, 17 Apr 2021 19:12:47 -0700
Subject: [PATCH 3/6] change Dokerfile to use p3.9.4-alpine
---
docker/Dockerfile | 25 ++++++++-----------------
docker/requirements.txt | 2 ++
2 files changed, 10 insertions(+), 17 deletions(-)
create mode 100644 docker/requirements.txt
diff --git a/docker/Dockerfile b/docker/Dockerfile
index 1b5a6ce..a047b7e 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -1,26 +1,17 @@
-FROM ubuntu
+FROM python:3.9.4-alpine3.13
-RUN apt-get update \
- && apt-get install -y \
- apt-utils \
- curl \
- wget \
- nano \
- libsm6 \
- libxrender1 \
- libxext6 \
- ghostscript \
- python3-minimal \
- python3-setuptools \
- python3-pip \
- && ln -s /usr/bin/python3 /usr/bin/python \
- && ln -s /usr/bin/pip3 /usr/bin/pip
+COPY requirements.txt /requirements.txt
+# NOTE(review): qt-x11 comes from the Alpine v3.9 repo while the base image is 3.13 - confirm the mix is intended; fetched over HTTPS.
+RUN /sbin/apk add --no-cache ghostscript-dev libstdc++
+RUN /sbin/apk add --no-cache --repository https://dl-cdn.alpinelinux.org/alpine/v3.9/main qt-x11
+RUN /usr/local/bin/pip install --no-cache-dir --requirement /requirements.txt
-RUN pip install excalibur-py[dev]
EXPOSE 5000
ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
+ENV EXCALIBUR_HOME="/excalibur" PYTHONUNBUFFERED="1"
WORKDIR /excalibur/
+CMD ["excalibur", "webserver"]
diff --git a/docker/requirements.txt b/docker/requirements.txt
new file mode 100644
index 0000000..7d03101
--- /dev/null
+++ b/docker/requirements.txt
@@ -0,0 +1,2 @@
+--extra-index-url https://alpine-wheels.github.io/index
+excalibur-py==0.4.3
From 3c97ec2dd9c3dd30b56278ddbe8ee7afa3f51b6f Mon Sep 17 00:00:00 2001
From: Stefano Gallotti
Date: Sat, 17 Apr 2021 19:39:53 -0700
Subject: [PATCH 4/6] and readme to use /bin/sh
---
docker/README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/docker/README.md b/docker/README.md
index 7b6f367..d45207e 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -22,7 +22,7 @@ After the process is finished you have a `excalibur` image, that will be the bas
### Run the container
-From your project folder, run `docker run -it -p 5000:5000 -v $(pwd):/excalibur/ excalibur /bin/bash`
+From your project folder, run `docker run -it -p 5000:5000 -v $(pwd):/excalibur/ excalibur /bin/sh`
This will start the container and open up a bash console inside it.
At this point you need to initialize the metadata database using:
From 3ee6e828a5db0f15982a1aa1906cf811a4693392 Mon Sep 17 00:00:00 2001
From: Stefano Gallotti
Date: Sun, 18 Apr 2021 09:47:02 -0700
Subject: [PATCH 5/6] add compose
---
docker/README.md | 37 ++++++-------------------
docker/docker-compose.yml | 17 ++++++++++++
docker/{ => excalibur}/Dockerfile | 0
docker/{ => excalibur}/requirements.txt | 0
docker/nginx/Dockerfile | 8 ++++++
docker/nginx/nginx.conf | 9 ++++++
6 files changed, 43 insertions(+), 28 deletions(-)
create mode 100644 docker/docker-compose.yml
rename docker/{ => excalibur}/Dockerfile (100%)
rename docker/{ => excalibur}/requirements.txt (100%)
create mode 100644 docker/nginx/Dockerfile
create mode 100644 docker/nginx/nginx.conf
diff --git a/docker/README.md b/docker/README.md
index d45207e..e8fa741 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -14,41 +14,22 @@ Follow the instructions on [Install Docker](https://docs.docker.com/engine/insta
## Usage
-### Prepare the image
-
-Switch to `docker` directory here and run `docker build -t excalibur .` (don't forget the final `.`) to build your docker image. That may take some time but is only required once. Or perhaps a few times after you tweak something in a `Dockerfile`.
-
-After the process is finished you have a `excalibur` image, that will be the base for your experiments. You can confirm that looking on results of `docker images` command.
-
-### Run the container
+### With compose
-From your project folder, run `docker run -it -p 5000:5000 -v $(pwd):/excalibur/ excalibur /bin/sh`
-This will start the container and open up a bash console inside it.
+Switch to `docker` directory and run `docker-compose up --build`
-At this point you need to initialize the metadata database using:
+Open your browser to http://localhost and start extracting tabular data from your PDFs.
-
-$ excalibur initdb
-
+### Running the container yourself
-Once initialized, you need to enable connectivity from outside the container:
-
-Use nano to open the config file ...
-
-
-$ nano /root/excalibur/excalibur.cfg
-
+### Prepare the image
-... and modify the [webserver] section as:
+Switch to `docker` directory and run `docker build -t excalibur ./excalibur` to build your docker image. That may take some time but is only required once. Or perhaps a few times after you tweak something in a `Dockerfile`.
-
-web_server_host = 0.0.0.0
-
+After the process is finished you have an `excalibur` image, which will be the base for your experiments. You can confirm this by checking the output of the `docker images` command.
-And then start the webserver using:
+### Run the container
-
-$ excalibur webserver
-
+From your project folder, run `docker run -it -p 5000:5000 excalibur`
That's it! Now you can go to http://localhost:5000 and start extracting tabular data from your PDFs.
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
new file mode 100644
index 0000000..e8d4ec9
--- /dev/null
+++ b/docker/docker-compose.yml
@@ -0,0 +1,17 @@
+version: "3.7"
+
+services:
+  # Excalibur web app: port 5000 is exposed to other services only, not published to the host.
+  excalibur:
+    build: ./excalibur
+    container_name: excalibur
+    restart: always
+    expose:
+      - "5000"
+  # Reverse proxy: the only service published to the host (host port 80 -> container port 80).
+  nginx:
+    build: ./nginx
+    container_name: nginx
+    restart: always
+    ports:
+      - "80:80"
diff --git a/docker/Dockerfile b/docker/excalibur/Dockerfile
similarity index 100%
rename from docker/Dockerfile
rename to docker/excalibur/Dockerfile
diff --git a/docker/requirements.txt b/docker/excalibur/requirements.txt
similarity index 100%
rename from docker/requirements.txt
rename to docker/excalibur/requirements.txt
diff --git a/docker/nginx/Dockerfile b/docker/nginx/Dockerfile
new file mode 100644
index 0000000..e60bd5b
--- /dev/null
+++ b/docker/nginx/Dockerfile
@@ -0,0 +1,8 @@
+# Pin the base image: an untagged FROM resolves to :latest and makes rebuilds non-reproducible.
+FROM nginx:1.27
+
+# Remove the stock site so the server block copied below is the only one in conf.d.
+RUN rm /etc/nginx/conf.d/default.conf
+
+# Install our reverse-proxy configuration (nginx.conf from this build context).
+COPY nginx.conf /etc/nginx/conf.d/
diff --git a/docker/nginx/nginx.conf b/docker/nginx/nginx.conf
new file mode 100644
index 0000000..c52369d
--- /dev/null
+++ b/docker/nginx/nginx.conf
@@ -0,0 +1,9 @@
+server {
+    listen 80;
+    # Raise nginx's default 1 MB request-body limit so larger PDF uploads are not rejected with 413.
+    client_max_body_size 50m;
+
+    location / {
+        proxy_pass http://excalibur:5000;
+    }
+}
From d1f0854ef2e459e5e2f3293e91141b330b577fea Mon Sep 17 00:00:00 2001
From: Stefano Gallotti
Date: Sun, 18 Apr 2021 09:49:19 -0700
Subject: [PATCH 6/6] automatic allow connectivity
---
docker/excalibur/Dockerfile | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/docker/excalibur/Dockerfile b/docker/excalibur/Dockerfile
index a047b7e..418cf70 100644
--- a/docker/excalibur/Dockerfile
+++ b/docker/excalibur/Dockerfile
@@ -13,5 +13,9 @@ ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8
ENV EXCALIBUR_HOME="/excalibur" PYTHONUNBUFFERED="1"
+
+# Initialise the metadata DB, then rewrite /excalibur/excalibur.cfg so 127.0.0.1 becomes 0.0.0.0; && fails the build if either step fails.
+RUN excalibur initdb \
+    && sed -i 's/127\.0\.0\.1/0.0.0.0/g' /excalibur/excalibur.cfg
WORKDIR /excalibur/
CMD ["excalibur", "webserver"]