From 1e58a461e59a372a4e49ad01f776159ba93970d4 Mon Sep 17 00:00:00 2001 From: Stegallo Date: Thu, 27 Dec 2018 11:31:42 -0800 Subject: [PATCH 1/6] Add Dockerfile and README.md to help development --- docker/Dockerfile | 26 +++++++++++++++++++++++ docker/README.md | 54 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+) create mode 100644 docker/Dockerfile create mode 100644 docker/README.md diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..1b5a6ce --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,26 @@ +FROM ubuntu + +RUN apt-get update \ + && apt-get install -y \ + apt-utils \ + curl \ + wget \ + nano \ + libsm6 \ + libxrender1 \ + libxext6 \ + ghostscript \ + python3-minimal \ + python3-setuptools \ + python3-pip \ + && ln -s /usr/bin/python3 /usr/bin/python \ + && ln -s /usr/bin/pip3 /usr/bin/pip + +RUN pip install excalibur-py[dev] + +EXPOSE 5000 + +ENV LC_ALL=C.UTF-8 +ENV LANG=C.UTF-8 + +WORKDIR /excalibur/ diff --git a/docker/README.md b/docker/README.md new file mode 100644 index 0000000..043db38 --- /dev/null +++ b/docker/README.md @@ -0,0 +1,54 @@ +

+ +

+ +# Excalibur: Docker +This is the Docker configuration which allows you to run Apache Spark without installing any dependencies on your machine!
+OK, any except `docker`. + +## Prerequisites + +As stated, the thing you need is `docker`. + +Follow the instructions on [Install Docker](https://docs.docker.com/engine/installation/) for your environment if you haven't got `docker` already. + +## Usage + +### Prepare the image + +Switch to `docker` directory here and run `docker build -t excalibur .` (don't forget the final `.`) to build your docker image. That may take some time but is only required once. Or perhaps a few times after you tweak something in a `Dockerfile`. + +After the process is finished you have a `excalibur` image, that will be the base for your experiments. You can confirm that looking on results of `docker images` command. + +### Run the container + +From your project folder, run `docker run -it -p 5000:5000 -v $(pwd):/excalibur/ excalibur /bin/bash` +This will start the container and open up a bash console inside it. + +At this point you need to initialize the metadata database using: + +
+$ excalibur initdb
+
+ +Once initialized, you need to enable connectivity from outside the container: + +Use nano to open the config file ... + +
+$ nano /root/excalibur/excalibur.cfg
+
+ +... and modify the [webserver] section as: + +
+web_server_host = 0.0.0.0
+
+ +And then start the webserver using: + +
+$ excalibur webserver
+
+ +That's it! Now you can go to http://localhost:5000 and start extracting tabular data from your PDFs. From cb2fe9ac54cf116dbb9b818d49633768f79d7043 Mon Sep 17 00:00:00 2001 From: Stegallo Date: Fri, 1 Mar 2019 07:46:46 -0800 Subject: [PATCH 2/6] Update README.md --- docker/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/README.md b/docker/README.md index 043db38..7b6f367 100644 --- a/docker/README.md +++ b/docker/README.md @@ -3,7 +3,7 @@

# Excalibur: Docker -This is the Docker configuration which allows you to run Apache Spark without installing any dependencies on your machine!
+This is the Docker configuration which allows you to run Excalibur without installing any dependencies on your machine!
OK, any except `docker`. ## Prerequisites From 5e0187903da10ae248ffc20cd43b3b7b194d05b3 Mon Sep 17 00:00:00 2001 From: Stefano Gallotti Date: Sat, 17 Apr 2021 19:12:47 -0700 Subject: [PATCH 3/6] change Dokerfile to use p3.9.4-alpine --- docker/Dockerfile | 25 ++++++++----------------- docker/requirements.txt | 2 ++ 2 files changed, 10 insertions(+), 17 deletions(-) create mode 100644 docker/requirements.txt diff --git a/docker/Dockerfile b/docker/Dockerfile index 1b5a6ce..a047b7e 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,26 +1,17 @@ -FROM ubuntu +FROM python:3.9.4-alpine3.13 -RUN apt-get update \ - && apt-get install -y \ - apt-utils \ - curl \ - wget \ - nano \ - libsm6 \ - libxrender1 \ - libxext6 \ - ghostscript \ - python3-minimal \ - python3-setuptools \ - python3-pip \ - && ln -s /usr/bin/python3 /usr/bin/python \ - && ln -s /usr/bin/pip3 /usr/bin/pip +COPY requirements.txt /requirements.txt + +RUN /sbin/apk add --no-cache ghostscript-dev libstdc++ +RUN /sbin/apk add --no-cache --repository http://dl-cdn.alpinelinux.org/alpine/v3.9/main qt-x11 +RUN /usr/local/bin/pip install --no-cache-dir --requirement /requirements.txt -RUN pip install excalibur-py[dev] EXPOSE 5000 ENV LC_ALL=C.UTF-8 ENV LANG=C.UTF-8 +ENV EXCALIBUR_HOME="/excalibur" PYTHONUNBUFFERED="1" WORKDIR /excalibur/ +CMD ["excalibur", "webserver"] diff --git a/docker/requirements.txt b/docker/requirements.txt new file mode 100644 index 0000000..7d03101 --- /dev/null +++ b/docker/requirements.txt @@ -0,0 +1,2 @@ +--extra-index-url https://alpine-wheels.github.io/index +excalibur-py==0.4.3 From 3c97ec2dd9c3dd30b56278ddbe8ee7afa3f51b6f Mon Sep 17 00:00:00 2001 From: Stefano Gallotti Date: Sat, 17 Apr 2021 19:39:53 -0700 Subject: [PATCH 4/6] and readme to use /bin/sh --- docker/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/README.md b/docker/README.md index 7b6f367..d45207e 100644 --- a/docker/README.md +++ b/docker/README.md @@ -22,7 +22,7 @@ After 
the process is finished you have a `excalibur` image, that will be the bas ### Run the container -From your project folder, run `docker run -it -p 5000:5000 -v $(pwd):/excalibur/ excalibur /bin/bash` +From your project folder, run `docker run -it -p 5000:5000 -v $(pwd):/excalibur/ excalibur /bin/sh` This will start the container and open up a bash console inside it. At this point you need to initialize the metadata database using: From 3ee6e828a5db0f15982a1aa1906cf811a4693392 Mon Sep 17 00:00:00 2001 From: Stefano Gallotti Date: Sun, 18 Apr 2021 09:47:02 -0700 Subject: [PATCH 5/6] add compose --- docker/README.md | 37 ++++++------------------- docker/docker-compose.yml | 17 ++++++++++++ docker/{ => excalibur}/Dockerfile | 0 docker/{ => excalibur}/requirements.txt | 0 docker/nginx/Dockerfile | 8 ++++++ docker/nginx/nginx.conf | 9 ++++++ 6 files changed, 43 insertions(+), 28 deletions(-) create mode 100644 docker/docker-compose.yml rename docker/{ => excalibur}/Dockerfile (100%) rename docker/{ => excalibur}/requirements.txt (100%) create mode 100644 docker/nginx/Dockerfile create mode 100644 docker/nginx/nginx.conf diff --git a/docker/README.md b/docker/README.md index d45207e..e8fa741 100644 --- a/docker/README.md +++ b/docker/README.md @@ -14,41 +14,22 @@ Follow the instructions on [Install Docker](https://docs.docker.com/engine/insta ## Usage -### Prepare the image - -Switch to `docker` directory here and run `docker build -t excalibur .` (don't forget the final `.`) to build your docker image. That may take some time but is only required once. Or perhaps a few times after you tweak something in a `Dockerfile`. - -After the process is finished you have a `excalibur` image, that will be the base for your experiments. You can confirm that looking on results of `docker images` command. 
- -### Run the container +### With compose -From your project folder, run `docker run -it -p 5000:5000 -v $(pwd):/excalibur/ excalibur /bin/sh` -This will start the container and open up a bash console inside it. +Switch to `docker` directory and run `docker-compose up --build` -At this point you need to initialize the metadata database using: +Open your browser to http://localhost and start extracting tabular data from your PDFs. -
-$ excalibur initdb
-
+### Running the container yourself -Once initialized, you need to enable connectivity from outside the container: - -Use nano to open the config file ... - -
-$ nano /root/excalibur/excalibur.cfg
-
+### Prepare the image -... and modify the [webserver] section as: +Switch to `docker` directory and run `docker build -t excalibur ./excalibur` to build your docker image. That may take some time but is only required once. Or perhaps a few times after you tweak something in a `Dockerfile`. -
-web_server_host = 0.0.0.0
-
+After the process is finished you have a `excalibur` image, that will be the base for your experiments. You can confirm that looking on results of `docker images` command. -And then start the webserver using: +### Run the container -
-$ excalibur webserver
-
+From your project folder, run `docker run -it -p 5000:5000 excalibur` That's it! Now you can go to http://localhost:5000 and start extracting tabular data from your PDFs. diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml new file mode 100644 index 0000000..e8d4ec9 --- /dev/null +++ b/docker/docker-compose.yml @@ -0,0 +1,17 @@ +version: "3.7" + +services: + + excalibur: + build: ./excalibur + container_name: excalibur + restart: always + expose: + - 5000 + + nginx: + build: ./nginx + container_name: nginx + restart: always + ports: + - "80:80" diff --git a/docker/Dockerfile b/docker/excalibur/Dockerfile similarity index 100% rename from docker/Dockerfile rename to docker/excalibur/Dockerfile diff --git a/docker/requirements.txt b/docker/excalibur/requirements.txt similarity index 100% rename from docker/requirements.txt rename to docker/excalibur/requirements.txt diff --git a/docker/nginx/Dockerfile b/docker/nginx/Dockerfile new file mode 100644 index 0000000..e60bd5b --- /dev/null +++ b/docker/nginx/Dockerfile @@ -0,0 +1,8 @@ +# Use the Nginx image +FROM nginx + +# Remove the default nginx.conf +RUN rm /etc/nginx/conf.d/default.conf + +# Replace with our own nginx.conf +COPY nginx.conf /etc/nginx/conf.d/ diff --git a/docker/nginx/nginx.conf b/docker/nginx/nginx.conf new file mode 100644 index 0000000..c52369d --- /dev/null +++ b/docker/nginx/nginx.conf @@ -0,0 +1,9 @@ +server { + + listen 80; + + location / { + proxy_pass http://excalibur:5000; + } + +} From d1f0854ef2e459e5e2f3293e91141b330b577fea Mon Sep 17 00:00:00 2001 From: Stefano Gallotti Date: Sun, 18 Apr 2021 09:49:19 -0700 Subject: [PATCH 6/6] automatic allow connectivity --- docker/excalibur/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/excalibur/Dockerfile b/docker/excalibur/Dockerfile index a047b7e..418cf70 100644 --- a/docker/excalibur/Dockerfile +++ b/docker/excalibur/Dockerfile @@ -13,5 +13,9 @@ ENV LC_ALL=C.UTF-8 ENV LANG=C.UTF-8 ENV 
EXCALIBUR_HOME="/excalibur" PYTHONUNBUFFERED="1" + +RUN excalibur initdb +RUN cat /excalibur/excalibur.cfg | sed 's/127.0.0.1/0.0.0.0/g' > /excalibur/excalibur.tmp; \ + mv /excalibur/excalibur.tmp /excalibur/excalibur.cfg WORKDIR /excalibur/ CMD ["excalibur", "webserver"]