Skip to content

Commit c093da1

Browse files
committed
Switch to PercentEscaper (metafacture-fix#273)
This enables both: whitespace converted to a plus sign "+" (which is the default to be backwards compatible) and whitespace escaped as percent "%20". - add guava dependency (uses the same version as metafacture-fix) - introduce test - add to schemata
1 parent 2cec789 commit c093da1

File tree

5 files changed

+119
-11
lines changed

5 files changed

+119
-11
lines changed

build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ subprojects {
2828
versions = [
2929
'assertj_core': '3.11.1',
3030
'commons_compress': '1.21',
31+
'guava': '29.0-jre',
3132
'jackson_databind': '2.15.1',
3233
'junit': '4.12',
3334
'mockito': '2.5.7',

metamorph/build.gradle

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ dependencies {
2525
implementation project(':metafacture-io')
2626
implementation project(':metafacture-mangling')
2727
implementation project(':metafacture-javaintegration')
28+
implementation "com.google.guava:guava:${versions.guava}"
29+
implementation "org.slf4j:slf4j-api:${versions.slf4j}"
2830
implementation "org.slf4j:slf4j-api:${versions.slf4j}"
2931
testRuntimeOnly "org.slf4j:slf4j-simple:${versions.slf4j}"
3032
testImplementation "junit:junit:${versions.junit}"
Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright 2013, 2014 Deutsche Nationalbibliothek
2+
* Copyright 2013, 2023 Deutsche Nationalbibliothek et al
33
*
44
* Licensed under the Apache License, Version 2.0 the "License";
55
* you may not use this file except in compliance with the License.
@@ -16,19 +16,24 @@
1616

1717
package org.metafacture.metamorph.functions;
1818

19-
import org.metafacture.metamorph.api.MorphExecutionException;
2019
import org.metafacture.metamorph.api.helpers.AbstractSimpleStatelessFunction;
2120

22-
import java.io.UnsupportedEncodingException;
23-
import java.net.URLEncoder;
21+
import com.google.common.net.PercentEscaper;
2422

2523
/**
2624
* URL encodes the received value.
25+
* Default is to convert a whitespace " " to a plus sign "+". This can be set so that a whitespace " " is escaped to
26+
* "%20".
27+
* Safe characters for this escaper are the ranges 0..9, a..z and A..Z. These are always safe and should not be
28+
* specified.
2729
*
2830
* @author Markus Michael Geipel
29-
*
31+
* @author Pascal Christoph (dr0i)
3032
*/
3133
public final class URLEncode extends AbstractSimpleStatelessFunction {
34+
private String safeChars = "";
35+
private Boolean plusForSpace = true;
36+
private PercentEscaper percentEscaper = new PercentEscaper(safeChars, plusForSpace);
3237

3338
/**
3439
* Creates an instance of {@link URLEncode}.
@@ -38,12 +43,29 @@ public URLEncode() {
3843

3944
@Override
4045
public String process(final String value) {
41-
try {
42-
return URLEncoder.encode(value, "UTF-8");
43-
}
44-
catch (final UnsupportedEncodingException e) {
45-
throw new MorphExecutionException("urlencode: unsupported encoding UTF-8", e);
46-
}
46+
return percentEscaper.escape(value);
47+
}
48+
49+
/**
50+
* Sets a URI escaper with the specified safe characters. The ranges 0..9, a..z and A..Z are always safe
51+
* and should not be specified.
52+
*
53+
* @param safeChars the chars which will not be escaped
54+
*/
55+
public void setSafeChars(final String safeChars) {
56+
this.safeChars = safeChars;
57+
percentEscaper = new PercentEscaper(safeChars, plusForSpace);
4758
}
4859

60+
/**
61+
* Sets whether a space should be converted into a plus sign "+" or percent-escaped as "%20".
62+
* <p>
63+
* Default is "true", i.e. to escape the space character as "+".
64+
*
65+
* @param plusForSpace true if space character " " should be converted into a plus sign "+"
66+
*/
67+
public void setPlusForSpace(final Boolean plusForSpace) {
68+
this.plusForSpace = plusForSpace;
69+
percentEscaper = new PercentEscaper(safeChars, plusForSpace);
70+
}
4971
}

metamorph/src/main/resources/schemata/metamorph.xsd

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -977,6 +977,21 @@
977977
</documentation>
978978
</annotation>
979979
<complexType>
980+
<attribute name="safechars" type="string" use="optional">
981+
<annotation>
982+
<documentation>Chars which will not be escaped. The ranges
983+
0..9, a..z and A..Z are always safe and should not be
984+
specified.
985+
</documentation>
986+
</annotation>
987+
</attribute>
988+
<attribute name="plusforspace" type="boolean" use="optional" default="true">
989+
<annotation>
990+
<documentation>Sets if a space should be converted into a
991+
plus sign "+" or percent escaped as "%20".
992+
</documentation>
993+
</annotation>
994+
</attribute>
980995
</complexType>
981996
</element>
982997

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright 2023 hbz
3+
*
4+
* Licensed under the Apache License, Version 2.0 the "License";
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
package org.metafacture.metamorph.functions;
18+
19+
import static org.junit.Assert.*;
20+
import org.junit.Test;
21+
22+
/**
23+
* Tests {@link URLEncode}.
24+
*
25+
* @author Pascal Christoph (dr0i)
26+
*/
27+
28+
public final class URLEncodeTest {
29+
30+
private static final String CAFE_UTF8 = "café";
31+
private static final String CAFE_ENCODED = "caf%C3%A9";
32+
private static final String SOME_CHARS = "/&%\\+";
33+
private static final String SOME_CHARS_ENCODED = "%2F%26%25%5C%2B";
34+
private static final String WHITESPACE = " ";
35+
private static final String WHITESPACE_AS_PLUS_ENCODED = "+";
36+
private static final String WHITESPACE_PERCENT_ENCODED = "%20";
37+
38+
@Test
39+
public void testUtf8(){
40+
final URLEncode urlEncode = new URLEncode();
41+
assertEquals(CAFE_ENCODED, urlEncode.process(CAFE_UTF8));
42+
}
43+
@Test
44+
public void testSomeChars(){
45+
final URLEncode urlEncode = new URLEncode();
46+
assertEquals(SOME_CHARS_ENCODED, urlEncode.process(SOME_CHARS));
47+
}
48+
@Test
49+
public void testEscapeSpaceAsPlus(){
50+
final URLEncode urlEncode = new URLEncode();
51+
assertEquals(WHITESPACE_AS_PLUS_ENCODED, urlEncode.process(WHITESPACE));
52+
}
53+
54+
@Test
55+
public void testEscapeSpaceAsPercentEncoded(){
56+
final URLEncode urlEncode = new URLEncode();
57+
urlEncode.setPlusForSpace(false);
58+
assertEquals(WHITESPACE_PERCENT_ENCODED, urlEncode.process(WHITESPACE));
59+
}
60+
61+
@Test
62+
public void testSafeChars(){
63+
final URLEncode urlEncode = new URLEncode();
64+
urlEncode.setSafeChars(SOME_CHARS);
65+
assertEquals(SOME_CHARS, urlEncode.process(SOME_CHARS));
66+
}
67+
68+
}

0 commit comments

Comments
 (0)