Skip to content

Commit b135142

Browse files
committed
using segmenter to split sentences
1 parent e0e93fa commit b135142

File tree

6 files changed

+253
-1075
lines changed

6 files changed

+253
-1075
lines changed

compiler-rs/Cargo.lock

Lines changed: 212 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

compiler-rs/clients_schema_to_openapi/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ serde = {version = "1.0", features=["derive"]}
1111
serde_json = "1.0"
1212
serde_path_to_error = "0.1"
1313
serde_ignored = "0.1"
14+
icu_segmenter = "1.5.0"
1415
openapiv3 = "1.0"
1516
anyhow = "1.0"
1617
indexmap = "1.9"

compiler-rs/clients_schema_to_openapi/src/paths.rs

Lines changed: 24 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ use std::fmt::Write;
2121
use anyhow::{anyhow, bail};
2222
use clients_schema::Property;
2323
use indexmap::indexmap;
24+
use icu_segmenter::SentenceSegmenter;
2425
use openapiv3::{
2526
MediaType, Parameter, ParameterData, ParameterSchemaOrContent, PathItem, PathStyle, Paths, QueryStyle, ReferenceOr,
2627
RequestBody, Response, Responses, StatusCode,
@@ -191,13 +192,13 @@ pub fn add_endpoint(
191192

192193
parameters.append(&mut query_params.clone());
193194

194-
let sum_desc = split_summary_desc(endpoint.description.clone());
195+
let sum_desc = split_summary_desc(&endpoint.description);
195196

196197
// Create the operation, it will be repeated if we have several methods
197198
let operation = openapiv3::Operation {
198199
tags: vec![endpoint.name.clone()],
199-
summary: Some(sum_desc.summary),
200-
description: Some(sum_desc.description),
200+
summary: sum_desc.summary,
201+
description: sum_desc.description,
201202
external_docs: tac.convert_external_docs(endpoint),
202203
operation_id: None, // set in clone_operation below with operation_counter
203204
parameters,
@@ -313,23 +314,31 @@ fn get_path_parameters(template: &str) -> Vec<&str> {
313314
result
314315
}
315316

316-
fn split_summary_desc(desc: String) -> SplitDesc{
317-
let mut parts = desc.split(['.','\n',':']);
318-
let first_line = parts.next().unwrap_or_else(|| "");
319-
320-
let new_desc = desc.replace(first_line,"");
321-
let trim = new_desc.trim();
322-
let remove_period = trim.strip_prefix('.').unwrap_or_else(|| trim);
323-
let remove_column = remove_period.strip_prefix(':').unwrap_or_else(|| remove_period);
317+
fn split_summary_desc(desc: &str) -> SplitDesc{
318+
let segmenter = SentenceSegmenter::new();
319+
320+
let breakpoints: Vec<usize> = segmenter
321+
.segment_str(desc)
322+
.collect();
323+
324+
if breakpoints.len()<2{
325+
return SplitDesc {
326+
summary: None,
327+
description: None
328+
}
329+
}
330+
let first_line = &desc[breakpoints[0]..breakpoints[1]];
331+
let rest = &desc[breakpoints[1]..breakpoints[breakpoints.len()-1]];
332+
324333
SplitDesc {
325-
summary: String::from(first_line.trim()),
326-
description: String::from(remove_column.trim())
334+
summary: Option::from(String::from(first_line.trim().strip_suffix('.').unwrap_or(first_line))),
335+
description: if !rest.is_empty() {Option::from(String::from(rest.trim()))} else {None}
327336
}
328337
}
329338

330339
struct SplitDesc {
331-
summary: String,
332-
description: String
340+
summary: Option<String>,
341+
description: Option<String>
333342
}
334343

335344
#[cfg(test)]
Binary file not shown.

0 commit comments

Comments
 (0)