diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 1a6bd1c3..5c930c7f 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -1,12 +1,9 @@ name: CI on: - pull_request: - branches: - - master push: - branches: - - master - tags: '*' + branches: [main, master] + tags: ["*"] + pull_request: jobs: test: name: Julia ${{ matrix.version }} - ${{ matrix.os }} - ${{ matrix.arch }} - ${{ github.event_name }} @@ -15,32 +12,39 @@ jobs: fail-fast: false matrix: version: - - '1.0' - - '1' - - 'nightly' + - '1' # automatically expands to the latest stable 1.x release of Julia + - 'min' + - 'pre' os: - ubuntu-latest + - windows-latest arch: - x64 include: - - version: '1' - os: ubuntu-latest - arch: x86 - - version: '1' - os: windows-latest - arch: x64 - - version: '1' - os: macos-latest - arch: x64 + - os: macOS-latest + arch: aarch64 + version: 1 steps: - uses: actions/checkout@v4 - uses: julia-actions/setup-julia@v2 with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} + - uses: julia-actions/cache@v2 - uses: julia-actions/julia-buildpkg@v1 - uses: julia-actions/julia-runtest@v1 - uses: julia-actions/julia-processcoverage@v1 - - uses: codecov/codecov-action@v3 + - uses: codecov/codecov-action@v5 with: - file: lcov.info + files: lcov.info + token: ${{ secrets.CODECOV_TOKEN }} + docs: + name: Documentation + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/julia-buildpkg@latest + - uses: julia-actions/julia-docdeploy@latest + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + DOCUMENTER_KEY: ${{ secrets.DOCUMENTER_KEY }} \ No newline at end of file diff --git a/.github/workflows/CompatHelper.yml b/.github/workflows/CompatHelper.yml new file mode 100644 index 00000000..7ddad2a9 --- /dev/null +++ b/.github/workflows/CompatHelper.yml @@ -0,0 +1,45 @@ +name: CompatHelper +on: + schedule: + - cron: 0 0 * * * + workflow_dispatch: +permissions: + contents: write + pull-requests: write +jobs: + 
CompatHelper: + runs-on: ubuntu-latest + steps: + - name: Check if Julia is already available in the PATH + id: julia_in_path + run: which julia + continue-on-error: true + - name: Install Julia, but only if it is not already available in the PATH + uses: julia-actions/setup-julia@v1 + with: + version: '1' + # arch: ${{ runner.arch }} + if: steps.julia_in_path.outcome != 'success' + - name: "Add the General registry via Git" + run: | + import Pkg + ENV["JULIA_PKG_SERVER"] = "" + Pkg.Registry.add("General") + shell: julia --color=yes {0} + - name: "Install CompatHelper" + run: | + import Pkg + name = "CompatHelper" + uuid = "aa819f21-2bde-4658-8897-bab36330d9b7" + version = "3" + Pkg.add(; name, uuid, version) + shell: julia --color=yes {0} + - name: "Run CompatHelper" + run: | + import CompatHelper + CompatHelper.main() + shell: julia --color=yes {0} + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + COMPATHELPER_PRIV: ${{ secrets.DOCUMENTER_KEY }} + # COMPATHELPER_PRIV: ${{ secrets.COMPATHELPER_PRIV }} diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml index 778c06fe..ae8c9c1e 100644 --- a/.github/workflows/TagBot.yml +++ b/.github/workflows/TagBot.yml @@ -11,4 +11,5 @@ jobs: steps: - uses: JuliaRegistries/TagBot@v1 with: - token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} + token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 1e2aa42c..3e1d467e 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ *.mem data/*.json Manifest.toml +test/jsonchecker/** +test/JSONTestSuite/** \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md index d916e61a..fd78a4c2 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -1,9 +1,8 @@ The Julia JSON package is licensed under the MIT Expat License: -> Copyright (c) 2002: JSON.org, 2012–2016: Avik Sengupta, Stefan Karpinski, +> Copyright (c) 2002: JSON.org, 2012–2025: Jacob Quinn, Avik Sengupta, Stefan Karpinski, > David de Laat, Dirk Gadsen, Milo Yip and other 
contributors > – https://github.com/JuliaLang/JSON.jl/contributors -> and https://github.com/miloyip/nativejson-benchmark/contributors > > Permission is hereby granted, free of charge, to any person obtaining > a copy of this software and associated documentation files (the diff --git a/Project.toml b/Project.toml index 33cabfbb..2a03bab6 100644 --- a/Project.toml +++ b/Project.toml @@ -1,24 +1,26 @@ name = "JSON" uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" -version = "0.21.4" +version = "1.0.0" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" Mmap = "a63ad114-7e13-5084-954f-fe012c677804" Parsers = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" +PrecompileTools = "aea7be01-6a6a-4083-8856-8a6e6704d82a" +StructUtils = "ec057cc2-7a8d-4b58-b3b3-92acb9f63b42" +UUIDs = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" -[extras] -DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" -Distributed = "8ba89e20-285c-5b6f-9357-94700520ee1b" -FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93" -OffsetArrays = "6fe1bfb0-de20-5000-8ca7-80f57d26f881" -Sockets = "6462fe0b-24de-5631-8697-dd941f90decc" -Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" - [compat] -julia = "0.7, 1.0" Parsers = "1, 2" +PrecompileTools = "1" +StructUtils = "2" +julia = "1.9" + +[extras] +Tar = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" [targets] -test = ["DataStructures", "Distributed", "FixedPointNumbers", "OffsetArrays", "Sockets", "Test"] +test = ["Tar", "Test"] diff --git a/README.md b/README.md index 1a936054..add9c2e7 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # JSON.jl -This package provides for parsing and printing JSON in pure Julia. See also alternative packages [JSON3.jl](https://github.com/quinnj/JSON3.jl), and [Serde.jl](https://github.com/bhftbootcamp/Serde.jl). +This package provides for parsing and printing JSON in pure Julia. 
[![Build Status](https://github.com/JuliaIO/JSON.jl/workflows/CI/badge.svg)](https://github.com/JuliaIO/JSON.jl/actions/workflows/CI.yml?query=branch%3Amaster) [![codecov.io](http://codecov.io/github/JuliaIO/JSON.jl/coverage.svg?branch=master)](http://codecov.io/github/JuliaIO/JSON.jl?branch=master) @@ -9,332 +9,69 @@ This package provides for parsing and printing JSON in pure Julia. See also alte Type `] add JSON` and then hit ⏎ Return at the REPL. You should see `pkg> add JSON`. -## Basic Usage - -```julia -import JSON - -# JSON.parse - string or stream to Julia data structures -s = "{\"a_number\" : 5.0, \"an_array\" : [\"string\", 9]}" -j = JSON.parse(s) -# Dict{AbstractString,Any} with 2 entries: -# "an_array" => {"string",9} -# "a_number" => 5.0 - -# JSON.json - Julia data structures to a string -JSON.json([2,3]) -# "[2,3]" -JSON.json(j) -# "{\"an_array\":[\"string\",9],\"a_number\":5.0}" -``` - ## Documentation -```julia -JSON.print(io::IO, s::AbstractString) -JSON.print(io::IO, s::Union{Integer, AbstractFloat}) -JSON.print(io::IO, n::Nothing) -JSON.print(io::IO, b::Bool) -JSON.print(io::IO, a::AbstractDict) -JSON.print(io::IO, v::AbstractVector) -JSON.print(io::IO, v::Array) -``` - -Writes a compact (no extra whitespace or indentation) JSON representation -to the supplied IO. - -```julia -JSON.print(a::AbstractDict, indent) -JSON.print(io::IO, a::AbstractDict, indent) -``` - -Writes a JSON representation with newlines, and indentation if specified. Non-zero `indent` will be applied recursively to nested elements. - - -```julia -json(a::Any) -``` - -Returns a compact JSON representation as an `AbstractString`. - -```julia -JSON.parse(s::AbstractString; dicttype=Dict, inttype=Int64) -JSON.parse(io::IO; dicttype=Dict, inttype=Int64) -JSON.parsefile(filename::AbstractString; dicttype=Dict, inttype=Int64, use_mmap=true) -``` - -Parses a JSON `AbstractString` or IO stream into a nested `Array` or `Dict`. 
- -The `dicttype` indicates the dictionary type (`<: Associative`), or a function that -returns an instance of a dictionary type, -that JSON objects are parsed to. It defaults to `Dict` (the built-in Julia -dictionary), but a different type can be passed for additional functionality. -For example, if you `import DataStructures` -(assuming the [DataStructures -package](https://github.com/JuliaLang/DataStructures.jl) is -installed) - - - you can pass `dicttype=DataStructures.OrderedDict` to maintain the insertion order - of the items in the object; - - or you can pass `()->DefaultDict{String,Any}(Missing)` to having any non-found keys - return `missing` when you index the result. - - -The `inttype` argument controls how integers are parsed. If a number in a JSON -file is recognized to be an integer, it is parsed as one; otherwise it is parsed -as a `Float64`. The `inttype` defaults to `Int64`, but, for example, if you know -that your integer numbers are all small and want to save space, you can pass -`inttype=Int32`. Alternatively, if your JSON input has integers which are too large -for Int64, you can pass `inttype=Int128` or `inttype=BigInt`. `inttype` can be any -subtype of `Real`. - -```julia -JSONText(s::AbstractString) -``` -A wrapper around a Julia string representing JSON-formatted text, -which is inserted *as-is* in the JSON output of `JSON.print` and `JSON.json`. - -```julia -JSON.lower(p::Point2D) = [p.x, p.y] -``` - -Define a custom serialization rule for a particular data type. Must return a -value that can be directly serialized; see help for more details. - -### Customizing JSON - -Users may find the default behaviour of JSON inappropriate for their use case. In -such cases, JSON provides two mechanisms for users to customize serialization. The -first method, `JSON.Writer.StructuralContext`, is used to customize the cosmetic -properties of the serialized JSON. (For example, the default pretty printing vs. 
-compact printing is supported by provided two different `StructuralContext`s.) -Examples of applications for which `StructuralContext` is appropriate include: -particular formatting demands for JSON (maybe not in compliance with the JSON -standard) or JSON-like formats with different syntax. - -The second method, `JSON.Serializations.Serialization`, is used to control the -translation of Julia objects into JSON serialization instructions. In most cases, -writing a method for `JSON.lower` (as mentioned above) is sufficient to define -JSON serializations for user-defined objects. However, this is not appropriate for -overriding or deleting predefined serializations (since that would globally affect -users of the `JSON` module and is an instance of dangerous -[type piracy](https://docs.julialang.org/en/v1/manual/style-guide/index.html#Avoid-type-piracy-1)). -For these use-cases, users should define a custom instance of `Serialization`. -An example of an application for this use case includes: a commonly requested -extension to JSON which serializes float NaN and infinite values as `NaN` or `Inf`, -in contravention of the JSON standard. - -Both methods are controlled by the `JSON.show_json` function, which has the following -signature: - -``` -JSON.show_json(io::StructuralContext, serialization::Serialization, object) -``` +- [**STABLE**](https://juliaio.github.io/JSON.jl/stable) — **most recently tagged version of the documentation.** +- [**LATEST**](https://juliaio.github.io/JSON.jl/dev) — *in-development version of the documentation.* -which is expected to write to `io` in a way appropriate based on the rules of -`Serialization`, but here `io` is usually (but not required to be) handled in a -higher-level manner than a raw `IO` object would ordinarily be. +Documentation includes extensive guides and examples for reading, writing, and migrating from JSON.jl pre.10 or JSON3.jl. 
-#### StructuralContext - -To define a new `StructuralContext`, the following boilerplate is recommended: - -```julia -import JSON.Writer.StructuralContext -[mutable] struct MyContext <: StructuralContext - io::IO - [ ... additional state / settings for context goes here ... ] -end -``` - -If your structural context is going to be very similar to the existing JSON -contexts, it is also possible to instead subtype the abstract subtype -`JSONContext` of `StructuralContext`. If this is the case, an `io::IO` field (as -above) is preferred, although the default implementation will only use this -for `write`, so replacing that method is enough to avoid this requirement. - -The following methods should be defined for your context, regardless of whether it -subtypes `JSONContext` or `StructuralContext` directly. If some of these methods -are omitted, then `CommonSerialization` cannot be generally used with this context. - -``` -# called when the next object in a vector or next pair of a dict is to be written -# (requiring a newline and indent for some contexts) -# can do nothing if the context need not support indenting -JSON.Writer.indent(io::MyContext) - -# called for vectors/dicts to separate items, usually writes "," -# unless this is the first element in a JSON array -# (default implementation for JSONContext exists, but requires a mutable bool -# `first` field, and this is an implementation detail not to be relied on; -# to define own or delegate explicitly) -JSON.Writer.delimit(io::MyContext) - -# called for dicts to separate key and value, usually writes ": " -JSON.Writer.separate(io::MyContext) - -# called to indicate start and end of a vector -JSON.Writer.begin_array(io::MyContext) -JSON.Writer.end_array(io::MyContext) - -# called to indicate start and end of a dict -JSON.Writer.begin_object(io::MyContext) -JSON.Writer.end_object(io::MyContext) -``` - -For the following methods, `JSONContext` provides a default implementation, -but it can be specialized. 
For `StructuralContext`s which are not -`JSONContext`s, the `JSONContext` defaults are not appropriate and so are -not available. +## Basic Usage ```julia -# directly write a specific byte (if supported) -# default implementation writes to underlying `.io` field -# note that this enables JSONContext to act as any `io::IO`, -# i.e. one can use `print`, `show`, etc. -Base.write(io::MyContext, byte::UInt8) - -# write "null" -# default implementation writes to underlying `.io` field -JSON.Writer.show_null(io::MyContext) - -# write an object or string in a manner safe for JSON string -# default implementation calls `print` but escapes each byte as appropriate -# and adds double quotes around the content of `elt` -JSON.Writer.show_string(io::MyContext, elt) - -# write a new element of JSON array -# default implementation calls delimit, then indent, then show_json -JSON.Writer.show_element(io::MyContext, elt) - -# write a key for a JSON object -# default implementation calls delimit, then indent, then show_string, -# then separate -JSON.Writer.show_key(io::MyContext, elt) - -# write a key-value pair for a JSON object -# default implementation calls show key, then show_json -JSON.Writer.show_pair(io::MyContext, s::Serialization, key, value) -``` - -What follows is an example of a `JSONContext` subtype which is very similar -to the default context, but which uses `None` instead of `null` for JSON nulls, -which is then generally compatible with Python object literal notation (PYON). It -wraps a default `JSONContext` to delegate all the required methods to. Since -the wrapped context already has a `.io`, this object does not need to include -an `.io` field, and so the `write` method must also be delegated, since the default -is not appropriate. The only other specialization needed is `show_null`. 
+import JSON -```julia -import JSON.Writer -import JSON.Writer.JSONContext -mutable struct PYONContext <: JSONContext - underlying::JSONContext -end +# JSON.parse - JSON to Julia +json = """{"a": 1, "b": null, "c": [1, 2, 3]}""" -for delegate in [:indent, - :delimit, - :separate, - :begin_array, - :end_array, - :begin_object, - :end_object] - @eval JSON.Writer.$delegate(io::PYONContext) = JSON.Writer.$delegate(io.underlying) -end -Base.write(io::PYONContext, byte::UInt8) = write(io.underlying, byte) +# parse into default Julia types +j = JSON.parse(json) +# JSON.Object{String, Any} with 3 entries: +# "a" => 1 +# "b" => nothing +# "c" => Any[1, 2, 3] -JSON.Writer.show_null(io::PYONContext) = print(io, "None") -pyonprint(io::IO, obj) = let io = PYONContext(JSON.Writer.PrettyContext(io, 4)) - JSON.print(io, obj) - return +struct MyType + a::Int + b::Union{Nothing, String} + c::Vector{Int} end -``` - -The usage of this `pyonprint` function is as any other `print` function, e.g. - -```julia -julia> pyonprint(stdout, [1, 2, nothing]) -[ - 1, - 2, - None -] - -julia> sprint(pyonprint, missing) -"None" -``` -#### Serialization +# parse into a custom type +j = JSON.parse(json, MyType) +# MyType(1, nothing, [1, 2, 3]) -For cases where the JSON cosmetics are unimportant, but how objects are converted into their -JSON equivalents (arrays, objects, numbers, etc.) need to be changed, the appropriate -abstraction is `Serialization`. A `Serialization` instance is used as the second argument in -`show_json`. Thus, specializing `show_json` for custom `Serialization` instances enables -either creating more restrictive or different ways to convert objects into JSON. +# parse into existing container +dict = Dict{String, Any}() +JSON.parse!(json, dict) -The default serialization is called `JSON.Serializations.StandardSerialization`, which is a -subtype of `CommonSerialization`. 
Methods of `show_json` are not added to -`StandardSerialization`, but rather to `CommonSerialization`, by both `JSON` and by -other packages for their own types. The `lower` functionality is also specific to -`CommonSerialization`. Therefore, to create a serialization instance that inherits from and -may extend or override parts of the standard serialization, it suffices to define a new -serialization subtyping `CommonSerialization`. In the example below, the new serialization -is the same as `StandardSerialization` except that numbers are serialized with an additional -type tag. +# JSON.parsefile - JSON file to Julia +x = JSON.parsefile("test.json") -```julia -import JSON.Serializations: CommonSerialization, StandardSerialization -import JSON.Writer: StructuralContext, show_json -struct TaggedNumberSerialization <: CommonSerialization end - -tag(f::Real) = Dict(:type => string(typeof(f)), :value => f) -show_json(io::StructuralContext, - ::TaggedNumberSerialization, - f::Union{Integer, AbstractFloat}) = - show_json(io, StandardSerialization(), tag(f)) -``` +# JSON.json - Julia to JSON +JSON.json([2,3]) +# "[2,3]" -Note that the recursive call constructs a `StandardSerialization()`, as otherwise this would -result in a stack overflow, and serializes a `Dict` using that. In this toy example, this is -fine (with only the overhead of constructing a `Dict`), but this is not always possible. -(For instance, if the constructed `Dict` could have other numbers within its values that -need to be tagged.) 
+# Julia struct to JSON, pretty printed +JSON.json(stdout, j; pretty=true) +# { +# "a": 1, +# "b": null, +# "c": [ +# 1, +# 2, +# 3 +# ] +# } -To deal with these more complex cases, or simply to eliminate the overhead of constructing -the intermediate `Dict`, the `show_json` method can be implemented more carefully by -explicitly calling the context’s `begin_object`, `show_pair`, and `end_object` methods, as -documented above, and use the `StandardSerialization()` only for the `show_pair` call for -`f`. +# test that JSON is valid +JSON.isvalidjson(json) -```julia -# More careful implementation -# No difference in this case, but could be needed if recursive data structures are to be -# serialized in more complex cases. -import JSON.Writer: begin_object, show_pair, end_object -function show_json(io::StructuralContext, - s::TaggedNumberSerialization, - f::Union{Integer, AbstractFloat}) - begin_object(io) - show_pair(io, s, :tag => string(typeof(f))) - show_pair(io, StandardSerialization(), :value => f) - end_object(io) -end +# Write JSON to file +JSON.json("test.json", j) ``` -To use the custom serialization, `sprint` can be used (and this can be encapsulated by a -convenient user-defined interface): - -```julia -julia> JSON.parse(sprint(show_json, TaggedNumberSerialization(), Any[1, 2.0, "hi"])) -3-element Array{Any,1}: - Dict{String,Any}("value" => 1,"type" => "Int64") - Dict{String,Any}("value" => 2.0,"type" => "Float64") - "hi" -``` +## Contributing and Questions -If it is not desired to inherit all the functionality of `StandardSerialization`, users may -also choose to start from scratch by directly subtyping `JSON.Serializations.Serialization`. -This is useful if the user wishes to enforce a strict JSON which throws errors when -attempting to serialize objects that aren’t explicitly supported. 
Note that this means you -will need to define a method to support serializing any kind of object, including the -standard JSON objects like booleans, integers, strings, etc.! +Contributions are very welcome, as are feature requests and suggestions. Please open an +[issue][https://github.com/JuliaIO/JSON.jl/issues] if you encounter any problems or would just like to ask a question. diff --git a/bench/bench.jl b/bench/bench.jl deleted file mode 100644 index ee36d9b4..00000000 --- a/bench/bench.jl +++ /dev/null @@ -1,98 +0,0 @@ -#!/usr/bin/julia --color=yes - -using ArgParse -using JSON - - -function bench(f, flags, parsefile, simulate=false) - fp = joinpath(JSON_DATA_DIR, string(f, ".json")) - if !isfile(fp) - println("Downloading benchmark file...") - download(DATA_SOURCES[f], fp) - end - GC.gc() # run gc so it doesn't affect benchmarks - t = if parsefile - @elapsed JSON.parsefile(fp) - else - data = read(fp, String) - @elapsed JSON.Parser.parse(data) - end - - if !simulate - printstyled(" [Bench$flags] "; color=:yellow) - println(f, " ", t, " seconds") - end - t -end - - -const JSON_DATA_DIR = joinpath(dirname(dirname(@__FILE__)), "data") -const s = ArgParseSettings(description="Benchmark JSON.jl") - -const DATA_SOURCES = Dict( - "canada" => "https://raw.githubusercontent.com/miloyip/nativejson-benchmark/v1.0.0/data/canada.json", - "citm_catalog" => "https://raw.githubusercontent.com/miloyip/nativejson-benchmark/v1.0.0/data/citm_catalog.json", - "citylots" => "https://raw.githubusercontent.com/zemirco/sf-city-lots-json/master/citylots.json", - "twitter" => "https://raw.githubusercontent.com/miloyip/nativejson-benchmark/v1.0.0/data/twitter.json") - -function main() - @add_arg_table s begin - "parse" - action = :command - help = "Run a JSON parser benchmark" - "list" - action = :command - help = "List available JSON files for use" - end - - @add_arg_table s["parse"] begin - "--include-compile", "-c" - help = "If set, include the compile time in measurements" - action 
= :store_true - "--parse-file", "-f" - help = "If set, measure JSON.parsefile, hence including IO time" - action = :store_true - "file" - help = "The JSON file to benchmark (leave out to benchmark all)" - required = false - end - - args = parse_args(ARGS, s) - - if args["%COMMAND%"] == "parse" - include_compile = args["parse"]["include-compile"] - parsefile = args["parse"]["parse-file"] - - flags = string(include_compile ? "C" : "", - parsefile ? "F" : "") - - if args["parse"]["file"] ≠ nothing - file = args["parse"]["file"] - - if !include_compile - bench(file, flags, parsefile, true) - end - bench(file, flags, parsefile) - else - times = 1.0 - if include_compile - error("Option --include-compile can only be used for single file.") - end - for k in sort(collect(keys(DATA_SOURCES))) - bench(k, flags, parsefile, true) # warm up compiler - end - for k in sort(collect(keys(DATA_SOURCES))) - times *= bench(k, flags, parsefile) # do benchmark - end - printstyled(" [Bench$flags] ", color=:yellow) - println("Total (G.M.) 
", times^(1/length(DATA_SOURCES)), " seconds") - end - elseif args["%COMMAND%"] == "list" - println("Available benchmarks are:") - for k in sort(collect(keys(DATA_SOURCES))) - println(" • $k") - end - end -end - -main() diff --git a/bench/bench.py b/bench/bench.py deleted file mode 100644 index 2408e197..00000000 --- a/bench/bench.py +++ /dev/null @@ -1,19 +0,0 @@ -from functools import reduce -from textwrap import dedent as dd -from timeit import repeat - - -sources = ["canada", "citm_catalog", "citylots", "twitter"] - -min_times = [] -for source in sources: - s = dd(f"""\ - with open("../data/{source}.json") as f: - json.load(f)""") - times = repeat(stmt=s, setup="import json", repeat=3, number=1) - t = reduce(min, times) - print(f"{source} {t:0.06f} seconds") - min_times.append(t) - -geo_mean = reduce(lambda a, b: a*b, min_times)**(1/len(min_times)) -print(f"Total (G.M): {geo_mean:0.06f}") diff --git a/bench/micro.jl b/bench/micro.jl deleted file mode 100644 index 44b452bb..00000000 --- a/bench/micro.jl +++ /dev/null @@ -1,56 +0,0 @@ -# JSON Microbenchmarks -# 0.6 required for running benchmarks - -using JSON -using BenchmarkTools -using Dates - -const suite = BenchmarkGroup() - -suite["print"] = BenchmarkGroup(["serialize"]) -suite["pretty-print"] = BenchmarkGroup(["serialize"]) - -struct CustomListType - x::Int - y::Float64 - z::Union{CustomListType, Nothing} -end - -struct CustomTreeType - x::String - y::Union{CustomTreeType, Nothing} - z::Union{CustomTreeType, Nothing} -end - -list(x) = x == 0 ? nothing : CustomListType(1, 1.0, list(x - 1)) -tree(x) = x == 0 ? 
nothing : CustomTreeType("!!!", tree(x - 1), tree(x - 1)) - -const micros = Dict( - "integer" => 88, - "float" => -88.8, - "ascii" => "Hello World!", - "ascii-1024" => "x" ^ 1024, - "unicode" => "ສະ​ບາຍ​ດີ​ຊາວ​ໂລກ!", - "unicode-1024" => "ℜ" ^ 1024, - "bool" => true, - "null" => nothing, - "flat-homogeneous-array-16" => collect(1:16), - "flat-homogeneous-array-1024" => collect(1:1024), - "heterogeneous-array" => [ - 1, 2, 3, 7, "A", "C", "E", "N", "Q", "R", "Shuttle to Grand Central"], - "nested-array-16^2" => [collect(1:16) for _ in 1:16], - "nested-array-16^3" => [[collect(1:16) for _ in 1:16] for _ in 1:16], - "small-dict" => Dict( - :a => :b, :c => "💙💙💙💙💙💙", :e => 10, :f => Dict(:a => :b)), - "flat-dict-128" => Dict(zip(collect(1:128), collect(1:128))), - "date" => Date(2016, 08, 09), - "matrix-16" => [i == j ? 1.0 : 0.0 for i in 1:16, j in 1:16], - "custom-list-128" => list(128), - "custom-tree-8" => tree(8)) - -for (k, v) in micros - io = IOBuffer() - suite["print"][k] = @benchmarkable JSON.print($(IOBuffer()), $v) - suite["pretty-print"][k] = @benchmarkable JSON.print( - $(IOBuffer()), $v, 4) -end diff --git a/benchmarks/benchmarks.jl b/benchmarks/benchmarks.jl new file mode 100644 index 00000000..09a7102c --- /dev/null +++ b/benchmarks/benchmarks.jl @@ -0,0 +1,93 @@ +using JSON, Chairmarks + +include(joinpath(dirname(pathof(JSON)), "../benchmarks/structs.jl")) +# test that compile time isn't unreasonable +@time JSON.parse(root_json, Root) + +struct A + a::Int + b::Int + c::Int + d::Int +end + +@b JSON.parse("""{ "a": 1,"b": 2,"c": 3,"d": 4}""") +@b JSON.json(A(1, 2, 3, 4)) +@b JSON.parse("""{ "a": 1,"b": 2,"c": 3,"d": 4}""", A) + +# integers with varying number of digits +@b JSON.parse("""[1,2234,323423423,4234234234234,23232,456454545,56767676,6767,6767,6767676,6767,6767,1,0,-123,-3333]""") +@b JSON.json([1,2234,323423423,4234234234234,23232,456454545,56767676,6767,6767,6767676,6767,6767,1,0,-123,-3333]) + +# floats +@b 
JSON.parse("""[1.123,2.345,3e21,-4e-5,5.1234567890123456789,6.1234567890123456789,7.1234567890123456789,8.1234567890123456789,9.1234567890123456789,1.23,3.14,3.43,34.32,-0.001,0.000023,0.123]""") +@b JSON.json([1.123,2.345,3e21,-4e-5,5.1234567890123456789,6.1234567890123456789,7.1234567890123456789,8.1234567890123456789,9.1234567890123456789,1.23,3.14,3.43,34.32,-0.001,0.000023,0.123]) + +# bools +@b JSON.parse("""[true,false,true,false,true,false,true,false,true,false,true,false,true,false,true,false]""") +@b JSON.json([true,false,true,false,true,false,true,false,true,false,true,false,true,false,true,false]) + +# nulls +@b JSON.parse("""[null,null,null,null,null,null,null,null,null,null,null,null,null,null,null,null]""") +@b JSON.json([nothing,nothing,nothing,nothing,nothing,nothing,nothing,nothing,nothing,nothing,nothing,nothing,nothing,nothing,nothing,nothing]) + +# strings +@b JSON.parse("""["1","ab","abc","abcd","abcde","abcdef","abcdef","abcdefg","abcdefgh","abcdefghi","abcdefghij","abcdefghijk","abcdefghijkl","abcdefghijklm","abcdefghijklmn","abcdefghijklmno"]""") +@b JSON.json(["1","ab","abc","abcd","abcde","abcdef","abcdef","abcdefg","abcdefgh","abcdefghi","abcdefghij","abcdefghijk","abcdefghijkl","abcdefghijklm","abcdefghijklmn","abcdefghijklmno"]) + +# strings with json-encoded unicode and escape sequences +@b JSON.parse("""["\\n","\\r","\\t","\\b","\\f","\\\\","\\\"","\\u1234","\\u5678","\\u9abc","\\u9abc","\\uABCD","\\u9abc","\\u1234","\\u5678","\\u9abc"]""") +@b JSON.json(["\\n","\\r","\\t","\\b","\\f","\\\\","\\\"","\\u1234","\\u5678","\\u9abc","\\u9abc","\\uABCD","\\u9abc","\\u1234","\\u5678","\\u9abc"]) + +# arrays +@b JSON.parse("""[[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16],[17,18,19,20],[21,22,23,24],[25,26,27,28],[29,30,31,32],[33,34,35,36],[37,38,39,40],[41,42,43,44],[45,46,47,48],[49,50,51,52],[53,54,55,56],[57,58,59,60],[61,62,63,64]]""") +@b 
JSON.json([[1,2,3,4],[5,6,7,8],[9,10,11,12],[13,14,15,16],[17,18,19,20],[21,22,23,24],[25,26,27,28],[29,30,31,32],[33,34,35,36],[37,38,39,40],[41,42,43,44],[45,46,47,48],[49,50,51,52],[53,54,55,56],[57,58,59,60],[61,62,63,64]]) + +# objects +@b JSON.parse("""{ "a": {"a": 1,"b": 2,"c": 3,"d": 4},"b": {"a": 5,"b": 6,"c": 7,"d": 8},"c": {"a": 9,"b": 10,"c": 11,"d": 12},"d": {"a": 13,"b": 14,"c": 15,"d": 16}}""") +@b JSON.json(Dict("a" => Dict("a" => 1,"b" => 2,"c" => 3,"d" => 4),"b" => Dict("a" => 5,"b" => 6,"c" => 7,"d" => 8),"c" => Dict("a" => 9,"b" => 10,"c" => 11,"d" => 12),"d" => Dict("a" => 13,"b" => 14,"c" => 15,"d" => 16))) + +# objects with more than 32 keys +@b JSON.parse("""{"a":1,"b":2,"c":3,"d":4,"e":5,"f":6,"g":7,"h":8,"i":9,"j":10,"k":11,"l":12,"m":13,"n":14,"o":15,"p":16,"q":17,"r":18,"s":19,"t":20,"u":21,"v":22,"w":23,"x":24,"y":25,"z":26,"aa":27,"ab":28,"ac":29,"ad":30,"ae":31,"af":32,"ag":33,"ah":34,"ai":35,"aj":36,"ak":37,"al":38,"am":39,"an":40,"ao":41,"ap":42,"aq":43,"ar":44,"as":45,"at":46,"au":47,"av":48,"aw":49,"ax":50,"ay":51,"az":52}""") +@b JSON.json(Dict("a" => 1,"b" => 2,"c" => 3,"d" => 4,"e" => 5,"f" => 6,"g" => 7,"h" => 8,"i" => 9,"j" => 10,"k" => 11,"l" => 12,"m" => 13,"n" => 14,"o" => 15,"p" => 16,"q" => 17,"r" => 18,"s" => 19,"t" => 20,"u" => 21,"v" => 22,"w" => 23,"x" => 24,"y" => 25,"z" => 26,"aa" => 27,"ab" => 28,"ac" => 29,"ad" => 30,"ae" => 31,"af" => 32,"ag" => 33,"ah" => 34,"ai" => 35,"aj" => 36,"ak" => 37,"al" => 38,"am" => 39,"an" => 40,"ao" => 41,"ap" => 42,"aq" => 43,"ar" => 44,"as" => 45,"at" =>46, "au"=>47, "av"=>48, "aw"=>49, "ax"=>50, "ay"=>51, "az"=>52)) + +# JSON.parse! 
with mutable struct +@noarg mutable struct B + a::Int + b::Int + c::Int + d::Int +end + +const b = B() +@b JSON.parse!("""{ "a": 1,"b": 2,"c": 3,"d": 4}""", b) + +const dict = Dict{String, Any}() +@b JSON.parse!("""{ "a": 1,"b": 2,"c": 3,"d": 4}""", dict) + +const ticketjson="{\"topic\":\"trade.BTCUSDT\",\"data\":[{\"symbol\":\"BTCUSDT\",\"tick_direction\":\"PlusTick\",\"price\":\"19431.00\",\"size\":0.2,\"timestamp\":\"2022-10-18T14:50:20.000Z\",\"trade_time_ms\":\"1666104620275\",\"side\":\"Buy\",\"trade_id\":\"e6be9409-2886-5eb6-bec9-de01e1ec6bf6\",\"is_block_trade\":\"false\"},{\"symbol\":\"BTCUSDT\",\"tick_direction\":\"MinusTick\",\"price\":\"19430.50\",\"size\":1.989,\"timestamp\":\"2022-10-18T14:50:20.000Z\",\"trade_time_ms\":\"1666104620299\",\"side\":\"Sell\",\"trade_id\":\"bb706542-5d3b-5e34-8767-c05ab4df7556\",\"is_block_trade\":\"false\"},{\"symbol\":\"BTCUSDT\",\"tick_direction\":\"ZeroMinusTick\",\"price\":\"19430.50\",\"size\":0.007,\"timestamp\":\"2022-10-18T14:50:20.000Z\",\"trade_time_ms\":\"1666104620314\",\"side\":\"Sell\",\"trade_id\":\"a143da10-3409-5383-b557-b93ceeba4ca8\",\"is_block_trade\":\"false\"},{\"symbol\":\"BTCUSDT\",\"tick_direction\":\"PlusTick\",\"price\":\"19431.00\",\"size\":0.001,\"timestamp\":\"2022-10-18T14:50:20.000Z\",\"trade_time_ms\":\"1666104620327\",\"side\":\"Buy\",\"trade_id\":\"7bae9053-e42b-52bd-92c5-6be8a4283525\",\"is_block_trade\":\"false\"}]}" + +struct Ticket + symbol::String + tick_direction::String + price::String + size::Float64 + timestamp::String + trade_time_ms::String + side::String + trade_id::String + is_block_trade::String +end + +struct Tape + topic::String + data::Vector{Ticket} +end + +@b JSON.parse(ticketjson) +@b JSON.parse(ticketjson, Tape) + +const ticket_obj = JSON.parse(ticketjson) +const ticket_struct = JSON.parse(ticketjson, Tape) +@b JSON.json(ticket_obj) +@b JSON.json(ticket_struct) \ No newline at end of file diff --git a/benchmarks/structs.jl b/benchmarks/structs.jl new file mode 
100644 index 00000000..415cbf06 --- /dev/null +++ b/benchmarks/structs.jl @@ -0,0 +1,212 @@ +const root_json = """ +{ + "store": { + "book": [ + { + "id": 1, + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95, + "tags": ["classic", "quotes"], + "available": true, + "metadata": null + }, + { + "id": 2, + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "isbn": "0-553-21311-3", + "price": 8.99, + "tags": ["whale", "sea", "epic"], + "available": false, + "metadata": { + "pages": 635, + "language": "en", + "awards": [] + } + }, + { + "id": 3, + "category": "fiction", + "author": "J.R.R. Tolkien", + "title": "The Lord of the Rings", + "isbn": "0-395-19395-8", + "price": 22.99, + "tags": ["fantasy", "adventure"], + "available": true, + "metadata": { + "pages": 1216, + "language": "en", + "awards": ["Prometheus Hall of Fame"] + } + } + ], + "bicycle": { + "id": "bike123", + "color": "red", + "price": 19.95, + "features": { + "gears": 21, + "electric": false, + "dimensions": { + "length_cm": 180, + "height_cm": 110, + "weight_kg": 14.5 + } + } + }, + "warehouse": [ + { + "location": "North", + "inventory": { + "books": 1500, + "bicycles": 34, + "lastRestock": "2024-11-15T10:30:00Z", + "active": true + } + }, + { + "location": "South", + "inventory": { + "books": 980, + "bicycles": 12, + "lastRestock": null, + "active": false + } + } + ] + }, + "expensive": 10, + "config": { + "version": "1.2.3", + "featuresEnabled": ["wishlist", "reviews", "recommendations"], + "limits": { + "maxBooksPerUser": 20, + "maxSessions": 5, + "discounts": { + "student": 0.15, + "senior": 0.2 + } + }, + "debug": false + }, + "users": [ + { + "id": 1001, + "name": "Alice", + "email": "alice@example.com", + "lastLogin": "2025-03-27T16:45:00Z", + "preferences": { + "language": "en", + "currency": "USD", + "newsletter": true + } + }, + { + "id": 1002, + "name": "Bob", + "email": null, + "lastLogin": null, + 
"preferences": { + "language": "fr", + "currency": "EUR", + "newsletter": false + } + } + ] +} +""" +struct Preferences + language::String + currency::String + newsletter::Bool +end + +struct User + id::Int + name::String + email::Union{String, Nothing} + lastLogin::Union{String, Nothing} + preferences::Preferences +end + +struct Discounts + student::Float64 + senior::Float64 +end + +struct Limits + maxBooksPerUser::Int + maxSessions::Int + discounts::Discounts +end + +struct Config + version::String + featuresEnabled::Vector{String} + limits::Limits + debug::Bool +end + +struct Inventory + books::Int + bicycles::Int + lastRestock::Union{String, Nothing} + active::Bool +end + +struct Warehouse + location::String + inventory::Inventory +end + +struct BikeDimensions + length_cm::Int + height_cm::Int + weight_kg::Float64 +end + +struct BikeFeatures + gears::Int + electric::Bool + dimensions::BikeDimensions +end + +struct Bicycle + id::String + color::String + price::Float64 + features::BikeFeatures +end + +struct BookMetadata + pages::Int + language::String + awards::Vector{String} +end + +struct Book + id::Int + category::String + author::String + title::String + price::Float64 + tags::Vector{String} + available::Bool + metadata::Union{BookMetadata, Nothing} +end + +struct Store + book::Vector{Book} + bicycle::Bicycle + warehouse::Vector{Warehouse} +end + +struct Root + store::Store + expensive::Int + config::Config + users::Vector{User} +end diff --git a/data/jsonchecker/fail01.json b/data/jsonchecker/fail01.json deleted file mode 100644 index 92a451e3..00000000 --- a/data/jsonchecker/fail01.json +++ /dev/null @@ -1 +0,0 @@ -fable diff --git a/data/jsonchecker/fail02.json b/data/jsonchecker/fail02.json deleted file mode 100644 index 6b7c11e5..00000000 --- a/data/jsonchecker/fail02.json +++ /dev/null @@ -1 +0,0 @@ -["Unclosed array" \ No newline at end of file diff --git a/data/jsonchecker/fail03.json b/data/jsonchecker/fail03.json deleted file mode 100644 index 
168c81eb..00000000 --- a/data/jsonchecker/fail03.json +++ /dev/null @@ -1 +0,0 @@ -{unquoted_key: "keys must be quoted"} \ No newline at end of file diff --git a/data/jsonchecker/fail04.json b/data/jsonchecker/fail04.json deleted file mode 100644 index 9de168bf..00000000 --- a/data/jsonchecker/fail04.json +++ /dev/null @@ -1 +0,0 @@ -["extra comma",] \ No newline at end of file diff --git a/data/jsonchecker/fail05.json b/data/jsonchecker/fail05.json deleted file mode 100644 index ddf3ce3d..00000000 --- a/data/jsonchecker/fail05.json +++ /dev/null @@ -1 +0,0 @@ -["double extra comma",,] \ No newline at end of file diff --git a/data/jsonchecker/fail06.json b/data/jsonchecker/fail06.json deleted file mode 100644 index ed91580e..00000000 --- a/data/jsonchecker/fail06.json +++ /dev/null @@ -1 +0,0 @@ -[ , "<-- missing value"] \ No newline at end of file diff --git a/data/jsonchecker/fail07.json b/data/jsonchecker/fail07.json deleted file mode 100644 index 8a96af3e..00000000 --- a/data/jsonchecker/fail07.json +++ /dev/null @@ -1 +0,0 @@ -["Comma after the close"], \ No newline at end of file diff --git a/data/jsonchecker/fail08.json b/data/jsonchecker/fail08.json deleted file mode 100644 index b28479c6..00000000 --- a/data/jsonchecker/fail08.json +++ /dev/null @@ -1 +0,0 @@ -["Extra close"]] \ No newline at end of file diff --git a/data/jsonchecker/fail09.json b/data/jsonchecker/fail09.json deleted file mode 100644 index 5815574f..00000000 --- a/data/jsonchecker/fail09.json +++ /dev/null @@ -1 +0,0 @@ -{"Extra comma": true,} \ No newline at end of file diff --git a/data/jsonchecker/fail10.json b/data/jsonchecker/fail10.json deleted file mode 100644 index 5d8c0047..00000000 --- a/data/jsonchecker/fail10.json +++ /dev/null @@ -1 +0,0 @@ -{"Extra value after close": true} "misplaced quoted value" \ No newline at end of file diff --git a/data/jsonchecker/fail11.json b/data/jsonchecker/fail11.json deleted file mode 100644 index 76eb95b4..00000000 --- 
a/data/jsonchecker/fail11.json +++ /dev/null @@ -1 +0,0 @@ -{"Illegal expression": 1 + 2} \ No newline at end of file diff --git a/data/jsonchecker/fail12.json b/data/jsonchecker/fail12.json deleted file mode 100644 index 77580a45..00000000 --- a/data/jsonchecker/fail12.json +++ /dev/null @@ -1 +0,0 @@ -{"Illegal invocation": alert()} \ No newline at end of file diff --git a/data/jsonchecker/fail13.json b/data/jsonchecker/fail13.json deleted file mode 100644 index 379406b5..00000000 --- a/data/jsonchecker/fail13.json +++ /dev/null @@ -1 +0,0 @@ -{"Numbers cannot have leading zeroes": 013} \ No newline at end of file diff --git a/data/jsonchecker/fail14.json b/data/jsonchecker/fail14.json deleted file mode 100644 index 0ed366b3..00000000 --- a/data/jsonchecker/fail14.json +++ /dev/null @@ -1 +0,0 @@ -{"Numbers cannot be hex": 0x14} \ No newline at end of file diff --git a/data/jsonchecker/fail15.json b/data/jsonchecker/fail15.json deleted file mode 100644 index fc8376b6..00000000 --- a/data/jsonchecker/fail15.json +++ /dev/null @@ -1 +0,0 @@ -["Illegal backslash escape: \x15"] \ No newline at end of file diff --git a/data/jsonchecker/fail16.json b/data/jsonchecker/fail16.json deleted file mode 100644 index 3fe21d4b..00000000 --- a/data/jsonchecker/fail16.json +++ /dev/null @@ -1 +0,0 @@ -[\naked] \ No newline at end of file diff --git a/data/jsonchecker/fail17.json b/data/jsonchecker/fail17.json deleted file mode 100644 index 62b9214a..00000000 --- a/data/jsonchecker/fail17.json +++ /dev/null @@ -1 +0,0 @@ -["Illegal backslash escape: \017"] \ No newline at end of file diff --git a/data/jsonchecker/fail18.json b/data/jsonchecker/fail18.json deleted file mode 100644 index bd7f1d64..00000000 --- a/data/jsonchecker/fail18.json +++ /dev/null @@ -1,2 +0,0 @@ -"mutliple" -"things" diff --git a/data/jsonchecker/fail19.json b/data/jsonchecker/fail19.json deleted file mode 100644 index 3b9c46fa..00000000 --- a/data/jsonchecker/fail19.json +++ /dev/null @@ -1 +0,0 @@ 
-{"Missing colon" null} \ No newline at end of file diff --git a/data/jsonchecker/fail20.json b/data/jsonchecker/fail20.json deleted file mode 100644 index 27c1af3e..00000000 --- a/data/jsonchecker/fail20.json +++ /dev/null @@ -1 +0,0 @@ -{"Double colon":: null} \ No newline at end of file diff --git a/data/jsonchecker/fail21.json b/data/jsonchecker/fail21.json deleted file mode 100644 index 62474573..00000000 --- a/data/jsonchecker/fail21.json +++ /dev/null @@ -1 +0,0 @@ -{"Comma instead of colon", null} \ No newline at end of file diff --git a/data/jsonchecker/fail22.json b/data/jsonchecker/fail22.json deleted file mode 100644 index a7752581..00000000 --- a/data/jsonchecker/fail22.json +++ /dev/null @@ -1 +0,0 @@ -["Colon instead of comma": false] \ No newline at end of file diff --git a/data/jsonchecker/fail23.json b/data/jsonchecker/fail23.json deleted file mode 100644 index 494add1c..00000000 --- a/data/jsonchecker/fail23.json +++ /dev/null @@ -1 +0,0 @@ -["Bad value", truth] \ No newline at end of file diff --git a/data/jsonchecker/fail24.json b/data/jsonchecker/fail24.json deleted file mode 100644 index caff239b..00000000 --- a/data/jsonchecker/fail24.json +++ /dev/null @@ -1 +0,0 @@ -['single quote'] \ No newline at end of file diff --git a/data/jsonchecker/fail25.json b/data/jsonchecker/fail25.json deleted file mode 100644 index 8b7ad23e..00000000 --- a/data/jsonchecker/fail25.json +++ /dev/null @@ -1 +0,0 @@ -[" tab character in string "] \ No newline at end of file diff --git a/data/jsonchecker/fail26.json b/data/jsonchecker/fail26.json deleted file mode 100644 index 845d26a6..00000000 --- a/data/jsonchecker/fail26.json +++ /dev/null @@ -1 +0,0 @@ -["tab\ character\ in\ string\ "] \ No newline at end of file diff --git a/data/jsonchecker/fail27.json b/data/jsonchecker/fail27.json deleted file mode 100644 index 6b01a2ca..00000000 --- a/data/jsonchecker/fail27.json +++ /dev/null @@ -1,2 +0,0 @@ -["line -break"] \ No newline at end of file diff --git 
a/data/jsonchecker/fail28.json b/data/jsonchecker/fail28.json deleted file mode 100644 index 621a0101..00000000 --- a/data/jsonchecker/fail28.json +++ /dev/null @@ -1,2 +0,0 @@ -["line\ -break"] \ No newline at end of file diff --git a/data/jsonchecker/fail29.json b/data/jsonchecker/fail29.json deleted file mode 100644 index 47ec421b..00000000 --- a/data/jsonchecker/fail29.json +++ /dev/null @@ -1 +0,0 @@ -[0e] \ No newline at end of file diff --git a/data/jsonchecker/fail30.json b/data/jsonchecker/fail30.json deleted file mode 100644 index 8ab0bc4b..00000000 --- a/data/jsonchecker/fail30.json +++ /dev/null @@ -1 +0,0 @@ -[0e+] \ No newline at end of file diff --git a/data/jsonchecker/fail31.json b/data/jsonchecker/fail31.json deleted file mode 100644 index 1cce602b..00000000 --- a/data/jsonchecker/fail31.json +++ /dev/null @@ -1 +0,0 @@ -[0e+-1] \ No newline at end of file diff --git a/data/jsonchecker/fail32.json b/data/jsonchecker/fail32.json deleted file mode 100644 index cb1f5607..00000000 --- a/data/jsonchecker/fail32.json +++ /dev/null @@ -1 +0,0 @@ -{"Comma instead of closing brace": true, diff --git a/data/jsonchecker/fail33.json b/data/jsonchecker/fail33.json deleted file mode 100644 index ca5eb19d..00000000 --- a/data/jsonchecker/fail33.json +++ /dev/null @@ -1 +0,0 @@ -["mismatch"} \ No newline at end of file diff --git a/data/jsonchecker/fail34.json b/data/jsonchecker/fail34.json deleted file mode 100644 index 7ce16bd5..00000000 --- a/data/jsonchecker/fail34.json +++ /dev/null @@ -1 +0,0 @@ -{"garbage" before : "separator"} diff --git a/data/jsonchecker/fail35.json b/data/jsonchecker/fail35.json deleted file mode 100644 index 7a469732..00000000 --- a/data/jsonchecker/fail35.json +++ /dev/null @@ -1 +0,0 @@ -{"no separator" diff --git a/data/jsonchecker/fail36.json b/data/jsonchecker/fail36.json deleted file mode 100644 index bf084005..00000000 --- a/data/jsonchecker/fail36.json +++ /dev/null @@ -1 +0,0 @@ -{"no closing brace": true diff --git 
a/data/jsonchecker/fail37.json b/data/jsonchecker/fail37.json deleted file mode 100644 index 558ed37d..00000000 --- a/data/jsonchecker/fail37.json +++ /dev/null @@ -1 +0,0 @@ -[ diff --git a/data/jsonchecker/fail38.json b/data/jsonchecker/fail38.json deleted file mode 100644 index 98232c64..00000000 --- a/data/jsonchecker/fail38.json +++ /dev/null @@ -1 +0,0 @@ -{ diff --git a/data/jsonchecker/pass01.json b/data/jsonchecker/pass01.json deleted file mode 100644 index 2c10f226..00000000 --- a/data/jsonchecker/pass01.json +++ /dev/null @@ -1,58 +0,0 @@ -[ - "JSON Test Pattern pass1", - {"object with 1 member":["array with 1 element"]}, - {}, - [], - -42, - true, - false, - null, - { - "integer": 1234567890, - "real": -9876.543210, - "e": 0.123456789e-12, - "E": 1.234567890E+34, - "": 23456789012E66, - "zero": 0, - "one": 1, - "space": " ", - "quote": "\"", - "backslash": "\\", - "controls": "\b\f\n\r\t", - "slash": "/ & \/", - "alpha": "abcdefghijklmnopqrstuvwyz", - "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ", - "digit": "0123456789", - "0123456789": "digit", - "special": "`1~!@#$%^&*()_+-={':[,]}|;.?", - "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A", - "true": true, - "false": false, - "null": null, - "array":[ ], - "object":{ }, - "address": "50 St. James Street", - "url": "http://www.JSON.org/", - "comment": "// /* */": " ", - " s p a c e d " :[1,2 , 3 - -, - -4 , 5 , 6 ,7 ],"compact":[1,2,3,4,5,6,7], - "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}", - "quotes": "" \u0022 %22 0x22 034 "", - "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?" 
-: "A key can be any string" - }, - 0.5 ,98.6 -, -99.44 -, - -1066, -1e1, -0.1e1, -1e-1, -1e00,2e+00,2e-00 -,"rosebud"] diff --git a/data/jsonchecker/pass02.json b/data/jsonchecker/pass02.json deleted file mode 100644 index fea57100..00000000 --- a/data/jsonchecker/pass02.json +++ /dev/null @@ -1 +0,0 @@ -[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]] diff --git a/data/jsonchecker/pass03.json b/data/jsonchecker/pass03.json deleted file mode 100644 index 4528d51f..00000000 --- a/data/jsonchecker/pass03.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "JSON Test Pattern pass3": { - "The outermost value": "must be an object or array.", - "In this test": "It is an object." - } -} diff --git a/data/jsonchecker/readme.txt b/data/jsonchecker/readme.txt deleted file mode 100644 index 321d89d9..00000000 --- a/data/jsonchecker/readme.txt +++ /dev/null @@ -1,3 +0,0 @@ -Test suite from http://json.org/JSON_checker/. - -If the JSON_checker is working correctly, it must accept all of the pass*.json files and reject all of the fail*.json files. 
diff --git a/data/roundtrip/roundtrip01.json b/data/roundtrip/roundtrip01.json deleted file mode 100644 index 500db4a8..00000000 --- a/data/roundtrip/roundtrip01.json +++ /dev/null @@ -1 +0,0 @@ -[null] \ No newline at end of file diff --git a/data/roundtrip/roundtrip02.json b/data/roundtrip/roundtrip02.json deleted file mode 100644 index de601e30..00000000 --- a/data/roundtrip/roundtrip02.json +++ /dev/null @@ -1 +0,0 @@ -[true] \ No newline at end of file diff --git a/data/roundtrip/roundtrip03.json b/data/roundtrip/roundtrip03.json deleted file mode 100644 index 67b2f076..00000000 --- a/data/roundtrip/roundtrip03.json +++ /dev/null @@ -1 +0,0 @@ -[false] \ No newline at end of file diff --git a/data/roundtrip/roundtrip04.json b/data/roundtrip/roundtrip04.json deleted file mode 100644 index 6e7ea636..00000000 --- a/data/roundtrip/roundtrip04.json +++ /dev/null @@ -1 +0,0 @@ -[0] \ No newline at end of file diff --git a/data/roundtrip/roundtrip05.json b/data/roundtrip/roundtrip05.json deleted file mode 100644 index 6dfd2983..00000000 --- a/data/roundtrip/roundtrip05.json +++ /dev/null @@ -1 +0,0 @@ -["foo"] \ No newline at end of file diff --git a/data/roundtrip/roundtrip06.json b/data/roundtrip/roundtrip06.json deleted file mode 100644 index 0637a088..00000000 --- a/data/roundtrip/roundtrip06.json +++ /dev/null @@ -1 +0,0 @@ -[] \ No newline at end of file diff --git a/data/roundtrip/roundtrip07.json b/data/roundtrip/roundtrip07.json deleted file mode 100644 index 9e26dfee..00000000 --- a/data/roundtrip/roundtrip07.json +++ /dev/null @@ -1 +0,0 @@ -{} \ No newline at end of file diff --git a/data/roundtrip/roundtrip08.json b/data/roundtrip/roundtrip08.json deleted file mode 100644 index bfa34124..00000000 --- a/data/roundtrip/roundtrip08.json +++ /dev/null @@ -1 +0,0 @@ -[0,1] \ No newline at end of file diff --git a/data/roundtrip/roundtrip09.json b/data/roundtrip/roundtrip09.json deleted file mode 100644 index 9f5dd4e3..00000000 --- 
a/data/roundtrip/roundtrip09.json +++ /dev/null @@ -1 +0,0 @@ -{"foo":"bar"} \ No newline at end of file diff --git a/data/roundtrip/roundtrip10.json b/data/roundtrip/roundtrip10.json deleted file mode 100644 index 2355b4df..00000000 --- a/data/roundtrip/roundtrip10.json +++ /dev/null @@ -1 +0,0 @@ -{"a":null,"foo":"bar"} \ No newline at end of file diff --git a/data/roundtrip/roundtrip11.json b/data/roundtrip/roundtrip11.json deleted file mode 100644 index 99d21a2a..00000000 --- a/data/roundtrip/roundtrip11.json +++ /dev/null @@ -1 +0,0 @@ -[-1] \ No newline at end of file diff --git a/data/roundtrip/roundtrip12.json b/data/roundtrip/roundtrip12.json deleted file mode 100644 index 56c78bef..00000000 --- a/data/roundtrip/roundtrip12.json +++ /dev/null @@ -1 +0,0 @@ -[-2147483648] \ No newline at end of file diff --git a/data/roundtrip/roundtrip13.json b/data/roundtrip/roundtrip13.json deleted file mode 100644 index 029580f6..00000000 --- a/data/roundtrip/roundtrip13.json +++ /dev/null @@ -1 +0,0 @@ -[-1234567890123456789] \ No newline at end of file diff --git a/data/roundtrip/roundtrip14.json b/data/roundtrip/roundtrip14.json deleted file mode 100644 index d8658000..00000000 --- a/data/roundtrip/roundtrip14.json +++ /dev/null @@ -1 +0,0 @@ -[-9223372036854775808] \ No newline at end of file diff --git a/data/roundtrip/roundtrip15.json b/data/roundtrip/roundtrip15.json deleted file mode 100644 index bace2a0b..00000000 --- a/data/roundtrip/roundtrip15.json +++ /dev/null @@ -1 +0,0 @@ -[1] \ No newline at end of file diff --git a/data/roundtrip/roundtrip16.json b/data/roundtrip/roundtrip16.json deleted file mode 100644 index dfe696db..00000000 --- a/data/roundtrip/roundtrip16.json +++ /dev/null @@ -1 +0,0 @@ -[2147483647] \ No newline at end of file diff --git a/data/roundtrip/roundtrip17.json b/data/roundtrip/roundtrip17.json deleted file mode 100644 index 6640b07f..00000000 --- a/data/roundtrip/roundtrip17.json +++ /dev/null @@ -1 +0,0 @@ -[4294967295] \ No newline 
at end of file diff --git a/data/roundtrip/roundtrip18.json b/data/roundtrip/roundtrip18.json deleted file mode 100644 index a3ab143b..00000000 --- a/data/roundtrip/roundtrip18.json +++ /dev/null @@ -1 +0,0 @@ -[1234567890123456789] \ No newline at end of file diff --git a/data/roundtrip/roundtrip19.json b/data/roundtrip/roundtrip19.json deleted file mode 100644 index 8ab4a507..00000000 --- a/data/roundtrip/roundtrip19.json +++ /dev/null @@ -1 +0,0 @@ -[9223372036854775807] \ No newline at end of file diff --git a/data/roundtrip/roundtrip20.json b/data/roundtrip/roundtrip20.json deleted file mode 100644 index 92df1df1..00000000 --- a/data/roundtrip/roundtrip20.json +++ /dev/null @@ -1 +0,0 @@ -[0.0] \ No newline at end of file diff --git a/data/roundtrip/roundtrip21.json b/data/roundtrip/roundtrip21.json deleted file mode 100644 index cfef8154..00000000 --- a/data/roundtrip/roundtrip21.json +++ /dev/null @@ -1 +0,0 @@ -[-0.0] \ No newline at end of file diff --git a/data/roundtrip/roundtrip22.json b/data/roundtrip/roundtrip22.json deleted file mode 100644 index a7b7eefc..00000000 --- a/data/roundtrip/roundtrip22.json +++ /dev/null @@ -1 +0,0 @@ -[1.2345] \ No newline at end of file diff --git a/data/roundtrip/roundtrip23.json b/data/roundtrip/roundtrip23.json deleted file mode 100644 index b553e84b..00000000 --- a/data/roundtrip/roundtrip23.json +++ /dev/null @@ -1 +0,0 @@ -[-1.2345] \ No newline at end of file diff --git a/data/roundtrip/roundtrip24.json b/data/roundtrip/roundtrip24.json deleted file mode 100644 index f01efb6d..00000000 --- a/data/roundtrip/roundtrip24.json +++ /dev/null @@ -1 +0,0 @@ -[5e-324] \ No newline at end of file diff --git a/data/roundtrip/roundtrip25.json b/data/roundtrip/roundtrip25.json deleted file mode 100644 index cdef14d3..00000000 --- a/data/roundtrip/roundtrip25.json +++ /dev/null @@ -1 +0,0 @@ -[2.225073858507201e-308] \ No newline at end of file diff --git a/data/roundtrip/roundtrip26.json b/data/roundtrip/roundtrip26.json 
deleted file mode 100644 index f4121b78..00000000 --- a/data/roundtrip/roundtrip26.json +++ /dev/null @@ -1 +0,0 @@ -[2.2250738585072014e-308] \ No newline at end of file diff --git a/data/roundtrip/roundtrip27.json b/data/roundtrip/roundtrip27.json deleted file mode 100644 index 17ce5211..00000000 --- a/data/roundtrip/roundtrip27.json +++ /dev/null @@ -1 +0,0 @@ -[1.7976931348623157e308] \ No newline at end of file diff --git a/docs/Project.toml b/docs/Project.toml new file mode 100644 index 00000000..1814eb33 --- /dev/null +++ b/docs/Project.toml @@ -0,0 +1,5 @@ +[deps] +Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" + +[compat] +Documenter = "1" diff --git a/docs/make.jl b/docs/make.jl new file mode 100644 index 00000000..a95f09ae --- /dev/null +++ b/docs/make.jl @@ -0,0 +1,5 @@ +using Documenter, JSON + +makedocs(modules = [JSON], sitename = "JSON.jl") + +deploydocs(repo = "github.com/JuliaIO/JSON.jl.git", push_preview = true) diff --git a/docs/src/index.md b/docs/src/index.md new file mode 100644 index 00000000..ee593a2e --- /dev/null +++ b/docs/src/index.md @@ -0,0 +1,7 @@ +# JSON.jl + +JSON Julia package repo. + +```@contents +``` + diff --git a/docs/src/migrate.md b/docs/src/migrate.md new file mode 100644 index 00000000..d5e3a5a3 --- /dev/null +++ b/docs/src/migrate.md @@ -0,0 +1,236 @@ +# Migration guides + +This guide provides an overview of how to migrate your code from either the pre-1.0 JSON.jl package to the 1.0 release or from JSON3.jl. The 1.0 release introduces several improvements and changes, particularly in how JSON is read and written, leveraging StructUtils.jl for customization and extensibility. Below, we outline the key differences and provide step-by-step instructions for updating your code. 
+ +--- + +## Migration guide from pre-1.0 -> 1.0 + +### Writing JSON +- `JSON.json` + - What stayed the same: + - Produces a compact String by default + - Can automatically serialize basic structs in a sensible way + - Can take an integer 2nd argument to induce "pretty printing" of the JSON output + - What changed: + - Can now pass `JSON.json(x; pretty=true)` or `JSON.json(x; pretty=4)` to control pretty printing + - Can pass filename as first argument to write JSON directly to a file `JSON.json(file, x)` the file name is returned + - Can pass any `IO` as 1st argument to write JSON to it: `JSON.json(io, x)` + - Circular reference tracking is fixed/improved (previously peer references were written as null) + - Explicit keyword arguments to control a number of serialization features, including: + - `omit_null::Bool` whether `nothing`/`missing` Julia values should be skipped when serializing + - `omit_empty::Bool` whether empty Julia collection values should be skipped when serializing + - `allownan::Bool` similar to the parsing keyword argument to allow/disallow writing of invalid JSON values `NaN`, `-Inf`, and `Inf` + - `ninf::String` the string to write if `allownan=true` and serializing `-Inf` + - `inf::String` the string to write if `allownan=true` and serializing `Inf` + - `nan::String` the string to write if `allownan=true` and serializing `NaN` + - `jsonlines::String` when serializing an array, write each element independently on a new line as an implicit array; can be read back when parsing by also passing `jsonlines=true` + - `inline_limit::Int` threshold number of elements in an array under which an array should be printed on a single line (only applicable when pretty printing) + - `float_style::Symbol` allowed values are `:shortest`, `:fixed`, and `:exp` corresponding to printf format styles, `%g`, `%f`, and `%e`, respectively + - `float_precision::Int` number of decimal places to use when printing floats + - Why the changes: + - Mostly just modernizing 
the interfaces (use of keyword arguments vs. positional) + - Utilizing multiple dispatch to combine `JSON.print` and `JSON.json` and provide convenience for writing to files + - Most opened issues over the last few years were about providing more controls around writing JSON without having to completely implement a custom serializer + - More consistency with `JSON.parse` keyword args with `allownan` and `jsonlines` +- `JSON.print` + - What stayed the same: + - Technically still defined for backwards compatibility, but just calls `JSON.json` under the hood + - Why the changes: + - Not necessary as all the functionality can be combined without ambiguity or overlap with `JSON.json` +- `JSON.lower` + - What stayed the same: + - Still used to transform Julia values into JSON-appropriate values + - What changed: + - `lower` technically now lives in the StructUtils.jl package (though overloading in JSON is fine) + - Can overload for a specific "style" for non-owned types, like `struct MyStyle <: JSON.JSONStyle end`, then `JSON.lower(::MyStyle, x::Rational) = (den=x.den, num=x.num)`, then have the style used when writing like `JSON.json(1//3; style=MyStyle())` + - Probably don't need to `lower` except in rare cases; there are default `lower` defintions for common types and most structs/AbstractDict/AbstractArray will work out of the box; `lower` is mostly useful when wanting to have the JSON output of a struct be a string or a number, for example, so going between aggregate/non-aggregate from Julia to JSON + - Why the changes: + - Along with the new corresponding `lift` interface, the `lower` + `lift` combination is a powerful generalization of doing "domain transformations" +- `JSON.StructuralContext` / `JSON.show_json` / `JSON.Serialization` + - What stayed the same: + - These have been removed in favor of simpler interfaces and custom `JSONStyle` subtypes + overloads + - Why the changes: + - The use of distinct contexts for different writing styles (pretty, compact) is 
unnecessary and led to code duplication + - There was often confusion about whether a custom Serialization or StructuralContext was needed and what intefaces were then required to implement + - The need to customize separators, delimiters, and indentation, while powerful, can be accomplished much simpler via keyword arguments or is not necessary at all (i.e. JSON.jl shouldn't be too concerned with how to produce anything that isn't JSON) + - Instead of overloading show_string/show_element/show_key/show_pair/show_json, `lower` can be used to accomplish any requirements of "overloading" how values are serialized; the addition of "styles" also allows customizing for non-owned types instead of needing a custom context + `show_json` method +- `JSONText` + - What changed: + - Nothing; `JSONText` can still be used to have a JSON-formatted string be written as-is when serializing + +### Reading JSON +- `JSON.parse` / `JSON.parsefile` + - What stayed the same: + - These functions take the same JSON input arguments (String, IO, or filename for `parsefile`) + - The `dicttype`, `allownan`, and `null` keyword arguments all remain and implement the same functionality + - What changed: + - `JSON.Object{String, Any}` is now the default type used when parsing instead of `Dict{String, Any}`; `JSON.Object` is a drop-in replacement for `Dict`, supporting the `AbstractDict` interface, mutation, dot-access (getproperty) to keys, memory and performance benefits for small objects vs. `Dict`, and preserves the JSON order of keys. For large objects (hundreds or thousands of keys), or to otherwise restore the pre-1.0 behavior, you can do `JSON.parse(json; dicttype=Dict{String, Any})`. 
+ - The `inttype` keyword argument has been removed + - The `allownan` keyword argument now defaults to `false` instead of `true` to provide a more accurate JSON specification behavior as the default + - The `use_mmap` keyword argument has been removed from `parsefile`; mmapping will now be decided automatically by the package and any mmaps used for parsing will be completely finalized when parsing has finished + - Numbers in JSON will now be parsed as `Int64`, `BigInt`, `Float64`, or `BigFloat`, instead of only `Int64` or `Float64`. Many JSON libraries support arbitrary precision ints/floats, and now JSON.jl does too. + - `JSON.parse(json, T)` and `JSON.parse!(json, x)` variants have been added for constructing a Julia value from JSON, or mutating an existing Julia value from JSON; `JSON.parsefile(json, T)` and `JSON.parsefile!(json, x)` are also supported; see [JSON Reading](@ref) for more details + - Why the changes: + - The `inttype` keyword argument is rare among other JSON libraries and doesn't serve a strong purpose; memory gains from possibly using smaller ints is minimal and leads to more error-prone code via overflows by trying to force integers into non-standard small types + - For the `allownan` default value change, there are many benchmarks/JSON-accuracy checking test suites that enforce adherance to the specification; following the specification by default is recommended and common across language JSON libraries + - Mmapping is an internal detail that most users shouldn't worry about anyway, and it can be done transparently without any outside affect to the user +- `JSONText` + - `JSONText` can now also be used while parsing, as a field type of a struct or directly to return the raw JSON (similar to how writing with `JSONText` works) + +## Migration guide for JSON3.jl + +The JSON.jl 1.0 release incorporates many of the design ideas that were originally developed in JSON3.jl. 
This guide helps you transition your code from JSON3.jl to JSON.jl 1.0, highlighting what's changed, what's similar, and the best way to update your code. + +### Writing JSON + +- `JSON3.write` → `JSON.json` + - What stayed the same: + - The core functionality of serializing Julia values to JSON remains the same + - Support for serializing custom structs in a sensible way + - Both can output to a string or IO + - What changed: + - Function name: `JSON3.write` becomes `JSON.json` + - Direct file writing: Instead of `open(file, "w") do io; JSON3.write(io, x); end`, you can use `JSON.json(file, x)` + - Customization framework: JSON3.jl uses StructTypes.jl, while JSON.jl 1.0 uses StructUtils.jl + - Pretty printing: `JSON3.pretty(JSON3.write(x))` becomes `JSON.json(x; pretty=true)` + - Special numeric values: In JSON3.jl, writing NaN/Inf/-Inf required passing `allow_inf=true`, in JSON.jl 1.0 you pass `allownan=true` + - Why the changes: + - Preference was given to existing JSON.jl names where possible (`JSON.json`, `allownan`, etc) + - JSON3 pretty printing support was an example of "bolted on" functionality that had a number of issues because it tried to operate on its own; in `JSON.json`, pretty printing is directly integrated with the core serializing code and thus doesn't suffer the same ergonomic problems + - StructUtils.jl is overall simpler and provides much more functionality "by default" meaning its much more invisible for majority of use-cases. 
Its design is the direct result of wanting to provide roughly similar functionality as StructTypes.jl but avoiding the pitfalls and architectural complexities it had + +- Custom Type Serialization + - What stayed the same: + - Both provide a way to customize how types are serialized + - Both support serializing custom types to any valid JSON value + - What changed: + - Interface: No need to declare `StructTypes.StructType` explicitly on structs (StructUtils.jl can detect the vast majority of struct types automatically) + - Non-owned types: JSON.jl (via StructUtils) provides the concept of defining a custom `StructStyle` subtype that allows customizing the lowering/lifting overloads of non-owned types (JSON3.jl had repeated requests/issues with users wanting more control over non-owned types without pirating) + - Why the changes: + - As noted above, the overall design of StructUtils is simpler and more automatic, with the default definitions working in the vast majority of cases. If you're the author of a custom Number, AbstractString, AbstractArray, or AbstractDict, you may need to dig further into StructUtil machinery to make your types serialize/deserialize as expected, but regular structs should "just work" + - Defining custom styles is meant to balance having to do some extra work (defining the style, passing it to `JSON.json`/`JSON.parse`) with the power and flexibility of control over how JSON serialization/deserialization work for any type, owned or not + +- Field Customization + - What stayed the same: + - Both allow renaming fields, excluding fields, and some control over manipulating fields from JSON output (keyword args, dateformats, etc.) + - What changed: + - StructUtils provides convenient "struct" macros (`@noarg`, `@kwarg`, `@tags`, `@defaults`) that allow defining default values for fields, and specifying "field tags" which are named tuples of properties for fields. 
Via field tags, fields can customize naming, ignoring/excluding, dateformating, custom lowering/lifting, and even "type choosing" for abstract types. + - Why the changes: + - The field tags and defaults of StructUtils provide very powerful and generalized abilities to specify properties for fields. These are integrated directly with the serialize/deserialize process of StructUtils and provide a seemless way to enhance and control fields as desired. Instead of providing numerous StructType overloads, we can annotate individual fields appropriately, keeping context and information tidy and close to the source. + +- Null and Empty Value Handling + - What stayed the same: + - Both allow control over including/omitting null values and empty collections + - What changed: + - Control mechanism: JSON3.jl uses `StructTypes.omitempties`, JSON.jl 1.0 uses keyword arguments `omit_null` and `omit_empty`; or struct-level overloads or annotations to control omission + +### Reading JSON + +- `JSON3.read` → `JSON.parse` / `JSON.lazy` + - What stayed the same: + - Core functionality of parsing JSON into Julia values + - Support for typed parsing into custom structs + - Lazy parsing features + - What changed: + - Function names: `JSON3.read` becomes either `JSON.parse` (eager) or `JSON.lazy` (lazy) + - Default container type: `JSON3.Object/JSON3.Array` becomes `JSON.Object{String, Any}/Vector{Any}` + - Type integration: JSON3.jl uses StructTypes.jl, JSON.jl 1.0 uses StructUtils.jl + - Lazy value access: Both use property access syntax (`obj.field`) but with slightly different semantics + - Migration examples: + ```julia + # JSON3.jl + obj = JSON3.read(json_str) + typed_obj = JSON3.read(json_str, MyType) + + # JSON.jl 1.0 + obj = JSON.parse(json_str) # eager parsing + lazy_obj = JSON.lazy(json_str) # lazy parsing + materialized = lazy_obj[] # materialize lazy value + typed_obj = JSON.parse(json_str, MyType) # typed parsing + ``` + +- Lazy Parsing + - What stayed the same: + - Both 
support lazy parsing for efficient access to parts of large JSON documents + - Both allow dot notation for accessing object fields + - What changed: + - Object types: `JSON3.Object` becomes `JSON.LazyValue` with object type + - Array indexing: Similar, but slight syntax differences for materializing values + - Materialization: In JSON3.jl specific values materialize when accessed, in JSON.jl 1.0 you explicitly use `[]` to materialize + - Migration examples: + ```julia + # JSON3.jl + obj = JSON3.read(json_str) + value = obj.deeply.nested.field # value is materialized + + # JSON.jl 1.0 + obj = JSON.lazy(json_str) + lazy_value = obj.deeply.nested.field # still lazy + value = obj.deeply.nested.field[] # now materialized + ``` + - Why the changes: + - The lazy support in JSON.jl is truly lazy and the underlying JSON is only parsed/navigated as explicitly requested. JSON3.jl still fully parsed the JSON into a fairly compact binary representation, avoiding full materialization of objects and arrays. + +- Typed Parsing + - What stayed the same: + - Both allow parsing directly into custom types + - Both support object mapping, handling nested types, unions with Nothing/Missing + - What changed: + - Interface: `StructTypes.StructType` becomes JSON.jl's StructUtils integration + - Default values: `StructTypes.defaults` becomes `@defaults` macro + - Type selection: Custom JSON3 dispatching becomes `JSON.@choosetype` + - Migration examples: + ```julia + # JSON3.jl + StructTypes.StructType(::Type{MyType}) = StructTypes.Struct() + StructTypes.defaults(::Type{MyType}) = (field1=0, field2="default") + + # Type selection in JSON3.jl + StructTypes.StructType(::Type{AbstractParent}) = StructTypes.AbstractType() + StructTypes.subtypes(::Type{AbstractParent}) = (a=ConcreteA, b=ConcreteB) + + # JSON.jl 1.0 + @defaults struct MyType + field1::Int = 0 + field2::String = "default" + end + + # Type selection in JSON.jl 1.0 + JSON.@choosetype AbstractParent x -> x.type[] == "a" ? 
ConcreteA : ConcreteB + ``` + +- Custom Field Mapping + - What stayed the same: + - Both support mapping between JSON property names and struct field names + - Both handle date formatting and other special types + - What changed: + - Interface: JSON3.jl uses `StructTypes.names`, JSON.jl 1.0 uses field tags + - Date handling: Different formats for specifying date formats + - Migration examples: + ```julia + # JSON3.jl + StructTypes.names(::Type{MyType}) = ((:json_name, :struct_field),) + StructTypes.keywordargs(::Type{MyType}) = (date_field=(dateformat=dateformat"yyyy-mm-dd",),) + + # JSON.jl 1.0 + @tags struct MyType + struct_field::Int &(json=(name="json_name",),) + date_field::Date &(json=(dateformat="yyyy-mm-dd",),) + end + ``` + +### Features unique to each library + +- Only in JSON3.jl: + - **Struct Generation**: The ability to automatically generate Julia struct definitions from JSON examples + ```julia + # This functionality is not available in JSON.jl 1.0 + struct_def = JSON3.generate_struct(json_data, "MyStruct") + ``` + - If you rely heavily on this feature, continue using JSON3.jl for this specific purpose until this functionality is migrated to a separate package + +- Only in JSON.jl 1.0: + - **Enhanced JSON Lines Support**: Better handling of JSON Lines format with auto-detection for files with `.jsonl` extension + - **More Float Formatting Controls**: Additional options for float precision and format style + - **Improved Circular Reference Handling**: Better detection and handling of circular references diff --git a/docs/src/reading.md b/docs/src/reading.md new file mode 100644 index 00000000..765ef521 --- /dev/null +++ b/docs/src/reading.md @@ -0,0 +1,421 @@ +# JSON Reading + +This guide to reading JSON in the JSON.jl package aims to: + - Provide a comprehensive overview of the JSON reading process. + - Explain the various options and configurations available for reading JSON data. 
+ - Offer practical examples to illustrate the usage of different functions and options. + +```@contents +``` + +## Core JSON Parsing - `JSON.lazy` and `JSON.LazyValue` + +There are several "entrypoints" to reading JSON in JSON.jl, including: + - `JSON.parse`/`JSON.parse!` + - `JSON.parsefile`/`JSON.parsefile!` + - `JSON.lazy`/`JSON.lazyfile` + - `JSON.isvalidjson` + +These functions are all built to accept the same kinds of JSON inputs: + +| Accepted `json` sources | Notes | +|--------------------------------------------|---------------------------------------------------| +| `AbstractString` | UTF‑8; UTF‑8‑BOM handled automatically | +| `AbstractVector{UInt8}` | zero‑copy if already bytes | +| `IO`, `IOStream`, `Base.AbstractCmd` | stream fully read into a byte vector | + +The core JSON parsing machinery is hence built around having an `AbstractVector{UInt8}` or `AbstractString` JSON input where individual bytes can be parsed to identify JSON structure, validate syntax, and ultimately produce Julia-level values. + +Each entrypoint function first calls `JSON.lazy`, which will consume the JSON input until the type of the next JSON value can be identified (`{` for objects, `[` for arrays, `"` for strings, `t` for true, `f` for false, `n` for null, and `-` or a digit for numbers). `JSON.lazy` returns a `JSON.LazyValue`, which wraps the JSON input buffer (`AbstractVector{UInt8}` or `AbstractString`), and marks the byte position the value starts at, the type of the value, and any keyword arguments that were provided that may affect parsing. 
Currently supported parsing-specific keyword arguments to `JSON.lazy` (and thus all other entrypoint functions) include: + + - `allownan::Bool = false`: whether "special" float values should be allowed while parsing (`NaN`, `Inf`, `-Inf`); these values are specifically _not allowed_ in the JSON spec, but many JSON libraries allow reading/writing + - `ninf::String = "-Infinity"`: the string that will be used to parse `-Inf` if `allownan=true` + - `inf::String = "Infinity"`: the string that will be used to parse `Inf` if `allownan=true` + - `nan::String = "NaN"`: the string that will be used to parse `NaN` if `allownan=true` + - `jsonlines::Bool = false`: whether the JSON input should be treated as an implicit array, with newlines separating individual JSON elements with no leading `'['` or trailing `']'` characters. Common in logging or streaming workflows. Defaults to `true` when used with `JSON.parsefile` and the filename extension is `.jsonl` or `ndjson`. Note this ensures that parsing will _always_ return an array at the root-level. + - Materialization-specific keyword arguments (i.e. they affect materialization, but not parsing) + - `dicttype = JSON.Object{String, Any}`: type to parse JSON objects as by default (recursively) + - `null = nothing`: value to return for JSON `null` value + +So what can we do with a `JSON.LazyValue`? + +```julia +julia> x = JSON.lazy("{\"a\": 1, \"b\": null, \"c\": true, \"d\": false, \"e\": \"\", \"f\": [1,2,3], \"g\": {\"h\":{\"i\":\"foo\"}}}") +LazyObject{String} with 7 entries: + "a" => JSON.LazyValue(1) + "b" => JSON.LazyValue(nothing) + "c" => JSON.LazyValue(true) + "d" => JSON.LazyValue(false) + "e" => JSON.LazyValue("") + "f" => LazyValue[JSON.LazyValue(1), JSON.LazyValue(2), JSON.LazyValue(3)] + "g" => LazyObject{String}("h"=>LazyObject{String}("i"=>JSON.LazyValue("foo"))) +``` + +Note that for convenience at the REPL, special `show` overloads enable displaying the full contents of lazy values. 
In reality, remember the `LazyValue` only marks the _position_ of a value within the JSON. +`LazyValue`s support convenient syntax for both _navigating_ their structure and _materializing_, with an aim +to support lazy workflows. Examples include: + +```julia +# convenient "get" syntax on lazy objects +julia> x.a +JSON.LazyValue(1) + +julia> x[:b] +JSON.LazyValue(nothing) + +julia> x["c"] +JSON.LazyValue(true) + +julia> propertynames(x) +7-element Vector{Symbol}: + :a + :b + :c + :d + :e + :f + :g + +julia> x.g.h.i +JSON.LazyValue("foo") + +# array indexing on lazy arrays +julia> x.f[1] +JSON.LazyValue(1) + +julia> x.f[end] +JSON.LazyValue(3) + +julia> x.f[1:3] +3-element StructUtils.Selectors.List{Any}: + JSON.LazyValue(1) + JSON.LazyValue(2) + JSON.LazyValue(3) + +# default materialization of any LazyValue via empty getindex +julia> x.a[] +1 + +julia> x[] +JSON.Object{String, Any} with 7 entries: + "a" => 1 + "b" => nothing + "c" => true + "d" => false + "e" => "" + "f" => Any[1, 2, 3] + "g" => Object{String, Any}("h"=>Object{String, Any}("i"=>"foo")) +``` + +Let's take a closer look at one of these examples and talk through what's going on under the hood. For `x.g.h.i`, this deeply nested access of the `"foo"` value, is a chain of `getproperty` calls, with each call (i.e. `y = x.g`, then `z = y.h`, etc.) returning a `LazyValue` of where the next nested object begins in the raw JSON. With the final `getproperty` call (`h.i`), a non-object `LazyValue("foo")` is returned. In our raw JSON, the `"foo"` value is located near the end, so we can infer that by doing `x.g.h.i`, the underlying JSON was parsed or _navigated_ until the `i` key was found and its value returned. In this example, `"foo"` is indeed the last value in our raw JSON, but in the example of `x.c`, we can also be assured that only as much JSON as necessary was parsed/navigated before returning `LazyValue(true)`. In this way, the various syntax calls (`getproperty`, `getindex`, etc.) 
on `LazyValue`s can be thought of as purely _navigational_ as opposed to anything related to _materialization_. Indeed, the very purpose of the lazy machinery in JSON.jl is to _allow_ lazily navigating, specifically _without_ needing to materialize anything along the way. + +Ok, but at some point, we _do_ actually need Julia values to operate on, so let's shift to how _materialization_ works in JSON.jl. + +## `JSON.parse` - Untyped materialization + +In the `LazyValue` syntax example, it was shown that empty `getindex` will result in a "default" materialization of a `LazyValue`: + +```julia +julia> x[] +JSON.Object{String, Any} with 7 entries: + "a" => 1 + "b" => nothing + "c" => true + "d" => false + "e" => "" + "f" => Any[1, 2, 3] + "g" => Object{String, Any}("h"=>Object{String, Any}("i"=>"foo")) +``` + +Under the hood, this `getindex` call is really calling `JSON.parse(lazyvalue)`. `JSON.parse` can also be called as a main entrypoint function with all the same input types as `JSON.lazy`. This form of `parse` is referred to as "untyped parsing" or "untyped materialization". It allocates and _materializes_ the raw JSON values into appropriate "default" Julia-level values. In particular: + +| JSON construct | Default Julia value | +|----------------|---------------------------------------------------------------------------| +| object | `JSON.Object{String,Any}` (order‑preserving drop-in replacement for Dict) | +| array | `Vector{Any}` | +| string | `String` | +| number | `Int64`, `BigInt`, `Float64`, or `BigFloat` | +| `null` | `nothing` | +| `true/false` | `Bool` | + +Mostly vanilla, but what is `JSON.Object`? It is a custom `AbstractDict` using an internal linked-list implementation that preserves insertion order, behaves as a drop-in replacement for `Dict`, and allows memory and performance benefits vs. `Dict` for small # of entries. It also supports natural JSON-object-like +syntax for accessing or setting values, like `x.g.h.i` and `x.c = false`. 
+ +Because `Object` uses a linked-list implementation, key lookups are `O(n)`, performing a linear scan on each access. For small number of entries (dozens), the real-performance difference vs. `Dict` hash-lookup is negligible, but for large objects, this can be prohibitive. For these cases, it's recommended to materialize JSON objects as regular Julia `Dict`, by utilizing the `dicttype` keyword argument, like: `JSON.parse(json; dicttype=Dict{String, Any})`. + +## `JSON.parse` - Typed materialization + +While untyped materialization is convenient for quick exploration, one of the most powerful features of JSON.jl is its ability to directly parse JSON into concrete Julia types. This is done by providing a type as the second argument to `JSON.parse` and opens up a world of type-safe JSON parsing with minimal boilerplate. + +### Basic usage with structs + +Let's start with a simple example. Suppose we have a Julia struct and a JSON string we want to parse into that type: + +```julia +struct Person + name::String + age::Int +end + +json = """{"name": "Alice", "age": 30}""" +person = JSON.parse(json, Person) +# Person("Alice", 30) +``` + +With this approach, JSON.jl automatically: +- Matches JSON object keys to struct field names +- Converts values to the appropriate field types +- Constructs the struct with the parsed values + +This works for nested structs too: + +```julia +struct Address + street::String + city::String + country::String +end + +struct Employee + name::String + age::Int + address::Address +end + +json = """ +{ + "name": "Bob", + "age": 42, + "address": { + "street": "123 Main St", + "city": "Anytown", + "country": "USA" + } +} +""" + +employee = JSON.parse(json, Employee) +``` + +### Arrays and collections + +You can parse JSON arrays directly into Julia arrays with a specific element type: + +```julia +# Parse into a Vector of integers +ints = JSON.parse("[1, 2, 3, 4, 5]", Vector{Int}) +# 5-element Vector{Int64}: [1, 2, 3, 4, 5] + +# Parse into a 
Vector of custom structs +people = JSON.parse(""" +[ + {"name": "Alice", "age": 30}, + {"name": "Bob", "age": 42} +] +""", Vector{Person}) +# 2-element Vector{Person}: [Person("Alice", 30), Person("Bob", 42)] +``` + +A particularly powerful feature is the ability to parse nested arrays into multi-dimensional arrays: + +```julia +# Parse a nested array into a Matrix +matrix = JSON.parse("[[1, 2], [3, 4]]", Matrix{Int}) +# 2×2 Matrix{Int64}: +# 1 3 +# 2 4 +``` + +Note that for matrices, JSON.jl expects column-major order (Julia's native format). The innermost arrays become the columns of the matrix. + +### Primitive and simple types + +JSON.jl can also parse JSON values directly into primitive types: + +```julia +# Parse a JSON number into an Int +n = JSON.parse("42", Int) +# 42 + +# Parse a JSON string into a String +s = JSON.parse("\"hello\"", String) +# "hello" + +# Parse a JSON string into a custom type like UUID +uuid = JSON.parse("\"123e4567-e89b-12d3-a456-426614174000\"", UUID) +# UUID("123e4567-e89b-12d3-a456-426614174000") + +# Parse a JSON string into a Date +date = JSON.parse("\"2023-05-08\"", Date) +# Date("2023-05-08") +``` + +### Type conversions and handling nulls + +JSON.jl provides smart handling for Union types, especially for dealing with potentially null values: + +```julia +struct OptionalData + id::Int + description::Union{String, Nothing} + score::Union{Float64, Missing} +end + +json = """ +{ + "id": 123, + "description": null, + "score": null +} +""" + +data = JSON.parse(json, OptionalData) +# OptionalData(123, nothing, missing) +``` + +Note how JSON.jl automatically: +- Converts JSON `null` to Julia `nothing` for `Union{T, Nothing}` fields +- Converts JSON `null` to Julia `missing` for `Union{T, Missing}` fields + +### Field customization through tags + +You can customize how JSON fields map to struct fields using "field tags" from StructUtils.jl via the struct macros (`@tags`, `@defaults`, `@kwarg`, or `@noarg`): + +```julia +using JSON, 
StructUtils + +@tags struct UserProfile + user_id::Int &(json=(name="id",),) + first_name::String &(json=(name="firstName",),) + last_name::String &(json=(name="lastName",),) + birth_date::Date &(json=(dateformat=dateformat"yyyy/mm/dd",),) +end + +json = """ +{ + "id": 42, + "firstName": "Jane", + "lastName": "Doe", + "birth_date": "1990/01/15" +} +""" + +user = JSON.parse(json, UserProfile) +# UserProfile(42, "Jane", "Doe", Date("1990-01-15")) +``` + +The `&(json=(name="...",),)` syntax lets you: +- Map differently named JSON keys to your struct fields +- Specify custom date formats for parsing dates +- And many other customizations + +Field tags are really named tuples of values, prefixed with the `&` character, so note the trailing `,` when the named tuple has a single element. +Also note that in this example, we "namespaced" our field tags with the `json=(...)` key. Then when "making" our struct, only the `json=(...)` field tags +are considered. This is because JSON.jl defines `json` as a "field tag key" for its custom `JSONStyle`, then passes a `JSONStyle` to be used when parsing. +That means you could specify the field tag like `&(name="id",)`, but if the field is then also used by any other package, it would also see that name. +Sometimes that may be desirable, but there are also cases where you want the namespacing, like: `&(json=(name="id",), postgres=(name="user_id",))`. 
+ +### Default values with `@defaults` + +When some JSON fields might be missing, you can provide default values similar to field tags using any of the struct macros (`@tags`, `@defaults`, `@kwarg`, or `@noarg`): + +```julia +@defaults struct Configuration + port::Int = 8080 + host::String = "localhost" + debug::Bool = false + timeout::Int = 30 +end + +# Even with missing fields, parsing succeeds with defaults +json = """{"port": 9000}""" +config = JSON.parse(json, Configuration) +# Configuration(9000, "localhost", false, 30) +``` + +### Advanced Example: The FrankenStruct + +Let's explore a more comprehensive example that showcases many of JSON.jl's advanced typed parsing features: + +```julia +using Dates, JSON, StructUtils + +# First, define some types for polymorphic parsing +abstract type AbstractMonster end + +struct Dracula <: AbstractMonster + num_victims::Int +end + +struct Werewolf <: AbstractMonster + witching_hour::DateTime +end + +# Define a custom type chooser for AbstractMonster +JSON.@choosetype AbstractMonster x -> x.monster_type[] == "vampire" ? 
Dracula : Werewolf + +# Define a custom numeric type with special parsing +struct Percent <: Number + value::Float64 +end + +# Custom lifting for the Percent type +JSON.lift(::Type{Percent}, x) = Percent(Float64(x)) +StructUtils.liftkey(::Type{Percent}, x::String) = Percent(parse(Float64, x)) + +# Our complex struct with various field types and defaults +@defaults struct FrankenStruct + id::Int = 0 + name::String = "Jim" + address::Union{Nothing, String} = nothing + rate::Union{Missing, Float64} = missing + type::Symbol = :a &(json=(name="franken_type",),) + notsure::Any = nothing + monster::AbstractMonster = Dracula(0) + percent::Percent = Percent(0.0) + birthdate::Date = Date(0) &(json=(dateformat="yyyy/mm/dd",),) + percentages::Dict{Percent, Int} = Dict{Percent, Int}() + json_properties::JSONText = JSONText("") + matrix::Matrix{Float64} = Matrix{Float64}(undef, 0, 0) +end + +# A complex JSON input with various features to demonstrate +json = """ +{ + "id": 1, + "address": "123 Main St", + "rate": null, + "franken_type": "b", + "notsure": {"key": "value"}, + "monster": { + "monster_type": "vampire", + "num_victims": 10 + }, + "percent": 0.1, + "birthdate": "2023/10/01", + "percentages": { + "0.1": 1, + "0.2": 2 + }, + "json_properties": {"key": "value"}, + "matrix": [[1.0, 2.0], [3.0, 4.0]], + "extra_key": "extra_value" +} +""" + +franken = JSON.parse(json, FrankenStruct) +``` + +Let's walk through some notable features of the example above: + * The `name` field isn't present in the JSON input, so the default value of `"Jim"` is used. + * The `address` field uses a default `@choosetype` to determine that the JSON value is not `null`, so a `String` should be parsed for the field value. + * The `rate` field has a `null` JSON value, so the default `@choosetype` recognizes it should be "lifted" to `Missing`, which then uses a predefined `lift` definition for `Missing`. 
 + * The `type` field is a `Symbol`, and has a fieldtag `json=(name="franken_type",)` which means the JSON key `franken_type` will be used to set the field value instead of the default `type` field name. A default `lift` definition for `Symbol` is used to convert the JSON string value to a `Symbol`. + * The `notsure` field is of type `Any`, so the default object type `JSON.Object{String, Any}` is used to materialize the JSON value. + * The `monster` field is a polymorphic type, and the JSON value has a `monster_type` key that determines which concrete type to use. The `@choosetype` macro is used to define the logic for choosing the concrete type based on the JSON input. Note that the `x` in `@choosetype` is a `LazyValue`, so we materialize via `x.monster_type[]` in order to compare with the string `"vampire"`. + * The `percent` field is a custom type `Percent` and the `JSON.lift` function defines how to construct a `Percent` from the JSON value, which is a `Float64` in this case. + * The `birthdate` field uses a custom date format for parsing, specified via its field tag (`dateformat="yyyy/mm/dd"`). + * The `percentages` field is a dictionary with keys of type `Percent`, which is a custom type. The `liftkey` function is defined to convert the JSON string keys to `Percent` types (parses the Float64 manually) + * The `json_properties` field has a type of `JSONText`, which means the raw JSON will be preserved as a String of the `JSONText` type. + * The `matrix` field is a `Matrix{Float64}`, so the JSON input array-of-arrays is materialized as such. + * The `extra_key` field is not defined in the `FrankenStruct` type, so it is ignored and skipped over. 
diff --git a/docs/src/reference.md b/docs/src/reference.md new file mode 100644 index 00000000..e99a5afb --- /dev/null +++ b/docs/src/reference.md @@ -0,0 +1,8 @@ +# API Reference + +```@contents +``` + +```@autodocs +Modules = [JSON] +``` diff --git a/docs/src/writing.md b/docs/src/writing.md new file mode 100644 index 00000000..d4c556a1 --- /dev/null +++ b/docs/src/writing.md @@ -0,0 +1,521 @@ +# JSON Writing + +This guide to writing JSON in the JSON.jl package aims to: + - Provide a comprehensive overview of the JSON serialization process. + - Explain the various options and configurations available for writing JSON data. + - Offer practical examples to illustrate the usage of different functions and options. + +```@contents +``` + +## Core JSON Serialization - `JSON.json` + +The main entrypoint for serializing Julia values to JSON in JSON.jl is the `JSON.json` function. This function offers flexible output options: + +```julia +# Serialize to a String +JSON.json(x) -> String + +# Serialize to an IO object +JSON.json(io::IO, x) -> IO + +# Serialize to a file +JSON.json(file_name::String, x) -> String +``` + +The `JSON.json` function accepts a wide range of Julia types and transforms them into their JSON representation by knowing how to serialize a core set of types: + +| Julia type | JSON representation | +|------------------------------------|------------------------------------------| +| `Nothing` | `null` | +| `Bool` | `true` or `false` | +| `Number` | Numeric value (integer or floating point) | +| `AbstractString` | String with escaped characters | +| `AbstractDict`/`NamedTuple` | Object (`{}`) | +| `AbstractVector`/`Tuple`/`Set` | Array (`[]`) | +| Custom structs | Object (`{}`) with fields as keys | +| `JSONText` | Raw JSON (inserted as-is) | + +For values that don't fall into one of the above categories, `JSON.lower` will be called allowing a "domain transformation" from Julia value to an appropriate representation of the categories above. 
+ +## Customizing JSON Output + +`JSON.json` supports numerous keyword arguments to control how data is serialized: + +### Pretty Printing + +By default, `JSON.json` produces compact JSON without extra whitespace. For human-readable output: + +```julia +# Boolean flag for default pretty printing (2-space indent) +JSON.json(x; pretty=true) + +# Or specify custom indentation level +JSON.json(x; pretty=4) # 4-space indentation +``` + +Example of pretty printing: + +```julia +data = Dict("name" => "Alice", "scores" => [95, 87, 92]) + +# Compact output +JSON.json(data) +# {"name":"Alice","scores":[95,87,92]} + +# Pretty printed +JSON.json(data; pretty=true) +# { +# "name": "Alice", +# "scores": [ +# 95, +# 87, +# 92 +# ] +# } +``` + +When pretty printing, you can also control which arrays get printed inline versus multiline using the `inline_limit` option: + +```julia +JSON.json(data; pretty=true, inline_limit=10) +# { +# "name": "Alice", +# "scores": [95, 87, 92] +# } +``` + +### Null and Empty Value Handling + +JSON.json provides options to control how `nothing`, `missing`, and empty collections are handled: + +```julia +struct Person + name::String + email::Union{String, Nothing} + phone::Union{String, Nothing} + tags::Vector{String} +end + +person = Person("Alice", "alice@example.com", nothing, String[]) + +# Default behavior writes all values, including null +JSON.json(person) +# {"name":"Alice","email":"alice@example.com","phone":null,"tags":[]} + +# Exclude null values +JSON.json(person; omit_null=true) +# {"name":"Alice","email":"alice@example.com","tags":[]} + +# Omit empty collections as well +JSON.json(person; omit_null=true, omit_empty=true) +# {"name":"Alice","email":"alice@example.com"} +``` + +Note that we can also control whether null or empty values are omitted at the type level, either by overloading `omit_null`/`omit_empty` functions: +```julia +JSON.omit_null(::Type{Person}) = true +``` + +Or by using a convenient macro annotation when defining the 
struct: +```julia +@omit_null struct Person + name::String + email::Union{String, Nothing} + phone::Union{String, Nothing} + tags::Vector{String} +end +``` + +### Special Numeric Values + +By default, JSON.json throws an error when trying to serialize `NaN`, `Inf`, or `-Inf` as they are not valid JSON. However, you can enable them with the `allownan` option: + +```julia +numbers = [1.0, NaN, Inf, -Inf] + +# Default behavior throws an error +try + JSON.json(numbers) +catch e + println(e) +end +# ArgumentError("NaN not allowed to be written in JSON spec; pass `allownan=true` to allow anyway") + +# With allownan=true +JSON.json(numbers; allownan=true) +# [1.0,NaN,Infinity,-Infinity] + +# Custom representations +JSON.json(numbers; allownan=true, nan="null", inf="1e999", ninf="-1e999") +# [1.0,null,1e999,-1e999] +``` + +### Float Formatting + +Control how floating-point numbers are formatted in the JSON output: + +```julia +pi_value = [Float64(π)] + +# Default format (shortest representation) +JSON.json(pi_value) +# [3.141592653589793] + +# Fixed decimal notation +JSON.json(pi_value; float_style=:fixed, float_precision=2) +# [3.14] + +# Scientific notation +JSON.json(pi_value; float_style=:exp, float_precision=3) +# [3.142e+00] +``` + +### JSON Lines Format + +The JSON Lines format is useful for streaming records where each line is a JSON value: + +```julia +records = [ + Dict("id" => 1, "name" => "Alice"), + Dict("id" => 2, "name" => "Bob"), + Dict("id" => 3, "name" => "Charlie") +] + +# Standard JSON array +JSON.json(records) +# [{"id":1,"name":"Alice"},{"id":2,"name":"Bob"},{"id":3,"name":"Charlie"}] + +# JSON Lines format; each object on its own line, no begining or ending square brackets +JSON.json(records; jsonlines=true) +# {"id":1,"name":"Alice"} +# {"id":2,"name":"Bob"} +# {"id":3,"name":"Charlie"} +``` + +## Customizing Types + +### Using `JSON.JSONText` + +The `JSONText` type allows you to insert raw, pre-formatted JSON directly: + +```julia +data = Dict( + 
"name" => "Alice", + "config" => JSON.JSONText("{\"theme\":\"dark\",\"fontSize\":16}") +) + +JSON.json(data) +# {"name":"Alice","config":{"theme":"dark","fontSize":16}} +``` + +### Custom Type Serialization with `lower` + +For full control over how a type is serialized, you can define a `JSON.lower` method: + +```julia +struct Coordinate + lat::Float64 + lon::Float64 +end + +# Serialize as an array instead of an object +JSON.lower(c::Coordinate) = [c.lat, c.lon] + +point = Coordinate(40.7128, -74.0060) +JSON.json(point) +# [40.7128,-74.006] + +# For serializing custom formats +struct UUID + value::String +end + +JSON.lower(u::UUID) = u.value + +JSON.json(UUID("123e4567-e89b-12d3-a456-426614174000")) +# "123e4567-e89b-12d3-a456-426614174000" +``` + +### Custom Serialization for Non-Owned Types + +To customize serialization for types you don't own (those from other packages), you can use a custom style: + +```julia +using Dates + +# Create a custom style that inherits from JSONStyle +struct DateTimeStyle <: JSON.JSONStyle end + +# Define how to serialize Date and DateTime in this style +JSON.lower(::DateTimeStyle, d::Date) = string(d) +JSON.lower(::DateTimeStyle, dt::DateTime) = Dates.format(dt, "yyyy-mm-dd HH:MM:SS") + +# Use the custom style +JSON.json(Date(2023, 1, 1); style=DateTimeStyle()) +# "2023-01-01" + +JSON.json(DateTime(2023, 1, 1, 12, 30, 45); style=DateTimeStyle()) +# "2023-01-01 12:30:45" +``` + +## Customizing Struct Serialization + +### Field Names and Tags + +The JSON.jl package integrates with StructUtils.jl for fine-grained control over struct serialization. 
StructUtils.jl provides convenient "struct" macros: + - `@noarg`: generates a "no-argument" constructor (`T()`) + - `@kwarg`: generates an all-keyword-argument constructor, similar to `Base.@kwdef`; (`T(; kw1=v1, kw2=v2, ...)`) + - `@tags`/`@defaults`: convenience macros to enable specifying field defaults and field tags + +Each struct macro also supports the setting of field default values (using the same syntax as `Base.@kwdef`), as well as specifying "field tags" +using the `&(tag=val,)` syntax. + +```julia +using JSON, StructUtils + +# Using the @tags macro to customize field serialization +@tags struct User + user_id::Int &(json=(name="id",),) + first_name::String &(json=(name="firstName",),) + last_name::String &(json=(name="lastName",),) + created_at::DateTime &(json=(dateformat="yyyy-mm-dd",),) + internal_note::String &(json=(ignore=true,),) +end + +user = User(123, "Jane", "Doe", DateTime(2023, 5, 8), "Private note") + +JSON.json(user) +# {"id":123,"firstName":"Jane","lastName":"Doe","created_at":"2023-05-08"} +``` + +The various field tags allow: +- Renaming fields with `name` +- Custom date formatting with `dateformat` +- Excluding fields from JSON output with `ignore=true` + +### Default Values with `@defaults` + +Combine with the `@defaults` macro to provide default values: + +```julia +@defaults struct Configuration + port::Int = 8080 + host::String = "localhost" + debug::Bool = false + timeout::Int = 30 +end + +config = Configuration(9000) +JSON.json(config) +# {"port":9000,"host":"localhost","debug":false,"timeout":30} +``` + +## Handling Circular References + +`JSON.json` automatically detects circular references to prevent infinite recursion: + +```julia +mutable struct Node + value::Int + next::Union{Nothing, Node} +end + +# Create a circular reference +node = Node(1, nothing) +node.next = node + +# Without circular detection, this would cause a stack overflow +JSON.json(node; omit_null=false) +# {"value":1,"next":null} +``` + +## Custom 
Dictionary Key Serialization + +For dictionaries with non-string keys, `JSON.json` has a few default `lowerkey` definitions to convert keys to strings: + +```julia +# Integer keys +JSON.json(Dict(1 => "one", 2 => "two")) +# {"1":"one","2":"two"} + +# Symbol keys +JSON.json(Dict(:name => "Alice", :age => 30)) +# {"name":"Alice","age":30} + +# Custom key serialization +struct CustomKey + id::Int +end + +dict = Dict(CustomKey(1) => "value1", CustomKey(2) => "value2") +try + JSON.json(dict) +catch e + println(e) +end +# ArgumentError("No key representation for CustomKey. Define StructUtils.lowerkey(::CustomKey)") + +# Define how the key should be converted to a string +StructUtils.lowerkey(::JSON.JSONStyle, k::CustomKey) = "key-$(k.id)" + +JSON.json(dict) +# {"key-1":"value1","key-2":"value2"} +``` + +## Advanced Example: The FrankenStruct + +Let's explore a comprehensive example that showcases many of JSON.jl's advanced serialization features: + +```julia +using Dates, JSON, StructUtils + +abstract type AbstractMonster end + +struct Dracula <: AbstractMonster + num_victims::Int +end + +struct Werewolf <: AbstractMonster + witching_hour::DateTime +end + +struct Percent <: Number + value::Float64 +end + +JSON.lower(x::Percent) = x.value +StructUtils.lowerkey(x::Percent) = string(x.value) + +@noarg mutable struct FrankenStruct + id::Int + name::String # no default to show serialization of an undefined field + address::Union{Nothing, String} = nothing + rate::Union{Missing, Float64} = missing + type::Symbol = :a &(json=(name="franken_type",),) + notsure::Any = JSON.Object("key" => "value") + monster::AbstractMonster = Dracula(10) &(json=(lower=x -> x isa Dracula ? 
+ (monster_type="vampire", num_victims=x.num_victims) : + (monster_type="werewolf", witching_hour=x.witching_hour),),) + percent::Percent = Percent(0.5) + birthdate::Date = Date(2025, 1, 1) &(json=(dateformat="yyyy/mm/dd",),) + percentages::Dict{Percent, Int} = Dict{Percent, Int}(Percent(0.0) => 0, Percent(1.0) => 1) + json_properties::JSONText = JSONText("{\"key\": \"value\"}") + matrix::Matrix{Float64} = [1.0 2.0; 3.0 4.0] + extra_field::Any = nothing &(json=(ignore=true,),) +end + +franken = FrankenStruct() +franken.id = 1 + +json = JSON.json(franken) +# "{\"id\":1,\"name\":null,\"address\":null,\"rate\":null,\"franken_type\":\"a\",\"notsure\":{\"key\":\"value\"},\"monster\":{\"monster_type\":\"vampire\",\"num_victims\":10},\"percent\":0.5,\"birthdate\":\"2025/01/01\",\"percentages\":{\"1.0\":1,\"0.0\":0},\"json_properties\":{\"key\": \"value\"},\"matrix\":[[1.0,3.0],[2.0,4.0]]}" +``` + +Let's analyze each part of this complex example to understand how JSON.jl's serialization features work: + +### Custom Type Serialization Strategy + +1. **The `AbstractMonster` Type Hierarchy**: + - We define an abstract type `AbstractMonster` with two concrete subtypes: `Dracula` and `Werewolf` + - Each type contains type-specific data (number of victims vs. witching hour) + +2. **Custom Numeric Type**: + - `Percent` is a custom numeric type that wraps a `Float64` + - We provide two serialization methods: + - `JSON.lower(x::Percent) = x.value`: This tells JSON how to serialize a `Percent` value (convert to the underlying Float64) + - `StructUtils.lowerkey(x::Percent) = string(x.value)`: This handles when a `Percent` is used as a dictionary key + +3. **The `FrankenStruct`**: + - Created with `@noarg` making it a mutable struct that can be default constructed like `FrankenStruct()` + +### Field-Level Serialization Control + +Let's examine each field of `FrankenStruct` in detail: + +1. 
**Basic Fields**: + - `id::Int`: Standard integer field (initialized explicitly to 1) + - `name::String`: Intentionally left uninitialized to demonstrate `#undef` serialization + +2. **Null Handling and Unions**: + - `address::Union{Nothing, String} = nothing`: Demonstrates how `Nothing` values are serialized + - `rate::Union{Missing, Float64} = missing`: Shows how `Missing` values are serialized (both become `null` in JSON) + +3. **Field Renaming with Tags**: + - `type::Symbol = :a &(json=(name="franken_type",),)`: + - The `name` tag changes the output JSON key from `"type"` to `"franken_type"` + - The value `:a` is automatically serialized as the string `"a"` through a default `lower` method for symbols + +4. **Any Type**: + - `notsure::Any = JSON.Object("key" => "value")`: Shows how JSON handles arbitrary types + +5. **Field-Specific Custom Serialization**: + - ``` + monster::AbstractMonster = Dracula(10) &(json=(lower=x -> x isa Dracula ? + (monster_type="vampire", num_victims=x.num_victims) : + (monster_type="werewolf", witching_hour=x.witching_hour),),) + ``` + - This demonstrates **field-specific custom serialization** using the `lower` field tag + - The lambda function checks the concrete type and produces a different JSON structure based on the type + - For `Dracula`, it adds a `"monster_type": "vampire"` field + - For `Werewolf`, it would add a `"monster_type": "werewolf"` field + - Unlike a global `JSON.lower` method, this approach only applies when this specific field is serialized + +6. **Custom Numeric Type**: + - `percent::Percent = Percent(0.5)`: Uses the global `JSON.lower` we defined to serialize as `0.5` + +7. **Custom Date Formatting**: + - `birthdate::Date = Date(2025, 1, 1) &(json=(dateformat="yyyy/mm/dd",),)`: + - The `dateformat` field tag controls how the date is formatted + - Instead of ISO format (`"2025-01-01"`), it's serialized as `"2025/01/01"` + +8. 
**Dictionary with Custom Keys**: + - `percentages::Dict{Percent, Int} = Dict{Percent, Int}(Percent(0.0) => 0, Percent(1.0) => 1)`: + - This dictionary uses our custom `Percent` type as keys + - JSON uses our `StructUtils.lowerkey` method to convert the keys to strings + +9. **Raw JSON Inclusion**: + - `json_properties::JSONText = JSONText("{\"key\": \"value\"}")`: + - The `JSONText` wrapper indicates this should be included as-is in the output + - No escaping or processing is done; the string is inserted directly into the JSON + +10. **Matrices and Multi-dimensional Arrays**: + - `matrix::Matrix{Float64} = [1.0 2.0; 3.0 4.0]`: + - 2D array serialized as nested arrays in column-major order + +11. **Ignoring Fields**: + - `extra_field::Any = nothing &(json=(ignore=true,),)`: + - The `ignore=true` field tag means this field will be completely excluded from serialization + - Useful for internal fields that shouldn't be part of the JSON representation + +### Output Analysis + +When we serialize this struct, we get a JSON string with all the specialized serialization rules applied: + +```json +{ + "id": 1, + "name": null, + "address": null, + "rate": null, + "franken_type": "a", + "notsure": {"key": "value"}, + "monster": {"monster_type": "vampire", "num_victims": 10}, + "percent": 0.5, + "birthdate": "2025/01/01", + "percentages": {"1.0": 1, "0.0": 0}, + "json_properties": {"key": "value"}, + "matrix": [[1.0, 3.0], [2.0, 4.0]] +} +``` + +Some key observations: +- `extra_field` is completely omitted due to the `ignore` tag +- Field names are either their originals (`id`, `name`) or renamed versions (`franken_type` instead of `type`) +- The nested `monster` field has custom serialization, producing a specialized format +- The date is in the custom format we specified +- Dictionary keys using our custom `Percent` type are properly converted to strings +- The matrix is serialized in column-major order as nested arrays +- The `JSONText` data is inserted directly without any 
additional processing + +This example demonstrates how JSON.jl provides extensive control over JSON serialization at multiple levels: global type rules, field-specific customization, and overall serialization options. diff --git a/src/Common.jl b/src/Common.jl deleted file mode 100644 index 55b1fe5f..00000000 --- a/src/Common.jl +++ /dev/null @@ -1,11 +0,0 @@ -""" -Internal implementation detail. -""" -module Common - -using Unicode - -include("bytes.jl") -include("errors.jl") - -end diff --git a/src/JSON.jl b/src/JSON.jl index b3396f0e..4ac0c637 100644 --- a/src/JSON.jl +++ b/src/JSON.jl @@ -1,39 +1,123 @@ -VERSION < v"0.7.0-beta2.199" && __precompile__() - module JSON -export json # returns a compact (or indented) JSON representation as a string -export JSONText # string wrapper to insert raw JSON into JSON output +# stdlibs +using Mmap, Dates, UUIDs, Logging +# external dependencies +using PrecompileTools, Parsers, StructUtils + +# reexport some StructUtils macros +import StructUtils: @noarg, @defaults, @tags, @choosetype, lower, lift +export JSONText, StructUtils, @noarg, @defaults, @tags, @choosetype, @omit_null, @omit_empty + +@enum Error InvalidJSON UnexpectedEOF ExpectedOpeningObjectChar ExpectedOpeningQuoteChar ExpectedOpeningArrayChar ExpectedClosingArrayChar ExpectedComma ExpectedColon ExpectedNewline InvalidChar InvalidNumber InvalidUTF16 + +@noinline function invalid(error, buf, pos::Int, T) + # compute which line the error falls on by counting “\n” bytes up to pos + cus = buf isa AbstractString ? codeunits(buf) : buf + line_no = count(b -> b == UInt8('\n'), view(cus, 1:pos)) + 1 + + li = pos > 20 ? 
pos - 9 : 1 + ri = min(sizeof(cus), pos + 20) + snippet_bytes = cus[li:ri] + snippet_pos = pos - li + 1 + snippet = String(copy(snippet_bytes)) + # find error position; if snippet has multi-codepoint chars, + # translate pos to char index, accounting for textwidth of char + erri = 1 + st = iterate(snippet) + while st !== nothing + c, i = st + i > snippet_pos && break + erri += textwidth(c) + st = iterate(snippet, i) + end -include("Common.jl") + snippet = replace(snippet, r"[\b\f\n\r\t]" => " ") + caret = repeat(' ', erri + 2) * "^" + msg = """ + invalid JSON at byte position $(pos) (line $line_no) parsing type $T: $error + $snippet$(error == UnexpectedEOF ? " " : "...") + $caret + """ + throw(ArgumentError(msg)) +end + +include("utils.jl") +include("object.jl") + +# default object type for parse +const DEFAULT_OBJECT_TYPE = Object{String, Any} -# Parser modules -include("Parser.jl") +""" + JSON.JSONText -# Writer modules -include("Serializations.jl") -include("Writer.jl") +Wrapper around a string containing JSON data. +Can be used to insert raw JSON in JSON output, like: +```julia +json(JSONText("{\"key\": \"value\"}")) +``` +This will output the JSON as-is, without escaping. +Note that no check is done to ensure that the JSON is valid. + +Can also be used to read "raw JSON" when parsing, meaning +no specialized structure (JSON.Object, Vector{Any}, etc.) is created. +Example: +```julia +x = JSON.parse("[1,2,3]", JSONText) +# x.value == "[1,2,3]" +``` +""" +struct JSONText + value::String +end + +include("lazy.jl") +include("parse.jl") +include("write.jl") + +""" + JSON.isvalidjson(json) -> Bool + +Check if the given JSON is valid. +This function will return `true` if the JSON is valid, and `false` otherwise. +Inputs can be a string, a vector of bytes, or an IO stream, the same inputs +as supported for `JSON.lazy` and `JSON.parse`. +""" +function isvalidjson end + +isvalidjson(io::Union{IO, Base.AbstractCmd}; kw...) = isvalidjson(Base.read(io); kw...) 
+ +function isvalidjson(io::IOStream; kw...) + buf = Mmap.mmap(io) + res = isvalidjson(buf; kw...) + checkfinalize!(buf) + return res +end + +isvalidjson(buf::Union{AbstractVector{UInt8}, AbstractString}; kw...) = + isvalidjson(lazy(buf; kw...)) + +function isvalidjson(x::LazyValue) + try + skip(x) + return true + catch + return false + end +end -# stuff to re-"export" -# note that this package does not actually export anything except `json` but -# all of the following are part of the public interface in one way or another -using .Parser: parse, parsefile -using .Writer: show_json, json, lower, print, StructuralContext, show_element, - show_string, show_key, show_pair, show_null, begin_array, - end_array, begin_object, end_object, indent, delimit, separate, - JSONText -using .Serializations: Serialization, CommonSerialization, - StandardSerialization +# convenience aliases for pre-1.0 JSON compat +print(io::IO, obj, indent=nothing) = json(io, obj; pretty=something(indent, 0)) +print(a, indent=nothing) = print(stdout, a, indent) +@doc (@doc json) print -# for pretty-printed (non-compact) output, JSONText must be re-parsed: -Writer.lower(json::JSONText) = parse(json.s) +json(a, indent::Integer) = json(a; pretty=indent) -function _precompile_() - ccall(:jl_generating_output, Cint, ()) == 1 || return nothing - x = "{\"type\":\"callback\",\"data\":{\"callback\":1,\"result\":true,\"error\":false}}" - JSON.lower(JSON.parse(x)) +@compile_workload begin + x = JSON.parse("{\"a\": 1, \"b\": null, \"c\": true, \"d\": false, \"e\": \"\", \"f\": [1,null,true], \"g\": {\"key\": \"value\"}}") + # json = JSON.json(x) + # isvalid(json) end -_precompile_() end # module diff --git a/src/Parser.jl b/src/Parser.jl deleted file mode 100644 index 6bc5df2d..00000000 --- a/src/Parser.jl +++ /dev/null @@ -1,518 +0,0 @@ -module Parser # JSON - -using Mmap -using ..Common -import Parsers - -""" -Like `isspace`, but work on bytes and includes only the four whitespace -characters defined by the 
JSON standard: space, tab, line feed, and carriage -return. -""" -isjsonspace(b::UInt8) = b == SPACE || b == TAB || b == NEWLINE || b == RETURN - -""" -Like `isdigit`, but for bytes. -""" -isjsondigit(b::UInt8) = DIGIT_ZERO ≤ b ≤ DIGIT_NINE - -abstract type ParserState end - -mutable struct MemoryParserState <: ParserState - utf8::String - s::Int -end - -# it is convenient to access MemoryParserState like a Vector{UInt8} to avoid copies -Base.@propagate_inbounds Base.getindex(state::MemoryParserState, i::Int) = codeunit(state.utf8, i) -Base.length(state::MemoryParserState) = sizeof(state.utf8) - -mutable struct StreamingParserState{T <: IO} <: ParserState - io::T - cur::UInt8 - used::Bool - utf8array::Vector{UInt8} -end -StreamingParserState(io::IO) = StreamingParserState(io, 0x00, true, UInt8[]) - -struct ParserContext{DictType, IntType, AllowNanInf, NullValue} end - -""" -Return the byte at the current position of the `ParserState`. If there is no -byte (that is, the `ParserState` is done), then an error is thrown that the -input ended unexpectedly. -""" -@inline function byteat(ps::MemoryParserState) - @inbounds if hasmore(ps) - return ps[ps.s] - else - _error(E_UNEXPECTED_EOF, ps) - end -end - -@inline function byteat(ps::StreamingParserState) - if ps.used - ps.used = false - if eof(ps.io) - _error(E_UNEXPECTED_EOF, ps) - else - ps.cur = read(ps.io, UInt8) - end - end - ps.cur -end - -""" -Like `byteat`, but with no special bounds check and error message. Useful when -a current byte is known to exist. -""" -@inline current(ps::MemoryParserState) = ps[ps.s] -@inline current(ps::StreamingParserState) = byteat(ps) - -""" -Require the current byte of the `ParserState` to be the given byte, and then -skip past that byte. Otherwise, an error is thrown. 
-""" -@inline function skip!(ps::ParserState, c::UInt8) - if byteat(ps) == c - incr!(ps) - else - _error_expected_char(c, ps) - end -end -@noinline _error_expected_char(c, ps) = _error("Expected '$(Char(c))' here", ps) - -function skip!(ps::ParserState, cs::UInt8...) - for c in cs - skip!(ps, c) - end -end - -""" -Move the `ParserState` to the next byte. -""" -@inline incr!(ps::MemoryParserState) = (ps.s += 1) -@inline incr!(ps::StreamingParserState) = (ps.used = true) - -""" -Move the `ParserState` to the next byte, and return the value at the byte before -the advancement. If the `ParserState` is already done, then throw an error. -""" -@inline advance!(ps::ParserState) = (b = byteat(ps); incr!(ps); b) - -""" -Return `true` if there is a current byte, and `false` if all bytes have been -exhausted. -""" -@inline hasmore(ps::MemoryParserState) = ps.s ≤ length(ps) -@inline hasmore(ps::StreamingParserState) = true # no more now ≠ no more ever - -""" -Remove as many whitespace bytes as possible from the `ParserState` starting from -the current byte. -""" -@inline function chomp_space!(ps::ParserState) - @inbounds while hasmore(ps) && isjsonspace(current(ps)) - incr!(ps) - end -end - - -# Used for line counts -function _count_before(haystack::AbstractString, needle::Char, _end::Int) - count = 0 - for (i,c) in enumerate(haystack) - i >= _end && return count - count += c == needle - end - return count -end - - -# Throws an error message with an indicator to the source -@noinline function _error(message::AbstractString, ps::MemoryParserState) - orig = ps.utf8 - lines = _count_before(orig, '\n', ps.s) - # Replace all special multi-line/multi-space characters with a space. - strnl = replace(orig, r"[\b\f\n\r\t\s]" => " ") - li = (ps.s > 20) ? ps.s - 9 : 1 # Left index - ri = min(lastindex(orig), ps.s + 20) # Right index - error(message * - "\nLine: " * string(lines) * - "\nAround: ..." * strnl[li:ri] * "..." 
* - "\n " * (" " ^ (ps.s - li)) * "^\n" - ) -end - -@noinline function _error(message::AbstractString, ps::StreamingParserState) - error("$message\n ...when parsing byte with value '$(current(ps))'") -end - -# PARSING - -""" -Given a `ParserState`, after possibly any amount of whitespace, return the next -parseable value. -""" -function parse_value(pc::ParserContext, ps::ParserState) - chomp_space!(ps) - - @inbounds byte = byteat(ps) - if byte == STRING_DELIM - parse_string(ps) - elseif isjsondigit(byte) || byte == MINUS_SIGN - parse_number(pc, ps) - elseif byte == OBJECT_BEGIN - parse_object(pc, ps) - elseif byte == ARRAY_BEGIN - parse_array(pc, ps) - else - parse_jsconstant(pc, ps) - end -end - -function parse_jsconstant(::ParserContext{<:Any,<:Any,AllowNanInf,NullValue}, - ps::ParserState) where {AllowNanInf,NullValue} - c = advance!(ps) - if c == LATIN_T # true - skip!(ps, LATIN_R, LATIN_U, LATIN_E) - true - elseif c == LATIN_F # false - skip!(ps, LATIN_A, LATIN_L, LATIN_S, LATIN_E) - false - elseif c == LATIN_N # null - skip!(ps, LATIN_U, LATIN_L, LATIN_L) - NullValue - elseif AllowNanInf && c == LATIN_UPPER_N - skip!(ps, LATIN_A, LATIN_UPPER_N) - NaN - elseif AllowNanInf && c == LATIN_UPPER_I - skip!(ps, LATIN_N, LATIN_F, LATIN_I, LATIN_N, LATIN_I, LATIN_T, LATIN_Y) - Inf - else - _error(E_UNEXPECTED_CHAR, ps) - end -end - -function parse_array(pc::ParserContext, ps::ParserState) - result = Any[] - @inbounds incr!(ps) # Skip over opening '[' - chomp_space!(ps) - if byteat(ps) ≠ ARRAY_END # special case for empty array - @inbounds while true - push!(result, parse_value(pc, ps)) - chomp_space!(ps) - byteat(ps) == ARRAY_END && break - skip!(ps, DELIMITER) - end - end - - @inbounds incr!(ps) - result -end - - -function parse_object(pc::ParserContext{DictType,<:Real,<:Any}, ps::ParserState) where DictType - obj = DictType() - keyT = keytype(typeof(obj)) - - incr!(ps) # Skip over opening '{' - chomp_space!(ps) - if byteat(ps) ≠ OBJECT_END # special case for empty 
object - @inbounds while true - # Read key - chomp_space!(ps) - byteat(ps) == STRING_DELIM || _error(E_BAD_KEY, ps) - key = parse_string(ps) - chomp_space!(ps) - skip!(ps, SEPARATOR) - # Read value - value = parse_value(pc, ps) - chomp_space!(ps) - obj[keyT === Symbol ? Symbol(key) : convert(keyT, key)] = value - byteat(ps) == OBJECT_END && break - skip!(ps, DELIMITER) - end - end - - incr!(ps) - obj -end - - -utf16_is_surrogate(c::UInt16) = (c & 0xf800) == 0xd800 -utf16_get_supplementary(lead::UInt16, trail::UInt16) = Char(UInt32(lead-0xd7f7)<<10 + trail) - -function read_four_hex_digits!(ps::ParserState) - local n::UInt16 = 0 - - for _ in 1:4 - b = advance!(ps) - n = n << 4 + if isjsondigit(b) - b - DIGIT_ZERO - elseif LATIN_A ≤ b ≤ LATIN_F - b - (LATIN_A - UInt8(10)) - elseif LATIN_UPPER_A ≤ b ≤ LATIN_UPPER_F - b - (LATIN_UPPER_A - UInt8(10)) - else - _error(E_BAD_ESCAPE, ps) - end - end - - n -end - -function read_unicode_escape!(ps) - u1 = read_four_hex_digits!(ps) - if utf16_is_surrogate(u1) - skip!(ps, BACKSLASH) - skip!(ps, LATIN_U) - u2 = read_four_hex_digits!(ps) - utf16_get_supplementary(u1, u2) - else - Char(u1) - end -end - -function parse_string(ps::ParserState) - b = IOBuffer() - incr!(ps) # skip opening quote - while true - c = advance!(ps) - - if c == BACKSLASH - c = advance!(ps) - if c == LATIN_U # Unicode escape - write(b, read_unicode_escape!(ps)) - else - c = get(ESCAPES, c, 0x00) - c == 0x00 && _error(E_BAD_ESCAPE, ps) - write(b, c) - end - continue - elseif c < SPACE - _error(E_BAD_CONTROL, ps) - elseif c == STRING_DELIM - return String(take!(b)) - end - - write(b, c) - end -end - -""" -Return `true` if the given bytes vector, starting at `from` and ending at `to`, -has a leading zero. 
-""" -function hasleadingzero(bytes, from::Int, to::Int) - c = bytes[from] - from + 1 < to && c == UInt8('-') && - bytes[from + 1] == DIGIT_ZERO && isjsondigit(bytes[from + 2]) || - from < to && to > from + 1 && c == DIGIT_ZERO && - isjsondigit(bytes[from + 1]) -end - -""" -Parse a float from the given bytes vector, starting at `from` and ending at the -byte before `to`. Bytes enclosed should all be ASCII characters. -""" -float_from_bytes(bytes::MemoryParserState, from::Int, to::Int) = float_from_bytes(bytes.utf8, from, to) - -function float_from_bytes(bytes::Union{String, Vector{UInt8}}, from::Int, to::Int)::Union{Float64,Nothing} - return Parsers.tryparse(Float64, bytes isa String ? SubString(bytes, from:to) : view(bytes, from:to)) -end - -""" -Parse an integer from the given bytes vector, starting at `from` and ending at -the byte before `to`. Bytes enclosed should all be ASCII characters. -""" -function int_from_bytes(pc::ParserContext{<:Any,IntType,<:Any}, - ps::ParserState, - bytes, - from::Int, - to::Int) where IntType <: Real - @inbounds isnegative = bytes[from] == MINUS_SIGN ? (from += 1; true) : false - num = IntType(0) - @inbounds for i in from:to - c = bytes[i] - dig = c - DIGIT_ZERO - if dig < 0x10 - num = IntType(10) * num + IntType(dig) - else - _error(E_BAD_NUMBER, ps) - end - end - ifelse(isnegative, -num, num) -end - -function number_from_bytes(pc::ParserContext, - ps::ParserState, - isint::Bool, - bytes, - from::Int, - to::Int) - @inbounds if hasleadingzero(bytes, from, to) - _error(E_LEADING_ZERO, ps) - end - - if isint - @inbounds if to == from && bytes[from] == MINUS_SIGN - _error(E_BAD_NUMBER, ps) - end - int_from_bytes(pc, ps, bytes, from, to) - else - res = float_from_bytes(bytes, from, to) - res === nothing ? 
_error(E_BAD_NUMBER, ps) : res - end -end - - -function parse_number(pc::ParserContext{<:Any,<:Any,AllowNanInf}, ps::ParserState) where AllowNanInf - # Determine the end of the floating point by skipping past ASCII values - # 0-9, +, -, e, E, and . - number = ps.utf8array - isint = true - negative = false - - c = current(ps) - - # Parse and keep track of initial minus sign (for parsing -Infinity) - if AllowNanInf && c == MINUS_SIGN - push!(number, UInt8(c)) # save in case the next character is a number - negative = true - incr!(ps) - end - - @inbounds while hasmore(ps) - c = current(ps) - - if isjsondigit(c) || c == MINUS_SIGN - push!(number, UInt8(c)) - elseif c in (PLUS_SIGN, LATIN_E, LATIN_UPPER_E, DECIMAL_POINT) - push!(number, UInt8(c)) - isint = false - elseif AllowNanInf && c == LATIN_UPPER_I - infinity = parse_jsconstant(pc, ps) - resize!(number, 0) - return (negative ? -infinity : infinity) - else - break - end - - incr!(ps) - end - - v = number_from_bytes(pc, ps, isint, number, 1, length(number)) - resize!(number, 0) - return v -end - - -unparameterize_type(x) = x # Fallback for nontypes -- functions etc -function unparameterize_type(T::Type) - candidate = typeintersect(T, AbstractDict{String, Any}) - candidate <: Union{} ? T : candidate -end - -# Workaround for slow dynamic dispatch for creating objects -const DEFAULT_PARSERCONTEXT = ParserContext{Dict{String, Any}, Int64, false, nothing}() -function _get_parsercontext(dicttype, inttype, allownan, null) - if dicttype == Dict{String, Any} && inttype == Int64 && !allownan - DEFAULT_PARSERCONTEXT - else - ParserContext{unparameterize_type(dicttype), inttype, allownan, null}.instance - end -end - -""" - parse(str::AbstractString; - dicttype::Type{T}=Dict, - inttype::Type{<:Real}=Int64, - allownan::Bool=true, - null=nothing) where {T<:AbstractDict} - -Parses the given JSON string into corresponding Julia types. 
- -Keyword arguments: - • dicttype: Associative type to use when parsing JSON objects (default: Dict{String, Any}) - • inttype: Real number type to use when parsing JSON numbers that can be parsed - as integers (default: Int64) - • allownan: allow parsing of NaN, Infinity, and -Infinity (default: true) - • null: value to use for parsed JSON `null` values (default: `nothing`) -""" -function parse(str::AbstractString; - dicttype=Dict{String,Any}, - inttype::Type{<:Real}=Int64, - allownan::Bool=true, - null=nothing) - pc = _get_parsercontext(dicttype, inttype, allownan, null) - ps = MemoryParserState(str, 1) - v = parse_value(pc, ps) - chomp_space!(ps) - if hasmore(ps) - _error(E_EXPECTED_EOF, ps) - end - v -end - -""" - parse(io::IO; - dicttype::Type{T}=Dict, - inttype::Type{<:Real}=Int64, - allownan=true, - null=nothing) where {T<:AbstractDict} - -Parses JSON from the given IO stream into corresponding Julia types. - -Keyword arguments: - • dicttype: Associative type to use when parsing JSON objects (default: Dict{String, Any}) - • inttype: Real number type to use when parsing JSON numbers that can be parsed - as integers (default: Int64) - • allownan: allow parsing of NaN, Infinity, and -Infinity (default: true) - • null: value to use for parsed JSON `null` values (default: `nothing`) -""" -function parse(io::IO; - dicttype=Dict{String,Any}, - inttype::Type{<:Real}=Int64, - allownan::Bool=true, - null=nothing) - pc = _get_parsercontext(dicttype, inttype, allownan, null) - ps = StreamingParserState(io) - parse_value(pc, ps) -end - -""" - parsefile(filename::AbstractString; - dicttype=Dict{String, Any}, - inttype::Type{<:Real}=Int64, - allownan::Bool=true, - null=nothing, - use_mmap::Bool=true) - -Convenience function to parse JSON from the given file into corresponding Julia types. 
- -Keyword arguments: - • dicttype: Associative type to use when parsing JSON objects (default: Dict{String, Any}) - • inttype: Real number type to use when parsing JSON numbers that can be parsed - as integers (default: Int64) - • allownan: allow parsing of NaN, Infinity, and -Infinity (default: true) - • null: value to use for parsed JSON `null` values (default: `nothing`) - • use_mmap: use mmap when opening the file (default: true) -""" -function parsefile(filename::AbstractString; - dicttype=Dict{String, Any}, - inttype::Type{<:Real}=Int64, - null=nothing, - allownan::Bool=true, - use_mmap::Bool=true) - sz = filesize(filename) - open(filename) do io - s = use_mmap ? String(Mmap.mmap(io, Vector{UInt8}, sz)) : read(io, String) - parse(s; dicttype=dicttype, inttype=inttype, allownan=allownan, null=null) - end -end - -# Efficient implementations of some of the above for in-memory parsing -include("specialized.jl") - -end # module Parser diff --git a/src/Serializations.jl b/src/Serializations.jl deleted file mode 100644 index e4398ce6..00000000 --- a/src/Serializations.jl +++ /dev/null @@ -1,39 +0,0 @@ -""" -JSON writer serialization contexts. - -This module defines the `Serialization` abstract type and several concrete -implementations, as they relate to JSON. -""" -module Serializations - -using ..Common - -""" -A `Serialization` defines how objects are lowered to JSON format. -""" -abstract type Serialization end - -""" -The `CommonSerialization` comes with a default set of rules for serializing -Julia types to their JSON equivalents. Additional rules are provided either by -packages explicitly defining `JSON.show_json` for this serialization, or by the -`JSON.lower` method. Most concrete implementations of serializers should subtype -`CommonSerialization`, unless it is desirable to bypass the `lower` system, in -which case `Serialization` should be subtyped. 
-""" -abstract type CommonSerialization <: Serialization end - -""" -The `StandardSerialization` defines a common, standard JSON serialization format -that is optimized to: - -- strictly follow the JSON standard -- be useful in the greatest number of situations - -All serializations defined for `CommonSerialization` are inherited by -`StandardSerialization`. It is therefore generally advised to add new -serialization behaviour to `CommonSerialization`. -""" -struct StandardSerialization <: CommonSerialization end - -end diff --git a/src/Writer.jl b/src/Writer.jl deleted file mode 100644 index b3ca285e..00000000 --- a/src/Writer.jl +++ /dev/null @@ -1,402 +0,0 @@ -module Writer - -using Dates -using ..Common -using ..Serializations: Serialization, StandardSerialization, - CommonSerialization - -using Unicode - - -""" -Internal JSON.jl implementation detail; do not depend on this type. - -A JSON primitive that wraps around any composite type to enable `Dict`-like -serialization. -""" -struct CompositeTypeWrapper{T} - wrapped::T - fns::Vector{Symbol} -end - -CompositeTypeWrapper(x, syms) = CompositeTypeWrapper(x, collect(syms)) -CompositeTypeWrapper(x) = CompositeTypeWrapper(x, propertynames(x)) - -""" - lower(x) - -Return a value of a JSON-encodable primitive type that `x` should be lowered -into before encoding as JSON. Supported types are: `AbstractDict` and `NamedTuple` -to JSON objects, `Tuple` and `AbstractVector` to JSON arrays, `AbstractArray` to -nested JSON arrays, `AbstractString`, `Symbol`, `Enum`, or `Char` to JSON string, -`Integer` and `AbstractFloat` to JSON number, `Bool` to JSON boolean, and -`Nothing` to JSON null, or any other types with a `show_json` method defined. - -Extensions of this method should preserve the property that the return value is -one of the aforementioned types. If first lowering to some intermediate type is -required, then extensions should call `lower` before returning a value. 
- -Note that the return value need not be *recursively* lowered—this function may -for instance return an `AbstractArray{Any, 1}` whose elements are not JSON -primitives. -""" -function lower(a) - if nfields(a) > 0 - CompositeTypeWrapper(a) - else - error("Cannot serialize type $(typeof(a))") - end -end - -# To avoid allocating an intermediate string, we directly define `show_json` -# for this type instead of lowering it to a string first (which would -# allocate). However, the `show_json` method does call `lower` so as to allow -# users to change the lowering of their `Enum` or even `AbstractString` -# subtypes if necessary. -const IsPrintedAsString = Union{ - Dates.TimeType, Char, Type, AbstractString, Enum, Symbol} -lower(x::IsPrintedAsString) = x - -lower(m::Module) = throw(ArgumentError("cannot serialize Module $m as JSON")) -lower(x::Real) = convert(Float64, x) -lower(x::Base.AbstractSet) = collect(x) - -""" -Abstract supertype of all JSON and JSON-like structural writer contexts. -""" -abstract type StructuralContext <: IO end - -""" -Internal implementation detail. - -A JSON structural context around an `IO` object. Structural writer contexts -define the behaviour of serializing JSON structural objects, such as objects, -arrays, and strings to JSON. The translation of Julia types to JSON structural -objects is not handled by a `JSONContext`, but by a `Serialization` wrapper -around it. Abstract supertype of `PrettyContext` and `CompactContext`. Data can -be written to a JSON context in the usual way, but often higher-level operations -such as `begin_array` or `begin_object` are preferred to directly writing bytes -to the stream. -""" -abstract type JSONContext <: StructuralContext end - -""" -Internal implementation detail. - -To handle recursive references in objects/arrays when writing, by default we want -to track references to objects seen and break recursion cycles to avoid stack overflows. 
-Subtypes of `RecursiveCheckContext` must include two fields in order to allow recursive -cycle checking to work properly when writing: - * `objectids::Set{UInt64}`: set of object ids in the current stack of objects being written - * `recursive_cycle_token::Any`: Any string, `nothing`, or object to be written when a cycle is detected -""" -abstract type RecursiveCheckContext <: JSONContext end - -""" -Internal implementation detail. - -Keeps track of the current location in the array or object, which winds and -unwinds during serialization. -""" -mutable struct PrettyContext{T<:IO} <: RecursiveCheckContext - io::T - step::Int # number of spaces to step - state::Int # number of steps at present - first::Bool # whether an object/array was just started - objectids::Set{UInt64} - recursive_cycle_token -end -PrettyContext(io::IO, step, recursive_cycle_token=nothing) = PrettyContext(io, step, 0, false, Set{UInt64}(), recursive_cycle_token) - -""" -Internal implementation detail. - -For compact printing, which in JSON is fully recursive. -""" -mutable struct CompactContext{T<:IO} <: RecursiveCheckContext - io::T - first::Bool - objectids::Set{UInt64} - recursive_cycle_token -end -CompactContext(io::IO, recursive_cycle_token=nothing) = CompactContext(io, false, Set{UInt64}(), recursive_cycle_token) - -""" -Internal implementation detail. - -Implements an IO context safe for printing into JSON strings. -""" -struct StringContext{T<:IO} <: IO - io::T -end - -# These aliases make defining additional methods on `show_json` easier. -const CS = CommonSerialization -const SC = StructuralContext - -# Low-level direct access -Base.write(io::JSONContext, byte::UInt8) = write(io.io, byte) -Base.write(io::StringContext, byte::UInt8) = - write(io.io, ESCAPED_ARRAY[byte + 1]) -#= turn on if there's a performance benefit -write(io::StringContext, char::Char) = - char <= '\x7f' ? 
write(io, ESCAPED_ARRAY[UInt8(c) + 1]) : - Base.print(io, c) -=# - -""" - indent(io::StructuralContext) - -If appropriate, write a newline to the given context, then indent it by the -appropriate number of spaces. Otherwise, do nothing. -""" -@inline function indent(io::PrettyContext) - write(io, NEWLINE) - for _ in 1:io.state - write(io, SPACE) - end -end -@inline indent(io::CompactContext) = nothing - -""" - separate(io::StructuralContext) - -Write a colon, followed by a space if appropriate, to the given context. -""" -@inline separate(io::PrettyContext) = write(io, SEPARATOR, SPACE) -@inline separate(io::CompactContext) = write(io, SEPARATOR) - -""" - delimit(io::StructuralContext) - -If this is not the first item written in a collection, write a comma in the -structural context. Otherwise, do not write a comma, but set a flag that the -first element has been written already. -""" -@inline function delimit(io::JSONContext) - if !io.first - write(io, DELIMITER) - end - io.first = false -end - -for kind in ("object", "array") - beginfn = Symbol("begin_", kind) - beginsym = Symbol(uppercase(kind), "_BEGIN") - endfn = Symbol("end_", kind) - endsym = Symbol(uppercase(kind), "_END") - # Begin and end objects - @eval function $beginfn(io::PrettyContext) - write(io, $beginsym) - io.state += io.step - io.first = true - end - @eval $beginfn(io::CompactContext) = (write(io, $beginsym); io.first = true) - @eval function $endfn(io::PrettyContext) - io.state -= io.step - if !io.first - indent(io) - end - write(io, $endsym) - io.first = false - end - @eval $endfn(io::CompactContext) = (write(io, $endsym); io.first = false) -end - -""" - show_string(io::IO, str) - -Print `str` as a JSON string (that is, properly escaped and wrapped by double -quotes) to the given IO object `io`. 
-""" -function show_string(io::IO, x) - write(io, STRING_DELIM) - Base.print(StringContext(io), x) - write(io, STRING_DELIM) -end - -""" - show_null(io::IO) - -Print the string `null` to the given IO object `io`. -""" -show_null(io::IO) = Base.print(io, "null") - -""" - show_element(io::StructuralContext, s, x) - -Print object `x` as an element of a JSON array to context `io` using rules -defined by serialization `s`. -""" -function show_element(io::JSONContext, s, x) - delimit(io) - indent(io) - show_json(io, s, x) -end - -""" - show_key(io::StructuralContext, k) - -Print string `k` as the key of a JSON key-value pair to context `io`. -""" -function show_key(io::JSONContext, k) - delimit(io) - indent(io) - show_string(io, k) - separate(io) -end - -""" - show_pair(io::StructuralContext, s, k, v) - -Print the key-value pair defined by `k => v` as JSON to context `io`, using -rules defined by serialization `s`. -""" -function show_pair(io::JSONContext, s, k, v) - show_key(io, k) - show_json(io, s, v) -end -show_pair(io::JSONContext, s, kv) = show_pair(io, s, first(kv), last(kv)) - -# Default serialization rules for CommonSerialization (CS) -function show_json(io::SC, s::CS, x::IsPrintedAsString) - # We need this check to allow `lower(x::Enum)` overrides to work if needed; - # it should be optimized out if `lower` is a no-op - lx = lower(x) - if x === lx - show_string(io, x) - else - show_json(io, s, lx) - end -end - -function show_json(io::SC, s::CS, x::Union{Integer, AbstractFloat}) - if isfinite(x) - Base.print(io, x) - else - show_null(io) - end -end - -show_json(io::SC, ::CS, ::Nothing) = show_null(io) -show_json(io::SC, ::CS, ::Missing) = show_null(io) - -recursive_cycle_check(f, io, s, id) = f() - -function recursive_cycle_check(f, io::RecursiveCheckContext, s, id) - if id in io.objectids - show_json(io, s, io.recursive_cycle_token) - else - push!(io.objectids, id) - f() - delete!(io.objectids, id) - end -end - -function show_json(io::SC, s::CS, 
x::Union{AbstractDict, NamedTuple}) - recursive_cycle_check(io, s, objectid(x)) do - begin_object(io) - for kv in pairs(x) - show_pair(io, s, kv) - end - end_object(io) - end -end - -function show_json(io::SC, s::CS, kv::Pair) - begin_object(io) - show_pair(io, s, kv) - end_object(io) -end - -function show_json(io::SC, s::CS, x::CompositeTypeWrapper) - recursive_cycle_check(io, s, objectid(x.wrapped)) do - begin_object(io) - for fn in x.fns - show_pair(io, s, fn, getproperty(x.wrapped, fn)) - end - end_object(io) - end -end - -function show_json(io::SC, s::CS, x::Union{AbstractVector, Tuple}) - recursive_cycle_check(io, s, objectid(x)) do - begin_array(io) - for elt in x - show_element(io, s, elt) - end - end_array(io) - end -end - -""" -Serialize a multidimensional array to JSON in column-major format. That is, -`json([1 2 3; 4 5 6]) == "[[1,4],[2,5],[3,6]]"`. -""" -function show_json(io::SC, s::CS, A::AbstractArray{<:Any,n}) where n - begin_array(io) - newdims = ntuple(_ -> :, n - 1) - for j in axes(A, n) - show_element(io, s, view(A, newdims..., j)) - end - end_array(io) -end - -# special case for 0-dimensional arrays -show_json(io::SC, s::CS, A::AbstractArray{<:Any,0}) = show_json(io, s, A[]) - -show_json(io::SC, s::CS, a) = show_json(io, s, lower(a)) - -# Fallback show_json for non-SC types -""" -Serialize Julia object `obj` to IO `io` using the behaviour described by `s`. If -`indent` is provided, then the JSON will be pretty-printed; otherwise it will be -printed on one line. If pretty-printing is enabled, then a trailing newline will -be printed; otherwise there will be no trailing newline. -""" -function show_json(io::IO, s::Serialization, obj; indent=nothing) - ctx = indent === nothing ? 
CompactContext(io) : PrettyContext(io, indent) - show_json(ctx, s, obj) - if indent !== nothing - println(io) - end -end - -""" - JSONText(s::AbstractString) - -`JSONText` is a wrapper around a Julia string representing JSON-formatted -text, which is inserted *as-is* in the JSON output of `JSON.print` and `JSON.json` -for compact output, and is otherwise re-parsed for pretty-printed output. - -`s` *must* contain valid JSON text. Otherwise compact output will contain -the malformed `s` and other serialization output will throw a parsing exception. -""" -struct JSONText - s::String -end -show_json(io::CompactContext, s::CS, json::JSONText) = write(io, json.s) -# other contexts for JSONText are handled by lower(json) = parse(json.s) - -print(io::IO, obj, indent) = - show_json(io, StandardSerialization(), obj; indent=indent) -print(io::IO, obj) = show_json(io, StandardSerialization(), obj) - -print(a, indent) = print(stdout, a, indent) -print(a) = print(stdout, a) - -""" - json(a) - json(a, indent::Int) - -Creates a JSON string from a Julia object or value. 
- -Arguments: - • a: the Julia object or value to encode - • indent (optional number): if provided, pretty-print array and object - substructures by indenting with the provided number of spaces -""" -json(a) = sprint(print, a) -json(a, indent) = sprint(print, a, indent) - -end diff --git a/src/bytes.jl b/src/bytes.jl deleted file mode 100644 index 47758ff5..00000000 --- a/src/bytes.jl +++ /dev/null @@ -1,72 +0,0 @@ -# The following bytes have significant meaning in JSON -const BACKSPACE = UInt8('\b') -const TAB = UInt8('\t') -const NEWLINE = UInt8('\n') -const FORM_FEED = UInt8('\f') -const RETURN = UInt8('\r') -const SPACE = UInt8(' ') -const STRING_DELIM = UInt8('"') -const PLUS_SIGN = UInt8('+') -const DELIMITER = UInt8(',') -const MINUS_SIGN = UInt8('-') -const DECIMAL_POINT = UInt8('.') -const SOLIDUS = UInt8('/') -const DIGIT_ZERO = UInt8('0') -const DIGIT_NINE = UInt8('9') -const SEPARATOR = UInt8(':') -const LATIN_UPPER_A = UInt8('A') -const LATIN_UPPER_E = UInt8('E') -const LATIN_UPPER_F = UInt8('F') -const LATIN_UPPER_I = UInt8('I') -const LATIN_UPPER_N = UInt8('N') -const ARRAY_BEGIN = UInt8('[') -const BACKSLASH = UInt8('\\') -const ARRAY_END = UInt8(']') -const LATIN_A = UInt8('a') -const LATIN_B = UInt8('b') -const LATIN_E = UInt8('e') -const LATIN_F = UInt8('f') -const LATIN_I = UInt8('i') -const LATIN_L = UInt8('l') -const LATIN_N = UInt8('n') -const LATIN_R = UInt8('r') -const LATIN_S = UInt8('s') -const LATIN_T = UInt8('t') -const LATIN_U = UInt8('u') -const LATIN_Y = UInt8('y') -const OBJECT_BEGIN = UInt8('{') -const OBJECT_END = UInt8('}') - -const ESCAPES = Dict( - STRING_DELIM => STRING_DELIM, - BACKSLASH => BACKSLASH, - SOLIDUS => SOLIDUS, - LATIN_B => BACKSPACE, - LATIN_F => FORM_FEED, - LATIN_N => NEWLINE, - LATIN_R => RETURN, - LATIN_T => TAB) - -const REVERSE_ESCAPES = Dict(reverse(p) for p in ESCAPES) -const ESCAPED_ARRAY = Vector{Vector{UInt8}}(undef, 256) -for c in 0x00:0xFF - ESCAPED_ARRAY[c + 1] = if c == SOLIDUS - [SOLIDUS] # don't 
escape this one - elseif c ≥ 0x80 - [c] # UTF-8 character copied verbatim - elseif haskey(REVERSE_ESCAPES, c) - [BACKSLASH, REVERSE_ESCAPES[c]] - elseif iscntrl(Char(c)) || !isprint(Char(c)) - UInt8[BACKSLASH, LATIN_U, string(c, base=16, pad=4)...] - else - [c] - end -end - -export BACKSPACE, TAB, NEWLINE, FORM_FEED, RETURN, SPACE, STRING_DELIM, - PLUS_SIGN, DELIMITER, MINUS_SIGN, DECIMAL_POINT, SOLIDUS, DIGIT_ZERO, - DIGIT_NINE, SEPARATOR, LATIN_UPPER_A, LATIN_UPPER_E, LATIN_UPPER_F, - LATIN_UPPER_I, LATIN_UPPER_N, ARRAY_BEGIN, BACKSLASH, ARRAY_END, - LATIN_A, LATIN_B, LATIN_E, LATIN_F, LATIN_I, LATIN_L, LATIN_N, LATIN_R, - LATIN_S, LATIN_T, LATIN_U, LATIN_Y, OBJECT_BEGIN, OBJECT_END, ESCAPES, - REVERSE_ESCAPES, ESCAPED_ARRAY diff --git a/src/errors.jl b/src/errors.jl deleted file mode 100644 index c9c1c87d..00000000 --- a/src/errors.jl +++ /dev/null @@ -1,12 +0,0 @@ -# The following errors may be thrown by the parser -const E_EXPECTED_EOF = "Expected end of input" -const E_UNEXPECTED_EOF = "Unexpected end of input" -const E_UNEXPECTED_CHAR = "Unexpected character" -const E_BAD_KEY = "Invalid object key" -const E_BAD_ESCAPE = "Invalid escape sequence" -const E_BAD_CONTROL = "ASCII control character in string" -const E_LEADING_ZERO = "Invalid leading zero in number" -const E_BAD_NUMBER = "Invalid number" - -export E_EXPECTED_EOF, E_UNEXPECTED_EOF, E_UNEXPECTED_CHAR, E_BAD_KEY, - E_BAD_ESCAPE, E_BAD_CONTROL, E_LEADING_ZERO, E_BAD_NUMBER diff --git a/src/lazy.jl b/src/lazy.jl new file mode 100644 index 00000000..678d7c1d --- /dev/null +++ b/src/lazy.jl @@ -0,0 +1,728 @@ +""" + JSON.lazy(json; kw...) + JSON.lazyfile(file; kw...) + +Detect the initial JSON value in `json`, returning a `JSON.LazyValue` instance. `json` input can be: + * `AbstractString` + * `AbstractVector{UInt8}` + * `IO`, `IOStream`, `Cmd` (bytes are fully read into a `Vector{UInt8}` for parsing, i.e. 
`read(json)` is called) + +`lazyfile` is a convenience method that takes a filename and opens the file before calling `lazy`. + +The `JSON.LazyValue` supports the "selection" syntax +for lazily navigating the JSON value. For example (`x = JSON.lazy(json)`): + * `x.key`, `x[:key]` or `x["key"]` for JSON objects + * `x[1]`, `x[2:3]`, `x[end]` for JSON arrays + * `propertynames(x)` to see all keys in the JSON object + * `x.a.b.c` for selecting deeply nested values + * `x[~, (k, v) -> k == "foo"]` for recursively searching for key "foo" and return matching values + +NOTE: Selecting values from a `LazyValue` will always return a `LazyValue`. +Selecting a specific key of an object or index of an array will only parse +what is necessary before returning. This leads to a few conclusions about +how to effectively utilize `LazyValue`: + * `JSON.lazy` is great for one-time access of a value in JSON + * It's also great for finding a required deeply nested value + * It's not great for any case where repeated access to values is required; + this results in the same JSON being parsed on each access (i.e. naively iterating a lazy JSON array will be O(n^2)) + * Best practice is to use `JSON.lazy` sparingly unless there's a specific case where it will benefit; + or use `JSON.lazy` as a means to access a value that is then fully materialized + +Another option for processing `JSON.LazyValue` is calling `foreach(f, x)` which is defined on +`JSON.LazyValue` for JSON objects and arrays. For objects, `f` should be of the form +`f(kv::Pair{String, LazyValue})` where `kv` is a key-value pair, and for arrays, +`f(v::LazyValue)` where `v` is the value at the index. This allows for iterating over all key-value pairs in an object +or all values in an array without materializing the entire structure. + +Lazy values can be materialized via `JSON.parse` in a few different forms: + * `JSON.parse(json)`: Default materialization into `JSON.Object` (a Dict-like type), `Vector{Any}`, etc. 
 * `JSON.parse(json, T)`: Materialize into a user-provided type `T` (following rules/programmatic construction from StructUtils.jl)
+ * `JSON.parse!(json, x)`: Materialize into an existing object `x` (following rules/programmatic construction from StructUtils.jl)
+
+Thus for completeness sake, here's an example of ideal usage of `JSON.lazy`:
+
+```julia
+x = JSON.lazy(very_large_json_object)
+# find a deeply nested value
+y = x.a.b.c.d.e.f.g
+# materialize the value
+z = JSON.parse(y)
+# now mutate/repeatedly access values in z
+```
+
+In this example, we only parsed as much of the `very_large_json_object` as was required to find the value `y`.
+Then we fully materialized `y` into `z`, which is now a normal Julia object. We can now mutate or access values in `z`.
+
+Currently supported keyword arguments include:
+ - `allownan::Bool = false`: whether "special" float values should be allowed while parsing (`NaN`, `Inf`, `-Inf`); these values are specifically _not allowed_ in the JSON spec, but many JSON libraries allow reading/writing
+ - `ninf::String = "-Infinity"`: the string that will be used to parse `-Inf` if `allownan=true`
+ - `inf::String = "Infinity"`: the string that will be used to parse `Inf` if `allownan=true`
+ - `nan::String = "NaN"`: the string that will be used to parse `NaN` if `allownan=true`
+ - `jsonlines::Bool = false`: whether the JSON input should be treated as an implicit array, with newlines separating individual JSON elements with no leading `'['` or trailing `']'` characters. Common in logging or streaming workflows. Defaults to `true` when used with `JSON.parsefile` and the filename extension is `.jsonl` or `ndjson`. Note this ensures that parsing will _always_ return an array at the root-level.
+ +Note that validation is only fully done on `null`, `true`, and `false`, +while other values are only lazily inferred from the first non-whitespace character: + * `'{'`: JSON object + * `'['`: JSON array + * `'"'`: JSON string + * `'0'`-`'9'` or `'-'`: JSON number + +Further validation for these values is done later when materialized, like `JSON.parse`, +or via selection syntax calls on a `LazyValue`. +""" +function lazy end + +# helper struct we pack lazy-parsing keyword args into +# held by LazyValues for access +@kwdef struct LazyOptions + allownan::Bool = false + ninf::String = "-Infinity" + inf::String = "Infinity" + nan::String = "NaN" + jsonlines::Bool = false +end + +lazy(io::Union{IO, Base.AbstractCmd}; kw...) = lazy(Base.read(io); kw...) +# we don't call checkfinalize! here because LazyValue holds a ref to buf for it's lifetime +lazy(io::IOStream; kw...) = lazy(Mmap.mmap(io); kw...) + +lazyfile(file; jsonlines::Union{Bool, Nothing}=nothing, kw...) = open(io -> lazy(io; jsonlines=(jsonlines === nothing ? isjsonl(file) : jsonlines), kw...), file) + +@doc (@doc lazy) lazyfile + +function lazy(buf::Union{AbstractVector{UInt8}, AbstractString}; kw...) + if !applicable(pointer, buf, 1) || (buf isa AbstractVector{UInt8} && !isone(only(strides(buf)))) + if buf isa AbstractString + buf = String(buf) + else + buf = Vector{UInt8}(buf) + end + end + len = getlength(buf) + if len == 0 + error = UnexpectedEOF + pos = 0 + @goto invalid + end + pos = 1 + # detect and error on UTF-16LE BOM + if len >= 2 && getbyte(buf, pos) == 0xff && getbyte(buf, pos + 1) == 0xfe + error = InvalidUTF16 + @goto invalid + end + # detect and error on UTF-16BE BOM + if len >= 2 && getbyte(buf, pos) == 0xfe && getbyte(buf, pos + 1) == 0xff + error = InvalidUTF16 + @goto invalid + end + # detect and ignore UTF-8 BOM + pos = (len >= 3 && getbyte(buf, pos) == 0xef && getbyte(buf, pos + 1) == 0xbb && getbyte(buf, pos + 2) == 0xbf) ? 
pos + 3 : pos + @nextbyte + return _lazy(buf, pos, len, b, LazyOptions(; kw...), true) + +@label invalid + invalid(error, buf, pos, Any) +end + +""" + JSON.LazyValue + +A lazy representation of a JSON value. The `LazyValue` type +supports the "selection" syntax for lazily navigating the JSON value. +Lazy values can be materialized via `JSON.parse(x)`, `JSON.parse(x, T)`, or `JSON.parse!(x, y)`. +""" +struct LazyValue{T} + buf::T # wrapped json source, AbstractVector{UInt8} or AbstractString + pos::Int # byte position in buf where this value starts + type::JSONTypes.T # scoped enum for type of value: OBJECT, ARRAY, etc. + opts::LazyOptions + isroot::Bool # true if this is the root LazyValue +end + +# convenience types only used for defining `show` on LazyValue +# this allows, for example, a LazyValue w/ type OBJECT to be +# displayed like a Dict using Base AbstractDict machinery +# while a LazyValue w/ type ARRAY is displayed like an Array +struct LazyObject{T} <: AbstractDict{String, LazyValue} + buf::T + pos::Int + opts::LazyOptions + isroot::Bool + LazyObject(x::LazyValue{T}) where {T} = new{T}(getbuf(x), getpos(x), getopts(x), getisroot(x)) +end + +struct LazyArray{T} <: AbstractVector{LazyValue} + buf::T + pos::Int + opts::LazyOptions + isroot::Bool + LazyArray(x::LazyValue{T}) where {T} = new{T}(getbuf(x), getpos(x), getopts(x), getisroot(x)) +end + +# helper accessors so we can overload getproperty for convenience +getbuf(x) = getfield(x, :buf) +getpos(x) = getfield(x, :pos) +gettype(x) = getfield(x, :type) +getopts(x) = getfield(x, :opts) +getisroot(x) = getfield(x, :isroot) + +const LazyValues{T} = Union{LazyValue{T}, LazyObject{T}, LazyArray{T}} + +# default materialization that calls parse +Base.getindex(x::LazyValues) = parse(x) + +# some overloads/usage of StructUtils + LazyValues +# this defines all the right getproperty, getindex methods +Selectors.@selectors LazyValues + +Base.lastindex(x::LazyValues) = length(x) + +# this ensures LazyValues can be 
"sources" in StructUtils.make +@inline function StructUtils.applyeach(::StructUtils.StructStyle, f, x::LazyValues) + type = gettype(x) + if type == JSONTypes.OBJECT + return applyobject(f, x) + elseif type == JSONTypes.ARRAY + return applyarray(f, x) + end + throw(ArgumentError("applyeach not applicable for `$(typeof(x))` with JSON type = `$type`")) +end + +@inline function Base.foreach(f, x::LazyValues) + type = gettype(x) + if type == JSONTypes.OBJECT + applyobject((k, v) -> f(convert(String, k) => v), x) + elseif type == JSONTypes.ARRAY + applyarray((i, v) -> f(v), x) + else + throw(ArgumentError("foreach not applicable for `$(typeof(x))` with JSON type = `$type`")) + end + return +end + +StructUtils.structlike(::StructUtils.StructStyle, x::LazyValues) = gettype(x) == JSONTypes.OBJECT +StructUtils.arraylike(::StructUtils.StructStyle, x::LazyValues) = gettype(x) == JSONTypes.ARRAY +StructUtils.nulllike(::StructUtils.StructStyle, x::LazyValues) = gettype(x) == JSONTypes.NULL + +# core method that detects what JSON value is at the current position +# and immediately returns an appropriate LazyValue instance +function _lazy(buf, pos, len, b, opts, isroot=false) + if opts.jsonlines + return LazyValue(buf, pos, JSONTypes.ARRAY, opts, isroot) + elseif b == UInt8('{') + return LazyValue(buf, pos, JSONTypes.OBJECT, opts, isroot) + elseif b == UInt8('[') + return LazyValue(buf, pos, JSONTypes.ARRAY, opts, isroot) + elseif b == UInt8('"') + return LazyValue(buf, pos, JSONTypes.STRING, opts, isroot) + elseif b == UInt8('n') && pos + 3 <= len && + getbyte(buf, pos + 1) == UInt8('u') && + getbyte(buf, pos + 2) == UInt8('l') && + getbyte(buf, pos + 3) == UInt8('l') + return LazyValue(buf, pos, JSONTypes.NULL, opts, isroot) + elseif b == UInt8('t') && pos + 3 <= len && + getbyte(buf, pos + 1) == UInt8('r') && + getbyte(buf, pos + 2) == UInt8('u') && + getbyte(buf, pos + 3) == UInt8('e') + return LazyValue(buf, pos, JSONTypes.TRUE, opts, isroot) + elseif b == UInt8('f') && pos + 
4 <= len && + getbyte(buf, pos + 1) == UInt8('a') && + getbyte(buf, pos + 2) == UInt8('l') && + getbyte(buf, pos + 3) == UInt8('s') && + getbyte(buf, pos + 4) == UInt8('e') + return LazyValue(buf, pos, JSONTypes.FALSE, opts, isroot) + elseif b == UInt8('-') || (UInt8('0') <= b <= UInt8('9')) || (opts.allownan && (b == UInt8('+') || firstbyteeq(opts.nan, b) || firstbyteeq(opts.ninf, b) || firstbyteeq(opts.inf, b))) + return LazyValue(buf, pos, JSONTypes.NUMBER, opts, isroot) + else + error = InvalidJSON + @goto invalid + end +@label invalid + if !opts.allownan + # quick check if the value here is inf/nan/+1 and we can provide + # a more helpful error message about how to parse + if b in (UInt8('N'), UInt8('n'), UInt8('I'), UInt8('i'), UInt8('+')) + throw(ArgumentError("JSON parsing error: possible `NaN`, `Inf`, or `-Inf` which are not valid JSON values. Use the `allownan=true` option and `ninf`, `inf`, and/or `nan` keyword arguments to parse.")) + end + end + invalid(error, buf, pos, Any) +end + +# core JSON object parsing function +# takes a `keyvalfunc` that is applied to each key/value pair +# `keyvalfunc` is provided a PtrString => LazyValue pair +# `keyvalfunc` can return `StructUtils.EarlyReturn` to short-circuit parsing +# otherwise, it should return a `pos::Int` value that notes the next position to continue parsing +# to materialize the key, call `convert(String, key)` +# PtrString can be compared to String via `==` or `isequal` to help avoid allocating the full String in some cases +# returns a `pos` value that notes the next position where parsing should continue +# this is essentially the `StructUtils.applyeach` implementation for LazyValues w/ type OBJECT +function applyobject(keyvalfunc, x::LazyValues) + pos = getpos(x) + buf = getbuf(x) + len = getlength(buf) + opts = getopts(x) + b = getbyte(buf, pos) + if b != UInt8('{') + error = ExpectedOpeningObjectChar + @goto invalid + end + pos += 1 + @nextbyte + b == UInt8('}') && return pos + 1 + while true 
+ # applystring returns key as a PtrString + key, pos = @inline applystring(nothing, LazyValue(buf, pos, JSONTypes.STRING, getopts(x), false)) + @nextbyte + if b != UInt8(':') + error = ExpectedColon + @goto invalid + end + pos += 1 + @nextbyte + # we're now positioned at the start of the value + val = _lazy(buf, pos, len, b, opts) + ret = keyvalfunc(key, val) + # if ret is an EarlyReturn, then we're short-circuiting + # parsing via e.g. selection syntax, so return immediately + ret isa StructUtils.EarlyReturn && return ret + # if keyvalfunc didn't materialize `val` and return an + # updated `pos`, then we need to skip val ourselves + # WARNING: parsing can get corrupted if random Int values are returned from keyvalfunc + pos = (ret isa Int && ret > pos) ? ret : skip(val) + @nextbyte + # check for terminating conditions + if b == UInt8('}') + return pos + 1 + elseif b != UInt8(',') + error = ExpectedComma + @goto invalid + end + pos += 1 # move past ',' + @nextbyte + end +@label invalid + invalid(error, buf, pos, "object") +end + +# jsonlines is unique because it's an *implicit* array +# so newlines are valid delimiters (not ignored whitespace) +# and EOFs are valid terminators (not errors) +# these checks are injected after we've processed the "line" +# so we need to check for EOFs and newlines +macro jsonlines_checks() + esc(quote + # if we're at EOF, then we're done + pos > len && return pos + # now we want to ignore whitespace, but *not* newlines + b = getbyte(buf, pos) + while b == UInt8(' ') || b == UInt8('\t') + pos += 1 + pos > len && return pos + b = getbyte(buf, pos) + end + # any combo of '\r', '\n', or '\r\n' is a valid delimiter + foundr = false + if b == UInt8('\r') + foundr = true + pos += 1 + pos > len && return pos + b = getbyte(buf, pos) + end + if b == UInt8('\n') + pos += 1 + pos > len && return pos + b = getbyte(buf, pos) + elseif !foundr + # if we didn't find a newline and we're not EOF + # then that's an error; only whitespace, newlines, + # 
and EOFs are valid in between lines + error = ExpectedNewline + @goto invalid + end + # since we found a newline, we now ignore all whitespace, including newlines (empty lines) + # until we find EOF or non-whitespace + while b == UInt8(' ') || b == UInt8('\t') || b == UInt8('\n') || b == UInt8('\r') + pos += 1 + pos > len && return pos + b = getbyte(buf, pos) + end + end) +end + +# core JSON array parsing function +# takes a `keyvalfunc` that is applied to each index => value element +# `keyvalfunc` is provided a Int => LazyValue pair +# applyeach always requires a key-value pair function +# so we use the index as the key +# returns a `pos` value that notes the next position where parsing should continue +# this is essentially the `StructUtils.applyeach` implementation for LazyValues w/ type ARRAY +function applyarray(keyvalfunc, x::LazyValues) + pos = getpos(x) + buf = getbuf(x) + len = getlength(buf) + opts = getopts(x) + jsonlines = opts.jsonlines + b = getbyte(buf, pos) + if !jsonlines + if b != UInt8('[') + error = ExpectedOpeningArrayChar + @goto invalid + end + pos += 1 + @nextbyte + b == UInt8(']') && return pos + 1 + else + # for jsonlines, we need to make sure that recursive + # lazy values *don't* consider individual lines *also* + # to be jsonlines + opts = LazyOptions(; allownan=opts.allownan, ninf=opts.ninf, inf=opts.inf, nan=opts.nan, jsonlines=false) + end + i = 1 + while true + # we're now positioned at the start of the value + val = _lazy(buf, pos, len, b, opts) + ret = keyvalfunc(i, val) + ret isa StructUtils.EarlyReturn && return ret + # if keyvalfunc didn't materialize `val` and return an + # updated `pos`, then we need to skip val ourselves + # WARNING: parsing can get corrupted if random Int values are returned from keyvalfunc + pos = (ret isa Int && ret > pos) ? 
ret : skip(val) + if jsonlines + @jsonlines_checks + else + @nextbyte + if b == UInt8(']') + return pos + 1 + elseif b != UInt8(',') + error = ExpectedComma + @goto invalid + end + pos += 1 # move past ',' + @nextbyte + end + i += 1 + end + +@label invalid + invalid(error, buf, pos, "array") +end + +# temporary string type to enable deferrment of string allocation in certain cases (like navigating a lazy structure) +struct PtrString + ptr::Ptr{UInt8} + len::Int + escaped::Bool +end + +if VERSION < v"1.11" + mem(n) = Vector{UInt8}(undef, n) + _tostr(m::Vector{UInt8}, slen) = ccall(:jl_array_to_string, Ref{String}, (Any,), resize!(m, slen)) +else + mem(n) = Memory{UInt8}(undef, n) + _tostr(m::Memory{UInt8}, slen) = ccall(:jl_genericmemory_to_string, Ref{String}, (Any, Int), m, slen) +end + +function Base.convert(::Type{String}, x::PtrString) + if x.escaped + m = mem(x.len) + slen = GC.@preserve m unsafe_unescape_to_buffer(x.ptr, x.len, pointer(m)) + return _tostr(m, slen) + end + return unsafe_string(x.ptr, x.len) +end + +Base.convert(::Type{Symbol}, x::PtrString) = ccall(:jl_symbol_n, Ref{Symbol}, (Ptr{UInt8}, Int), x.ptr, x.len) + +function Base.convert(::Type{T}, x::PtrString) where {T <: Enum} + sym = convert(Symbol, x) + for (k, v) in Base.Enums.namemap(T) + v === sym && return T(k) + end + throw(ArgumentError("invalid `$T` string value: \"$sym\"")) +end + +Base.:(==)(x::PtrString, y::AbstractString) = x.len == sizeof(y) && ccall(:memcmp, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), x.ptr, pointer(y), x.len) == 0 +Base.:(==)(x::PtrString, y::PtrString) = x.len == y.len && ccall(:memcmp, Cint, (Ptr{UInt8}, Ptr{UInt8}, Csize_t), x.ptr, y.ptr, x.len) == 0 +Base.isequal(x::PtrString, y::AbstractString) = x == y +Base.isequal(x::PtrString, y::PtrString) = x == y +StructUtils.keyeq(x::PtrString, y::AbstractString) = x == y +StructUtils.keyeq(x::PtrString, y::String) = x == y +StructUtils.keyeq(x::PtrString, y::Symbol) = convert(Symbol, x) == y + +# core JSON string 
parsing function +# returns a PtrString and the next position to parse +# a PtrString is a semi-lazy, internal-only representation +# that notes whether escape characters were encountered while parsing +# or not. It allows materialize, _binary, etc. to deal +# with the string data appropriately without forcing a String allocation +# PtrString should NEVER be visible to users though! +function applystring(f, x::LazyValue) + buf, pos = getbuf(x), getpos(x) + len, b = getlength(buf), getbyte(buf, pos) + if b != UInt8('"') + error = ExpectedOpeningQuoteChar + @goto invalid + end + pos += 1 + spos = pos + escaped = false + @nextbyte(false) + while b != UInt8('"') + # disallow raw control characters within a JSON string + b <= UInt8(0x1F) && unescaped_control(b) + if b == UInt8('\\') + # skip next character + escaped = true + if pos + 2 > len + error = UnexpectedEOF + @goto invalid + end + pos += 2 + else + pos += 1 + end + @nextbyte(false) + end + str = PtrString(pointer(buf, spos), pos - spos, escaped) + if f === nothing + return str, pos + 1 + else + f(str) + return pos + 1 + end + +@label invalid + invalid(error, buf, pos, "string") +end + +# core JSON number parsing function +# we rely on functionality in Parsers to help infer what kind +# of number we're parsing; valid return types include: +# Int64, BigInt, Float64 or BigFloat +const INT64_OVERFLOW_VAL = div(typemax(Int64), 10) +const INT64_OVERFLOW_DIGIT = typemax(Int64) % 10 + +macro check_special(special, value) + esc(quote + pos = startpos + b = getbyte(buf, pos) + bytes = codeunits($special) + i = 1 + while b == @inbounds(bytes[i]) + pos += 1 + i += 1 + i > length(bytes) && break + if pos > len + error = UnexpectedEOF + @goto invalid + end + b = getbyte(buf, pos) + i += 1 + end + if i > length(bytes) + valfunc($value) + return pos + end + end) +end + +function applynumber(valfunc, x::LazyValue) + buf = getbuf(x) + pos = getpos(x) + len = getlength(buf) + opts = getopts(x) + b = getbyte(buf, pos) + startpos = 
pos + isneg = isfloat = overflow = false + if !opts.allownan + val = Int64(0) + isneg = b == UInt8('-') + if isneg || b == UInt8('+') # spec doesn't allow leading +, but we do + pos += 1 + if pos > len + error = UnexpectedEOF + @goto invalid + end + b = getbyte(buf, pos) + end + # Parse integer part, check for leading zeros (invalid JSON) + if b == UInt8('0') + pos += 1 + if pos <= len + b = getbyte(buf, pos) + if UInt8('0') <= b <= UInt8('9') + error = InvalidNumber + @goto invalid + end + end + elseif UInt8('1') <= b <= UInt8('9') + while UInt8('0') <= b <= UInt8('9') + digit = Int64(b - UInt8('0')) + if val > INT64_OVERFLOW_VAL || (val == INT64_OVERFLOW_VAL && digit > INT64_OVERFLOW_DIGIT) + overflow = true + break + end + val = Int64(10) * val + digit + pos += 1 + pos > len && break + b = getbyte(buf, pos) + end + if overflow + bval = BigInt(val) + while UInt8('0') <= b <= UInt8('9') + digit = BigInt(b - UInt8('0')) + bval = BigInt(10) * bval + digit + pos += 1 + pos > len && break + b = getbyte(buf, pos) + end + end + else + error = InvalidNumber + @goto invalid + end + # Check for decimal or exponent + if b == UInt8('.') || b == UInt8('e') || b == UInt8('E') + isfloat = true + # in strict JSON spec, we need at least one digit after the decimal + if b == UInt8('.') + pos += 1 + if pos > len + error = UnexpectedEOF + @goto invalid + end + b = getbyte(buf, pos) + if !(UInt8('0') <= b <= UInt8('9')) + error = InvalidNumber + @goto invalid + end + end + end + end + if isfloat || opts.allownan + if opts.allownan + # check for NaN, Inf, -Inf + @check_special(opts.nan, NaN) + @check_special(opts.inf, Inf) + @check_special(opts.ninf, -Inf) + end + res = Parsers.xparse2(Float64, buf, startpos, len) + if !opts.allownan && Parsers.specialvalue(res.code) + # if we overflowed, then let's try BigFloat + bres = Parsers.xparse2(BigFloat, buf, startpos, len) + if !Parsers.invalid(bres.code) + valfunc(bres.val) + return startpos + bres.tlen + end + end + if 
Parsers.invalid(res.code) + error = InvalidNumber + @goto invalid + end + valfunc(res.val) + return startpos + res.tlen + else + if overflow + valfunc(isneg ? -bval : bval) + else + valfunc(isneg ? -val : val) + end + return pos + end + +@label invalid + invalid(InvalidNumber, buf, startpos, "number") +end + +# efficiently skip over a JSON value +# for object/array/string/number, we pass no-op functions +# and for bool/null, we just skip the appropriate number of bytes +function skip(x::LazyValues) + T = gettype(x) + if T == JSONTypes.OBJECT + return applyobject((k, v) -> 0, x) + elseif T == JSONTypes.ARRAY + return applyarray((i, v) -> 0, x) + elseif T == JSONTypes.STRING + return applystring(s -> 0, x) + elseif T == JSONTypes.NUMBER + return applynumber(n -> 0, x) + elseif T == JSONTypes.TRUE + return getpos(x) + 4 + elseif T == JSONTypes.FALSE + return getpos(x) + 5 + elseif T == JSONTypes.NULL + return getpos(x) + 4 + else + error("invalid JSON value type: $T") + end +end + +# helper definitions for LazyObject/LazyArray to they display as such +gettype(::LazyObject) = JSONTypes.OBJECT + +Base.length(x::LazyObject) = StructUtils.applylength(x) + +struct IterateObjectClosure + kvs::Vector{Pair{String, LazyValue}} +end + +function (f::IterateObjectClosure)(k, v) + push!(f.kvs, convert(String, k) => v) + return +end + +function Base.iterate(x::LazyObject, st=nothing) + if st === nothing + # first iteration + kvs = Pair{String, LazyValue}[] + applyobject(IterateObjectClosure(kvs), x) + i = 1 + else + kvs = st[1] + i = st[2] + end + i > length(kvs) && return nothing + return kvs[i], (kvs, i + 1) +end + +gettype(::LazyArray) = JSONTypes.ARRAY + +Base.IndexStyle(::Type{<:LazyArray}) = Base.IndexLinear() + +Base.size(x::LazyArray) = (StructUtils.applylength(x),) + +Base.isassigned(x::LazyArray, i::Int) = true +Base.getindex(x::LazyArray, i::Int) = Selectors._getindex(x, i) + +# show implementation for LazyValue +function Base.show(io::IO, x::LazyValue) + T = gettype(x) 
+ if T == JSONTypes.OBJECT + compact = get(io, :compact, false)::Bool + lo = LazyObject(x) + if compact + show(io, lo) + else + io = IOContext(io, :compact => true) + show(io, MIME"text/plain"(), lo) + end + elseif T == JSONTypes.ARRAY + compact = get(io, :compact, false)::Bool + la = LazyArray(x) + if compact + show(io, la) + else + io = IOContext(io, :compact => true) + show(io, MIME"text/plain"(), la) + end + elseif T == JSONTypes.STRING + str, _ = applystring(nothing, x) + Base.print(io, "JSON.LazyValue(", repr(convert(String, str)), ")") + elseif T == JSONTypes.NULL + Base.print(io, "JSON.LazyValue(nothing)") + else # bool/number + Base.print(io, "JSON.LazyValue(", parse(x), ")") + end +end \ No newline at end of file diff --git a/src/object.jl b/src/object.jl new file mode 100644 index 00000000..10f73c8e --- /dev/null +++ b/src/object.jl @@ -0,0 +1,206 @@ +struct NotSet end +const notset = NotSet() + +""" + JSON.Object{K,V} + +A mutable `AbstractDict` type for JSON objects. Internally is a linked list of key-value pairs, where each pair is represented by an `Object` instance. The first instance is the root object. +The `Object` type is used to represent JSON objects in a mutable way, allowing for efficient insertion and deletion of key-value pairs. It is designed to be used with the `JSON` package for parsing and deserializing JSON data. + +Because of the linked-list representation, key lookups are O(n), using a simple linear scan. +For small objects, this is very efficient, and worth the memory overhead vs. a full `Dict` or `OrderedDict`. +For Objects with many entries (hundreds or thousands), this is not as efficient. In that case, consider using a `Dict` or `OrderedDict` instead. 
+""" +# empty Object: key, value, child all notset +# root Object: key, value are notset, child is defined +# non-root Object: key, value are set, child is notset for last node +mutable struct Object{K,V} <: AbstractDict{K,V} + key::Union{NotSet, K} # for root object, key/value are notset + value::Union{NotSet, V} + child::Union{NotSet, Object{K,V}} # possibly notset + + # root constructor: key is const notset + function Object{K,V}() where {K,V} + x = new{K,V}(notset, notset, notset) + return x + end + + # all non-root Objects *must* be set as the child of another Object + # WARNING: this constructor can allow duplicate `k` in a root Object as no check is done + function Object{K,V}(obj::Object{K,V}, k, v) where {K,V} + @assert _ch(obj) === notset "Object child already defined" + nobj = new{K,V}(k, v, notset) + setfield!(obj, :child, nobj) + return nobj + end +end + +Object() = Object{Any, Any}() # default empty object + +_k(obj::Object) = getfield(obj, :key) +_v(obj::Object) = getfield(obj, :value) +_ch(obj::Object) = getfield(obj, :child) + +Object(d::AbstractDict{K,V}) where {K,V} = Object{K,V}(d) + +function Object{K,V}(d::AbstractDict{K,V}) where {K,V} + root = obj = Object{K,V}() + for (k, v) in d + obj = Object{K,V}(obj, k, v) + end + return root +end + +Object(pairs::Pair{K,V}...) where {K,V} = Object{K,V}(pairs...) +Object(pairs::Pair...) = Object{Any,Any}(pairs...) + +function Object{K,V}(pairs::Pair...) 
where {K,V} + root = obj = Object{K,V}() + for (k, v) in pairs + obj = Object{K,V}(obj, k, v) + end + return root +end + +# generic iterator constructors +function Object(itr) + root = obj = nothing + st = iterate(itr) + while st !== nothing + kv, state = st + if kv isa Pair || kv isa Tuple{Any,Any} + k, v = kv + if root === nothing + root = Object{typeof(k), typeof(v)}() + obj = root + end + obj = Object{typeof(k), typeof(v)}(obj, k, v) + else + throw(ArgumentError("Iterator must yield Pair or 2-tuple, got $(typeof(kv))")) + end + st = iterate(itr, state) + end + return root === nothing ? Object{Any,Any}() : root +end + +function Object{K,V}(itr) where {K,V} + root = obj = Object{K,V}() + st = iterate(itr) + while st !== nothing + kv, state = st + if kv isa Pair || kv isa Tuple{Any,Any} + k, v = kv + obj = Object{K, V}(obj, k, v) + else + throw(ArgumentError("Iterator must yield Pair or 2-tuple, got $(typeof(kv))")) + end + st = iterate(itr, state) + end + return root +end + +function Base.iterate(orig::Object{K,V}, obj=orig) where {K,V} + obj === nothing && return nothing + if _k(obj) === notset + # if key is notset, we either have to iterate from the child or we're done + return _ch(obj) === notset ? nothing : iterate(_ch(obj)::Object{K,V}) + end + return (Pair{K,V}(_k(obj)::K, _v(obj)::V), _ch(obj) === notset ? 
nothing : _ch(obj)::Object{K,V}) +end + +function Base.length(obj::Object{K,V}) where {K,V} + count = 0 + while true + _k(obj) !== notset && (count += 1) + _ch(obj) === notset && break + obj = _ch(obj)::Object{K,V} + end + return count +end +Base.isempty(obj::Object) = _k(obj) === notset && _ch(obj) === notset +Base.empty(::Object{K,V}) where {K,V} = Object{K,V}() # empty object + +# linear node lookup +@inline function find_node_by_key(obj::Object{K,V}, key::K) where {K,V} + while true + _k(obj) !== notset && isequal(_k(obj)::K, key) && return obj + _ch(obj) === notset && break + obj = _ch(obj)::Object{K,V} + end + return nothing +end + +# get with fallback callable +function Base.get(f::Base.Callable, obj::Object{K,V}, key) where {K,V} + node = find_node_by_key(obj, key) + node !== nothing && return _v(node)::V + return f() +end + +Base.getindex(obj::Object, key) = get(() -> throw(KeyError(key)), obj, key) +Base.get(obj::Object, key, default) = get(() -> default, obj, key) + +# support getproperty for dot access +Base.getproperty(obj::Object{Symbol}, sym::Symbol) = getindex(obj, sym) +Base.getproperty(obj::Object{String}, sym::Symbol) = getindex(obj, String(sym)) +Base.propertynames(obj::Object{K,V}) where {K,V} = _k(obj) === notset && _ch(obj) === notset ? () : _propertynames(_ch(obj)::Object{K,V}, ()) + +function _propertynames(obj::Object{K,V}, acc) where {K,V} + new = (acc..., Symbol(_k(obj)::K)) + return _ch(obj) === notset ? new : _propertynames(_ch(obj)::Object{K,V}, new) +end + +# haskey +Base.haskey(obj::Object, key) = find_node_by_key(obj, key) !== nothing +Base.haskey(obj::Object{String}, key::Symbol) = haskey(obj, String(key)) + +# setindex! 
finds node with key and sets value or inserts a new node +function Base.setindex!(obj::Object{K,V}, value, key::K) where {K,V} + root = obj + while true + if _k(obj) !== notset && isequal(_k(obj)::K, key) + setfield!(obj, :value, convert(V, value)) + return root + end + _ch(obj) === notset && break + obj = _ch(obj)::Object{K,V} + end + # if we reach here, we need to insert a new node + Object{K,V}(obj, key, value) + return value +end + +# delete! removes node +function Base.delete!(obj::Object{K,V}, key::K) where {K,V} + # check empty case + _ch(obj) === notset && return obj + root = parent = obj + obj = _ch(obj)::Object{K,V} + while true + if _k(obj) !== notset && isequal(_k(obj)::K, key) + # we found the node to remove + # if node is leaf, we need to set parent as leaf + # otherwise, we set child as child of parent + if _ch(obj) === notset + setfield!(parent, :child, notset) + else + setfield!(parent, :child, _ch(obj)::Object{K,V}) + end + end + _ch(obj) === notset && break + parent = obj + obj = _ch(obj)::Object{K,V} + end + return root +end + +function Base.empty!(obj::Object) + setfield!(obj, :child, notset) + return obj +end + +# support setproperty for dot access +Base.setproperty!(obj::Object, sym::Symbol, val) = setindex!(obj, val, sym) +Base.setproperty!(obj::Object{String}, sym::Symbol, val) = setindex!(obj, val, String(sym)) + +Base.merge(a::NamedTuple, b::Object{String,Any}) = merge(a, (Symbol(k) => v for (k, v) in b)) \ No newline at end of file diff --git a/src/parse.jl b/src/parse.jl new file mode 100644 index 00000000..ed3be684 --- /dev/null +++ b/src/parse.jl @@ -0,0 +1,361 @@ +""" + JSON.parse(json) + JSON.parse(json, T) + JSON.parse!(json, x) + JSON.parsefile(filename) + JSON.parsefile(filename, T) + JSON.parsefile!(filename, x) + +Parse a JSON input (string, vector, stream, LazyValue, etc.) into a Julia value. The `parsefile` variants +take a filename, open the file, and pass the `IOStream` to `parse`. 
+ +Currently supported keyword arguments include: + * `allownan`: allows parsing `NaN`, `Inf`, and `-Inf` since they are otherwise invalid JSON + * `ninf`: string to use for `-Inf` (default: `"-Infinity"`) + * `inf`: string to use for `Inf` (default: `"Infinity"`) + * `nan`: string to use for `NaN` (default: `"NaN"`) + * `jsonlines`: treat the `json` input as an implicit JSON array, delimited by newlines, each element being parsed from each row/line in the input + * `dicttype`: a custom `AbstractDict` type to use instead of `$DEFAULT_OBJECT_TYPE` as the default type for JSON object materialization + * `null`: a custom value to use for JSON null values (default: `nothing`) + * `style`: a custom `StructUtils.StructStyle` subtype instance to be used in calls to `StructUtils.make` and `StructUtils.lift`. This allows overriding + default behaviors for non-owned types. + +The methods without a type specified (`JSON.parse(json)`, `JSON.parsefile(filename)`), do a generic materialization into +predefined default types, including: + * JSON object => `$DEFAULT_OBJECT_TYPE` (**see note below**) + * JSON array => `Vector{Any}` + * JSON string => `String` + * JSON number => `Int64`, `BigInt`, `Float64`, or `BigFloat` + * JSON true => `true` + * JSON false => `false` + * JSON null => `nothing` + +When a type `T` is specified (`JSON.parse(json, T)`, `JSON.parsefile(filename, T)`), materialization to a value +of type `T` will be attempted utilizing machinery and interfaces provided by the StructUtils.jl package, including: + * For JSON objects, JSON keys will be matched against field names of `T` with a value being constructed via `T(args...)` + * If `T` was defined with the `@noarg` macro, an empty instance will be constructed, and field values set as JSON keys match field names + * If `T` had default field values defined using the `@defaults` or `@kwarg` macros (from StructUtils.jl package), those will be set in the value of `T` unless different values are parsed from the JSON + 
* JSON keys that don't match field names in `T` will be ignored (skipped over) + * If a field in `T` has a `name` fieldtag, the `name` value will be used to match JSON keys instead + * If `T` or any recursive field type of `T` is abstract, an appropriate `JSON.@choosetype T x -> ...` definition should exist for "choosing" a concrete type at runtime; default type choosing exists for `Union{T, Missing}` and `Union{T, Nothing}` where the JSON value is checked if `null`. If the `Any` type is encountered, the default materialization types will be used (`JSON.Object`, `Vector{Any}`, etc.) + * For any non-JSON-standard non-aggregate (i.e. non-object, non-array) field type of `T`, a `JSON.lift(::Type{T}, x) = ...` definition can be defined for how to "lift" the default JSON value (String, Number, Bool, `nothing`) to the type `T`; a default lift definition exists, for example, for `JSON.lift(::Type{Missing}, x) = missing` where the standard JSON value for `null` is `nothing` and it can be "lifted" to `missing` + * For any `T` or recursive field type of `T` that is `AbstractDict`, non-string/symbol/integer keys will need to have a `StructUtils.liftkey(::Type{T}, x))` definition for how to "lift" the JSON string key to the key type of `T` + +For any `T` or recursive field type of `T` that is `JSON.JSONText`, the next full raw JSON value will be preserved in the `JSONText` wrapper as-is. + +For the unique case of nested JSON arrays and prior knowledge of the expected dimensionality, +a target type `T` can be given as an `AbstractArray{T, N}` subtype. In this case, the JSON array data is materialized as an +n-dimensional array, where: the number of JSON array nestings must match the Julia array dimensionality (`N`), +nested JSON arrays at matching depths are assumed to have equal lengths, and the length of +the innermost JSON array is the 1st dimension length and so on. 
For example, the JSON array `[[[1.0,2.0]]]` +would be materialized as a 3-dimensional array of `Float64` with sizes `(2, 1, 1)`, when called +like `JSON.parse("[[[1.0,2.0]]]", Array{Float64, 3})`. Note that n-dimensional Julia +arrays are written to json as nested JSON arrays by default, to enable lossless re-parsing, +though the dimensionality must still be provided explicitly to the call to `parse` (i.e. default parsing via `JSON.parse(json)` +will result in plain nested `Vector{Any}`s returned). + +Examples: +```julia +using Dates + +abstract type AbstractMonster end + +struct Dracula <: AbstractMonster + num_victims::Int +end + +struct Werewolf <: AbstractMonster + witching_hour::DateTime +end + +JSON.@choosetype AbstractMonster x -> x.monster_type[] == "vampire" ? Dracula : Werewolf + +struct Percent <: Number + value::Float64 +end + +JSON.lift(::Type{Percent}, x) = Percent(Float64(x)) +StructUtils.liftkey(::Type{Percent}, x::String) = Percent(parse(Float64, x)) + +@defaults struct FrankenStruct + id::Int = 0 + name::String = "Jim" + address::Union{Nothing, String} = nothing + rate::Union{Missing, Float64} = missing + type::Symbol = :a &(json=(name="franken_type",),) + notsure::Any = nothing + monster::AbstractMonster = Dracula(0) + percent::Percent = Percent(0.0) + birthdate::Date = Date(0) &(json=(dateformat="yyyy/mm/dd",),) + percentages::Dict{Percent, Int} = Dict{Percent, Int}() + json_properties::JSONText = JSONText("") + matrix::Matrix{Float64} = Matrix{Float64}(undef, 0, 0) +end + +json = \"\"\" +{ + "id": 1, + "address": "123 Main St", + "rate": null, + "franken_type": "b", + "notsure": {"key": "value"}, + "monster": { + "monster_type": "vampire", + "num_victims": 10 + }, + "percent": 0.1, + "birthdate": "2023/10/01", + "percentages": { + "0.1": 1, + "0.2": 2 + }, + "json_properties": {"key": "value"}, + "matrix": [[1.0, 2.0], [3.0, 4.0]], + "extra_key": "extra_value" +} +\"\"\" +JSON.parse(json, FrankenStruct) +# FrankenStruct(1, "Jim", "123 Main St", 
missing, :b, JSON.Object{String, Any}("key" => "value"), Dracula(10), Percent(0.1), Date("2023-10-01"), Dict{Percent, Int64}(Percent(0.2) => 2, Percent(0.1) => 1), JSONText("{\"key\": \"value\"}"), [1.0 3.0; 2.0 4.0])
+```
+
+Let's walk through some notable features of the example above:
+ * The `name` field isn't present in the JSON input, so the default value of `"Jim"` is used.
+ * The `address` field uses a default `@choosetype` to determine that the JSON value is not `null`, so a `String` should be parsed for the field value.
+ * The `rate` field has a `null` JSON value, so the default `@choosetype` recognizes it should be "lifted" to `Missing`, which then uses a predefined `lift` definition for `Missing`.
+ * The `type` field is a `Symbol`, and has a fieldtag `json=(name="franken_type",)` which means the JSON key `franken_type` will be used to set the field value instead of the default `type` field name. A default `lift` definition for `Symbol` is used to convert the JSON string value to a `Symbol`.
+ * The `notsure` field is of type `Any`, so the default object type `JSON.Object{String, Any}` is used to materialize the JSON value.
+ * The `monster` field is a polymorphic type, and the JSON value has a `monster_type` key that determines which concrete type to use. The `@choosetype` macro is used to define the logic for choosing the concrete type based on the JSON input. Note that the `x` in `@choosetype` is a `LazyValue`, so we materialize via `x.monster_type[]` in order to compare with the string `"vampire"`.
+ * The `percent` field is a custom type `Percent` and the `JSON.lift` defines how to construct a `Percent` from the JSON value, which is a `Float64` in this case.
+ * The `birthdate` field uses a custom date format for parsing, specified via the field's `dateformat` fieldtag (`dateformat="yyyy/mm/dd"`).
+ * The `percentages` field is a dictionary with keys of type `Percent`, which is a custom type.
The `liftkey` function is defined to convert the JSON string keys to `Percent` types (parses the Float64 manually)
+ * The `json_properties` field has a type of `JSONText`, which means the raw JSON will be preserved as a String of the `JSONText` type.
+ * The `matrix` field is a `Matrix{Float64}`, so the JSON input array-of-arrays are materialized as such.
+ * The `extra_key` field is not defined in the `FrankenStruct` type, so it is ignored and skipped over.
+
+NOTE:
+Why use `JSON.Object{String, Any}` as the default object type? It provides several benefits:
+ * Behaves as a drop-in replacement for `Dict{String, Any}`, so no loss of functionality
+ * Performance! Its internal representation means memory savings and faster construction for small objects typical in JSON (vs `Dict`)
+ * Insertion order is preserved, so the order of keys in the JSON input is preserved in `JSON.Object`
+ * Convenient `getproperty` (i.e. `obj.key`) syntax is supported, even for `Object{String,Any}` key types (again ideal/specialized for JSON usage)
+
+`JSON.Object`'s internal representation uses a linked list, thus key lookups are linear time (O(n)). For *large* JSON objects
+(hundreds or thousands of keys), consider using a `Dict{String, Any}` instead, like `JSON.parse(json; dicttype=Dict{String, Any})`.
+""" +function parse end + +import StructUtils: StructStyle + +abstract type JSONStyle <: StructStyle end + +# defining a custom style allows us to pass a non-default dicttype `O` through JSON.parse +struct JSONReadStyle{O, T} <: JSONStyle + null::T +end + +JSONReadStyle{O}(null::T) where {O, T} = JSONReadStyle{O, T}(null) + +objecttype(::StructStyle) = DEFAULT_OBJECT_TYPE +objecttype(::JSONReadStyle{OT}) where {OT} = OT +nullvalue(::StructStyle) = nothing +nullvalue(st::JSONReadStyle) = st.null + +# this allows struct fields to specify tags under the json key specifically to override JSON behavior +StructUtils.fieldtagkey(::JSONStyle) = :json + +function parsefile end +@doc (@doc parse) parsefile + +function parsefile! end +@doc (@doc parse) parsefile! + +parsefile(file; jsonlines::Union{Bool, Nothing}=nothing, kw...) = open(io -> parse(io; jsonlines=(jsonlines === nothing ? isjsonl(file) : jsonlines), kw...), file) +parsefile(file, ::Type{T}; jsonlines::Union{Bool, Nothing}=nothing, kw...) where {T} = open(io -> parse(io, T; jsonlines=(jsonlines === nothing ? isjsonl(file) : jsonlines), kw...), file) +parsefile!(file, x::T; jsonlines::Union{Bool, Nothing}=nothing, kw...) where {T} = open(io -> parse!(io, x; jsonlines=(jsonlines === nothing ? isjsonl(file) : jsonlines), kw...), file) + +parse(io::Union{IO, Base.AbstractCmd}, ::Type{T}=Any; kw...) where {T} = parse(Base.read(io), T; kw...) + +function parse(io::IOStream, ::Type{T}=Any; kw...) where {T} + buf = Mmap.mmap(io) + res = parse(buf, T; kw...) + checkfinalize!(buf) + return res +end + +parse!(io::Union{IO, Base.AbstractCmd}, x::T; kw...) where {T} = parse!(Base.read(io), x; kw...) + +function parse!(io::IOStream, x::T; kw...) where {T} + buf = Mmap.mmap(io) + res = parse!(buf, x; kw...) 
+ checkfinalize!(buf) + return res +end + +parse(buf::Union{AbstractVector{UInt8}, AbstractString}, ::Type{T}=Any; + dicttype::Type{O}=DEFAULT_OBJECT_TYPE, + null=nothing, + style::StructStyle=JSONReadStyle{dicttype}(null), kw...) where {T, O} = + @inline parse(lazy(buf; kw...), T; dicttype, null, style) + +parse!(buf::Union{AbstractVector{UInt8}, AbstractString}, x::T; dicttype::Type{O}=DEFAULT_OBJECT_TYPE, null=nothing, style::StructStyle=JSONReadStyle{dicttype}(null), kw...) where {T, O} = + @inline parse!(lazy(buf; kw...), x; dicttype, null, style) + +function parse(x::LazyValue, ::Type{T}=Any; dicttype::Type{O}=DEFAULT_OBJECT_TYPE, null=nothing, style::StructStyle=JSONReadStyle{dicttype}(null)) where {T, O} + out = StructUtils.ValueClosure{T}() + if T == Any && dicttype == DEFAULT_OBJECT_TYPE + pos = applyvalue(out, x, null) + else + pos = StructUtils.make!(out, style, T, x, (;)) + end + getisroot(x) && checkendpos(x, T, pos) + return out.value +end + +parse!(x::LazyValue, obj::T; dicttype::Type{O}=DEFAULT_OBJECT_TYPE, null=nothing, style::StructStyle=JSONReadStyle{dicttype}(null)) where {T, O} = StructUtils.make!(style, obj, x) + +# for LazyValue, if x started at the beginning of the JSON input, +# then we want to ensure that the entire input was consumed +# and error if there are any trailing invalid JSON characters +function checkendpos(x::LazyValue, ::Type{T}, pos) where {T} + buf = getbuf(x) + len = getlength(buf) + if pos <= len + b = getbyte(buf, pos) + while b == UInt8('\t') || b == UInt8(' ') || b == UInt8('\n') || b == UInt8('\r') + pos += 1 + pos > len && break + b = getbyte(buf, pos) + end + end + if (pos - 1) != len + invalid(InvalidChar, buf, pos, T) + end + return nothing +end + +# specialized closure to optimize Object{String, Any} insertions +# to avoid doing a linear scan on each insertion, we use a Set +# to track keys seen so far. 
In the common case of non-duplicated key, +# we can insert the new key-val pair directly after the latest leaf node +mutable struct ObjectClosure{T} + root::Object{String, Any} + obj::Object{String, Any} + keys::Set{String} + null::T +end + +ObjectClosure(obj, null) = ObjectClosure(obj, obj, sizehint!(Set{String}(), 16), null) + +@inline function insert_or_overwrite!(oc::ObjectClosure, key, val) + # in! does both a hash lookup and also sets the key if not present + if _in!(key, oc.keys) + # slow path for dups; does a linear scan from our root object + setindex!(oc.root, val, key) + return + end + # this uses an "unsafe" constructor that returns the new leaf node + # and sets the child of the previous node to the new node + oc.obj = Object{String, Any}(oc.obj, key, val) # fast append path +end + +(oc::ObjectClosure)(k, v) = applyvalue(val -> insert_or_overwrite!(oc, convert(String, k), val), v, oc.null) + +# generic apply `f` to LazyValue, using default types to materialize, depending on type +function applyvalue(f, x::LazyValues, null) + type = gettype(x) + if type == JSONTypes.OBJECT + obj = Object{String, Any}() + pos = applyobject(ObjectClosure(obj, null), x) + f(obj) + return pos + elseif type == JSONTypes.ARRAY + # basically free to allocate 16 instead of Julia-default 8 and avoids + # a reallocation in many cases + arr = Vector{Any}(undef, 16) + resize!(arr, 0) + pos = applyarray(x) do _, v + applyvalue(val -> push!(arr, val), v, null) + end + f(arr) + return pos + elseif type == JSONTypes.STRING + return applystring(s -> f(convert(String, s)), x) + elseif type == JSONTypes.NUMBER + return applynumber(f, x) + elseif type == JSONTypes.NULL + f(null) + return getpos(x) + 4 + elseif type == JSONTypes.TRUE + f(true) + return getpos(x) + 4 + elseif type == JSONTypes.FALSE + f(false) + return getpos(x) + 5 + else + throw(ArgumentError("cannot parse $x")) + end +end + +# we overload make! 
for Any for LazyValues because we can dispatch to more specific +# types base on the LazyValue type +function StructUtils.make!(f, st::StructStyle, ::Type{Any}, x::LazyValues, tags) + if haskey(tags, :choosetype) + return StructUtils.make!(f, st, tags.choosetype(x), x, StructUtils._delete(tags, :choosetype)) + end + type = gettype(x) + if type == JSONTypes.OBJECT + return StructUtils.make!(f, st, objecttype(st), x, tags) + elseif type == JSONTypes.ARRAY + return StructUtils.make!(f, st, Vector{Any}, x, tags) + elseif type == JSONTypes.STRING + return StructUtils.lift(f, st, String, x, tags) + elseif type == JSONTypes.NUMBER + return StructUtils.lift(f, st, Number, x, tags) + elseif type == JSONTypes.NULL + return StructUtils.lift(f, st, Nothing, x, tags) + elseif type == JSONTypes.TRUE || type == JSONTypes.FALSE + return StructUtils.lift(f, st, Bool, x, tags) + else + throw(ArgumentError("cannot parse $x")) + end +end + +# catch PtrString via lift or make! so we can ensure it never "escapes" to user-level +StructUtils.liftkey(st::StructStyle, ::Type{T}, x::PtrString) where {T} = + StructUtils.liftkey(st, T, convert(String, x)) +StructUtils.lift(f, st::StructStyle, ::Type{T}, x::PtrString, tags) where {T} = + StructUtils.lift(f, st, T, convert(String, x), tags) +StructUtils.make!(f, st::StructStyle, ::Type{T}, x::PtrString, tags) where {T} = + StructUtils.make!(f, st, T, convert(String, x), tags) + +function StructUtils.lift(f, style::StructStyle, ::Type{T}, x::LazyValues, tags) where {T} + type = gettype(x) + if type == JSONTypes.STRING + return applystring(s -> StructUtils.lift(f, style, T, s, tags), x) + elseif type == JSONTypes.NUMBER + return applynumber(x -> StructUtils.lift(f, style, T, x, tags), x) + elseif type == JSONTypes.NULL + StructUtils.lift(f, style, T, nullvalue(style), tags) + return getpos(x) + 4 + elseif type == JSONTypes.TRUE + StructUtils.lift(f, style, T, true, tags) + return getpos(x) + 4 + elseif type == JSONTypes.FALSE + StructUtils.lift(f, 
style, T, false, tags) + return getpos(x) + 5 + elseif Base.issingletontype(T) + StructUtils.lift(f, style, T, T(), tags) + return skip(x) + else + f(StructUtils.lift(style, T, x, tags)) + return skip(x) + end +end + +function StructUtils.make!(f, st::StructStyle, ::Type{JSONText}, x::LazyValues, tags) + buf = getbuf(x) + pos = getpos(x) + endpos = skip(x) + GC.@preserve buf f(JSONText(unsafe_string(pointer(buf, pos), endpos - pos))) + return endpos +end diff --git a/src/specialized.jl b/src/specialized.jl deleted file mode 100644 index e0f0e3bd..00000000 --- a/src/specialized.jl +++ /dev/null @@ -1,157 +0,0 @@ -function maxsize_buffer(maxsize::Int) - IOBuffer(maxsize=maxsize) -end - -# Specialized functions for increased performance when JSON is in-memory -function parse_string(ps::MemoryParserState) - # "Dry Run": find length of string so we can allocate the right amount of - # memory from the start. Does not do full error checking. - fastpath, len = predict_string(ps) - - # Now read the string itself: - - # Fast path occurs when the string has no escaped characters. This is quite - # often the case in real-world data, especially when keys are short strings. - # We can just copy the data from the buffer in this case. - if fastpath - s = ps.s - ps.s = s + len + 2 # byte after closing quote - return unsafe_string(pointer(ps.utf8)+s, len) - else - String(take!(parse_string(ps, maxsize_buffer(len)))) - end -end - -""" -Scan through a string at the current parser state and return a tuple containing -information about the string. This function avoids memory allocation where -possible. - -The first element of the returned tuple is a boolean indicating whether the -string may be copied directly from the parser state. Special casing string -parsing when there are no escaped characters leads to substantially increased -performance in common situations. 
- -The second element of the returned tuple is an integer representing the exact -length of the string, in bytes when encoded as UTF-8. This information is useful -for pre-sizing a buffer to contain the parsed string. - -This function will throw an error if: - - - invalid control characters are found - - an invalid unicode escape is read - - the string is not terminated - -No error is thrown when other invalid backslash escapes are encountered. -""" -function predict_string(ps::MemoryParserState) - e = length(ps) - fastpath = true # true if no escapes in this string, so it can be copied - len = 0 # the number of UTF8 bytes the string contains - - s = ps.s + 1 # skip past opening string character " - @inbounds while s <= e - c = ps[s] - if c == BACKSLASH - fastpath = false - (s += 1) > e && break - if ps[s] == LATIN_U # Unicode escape - t = ps.s - ps.s = s + 1 - len += write(devnull, read_unicode_escape!(ps)) - s = ps.s - ps.s = t - continue - end - elseif c == STRING_DELIM - return fastpath, len - elseif c < SPACE - ps.s = s - _error(E_BAD_CONTROL, ps) - end - len += 1 - s += 1 - end - - ps.s = s - _error(E_UNEXPECTED_EOF, ps) -end - -""" -Parse the string starting at the parser state’s current location into the given -pre-sized IOBuffer. The only correctness checking is for escape sequences, so the -passed-in buffer must exactly represent the amount of space needed for parsing. 
-""" -function parse_string(ps::MemoryParserState, b::IOBuffer) - s = ps.s - e = length(ps) - - s += 1 # skip past opening string character " - len = b.maxsize - @inbounds while b.size < len - c = ps[s] - if c == BACKSLASH - s += 1 - s > e && break - c = ps[s] - if c == LATIN_U # Unicode escape - ps.s = s + 1 - write(b, read_unicode_escape!(ps)) - s = ps.s - continue - else - c = get(ESCAPES, c, 0x00) - if c == 0x00 - ps.s = s - _error(E_BAD_ESCAPE, ps) - end - end - end - - # UTF8-encoded non-ascii characters will be copied verbatim, which is - # the desired behaviour - write(b, c) - s += 1 - end - - # don't worry about non-termination or other edge cases; those should have - # been caught in the dry run. - ps.s = s + 1 - b -end - -function parse_number(pc::ParserContext{<:Any,<:Any,AllowNanInf}, ps::MemoryParserState) where AllowNanInf - s = p = ps.s - e = length(ps) - isint = true - negative = false - - @inbounds c = ps[p] - - # Parse and keep track of initial minus sign (for parsing -Infinity) - if AllowNanInf && c == MINUS_SIGN - negative = true - p += 1 - end - - # Determine the end of the floating point by skipping past ASCII values - # 0-9, +, -, e, E, and . - while p ≤ e - @inbounds c = ps[p] - if isjsondigit(c) || MINUS_SIGN == c # no-op - elseif PLUS_SIGN == c || LATIN_E == c || LATIN_UPPER_E == c || - DECIMAL_POINT == c - isint = false - elseif AllowNanInf && LATIN_UPPER_I == c - ps.s = p - infinity = parse_jsconstant(pc, ps) - return (negative ? 
-infinity : infinity) - else - break - end - p += 1 - end - ps.s = p - - number_from_bytes(pc, ps, isint, ps, s, p - 1) -end diff --git a/src/utils.jl b/src/utils.jl new file mode 100644 index 00000000..5756a423 --- /dev/null +++ b/src/utils.jl @@ -0,0 +1,250 @@ +# pre-1.11 compat +if VERSION < v"1.11" + function _in!(x, s::Set) + xT = convert(eltype(s), x) + idx, sh = Base.ht_keyindex2_shorthash!(s.dict, xT) + idx > 0 && return true + Base._setindex!(s.dict, nothing, xT, -idx, sh) + return false + end +else + const _in! = Base.in! +end + +# hand-rolled scoped enum +module JSONTypes + primitive type T 8 end + T(x::UInt8) = reinterpret(T, x) + const OBJECT = T(0x00) + const ARRAY = T(0x01) + const STRING = T(0x02) + const INT = T(0x03) + const FLOAT = T(0x04) + const FALSE = T(0x05) + const TRUE = T(0x06) + const NULL = T(0x07) + const NUMBER = T(0x08) # currently used by LazyValue + const names = Dict( + OBJECT => "OBJECT", + ARRAY => "ARRAY", + STRING => "STRING", + INT => "INT", + FALSE => "FALSE", + TRUE => "TRUE", + FLOAT => "FLOAT", + NULL => "NULL", + NUMBER => "NUMBER", + ) + Base.show(io::IO, x::T) = Base.print(io, "JSONTypes.", names[x]) +end + +# windows doesn't like mmaped buffers hanging around and will error if you try to modify +# so we call finalize eagerly to ensure the buffer is unmapped +# ref: https://github.com/JuliaIO/JSON.jl/issues/112 +checkfinalize!(buf) = Sys.iswindows() && + ((VERSION ≥ v"1.11" && hasfield(typeof(buf), :ref) && hasfield(typeof(buf.ref), :mem)) ? 
finalize(buf.ref.mem) : finalize(buf)) + +isjsonl(filename) = endswith(filename, ".jsonl") || endswith(filename, ".ndjson") + +getlength(buf::AbstractVector{UInt8}) = length(buf) +getlength(buf::AbstractString) = sizeof(buf) + +# unchecked +function getbyte(buf::AbstractVector{UInt8}, pos) + @inbounds b = buf[pos] + return b +end + +# unchecked +function getbyte(buf::AbstractString, pos) + @inbounds b = codeunit(buf, pos) + return b +end + +# helper macro to get the next byte from `buf` at index `pos` +# checks if `pos` is greater than `len` and @goto invalid if so +# if checkwh=true keep going until we get a non-whitespace byte +macro nextbyte(checkwh=true) + esc(quote + if pos > len + error = UnexpectedEOF + @goto invalid + end + b = getbyte(buf, pos) + if $checkwh + while b == UInt8('\t') || b == UInt8(' ') || b == UInt8('\n') || b == UInt8('\r') + pos += 1 + if pos > len + error = UnexpectedEOF + @goto invalid + end + b = getbyte(buf, pos) + end + end + end) +end + +firstbyteeq(str::String, b::UInt8) = isempty(str) ? false : codeunit(str, 1) == b + +# string escape/unescape utilities +const NEEDESCAPE = Set(map(UInt8, ('"', '\\', '\b', '\f', '\n', '\r', '\t'))) + +function escapechar(b) + b == UInt8('"') && return UInt8('"') + b == UInt8('\\') && return UInt8('\\') + b == UInt8('\b') && return UInt8('b') + b == UInt8('\f') && return UInt8('f') + b == UInt8('\n') && return UInt8('n') + b == UInt8('\r') && return UInt8('r') + b == UInt8('\t') && return UInt8('t') + return 0x00 +end + +iscntrl(c::Char) = c <= '\x1f' || '\x7f' <= c <= '\u9f' +function escaped(b) + if b == UInt8('/') + return [UInt8('/')] + elseif b >= 0x80 + return [b] + elseif b in NEEDESCAPE + return [UInt8('\\'), escapechar(b)] + elseif iscntrl(Char(b)) + return UInt8[UInt8('\\'), UInt8('u'), Base.string(b, base=16, pad=4)...] 
+ else + return [b] + end +end + +const ESCAPECHARS = [escaped(b) for b = 0x00:0xff] +const ESCAPELENS = [length(x) for x in ESCAPECHARS] + +function escapelength(str) + x = 0 + @simd for i = 1:ncodeunits(str) + @inbounds len = ESCAPELENS[codeunit(str, i) + 1] + x += len + end + return x +end + +function reverseescapechar(b) + b == UInt8('"') && return UInt8('"') + b == UInt8('\\') && return UInt8('\\') + b == UInt8('/') && return UInt8('/') + b == UInt8('b') && return UInt8('\b') + b == UInt8('f') && return UInt8('\f') + b == UInt8('n') && return UInt8('\n') + b == UInt8('r') && return UInt8('\r') + b == UInt8('t') && return UInt8('\t') + return 0x00 +end + +utf16_is_surrogate(c::UInt16) = (c & 0xf800) == 0xd800 +utf16_get_supplementary(lead::UInt16, trail::UInt16) = Char(UInt32(lead-0xd7f7)<<10 + trail) + +@noinline invalid_escape(src, n) = throw(ArgumentError("encountered invalid escape character in json string: \"$(unsafe_string(src, n))\"")) +@noinline unescaped_control(b) = throw(ArgumentError("encountered unescaped control character in json: '$(escape_string(Base.string(Char(b))))'")) + +# unsafe because we're not checking that src or dst are valid pointers +# NOR are we checking that up to `n` bytes after dst are also valid to write to +function unsafe_unescape_to_buffer(src::Ptr{UInt8}, n::Int, dst::Ptr{UInt8}) + len = 1 + i = 1 + @inbounds begin + while i <= n + b = unsafe_load(src, i) + if b == UInt8('\\') + i += 1 + i > n && invalid_escape(src, n) + b = unsafe_load(src, i) + if b == UInt8('u') + # need at least 4 hex digits for '\uXXXX' + if i + 4 > n + invalid_escape(src, n) + end + # parse 4 hex digits into c without throwing + c = UInt16(0) + for offset in 1:4 + bb = unsafe_load(src, i + offset) + nv = if UInt8('0') <= bb <= UInt8('9') + bb - UInt8('0') + elseif UInt8('A') <= bb <= UInt8('F') + bb - (UInt8('A') - 10) + elseif UInt8('a') <= bb <= UInt8('f') + bb - (UInt8('a') - 10) + else + invalid_escape(src, n) + end + c = (c << 4) + UInt16(nv) + 
end + # advance past the 4 hex digits + i += 4 + b = unsafe_load(src, i) + if utf16_is_surrogate(c) + # check for a following "\uXXXX" to form a pair + if i + 6 > n || unsafe_load(src, i+1) != UInt8('\\') || unsafe_load(src, i+2) != UInt8('u') + # lone surrogate: emit raw code unit in WTF-8 + b1 = UInt8(0xE0 | ((c >> 12) & 0x0F)) + b2 = UInt8(0x80 | ((c >> 6) & 0x3F)) + b3 = UInt8(0x80 | ( c & 0x3F)) + unsafe_store!(dst, b1, len); len += 1 + unsafe_store!(dst, b2, len); len += 1 + unsafe_store!(dst, b3, len); len += 1 + continue + end + # parse next 4 hex digits into c2 + c2 = UInt16(0) + for offset in 3:6 + bb = unsafe_load(src, i + offset) + nv = if UInt8('0') <= bb <= UInt8('9') + bb - UInt8('0') + elseif UInt8('A') <= bb <= UInt8('F') + bb - (UInt8('A') - 10) + elseif UInt8('a') <= bb <= UInt8('f') + bb - (UInt8('a') - 10) + else + invalid_escape(src, n) + end + c2 = (c2 << 4) + UInt16(nv) + end + if utf16_is_surrogate(c2) + # valid surrogate pair: combine and emit as UTF-8 + ch = utf16_get_supplementary(c, c2) + # consume the '\\uYYYY' + i += 6 + st = codeunits(Base.string(ch)) + for k = 1:length(st)-1 + unsafe_store!(dst, st[k], len); len += 1 + end + b = st[end] + else + # invalid trailing surrogate: treat lead as lone + b1 = UInt8(0xE0 | ((c >> 12) & 0x0F)) + b2 = UInt8(0x80 | ((c >> 6) & 0x3F)) + b3 = UInt8(0x80 | ( c & 0x3F)) + unsafe_store!(dst, b1, len); len += 1 + unsafe_store!(dst, b2, len); len += 1 + unsafe_store!(dst, b3, len); len += 1 + continue + end + else + # non-surrogate: emit as usual + ch = Char(c) + st = codeunits(Base.string(ch)) + for k = 1:length(st)-1 + unsafe_store!(dst, st[k], len); len += 1 + end + b = st[end] + end + else + b = reverseescapechar(b) + b == 0x00 && invalid_escape(src, n) + end + end + unsafe_store!(dst, b, len) + len += 1 + i += 1 + end + end + return len-1 +end diff --git a/src/write.jl b/src/write.jl new file mode 100644 index 00000000..bd249d37 --- /dev/null +++ b/src/write.jl @@ -0,0 +1,759 @@ +struct 
JSONWriteStyle <: JSONStyle end + +sizeguess(::Nothing) = 4 +sizeguess(x::Bool) = 5 +sizeguess(x::Integer) = 20 +sizeguess(x::AbstractFloat) = 20 +sizeguess(x::Union{Float16, Float32, Float64}) = Base.Ryu.neededdigits(typeof(x)) +sizeguess(x::AbstractString) = 2 + sizeof(x) +sizeguess(_) = 512 + +StructUtils.lower(::JSONStyle, ::Missing) = nothing +StructUtils.lower(::JSONStyle, x::Symbol) = String(x) +StructUtils.lower(::JSONStyle, x::Union{Enum, AbstractChar, VersionNumber, Cstring, Cwstring, UUID, Dates.TimeType, Type, Logging.LogLevel}) = string(x) +StructUtils.lower(::JSONStyle, x::Regex) = x.pattern +StructUtils.lower(::JSONStyle, x::AbstractArray{<:Any,0}) = x[1] +StructUtils.lower(::JSONStyle, x::AbstractArray{<:Any, N}) where {N} = (view(x, ntuple(_ -> :, N - 1)..., j) for j in axes(x, N)) +StructUtils.lower(::JSONStyle, x::AbstractVector) = x + +""" + JSON.omit_null(::Type{T})::Bool + JSON.omit_null(::JSONStyle, ::Type{T})::Bool + +Controls whether struct fields that are undefined or are `nothing` are included in the JSON output. +Returns `false` by default, meaning all fields are included, regardless of undef or `nothing`. To instead +ensure only *non-null* fields are written, set this to `true`. +This can also be controlled via the `omit_null` keyword argument in [`JSON.json`](@ref). + +```julia +# Override for a specific type +JSON.omit_null(::Type{MyStruct}) = true + +# Override for a custom style +struct MyStyle <: JSON.JSONStyle end +JSON.omit_null(::MyStyle, ::Type{T}) where {T} = true +``` +""" +omit_null(::Type{T}) where {T} = false +omit_null(::JSONStyle, ::Type{T}) where {T} = omit_null(T) + +""" + @omit_null struct T ... + @omit_null T + +Convenience macro to set `omit_null(::Type{T})` to `true` for the struct `T`. +Can be used in three ways: +1. In front of a struct definition: `@omit_null struct T ... end` +2. Applied to an existing struct name: `@omit_null T` +3. Chained with other macros: `@omit_null @defaults struct T ... 
end` + +The macro automatically handles complex macro expansions by walking the expression +tree to find struct definitions, making it compatible with macros like `StructUtils.@defaults`. + +# Examples +```julia +# Method 1: Struct annotation +@omit_null struct Person + name::String + email::Union{Nothing, String} +end + +# Method 2: Apply to existing struct +struct User + id::Int + profile::Union{Nothing, String} +end +@omit_null User + +# Method 3: Chain with @defaults +@omit_null @defaults struct Employee + name::String = "Anonymous" + manager::Union{Nothing, String} = nothing +end +``` +""" +macro omit_null(expr) + return _omit_macro_impl(expr, :omit_null, __module__) +end + +""" + JSON.omit_empty(::Type{T})::Bool + JSON.omit_empty(::JSONStyle, ::Type{T})::Bool + +Controls whether struct fields that are empty are included in the JSON output. +Returns `false` by default, meaning empty fields *are* included. To instead exclude empty fields, +set this to `true`. A field is considered empty if it is `nothing`, an empty collection +(empty array, dict, string, tuple, or named tuple), or `missing`. +This can also be controlled via the `omit_empty` keyword argument in [`JSON.json`](@ref). + +```julia +# Override for a specific type +JSON.omit_empty(::Type{MyStruct}) = true + +# Override for a custom style +struct MyStyle <: JSON.JSONStyle end +JSON.omit_empty(::MyStyle, ::Type{T}) where {T} = true +``` +""" +omit_empty(::Type{T}) where {T} = false +omit_empty(::JSONStyle, ::Type{T}) where {T} = omit_empty(T) + +is_empty(x) = false +is_empty(::Nothing) = true +is_empty(x::Union{AbstractDict, AbstractArray, AbstractString, Tuple, NamedTuple}) = Base.isempty(x) + +""" + @omit_empty struct T ... + @omit_empty T + +Convenience macro to set `omit_empty(::Type{T})` to `true` for the struct `T`. +Can be used in three ways: +1. In front of a struct definition: `@omit_empty struct T ... end` +2. Applied to an existing struct name: `@omit_empty T` +3. 
Chained with other macros: `@omit_empty @other_macro struct T ... end` +""" +macro omit_empty(expr) + return _omit_macro_impl(expr, :omit_empty, __module__) +end + +# Helper function for both @omit_null and @omit_empty macros +function _omit_macro_impl(expr, omit_func_name, module_context) + original_expr = expr + expr = macroexpand(module_context, expr) + # Case 1: Just a type name (Symbol or more complex type expression) + if isa(expr, Symbol) || (Meta.isexpr(expr, :curly) || Meta.isexpr(expr, :where)) + # Extract the base type name + T = _extract_type_name(expr) + return esc(quote + JSON.$omit_func_name(::Type{$T}) = true + end) + end + # Case 2: Struct definition (possibly from macro expansion) + if Meta.isexpr(expr, :struct) + ismutable, T, fieldsblock = expr.args + T = _extract_type_name(T) + return esc(quote + # insert original expr as-is + $expr + # omit function overload + JSON.$omit_func_name(::Type{$T}) = true + end) + end + # Case 3: Block expression (from complex macros like @defaults) + if Meta.isexpr(expr, :block) + # Try to find a struct definition in the block + struct_expr = _find_struct_in_block(expr) + if struct_expr !== nothing + ismutable, T, fieldsblock = struct_expr.args + T = _extract_type_name(T) + return esc(quote + # insert original expr as-is + $original_expr + # omit function overload + JSON.$omit_func_name(::Type{$T}) = true + end) + end + end + # Case 4: Macro expression that we hope expands to a struct + if Meta.isexpr(original_expr, :macrocall) + # Try to see if the expanded form is a struct + if Meta.isexpr(expr, :struct) + ismutable, T, fieldsblock = expr.args + T = _extract_type_name(T) + return esc(quote + # insert original expr as-is + $original_expr + # omit function overload + JSON.$omit_func_name(::Type{$T}) = true + end) + else + throw(ArgumentError("Macro $(original_expr.args[1]) did not expand to a struct definition")) + end + end + throw(ArgumentError("Invalid usage of @$omit_func_name macro. 
Expected: struct definition, type name, or macro that expands to struct definition")) +end + +# Helper function to recursively find a struct definition in a block expression +function _find_struct_in_block(expr) + if Meta.isexpr(expr, :struct) + return expr + elseif Meta.isexpr(expr, :block) + for arg in expr.args + result = _find_struct_in_block(arg) + if result !== nothing + return result + end + end + end + return nothing +end + +# Helper function to extract the base type name from various type expressions +function _extract_type_name(T) + if isa(T, Symbol) + return T + elseif Meta.isexpr(T, :<:) + # Handle subtyping: struct Foo <: Bar + return _extract_type_name(T.args[1]) + elseif Meta.isexpr(T, :curly) + # Handle parametric types: keep the full parametric type Foo{T} + return T + elseif Meta.isexpr(T, :where) + # Handle where clauses: struct Foo{T} where T + return _extract_type_name(T.args[1]) + else + return T + end +end + +StructUtils.lowerkey(::JSONStyle, s::AbstractString) = s +StructUtils.lowerkey(::JSONStyle, sym::Symbol) = String(sym) +StructUtils.lowerkey(::JSONStyle, n::Union{Integer, Union{Float16, Float32, Float64}}) = string(n) +StructUtils.lowerkey(::JSONStyle, x) = throw(ArgumentError("No key representation for $(typeof(x)). Define StructUtils.lowerkey(::$(typeof(x)))")) + +""" + JSON.json(x) -> String + JSON.json(io, x) + JSON.json(file_name, x) + +Serialize `x` to JSON format. The 1st method takes just the object and returns a `String`. +In the 2nd method, `io` is an `IO` object, and the JSON output will be written to it. +For the 3rd method, `file_name` is a `String`, a file will be opened and the JSON output will be written to it. + +All methods accept the following keyword arguments: + +- `omit_null::Union{Bool, Nothing}=nothing`: Controls whether struct fields that are undefined or are `nothing` are included in the JSON output. + If `true`, only non-null fields are written. 
If `false`, all fields are included regardless of being undefined or `nothing`. + If `nothing`, the behavior is determined by `JSON.omit_null(::Type{T})`, which is `false` by default. + +- `omit_empty::Union{Bool, Nothing}=nothing`: Controls whether struct fields that are empty are included in the JSON output. + If `true`, empty fields are excluded. If `false`, empty fields are included. + If `nothing`, the behavior is determined by `JSON.omit_empty(::Type{T})`. + +- `allownan::Bool=false`: If `true`, allow `Inf`, `-Inf`, and `NaN` in the output. + If `false`, throw an error if `Inf`, `-Inf`, or `NaN` is encountered. + +- `jsonlines::Bool=false`: If `true`, input must be array-like and the output will be written in the JSON Lines format, + where each element of the array is written on a separate line (i.e. separated by a single newline character `\n`). + If `false`, the output will be written in the standard JSON format. + +- `pretty::Union{Integer,Bool}=false`: Controls pretty printing of the JSON output. + If `true`, the output will be pretty-printed with 2 spaces of indentation. + If an integer, it will be used as the number of spaces of indentation. + If `false` or `0`, the output will be compact. + Note: Pretty printing is not supported when `jsonlines=true`. + +- `inline_limit::Int=0`: For arrays shorter than this limit, pretty printing will be disabled (indentation set to 0). + +- `ninf::String="-Infinity"`: Custom string representation for negative infinity. + +- `inf::String="Infinity"`: Custom string representation for positive infinity. + +- `nan::String="NaN"`: Custom string representation for NaN. + +- `float_style::Symbol=:shortest`: Controls how floating-point numbers are formatted. + Options are: + - `:shortest`: Use the shortest representation that preserves the value + - `:fixed`: Use fixed-point notation + - `:exp`: Use exponential notation + +- `float_precision::Int=1`: Number of decimal places to use when `float_style` is `:fixed` or `:exp`. 
+ +- `bufsize::Int=2^22`: Buffer size in bytes for IO operations. When writing to IO, the buffer will be flushed + to the IO stream once it reaches this size. This helps control memory usage during large write operations. + Default is 4MB (2^22 bytes). This parameter is ignored when returning a String. + +- `style::JSONStyle=JSONWriteStyle()`: Custom style object that controls serialization behavior. This allows customizing + certain aspects of serialization, like defining a custom `lower` method for a non-owned type. Like `struct MyStyle <: JSONStyle end`, + `JSON.lower(x::Rational) = (num=x.num, den=x.den)`, then calling `JSON.json(1//3; style=MyStyle())` will output + `{"num": 1, "den": 3}`. + +By default, `x` must be a JSON-serializable object. Supported types include: + * `AbstractString` => JSON string: types must support the `AbstractString` interface, specifically with support for + `ncodeunits` and `codeunit(x, i)`. + * `Bool` => JSON boolean: must be `true` or `false` + * `Nothing` => JSON null: must be the `nothing` singleton value + * `Number` => JSON number: `Integer` subtypes or `Union{Float16, Float32, Float64}` have default implementations + for other `Number` types, [`JSON.tostring`](@ref) is first called to convert + the value to a `String` before being written directly to JSON output + * `AbstractArray`/`Tuple`/`AbstractSet` => JSON array: objects for which `JSON.arraylike` returns `true` + are output as JSON arrays. `arraylike` is defined by default for + `AbstractArray`, `AbstractSet`, `Tuple`, and `Base.Generator`. For other types that define, + they must also properly implement `StructUtils.applyeach` to iterate over the index => elements pairs. + Note that arrays with dimensionality > 1 are written as nested arrays, with `N` nestings for `N` dimensions, + and the 1st dimension is always the innermost nested JSON array (column-major order). 
+ * `AbstractDict`/`NamedTuple`/structs => JSON object: if a value doesn't fall into any of the above categories, + it is output as a JSON object. `StructUtils.applyeach` is called, which has appropriate implementations + for `AbstractDict`, `NamedTuple`, and structs, where field names => values are iterated over. Field names can + be output with an alternative name via field tag overload, like `field::Type &(json=(name="alternative_name",),)` + +If an object is not JSON-serializable, an override for `JSON.lower` can +be defined to convert it to a JSON-serializable object. Some default `lower` defintions +are defined in JSON itself, for example: + * `StructUtils.lower(::Missing) = nothing` + * `StructUtils.lower(x::Symbol) = String(x)` + * `StructUtils.lower(x::Union{Enum, AbstractChar, VersionNumber, Cstring, Cwstring, UUID, Dates.TimeType}) = string(x)` + * `StructUtils.lower(x::Regex) = x.pattern` + +These allow common Base/stdlib types to be serialized in an expected format. + +Circular references are tracked automatically and cycles are broken by writing `null` for any children references. + +For pre-formatted JSON data as a String, use `JSONText(json)` to write the string out as-is. + +For `AbstractDict` objects with non-string keys, `StructUtils.lowerkey` will be called before serializing. This allows aggregate +or other types of dict keys to be converted to an appropriate string representation. See `StructUtils.liftkey` +for the reverse operation, which is called when parsing JSON data back into a dict type. + +*NOTE*: `JSON.json` should _not_ be overloaded directly by custom +types as this isn't robust for various output options (IO, String, etc.) +nor recursive situations. Types should define an appropriate +`JSON.lower` definition instead. 
+ +Examples: +```julia +using Dates + +abstract type AbstractMonster end + +struct Dracula <: AbstractMonster + num_victims::Int +end + +struct Werewolf <: AbstractMonster + witching_hour::DateTime +end + +struct Percent <: Number + value::Float64 +end + +JSON.lower(x::Percent) = x.value +StructUtils.lowerkey(x::Percent) = string(x.value) + +@noarg mutable struct FrankenStruct + id::Int + name::String # no default to show serialization of an undefined field + address::Union{Nothing, String} = nothing + rate::Union{Missing, Float64} = missing + type::Symbol = :a &(json=(name="franken_type",),) + notsure::Any = JSON.Object("key" => "value") + monster::AbstractMonster = Dracula(10) &(json=(lower=x -> x isa Dracula ? (monster_type="vampire", num_victims=x.num_victims) : (monster_type="werewolf", witching_hour=x.witching_hour),),) + percent::Percent = Percent(0.5) + birthdate::Date = Date(2025, 1, 1) &(json=(dateformat="yyyy/mm/dd",),) + percentages::Dict{Percent, Int} = Dict{Percent, Int}(Percent(0.0) => 0, Percent(1.0) => 1) + json_properties::JSONText = JSONText("{\"key\": \"value\"}") + matrix::Matrix{Float64} = [1.0 2.0; 3.0 4.0] + extra_field::Any = nothing &(json=(ignore=true,),) +end + +franken = FrankenStruct() +franken.id = 1 + +json = JSON.json(franken; omit_null=false) +# "{\"id\":1,\"name\":null,\"address\":null,\"rate\":null,\"franken_type\":\"a\",\"notsure\":{\"key\":\"value\"},\"monster\":{\"monster_type\":\"vampire\",\"num_victims\":10},\"percent\":0.5,\"birthdate\":\"2025/01/01\",\"percentages\":{\"1.0\":1,\"0.0\":0},\"json_properties\":{\"key\": \"value\"},\"matrix\":[[1.0,3.0],[2.0,4.0]]}" +``` + +A few comments on the JSON produced in the example above: + - The `name` field was `#undef`, and thus was serialized as `null`. + - The `address` and `rate` fields were `nothing` and `missing`, respectively, and thus were serialized as `null`. + - The `type` field has a `name` field tag, so the JSON key for this field is `franken_type` instead of `type`. 
+ - The `notsure` field is a `JSON.Object`, so it is serialized as a JSON object. + - The `monster` field is a `AbstractMonster`, which is a custom type. It has a `lower` field tag that specifies how the value of this field specifically (not all AbstractMonster) should be serialized + - The `percent` field is a `Percent`, which is a custom type. It has a `lower` method that specifies how `Percent` values should be serialized + - The `birthdate` field has a `dateformat` field tag, so the value follows the format (`yyyy/mm/dd`) instead of the default date ISO format (`yyyy-mm-dd`) + - The `percentages` field is a `Dict{Percent, Int}`, which is a custom type. It has a `lowerkey` method that specifies how `Percent` keys should be serialized as strings + - The `json_properties` field is a `JSONText`, so the JSONText value is serialized as-is + - The `matrix` field is a `Matrix{Float64}`, which is a custom type. It is serialized as a JSON array, with the first dimension being the innermost nested JSON array (column-major order) + - The `extra_field` field has a `ignore` field tag, so it is skipped when serializing + +""" +function json end + +@kwdef struct WriteOptions{S} + omit_null::Union{Bool, Nothing} = nothing + omit_empty::Union{Bool, Nothing} = nothing + allownan::Bool = false + jsonlines::Bool = false + pretty::Int = 0 + ninf::String = "-Infinity" + inf::String = "Infinity" + nan::String = "NaN" + inline_limit::Int = 0 + float_style::Symbol = :shortest # :shortest, :fixed, :exp + float_precision::Int = 1 + bufsize::Int = 2^22 # 4MB default buffer size for IO flushing + style::S = JSONWriteStyle() +end + +@noinline float_style_throw(fs) = throw(ArgumentError("Invalid float style: $fs")) +float_style_check(fs) = fs == :shortest || fs == :fixed || fs == :exp || float_style_throw(fs) + +# if jsonlines and pretty is not 0 or false, throw an ArgumentError +@noinline _jsonlines_pretty_throw() = throw(ArgumentError("pretty printing is not supported when writing 
jsonlines")) +_jsonlines_pretty_check(jsonlines, pretty) = jsonlines && pretty !== false && !iszero(pretty) && _jsonlines_pretty_throw() + +function json(io::IO, x::T; pretty::Union{Integer,Bool}=false, kw...) where {T} + opts = WriteOptions(; pretty=pretty === true ? 2 : Int(pretty), kw...) + _jsonlines_pretty_check(opts.jsonlines, opts.pretty) + float_style_check(opts.float_style) + y = StructUtils.lower(opts.style, x) + # Use smaller initial buffer size, limited by bufsize + initial_size = min(sizeguess(y), opts.bufsize) + buf = Vector{UInt8}(undef, initial_size) + pos = json!(buf, 1, y, opts, Any[y], io) + # Write any remaining buffer contents to IO + if pos > 1 + write(io, view(buf, 1:pos-1)) + end + return nothing +end + +if isdefined(Base, :StringVector) + stringvec(n) = Base.StringVector(n) +else + stringvec(n) = Vector{UInt8}(undef, n) +end + +function json(x; pretty::Union{Integer,Bool}=false, kw...) + opts = WriteOptions(; pretty=pretty === true ? 2 : Int(pretty), kw...) + _jsonlines_pretty_check(opts.jsonlines, opts.pretty) + float_style_check(opts.float_style) + y = StructUtils.lower(opts.style, x) + buf = stringvec(sizeguess(y)) + pos = json!(buf, 1, y, opts, Any[y], nothing) + return String(resize!(buf, pos - 1)) +end + +function json(fname::String, obj; kw...) + open(fname, "w") do io + json(io, obj; kw...) 
+ end + return fname +end + +# we use the same growth strategy as Base julia does for array growing +# which starts with small N at ~5x and approaches 1.125x as N grows +# ref: https://github.com/JuliaLang/julia/pull/40453 +newlen(n₀) = ceil(Int, n₀ + 4*n₀^(7 / 8) + n₀ / 8) + +macro checkn(n, force_resize=false) + esc(quote + if (pos + $n - 1) > length(buf) + # If we have an IO object and buffer would exceed bufsize, flush to IO first + # unless force_resize is true (used for comma writing to avoid flushing partial JSON) + if io !== nothing && length(buf) >= bufsize && pos > 1 && !$force_resize + write(io, view(buf, 1:pos-1)) + pos = 1 + end + # Resize buffer if still needed + if (pos + $n - 1) > length(buf) + resize!(buf, newlen(pos + $n)) + end + end + end) +end + +struct WriteClosure{JS, arraylike, T} # T is the type of the parent object/array being written + buf::Vector{UInt8} + pos::Ptr{Int} + wroteany::Ptr{Bool} # to track if we wrote any data to the buffer + indent::Int + depth::Int + opts::JS + ancestor_stack::Vector{Any} # to track circular references + io::Union{Nothing, IO} + bufsize::Int +end + +function indent(buf, pos, ind, depth, io, bufsize) + if ind > 0 + n = ind * depth + 1 + @checkn n + buf[pos] = UInt8('\n') + for i = 1:(n - 1) + buf[pos + i] = UInt8(' ') + end + pos += n + end + return pos +end + +checkkey(s) = s isa AbstractString || throw(ArgumentError("Value returned from `StructUtils.lowerkey` must be a string: $(typeof(s))")) + +function (f::WriteClosure{JS, arraylike, T})(key, val) where {JS, arraylike, T} + track_ref = ismutabletype(typeof(val)) + is_circ_ref = track_ref && any(x -> x === val, f.ancestor_stack) + if !arraylike + # for objects, check omit_null/omit_empty + # and skip if the value is null or empty + if f.opts.omit_null === true || (f.opts.omit_null === nothing && omit_null(f.opts.style, T)) + (is_circ_ref || val === nothing) && return + end + if f.opts.omit_empty === true || (f.opts.omit_empty === nothing && 
omit_empty(f.opts.style, T)) + (is_circ_ref || is_empty(val)) && return + end + end + pos = unsafe_load(f.pos) + unsafe_store!(f.wroteany, true) # at this point, we know something will be written + buf = f.buf + ind = f.indent + io = f.io + bufsize = f.bufsize + pos = indent(buf, pos, ind, f.depth, io, bufsize) + # if not an array, we need to write the key + ':' + if !arraylike + # skey = StructUtils.lowerkey(f.opts, key) + # check if the key is a string + checkkey(key) + pos = _string(buf, pos, key, io, bufsize) + @checkn 1 + buf[pos] = UInt8(':') + pos += 1 + if ind > 0 + @checkn 1 + buf[pos] = UInt8(' ') + pos += 1 + end + end + # check if the lowered value is in our ancestor stack + if is_circ_ref + # if so, it's a circular reference! so we just write `null` + pos = _null(buf, pos, io, bufsize) + else + track_ref && push!(f.ancestor_stack, val) + # if jsonlines, we need to recursively set to false + if f.opts.jsonlines + opts = WriteOptions(; omit_null=f.opts.omit_null, omit_empty=f.opts.omit_empty, allownan=f.opts.allownan, jsonlines=false, pretty=f.opts.pretty, ninf=f.opts.ninf, inf=f.opts.inf, nan=f.opts.nan, inline_limit=f.opts.inline_limit, float_style=f.opts.float_style, float_precision=f.opts.float_precision) + else + opts = f.opts + end + pos = json!(buf, pos, val, opts, f.ancestor_stack, io, ind, f.depth, bufsize) + track_ref && pop!(f.ancestor_stack) + end + @checkn 1 true + @inbounds buf[pos] = f.opts.jsonlines ? 
UInt8('\n') : UInt8(',') + pos += 1 + # store our updated pos + unsafe_store!(f.pos, pos) + return +end + +@noinline throwjsonlines() = throw(ArgumentError("jsonlines only supported for arraylike")) + +# assume x is lowered value +function json!(buf, pos, x, opts::WriteOptions, ancestor_stack::Union{Nothing, Vector{Any}}=nothing, io::Union{Nothing, IO}=nothing, ind::Int=opts.pretty, depth::Int=0, bufsize::Int=opts.bufsize) + # string + if x isa AbstractString + return _string(buf, pos, x, io, bufsize) + # write JSONText out directly + elseif x isa JSONText + val = x.value + @checkn sizeof(val) + for i = 1:sizeof(val) + @inbounds buf[pos + i - 1] = codeunit(val, i) + end + return pos + sizeof(val) + # bool; check before Number since Bool <: Number + elseif x isa Bool + if x + @checkn 4 + @inbounds buf[pos] = 't' + @inbounds buf[pos + 1] = 'r' + @inbounds buf[pos + 2] = 'u' + @inbounds buf[pos + 3] = 'e' + return pos + 4 + else + @checkn 5 + @inbounds buf[pos] = 'f' + @inbounds buf[pos + 1] = 'a' + @inbounds buf[pos + 2] = 'l' + @inbounds buf[pos + 3] = 's' + @inbounds buf[pos + 4] = 'e' + return pos + 5 + end + # number + elseif x isa Number + return _number(buf, pos, x, opts, io, bufsize) + # null + elseif x === nothing + return _null(buf, pos, io, bufsize) + # object or array + elseif StructUtils.dictlike(opts.style, x) || StructUtils.arraylike(opts.style, x) || StructUtils.structlike(opts.style, x) + al = StructUtils.arraylike(opts.style, x) + # override pretty indent to 0 for arrays shorter than inline_limit + if al && opts.pretty > 0 && opts.inline_limit > 0 && length(x) < opts.inline_limit + local_ind = 0 + else + local_ind = ind + end + if !opts.jsonlines + @checkn 1 + @inbounds buf[pos] = al ? 
UInt8('[') : UInt8('{') + pos += 1 + else + al || throwjsonlines() + end + ref = Ref(pos) + wroteany = false + wroteanyref = Ref(false) + GC.@preserve ref wroteanyref begin + c = WriteClosure{typeof(opts), al, typeof(x)}(buf, Base.unsafe_convert(Ptr{Int}, ref), Base.unsafe_convert(Ptr{Bool}, wroteanyref), local_ind, depth + 1, opts, ancestor_stack, io, bufsize) + StructUtils.applyeach(opts.style, c, x) + # get updated pos + pos = unsafe_load(c.pos) + wroteany = unsafe_load(c.wroteany) + end + # in WriteClosure, we eagerly write a comma after each element + # so for non-empty object/arrays, we can just overwrite the last comma with the closechar + if wroteany + pos -= 1 + pos = indent(buf, pos, local_ind, depth, io, bufsize) + else + # but if the object/array was empty, we need to do the check manually + @checkn 1 + end + # even if the input is empty and we're jsonlines, the spec says it's ok to end w/ a newline + @inbounds buf[pos] = opts.jsonlines ? UInt8('\n') : al ? UInt8(']') : UInt8('}') + return pos + 1 + else + return _string(buf, pos, x, io, bufsize) + end +end + +function _null(buf, pos, io, bufsize) + @checkn 4 + @inbounds buf[pos] = 'n' + @inbounds buf[pos + 1] = 'u' + @inbounds buf[pos + 2] = 'l' + @inbounds buf[pos + 3] = 'l' + return pos + 4 +end + +_string(buf, pos, x, io, bufsize) = _string(buf, pos, string(x), io, bufsize) +_string(buf, pos, x::LazyValues, io, bufsize) = _string(buf, pos, getindex(x), io, bufsize) +_string(buf, pos, x::PtrString, io, bufsize) = _string(buf, pos, convert(String, x), io, bufsize) + +function _string(buf, pos, x::AbstractString, io, bufsize) + sz = ncodeunits(x) + el = escapelength(x) + @checkn (el + 2) + @inbounds buf[pos] = UInt8('"') + pos += 1 + if el > sz + for i = 1:sz + @inbounds escbytes = ESCAPECHARS[codeunit(x, i) + 1] + for j = 1:length(escbytes) + @inbounds buf[pos] = escbytes[j] + pos += 1 + end + end + else + @simd for i = 1:sz + @inbounds buf[pos] = codeunit(x, i) + pos += 1 + end + end + @inbounds 
buf[pos] = UInt8('"') + return pos + 1 +end + +""" + JSON.tostring(x) + +Overloadable function that allows non-`Integer` `Number` types +to convert themselves to a `String` that is then used +when serializing `x` to JSON. Note that if the result of `tostring` +is not a valid JSON number, it will be serialized as a JSON string, +with double quotes around it. + +An example overload would look something like: +```julia +JSON.tostring(x::MyDecimal) = string(x) +``` +""" +tostring(x) = string(Float64(x)) + +split_sign(n::Integer) = unsigned(abs(n)), n < 0 +split_sign(n::Unsigned) = n, false +split_sign(x::BigInt) = (abs(x), x < 0) + +@noinline infcheck(x, allownan) = isfinite(x) || allownan || throw(ArgumentError("$x not allowed to be written in JSON spec; pass `allownan=true` to allow anyway")) + +function _number(buf, pos, x::Number, opts::WriteOptions, io, bufsize) + if x isa Integer + y, neg = split_sign(x) + n = i = ndigits(y, base=10, pad=1) + @checkn (i + neg) + if neg + @inbounds buf[pos] = UInt8('-') + pos += 1 + end + while i > 0 + @inbounds buf[pos + i - 1] = 48 + rem(y, 10) + y = oftype(y, div(y, 10)) + i -= 1 + end + return pos + n + elseif x isa AbstractFloat + infcheck(x, opts.allownan) + if x isa Union{Float16, Float32, Float64} + if isnan(x) + nan = opts.nan + @checkn sizeof(nan) + for i = 1:sizeof(nan) + @inbounds buf[pos + i - 1] = UInt8(codeunit(nan, i)) + end + return pos + sizeof(nan) + elseif isinf(x) + if x < 0 + inf = opts.ninf + else + inf = opts.inf + end + @checkn sizeof(inf) + for i = 1:sizeof(inf) + @inbounds buf[pos + i - 1] = UInt8(codeunit(inf, i)) + end + return pos + sizeof(inf) + end + if opts.float_style == :shortest + @checkn Base.Ryu.neededdigits(typeof(x)) + return Base.Ryu.writeshortest(buf, pos, x) + elseif opts.float_style == :fixed + @checkn (opts.float_precision + Base.Ryu.neededdigits(typeof(x))) + return Base.Ryu.writefixed(buf, pos, x, opts.float_precision, false, false, true) + elseif opts.float_style == :exp + @checkn 
(opts.float_precision + Base.Ryu.neededdigits(typeof(x))) + return Base.Ryu.writeexp(buf, pos, x, opts.float_precision, false, false, true) + else + # unreachable as we validate float_style inputs + @assert false + end + end + else + str = tostring(x) + if anyinvalidnumberchars(str) + # serialize as string + return _string(buf, pos, str, io, bufsize) + end + bytes = codeunits(str) + sz = sizeof(bytes) + @checkn sz + for i = 1:sz + @inbounds buf[pos + i - 1] = bytes[i] + end + return pos + sz + end +end + +function anyinvalidnumberchars(x) + for i = 1:sizeof(x) + b = codeunit(x, i) + if !(b == UInt8('-') || b == UInt8('.') || b == UInt8('e') || b == UInt8('E') || + UInt8('0') <= b <= UInt8('9')) + return true + end + end + return false +end \ No newline at end of file diff --git a/test/JSONTestSuite.tar b/test/JSONTestSuite.tar new file mode 100644 index 00000000..833e93ba Binary files /dev/null and b/test/JSONTestSuite.tar differ diff --git a/test/REQUIRE b/test/REQUIRE deleted file mode 100644 index d0dacdd9..00000000 --- a/test/REQUIRE +++ /dev/null @@ -1,3 +0,0 @@ -DataStructures -FixedPointNumbers -OffsetArrays diff --git a/test/async.jl b/test/async.jl deleted file mode 100644 index 977f32e5..00000000 --- a/test/async.jl +++ /dev/null @@ -1,117 +0,0 @@ -using JSON -using Test -using Distributed: RemoteChannel -using Sockets - -@isdefined(a) || include("json-samples.jl") - -finished_async_tests = RemoteChannel() - -port, serv = listenany(7777) -@async let s; try - s = accept(serv) - close(serv) - @test JSON.parse(s) !== nothing # a - @test JSON.parse(s) !== nothing # b - validate_c(s) # c - @test JSON.parse(s) !== nothing # d - validate_svg_tviewer_menu(s) # svg_tviewer_menu - @test JSON.parse(s) !== nothing # gmaps - @test JSON.parse(s) !== nothing # colors1 - @test JSON.parse(s) !== nothing # colors2 - @test JSON.parse(s) !== nothing # colors3 - @test JSON.parse(s) !== nothing # twitter - @test JSON.parse(s) !== nothing # facebook - validate_flickr(s) # 
flickr - @test JSON.parse(s) !== nothing # youtube - @test JSON.parse(s) !== nothing # iphone - @test JSON.parse(s) !== nothing # customer - @test JSON.parse(s) !== nothing # product - @test JSON.parse(s) !== nothing # interop - validate_unicode(s) # unicode - @test JSON.parse(s) !== nothing # issue5 - @test JSON.parse(s) !== nothing # dollars - @test JSON.parse(s) !== nothing # brackets - - put!(finished_async_tests, nothing) -catch ex - @error "async test failure" _exception=ex -finally - @isdefined(s) && close(s) - close(serv) -end; end - -w = connect(Sockets.localhost, port) - -@test JSON.parse(a) !== nothing -write(w, a) - -@test JSON.parse(b) !== nothing -write(w, b) - -validate_c(c) -write(w, c) - -@test JSON.parse(d) !== nothing -write(w, d) - -validate_svg_tviewer_menu(svg_tviewer_menu) -write(w, svg_tviewer_menu) - -@test JSON.parse(gmaps) !== nothing -write(w, gmaps) - -@test JSON.parse(colors1) !== nothing -write(w, colors1) - -@test JSON.parse(colors2) !== nothing -write(w, colors2) - -@test JSON.parse(colors3) !== nothing -write(w, colors3) - -@test JSON.parse(twitter) !== nothing -write(w, twitter) - -@test JSON.parse(facebook) !== nothing -write(w, facebook) - -validate_flickr(flickr) -write(w, flickr) - -@test JSON.parse(youtube) !== nothing -write(w, youtube) - -@test JSON.parse(iphone) !== nothing -write(w, iphone) - -@test JSON.parse(customer) !== nothing -write(w, customer) - -@test JSON.parse(product) !== nothing -write(w, product) - -@test JSON.parse(interop) !== nothing -write(w, interop) - -validate_unicode(unicode) -write(w, unicode) - -# issue #5 -issue5 = "[\"A\",\"B\",\"C\\n\"]" -JSON.parse(issue5) -write(w, issue5) - -# $ escaping issue -dollars = ["all of the \$s", "µniçø∂\$"] -json_dollars = json(dollars) -@test JSON.parse(json_dollars) !== nothing -write(w, json_dollars) - -# unmatched brackets -brackets = Dict("foo"=>"ba}r", "be}e]p"=>"boo{p") -json_brackets = json(brackets) -@test JSON.parse(json_brackets) !== nothing -write(w, 
json_dollars) - -fetch(finished_async_tests) diff --git a/test/enum.jl b/test/enum.jl deleted file mode 100644 index ead3d99a..00000000 --- a/test/enum.jl +++ /dev/null @@ -1,4 +0,0 @@ -@enum Animal zebra aardvark horse -@test json(zebra) == "\"zebra\"" -@test json([aardvark, horse, Dict("z" => zebra)]) == - "[\"aardvark\",\"horse\",{\"z\":\"zebra\"}]" diff --git a/test/indentation.jl b/test/indentation.jl deleted file mode 100644 index 98fa5f02..00000000 --- a/test/indentation.jl +++ /dev/null @@ -1,10 +0,0 @@ -# check indented json has same final value as non indented -fb = JSON.parse(facebook) -fbjson1 = json(fb, 2) -fbjson2 = json(fb) -@test JSON.parse(fbjson1) == JSON.parse(fbjson2) - -ev = JSON.parse(svg_tviewer_menu) -ejson1 = json(ev, 2) -ejson2 = json(ev) -@test JSON.parse(ejson1) == JSON.parse(ejson2) diff --git a/test/json-checker.jl b/test/json-checker.jl deleted file mode 100644 index 6552108a..00000000 --- a/test/json-checker.jl +++ /dev/null @@ -1,28 +0,0 @@ -# Run modified JSON checker tests - -const JSON_DATA_DIR = joinpath(dirname(pathof(JSON)), "../data") - -for i in 1:38 - file = "fail$(lpad(string(i), 2, "0")).json" - filepath = joinpath(JSON_DATA_DIR, "jsonchecker", file) - - @test_throws ErrorException JSON.parsefile(filepath) -end - -for i in 1:3 - # Test that the files parse successfully and match streaming parser - tf = joinpath(JSON_DATA_DIR, "jsonchecker", "pass$(lpad(string(i), 2, "0")).json") - @test JSON.parsefile(tf) == open(JSON.parse, tf) -end - -# Run JSON roundtrip tests (check consistency of .json) - -roundtrip(data) = JSON.json(JSON.Parser.parse(data)) - -for i in 1:27 - file = "roundtrip$(lpad(string(i), 2, "0")).json" - filepath = joinpath(JSON_DATA_DIR, "roundtrip", file) - - rt = roundtrip(read(filepath, String)) - @test rt == roundtrip(rt) -end diff --git a/test/json-samples.jl b/test/json-samples.jl deleted file mode 100644 index 85a31b95..00000000 --- a/test/json-samples.jl +++ /dev/null @@ -1,644 +0,0 @@ -#Examples from 
http://json.org/example.html -a="{\"menu\": { - \"id\": \"file\", - \"value\": \"File\", - \"popup\": { - \"menuitem\": [ - {\"value\": \"New\", \"onclick\": \"CreateNewDoc()\"}, - {\"value\": \"Open\", \"onclick\": \"OpenDoc()\"}, - {\"value\": \"Close\", \"onclick\": \"CloseDoc()\"} - ] - } - }} - " - - -b="{ - \"glossary\": { - \"title\": \"example glossary\", - \"GlossDiv\": { - \"title\": \"S\", - \"GlossList\": { - \"GlossEntry\": { - \"ID\": \"SGML\", - \"SortAs\": \"SGML\", - \"GlossTerm\": \"Standard Generalized Markup Language\", - \"Acronym\": \"SGML\", - \"Abbrev\": \"ISO 8879:1986\", - \"GlossDef\": { - \"para\": \"A meta-markup language, used to create markup languages such as DocBook.\", - \"GlossSeeAlso\": [\"GML\", \"XML\"] - }, - \"GlossSee\": \"markup\" - } - } - } - } -} -" - -const c = """ -{"widget": { - "debug": "on", - "window": { - "title": "Sample Konfabulator Widget", - "name": "main_window", - "width": 500, - "height": 500 - }, - "image": { - "src": "Images/Sun.png", - "name": "sun1", - "hOffset": 250, - "vOffset": 250, - "alignment": "center" - }, - "text": { - "data": "Click Here", - "size": 36.5, - "style": "bold", - "name": "text1", - "hOffset": 250, - "vOffset": 100, - "alignment": "center", - "onMouseUp": "sun1.opacity = (sun1.opacity / 100) * 90;" - } -}}""" -function validate_c(c) - j = JSON.parse(c) - @test j != nothing - @test typeof(j["widget"]["image"]["hOffset"]) == Int64 - @test j["widget"]["image"]["hOffset"] == 250 - @test typeof(j["widget"]["text"]["size"]) == Float64 - @test j["widget"]["text"]["size"] == 36.5 -end - -d = "{\"web-app\": { - \"servlet\": [ - { - \"servlet-name\": \"cofaxCDS\", - \"servlet-class\": \"org.cofax.cds.CDSServlet\", - \"init-param\": { - \"configGlossary:installationAt\": \"Philadelphia, PA\", - \"configGlossary:adminEmail\": \"ksm@pobox.com\", - \"configGlossary:poweredBy\": \"Cofax\", - \"configGlossary:poweredByIcon\": \"/images/cofax.gif\", - \"configGlossary:staticPath\": 
\"/content/static\", - \"templateProcessorClass\": \"org.cofax.WysiwygTemplate\", - \"templateLoaderClass\": \"org.cofax.FilesTemplateLoader\", - \"templatePath\": \"templates\", - \"templateOverridePath\": \"\", - \"defaultListTemplate\": \"listTemplate.htm\", - \"defaultFileTemplate\": \"articleTemplate.htm\", - \"useJSP\": false, - \"jspListTemplate\": \"listTemplate.jsp\", - \"jspFileTemplate\": \"articleTemplate.jsp\", - \"cachePackageTagsTrack\": 200, - \"cachePackageTagsStore\": 200, - \"cachePackageTagsRefresh\": 60, - \"cacheTemplatesTrack\": 100, - \"cacheTemplatesStore\": 50, - \"cacheTemplatesRefresh\": 15, - \"cachePagesTrack\": 200, - \"cachePagesStore\": 100, - \"cachePagesRefresh\": 10, - \"cachePagesDirtyRead\": 10, - \"searchEngineListTemplate\": \"forSearchEnginesList.htm\", - \"searchEngineFileTemplate\": \"forSearchEngines.htm\", - \"searchEngineRobotsDb\": \"WEB-INF/robots.db\", - \"useDataStore\": true, - \"dataStoreClass\": \"org.cofax.SqlDataStore\", - \"redirectionClass\": \"org.cofax.SqlRedirection\", - \"dataStoreName\": \"cofax\", - \"dataStoreDriver\": \"com.microsoft.jdbc.sqlserver.SQLServerDriver\", - \"dataStoreUrl\": \"jdbc:microsoft:sqlserver://LOCALHOST:1433;DatabaseName=goon\", - \"dataStoreUser\": \"sa\", - \"dataStorePassword\": \"dataStoreTestQuery\", - \"dataStoreTestQuery\": \"SET NOCOUNT ON;select test='test';\", - \"dataStoreLogFile\": \"/usr/local/tomcat/logs/datastore.log\", - \"dataStoreInitConns\": 10, - \"dataStoreMaxConns\": 100, - \"dataStoreConnUsageLimit\": 100, - \"dataStoreLogLevel\": \"debug\", - \"maxUrlLength\": 500}}, - { - \"servlet-name\": \"cofaxEmail\", - \"servlet-class\": \"org.cofax.cds.EmailServlet\", - \"init-param\": { - \"mailHost\": \"mail1\", - \"mailHostOverride\": \"mail2\"}}, - { - \"servlet-name\": \"cofaxAdmin\", - \"servlet-class\": \"org.cofax.cds.AdminServlet\"}, - - { - \"servlet-name\": \"fileServlet\", - \"servlet-class\": \"org.cofax.cds.FileServlet\"}, - { - \"servlet-name\": 
\"cofaxTools\", - \"servlet-class\": \"org.cofax.cms.CofaxToolsServlet\", - \"init-param\": { - \"templatePath\": \"toolstemplates/\", - \"log\": 1, - \"logLocation\": \"/usr/local/tomcat/logs/CofaxTools.log\", - \"logMaxSize\": \"\", - \"dataLog\": 1, - \"dataLogLocation\": \"/usr/local/tomcat/logs/dataLog.log\", - \"dataLogMaxSize\": \"\", - \"removePageCache\": \"/content/admin/remove?cache=pages&id=\", - \"removeTemplateCache\": \"/content/admin/remove?cache=templates&id=\", - \"fileTransferFolder\": \"/usr/local/tomcat/webapps/content/fileTransferFolder\", - \"lookInContext\": 1, - \"adminGroupID\": 4, - \"betaServer\": true}}], - \"servlet-mapping\": { - \"cofaxCDS\": \"/\", - \"cofaxEmail\": \"/cofaxutil/aemail/*\", - \"cofaxAdmin\": \"/admin/*\", - \"fileServlet\": \"/static/*\", - \"cofaxTools\": \"/tools/*\"}, - - \"taglib\": { - \"taglib-uri\": \"cofax.tld\", - \"taglib-location\": \"/WEB-INF/tlds/cofax.tld\"}}}" - -const svg_tviewer_menu = """ -{"menu": { - "header": "SVG\\tViewer\\u03b1", - "items": [ - {"id": "Open"}, - {"id": "OpenNew", "label": "Open New"}, - null, - {"id": "ZoomIn", "label": "Zoom In"}, - {"id": "ZoomOut", "label": "Zoom Out"}, - {"id": "OriginalView", "label": "Original View"}, - null, - {"id": "Quality"}, - {"id": "Pause"}, - {"id": "Mute"}, - null, - {"id": "Find", "label": "Find..."}, - {"id": "FindAgain", "label": "Find Again"}, - {"id": "Copy"}, - {"id": "CopyAgain", "label": "Copy Again"}, - {"id": "CopySVG", "label": "Copy SVG"}, - {"id": "ViewSVG", "label": "View SVG"}, - {"id": "ViewSource", "label": "View Source"}, - {"id": "SaveAs", "label": "Save As"}, - null, - {"id": "Help"}, - {"id": "About", "label": "About Adobe SVG Viewer..."} - ] -}}""" -function validate_svg_tviewer_menu(str) - j = JSON.parse(str) - @test j != nothing - @test typeof(j) == Dict{String, Any} - @test length(j) == 1 - @test typeof(j["menu"]) == Dict{String, Any} - @test length(j["menu"]) == 2 - @test j["menu"]["header"] == "SVG\tViewerα" - @test 
isa(j["menu"]["items"], Vector{Any}) - @test length(j["menu"]["items"]) == 22 - @test j["menu"]["items"][3] == nothing - @test j["menu"]["items"][2]["id"] == "OpenNew" - @test j["menu"]["items"][2]["label"] == "Open New" -end - - -#Example JSON strings from http://www.jquery4u.com/json/10-example-json-files/ - -gmaps= "{\"markers\": [ - { - \"point\":\"new GLatLng(40.266044,-74.718479)\", - \"homeTeam\":\"Lawrence Library\", - \"awayTeam\":\"LUGip\", - \"markerImage\":\"images/red.png\", - \"information\": \"Linux users group meets second Wednesday of each month.\", - \"fixture\":\"Wednesday 7pm\", - \"capacity\":\"\", - \"previousScore\":\"\" - }, - { - \"point\":\"new GLatLng(40.211600,-74.695702)\", - \"homeTeam\":\"Hamilton Library\", - \"awayTeam\":\"LUGip HW SIG\", - \"markerImage\":\"images/white.png\", - \"information\": \"Linux users can meet the first Tuesday of the month to work out hardware and configuration issues.\", - \"fixture\":\"Tuesday 7pm\", - \"capacity\":\"\", - \"tv\":\"\" - }, - { - \"point\":\"new GLatLng(40.294535,-74.682012)\", - \"homeTeam\":\"Applebees\", - \"awayTeam\":\"After LUPip Mtg Spot\", - \"markerImage\":\"images/newcastle.png\", - \"information\": \"Some of us go there after the main LUGip meeting, drink brews, and talk.\", - \"fixture\":\"Wednesday whenever\", - \"capacity\":\"2 to 4 pints\", - \"tv\":\"\" - } -] }" - -colors1 = "{ - \"colorsArray\":[{ - \"colorName\":\"red\", - \"hexValue\":\"#f00\" - }, - { - \"colorName\":\"green\", - \"hexValue\":\"#0f0\" - }, - { - \"colorName\":\"blue\", - \"hexValue\":\"#00f\" - }, - { - \"colorName\":\"cyan\", - \"hexValue\":\"#0ff\" - }, - { - \"colorName\":\"magenta\", - \"hexValue\":\"#f0f\" - }, - { - \"colorName\":\"yellow\", - \"hexValue\":\"#ff0\" - }, - { - \"colorName\":\"black\", - \"hexValue\":\"#000\" - } - ] -}" - -colors2 = "{ - \"colorsArray\":[{ - \"red\":\"#f00\", - \"green\":\"#0f0\", - \"blue\":\"#00f\", - \"cyan\":\"#0ff\", - \"magenta\":\"#f0f\", - 
\"yellow\":\"#ff0\", - \"black\":\"#000\" - } - ] -}" - -colors3 = "{ - \"red\":\"#f00\", - \"green\":\"#0f0\", - \"blue\":\"#00f\", - \"cyan\":\"#0ff\", - \"magenta\":\"#f0f\", - \"yellow\":\"#ff0\", - \"black\":\"#000\" -}" - -twitter = "{\"results\":[ - - {\"text\":\"@twitterapi http://tinyurl.com/ctrefg\", - \"to_user_id\":396524, - \"to_user\":\"TwitterAPI\", - \"from_user\":\"jkoum\", - \"metadata\": - { - \"result_type\":\"popular\", - \"recent_retweets\": 109 - }, - \"id\":1478555574, - \"from_user_id\":1833773, - \"iso_language_code\":\"nl\", - \"source\":\"twitter\", - \"profile_image_url\":\"http://s3.amazonaws.com/twitter_production/profile_images/118412707/2522215727_a5f07da155_b_normal.jpg\", - \"created_at\":\"Wed, 08 Apr 2009 19:22:10 +0000\"}], - \"since_id\":0, - \"max_id\":1480307926, - \"refresh_url\":\"?since_id=1480307926&q=%40twitterapi\", - \"results_per_page\":15, - \"next_page\":\"?page=2&max_id=1480307926&q=%40twitterapi\", - \"completed_in\":0.031704, - \"page\":1, - \"query\":\"%40twitterapi\"}" - -facebook= "{ - \"data\": [ - { - \"id\": \"X999_Y999\", - \"from\": { - \"name\": \"Tom Brady\", \"id\": \"X12\" - }, - \"message\": \"Looking forward to 2010!\", - \"actions\": [ - { - \"name\": \"Comment\", - \"link\": \"http://www.facebook.com/X999/posts/Y999\" - }, - { - \"name\": \"Like\", - \"link\": \"http://www.facebook.com/X999/posts/Y999\" - } - ], - \"type\": \"status\", - \"created_time\": \"2010-08-02T21:27:44+0000\", - \"updated_time\": \"2010-08-02T21:27:44+0000\" - }, - { - \"id\": \"X998_Y998\", - \"from\": { - \"name\": \"Peyton Manning\", \"id\": \"X18\" - }, - \"message\": \"Where's my contract?\", - \"actions\": [ - { - \"name\": \"Comment\", - \"link\": \"http://www.facebook.com/X998/posts/Y998\" - }, - { - \"name\": \"Like\", - \"link\": \"http://www.facebook.com/X998/posts/Y998\" - } - ], - \"type\": \"status\", - \"created_time\": \"2010-08-02T21:27:44+0000\", - \"updated_time\": \"2010-08-02T21:27:44+0000\" - } - ] 
-}" - -const flickr = """{ - "title": "Talk On Travel Pool", - "link": "http://www.flickr.com/groups/talkontravel/pool/", - "description": "Travel and vacation photos from around the world.", - "modified": "2009-02-02T11:10:27Z", - "generator": "http://www.flickr.com/", - "totalItems":222, - "items": [ - { - "title": "View from the hotel", - "link": "http://www.flickr.com/photos/33112458@N08/3081564649/in/pool-998875@N22", - "media": {"m":"http://farm4.static.flickr.com/3037/3081564649_4a6569750c_m.jpg"}, - "date_taken": "2008-12-04T04:43:03-08:00", - "description": "

Talk On Travel has added a photo to the pool:

\\"View

", - "published": "2008-12-04T12:43:03Z", - "author": "nobody@flickr.com (Talk On Travel)", - "author_id": "33112458@N08", - "tags": "spain dolphins tenerife canaries lagomera aqualand playadelasamericas junglepark losgigantos loscristines talkontravel" - } - ] -}""" -function validate_flickr(str) - k = JSON.parse(str) - @test k != nothing - @test k["totalItems"] == 222 - @test k["items"][1]["description"][12] == '\"' -end - -youtube = "{\"apiVersion\":\"2.0\", - \"data\":{ - \"updated\":\"2010-01-07T19:58:42.949Z\", - \"totalItems\":800, - \"startIndex\":1, - \"itemsPerPage\":1, - \"items\":[ - {\"id\":\"hYB0mn5zh2c\", - \"uploaded\":\"2007-06-05T22:07:03.000Z\", - \"updated\":\"2010-01-07T13:26:50.000Z\", - \"uploader\":\"GoogleDeveloperDay\", - \"category\":\"News\", - \"title\":\"Google Developers Day US - Maps API Introduction\", - \"description\":\"Google Maps API Introduction ...\", - \"tags\":[ - \"GDD07\",\"GDD07US\",\"Maps\" - ], - \"thumbnail\":{ - \"default\":\"http://i.ytimg.com/vi/hYB0mn5zh2c/default.jpg\", - \"hqDefault\":\"http://i.ytimg.com/vi/hYB0mn5zh2c/hqdefault.jpg\" - }, - \"player\":{ - \"default\":\"http://www.youtube.com/watch?v\u003dhYB0mn5zh2c\" - }, - \"content\":{ - \"1\":\"rtsp://v5.cache3.c.youtube.com/CiILENy.../0/0/0/video.3gp\", - \"5\":\"http://www.youtube.com/v/hYB0mn5zh2c?f...\", - \"6\":\"rtsp://v1.cache1.c.youtube.com/CiILENy.../0/0/0/video.3gp\" - }, - \"duration\":2840, - \"aspectRatio\":\"widescreen\", - \"rating\":4.63, - \"ratingCount\":68, - \"viewCount\":220101, - \"favoriteCount\":201, - \"commentCount\":22, - \"status\":{ - \"value\":\"restricted\", - \"reason\":\"limitedSyndication\" - }, - \"accessControl\":{ - \"syndicate\":\"allowed\", - \"commentVote\":\"allowed\", - \"rate\":\"allowed\", - \"list\":\"allowed\", - \"comment\":\"allowed\", - \"embed\":\"allowed\", - \"videoRespond\":\"moderated\" - } - } - ] - } -}" - -iphone = "{ - \"menu\": { - \"header\": \"xProgress SVG Viewer\", - \"items\": [ - { - \"id\": 
\"Open\" - }, - { - \"id\": \"OpenNew\", - \"label\": \"Open New\" - }, - null, - { - \"id\": \"ZoomIn\", - \"label\": \"Zoom In\" - }, - { - \"id\": \"ZoomOut\", - \"label\": \"Zoom Out\" - }, - { - \"id\": \"OriginalView\", - \"label\": \"Original View\" - }, - null, - { - \"id\": \"Quality\" - }, - { - \"id\": \"Pause\" - }, - { - \"id\": \"Mute\" - }, - null, - { - \"id\": \"Find\", - \"label\": \"Find...\" - }, - { - \"id\": \"FindAgain\", - \"label\": \"Find Again\" - }, - { - \"id\": \"Copy\" - }, - { - \"id\": \"CopyAgain\", - \"label\": \"Copy Again\" - }, - { - \"id\": \"CopySVG\", - \"label\": \"Copy SVG\" - }, - { - \"id\": \"ViewSVG\", - \"label\": \"View SVG\" - }, - { - \"id\": \"ViewSource\", - \"label\": \"View Source\" - }, - { - \"id\": \"SaveAs\", - \"label\": \"Save As\" - }, - null, - { - \"id\": \"Help\" - }, - { - \"id\": \"About\", - \"label\": \"About xProgress CVG Viewer...\" - } - ] - } -}" - -customer = "{ - \"firstName\": \"John\", - \"lastName\": \"Smith\", - \"age\": 25, - \"address\": - { - \"streetAddress\": \"21 2nd Street\", - \"city\": \"New York\", - \"state\": \"NY\", - \"postalCode\": \"10021\" - }, - \"phoneNumber\": - [ - { - \"type\": \"home\", - \"number\": \"212 555-1234\" - }, - { - \"type\": \"fax\", - \"number\": \"646 555-4567\" - } - ] - }" - - product = "{ - \"name\":\"Product\", - \"properties\": - { - \"id\": - { - \"type\":\"number\", - \"description\":\"Product identifier\", - \"required\":true - }, - \"name\": - { - \"description\":\"Name of the product\", - \"type\":\"string\", - \"required\":true - }, - \"price\": - { - \"type\":\"number\", - \"minimum\":0, - \"required\":true - }, - \"tags\": - { - \"type\":\"array\", - \"items\": - { - \"type\":\"string\" - } - } - } -}" - -interop = "{ - \"ResultSet\": { - \"totalResultsAvailable\": \"1827221\", - \"totalResultsReturned\": 2, - \"firstResultPosition\": 1, - \"Result\": [ - { - \"Title\": \"potato jpg\", - \"Summary\": \"Kentang Si bungsu dari keluarga 
Solanum tuberosum L ini ternyata memiliki khasiat untuk mengurangi kerutan jerawat bintik hitam dan kemerahan pada kulit Gunakan seminggu sekali sebagai\", - \"Url\": \"http://www.mediaindonesia.com/spaw/uploads/images/potato.jpg\", - \"ClickUrl\": \"http://www.mediaindonesia.com/spaw/uploads/images/potato.jpg\", - \"RefererUrl\": \"http://www.mediaindonesia.com/mediaperempuan/index.php?ar_id=Nzkw\", - \"FileSize\": 22630, - \"FileFormat\": \"jpeg\", - \"Height\": \"362\", - \"Width\": \"532\", - \"Thumbnail\": { - \"Url\": \"http://thm-a01.yimg.com/nimage/557094559c18f16a\", - \"Height\": \"98\", - \"Width\": \"145\" - } - }, - { - \"Title\": \"potato jpg\", - \"Summary\": \"Introduction of puneri aloo This is a traditional potato preparation flavoured with curry leaves and peanuts and can be eaten on fasting day Preparation time 10 min\", - \"Url\": \"http://www.infovisual.info/01/photo/potato.jpg\", - \"ClickUrl\": \"http://www.infovisual.info/01/photo/potato.jpg\", - \"RefererUrl\": \"http://sundayfood.com/puneri-aloo-indian-%20recipe\", - \"FileSize\": 119398, - \"FileFormat\": \"jpeg\", - \"Height\": \"685\", - \"Width\": \"1024\", - \"Thumbnail\": { - \"Url\": \"http://thm-a01.yimg.com/nimage/7fa23212efe84b64\", - \"Height\": \"107\", - \"Width\": \"160\" - } - } - ] - } -}" - -const unicode = """ -{"অলিম্পিকস": { - "অ্যাথলেট": "২২টি দেশ থেকে ২,০৩৫ জন প্রতিযোগী", - "ইভেন্ট": "২২টি ইভেন্টের মধ্যে ছিল দড়ি টানাটানি", - "রেকর্ড": [ - {"১০০মি. 
স্প্রিন্ট": "রেজি ওয়াকার, দক্ষিণ আফ্রিকা"}, - {"Marathon": "জনি হেইস"}, - {" ফ্রি-স্টাইল সাঁতার": "Henry Taylor, Britain"} - ] -}} -""" -function validate_unicode(str) - u = JSON.parse(str) - @test u != nothing - @test u["অলিম্পিকস"]["রেকর্ড"][2]["Marathon"] == "জনি হেইস" -end diff --git a/test/json.jl b/test/json.jl new file mode 100644 index 00000000..47077998 --- /dev/null +++ b/test/json.jl @@ -0,0 +1,588 @@ +using JSON, Test, Logging + +mutable struct CircularRef + id::Int + self::Union{Nothing, CircularRef} +end + +struct CustomNumber <: Real + x::Float64 +end + +@omit_null struct OmitNull + id::Int + name::Union{Nothing, String} +end + +@omit_empty struct OmitEmpty + id::Int + value::Union{Nothing, String} + values::Vector{Int} +end + +@testset "JSON.json" begin + +@testset "Basics" begin + @test JSON.json(nothing) == "null" + @test JSON.json(true) == "true" + @test JSON.json(false) == "false" + # test the JSON output of a bunch of numbers + @test JSON.json(0) == "0" + @test JSON.json(1) == "1" + @test JSON.json(1.0) == "1.0" + @test JSON.json(1.0f0) == "1.0" + @test JSON.json(1.0f1) == "10.0" + @test JSON.json(1.0f-1) == "0.1" + @test JSON.json(1.0f-2) == "0.01" + @test JSON.json(1.0f-3) == "0.001" + @test JSON.json(1.0f-4) == "0.0001" + @test JSON.json(1.0f-5) == "1.0e-5" + @test JSON.json(-1) == "-1" + @test JSON.json(-1.0) == "-1.0" + @test JSON.json(typemin(Int64)) == "-9223372036854775808" + @test JSON.json(typemax(Int64)) == "9223372036854775807" + @test JSON.json(BigInt(1)) == "1" + @test JSON.json(BigInt(1) << 100) == "1267650600228229401496703205376" + @test JSON.json(BigInt(-1)) == "-1" + @test JSON.json(BigInt(-1) << 100) == "-1267650600228229401496703205376" + @test JSON.json(typemin(UInt64)) == "0" + @test JSON.json(typemax(UInt64)) == "18446744073709551615" + @test_throws ArgumentError JSON.json(NaN) + @test_throws ArgumentError JSON.json(Inf) + @test_throws ArgumentError JSON.json(-Inf) + @test JSON.json(NaN; allownan=true) == "NaN" + @test 
JSON.json(Inf; allownan=true) == "Infinity" + @test JSON.json(-Inf; allownan=true) == "-Infinity" + # custom nan or inf strings + @test JSON.json(NaN; allownan=true, nan="nan") == "nan" + @test JSON.json(Inf; allownan=true, inf="inf") == "inf" + @test JSON.json(-Inf; allownan=true, ninf="-inf") == "-inf" + # test the JSON output of a bunch of strings + @test JSON.json("") == "\"\"" + @test JSON.json("a") == "\"a\"" + @test JSON.json("a\"b") == "\"a\\\"b\"" + @test JSON.json("a\\b") == "\"a\\\\b\"" + @test JSON.json("a\b") == "\"a\\b\"" + @test JSON.json("a\f") == "\"a\\f\"" + # test the JSON output of a bunch of strings with unicode characters + @test JSON.json("\u2200") == "\"∀\"" + @test JSON.json("\u2200\u2201") == "\"∀∁\"" + @test JSON.json("\u2200\u2201\u2202") == "\"∀∁∂\"" + @test JSON.json("\u2200\u2201\u2202\u2203") == "\"∀∁∂∃\"" + # test the JSON output of a bunch of arrays + @test JSON.json(Int[]) == "[]" + @test JSON.json(Int[1]) == "[1]" + @test JSON.json(Int[1, 2]) == "[1,2]" + @test JSON.json((1, 2)) == "[1,2]" + @test JSON.json(Set([2])) == "[2]" + @test JSON.json([1, nothing, "hey", 3.14, true, false]) == "[1,null,\"hey\",3.14,true,false]" + # test the JSON output of a bunch of dicts/namedtuples + @test JSON.json(Dict{Int, Int}()) == "{}" + @test JSON.json(Dict{Int, Int}(1 => 2)) == "{\"1\":2}" + @test JSON.json((a = 1, b = 2)) == "{\"a\":1,\"b\":2}" + @test JSON.json((a = nothing, b=2, c="hey", d=3.14, e=true, f=false)) == "{\"a\":null,\"b\":2,\"c\":\"hey\",\"d\":3.14,\"e\":true,\"f\":false}" + # test the JSON output of nested array/objects + @test JSON.json([1, [2, 3], [4, [5, 6]]]) == "[1,[2,3],[4,[5,6]]]" + @test JSON.json(Dict{Int, Any}(1 => Dict{Int, Any}(2 => Dict{Int, Any}(3 => 4)))) == "{\"1\":{\"2\":{\"3\":4}}}" + # now a mix of arrays and objects + @test JSON.json([1, Dict{Int, Any}(2 => Dict{Int, Any}(3 => 4))]) == "[1,{\"2\":{\"3\":4}}]" + @test JSON.json(Dict{Int, Any}(1 => [2, Dict{Int, Any}(3 => 4)])) == "{\"1\":[2,{\"3\":4}]}" + # 
test undefined elements of an array + arr = Vector{String}(undef, 3) + arr[1] = "a" + arr[3] = "b" + @test JSON.json(arr) == "[\"a\",null,\"b\"]" + # test custom struct writing + # defined in the test/struct.jl file + a = A(1, 2, 3, 4) + @test JSON.json(a) == "{\"a\":1,\"b\":2,\"c\":3,\"d\":4}" + x = LotsOfFields("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35") + @test JSON.json(x) == "{\"x1\":\"1\",\"x2\":\"2\",\"x3\":\"3\",\"x4\":\"4\",\"x5\":\"5\",\"x6\":\"6\",\"x7\":\"7\",\"x8\":\"8\",\"x9\":\"9\",\"x10\":\"10\",\"x11\":\"11\",\"x12\":\"12\",\"x13\":\"13\",\"x14\":\"14\",\"x15\":\"15\",\"x16\":\"16\",\"x17\":\"17\",\"x18\":\"18\",\"x19\":\"19\",\"x20\":\"20\",\"x21\":\"21\",\"x22\":\"22\",\"x23\":\"23\",\"x24\":\"24\",\"x25\":\"25\",\"x26\":\"26\",\"x27\":\"27\",\"x28\":\"28\",\"x29\":\"29\",\"x30\":\"30\",\"x31\":\"31\",\"x32\":\"32\",\"x33\":\"33\",\"x34\":\"34\",\"x35\":\"35\"}" + # test custom struct writing with custom field names + x = L(1, "george", 33.3) + @test JSON.json(x) == "{\"id\":1,\"firstName\":\"george\",\"rate\":33.3}" + # test custom struct writing with undef fields + x = UndefGuy() + x.id = 10 + @test JSON.json(x) == "{\"id\":10,\"name\":null}" + # test structs with circular references + x = CircularRef(11, nothing) + x.self = x + @test JSON.json(x) == "{\"id\":11,\"self\":null}" + # test lowering + x = K(123, missing) + @test JSON.json(x) == "{\"id\":123,\"value\":null}" + x = UUID(typemax(UInt128)) + @test JSON.json(x) == "\"ffffffff-ffff-ffff-ffff-ffffffffffff\"" + @test JSON.json(:a) == "\"a\"" + @test JSON.json(apple) == "\"apple\"" + @test JSON.json('a') == "\"a\"" + @test JSON.json('∀') == "\"∀\"" + @test JSON.json(v"1.2.3") == "\"1.2.3\"" + @test JSON.json(r"1.2.3") == "\"1.2.3\"" + @test JSON.json(Date(2023, 2, 23)) == "\"2023-02-23\"" + @test JSON.json(DateTime(2023, 2, 23, 12, 34, 
56)) == "\"2023-02-23T12:34:56\"" + @test JSON.json(Time(12, 34, 56)) == "\"12:34:56\"" + # test field-specific lowering + x = ThreeDates(Date(2023, 2, 23), DateTime(2023, 2, 23, 12, 34, 56), Time(12, 34, 56)) + @test JSON.json(x) == "{\"date\":\"2023_02_23\",\"datetime\":\"2023/02/23 12:34:56\",\"time\":\"12/34/56\"}" + # test matrix writing + @test JSON.json([1 2; 3 4]) == "[[1,3],[2,4]]" + @test JSON.json((a=[1 2; 3 4],)) == "{\"a\":[[1,3],[2,4]]}" + # singleton writing + @test JSON.json(C()) == "\"C()\"" + # module writing + @test JSON.json(JSON) == "\"JSON\"" + # function writing + @test JSON.json(JSON.json) == "\"json\"" + # SimpleVector writing + @test JSON.json(Core.svec(1, 2, 3)) == "[1,2,3]" + # DataType writing + @test JSON.json(Float64) == "\"Float64\"" + @test JSON.json(Union{Missing, Float64}) == "\"Union{Missing, Float64}\"" + # LogLevel writing + @test JSON.json(Logging.Info) == "\"Info\"" + @test JSON.json(Logging.LogLevel(1)) == "\"LogLevel(1)\"" + # multidimensional arrays + # "[[1.0],[2.0]]" => (1, 2) + m = Matrix{Float64}(undef, 1, 2) + m[1] = 1 + m[2] = 2 + @test JSON.json(m) == "[[1.0],[2.0]]" + # "[[1.0,2.0]]" => (2, 1) + m = Matrix{Float64}(undef, 2, 1) + m[1] = 1 + m[2] = 2 + @test JSON.json(m) == "[[1.0,2.0]]" + # "[[[1.0]],[[2.0]]]" => (1, 1, 2) + m = Array{Float64}(undef, 1, 1, 2) + m[1] = 1 + m[2] = 2 + @test JSON.json(m) == "[[[1.0]],[[2.0]]]" + # "[[[1.0],[2.0]]]" => (1, 2, 1) + m = Array{Float64}(undef, 1, 2, 1) + m[1] = 1 + m[2] = 2 + @test JSON.json(m) == "[[[1.0],[2.0]]]" + # "[[[1.0,2.0]]]" => (2, 1, 1) + m = Array{Float64}(undef, 2, 1, 1) + m[1] = 1 + m[2] = 2 + @test JSON.json(m) == "[[[1.0,2.0]]]" + + m = Array{Float64}(undef, 1, 2, 3) + m[1] = 1 + m[2] = 2 + m[3] = 3 + m[4] = 4 + m[5] = 5 + m[6] = 6 + @test JSON.json(m) == "[[[1.0],[2.0]],[[3.0],[4.0]],[[5.0],[6.0]]]" + # 0-dimensional array + m = Array{Float64,0}(undef) + m[1] = 1.0 + @test JSON.json(m) == "1.0" + # JSON.json forms + io = IOBuffer() + JSON.json(io, missing) 
+ @test String(take!(io)) == "null" + fname, io = mktemp() + close(io) + JSON.json(fname, missing) + @test read(fname, String) == "null" + rm(fname) + @testset "pretty output" begin + @test JSON.json([1, 2, 3], pretty=4) == "[\n 1,\n 2,\n 3\n]" + @test JSON.json([1, 2, 3], pretty=true) == "[\n 1,\n 2,\n 3\n]" + @test JSON.json([1, 2, 3], pretty=0) == "[1,2,3]" + # empty object/array + @test JSON.json([], pretty=true) == "[]" + @test JSON.json(Dict(), pretty=true) == "{}" + # several levels of nesting + @test JSON.json([1, [2, 3], [4, [5, 6]]], pretty=true) == "[\n 1,\n [\n 2,\n 3\n ],\n [\n 4,\n [\n 5,\n 6\n ]\n ]\n]" + # several levels of nesting with a mix of nulls, numbers, strings, booleans, empty objects, arrays, etc. + @test JSON.json([1, [2, 3], [4, [5, 6]], nothing, "hey", 3.14, true, false, Dict(), []], pretty=true) == "[\n 1,\n [\n 2,\n 3\n ],\n [\n 4,\n [\n 5,\n 6\n ]\n ],\n null,\n \"hey\",\n 3.14,\n true,\n false,\n {},\n []\n]" + # JSON.jl pre-1.0 compat + io = IOBuffer() + JSON.print(io, [1, 2, 3], 2) + @test String(take!(io)) == "[\n 1,\n 2,\n 3\n]" + @test JSON.json([1, 2, 3], 2) == "[\n 1,\n 2,\n 3\n]" + # inline_limit tests + @test JSON.json([1, 2]; pretty=2, inline_limit=3) == "[1,2]" + @test JSON.json([1, 2, 3]; pretty=2, inline_limit=3) == "[\n 1,\n 2,\n 3\n]" + end + # non-Integer/AbstractFloat but <: Real output + @test_throws MethodError JSON.json(CustomNumber(3.14)) + JSON.tostring(x::CustomNumber) = string(x.x) + @test JSON.json(CustomNumber(3.14)) == "3.14" + # jsonlines output + @test JSON.json([1, 2, 3]; jsonlines=true) == "1\n2\n3\n" + # jsonlines output with pretty not allowed + @test_throws ArgumentError JSON.json([1, 2, 3]; jsonlines=true, pretty=true) + # jsonlines each line is an object + @test JSON.json([(a=1, b=2), (a=3, b=4)]; jsonlines=true) == "{\"a\":1,\"b\":2}\n{\"a\":3,\"b\":4}\n" + # jsonlines with empty array + @test JSON.json([]; jsonlines=true) == "\n" + # jsonlines not allowed on objects + @test_throws ArgumentError 
JSON.json((a=1, b=2); jsonlines=true) + # circular reference tracking + a = Any[1, 2, 3] + push!(a, a) + @test JSON.json(a) == "[1,2,3,null]" + a = (a=1,) + x = [a, a, a] + @test JSON.json(x) == "[{\"a\":1},{\"a\":1},{\"a\":1}]" + a = CircularRef(1, nothing) + a.self = a + x = [a, a, a] + @test JSON.json(x) == "[{\"id\":1,\"self\":null},{\"id\":1,\"self\":null},{\"id\":1,\"self\":null}]" + # custom key function + @test_throws ArgumentError JSON.json(Dict(Point(1, 2) => "hi")) + StructUtils.lowerkey(::JSON.JSONStyle, p::Point) = "$(p.x)_$(p.y)" + @test JSON.json(Dict(Point(1, 2) => "hi")) == "{\"1_2\":\"hi\"}" + x = JSONText("[1,2,3]") + @test JSON.json(x) == "[1,2,3]" + @test JSON.json((a=1, b=nothing)) == "{\"a\":1,\"b\":null}" + @test JSON.json((a=1, b=nothing); omit_null=true) == "{\"a\":1}" + @test JSON.json((a=1, b=nothing); omit_null=false) == "{\"a\":1,\"b\":null}" + @test JSON.json((a=1, b=[]); omit_empty=true) == "{\"a\":1}" + @test JSON.json((a=1, b=[]); omit_empty=false) == "{\"a\":1,\"b\":[]}" + # custom style overload + JSON.lower(::CustomJSONStyle, x::Rational) = (num=x.num, den=x.den) + @test JSON.json(1//3; style=CustomJSONStyle()) == "{\"num\":1,\"den\":3}" + # @omit_null and @omit_empty + @test JSON.json(OmitNull(1, nothing)) == "{\"id\":1}" + @test JSON.json(OmitNull(1, nothing); omit_null=false) == "{\"id\":1,\"name\":null}" + @test JSON.json(OmitEmpty(1, nothing, [])) == "{\"id\":1}" + @test JSON.json(OmitEmpty(1, "abc", []); omit_empty=false) == "{\"id\":1,\"value\":\"abc\",\"values\":[]}" + # float_style and float_precision + @test JSON.json(Float64(π); float_style=:fixed, float_precision=2) == "3.14" + @test JSON.json(Float64(π); float_style=:exp, float_precision=2) == "3.14e+00" + @test_throws ArgumentError JSON.json(Float64(π); float_style=:not_a_style) +end + +@testset "Enhanced @omit_null and @omit_empty macros" begin + # Test structs for new macro functionality + + # Test case 1: Apply @omit_null to existing struct + struct 
ExistingStruct1 + id::Int + name::Union{Nothing, String} + value::Union{Nothing, Int} + end + @omit_null ExistingStruct1 + + # Test case 2: Apply @omit_empty to existing struct + struct ExistingStruct2 + id::Int + items::Vector{String} + data::Dict{String, Int} + end + @omit_empty ExistingStruct2 + + # Test case 3: Chaining with StructUtils.@defaults macro + @omit_null @defaults struct ChainedStruct1 + id::Int = 1 + name::Union{Nothing, String} = nothing + end + + @omit_empty @defaults struct ChainedStruct2 + id::Int = 1 + items::Vector{String} = String[] + end + + # Test case 4: Complex type expressions + struct ParametricStruct{T} + id::Int + value::Union{Nothing, T} + end + @omit_null ParametricStruct{String} # Apply to specific parametric type + + struct SimpleStruct + id::Int + name::Union{Nothing, String} + end + @omit_null SimpleStruct + + # Tests for case 1: Apply to existing struct + @test JSON.json(ExistingStruct1(1, nothing, nothing)) == "{\"id\":1}" + @test JSON.json(ExistingStruct1(1, "test", 42)) == "{\"id\":1,\"name\":\"test\",\"value\":42}" + @test JSON.json(ExistingStruct1(1, nothing, nothing); omit_null=false) == "{\"id\":1,\"name\":null,\"value\":null}" + + @test JSON.json(ExistingStruct2(1, String[], Dict{String, Int}())) == "{\"id\":1}" + @test JSON.json(ExistingStruct2(1, ["test"], Dict("key" => 1))) == "{\"id\":1,\"items\":[\"test\"],\"data\":{\"key\":1}}" + @test JSON.json(ExistingStruct2(1, String[], Dict{String, Int}()); omit_empty=false) == "{\"id\":1,\"items\":[],\"data\":{}}" + + # Tests for case 3: Chained macros with @defaults + @test JSON.json(ChainedStruct1()) == "{\"id\":1}" # Uses default constructor from @defaults + @test JSON.json(ChainedStruct1(2, "test")) == "{\"id\":2,\"name\":\"test\"}" + + @test JSON.json(ChainedStruct2()) == "{\"id\":1}" # Uses default constructor from @defaults + @test JSON.json(ChainedStruct2(2, ["test"])) == "{\"id\":2,\"items\":[\"test\"]}" + + # Tests for case 4: Complex types + @test 
JSON.json(ParametricStruct{String}(1, nothing)) == "{\"id\":1}" + @test JSON.json(ParametricStruct{String}(1, "test")) == "{\"id\":1,\"value\":\"test\"}" + + @test JSON.json(SimpleStruct(1, nothing)) == "{\"id\":1}" + @test JSON.json(SimpleStruct(1, "test")) == "{\"id\":1,\"name\":\"test\"}" + + # Test error cases + @test_throws LoadError eval(:(@omit_null 123)) + @test_throws LoadError eval(:(@omit_empty "not_a_type")) +end + +@testset "Buffered IO" begin + # Helper function to create large test data + function create_large_object(size::Int) + return Dict{String, Any}( + "large_array" => collect(1:size), + "nested_data" => Dict{String, Any}( + "strings" => ["test_string_$i" for i in 1:div(size, 10)], + "numbers" => [i * 3.14159 for i in 1:div(size, 10)], + "booleans" => [i % 2 == 0 for i in 1:div(size, 10)] + ), + "metadata" => Dict{String, Any}( + "size" => size, + "type" => "test_data", + "description" => "Large test object for buffered IO testing" * "x"^100 + ) + ) + end + + @testset "Basic buffered IO functionality" begin + # Test with small buffer size (512 bytes) + test_data = create_large_object(100) + + # Test writing to IOBuffer with small buffer + io1 = IOBuffer() + JSON.json(io1, test_data; bufsize=512) + result1 = String(take!(io1)) + + # Test writing to IOBuffer with default buffer size + io2 = IOBuffer() + JSON.json(io2, test_data) + result2 = String(take!(io2)) + + # Results should be identical regardless of buffer size + @test result1 == result2 + + # Test writing to IOBuffer with very large buffer + io3 = IOBuffer() + JSON.json(io3, test_data; bufsize=1024*1024) # 1MB buffer + result3 = String(take!(io3)) + + @test result1 == result3 + + # Verify the JSON can be parsed back correctly + parsed = JSON.parse(result1) + @test parsed["large_array"] == collect(1:100) + @test parsed["metadata"]["size"] == 100 + end + + @testset "Buffer size boundary conditions" begin + # Create data that will test buffer boundaries + test_data = create_large_object(500) 
+ expected_result = JSON.json(test_data) + + # Test with various buffer sizes around typical JSON size + buffer_sizes = [64, 128, 256, 512, 1024, 2048, 4096, 8192] + + for bufsize in buffer_sizes + io = IOBuffer() + JSON.json(io, test_data; bufsize=bufsize) + result = String(take!(io)) + @test result == expected_result + end + end + + @testset "Multiple flush scenarios" begin + # Create data large enough to trigger multiple flushes + large_data = create_large_object(2000) + + # Test with very small buffer to force multiple flushes + io = IOBuffer() + JSON.json(io, large_data; bufsize=256) + result_small_buf = String(take!(io)) + + # Compare with large buffer (no flushes) + io2 = IOBuffer() + JSON.json(io2, large_data; bufsize=1024*1024) + result_large_buf = String(take!(io2)) + + @test result_small_buf == result_large_buf + + # Verify correctness by parsing + parsed = JSON.parse(result_small_buf) + @test length(parsed["large_array"]) == 2000 + @test parsed["metadata"]["size"] == 2000 + end + + @testset "Array and object combinations with buffering" begin + # Test mix of arrays and objects that might cross buffer boundaries + mixed_data = [ + Dict("id" => i, "data" => collect((i-1)*10+1:i*10), "metadata" => "item_$i" * "x"^50) + for i in 1:100 + ] + + buffer_sizes = [128, 512, 2048] + expected = JSON.json(mixed_data) + + for bufsize in buffer_sizes + io = IOBuffer() + JSON.json(io, mixed_data; bufsize=bufsize) + result = String(take!(io)) + @test result == expected + end + end + + @testset "String escaping with buffering" begin + # Test strings that require escaping across buffer boundaries + strings_with_escaping = [ + "String with \"quotes\" and \\backslashes\\", + "String with\nnewlines\tand\ttabs", + "Unicode string: 🌟🚀💻🔥⭐", + "Mixed content: \"Hello\\nWorld\"\t🌍", + "Very long string: " * "A"^1000 * " with \" quotes \" and \\ backslashes \\" + ] + + test_data = Dict("strings" => strings_with_escaping) + expected = JSON.json(test_data) + + # Test with small 
buffer that will split escaped sequences + io = IOBuffer() + JSON.json(io, test_data; bufsize=64) + result = String(take!(io)) + @test result == expected + + # Verify by parsing back + parsed = JSON.parse(result) + @test parsed["strings"] == strings_with_escaping + end + + @testset "Pretty printing with buffering" begin + test_data = create_large_object(50) + + # Test pretty printing with different buffer sizes + buffer_sizes = [256, 1024, 4096] + + for bufsize in buffer_sizes + # Pretty printing with 2 spaces + io1 = IOBuffer() + JSON.json(io1, test_data; pretty=2, bufsize=bufsize) + result1 = String(take!(io1)) + + # Compare with reference (large buffer) + io2 = IOBuffer() + JSON.json(io2, test_data; pretty=2, bufsize=1024*1024) + result2 = String(take!(io2)) + + @test result1 == result2 + + # Ensure it's actually pretty printed + @test contains(result1, "\n") + @test contains(result1, " ") # indentation + end + end + + @testset "Edge cases and error conditions" begin + # Test with minimal buffer size + simple_data = Dict("key" => "value") + + # Very small buffer (smaller than a single JSON element) + io = IOBuffer() + JSON.json(io, simple_data; bufsize=8) + result = String(take!(io)) + @test result == "{\"key\":\"value\"}" + + # Test with empty data + io = IOBuffer() + JSON.json(io, Dict{String,Any}(); bufsize=32) + result = String(take!(io)) + @test result == "{}" + + # Test with array + io = IOBuffer() + JSON.json(io, Int[]; bufsize=32) + result = String(take!(io)) + @test result == "[]" + end + + @testset "File writing with buffering" begin + # Test writing to actual file with different buffer sizes + test_data = create_large_object(300) + expected = JSON.json(test_data) + + temp_files = String[] + + try + for (i, bufsize) in enumerate([512, 2048, 8192]) + filename = tempname() * ".json" + push!(temp_files, filename) + + # Write with specific buffer size + open(filename, "w") do io + JSON.json(io, test_data; bufsize=bufsize) + end + + # Read back and verify + 
content = read(filename, String) + @test content == expected + + # Parse to verify correctness + parsed = JSON.parse(content) + @test parsed["large_array"][1] == 1 + @test parsed["large_array"][end] == 300 + end + finally + # Clean up temp files + for filename in temp_files + isfile(filename) && rm(filename) + end + end + end + + @testset "JSONLines with buffering" begin + # Test JSONLines format with buffering + data = [ + Dict("id" => i, "value" => "item_$i", "data" => collect(1:i)) + for i in 1:20 + ] + + buffer_sizes = [128, 512, 2048] + expected = JSON.json(data; jsonlines=true) + + for bufsize in buffer_sizes + io = IOBuffer() + JSON.json(io, data; jsonlines=true, bufsize=bufsize) + result = String(take!(io)) + @test result == expected + + # Verify each line is valid JSON + lines = split(strip(result), '\n') + @test length(lines) == 20 + for (i, line) in enumerate(lines) + parsed_line = JSON.parse(line) + @test parsed_line["id"] == i + end + end + end + + @testset "Circular references with buffering" begin + # Test circular reference handling with small buffers + a = Any[1, 2, 3] + push!(a, a) # circular reference + + buffer_sizes = [64, 256, 1024] + expected = "[1,2,3,null]" # circular ref becomes null + + for bufsize in buffer_sizes + io = IOBuffer() + JSON.json(io, a; bufsize=bufsize) + result = String(take!(io)) + @test result == expected + end + end +end + +end # @testset "JSON.json" \ No newline at end of file diff --git a/test/jsonchecker.tar b/test/jsonchecker.tar new file mode 100644 index 00000000..1a4ea7f5 Binary files /dev/null and b/test/jsonchecker.tar differ diff --git a/test/lazy.jl b/test/lazy.jl new file mode 100644 index 00000000..00e3670b --- /dev/null +++ b/test/lazy.jl @@ -0,0 +1,184 @@ +using JSON, Test + +# helper struct for testing reading json from files +struct File end + +make(::Type{String}, x) = x +make(::Type{SubString{String}}, x) = SubString(x) +make(::Type{Vector{UInt8}}, x) = Vector{UInt8}(x) +make(::Type{IOBuffer}, x) = 
IOBuffer(x) +function make(::Type{File}, x) + _, io = mktemp() + write(io, x) + seekstart(io) + return io +end + +function makefile(nm, x) + dir = mktempdir() + file = joinpath(dir, nm) + open(file, "w") do io + write(io, x) + end + return file +end + +@testset "JSON.lazy" begin + for T in (String, SubString{String}, IOBuffer, Vector{UInt8}, File) + @test JSON.gettype(JSON.lazy(make(T, "1"))) == JSON.JSONTypes.NUMBER + @test JSON.gettype(JSON.lazy(make(T, "true"))) == JSON.JSONTypes.TRUE + @test JSON.gettype(JSON.lazy(make(T, "false"))) == JSON.JSONTypes.FALSE + @test JSON.gettype(JSON.lazy(make(T, "null"))) == JSON.JSONTypes.NULL + @test JSON.gettype(JSON.lazy(make(T, "[]"))) == JSON.JSONTypes.ARRAY + @test JSON.gettype(JSON.lazy(make(T, "{}"))) == JSON.JSONTypes.OBJECT + @test JSON.gettype(JSON.lazy(make(T, "\"\""))) == JSON.JSONTypes.STRING + @test_throws ArgumentError JSON.lazy(make(T, "a")) + end + # lazyfile + x = JSON.lazyfile(makefile("empty_object.json", "{}")) + @test JSON.gettype(x) == JSON.JSONTypes.OBJECT + @test length(x) == 0 + # LazyObject with all possible JSON types + x = JSON.lazy("{\"a\": 1, \"b\": null, \"c\": true, \"d\": false, \"e\": \"\", \"f\": [], \"g\": {}}") + @test length(x) == 7 + if VERSION >= v"1.7" + @test sprint(show, x) == "LazyObject{String} with 7 entries:\n \"a\" => JSON.LazyValue(1)\n \"b\" => JSON.LazyValue(nothing)\n \"c\" => JSON.LazyValue(true)\n \"d\" => JSON.LazyValue(false)\n \"e\" => JSON.LazyValue(\"\")\n \"f\" => LazyValue[]\n \"g\" => LazyObject{String}()" + end + i = 0 + foreach(x) do (k, v) + i += 1 + @test k isa String + @test v isa JSON.LazyValue + end + @test i == 7 + # LazyArray with all possible JSON types + x = JSON.lazy("[1, null, true, false, \"\", [], {}]") + @test length(x) == 7 + @test JSON.gettype(x[end]) == JSON.JSONTypes.OBJECT + if VERSION >= v"1.7" + @test sprint(show, x) == "7-element LazyArray{String}:\n JSON.LazyValue(1)\n JSON.LazyValue(nothing)\n JSON.LazyValue(true)\n JSON.LazyValue(false)\n 
JSON.LazyValue(\"\")\n LazyValue[]\n LazyObject{String}()" + end + i = 0 + foreach(x) do v + i += 1 + @test v isa JSON.LazyValue + end + @test i == 7 + # error cases + x = JSON.lazy("{}") + @test_throws ArgumentError JSON.applyarray((i, v) -> nothing, x) + @test_throws ArgumentError JSON.applystring(nothing, x) + x = JSON.lazy("{}"; allownan=true) + @test_throws ArgumentError JSON.applynumber(x -> nothing, x) + + # lazy indexing selection support + # examples from https://support.smartbear.com/alertsite/docs/monitors/api/endpoint/jsonpath.html + json = """ + { + "store": { + "book": [ + { + "category": "reference", + "author": "Nigel Rees", + "title": "Sayings of the Century", + "price": 8.95 + }, + { + "category": "fiction", + "author": "Herman Melville", + "title": "Moby Dick", + "isbn": "0-553-21311-3", + "price": 8.99 + }, + { + "category": "fiction", + "author": "J.R.R. Tolkien", + "title": "The Lord of the Rings", + "isbn": "0-395-19395-8", + "price": 22.99 + } + ], + "bicycle": { + "color": "red", + "price": 19.95 + } + }, + "expensive": 10 + } + """ + x = JSON.lazy(json) + @test propertynames(x) == [:store, :expensive] + y = x.store[:][] # All direct properties of store (not recursive). + @test length(y) == 2 && y[1] isa Vector{Any} && y[2] isa JSON.Object{String, Any} + y = x.store.bicycle.color[] # The color of the bicycle in the store. + @test y == "red" + y = x[~, "price"][] # The prices of all items in the store. + @test y == [8.95, 8.99, 22.99, 19.95] + y = x.store.book[:][] # All books in the store. + @test length(y) == 3 && eltype(y) == JSON.Object{String, Any} + y = x[~, "book"][1].title[] # The titles of all books in the store. + @test y == ["Sayings of the Century", "Moby Dick", "The Lord of the Rings"] + y = x[~, "book"][1][1][] # The first book in the store. 
+ @test y == Dict("category" => "reference", "author" => "Nigel Rees", "title" => "Sayings of the Century", "price" => 8.95) + y = x[~, "book"][1][1].author[] # The author of the first book in the store. + @test y == "Nigel Rees" + # @test_throws ArgumentError x[~, "book"][1].author[~] + y = x[~, "book"][1][:, (i, z) -> z.author[] == "J.R.R. Tolkien"].title[] # The titles of all books by J.R.R. Tolkien + @test y == ["The Lord of the Rings"] + y = x[~, :][] # All properties of the root object flattened in one list/array + @test length(y) == 17 + @test_throws KeyError x.foo + @test_throws KeyError x.store.book[100] + list = x.store.book[:] + @test eltype(list) == Any + @test isassigned(list, 1) + @test list[:] === list + @test length(list[[1, 3]]) == 2 + # test that we correctly skip over all kinds of values + json = """ + { + "a": 1, + "a1": 3.14, + "a2": 100000000000000000000000, + "a3": 170141183460469231731687303715884105728, + "a4": 1.7976931348623157e310, + "b": null, + "c": true, + "d": false, + "e": "hey there sailor", + "f": [], + "g": {}, + "h": [1, 2, 3], + "i": {"a": 1, "b": 2}, + "j": [1, {"a": 1, "b": 2}, 3], + "k": {"a": 1, "b": [1, 2, 3]}, + "l": [1, {"a": 1, "b": [1, 2, 3]}, 3], + "m": {"a": 1, "b": {"a": 1, "b": 2}}, + "n": [1, {"a": 1, "b": {"a": 1, "b": 2}}, 3], + "o": {"a": 1, "b": {"a": 1, "b": [1, 2, 3]}}, + "p": [1, {"a": 1, "b": {"a": 1, "b": [1, 2, 3]}}, 3], + "q": {"a": 1, "b": {"a": 1, "b": {"a": 1, "b": 2}}}, + "r": [1, {"a": 1, "b": {"a": 1, "b": {"a": 1, "b": 2}}}, 3], + "s": {"a": 1, "b": {"a": 1, "b": {"a": 1, "b": [1, 2, 3]}}}, + "t": [1, {"a": 1, "b": {"a": 1, "b": {"a": 1, "b": [1, 2, 3]}}}, 3], + "z": 602 + } + """ + x = JSON.lazy(json) + @test x.z[] == 602 + @test JSON.isvalidjson(x) + json = """ + [ + { + "a": [1, 2, 3] + }, + { + "a": [1, 2, 3] + } + ] + """ + x = JSON.lazy(json) + @test x[~, "a"][] == [[1, 2, 3], [1, 2, 3]] + @test x[:].a[] == [[1, 2, 3], [1, 2, 3]] + @test JSON.isvalidjson(x) +end diff --git 
a/test/lowering.jl b/test/lowering.jl deleted file mode 100644 index 81dba7d6..00000000 --- a/test/lowering.jl +++ /dev/null @@ -1,46 +0,0 @@ -module TestLowering - -using JSON -using Test -using Dates -using FixedPointNumbers: Fixed - -@test JSON.json(Date(2016, 8, 3)) == "\"2016-08-03\"" - -@test JSON.json(:x) == "\"x\"" -@test_throws ArgumentError JSON.json(Base) - -struct Type151{T} - x::T -end - -@test JSON.parse(JSON.json(Type151)) == string(Type151) - -JSON.lower(v::Type151{T}) where {T} = Dict(:type => T, :value => v.x) -@test JSON.parse(JSON.json(Type151(1.0))) == Dict( - "type" => "Float64", - "value" => 1.0) - -fixednum = Fixed{Int16, 15}(0.1234) -@test JSON.parse(JSON.json(fixednum)) == convert(Float64, fixednum) - -# test that the default string-serialization of enums can be overridden by -# `lower` if needed -@enum Fruit apple orange banana -JSON.lower(x::Fruit) = string("Fruit: ", x) -@test JSON.json(apple) == "\"Fruit: apple\"" - -@enum Vegetable carrot tomato potato -JSON.lower(x::Vegetable) = Dict(string(x) => Int(x)) -@test JSON.json(potato) == "{\"potato\":2}" - -# test that the default lowering for compound types can be overridden by `propertynames` and -# `getproperty` if needed -struct Type152 - x::Int -end -Base.propertynames(v::Type152) = (:type, fieldnames(Type152)...) -Base.getproperty(v::Type152, s::Symbol) = s == :type ? 
:Type152 : getfield(v, s) -@test JSON.json(Type152(152)) == "{\"type\":\"Type152\",\"x\":152}" - -end diff --git a/test/object.jl b/test/object.jl new file mode 100644 index 00000000..cd166a64 --- /dev/null +++ b/test/object.jl @@ -0,0 +1,342 @@ +using JSON, Test + +@testset "JSON.Object Tests" begin + # Test empty JSON.Object + @testset "Empty Object" begin + obj = JSON.Object{String, Int}() + @test isempty(obj) + @test length(obj) == 0 + @test collect(obj) == [] + @test propertynames(obj) == () + @test get(obj, "key", nothing) === nothing + @test get(() -> 42, obj, "key") == 42 + @test_throws KeyError obj["key"] + @test isempty(empty(obj)) + @test_throws KeyError obj.key + @test !haskey(obj, "key") + @test length(obj) == length(delete!(obj, "key")) + @test isempty(JSON.Object()) + end + + # Test JSON.Object with one entry + @testset "Single Entry Object" begin + obj = JSON.Object{String, Int}() + # internal way to add a key-value pair + JSON.Object{String, Int}(obj, "key", 42) + @test !isempty(obj) + @test length(obj) == 1 + @test collect(obj) == ["key" => 42] + @test propertynames(obj) == (:key,) + @test get(obj, "key", nothing) == 42 + @test obj["key"] == 42 + @test get(() -> 0, obj, "key") == 42 + @test_throws KeyError obj["nonexistent_key"] + # Object with String or Symbol key supports getproperty + @test obj.key == 42 + @test haskey(obj, "key") + @test !haskey(obj, "nonexistent_key") + # test setindex! and delete! 
+ obj["key"] = 100 + @test obj["key"] == 100 + delete!(obj, "key") + @test isempty(obj) + obj.key = 200 + @test obj.key == 200 + end + + # Test JSON.Object with multiple entries + @testset "Multiple Entry Object" begin + obj = JSON.Object{String, Int}() + ch = JSON.Object{String, Int}(obj, "key1", 1) + ch = JSON.Object{String, Int}(ch, "key2", 2) + ch = JSON.Object{String, Int}(ch, "key3", 3) + @test !isempty(obj) + @test length(obj) == 3 + @test propertynames(obj) == (:key1, :key2, :key3) + @test collect(obj) == ["key1" => 1, "key2" => 2, "key3" => 3] + @test get(obj, "key2", nothing) == 2 + @test obj["key3"] == 3 + obj["key3"] = 100 + @test obj["key3"] == 100 + delete!(obj, "key2") + @test length(obj) == 2 + @test collect(obj) == ["key1" => 1, "key3" => 100] + end + + # Test iteration over keys, values, and pairs + @testset "Iteration" begin + obj = JSON.Object{String, Int}() + obj["a"] = 1 + obj["b"] = 2 + obj["c"] = 3 + + @test collect(keys(obj)) == ["a", "b", "c"] + @test collect(values(obj)) == [1, 2, 3] + @test collect(pairs(obj)) == ["a" => 1, "b" => 2, "c" => 3] + end + + # Test membership + @testset "Membership" begin + obj = JSON.Object{String, Int}() + obj["x"] = 10 + @test haskey(obj, "x") + @test !haskey(obj, "y") + @test "x" in keys(obj) + @test 10 in values(obj) + end + + # Test modification + @testset "Modification" begin + obj = JSON.Object{String, Int}() + obj["key1"] = 100 + obj["key2"] = 200 + merge!(obj, Dict("key3" => 300, "key4" => 400)) + @test length(obj) == 4 + @test obj["key3"] == 300 + empty!(obj) + @test isempty(obj) + end + + # Test copying + @testset "Copying" begin + obj = JSON.Object{String, Int}() + obj["a"] = 1 + obj["b"] = 2 + obj_copy = copy(obj) + @test obj == obj_copy + obj["a"] = 10 + @test obj != obj_copy + end + + # Test equality + @testset "Equality" begin + obj1 = JSON.Object{String, Int}() + obj2 = JSON.Object{String, Int}() + obj1["key"] = 42 + obj2["key"] = 42 + @test obj1 == obj2 + obj2["key"] = 100 + @test obj1 != 
obj2 + end + + # Test edge cases + @testset "Edge Cases" begin + obj = JSON.Object{Union{String, Nothing}, Int}() + obj["key"] = 1 + obj[nothing] = 2 + @test obj["key"] == 1 + @test obj[nothing] == 2 + obj["key"] = 100 + @test obj["key"] == 100 + end + + # Test serialization + @testset "Serialization" begin + obj = JSON.Object{String, Int}() + obj["a"] = 1 + obj["b"] = 2 + dict = Dict(obj) + @test dict == Dict("a" => 1, "b" => 2) + obj2 = JSON.Object{String, Int}(dict) + @test obj == obj2 + end + + # constructors + @testset "Constructors" begin + obj = JSON.Object(Dict("a" => 1, "b" => 2)) + @test obj["a"] == 1 + @test obj["b"] == 2 + obj2 = JSON.Object("a" => 1, "b" => 2) + @test obj2["a"] == 1 + @test obj2["b"] == 2 + obj3 = JSON.Object(:a => 1, :b => 2) + @test obj3[:a] == 1 + @test obj3[:b] == 2 + obj = JSON.Object{Symbol, Int}() + obj[:a] = 1 + @test obj.a == 1 + obj.a = 2 + @test obj[:a] == 2 + end + + # Test performance (basic check for large dictionaries) + @testset "Performance" begin + obj = JSON.Object{Int, Int}() + for i in 1:10_000 + obj[i] = i + end + @test length(obj) == 10_000 + @test obj[5_000] == 5_000 + end + + # Test new iterator constructors + @testset "Iterator Constructors" begin + # Test generic Object(itr) constructor with Pairs + pairs = ["a" => 1, "b" => 2, "c" => 3] + obj1 = JSON.Object(pairs) + @test obj1["a"] == 1 + @test obj1["b"] == 2 + @test obj1["c"] == 3 + @test length(obj1) == 3 + + # Test generic Object(itr) constructor with Tuples + tuples = [("x", 10), ("y", 20), ("z", 30)] + obj2 = JSON.Object(tuples) + @test obj2["x"] == 10 + @test obj2["y"] == 20 + @test obj2["z"] == 30 + @test length(obj2) == 3 + + # Test generic Object(itr) constructor with consistent types + symbols = [:a => 1, :b => 2, :c => 3] + obj3 = JSON.Object(symbols) + @test obj3[:a] == 1 + @test obj3[:b] == 2 + @test obj3[:c] == 3 + @test length(obj3) == 3 + + # Test that mixed types require explicit Any type specification + mixed = [:a => 1, :b => "hello", :c 
=> 3.14] + @test_throws MethodError JSON.Object(mixed) + + # But works with explicit Any type + obj3_any = JSON.Object{Symbol,Any}(mixed) + @test obj3_any[:a] == 1 + @test obj3_any[:b] == "hello" + @test obj3_any[:c] == 3.14 + @test length(obj3_any) == 3 + + # Test empty iterator + empty_iter = Pair{String, Int}[] + obj4 = JSON.Object(empty_iter) + @test isempty(obj4) + @test length(obj4) == 0 + + # Test typed Object{K,V}(itr) constructor + typed_pairs = ["key1" => 100, "key2" => 200] + obj5 = JSON.Object{String, Int}(typed_pairs) + @test obj5["key1"] == 100 + @test obj5["key2"] == 200 + @test length(obj5) == 2 + + # Test typed constructor with tuples + typed_tuples = [("a", 1), ("b", 2)] + obj6 = JSON.Object{String, Int}(typed_tuples) + @test obj6["a"] == 1 + @test obj6["b"] == 2 + @test length(obj6) == 2 + + # Test error handling for invalid iterator elements + invalid_iter = [1, 2, 3] # Not pairs or tuples + @test_throws ArgumentError JSON.Object(invalid_iter) + @test_throws ArgumentError JSON.Object{String, Int}(invalid_iter) + + # Test with generator expression + gen_pairs = (string(i) => i^2 for i in 1:5) + obj7 = JSON.Object(gen_pairs) + @test obj7["1"] == 1 + @test obj7["3"] == 9 + @test obj7["5"] == 25 + @test length(obj7) == 5 + + # Test with Dict as iterator + dict_input = Dict("foo" => 42, "bar" => 24) + obj8 = JSON.Object(dict_input) + @test obj8["foo"] == 42 + @test obj8["bar"] == 24 + @test length(obj8) == 2 + end + + # Test enhanced haskey for String objects with Symbol keys + @testset "Enhanced haskey for String Objects" begin + obj = JSON.Object{String, Any}() + obj["hello"] = "world" + obj["count"] = 42 + + # Test basic string key lookup + @test haskey(obj, "hello") + @test haskey(obj, "count") + @test !haskey(obj, "missing") + + # Test Symbol key lookup (should convert to String) + @test haskey(obj, :hello) + @test haskey(obj, :count) + @test !haskey(obj, :missing) + + # Test that Symbol keys work for both existing and non-existing keys + 
obj["symbol_test"] = "value" + @test haskey(obj, :symbol_test) + @test !haskey(obj, :nonexistent_symbol) + + # Test with empty object + empty_obj = JSON.Object{String, Any}() + @test !haskey(empty_obj, :anything) + @test !haskey(empty_obj, "anything") + end + + # Test merge functionality with NamedTuple + @testset "NamedTuple Merge" begin + # Test basic merge + nt = (a = 1, b = 2, c = 3) + obj = JSON.Object{String, Any}() + obj["x"] = 10 + obj["y"] = 20 + + merged = merge(nt, obj) + @test merged.a == 1 + @test merged.b == 2 + @test merged.c == 3 + @test merged.x == 10 + @test merged.y == 20 + + # Test merge with overlapping keys (Object values should override NamedTuple) + nt2 = (a = 100, d = 400) + obj2 = JSON.Object{String, Any}() + obj2["a"] = 999 # This should override the NamedTuple value + obj2["b"] = 200 + + merged2 = merge(nt2, obj2) + @test merged2.a == 999 # Object value overrides NamedTuple + @test merged2.d == 400 # NamedTuple value preserved + @test merged2.b == 200 # Object value added + + # Test merge with empty NamedTuple + empty_nt = NamedTuple() + obj3 = JSON.Object{String, Any}() + obj3["key"] = "value" + + merged3 = merge(empty_nt, obj3) + @test merged3.key == "value" + @test length(merged3) == 1 + + # Test merge with empty Object + nt4 = (x = 1, y = 2) + empty_obj = JSON.Object{String, Any}() + + merged4 = merge(nt4, empty_obj) + @test merged4.x == 1 + @test merged4.y == 2 + @test length(merged4) == 2 + + # Test that the result is a NamedTuple + result = merge((a = 1,), JSON.Object{String, Any}("b" => 2)) + @test result isa NamedTuple + @test haskey(result, :a) + @test haskey(result, :b) + + # Test with various value types + obj_mixed = JSON.Object{String, Any}() + obj_mixed["string"] = "hello" + obj_mixed["number"] = 42 + obj_mixed["float"] = 3.14 + obj_mixed["bool"] = true + + nt_mixed = (existing = "original",) + merged_mixed = merge(nt_mixed, obj_mixed) + @test merged_mixed.existing == "original" + @test merged_mixed.string == "hello" + 
@test merged_mixed.number == 42 + @test merged_mixed.float == 3.14 + @test merged_mixed.bool == true + end +end \ No newline at end of file diff --git a/test/parse.jl b/test/parse.jl new file mode 100644 index 00000000..8dd9b46a --- /dev/null +++ b/test/parse.jl @@ -0,0 +1,754 @@ +using JSON, StructUtils, UUIDs, Dates, Test + +struct CustomJSONStyle <: JSON.JSONStyle end + +struct A + a::Int + b::Int + c::Int + d::Int +end + +@noarg mutable struct B + a::Int + b::Int + c::Int + d::Int +end + +struct C +end + +struct D + a::Int + b::Float64 + c::String +end + +struct LotsOfFields + x1::String + x2::String + x3::String + x4::String + x5::String + x6::String + x7::String + x8::String + x9::String + x10::String + x11::String + x12::String + x13::String + x14::String + x15::String + x16::String + x17::String + x18::String + x19::String + x20::String + x21::String + x22::String + x23::String + x24::String + x25::String + x26::String + x27::String + x28::String + x29::String + x30::String + x31::String + x32::String + x33::String + x34::String + x35::String +end + +struct Wrapper + x::NamedTuple{(:a, :b), Tuple{Int, String}} +end + +@noarg mutable struct UndefGuy + id::Int + name::String +end + +struct E + id::Int + a::A +end + +@kwarg struct F + id::Int + rate::Float64 + name::String +end + +@kwarg struct G + id::Int + rate::Float64 + name::String + f::F +end + +struct H + id::Int + name::String + properties::Dict{String, Any} + addresses::Vector{String} +end + +@enum Fruit apple banana + +struct I + id::Int + name::String + fruit::Fruit +end + +abstract type Vehicle end + +struct Car <: Vehicle + type::String + make::String + model::String + seatingCapacity::Int + topSpeed::Float64 +end + +struct Truck <: Vehicle + type::String + make::String + model::String + payloadCapacity::Float64 +end + +struct J + id::Union{Int, Nothing} + name::Union{String, Nothing} + rate::Union{Int64, Float64} +end + +struct K + id::Int + value::Union{Float64, Missing} +end + +@kwarg struct 
System + duration::Real = 0 # mandatory + cwd::Union{Nothing, String} = nothing + environment::Union{Nothing, Dict} = nothing + batch::Union{Nothing, Dict} = nothing + shell::Union{Nothing, Dict} = nothing +end + +StructUtils.@defaults struct L + id::Int + first_name::String &(json=(name=:firstName,),) + rate::Float64 = 33.3 +end + +StructUtils.@tags struct ThreeDates + date::Date &(json=(dateformat=dateformat"yyyy_mm_dd",),) + datetime::DateTime &(json=(dateformat=dateformat"yyyy/mm/dd HH:MM:SS",),) + time::Time &(json=(dateformat=dateformat"HH/MM/SS",),) +end + +struct M + id::Int + value::Union{Nothing,K} +end + +struct Recurs + id::Int + value::Union{Nothing,Recurs} +end + +struct N + id::Int + uuid::UUID +end + +struct O + id::Int + name::Union{I,L,Missing,Nothing} +end + +struct Point + x::Int + y::Int +end + +@defaults struct P + num::Int64 + foo::String = "bar" +end + +# example from JSON.parse docstring +abstract type AbstractMonster end + +struct Dracula <: AbstractMonster + num_victims::Int +end + +JSON.lower(x::Dracula) = (type="vampire", num_victims=x.num_victims) + +struct Werewolf <: AbstractMonster + witching_hour::DateTime +end + +JSON.lower(x::Werewolf) = (type="werewolf", witching_hour=x.witching_hour) + +JSON.@choosetype AbstractMonster x -> x.monster_type[] == "vampire" ? 
Dracula : Werewolf + +struct Percent <: Number + value::Float64 +end + +JSON.lift(::Type{Percent}, x) = Percent(Float64(x)) +StructUtils.liftkey(::Type{Percent}, x::String) = Percent(parse(Float64, x)) + +@defaults struct FrankenStruct + id::Int = 0 + name::String = "Jim" + address::Union{Nothing, String} = nothing + rate::Union{Missing, Float64} = missing + type::Symbol = :a &(json=(name="franken_type",),) + notsure::Any = nothing + monster::AbstractMonster = Dracula(0) + percent::Percent = Percent(0.0) + birthdate::Date = Date(0) &(json=(dateformat="yyyy/mm/dd",),) + percentages::Dict{Percent, Int} = Dict{Percent, Int}() + json_properties::JSONText = JSONText("") + matrix::Matrix{Float64} = Matrix{Float64}(undef, 0, 0) +end + +@tags struct Q + id::Int + any::Any &(choosetype=x -> x.type[] == "int" ? @NamedTuple{type::String, value::Int} : x.type[] == "float" ? @NamedTuple{type::String, value::Float64} : @NamedTuple{type::String, value::String},) +end + +@testset "JSON.parse" begin + @testset "errors" begin + # Unexpected character in array + @test_throws ArgumentError JSON.lazy("[1,2,3/4,5,6,7]")[] + # Unexpected character in object + @test_throws ArgumentError JSON.lazy("{\"1\":2, \"2\":3 _ \"4\":5}")[] + # Invalid escaped character + @test_throws ArgumentError JSON.lazy("[\"alpha\\α\"]")[] + # Invalid 'simple' and 'unknown value' + @test_throws ArgumentError JSON.lazy("[tXXe]")[] + @test_throws ArgumentError JSON.lazy("[fail]")[] + @test_throws ArgumentError JSON.lazy("∞")[] + # Invalid number + @test_throws ArgumentError JSON.lazy("[5,2,-]")[] + @test_throws ArgumentError JSON.lazy("[5,2,+β]")[] + # Incomplete escape + @test_throws ArgumentError JSON.lazy("\"\\")[] + @test_throws ArgumentError JSON.lazy("[\"🍕\"_\"🍕\"")[] + # incomplete surrogate pair *doesn't* throw, but resulting string is invalid utf8 + # https://github.com/JuliaIO/JSON.jl/issues/232 + x = 
JSON.parse("{\"id\":\"5\",\"name\":\"IllegalUnicodehalf-surrogateU+D800\",\"url\":\"http://www.example.com/#\\\\\\ud800\\\\\\u597D\",\"expect_url\":\"http://www.example.com/#\\\\\\uFFFD\\\\\\u597D\"}") + @test !isvalid(x.url) + @test x.url == "http://www.example.com/#\\\ud8000\\好" + end # @testset "errors" + + # JSON.jl pre-1.0 compat + x = JSON.parse("{}") + @test isempty(x) && typeof(x) == JSON.Object{String, Any} + x = JSON.parsefile(makefile("empty_object.json", "{}")) + @test isempty(x) && typeof(x) == JSON.Object{String, Any} + x = JSON.parsefile(makefile("empty_object.json", "{}"), Any) + @test isempty(x) && typeof(x) == JSON.Object{String, Any} + x = Dict{String, Any}() + JSON.parsefile!(makefile("empty_object.json", "{}"), x) + @test isempty(x) + io = IOBuffer() + write(io, "{}") + seekstart(io) + x = JSON.parse(io) + @test isempty(x) && typeof(x) == JSON.Object{String, Any} + seekstart(io) + x = Dict{String, Any}() + JSON.parse!(io, x) + @test isempty(x) + seekstart(io) + @test JSON.isvalidjson(io) + open(makefile("empty_object.json", "{}"), "r") do io + x = Dict{String, Any}() + JSON.parse!(io, x) + @test isempty(x) + end + open(makefile("empty_object.json", "{}"), "r") do io + @test JSON.isvalidjson(io) + end + @test JSON.isvalidjson("{}") + x = JSON.parse("{}") + @test isempty(x) && typeof(x) == JSON.Object{String, Any} + @test_throws ArgumentError JSON.parse(JSON.LazyValue(".", 1, JSON.JSONTypes.OBJECT, JSON.LazyOptions(), true)) + x = JSON.lazy("1") + @test_throws ArgumentError JSON.StructUtils.applyeach((k, v) -> nothing, x) + x = JSON.parse("{\"a\": 1}") + @test !isempty(x) && x["a"] == 1 && typeof(x) == JSON.Object{String, Any} + x = JSON.parse("{\"a\": 1, \"b\": null, \"c\": true, \"d\": false, \"e\": \"\", \"f\": [], \"g\": {}}") + @test !isempty(x) && x["a"] == 1 && x["b"] === nothing && x["c"] === true && x["d"] === false && x["e"] == "" && x["f"] == Any[] && x["g"] == JSON.Object{String, Any}() + # custom dicttype + x = JSON.parse("{\"a\": 1, 
\"b\": null, \"c\": true, \"d\": false, \"e\": \"\", \"f\": [], \"g\": {}}"; dicttype=Dict{String, Any}) + # test that x isa Dict and nested x.g is also a Dict + @test x isa Dict{String, Any} && !isempty(x) && x["a"] == 1 && x["b"] === nothing && x["c"] === true && x["d"] === false && x["e"] == "" && x["f"] == Any[] && x["g"] == Dict{String, Any}() && typeof(x["g"]) == Dict{String, Any} + # alternative key types + x = JSON.parse("{\"a\": 1, \"b\": null}"; dicttype=JSON.Object{Symbol, Any}) + @test x isa JSON.Object{Symbol, Any} && !isempty(x) && x[:a] == 1 && x[:b] === nothing + x = JSON.parse("{\"apple\": 1, \"banana\": null}"; dicttype=JSON.Object{Fruit, Any}) + @test x isa JSON.Object{Fruit, Any} && !isempty(x) && x[apple] == 1 && x[banana] === nothing + x = JSON.parse("[]") + @test isempty(x) && x == Any[] + x = JSON.parse("[1, null, true, false, \"\", [], {}]") + @test !isempty(x) && x[1] == 1 && x[2] === nothing && x[3] === true && x[4] === false && x[5] == "" && x[6] == Any[] && x[7] == JSON.Object{String, Any}() + x = JSON.parse("1") + @test x == 1 + x = JSON.parse("true") + @test x === true + x = JSON.parse("false") + @test x === false + x = JSON.parse("null") + @test x === nothing + x = JSON.parse("\"\"") + @test x == "" + x = JSON.parse("\"a\"") + @test x == "a" + x = JSON.parse("\"\\\"\"") + @test x == "\"" + x = JSON.parse("\"\\\\\"") + @test x == "\\" + x = JSON.parse("\"\\/\"") + @test x == "/" + x = JSON.parse("\"\\b\"") + @test x == "\b" + x = JSON.parse("\"\\f\"") + @test x == "\f" + x = JSON.parse("\"\\n\"") + @test x == "\n" + x = JSON.parse("\"\\r\"") + @test x == "\r" + x = JSON.parse("\"\\t\"") + @test x == "\t" + x = JSON.parse("\"\\u0000\"") + @test x == "\0" + x = JSON.parse("\"\\uD83D\\uDE00\"") + @test x == "😀" + x = JSON.parse("\"\\u0061\"") + @test x == "a" + x = JSON.parse("\"\\u2028\"") + @test x == "\u2028" + x = JSON.parse("\"\\u2029\"") + @test x == "\u2029" + @test_throws ArgumentError JSON.parse("nula") + @test_throws 
ArgumentError JSON.parse("nul") + @test_throws ArgumentError JSON.parse("trub") + # allownan for parsing normally invalid json values + @test JSON.parse("NaN"; allownan=true) === NaN + @test JSON.parse("Inf"; inf="Inf", allownan=true) === Inf + # jsonlines support + @test JSON.parse("1"; jsonlines=true) == [1] + @test JSON.parse("1 \t"; jsonlines=true) == [1] + @test JSON.parse("1 \t\r"; jsonlines=true) == [1] + @test JSON.parse("1 \t\r\n"; jsonlines=true) == [1] + @test JSON.parse("1 \t\r\nnull"; jsonlines=true) == [1, nothing] + @test JSON.lazy("1\nnull"; jsonlines=true)[] == [1, nothing] + @test JSON.parse("1\n\n2\n\n"; jsonlines=true) == [1, 2] + # auto-detected jsonlines + @test JSON.parsefile(makefile("jsonlines.jsonl", "1\n2\n3\n4")) == [1, 2, 3, 4] + # missing newline + @test_throws ArgumentError JSON.parse("1 \t\bnull"; jsonlines=true) + @test_throws ArgumentError JSON.parse(""; jsonlines=true) + @test JSON.parse("1\n2\n3\n4"; jsonlines=true) == [1, 2, 3, 4] + @test JSON.parse("[1]\n[2]\n[3]\n[4]"; jsonlines=true) == [[1], [2], [3], [4]] + @test JSON.parse("{\"a\": 1}\n{\"b\": 2}\n{\"c\": 3}\n{\"d\": 4}"; jsonlines=true) == [Dict("a" => 1), Dict("b" => 2), Dict("c" => 3), Dict("d" => 4)] + @test JSON.parse(""" + ["Name", "Session", "Score", "Completed"] + ["Gilbert", "2013", 24, true] + ["Alexa", "2013", 29, true] + ["May", "2012B", 14, false] + ["Deloise", "2012A", 19, true] + """; jsonlines=true, allownan=true) == + [["Name", "Session", "Score", "Completed"], + ["Gilbert", "2013", 24, true], + ["Alexa", "2013", 29, true], + ["May", "2012B", 14, false], + ["Deloise", "2012A", 19, true]] + @test JSON.parse(""" + {"name": "Gilbert", "wins": [["straight", "7♣"], ["one pair", "10♥"]]} + {"name": "Alexa", "wins": [["two pair", "4♠"], ["two pair", "9♠"]]} + {"name": "May", "wins": []} + {"name": "Deloise", "wins": [["three of a kind", "5♣"]]} + """; jsonlines=true) == + [Dict("name" => "Gilbert", "wins" => [["straight", "7♣"], ["one pair", "10♥"]]), + 
Dict("name" => "Alexa", "wins" => [["two pair", "4♠"], ["two pair", "9♠"]]), + Dict("name" => "May", "wins" => []), + Dict("name" => "Deloise", "wins" => [["three of a kind", "5♣"]])] + + @test_throws ArgumentError JSON.parse("{\"a\" 1}") + @test_throws ArgumentError JSON.parse("123a") + @test_throws ArgumentError JSON.parse("123.4a") + @test_throws ArgumentError JSON.parse("[1]e") + @test_throws ArgumentError JSON.parse("\"abc\"e+") + @test_throws ArgumentError JSON.parse("1a\n2\n3"; jsonlines=true) + @test_throws ArgumentError JSON.parse("1\n2\n3a"; jsonlines=true) + @test_throws ArgumentError JSON.parse(" 123a") + + @testset "Number parsing" begin + @test JSON.parse("1") === Int64(1) + @test JSON.parse("1 ") === Int64(1) + @test JSON.parse("-1") === Int64(-1) + @test_throws ArgumentError JSON.parse("1.") + @test_throws ArgumentError JSON.parse("-1.") + @test_throws ArgumentError JSON.parse("-1. ") + @test JSON.parse("1.1") === 1.1 + @test JSON.parse("1e1") === 10.0 + @test JSON.parse("1E23") === 1e23 + # @test JSON.parse("1f23") === 1f23 + # @test JSON.parse("1F23") === 1f23 + @test JSON.parse("100000000000000000000000") == 100000000000000000000000 + for T in (Int8, Int16, Int32, Int64, Int128) + @test JSON.parse(string(T(1))) == T(1) + @test JSON.parse(string(T(-1))) == T(-1) + end + + @test JSON.parse("428.0E+03") === 428e3 + @test JSON.parse("1e+1") === 10.0 + @test JSON.parse("1e-1") === 0.1 + @test JSON.parse("1.1e1") === 11.0 + @test JSON.parse("1.1e+1") === 11.0 + @test JSON.parse("1.1e-1") === 0.11 + @test JSON.parse("1.1e-01") === 0.11 + @test JSON.parse("1.1e-001") === 0.11 + @test JSON.parse("1.1e-0001") === 0.11 + @test JSON.parse("9223372036854775797") === 9223372036854775797 + @test JSON.parse("9223372036854775798") === 9223372036854775798 + @test JSON.parse("9223372036854775799") === 9223372036854775799 + @test JSON.parse("9223372036854775800") === 9223372036854775800 + @test JSON.parse("9223372036854775801") === 9223372036854775801 + @test 
JSON.parse("9223372036854775802") === 9223372036854775802 + @test JSON.parse("9223372036854775803") === 9223372036854775803 + @test JSON.parse("9223372036854775804") === 9223372036854775804 + @test JSON.parse("9223372036854775805") === 9223372036854775805 + @test JSON.parse("9223372036854775806") === 9223372036854775806 + @test JSON.parse("9223372036854775807") === 9223372036854775807 + # promote to BigInt + x = JSON.parse("9223372036854775808") + # only == here because BigInt don't compare w/ === + @test x isa BigInt && x == 9223372036854775808 + x = JSON.parse("170141183460469231731687303715884105727") + @test x isa BigInt && x == 170141183460469231731687303715884105727 + x = JSON.parse("170141183460469231731687303715884105728") + @test x isa BigInt && x == 170141183460469231731687303715884105728 + # BigFloat + @test JSON.parse("1.7976931348623157e310") == big"1.7976931348623157e310" + + # zeros + @test JSON.parse("0") === Int64(0) + @test JSON.parse("0e0") === 0.0 + @test JSON.parse("-0e0") === -0.0 + @test JSON.parse("0e-0") === 0.0 + @test JSON.parse("-0e-0") === -0.0 + @test JSON.parse("0e+0") === 0.0 + @test JSON.parse("-0e+0") === -0.0 + @test JSON.parse("0e+01234567890123456789") == big"0.0" + @test JSON.parse("0.00e-01234567890123456789") == big"0.0" + @test JSON.parse("-0e+01234567890123456789") == big"0.0" + @test JSON.parse("-0.00e-01234567890123456789") == big"0.0" + @test JSON.parse("0e291") === 0.0 + @test JSON.parse("0e292") === 0.0 + @test JSON.parse("0e347") == big"0.0" + @test JSON.parse("0e348") == big"0.0" + @test JSON.parse("-0e291") === 0.0 + @test JSON.parse("-0e292") === 0.0 + @test JSON.parse("-0e347") == big"0.0" + @test JSON.parse("-0e348") == big"0.0" + @test JSON.parse("2e-324") === 0.0 + # extremes + @test JSON.parse("1e310") == big"1e310" + @test JSON.parse("-1e310") == big"-1e310" + @test JSON.parse("1e-305") === 1e-305 + @test JSON.parse("1e-306") === 1e-306 + @test JSON.parse("1e-307") === 1e-307 + @test JSON.parse("1e-308") === 
1e-308 + @test JSON.parse("1e-309") === 1e-309 + @test JSON.parse("1e-310") === 1e-310 + @test JSON.parse("1e-322") === 1e-322 + @test JSON.parse("5e-324") === 5e-324 + @test JSON.parse("4e-324") === 5e-324 + @test JSON.parse("3e-324") === 5e-324 + # errors + @test_throws ArgumentError JSON.parse("1e") + @test_throws ArgumentError JSON.parse("1.0ea") + @test_throws ArgumentError JSON.parse("1e+") + @test_throws ArgumentError JSON.parse("1e-") + @test_throws ArgumentError JSON.parse(".") + @test_throws ArgumentError JSON.parse("1.a") + @test_throws ArgumentError JSON.parse("1e1.") + @test_throws ArgumentError JSON.parse("-") + @test_throws ArgumentError JSON.parse("1.1.") + @test_throws ArgumentError JSON.parse("+0e0") + @test_throws ArgumentError JSON.parse("+0e+0") + @test_throws ArgumentError JSON.parse("+0e-0") + @test_throws ArgumentError JSON.parse(".1") + @test_throws ArgumentError JSON.parse("+1") + end + @testset "JSON.parse with types" begin + obj = JSON.parse("""{ "a": 1,"b": 2,"c": 3,"d": 4}""", A) + @test obj == A(1, 2, 3, 4) + # test order doesn't matter + obj2 = JSON.parse("""{ "d": 1,"b": 2,"c": 3,"a": 4}""", A) + @test obj2 == A(4, 2, 3, 1) + # NamedTuple + obj = JSON.parse("""{ "d": 1,"b": 2,"c": 3,"a": 4}""", NamedTuple{(:a, :b, :c, :d), Tuple{Int, Int, Int, Int}}) + @test obj == (a = 4, b = 2, c = 3, d = 1) + @test JSON.parse("{}", C) === C() + # we also support materializing singleton from JSON.json output + @test JSON.parse("\"C()\"", C) === C() + obj = B() + JSON.parse!("""{ "a": 1,"b": 2,"c": 3,"d": 4}""", obj) + @test obj.a == 1 && obj.b == 2 && obj.c == 3 && obj.d == 4 + obj = JSON.parse("""{ "a": 1,"b": 2,"c": 3,"d": 4}""", B) + @test obj.a == 1 && obj.b == 2 && obj.c == 3 && obj.d == 4 + # can materialize json array into struct assuming field order + obj = JSON.parse("""[1, 2, 3, 4]""", A) + @test obj == A(1, 2, 3, 4) + # must be careful though because we don't check that the array is the same length as the struct + @test 
JSON.parse("""[1, 2, 3, 4, 5]""", A) == A(1, 2, 3, 4) + @test_throws Any JSON.parse("""[1, 2, 3]""", A) + # materialize singleton from empty json array + @test JSON.parse("""[]""", C) == C() + # materialize mutable from json array + obj = JSON.parse("""[1, 2, 3, 4]""", B) + @test obj.a == 1 && obj.b == 2 && obj.c == 3 && obj.d == 4 + obj = B() + JSON.parse!("""[1, 2, 3, 4]""", obj) + @test obj.a == 1 && obj.b == 2 && obj.c == 3 && obj.d == 4 + # materialize kwdef from json array + obj = JSON.parse("""[1, 3.14, "hey there sailor"]""", F) + @test obj == F(1, 3.14, "hey there sailor") + # materialize NamedTuple from json array + obj = JSON.parse("""[1, 3.14, "hey there sailor"]""", NamedTuple{(:id, :rate, :name), Tuple{Int, Float64, String}}) + @test obj == (id = 1, rate = 3.14, name = "hey there sailor") + # materialize Tuple from json array + obj = JSON.parse("""[1, 3.14, "hey there sailor"]""", Tuple{Int, Float64, String}) + @test obj == (1, 3.14, "hey there sailor") + obj = JSON.parse("""{ "a": 1,"b": 2.0,"c": "3"}""", Tuple{Int, Float64, String}) + @test obj == (1, 2.0, "3") + obj = JSON.parse("""{ "a": 1,"b": 2.0,"c": "3"}""", D) + @test obj == D(1, 2.0, "3") + obj = JSON.parse("""{ "x1": "1","x2": "2","x3": "3","x4": "4","x5": "5","x6": "6","x7": "7","x8": "8","x9": "9","x10": "10","x11": "11","x12": "12","x13": "13","x14": "14","x15": "15","x16": "16","x17": "17","x18": "18","x19": "19","x20": "20","x21": "21","x22": "22","x23": "23","x24": "24","x25": "25","x26": "26","x27": "27","x28": "28","x29": "29","x30": "30","x31": "31","x32": "32","x33": "33","x34": "34","x35": "35"}""", LotsOfFields) + @test obj == LotsOfFields("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35") + obj = JSON.parse("""{ "x": {"a": 1, "b": "2"}}""", Wrapper) + @test obj == Wrapper((a=1, b="2")) + obj = JSON.parse!("""{ "id": 1, "name": 
"2"}""", UndefGuy) + @test obj.id == 1 && obj.name == "2" + obj = JSON.parse!("""{ "id": 1}""", UndefGuy) + @test obj.id == 1 && !isdefined(obj, :name) + obj = JSON.parse("""{ "id": 1, "a": {"a": 1, "b": 2, "c": 3, "d": 4}}""", E) + @test obj == E(1, A(1, 2, 3, 4)) + obj = JSON.parse("""{ "id": 1, "rate": 2.0, "name": "3"}""", F) + @test obj == F(1, 2.0, "3") + obj = JSON.parse("""{ "id": 1, "rate": 2.0, "name": "3", "f": {"id": 1, "rate": 2.0, "name": "3"}}""", G) + @test obj == G(1, 2.0, "3", F(1, 2.0, "3")) + # Dict/Array fields + obj = JSON.parse("""{ "id": 1, "name": "2", "properties": {"a": 1, "b": 2}, "addresses": ["a", "b"]}""", H) + @test obj.id == 1 && obj.name == "2" && obj.properties == Dict("a" => 1, "b" => 2) && obj.addresses == ["a", "b"] + # Enum + @test JSON.parse("\"apple\"", Fruit) == apple + @test JSON.parse("""{"id": 1, "name": "2", "fruit": "banana"} """, I) == I(1, "2", banana) + # abstract type + @test JSON.parse("""{"id": 1, "name": "2", "fruit": "banana"} """, Any) == JSON.Object("id" => 1, "name" => "2", "fruit" => "banana") + @test JSON.parse("""{"id": 1, "f": {"id": 1, "rate": 2.0, "ints": [1, 2, 3]}}""", @NamedTuple{id::Int, f::Any}) == (id = 1, f = JSON.Object("id" => 1, "rate" => 2.0, "ints" => [1, 2, 3])) + + JSON.@choosetype Vehicle x -> x.type[] == "car" ? Car : x.type[] == "truck" ? 
Truck : throw(ArgumentError("Unknown vehicle type: $(x.type[])")) + + @test JSON.parse("""{"type": "car","make": "Mercedes-Benz","model": "S500","seatingCapacity": 5,"topSpeed": 250.1}""", Vehicle) == Car("car", "Mercedes-Benz", "S500", 5, 250.1) + @test JSON.parse("""{"type": "truck","make": "Isuzu","model": "NQR","payloadCapacity": 7500.5}""", Vehicle) == Truck("truck", "Isuzu", "NQR", 7500.5) + # union + @test JSON.parse("""{"id": 1, "name": "2", "rate": 3}""", J) == J(1, "2", Int64(3)) + @test JSON.parse("""{"id": null, "name": null, "rate": 3.14}""", J) == J(nothing, nothing, 3.14) + # test K + @test JSON.parse("""{"id": 1, "value": null}""", K) == K(1, missing) + # Real + @test JSON.parse("""{"duration": 3600.0}""", System) == System(duration=3600.0) + # struct + jsonlines + for raw in [ + """ + { "a": 1, "b": 3.14, "c": "hey" } + { "a": 2, "b": 6.28, "c": "hi" } + """, + # No newline at end + """ + { "a": 1, "b": 3.14, "c": "hey" } + { "a": 2, "b": 6.28, "c": "hi" }""", + # No newline, extra whitespace at end + """ + { "a": 1, "b": 3.14, "c": "hey" } + { "a": 2, "b": 6.28, "c": "hi" } """, + # Whitespace at start of line + """ + { "a": 1, "b": 3.14, "c": "hey" } + { "a": 2, "b": 6.28, "c": "hi" } + """, + # Extra whitespace at beginning, end of lines, end of string + " { \"a\": 1, \"b\": 3.14, \"c\": \"hey\" } \n" * + " { \"a\": 2, \"b\": 6.28, \"c\": \"hi\" } \n ", + ] + for nl in ("\n", "\r", "\r\n") + jsonl = replace(raw, "\n" => nl) + dss = JSON.parse(jsonl, Vector{D}, jsonlines=true) + @test length(dss) == 2 + @test dss[1].a == 1 + @test dss[1].b == 3.14 + @test dss[1].c == "hey" + @test dss[2].a == 2 + @test dss[2].b == 6.28 + @test dss[2].c == "hi" + end + end + # test L + @test JSON.parse("""{"id": 1, "firstName": "george", "first_name": "harry"}""", L) == L(1, "george", 33.3) + # test Char + @test JSON.parse("\"a\"", Char) == 'a' + @test JSON.parse("\"\u2200\"", Char) == '∀' + @test_throws ArgumentError JSON.parse("\"ab\"", Char) + # test UUID + 
@test JSON.parse("\"ffffffff-ffff-ffff-ffff-ffffffffffff\"", UUID) == UUID(typemax(UInt128)) + # test Symbol + @test JSON.parse("\"a\"", Symbol) == :a + # test VersionNumber + @test JSON.parse("\"1.2.3\"", VersionNumber) == v"1.2.3" + # test Regex + @test JSON.parse("\"1.2.3\"", Regex) == r"1.2.3" + # test Dates + @test JSON.parse("\"2023-02-23T22:39:02\"", DateTime) == DateTime(2023, 2, 23, 22, 39, 2) + @test JSON.parse("\"2023-02-23\"", Date) == Date(2023, 2, 23) + @test JSON.parse("\"22:39:02\"", Time) == Time(22, 39, 2) + @test JSON.parse("{\"date\":\"2023_02_23\",\"datetime\":\"2023/02/23 12:34:56\",\"time\":\"12/34/56\"}", ThreeDates) == + ThreeDates(Date(2023, 2, 23), DateTime(2023, 2, 23, 12, 34, 56), Time(12, 34, 56)) + # test Array w/ lifted value + @test isequal(JSON.parse("[null,null]", Vector{Missing}), [missing, missing]) + # test Matrix + @test JSON.parse("[[1,3],[2,4]]", Matrix{Int}) == [1 2; 3 4] + @test JSON.parse("{\"a\": [[1,3],[2,4]]}", NamedTuple{(:a,),Tuple{Matrix{Int}}}) == (a=[1 2; 3 4],) + # test Matrix w/ lifted value + @test isequal(JSON.parse("[[null,null],[null,null]]", Matrix{Missing}), [missing missing; missing missing]) + # test lift on Dict values + obj = JSON.parse("""{\"ffffffff-ffff-ffff-ffff-ffffffffffff\": null,\"ffffffff-ffff-ffff-ffff-fffffffffffe\": null}""", Dict{UUID,Missing}) + @test obj[UUID(typemax(UInt128))] === missing + @test obj[UUID(typemax(UInt128) - 0x01)] === missing + # parse! 
with custom dicttype + obj = Dict{String, Any}() + JSON.parse!("""{"a": {"a": 1, "b": 2}, "b": {"a": 3, "b": 4}}""", obj; dicttype=Dict{String, Any}) + @test obj["a"] == Dict("a" => 1, "b" => 2) + @test obj["b"] == Dict("a" => 3, "b" => 4) + # nested union struct field + @test JSON.parse("""{"id": 1, "value": {"id": 1, "value": null}}""", M) == M(1, K(1, missing)) + # recusrive field materialization + x = JSON.parse("""{ "id": 1, "value": { "id": 2 } }""", Recurs) + @test x == Recurs(1, Recurs(2, nothing)) + # multidimensional arrays + # "[[1.0],[2.0]]" => (1, 2) + m = Matrix{Float64}(undef, 1, 2) + m[1] = 1 + m[2] = 2 + @test JSON.parse("[[1.0],[2.0]]", Matrix{Float64}) == m + # "[[1.0,2.0]]" => (2, 1) + m = Matrix{Float64}(undef, 2, 1) + m[1] = 1 + m[2] = 2 + @test JSON.parse("[[1.0,2.0]]", Matrix{Float64}) == m + # "[[[1.0]],[[2.0]]]" => (1, 1, 2) + m = Array{Float64}(undef, 1, 1, 2) + m[1] = 1 + m[2] = 2 + @test JSON.parse("[[[1.0]],[[2.0]]]", Array{Float64, 3}) == m + # "[[[1.0],[2.0]]]" => (1, 2, 1) + m = Array{Float64}(undef, 1, 2, 1) + m[1] = 1 + m[2] = 2 + @test JSON.parse("[[[1.0],[2.0]]]", Array{Float64, 3}) == m + # "[[[1.0,2.0]]]" => (2, 1, 1) + m = Array{Float64}(undef, 2, 1, 1) + m[1] = 1 + m[2] = 2 + @test JSON.parse("[[[1.0,2.0]]]", Array{Float64, 3}) == m + m = Array{Float64}(undef, 1, 2, 3) + m[1] = 1 + m[2] = 2 + m[3] = 3 + m[4] = 4 + m[5] = 5 + m[6] = 6 + @test JSON.parse("[[[1.0],[2.0]],[[3.0],[4.0]],[[5.0],[6.0]]]", Array{Float64, 3}) == m + # 0-dimensional array + m = Array{Float64,0}(undef) + m[1] = 1.0 + @test JSON.parse("1.0", Array{Float64,0}) == m + # test custom JSONStyle + # StructUtils.lift(::CustomJSONStyle, ::Type{UUID}, x) = UUID(UInt128(x)) + # @test JSON.parse("340282366920938463463374607431768211455", UUID; style=CustomJSONStyle()) == UUID(typemax(UInt128)) + # @test JSON.parse("{\"id\": 0, \"uuid\": 340282366920938463463374607431768211455}", N; style=CustomJSONStyle()) == N(0, UUID(typemax(UInt128))) + # tricky unions + @test 
JSON.parse("{\"id\":0}", O) == O(0, nothing) + @test JSON.parse("{\"id\":0,\"name\":null}", O) == O(0, missing) + # StructUtils.choosetype(::CustomJSONStyle, ::Type{Union{I,L,Missing,Nothing}}, val) = JSON.gettype(val) == JSON.JSONTypes.NULL ? Missing : hasproperty(val, :fruit) ? I : L + # @test JSON.parse("{\"id\":0,\"name\":{\"id\":1,\"name\":\"jim\",\"fruit\":\"apple\"}}", O; style=CustomJSONStyle()) == O(0, I(1, "jim", apple)) + # @test JSON.parse("{\"id\":0,\"name\":{\"id\":1,\"firstName\":\"jim\",\"rate\":3.14}}", O; style=CustomJSONStyle()) == O(0, L(1, "jim", 3.14)) + + StructUtils.liftkey(::JSON.JSONStyle, ::Type{Point}, x::String) = Point(parse(Int, split(x, "_")[1]), parse(Int, split(x, "_")[2])) + @test JSON.parse("{\"1_2\":\"hi\"}", Dict{Point, String}) == Dict(Point(1, 2) => "hi") + # https://github.com/quinnj/JSON3.jl/issues/138 + @test JSON.parse("""{"num": 42}""", P) == P(42, "bar") + end + x = JSON.parse("[1,2,3]", JSONText) + @test x == JSONText("[1,2,3]") + # frankenstruct + json = """ + { + "id": 1, + "address": "123 Main St", + "rate": null, + "franken_type": "b", + "notsure": {"key": "value"}, + "monster": { + "monster_type": "vampire", + "num_victims": 10 + }, + "percent": 0.1, + "birthdate": "2023/10/01", + "percentages": { + "0.1": 1, + "0.2": 2 + }, + "json_properties": {"key": "value"}, + "matrix": [[1.0, 2.0], [3.0, 4.0]], + "extra_key": "extra_value" + } + """ + fr = JSON.parse(json, FrankenStruct) + # FrankenStruct(1, "Jim", "123 Main St", missing, :b, JSON.Object{String, Any}("key" => "value"), Dracula(10), Percent(0.1), Date("2023-10-01"), Dict{Percent, Int64}(Percent(0.2) => 2, Percent(0.1) => 1), JSONText("{\"key\": \"value\"}"), [1.0 3.0; 2.0 4.0]) + @test fr.id == 1 + @test fr.name == "Jim" + @test fr.address == "123 Main St" + @test fr.rate === missing + @test fr.type == :b + @test fr.notsure == JSON.Object{String, Any}("key" => "value") + @test fr.monster == Dracula(10) + @test fr.percent == Percent(0.1) + @test fr.birthdate 
== Date("2023-10-01") + @test fr.percentages == Dict(Percent(0.2) => 2, Percent(0.1) => 1) + @test fr.json_properties == JSONText("{\"key\": \"value\"}") + @test fr.matrix == [1.0 3.0; 2.0 4.0] + # test custom JSONStyle overload + JSON.lift(::CustomJSONStyle, ::Type{Rational}, x) = Rational(x.num[], x.den[]) + @test JSON.parse("{\"num\": 1,\"den\":3}", Rational; style=CustomJSONStyle()) == 1//3 + @test isequal(JSON.parse("{\"num\": 1,\"den\":null}", @NamedTuple{num::Int, den::Union{Int, Missing}}; null=missing, style=StructUtils.DefaultStyle()), (num=1, den=missing)) + # choosetype field tag on Any struct field + @test JSON.parse("{\"id\":1,\"any\":{\"type\":\"int\",\"value\":10}}", Q) == Q(1, (type="int", value=10)) + @test JSON.parse("{\"id\":1,\"any\":{\"type\":\"float\",\"value\":3.14}}", Q) == Q(1, (type="float", value=3.14)) + @test JSON.parse("{\"id\":1,\"any\":{\"type\":\"string\",\"value\":\"hi\"}}", Q) == Q(1, (type="string", value="hi")) +end diff --git a/test/parser/dicttype.jl b/test/parser/dicttype.jl deleted file mode 100644 index 6e4d3285..00000000 --- a/test/parser/dicttype.jl +++ /dev/null @@ -1,22 +0,0 @@ -MissingDict() = DataStructures.DefaultDict{String,Any}(Missing) - -@testset for T in [ - DataStructures.OrderedDict, - Dict{Symbol, Int32}, - MissingDict -] - val = JSON.parse("{\"x\": 3}", dicttype=T) - @test length(val) == 1 - key = collect(keys(val))[1] - @test string(key) == "x" - @test val[key] == 3 - - if T == MissingDict - @test val isa DataStructures.DefaultDict{String} - @test val["y"] === missing - else - @test val isa T - @test_throws KeyError val["y"] - end -end - diff --git a/test/parser/inttype.jl b/test/parser/inttype.jl deleted file mode 100644 index 30e9ca16..00000000 --- a/test/parser/inttype.jl +++ /dev/null @@ -1,16 +0,0 @@ -@testset for T in [Int32, Int64, Int128, BigInt] - val = JSON.parse("{\"x\": 3}", inttype=T) - @test isa(val, Dict{String, Any}) - @test length(val) == 1 - key = collect(keys(val))[1] - @test string(key) 
== "x" - value = val[key] - @test value == 3 - @test typeof(value) == T -end - -@testset begin - teststr = """{"201736327611975630": 18005722827070440994}""" - val = JSON.parse(teststr, inttype=Int128) - @test val == Dict{String,Any}("201736327611975630"=> 18005722827070440994) -end diff --git a/test/parser/invalid-input.jl b/test/parser/invalid-input.jl deleted file mode 100644 index 924f225e..00000000 --- a/test/parser/invalid-input.jl +++ /dev/null @@ -1,33 +0,0 @@ -const FAILURES = [ - # Unexpected character in array - "[1,2,3/4,5,6,7]", - # Unexpected character in object - "{\"1\":2, \"2\":3 _ \"4\":5}", - # Invalid escaped character - "[\"alpha\\α\"]", - "[\"\\u05AG\"]", - # Invalid 'simple' and 'unknown value' - "[tXXe]", - "[fail]", - "∞", - # Invalid number - "[5,2,-]", - "[5,2,+β]", - # Incomplete escape - "\"\\", - # Control character - "\"\0\"", - # Issue #99 - "[\"🍕\"_\"🍕\"", - # Issue #260 - "1997-03-03", - "1997.1-", -] - -@testset for fail in FAILURES - # Test memory parser - @test_throws ErrorException JSON.parse(fail) - - # Test streaming parser - @test_throws ErrorException JSON.parse(IOBuffer(fail)) -end diff --git a/test/parser/nan-inf.jl b/test/parser/nan-inf.jl deleted file mode 100644 index ef591ce0..00000000 --- a/test/parser/nan-inf.jl +++ /dev/null @@ -1,35 +0,0 @@ -@testset begin - test_str = """ - { - "x": NaN, - "y": Infinity, - "z": -Infinity, - "q": [true, null, "hello", 1, -1, 1.5, -1.5, [true]] - }""" - - test_dict = Dict( - "x" => NaN, - "y" => Inf, - "z" => -Inf, - "q" => [true, nothing, "hello", 1, -1, 1.5, -1.5, [true]] - ) - - @test_throws ErrorException JSON.parse(test_str, allownan=false) - val = JSON.parse(test_str) - @test isequal(val, test_dict) - - @test_throws ErrorException JSON.parse(IOBuffer(test_str), allownan=false) - val2 = JSON.parse(IOBuffer(test_str)) - @test isequal(val2, test_dict) - - # Test that the number following -Infinity parses correctly - @test isequal(JSON.parse("[-Infinity, 1]"), [-Inf, 1]) - @test 
isequal(JSON.parse("[-Infinity, -1]"), [-Inf, -1]) - @test isequal(JSON.parse("""{"a": -Infinity, "b": 1.0}"""), Dict("a" => -Inf, "b"=> 1.0)) - @test isequal(JSON.parse("""{"a": -Infinity, "b": -1.0}"""), Dict("a" => -Inf, "b"=> -1.0)) - - @test isequal(JSON.parse(IOBuffer("[-Infinity, 1]")), [-Inf, 1]) - @test isequal(JSON.parse(IOBuffer("[-Infinity, -1]")), [-Inf, -1]) - @test isequal(JSON.parse(IOBuffer("""{"a": -Infinity, "b": 1.0}""")), Dict("a" => -Inf, "b"=> 1.0)) - @test isequal(JSON.parse(IOBuffer("""{"a": -Infinity, "b": -1.0}""")), Dict("a" => -Inf, "b"=> -1.0)) -end diff --git a/test/parser/null.jl b/test/parser/null.jl deleted file mode 100644 index 6f0c4131..00000000 --- a/test/parser/null.jl +++ /dev/null @@ -1,7 +0,0 @@ -@testset "Custom null values" begin - s = "{\"x\": null}" - for null in (nothing, missing) - val = JSON.parse(s, null=null) - @test val["x"] === null - end -end diff --git a/test/parser/parsefile.jl b/test/parser/parsefile.jl deleted file mode 100644 index f5b9f6c3..00000000 --- a/test/parser/parsefile.jl +++ /dev/null @@ -1,10 +0,0 @@ -tmppath, io = mktemp() -write(io, facebook) -close(io) -if Sys.iswindows() - # don't use mmap on Windows, to avoid ERROR: unlink: operation not permitted (EPERM) - @test haskey(JSON.parsefile(tmppath; use_mmap=false), "data") -else - @test haskey(JSON.parsefile(tmppath), "data") -end -rm(tmppath) diff --git a/test/regression/issue021.jl b/test/regression/issue021.jl deleted file mode 100644 index 856f8207..00000000 --- a/test/regression/issue021.jl +++ /dev/null @@ -1,4 +0,0 @@ -test21 = "[\r\n{\r\n\"a\": 1,\r\n\"b\": 2\r\n},\r\n{\r\n\"a\": 3,\r\n\"b\": 4\r\n}\r\n]" -a = JSON.parse(test21) -@test isa(a, Vector{Any}) -@test length(a) == 2 diff --git a/test/regression/issue026.jl b/test/regression/issue026.jl deleted file mode 100644 index ff9ea6d2..00000000 --- a/test/regression/issue026.jl +++ /dev/null @@ -1,2 +0,0 @@ -obj = JSON.parse("{\"a\":2e10}") -@test obj["a"] == 2e10 diff --git 
a/test/regression/issue057.jl b/test/regression/issue057.jl deleted file mode 100644 index 1797a8a9..00000000 --- a/test/regression/issue057.jl +++ /dev/null @@ -1,2 +0,0 @@ -obj = JSON.parse("{\"\U0001d712\":\"\\ud835\\udf12\"}") -@test(obj["𝜒"] == "𝜒") diff --git a/test/regression/issue109.jl b/test/regression/issue109.jl deleted file mode 100644 index 6dc2d9d3..00000000 --- a/test/regression/issue109.jl +++ /dev/null @@ -1,8 +0,0 @@ -mutable struct t109 - i::Int -end - -let iob = IOBuffer() - JSON.print(iob, t109(1)) - @test get(JSON.parse(String(take!(iob))), "i", 0) == 1 -end diff --git a/test/regression/issue152.jl b/test/regression/issue152.jl deleted file mode 100644 index 5b4a01bf..00000000 --- a/test/regression/issue152.jl +++ /dev/null @@ -1,2 +0,0 @@ -@test json([Int64[] Int64[]]) == "[[],[]]" -@test json([Int64[] Int64[]]') == "[]" diff --git a/test/regression/issue163.jl b/test/regression/issue163.jl deleted file mode 100644 index 5ace4fa2..00000000 --- a/test/regression/issue163.jl +++ /dev/null @@ -1 +0,0 @@ -@test Float32(JSON.parse(json(2.1f-8))) == 2.1f-8 diff --git a/test/runtests.jl b/test/runtests.jl index 37d398d7..3280fbb7 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -1,120 +1,93 @@ -using JSON -using Test -using Dates -using Distributed: RemoteChannel -using OffsetArrays +using JSON, Test, Tar -import DataStructures +include(joinpath(dirname(pathof(JSON)), "../test/object.jl")) +include(joinpath(dirname(pathof(JSON)), "../test/lazy.jl")) +include(joinpath(dirname(pathof(JSON)), "../test/parse.jl")) +include(joinpath(dirname(pathof(JSON)), "../test/json.jl")) -include("json-samples.jl") - -@testset "Parser" begin - @testset "Parser Failures" begin - include("parser/invalid-input.jl") - end - - @testset "parsefile" begin - include("parser/parsefile.jl") - end - - @testset "dicttype" begin - include("parser/dicttype.jl") - end - - @testset "inttype" begin - include("parser/inttype.jl") - end - - @testset "nan_inf" begin - 
include("parser/nan-inf.jl") - end - - @testset "null" begin - include("parser/null.jl") - end - - @testset "Miscellaneous" begin - # test for single values - @test JSON.parse("true") == true - @test JSON.parse("null") == nothing - @test JSON.parse("\"hello\"") == "hello" - @test JSON.parse("\"a\"") == "a" - @test JSON.parse("1") == 1 - @test JSON.parse("1.5") == 1.5 - @test JSON.parse("[true]") == [true] +function tar_files(tarball::String) + data = Dict{String, Vector{UInt8}}() + buf = Vector{UInt8}(undef, Tar.DEFAULT_BUFFER_SIZE) + io = IOBuffer() + open(tarball) do tio + Tar.read_tarball(_ -> true, tio; buf=buf) do header, _ + if header.type == :file + take!(io) # In case there are multiple entries for the file + Tar.read_data(tio, io; size=header.size, buf) + data[header.path] = take!(io) + end + end end + data end -@testset "Serializer" begin - @testset "Standard Serializer" begin - include("standard-serializer.jl") - end - - @testset "Lowering" begin - include("lowering.jl") - end - - @testset "Custom Serializer" begin - include("serializer.jl") +# JSONTestSuite + +function parse_testfile(i, file, data) + # known failures on 14, 32, 33 (all are "i_...", meaning the spec is ambiguous anyway) + # 14: i_string_UTF-16LE_with_BOM.json, we don't support UTF-16 + # 32: i_string_utf16BE_no_BOM.json, we don't support UTF-16 + # 33: i_string_utf16LE_no_BOM.json, we don't support UTF-16 + if startswith(file, "n_") + try + JSON.parse(data) + @warn "no error thrown while parsing json test file" file=file i=i + @test !(file isa String) + catch + @test file isa String + end + elseif startswith(file, "i_") + try + JSON.parse(data) + catch + @warn "error thrown while parsing json test file" file=file i=i + end + else + try + JSON.parse(data) + @test file isa String + catch + @warn "error thrown while parsing json test file" file=file i=i + @test !(file isa String) + rethrow() + end end end -@testset "Integration" begin - # ::Nothing values should be encoded as null - 
testDict = Dict("a" => nothing) - nothingJson = JSON.json(testDict) - nothingDict = JSON.parse(nothingJson) - @test testDict == nothingDict - - @testset "async" begin - include("async.jl") - end - - @testset "indentation" begin - include("indentation.jl") - end - - @testset "JSON Checker" begin - include("json-checker.jl") +println("\nTest cases 70, 85, and 104 are expected to emit warnings next\n") +const jsontestsuite = tar_files(joinpath(dirname(pathof(JSON)), "../test/JSONTestSuite.tar")) +@testset "JSONTestSuite" begin + for (i, (file, data)) in enumerate(jsontestsuite) + parse_testfile(i, file, data) end end -@testset "Regression" begin - @testset "for issue #$i" for i in [21, 26, 57, 109, 152, 163] - include("regression/issue$(lpad(string(i), 3, "0")).jl") +# jsonchecker + +function parse_testfile2(i, file, data) + if startswith(file, "fail") + try + JSON.parse(data) + @warn "no error thrown while parsing json test file" file=file i=i + @test !(file isa String) + catch + @test file isa String + end + else + try + JSON.parse(data) + @test file isa String + catch + @warn "error thrown while parsing json test file" file=file i=i + @test !(file isa String) + rethrow() + end end end -mutable struct R1 - id::Int - obj -end - -struct MyCustomWriteContext <: JSON.Writer.RecursiveCheckContext - io - objectids::Set{UInt64} - recursive_cycle_token -end -MyCustomWriteContext(io) = MyCustomWriteContext(io, Set{UInt64}(), nothing) -Base.print(io::MyCustomWriteContext, x::UInt8) = Base.print(io.io, x) -for delegate in [:indent, - :delimit, - :separate, - :begin_array, - :end_array, - :begin_object, - :end_object] -@eval JSON.Writer.$delegate(io::MyCustomWriteContext) = JSON.Writer.$delegate(io.io) -end - -@testset "RecursiveCheckContext" begin - x = R1(1, nothing) - x.obj = x - str = JSON.json(x) - @test str == "{\"id\":1,\"obj\":null}" - io = IOBuffer() - str = JSON.show_json(MyCustomWriteContext(JSON.Writer.CompactContext(io)), 
JSON.Serializations.StandardSerialization(), x) - @test String(take!(io)) == "{\"id\":1,\"obj\":null}" -end - -# Check that printing to the default stdout doesn't fail +const jsonchecker = tar_files(joinpath(dirname(pathof(JSON)), "../test/jsonchecker.tar")) +@testset "jsonchecker" begin + for (i, (file, data)) in enumerate(jsonchecker) + parse_testfile2(i, file, data) + end +end \ No newline at end of file diff --git a/test/serializer.jl b/test/serializer.jl deleted file mode 100644 index 87927fe1..00000000 --- a/test/serializer.jl +++ /dev/null @@ -1,95 +0,0 @@ -module TestSerializer - -using JSON -using Test - -# to define a new serialization behaviour, import these first -import JSON.Serializations: CommonSerialization, StandardSerialization -import JSON: StructuralContext - -# those names are long so we can define some type aliases -const CS = CommonSerialization -const SC = StructuralContext - -# for test harness purposes -function sprint_kwarg(f, args...; kwargs...) - b = IOBuffer() - f(b, args...; kwargs...) 
- String(take!(b)) -end - -# issue #168: Print NaN and Inf as Julia would -struct NaNSerialization <: CS end -JSON.show_json(io::SC, ::NaNSerialization, f::AbstractFloat) = Base.print(io, f) - -@test sprint(JSON.show_json, NaNSerialization(), [NaN, Inf, -Inf, 0.0]) == - "[NaN,Inf,-Inf,0.0]" - -@test sprint_kwarg( - JSON.show_json, - NaNSerialization(), - [NaN, Inf, -Inf, 0.0]; - indent=4 -) == """ -[ - NaN, - Inf, - -Inf, - 0.0 -] -""" - -# issue #170: Print JavaScript functions directly -struct JSSerialization <: CS end -struct JSFunction - data::String -end - -function JSON.show_json(io::SC, ::JSSerialization, f::JSFunction) - first = true - for line in split(f.data, '\n') - if !first - JSON.indent(io) - end - first = false - Base.print(io, line) - end -end - -@test sprint_kwarg(JSON.show_json, JSSerialization(), Any[ - 1, - 2, - JSFunction("function test() {\n return 1;\n}") -]; indent=2) == """ -[ - 1, - 2, - function test() { - return 1; - } -] -""" - -# test serializing a type without any fields -struct SingletonType end -@test_throws ErrorException json(SingletonType()) - -# test printing to stdout -let filename = tempname() - open(filename, "w") do f - redirect_stdout(f) do - JSON.print(Any[1, 2, 3.0]) - end - end - @test read(filename, String) == "[1,2,3.0]" - rm(filename) -end - -# issue #184: serializing a 0-dimensional array -@test sprint(JSON.show_json, JSON.StandardSerialization(), view([184], 1)) == "184" - -# test serializing with a JSONText object -@test json([JSONText("{\"bar\": [3,4,5]}"),314159]) == "[{\"bar\": [3,4,5]},314159]" -@test json([JSONText("{\"bar\": [3,4,5]}"),314159], 1) == "[\n {\n \"bar\": [\n 3,\n 4,\n 5\n ]\n },\n 314159\n]\n" - -end diff --git a/test/standard-serializer.jl b/test/standard-serializer.jl deleted file mode 100644 index b88feb56..00000000 --- a/test/standard-serializer.jl +++ /dev/null @@ -1,79 +0,0 @@ -@testset "Symbol" begin - symtest = Dict(:symbolarray => [:apple, :pear], :symbolsingleton => :hello) - @test 
(JSON.json(symtest) == "{\"symbolarray\":[\"apple\",\"pear\"],\"symbolsingleton\":\"hello\"}" - || JSON.json(symtest) == "{\"symbolsingleton\":\"hello\",\"symbolarray\":[\"apple\",\"pear\"]}") -end - -@testset "Floats" begin - @test sprint(JSON.print, [NaN]) == "[null]" - @test sprint(JSON.print, [Inf]) == "[null]" -end - -@testset "Union{Nothing,T} (old Nullable)" begin - @test sprint(JSON.print, Union{Any,Nothing}[nothing]) == "[null]" - @test sprint(JSON.print, Union{Int64,Nothing}[nothing]) == "[null]" - @test sprint(JSON.print, Union{Int64,Nothing}[1]) == "[1]" -end - -@testset "Char" begin - @test json('a') == "\"a\"" - @test json('\\') == "\"\\\\\"" - @test json('\n') == "\"\\n\"" - @test json('🍩') =="\"🍩\"" -end - -@testset "Enum" begin - include("enum.jl") -end - -@testset "Type" begin - @test sprint(JSON.print, Float64) == string("\"Float64\"") -end - -@testset "Module" begin - @test_throws ArgumentError sprint(JSON.print, JSON) -end - -@testset "Dates" begin - @test json(Date("2016-04-13")) == "\"2016-04-13\"" - @test json([Date("2016-04-13"), Date("2016-04-12")]) == "[\"2016-04-13\",\"2016-04-12\"]" - @test json(DateTime("2016-04-13T00:00:00")) == "\"2016-04-13T00:00:00\"" - @test json([DateTime("2016-04-13T00:00:00"), DateTime("2016-04-12T00:00:00")]) == "[\"2016-04-13T00:00:00\",\"2016-04-12T00:00:00\"]" -end - -@testset "Null bytes" begin - zeros = Dict("\0" => "\0") - json_zeros = json(zeros) - @test occursin("\\u0000", json_zeros) - @test !occursin("\\0", json_zeros) - @test JSON.parse(json_zeros) == zeros -end - -@testset "All bytes" begin - str = String(collect(0x00:0xff)) - bytes = Dict(str => str) - json_bytes = json(bytes) - @test JSON.parse(json_bytes) == bytes -end - -@testset "Arrays" begin - # Printing an empty array or Dict shouldn't cause a BoundsError - @test json(String[]) == "[]" - @test json(Dict()) == "{}" - - #Multidimensional arrays - @test json([0 1; 2 0]) == "[[0,2],[1,0]]" - @test json(OffsetArray([0 1; 2 0], 0:1, 10:11)) == 
"[[0,2],[1,0]]" -end - -@testset "Pairs" begin - @test json(1 => 2) == "{\"1\":2}" - @test json(:foo => 2) == "{\"foo\":2}" - @test json([1, 2] => [3, 4]) == "{\"$([1, 2])\":[3,4]}" - @test json([1 => 2]) == "[{\"1\":2}]" -end - -@testset "Sets" begin - @test json(Set()) == "[]" - @test json(Set([1, 2])) in ["[1,2]", "[2,1]"] -end