1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
//! # nom, eating data byte by byte
//!
//! nom is a parser combinator library with a focus on safe parsing,
//! streaming patterns, and as much as possible zero copy.
//!
//! ## Example
//!
//! ```rust
//! #[macro_use]
//! extern crate nom;
//!
//! #[derive(Debug,PartialEq)]
//! pub struct Color {
//!   pub red:     u8,
//!   pub green:   u8,
//!   pub blue:    u8,
//! }
//!
//! fn from_hex(input: &str) -> Result<u8, std::num::ParseIntError> {
//!   u8::from_str_radix(input, 16)
//! }
//!
//! fn is_hex_digit(c: char) -> bool {
//!   c.is_digit(16)
//! }
//!
//! named!(hex_primary<&str, u8>,
//!   map_res!(take_while_m_n!(2, 2, is_hex_digit), from_hex)
//! );
//!
//! named!(hex_color<&str, Color>,
//!   do_parse!(
//!            tag!("#")   >>
//!     red:   hex_primary >>
//!     green: hex_primary >>
//!     blue:  hex_primary >>
//!     (Color { red, green, blue })
//!   )
//! );
//!
//! fn main() {
//!   assert_eq!(hex_color("#2F14DF"), Ok(("", Color {
//!     red: 47,
//!     green: 20,
//!     blue: 223,
//!   })));
//! }
//! ```
//!
//! The code is available on [Github](https://github.com/Geal/nom)
//!
//! There are a few [guides](https://github.com/Geal/nom/tree/master/doc) with more details
//! about [the design of nom](https://github.com/Geal/nom/blob/master/doc/how_nom_macros_work.md),
//! [how to write parsers](https://github.com/Geal/nom/blob/master/doc/making_a_new_parser_from_scratch.md),
//! or the [error management system](https://github.com/Geal/nom/blob/master/doc/error_management.md).
//!
//! **Looking for a specific combinator? Read the
//! ["choose a combinator" guide](https://github.com/Geal/nom/blob/master/doc/choosing_a_combinator.md)**
//!
//! If you are upgrading to nom 2.0, please read the
//! [migration document](https://github.com/Geal/nom/blob/master/doc/upgrading_to_nom_2.md).
//!
//! If you are upgrading to nom 4.0, please read the
//! [migration document](https://github.com/Geal/nom/blob/master/doc/upgrading_to_nom_4.md).
//!
//! See also the [FAQ](https://github.com/Geal/nom/blob/master/doc/FAQ.md).
//!
//! ## Parser combinators
//!
//! Parser combinators are an approach to parsers that is very different from
//! software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and
//! [yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar
//! in a separate file and generating the corresponding code, you use very small
//! functions with very specific purpose, like "take 5 bytes", or "recognize the
//! word 'HTTP'", and assemble them in meaningful patterns like "recognize
//! 'HTTP', then a space, then a version".
//! The resulting code is small, and looks like the grammar you would have
//! written with other parser approaches.
//!
//! This has a few advantages:
//!
//! - the parsers are small and easy to write
//! - the parser components are easy to reuse (if they're general enough, please add them to nom!)
//! - the parser components are easy to test separately (unit tests and property-based tests)
//! - the parser combination code looks close to the grammar you would have written
//! - you can build partial parsers, specific to the data you need at the moment, and ignore the rest
//!
//! Here is an example of one such parser, to recognize text between parentheses:
//!
//! ```rust
//! #[macro_use]
//! extern crate nom;
//!
//! # fn main() {
//! named!(parens, delimited!(char!('('), is_not!(")"), char!(')')));
//! # }
//! ```
//!
//! It defines a function named `parens`, which will recognize a sequence of the character `(`, the longest byte array not containing `)`, then the character `)`, and will return the byte array in the middle.
//!
//! Here is another parser, written without using nom's macros this time:
//!
//! ```rust
//! #[macro_use]
//! extern crate nom;
//!
//! use nom::{IResult,Err,Needed};
//!
//! # fn main() {
//! fn take4(i:&[u8]) -> IResult<&[u8], &[u8]>{
//!   if i.len() < 4 {
//!     Err(Err::Incomplete(Needed::Size(4)))
//!   } else {
//!     Ok((&i[4..],&i[0..4]))
//!   }
//! }
//! # }
//! ```
//!
//! This function takes a byte array as input, and tries to consume 4 bytes.
//! Writing all the parsers manually, like this, is dangerous, despite Rust's safety features. There
//! are still a lot of mistakes one can make. That's why nom provides a list of macros to help in
//! developing parsers.
//!
//! With macros, you would write it like this:
//!
//! ```rust
//! #[macro_use]
//! extern crate nom;
//!
//! # fn main() {
//! named!(take4, take!(4));
//! # }
//! ```
//!
//! A parser in nom is a function which, for an input type `I`, an output type `O`
//! and an optional error type `E`, will have the following signature:
//!
//! ```rust,ignore
//! fn parser(input: I) -> IResult<I, O, E>;
//! ```
//!
//! Or like this, if you don't want to specify a custom error type (it will be `u32` by default):
//!
//! ```rust,ignore
//! fn parser(input: I) -> IResult<I, O>;
//! ```
//!
//! `IResult` is an alias for the `Result` type:
//!
//! ```rust
//! use nom::{Needed, Context};
//!
//! type IResult<I, O, E = u32> = Result<(I, O), Err<I, E>>;
//!
//! enum Err<I, E = u32> {
//!   Incomplete(Needed),
//!   Error(Context<I, E>),
//!   Failure(Context<I, E>),
//! }
//! ```
//!
//! It can have the following values:
//!
//! - a correct result `Ok((I,O))` with the first element being the remainder of the input (not parsed yet), and the second the output value;
//! - an error `Err(Err::Error(c))` with `c` an enum that contains an error code with its position in the input, and optionally a chain of accumulated errors;
//! - an error `Err(Err::Incomplete(Needed))` indicating that more input is necessary. `Needed` can indicate how much data is needed
//! - an error `Err(Err::Failure(c))`. It works like the `Error` case, except it indicates an unrecoverable error: we cannot backtrack and test another parser
//!
//! Please refer to the [documentation](https://docs.rs/nom) for an exhaustive list of parsers. See also the
//! ["choose a combinator" guide](https://github.com/Geal/nom/blob/master/doc/choosing_a_combinator.md).
//!
//! ## Making new parsers with macros
//!
//! Macros are the main way to make new parsers by combining other ones. Those macros accept other macros or function names as arguments. You then need to make a function out of that combinator with **`named!`**, or a closure with **`closure!`**. Here is how you would do it with the **`tag!`** and **`take!`** combinators:
//!
//! ```rust
//! # #[macro_use] extern crate nom;
//! # fn main() {
//! named!(abcd_parser, tag!("abcd")); // will consume bytes if the input begins with "abcd"
//!
//! named!(take_10, take!(10));        // will consume and return 10 bytes of input
//! # }
//! ```
//!
//! The **`named!`** macro can take three different syntaxes:
//!
//! ```rust,ignore
//! named!(my_function( &[u8] ) -> &[u8], tag!("abcd"));
//!
//! named!(my_function<&[u8], &[u8]>, tag!("abcd"));
//!
//! named!(my_function, tag!("abcd")); // when you know the parser takes &[u8] as input, and returns &[u8] as output
//! ```
//!
//! **IMPORTANT NOTE**: Rust's macros can be very sensitive to the syntax, so you may encounter an error compiling parsers like this one:
//!
//! ```rust
//! # #[macro_use] extern crate nom;
//! # #[cfg(feature = "alloc")]
//! # fn main() {
//! named!(my_function<&[u8], Vec<&[u8]>>, many0!(tag!("abcd")));
//! # }
//!
//! # #[cfg(not(feature = "alloc"))]
//! # fn main() {}
//! ```
//!
//! You will get the following error: `error: expected an item keyword`. This
//! happens because `>>` is seen as an operator, so the macro parser does not
//! recognize what we want. There is a way to avoid it, by inserting a space:
//!
//! ```rust
//! # #[macro_use] extern crate nom;
//! # #[cfg(feature = "alloc")]
//! # fn main() {
//! named!(my_function<&[u8], Vec<&[u8]> >, many0!(tag!("abcd")));
//! # }
//! # #[cfg(not(feature = "alloc"))]
//! # fn main() {}
//! ```
//!
//! This will compile correctly. I am very sorry for this inconvenience.
//!
//! ## Combining parsers
//!
//! There are more high level patterns, like the **`alt!`** combinator, which provides a choice between multiple parsers. If one branch fails, it tries the next, and returns the result of the first parser that succeeds:
//!
//! ```rust
//! # #[macro_use] extern crate nom;
//! # fn main() {
//! named!(alt_tags, alt!(tag!("abcd") | tag!("efgh")));
//!
//! assert_eq!(alt_tags(b"abcdxxx"), Ok((&b"xxx"[..], &b"abcd"[..])));
//! assert_eq!(alt_tags(b"efghxxx"), Ok((&b"xxx"[..], &b"efgh"[..])));
//! assert_eq!(alt_tags(b"ijklxxx"), Err(nom::Err::Error(error_position!(&b"ijklxxx"[..], nom::ErrorKind::Alt))));
//! # }
//! ```
//!
//! The pipe `|` character is used as separator.
//!
//! The **`opt!`** combinator makes a parser optional. If the child parser returns an error, **`opt!`** will succeed and return None:
//!
//! ```rust
//! # #[macro_use] extern crate nom;
//! # fn main() {
//! named!( abcd_opt< &[u8], Option<&[u8]> >, opt!( tag!("abcd") ) );
//!
//! assert_eq!(abcd_opt(b"abcdxxx"), Ok((&b"xxx"[..], Some(&b"abcd"[..]))));
//! assert_eq!(abcd_opt(b"efghxxx"), Ok((&b"efghxxx"[..], None)));
//! # }
//! ```
//!
//! **`many0!`** applies a parser 0 or more times, and returns a vector of the aggregated results:
//!
//! ```rust
//! # #[macro_use] extern crate nom;
//! # #[cfg(feature = "alloc")]
//! # fn main() {
//! use std::str;
//!
//! named!(multi< Vec<&str> >, many0!( map_res!(tag!( "abcd" ), str::from_utf8) ) );
//! let a = b"abcdef";
//! let b = b"abcdabcdef";
//! let c = b"azerty";
//! assert_eq!(multi(a), Ok((&b"ef"[..],     vec!["abcd"])));
//! assert_eq!(multi(b), Ok((&b"ef"[..],     vec!["abcd", "abcd"])));
//! assert_eq!(multi(c), Ok((&b"azerty"[..], Vec::new())));
//! # }
//! # #[cfg(not(feature = "alloc"))]
//! # fn main() {}
//! ```
//!
//! Here are some basic combining macros available:
//!
//! - **`opt!`**: will make the parser optional (if it returns the `O` type, the new parser returns `Option<O>`)
//! - **`many0!`**: will apply the parser 0 or more times (if it returns the `O` type, the new parser returns `Vec<O>`)
//! - **`many1!`**: will apply the parser 1 or more times
//!
//! There are more complex (and more useful) parsers like `do_parse!` and `tuple!`, which are used to apply a series of parsers then assemble their results.
//!
//! Example with `tuple!`:
//!
//! ```rust
//! # #[macro_use] extern crate nom;
//! # fn main() {
//! use nom::{ErrorKind, Needed,be_u16};
//!
//! named!(tpl<&[u8], (u16, &[u8], &[u8]) >,
//!   tuple!(
//!     be_u16 ,
//!     take!(3),
//!     tag!("fg")
//!   )
//! );
//!
//! assert_eq!(
//!   tpl(&b"abcdefgh"[..]),
//!   Ok((
//!     &b"h"[..],
//!     (0x6162u16, &b"cde"[..], &b"fg"[..])
//!   ))
//! );
//! assert_eq!(tpl(&b"abcde"[..]), Err(nom::Err::Incomplete(Needed::Size(2))));
//! let input = &b"abcdejk"[..];
//! assert_eq!(tpl(input), Err(nom::Err::Error(error_position!(&input[5..], ErrorKind::Tag))));
//! # }
//! ```
//!
//! Example with `do_parse!`:
//!
//! ```rust
//! # #[macro_use] extern crate nom;
//! # fn main() {
//! use nom::IResult;
//!
//! #[derive(Debug, PartialEq)]
//! struct A {
//!   a: u8,
//!   b: u8
//! }
//!
//! fn ret_int1(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,1)) }
//! fn ret_int2(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,2)) }
//!
//! named!(f<&[u8],A>,
//!   do_parse!(    // the parser takes a byte array as input, and returns an A struct
//!     tag!("abcd")       >>      // begins with "abcd"
//!     opt!(tag!("abcd")) >>      // this is an optional parser
//!     aa: ret_int1       >>      // the return value of ret_int1, if it does not fail, will be stored in aa
//!     tag!("efgh")       >>
//!     bb: ret_int2       >>
//!     tag!("efgh")       >>
//!
//!     (A{a: aa, b: bb})          // the final tuple will be able to use the variable defined previously
//!   )
//! );
//!
//! let r = f(b"abcdabcdefghefghX");
//! assert_eq!(r, Ok((&b"X"[..], A{a: 1, b: 2})));
//!
//! let r2 = f(b"abcdefghefghX");
//! assert_eq!(r2, Ok((&b"X"[..], A{a: 1, b: 2})));
//! # }
//! ```
//!
//! The double right arrow `>>` is used as separator between every parser in the sequence, and the last closure can see the variables storing the result of parsers. Unless the specified return type is already a tuple, the final line should be that type wrapped in a tuple.
//!
//! More examples of [`do_parse!`](macro.do_parse.html) and [`tuple!`](macro.tuple.html) usage can be found in the [INI file parser example](https://github.com/Geal/nom/blob/master/tests/ini.rs).
//!
//! **Going further:** read the [guides](https://github.com/Geal/nom/tree/master/doc)!
#![cfg_attr(all(not(feature = "std"), feature = "alloc"), feature(alloc))]
#![cfg_attr(not(feature = "std"), no_std)]
//#![warn(missing_docs)]
#![cfg_attr(feature = "cargo-clippy", allow(doc_markdown))]
#![cfg_attr(nightly, feature(test))]

#[cfg(all(not(feature = "std"), feature = "alloc"))]
#[macro_use]
extern crate alloc;
#[cfg(feature = "regexp_macros")]
#[macro_use]
extern crate lazy_static;
extern crate memchr;
#[cfg(feature = "regexp")]
pub extern crate regex;
#[cfg(nightly)]
extern crate test;

/// Lib module to re-export everything needed from `std` or `core`/`alloc`. This is how `serde` does
/// it, albeit there it is not public.
///
/// Exactly one of the two `std` submodules below is compiled, selected by the `std` cargo
/// feature, so paths of the form `lib::std::…` resolve whether or not the standard library is
/// linked. The set of re-exported modules is *not* identical between the two variants; see the
/// comments on each one.
pub mod lib {
  /// `std` facade allowing `std`/`core` to be interchangeable. Reexports `alloc` crate optionally,
  /// as well as `core` or `std`
  ///
  /// This is the `no_std` variant (compiled when the `std` feature is disabled). Compared to the
  /// `std` variant it has no `collections` or `hash` re-export, and `boxed`, `string` and `vec`
  /// are only present when the `alloc` feature is enabled.
  #[cfg(not(feature = "std"))]
  pub mod std {
    // Heap-backed modules, only available when the `alloc` feature is enabled.
    // NOTE(review): the `cfg_attr` condition repeats the `cfg` on the line above it, and
    // `macro_use` is applied to a `use` item here — confirm it has any effect.
    #[cfg(feature = "alloc")]
    #[cfg_attr(feature = "alloc", macro_use)]
    pub use alloc::{boxed, string, vec};

    // Modules that exist in `core` under the same names as in `std`.
    pub use core::{cmp, convert, fmt, iter, mem, ops, option, result, slice, str};
    pub mod prelude {
      // Re-exports the `core::prelude` module itself under the name `v1`.
      // NOTE(review): this aliases `prelude` as `v1` rather than re-exporting `prelude::v1` —
      // confirm that is intended.
      pub use core::prelude as v1;
    }
  }

  /// `std` variant of the facade (compiled when the `std` feature is enabled): every module is
  /// re-exported straight from the standard library.
  #[cfg(feature = "std")]
  pub mod std {
    pub use std::{boxed, cmp, collections, convert, fmt, hash, iter, mem, ops, option, result, slice, str, string, vec};
    pub mod prelude {
      // Same aliasing as in the `no_std` variant: `std::prelude` is exposed under the name `v1`.
      pub use std::prelude as v1;
    }
  }

  // Re-export of the `regex` crate as `lib::regex`, only with the `regexp` feature.
  #[cfg(feature = "regexp")]
  pub use regex;
}

pub use self::traits::*;
pub use self::util::*;

#[cfg(feature = "verbose-errors")]
pub use self::verbose_errors::*;

#[cfg(not(feature = "verbose-errors"))]
pub use self::simple_errors::*;

pub use self::branch::*;
pub use self::internal::*;
pub use self::macros::*;
pub use self::methods::*;
pub use self::multi::*;
pub use self::sequence::*;

pub use self::bits::*;
pub use self::bytes::*;

pub use self::character::*;
pub use self::nom::*;

pub use self::whitespace::*;

#[cfg(feature = "regexp")]
pub use self::regexp::*;
pub use self::str::*;

// Module declarations.
//
// `macro_rules!` macros are textually scoped: `#[macro_use]` on a module keeps the macros
// defined inside it in scope for everything that follows the declaration in this file.
// The order of the declarations below is therefore significant — presumably later modules
// use macros defined by earlier ones, so keep the ordering when adding or moving a module
// (TODO confirm which dependencies actually exist).
//
// Modules declared without `pub` are private; their items are exposed through the
// `pub use self::<module>::*` glob re-exports earlier in this file.
#[macro_use]
mod util;

// Exactly one of the two error modules is compiled, selected by the `verbose-errors` feature.
#[cfg(feature = "verbose-errors")]
#[macro_use]
pub mod verbose_errors;

#[cfg(not(feature = "verbose-errors"))]
#[macro_use]
pub mod simple_errors;

#[macro_use]
mod internal;
// Declared without `#[macro_use]`.
mod traits;
#[macro_use]
mod macros;
#[macro_use]
mod branch;
#[macro_use]
mod sequence;
#[macro_use]
mod multi;
#[macro_use]
pub mod methods;

#[macro_use]
mod bytes;
#[macro_use]
pub mod bits;

#[macro_use]
mod character;
#[macro_use]
mod nom;

#[macro_use]
pub mod whitespace;

// Only compiled with the `regexp` feature.
#[cfg(feature = "regexp")]
#[macro_use]
mod regexp;

// Declared without `#[macro_use]`.
mod str;

// Public module with no glob re-export in the visible part of this file, so its items are
// reached through the `types::` path.
pub mod types;