Regular Mess

Regular expressions can actually be quite messy. Don’t you think so? Well, think not before you see this…

Below is a regular expression used to validate email addresses according to the RFC 822 grammar. I just encountered this while browsing web today. Seriously, what the hell?

(?:(?:rn)?[ t])*(?:(?:(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t]
)+|Z|(?=[["()<>@,;:\".[]]))|"(?:[^"r\]|\.|(?:(?:rn)?[ t]))*"(?:(?:
rn)?[ t])*)(?:.(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(
?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|"(?:[^"r\]|\.|(?:(?:rn)?[
t]))*"(?:(?:rn)?[ t])*))*@(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-
31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*
](?:(?:rn)?[ t])*)(?:.(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+
(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*](?:
(?:rn)?[ t])*))*|(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z
|(?=[["()<>@,;:\".[]]))|"(?:[^"r\]|\.|(?:(?:rn)?[ t]))*"(?:(?:rn)
?[ t])*)*<(?:(?:rn)?[ t])*(?:@(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:
rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[
t])*)(?:.(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)
?[ t])+|Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t]
)*))*(?:,@(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[
t])+|Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*
)(?:.(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t]
)+|Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*))*)
*:(?:(?:rn)?[ t])*)?(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+
|Z|(?=[["()<>@,;:\".[]]))|"(?:[^"r\]|\.|(?:(?:rn)?[ t]))*"(?:(?:r
n)?[ t])*)(?:.(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:
rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|"(?:[^"r\]|\.|(?:(?:rn)?[ t
]))*"(?:(?:rn)?[ t])*))*@(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31
]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*](
?:(?:rn)?[ t])*)(?:.(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?
:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*](?:(?
:rn)?[ t])*))*>(?:(?:rn)?[ t])*)|(?:[^()<>@,;:\".[] 00-31]+(?:(?
:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|"(?:[^"r\]|\.|(?:(?:rn)?
[ t]))*"(?:(?:rn)?[ t])*)*:(?:(?:rn)?[ t])*(?:(?:(?:[^()<>@,;:\".[]
00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|"(?:[^"r\]|
\.|(?:(?:rn)?[ t]))*"(?:(?:rn)?[ t])*)(?:.(?:(?:rn)?[ t])*(?:[^()<>
@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|"
(?:[^"r\]|\.|(?:(?:rn)?[ t]))*"(?:(?:rn)?[ t])*))*@(?:(?:rn)?[ t]
)*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\
".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*)(?:.(?:(?:rn)?[ t])*(?
:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[
]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*))*|(?:[^()<>@,;:\".[] 00-
31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|"(?:[^"r\]|\.|(
?:(?:rn)?[ t]))*"(?:(?:rn)?[ t])*)*<(?:(?:rn)?[ t])*(?:@(?:[^()<>@,;
:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|[([
^[]r\]|\.)*](?:(?:rn)?[ t])*)(?:.(?:(?:rn)?[ t])*(?:[^()<>@,;:\"
.[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|[([^[
]r\]|\.)*](?:(?:rn)?[ t])*))*(?:,@(?:(?:rn)?[ t])*(?:[^()<>@,;:\".
[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|[([^[]
r\]|\.)*](?:(?:rn)?[ t])*)(?:.(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[]
00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]
|\.)*](?:(?:rn)?[ t])*))*)*:(?:(?:rn)?[ t])*)?(?:[^()<>@,;:\".[]
00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|"(?:[^"r\]|\
.|(?:(?:rn)?[ t]))*"(?:(?:rn)?[ t])*)(?:.(?:(?:rn)?[ t])*(?:[^()<>@,
;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]]))|"(?
:[^"r\]|\.|(?:(?:rn)?[ t]))*"(?:(?:rn)?[ t])*))*@(?:(?:rn)?[ t])*
(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".
[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*)(?:.(?:(?:rn)?[ t])*(?:[
^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[]
]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*))*>(?:(?:rn)?[ t])*)(?:,s*(
?:(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\
".[]]))|"(?:[^"r\]|\.|(?:(?:rn)?[ t]))*"(?:(?:rn)?[ t])*)(?:.(?:(
?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[
["()<>@,;:\".[]]))|"(?:[^"r\]|\.|(?:(?:rn)?[ t]))*"(?:(?:rn)?[ t
])*))*@(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t
])+|Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*)(?
:.(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|
Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*))*|(?:
[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".[
]]))|"(?:[^"r\]|\.|(?:(?:rn)?[ t]))*"(?:(?:rn)?[ t])*)*<(?:(?:rn)
?[ t])*(?:@(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["
()<>@,;:\".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*)(?:.(?:(?:rn)
?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>
@,;:\".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*))*(?:,@(?:(?:rn)?[
t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,
;:\".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*)(?:.(?:(?:rn)?[ t]
)*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\
".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*))*)*:(?:(?:rn)?[ t])*)?
(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[["()<>@,;:\".
[]]))|"(?:[^"r\]|\.|(?:(?:rn)?[ t]))*"(?:(?:rn)?[ t])*)(?:.(?:(?:
rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z|(?=[[
"()<>@,;:\".[]]))|"(?:[^"r\]|\.|(?:(?:rn)?[ t]))*"(?:(?:rn)?[ t])
*))*@(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])
+|Z|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*)(?:
.(?:(?:rn)?[ t])*(?:[^()<>@,;:\".[] 00-31]+(?:(?:(?:rn)?[ t])+|Z
|(?=[["()<>@,;:\".[]]))|[([^[]r\]|\.)*](?:(?:rn)?[ t])*))*>(?:(
?:rn)?[ t])*))*)?;s*)

1 comment

  1. Makes me wonder, how in the world did they come up with this? Probably drawing the Finite Automata first is a better approach…

Leave a Reply