Index: modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro
===================================================================
--- modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro	(revision 0)
+++ modules/analysis/common/src/java/org/apache/lucene/analysis/standard/ASCIITLD.jflex-macro	(revision 0)
@@ -0,0 +1,310 @@
+/*
+ * Copyright 2001-2005 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Generated from IANA Root Zone Database <http://www.iana.org/domains/root/db/>
+// on Thu May 27 06:01:28 GMT 2010
+// by org.apache.lucene:lucene-buildhelper-maven-plugin:generate-jflex-tld-macros
+
+ASCIITLD = "." (   // a literal dot, then one of the case-insensitive TLD labels below
+	  [aA][cC]   // .AC country-code Ascension Island Network Information Center (AC Domain Registry) c/o Cable and Wireless (Ascension Island)
+	| [aA][dD]   // .AD country-code Andorra Andorra Telecom
+	| [aA][eE]   // .AE country-code United Arab Emirates Telecommunication Regulatory Authority (TRA)
+	| [aA][eE][rR][oO]   // .AERO sponsored Reserved for members of the air-transport industry Societe Internationale de Telecommunications Aeronautique (SITA INC USA)
+	| [aA][fF]   // .AF country-code Afghanistan Ministry of Communications and IT
+	| [aA][gG]   // .AG country-code Antigua and Barbuda UHSA School of Medicine
+	| [aA][iI]   // .AI country-code Anguilla Government of Anguilla
+	| [aA][lL]   // .AL country-code Albania Electronic and Postal Communications Authority - AKEP
+	| [aA][mM]   // .AM country-code Armenia Internet Society
+	| [aA][nN]   // .AN country-code Netherlands Antilles University of The Netherlands Antilles
+	| [aA][oO]   // .AO country-code Angola Faculdade de Engenharia da Universidade Agostinho Neto
+	| [aA][qQ]   // .AQ country-code Antarctica Mott and Associates
+	| [aA][rR]   // .AR country-code Argentina MRECIC (Ministerio de Relaciones Exteriores, Comercio Internacional y Culto)
+	| [aA][rR][pP][aA]   // .ARPA infrastructure Reserved exclusively to support operationally-critical infrastructural identifier spaces as advised by the Internet Architecture Board Internet Assigned Numbers Authority
+	| [aA][sS]   // .AS country-code American Samoa AS Domain Registry
+	| [aA][sS][iI][aA]   // .ASIA sponsored Restricted to the Pan-Asia and Asia Pacific community DotAsia Organisation Ltd.
+	| [aA][tT]   // .AT country-code Austria NIC.AT Internet Verwaltungs und Betriebsgesellschaft m.b.H
+	| [aA][uU]   // .AU country-code Australia .au Domain Administration (auDA)
+	| [aA][wW]   // .AW country-code Aruba SETAR
+	| [aA][xX]   // .AX country-code Aland Islands Ålands landskapsregering
+	| [aA][zZ]   // .AZ country-code Azerbaijan IntraNS
+	| [bB][aA]   // .BA country-code Bosnia and Herzegovina Universtiy Telinformatic Centre (UTIC)
+	| [bB][bB]   // .BB country-code Barbados Government of Barbados Ministry of Economic Affairs and Development Telecommunications Unit
+	| [bB][dD]   // .BD country-code Bangladesh Ministry of Post & Telecommunications Bangladesh Secretariate
+	| [bB][eE]   // .BE country-code Belgium DNS BE vzw/asbl
+	| [bB][fF]   // .BF country-code Burkina Faso DELGI Delegational Generale Informatique
+	| [bB][gG]   // .BG country-code Bulgaria Register.BG
+	| [bB][hH]   // .BH country-code Bahrain BATELCO
+	| [bB][iI]   // .BI country-code Burundi Centre National de l'Informatique
+	| [bB][iI][zZ]   // .BIZ generic-restricted Restricted for Business NeuStar, Inc.
+	| [bB][jJ]   // .BJ country-code Benin Offices des Postes et Telecommunications
+	| [bB][lL]   // .BL country-code Saint Barthelemy Not assigned
+	| [bB][mM]   // .BM country-code Bermuda Registry General Ministry of Labour and Immigration
+	| [bB][nN]   // .BN country-code Brunei Darussalam Jabatan Telekom Brunei
+	| [bB][oO]   // .BO country-code Bolivia Agencia para el Desarrollo de la Información de la Sociedad en Bolivia
+	| [bB][rR]   // .BR country-code Brazil Comite Gestor da Internet no Brasil
+	| [bB][sS]   // .BS country-code Bahamas The College of the Bahamas
+	| [bB][tT]   // .BT country-code Bhutan Ministry of Information and Communications
+	| [bB][vV]   // .BV country-code Bouvet Island UNINETT Norid A/S
+	| [bB][wW]   // .BW country-code Botswana University of Botswana
+	| [bB][yY]   // .BY country-code Belarus Open Contact Ltd.
+	| [bB][zZ]   // .BZ country-code Belize University of Belize
+	| [cC][aA]   // .CA country-code Canada Canadian Internet Registration Authority (CIRA) Autorite Canadienne pour les Enregistrements Internet (ACEI)
+	| [cC][aA][tT]   // .CAT sponsored Reserved for the Catalan linguistic and cultural community Fundacio puntCAT
+	| [cC][cC]   // .CC country-code Cocos (Keeling) Islands eNIC Cocos (Keeling) Islands Ltd. Pty, d/b/a Island Internet Services
+	| [cC][dD]   // .CD country-code Congo, The Democratic Republic of the NIC Congo - Interpoint SARL
+	| [cC][fF]   // .CF country-code Central African Republic Societe Centrafricaine de Telecommunications (SOCATEL)
+	| [cC][gG]   // .CG country-code Congo ONPT Congo and Interpoint Switzerland
+	| [cC][hH]   // .CH country-code Switzerland SWITCH The Swiss Education & Research Network
+	| [cC][iI]   // .CI country-code Cote d'Ivoire INP-HB Institut National Polytechnique Felix Houphouet Boigny
+	| [cC][kK]   // .CK country-code Cook Islands Telecom Cook Islands Ltd.
+	| [cC][lL]   // .CL country-code Chile NIC Chile (University of Chile)
+	| [cC][mM]   // .CM country-code Cameroon Cameroon Telecommunications (CAMTEL)
+	| [cC][nN]   // .CN country-code China Chinese Academy of Sciences The Computer Network Center
+	| [cC][oO]   // .CO country-code Colombia .CO Internet S.A.S.
+	| [cC][oO][mM]   // .COM generic Generic top-level domain VeriSign Global Registry Services
+	| [cC][oO][oO][pP]   // .COOP sponsored Reserved for cooperative associations DotCooperation LLC
+	| [cC][rR]   // .CR country-code Costa Rica National Academy of Sciences Academia Nacional de Ciencias
+	| [cC][uU]   // .CU country-code Cuba CENIAInternet Industria y San Jose Capitolio Nacional
+	| [cC][vV]   // .CV country-code Cape Verde Agência Nacional das Comunicações (ANAC)
+	| [cC][xX]   // .CX country-code Christmas Island Christmas Island Internet Administration Limited
+	| [cC][yY]   // .CY country-code Cyprus University of Cyprus
+	| [cC][zZ]   // .CZ country-code Czech Republic CZ.NIC, z.s.p.o
+	| [dD][eE]   // .DE country-code Germany DENIC eG
+	| [dD][jJ]   // .DJ country-code Djibouti Djibouti Telecom S.A
+	| [dD][kK]   // .DK country-code Denmark Dansk Internet Forum
+	| [dD][mM]   // .DM country-code Dominica DotDM Corporation
+	| [dD][oO]   // .DO country-code Dominican Republic Pontificia Universidad Catolica Madre y Maestra Recinto Santo Tomas de Aquino
+	| [dD][zZ]   // .DZ country-code Algeria CERIST
+	| [eE][cC]   // .EC country-code Ecuador NIC.EC (NICEC) S.A.
+	| [eE][dD][uU]   // .EDU sponsored Reserved for post-secondary institutions accredited by an agency on the U.S. Department of Education's list of Nationally Recognized Accrediting Agencies EDUCAUSE
+	| [eE][eE]   // .EE country-code Estonia National Institute of Chemical Physics and Biophysics
+	| [eE][gG]   // .EG country-code Egypt Egyptian Universities Network (EUN) Supreme Council of Universities
+	| [eE][hH]   // .EH country-code Western Sahara Not assigned
+	| [eE][rR]   // .ER country-code Eritrea Eritrea Telecommunication Services Corporation (EriTel)
+	| [eE][sS]   // .ES country-code Spain Red.es
+	| [eE][tT]   // .ET country-code Ethiopia Ethiopian Telecommunications Corporation
+	| [eE][uU]   // .EU country-code European Union EURid vzw/asbl
+	| [fF][iI]   // .FI country-code Finland Finnish Communications Regulatory Authority
+	| [fF][jJ]   // .FJ country-code Fiji The University of the South Pacific IT Services
+	| [fF][kK]   // .FK country-code Falkland Islands (Malvinas) Falkland Islands Government
+	| [fF][mM]   // .FM country-code Micronesia, Federated States of FSM Telecommunications Corporation
+	| [fF][oO]   // .FO country-code Faroe Islands FO Council
+	| [fF][rR]   // .FR country-code France AFNIC (NIC France) - Immeuble International
+	| [gG][aA]   // .GA country-code Gabon Gabon Telecom
+	| [gG][bB]   // .GB country-code United Kingdom Reserved Domain - IANA
+	| [gG][dD]   // .GD country-code Grenada The National Telecommunications Regulatory Commission (NTRC)
+	| [gG][eE]   // .GE country-code Georgia Caucasus Online
+	| [gG][fF]   // .GF country-code French Guiana Net Plus
+	| [gG][gG]   // .GG country-code Guernsey Island Networks Ltd.
+	| [gG][hH]   // .GH country-code Ghana Network Computer Systems Limited
+	| [gG][iI]   // .GI country-code Gibraltar Sapphire Networks
+	| [gG][lL]   // .GL country-code Greenland TELE Greenland A/S
+	| [gG][mM]   // .GM country-code Gambia GM-NIC
+	| [gG][nN]   // .GN country-code Guinea Centre National des Sciences Halieutiques de Boussoura
+	| [gG][oO][vV]   // .GOV sponsored Reserved exclusively for the United States Government General Services Administration Attn: QTDC, 2E08 (.gov Domain Registration)
+	| [gG][pP]   // .GP country-code Guadeloupe Networking Technologies Group
+	| [gG][qQ]   // .GQ country-code Equatorial Guinea GETESA
+	| [gG][rR]   // .GR country-code Greece ICS-FORTH GR
+	| [gG][sS]   // .GS country-code South Georgia and the South Sandwich Islands Government of South Georgia and South Sandwich Islands (GSGSSI)
+	| [gG][tT]   // .GT country-code Guatemala Universidad del Valle de Guatemala
+	| [gG][uU]   // .GU country-code Guam University of Guam Computer Center
+	| [gG][wW]   // .GW country-code Guinea-Bissau Fundação IT & MEDIA Universidade de Bissao
+	| [gG][yY]   // .GY country-code Guyana University of Guyana
+	| [hH][kK]   // .HK country-code Hong Kong Hong Kong Internet Registration Corporation Ltd.
+	| [hH][mM]   // .HM country-code Heard Island and McDonald Islands HM Domain Registry
+	| [hH][nN]   // .HN country-code Honduras Red de Desarrollo Sostenible Honduras
+	| [hH][rR]   // .HR country-code Croatia CARNet - Croatian Academic and Research Network
+	| [hH][tT]   // .HT country-code Haiti Consortium FDS/RDDH
+	| [hH][uU]   // .HU country-code Hungary Council of Hungarian Internet Providers (CHIP)
+	| [iI][dD]   // .ID country-code Indonesia IDNIC-PPAU Mikroelektronika
+	| [iI][eE]   // .IE country-code Ireland University College Dublin Computing Services Computer Centre
+	| [iI][lL]   // .IL country-code Israel Internet Society of Israel
+	| [iI][mM]   // .IM country-code Isle of Man Isle of Man Government
+	| [iI][nN]   // .IN country-code India National Internet Exchange of India
+	| [iI][nN][fF][oO]   // .INFO generic Generic top-level domain Afilias Limited
+	| [iI][nN][tT]   // .INT sponsored Used only for registering organizations established by international treaties between governments Internet Assigned Numbers Authority
+	| [iI][oO]   // .IO country-code British Indian Ocean Territory IO Top Level Domain Registry Cable and Wireless
+	| [iI][qQ]   // .IQ country-code Iraq Communications and Media Commission (CMC)
+	| [iI][rR]   // .IR country-code Iran, Islamic Republic of Institute for Studies in Theoretical Physics & Mathematics (IPM)
+	| [iI][sS]   // .IS country-code Iceland ISNIC - Internet Iceland ltd.
+	| [iI][tT]   // .IT country-code Italy IIT - CNR
+	| [jJ][eE]   // .JE country-code Jersey Island Networks (Jersey) Ltd.
+	| [jJ][mM]   // .JM country-code Jamaica University of West Indies
+	| [jJ][oO]   // .JO country-code Jordan National Information Technology Centre
+	| [jJ][oO][bB][sS]   // .JOBS sponsored Reserved for human resource managers Employ Media LLC
+	| [jJ][pP]   // .JP country-code Japan Japan Registry Services Co., Ltd.
+	| [kK][eE]   // .KE country-code Kenya Kenya Network Information Center (KeNIC)
+	| [kK][gG]   // .KG country-code Kyrgyzstan AsiaInfo Telecommunication Enterprise
+	| [kK][hH]   // .KH country-code Cambodia Ministry of Post and Telecommunications
+	| [kK][iI]   // .KI country-code Kiribati Ministry of Communications, Transport, and Tourism Development
+	| [kK][mM]   // .KM country-code Comoros Comores Telecom
+	| [kK][nN]   // .KN country-code Saint Kitts and Nevis Ministry of Finance, Sustainable Development Information & Technology
+	| [kK][pP]   // .KP country-code Korea, Democratic People's Republic of Korea Computer Center
+	| [kK][rR]   // .KR country-code Korea, Republic of Korea Internet & Security Agency (KISA)
+	| [kK][wW]   // .KW country-code Kuwait Ministry of Communications
+	| [kK][yY]   // .KY country-code Cayman Islands The Information and Communications Technology Authority
+	| [kK][zZ]   // .KZ country-code Kazakhstan Association of IT Companies of Kazakhstan
+	| [lL][aA]   // .LA country-code Lao People's Democratic Republic Lao National Internet Committee (LANIC) Science Technology and Environment Agency
+	| [lL][bB]   // .LB country-code Lebanon American University of Beirut Computing and Networking Services
+	| [lL][cC]   // .LC country-code Saint Lucia University of Puerto Rico
+	| [lL][iI]   // .LI country-code Liechtenstein Hochschule Liechtenstein
+	| [lL][kK]   // .LK country-code Sri Lanka Council for Information Technology LK Domain Registrar
+	| [lL][rR]   // .LR country-code Liberia Data Technology Solutions, Inc.
+	| [lL][sS]   // .LS country-code Lesotho National University of Lesotho
+	| [lL][tT]   // .LT country-code Lithuania Kaunas University of Technology Information Technology Development Institute
+	| [lL][uU]   // .LU country-code Luxembourg RESTENA
+	| [lL][vV]   // .LV country-code Latvia University of Latvia Institute of Mathematics and Computer Science Department of Network Solutions (DNS)
+	| [lL][yY]   // .LY country-code Libyan Arab Jamahiriya General Post and Telecommunication Company
+	| [mM][aA]   // .MA country-code Morocco Agence Nationale de Réglementation des Télécommunications (ANRT)
+	| [mM][cC]   // .MC country-code Monaco Gouvernement de Monaco Direction des Telecommunications
+	| [mM][dD]   // .MD country-code Moldova, Republic of MoldData S.E.
+	| [mM][eE]   // .ME country-code Montenegro Government of Montenegro
+	| [mM][fF]   // .MF country-code Saint Martin Not assigned
+	| [mM][gG]   // .MG country-code Madagascar NIC-MG (Network Information Center Madagascar)
+	| [mM][hH]   // .MH country-code Marshall Islands Cabinet Office
+	| [mM][iI][lL]   // .MIL sponsored Reserved exclusively for the United States Military DoD Network Information Center
+	| [mM][kK]   // .MK country-code Macedonia, The Former Yugoslav Republic of Ministry of Foreign Relations
+	| [mM][lL]   // .ML country-code Mali SOTELMA
+	| [mM][mM]   // .MM country-code Myanmar Ministry of Communications, Posts & Telegraphs
+	| [mM][nN]   // .MN country-code Mongolia Datacom Co., Ltd.
+	| [mM][oO]   // .MO country-code Macao University of Macau
+	| [mM][oO][bB][iI]   // .MOBI sponsored Reserved for consumers and providers of mobile products and services mTLD Top Level Domain Limited dba dotMobi
+	| [mM][pP]   // .MP country-code Northern Mariana Islands Saipan Datacom, Inc.
+	| [mM][qQ]   // .MQ country-code Martinique SYSTEL
+	| [mM][rR]   // .MR country-code Mauritania University of Nouakchott
+	| [mM][sS]   // .MS country-code Montserrat MNI Networks Ltd.
+	| [mM][tT]   // .MT country-code Malta NIC (Malta)
+	| [mM][uU]   // .MU country-code Mauritius Internet Direct Ltd
+	| [mM][uU][sS][eE][uU][mM]   // .MUSEUM sponsored Reserved for museums Museum Domain Management Association
+	| [mM][vV]   // .MV country-code Maldives Dhiraagu Pvt. Ltd. (DHIVEHINET)
+	| [mM][wW]   // .MW country-code Malawi Malawi Sustainable Development Network Programme (Malawi SDNP)
+	| [mM][xX]   // .MX country-code Mexico NIC-Mexico ITESM - Campus Monterrey
+	| [mM][yY]   // .MY country-code Malaysia MYNIC Berhad
+	| [mM][zZ]   // .MZ country-code Mozambique Centro de Informatica de Universidade Eduardo Mondlane
+	| [nN][aA]   // .NA country-code Namibia Namibian Network Information Center
+	| [nN][aA][mM][eE]   // .NAME generic-restricted Reserved for individuals The Global Name Registry Ltd.
+	| [nN][cC]   // .NC country-code New Caledonia Office des Postes et Telecommunications
+	| [nN][eE]   // .NE country-code Niger SONITEL
+	| [nN][eE][tT]   // .NET generic Generic top-level domain VeriSign Global Registry Services
+	| [nN][fF]   // .NF country-code Norfolk Island Norfolk Island Data Services
+	| [nN][gG]   // .NG country-code Nigeria Nigeria Internet Registration Association
+	| [nN][iI]   // .NI country-code Nicaragua Universidad Nacional del Ingernieria Centro de Computo
+	| [nN][lL]   // .NL country-code Netherlands Stichting Internet Domeinregistratie Nederland
+	| [nN][oO]   // .NO country-code Norway UNINETT Norid A/S
+	| [nN][pP]   // .NP country-code Nepal Mercantile Communications Pvt. Ltd.
+	| [nN][rR]   // .NR country-code Nauru CENPAC NET
+	| [nN][uU]   // .NU country-code Niue Internet Users Society - Niue
+	| [nN][zZ]   // .NZ country-code New Zealand InternetNZ
+	| [oO][mM]   // .OM country-code Oman Oman Telecommunications Company
+	| [oO][rR][gG]   // .ORG generic Generic top-level domain Public Interest Registry (PIR)
+	| [pP][aA]   // .PA country-code Panama Universidad Tecnologica de Panama Panamanian Academic National Network
+	| [pP][eE]   // .PE country-code Peru Red Cientifica Peruana
+	| [pP][fF]   // .PF country-code French Polynesia Ministère des Postes et Télécommunications et des sports, chargé des nouvelles technologies de l'information
+	| [pP][gG]   // .PG country-code Papua New Guinea PNG DNS Administration Vice Chancellors Office The Papua New Guinea University of Technology
+	| [pP][hH]   // .PH country-code Philippines PH Domain Foundation
+	| [pP][kK]   // .PK country-code Pakistan PKNIC
+	| [pP][lL]   // .PL country-code Poland Research and Academic Computer Network
+	| [pP][mM]   // .PM country-code Saint Pierre and Miquelon AFNIC (NIC France) - Immeuble International
+	| [pP][nN]   // .PN country-code Pitcairn Pitcairn Island Administration
+	| [pP][rR]   // .PR country-code Puerto Rico Gauss Research Laboratory Inc.
+	| [pP][rR][oO]   // .PRO generic-restricted Restricted to credentialed professionals and related entities Registry Services Corporation dba RegistryPro
+	| [pP][sS]   // .PS country-code Palestinian Territory, Occupied Ministry Of Telecommunications & Information Technology, Government Computer Center.
+	| [pP][tT]   // .PT country-code Portugal Fundacao para a Computacao Cientifica Nacional
+	| [pP][wW]   // .PW country-code Palau Micronesia Investment and Development Corporation
+	| [pP][yY]   // .PY country-code Paraguay NIC-PY
+	| [qQ][aA]   // .QA country-code Qatar Qatar Telecom (Q-Tel) Q.S.C.
+	| [rR][eE]   // .RE country-code Reunion AFNIC (NIC France) - Immeuble International
+	| [rR][oO]   // .RO country-code Romania National Institute for R&D in Informatics
+	| [rR][sS]   // .RS country-code Serbia Serbian National Register of Internet Domain Names (RNIDS)
+	| [rR][uU]   // .RU country-code Russian Federation Coordination Center for TLD RU
+	| [rR][wW]   // .RW country-code Rwanda NIC Congo - Interpoint SARL
+	| [sS][aA]   // .SA country-code Saudi Arabia Communications and Information Technology Commission
+	| [sS][bB]   // .SB country-code Solomon Islands Solomon Telekom Company Limited
+	| [sS][cC]   // .SC country-code Seychelles VCS Pty Ltd
+	| [sS][dD]   // .SD country-code Sudan Sudan Internet Society
+	| [sS][eE]   // .SE country-code Sweden The Internet Infrastructure Foundation
+	| [sS][gG]   // .SG country-code Singapore Singapore Network Information Centre (SGNIC) Pte Ltd
+	| [sS][hH]   // .SH country-code Saint Helena, Ascension and Tristan da Cunha Government of St. Helena
+	| [sS][iI]   // .SI country-code Slovenia Academic and Research Network of Slovenia (ARNES)
+	| [sS][jJ]   // .SJ country-code Svalbard and Jan Mayen UNINETT Norid A/S
+	| [sS][kK]   // .SK country-code Slovakia SK-NIC, a.s.
+	| [sS][lL]   // .SL country-code Sierra Leone Sierratel
+	| [sS][mM]   // .SM country-code San Marino Telecom Italia San Marino S.p.A.
+	| [sS][nN]   // .SN country-code Senegal Universite Cheikh Anta Diop NIC Senegal
+	| [sS][oO]   // .SO country-code Somalia Ministry of Post and Telecommunications
+	| [sS][rR]   // .SR country-code Suriname Telesur
+	| [sS][tT]   // .ST country-code Sao Tome and Principe Tecnisys
+	| [sS][uU]   // .SU country-code Soviet Union (being phased out) Russian Institute for Development of Public Networks (ROSNIIROS)
+	| [sS][vV]   // .SV country-code El Salvador SVNet Col. Medica Dr. Emilio Alvarez
+	| [sS][yY]   // .SY country-code Syrian Arab Republic Syrian Telecommunications Establishment (STE)
+	| [sS][zZ]   // .SZ country-code Swaziland University of Swaziland Department of Computer Science
+	| [tT][cC]   // .TC country-code Turks and Caicos Islands Melrex TC
+	| [tT][dD]   // .TD country-code Chad Société des télécommunications du Tchad (SOTEL TCHAD)
+	| [tT][eE][lL]   // .TEL sponsored Reserved for businesses and individuals to publish their contact data Telnic Ltd.
+	| [tT][fF]   // .TF country-code French Southern Territories AFNIC (NIC France) - Immeuble International
+	| [tT][gG]   // .TG country-code Togo Cafe Informatique et Telecommunications
+	| [tT][hH]   // .TH country-code Thailand Thai Network Information Center Foundation
+	| [tT][jJ]   // .TJ country-code Tajikistan Information Technology Center
+	| [tT][kK]   // .TK country-code Tokelau Telecommunication Tokelau Corporation (Teletok)
+	| [tT][lL]   // .TL country-code Timor-Leste Ministry of Infrastructure Information and Technology Division
+	| [tT][mM]   // .TM country-code Turkmenistan TM Domain Registry Ltd
+	| [tT][nN]   // .TN country-code Tunisia Agence Tunisienne d'Internet
+	| [tT][oO]   // .TO country-code Tonga Government of the Kingdom of Tonga H.R.H. Crown Prince Tupouto'a c/o Consulate of Tonga
+	| [tT][pP]   // .TP country-code Portuguese Timor (being phased out) -
+	| [tT][rR]   // .TR country-code Turkey Middle East Technical University Department of Computer Engineering
+	| [tT][rR][aA][vV][eE][lL]   // .TRAVEL sponsored Reserved for entities whose primary area of activity is in the travel industry Tralliance Registry Management Company, LLC.
+	| [tT][tT]   // .TT country-code Trinidad and Tobago University of the West Indies Faculty of Engineering
+	| [tT][vV]   // .TV country-code Tuvalu Ministry of Finance and Tourism
+	| [tT][wW]   // .TW country-code Taiwan Taiwan Network Information Center (TWNIC)
+	| [tT][zZ]   // .TZ country-code Tanzania, United Republic of Tanzania Network Information Centre (tzNIC)
+	| [uU][aA]   // .UA country-code Ukraine Communication Systems Ltd
+	| [uU][gG]   // .UG country-code Uganda Uganda Online Ltd.
+	| [uU][kK]   // .UK country-code United Kingdom Nominet UK
+	| [uU][mM]   // .UM country-code United States Minor Outlying Islands Not assigned
+	| [uU][sS]   // .US country-code United States NeuStar, Inc.
+	| [uU][yY]   // .UY country-code Uruguay SeCIU - Universidad de la Republica
+	| [uU][zZ]   // .UZ country-code Uzbekistan Computerization and Information Technologies Developing Center UZINFOCOM
+	| [vV][aA]   // .VA country-code Holy See (Vatican City State) Holy See Secretariat of State Internet Office of the Holy See
+	| [vV][cC]   // .VC country-code Saint Vincent and the Grenadines Ministry of Telecommunications, Science, Technology and Industry
+	| [vV][eE]   // .VE country-code Venezuela, Bolivarian Republic of Centro Nacional de Tecnologias de Informacion
+	| [vV][gG]   // .VG country-code Virgin Islands, British Pinebrook Developments Ltd
+	| [vV][iI]   // .VI country-code Virgin Islands, U.S. Virgin Islands Public Telcommunications System c/o COBEX Internet Services
+	| [vV][nN]   // .VN country-code Viet Nam Ministry of Information and Communications of Socialist Republic of Viet Nam
+	| [vV][uU]   // .VU country-code Vanuatu Telecom Vanuatu Limited
+	| [wW][fF]   // .WF country-code Wallis and Futuna AFNIC (NIC France) - Immeuble International
+	| [wW][sS]   // .WS country-code Samoa Government of Samoa Ministry of Foreign Affairs
+	| [xX][nN]--0[zZ][wW][mM]56[dD]   // .测试 test:zh-Hans test Reserved for testing internationalised domain names Internet Assigned Numbers Authority
+	| [xX][nN]--11[bB]5[bB][sS]3[aA]9[aA][jJ]6[gG]   // .परीक्षा test:hi-Deva test Reserved for testing internationalised domain names Internet Assigned Numbers Authority
+	| [xX][nN]--80[aA][kK][hH][bB][yY][kK][nN][jJ]4[fF]   // .испытание test:ru-Cyrl test Reserved for testing internationalised domain names Internet Assigned Numbers Authority
+	| [xX][nN]--9[tT]4[bB]11[yY][iI]5[aA]   // .테스트 test:ko-Hang test Reserved for testing internationalised domain names Internet Assigned Numbers Authority
+	| [xX][nN]--[dD][eE][bB][aA]0[aA][dD]   // .טעסט test:yi-Hebr test Reserved for testing internationalised domain names Internet Assigned Numbers Authority
+	| [xX][nN]--[gG]6[wW]251[dD]   // .測試 test:zh-Hant test Reserved for testing internationalised domain names Internet Assigned Numbers Authority
+	| [xX][nN]--[hH][gG][bB][kK]6[aA][jJ]7[fF]53[bB][bB][aA]   // .آزمایشی test:fa-Arab test Reserved for testing internationalised domain names Internet Assigned Numbers Authority
+	| [xX][nN]--[hH][lL][cC][jJ]6[aA][yY][aA]9[eE][sS][cC]7[aA]   // .பரிட்சை test:ta-Taml test Reserved for testing internationalised domain names Internet Assigned Numbers Authority
+	| [xX][nN]--[jJ][xX][aA][lL][pP][dD][lL][pP]   // .δοκιμή test:el-Grek test Reserved for testing internationalised domain names Internet Assigned Numbers Authority
+	| [xX][nN]--[kK][gG][bB][eE][cC][hH][tT][vV]   // .إختبار test:ar-Arab test Reserved for testing internationalised domain names Internet Assigned Numbers Authority
+	| [xX][nN]--[mM][gG][bB][aA][aA][mM]7[aA]8[hH]   // .امارات emarat:ar-Arab country-code United Arab Emirates Telecommunications Regulatory Authority (TRA)
+	| [xX][nN]--[mM][gG][bB][eE][rR][pP]4[aA]5[dD]4[aA][rR]   // .السعودية alsaudiah:ar-Arab country-code Saudi Arabia Communications and Information Technology Commission
+	| [xX][nN]--[pP]1[aA][iI]   // .рф rf:ru-Cyrl country-code Russian Federation Coordination Center for TLD RU
+	| [xX][nN]--[wW][gG][bB][hH]1[cC]   // .مصر misr:ar-Arab country-code Egypt National Telecommunication Regulatory Authority - NTRA
+	| [xX][nN]--[zZ][cC][kK][zZ][aA][hH]   // .テスト test:ja-Kana test Reserved for testing internationalised domain names Internet Assigned Numbers Authority
+	| [yY][eE]   // .YE country-code Yemen TeleYemen
+	| [yY][tT]   // .YT country-code Mayotte AFNIC (NIC France) - Immeuble International
+	| [zZ][aA]   // .ZA country-code South Africa ZA Domain Name Authority
+	| [zZ][mM]   // .ZM country-code Zambia ZAMNET Communication Systems Ltd.
+	| [zZ][wW]   // .ZW country-code Zimbabwe Postal and Telecommunications Regulatory Authority of Zimbabwe (POTRAZ)
+	) "."?   // Accept an optional trailing literal dot: the root (empty) domain
+
Index: lucene-buildhelper-maven-plugin/src/main/java/org/apache/lucene/mojo/GenerateJflexTLDMacrosMojo.java
===================================================================
--- lucene-buildhelper-maven-plugin/src/main/java/org/apache/lucene/mojo/GenerateJflexTLDMacrosMojo.java	(revision 0)
+++ lucene-buildhelper-maven-plugin/src/main/java/org/apache/lucene/mojo/GenerateJflexTLDMacrosMojo.java	(revision 0)
@@ -0,0 +1,277 @@
+package org.apache.lucene.mojo;
+
+/*
+ * Copyright 2001-2005 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.http.client.HttpClient;
+import org.apache.http.client.methods.HttpGet;
+import org.apache.http.impl.client.BasicResponseHandler;
+import org.apache.http.impl.client.DefaultHttpClient;
+import org.apache.maven.plugin.AbstractMojo;
+import org.apache.maven.plugin.MojoExecutionException;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.util.Calendar;
+import java.util.GregorianCalendar;
+import java.util.SortedMap;
+import java.util.TimeZone;
+import java.util.TreeMap;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Generates a file containing JFlex macros to accept valid ASCII TLDs 
+ * (top level domains), for inclusion in JFlex grammars that can accept 
+ * domain names.
+ * <p/> 
+ * The IANA Root Zone Database is queried via HTTP, the response is parsed, and
+ * the results are written out to a file containing a JFlex macro that will 
+ * accept all valid ASCII-only TLDs, including punycode forms of 
+ * internationalized TLDs.
+ * 
+ * @goal generate-jflex-tld-macros
+ * @phase generate-sources
+ * @requiresProject false
+ */
+public class GenerateJflexTLDMacrosMojo extends AbstractMojo {
+  
+  private static final String IANA_ROOT_ZONE_DB_URL // page fetched by getIANARootZoneDatabase()
+    = "http://www.iana.org/domains/root/db/";
+  private static final String OUTPUT_FILENAME = "ASCIITLD.jflex-macro"; // created in outputDirectory
+  private static final String APACHE_LICENSE // written first into the generated file
+    = "/*\n"
+      + " * Copyright 2001-2005 The Apache Software Foundation.\n"
+      + " *\n"
+      + " * Licensed under the Apache License, Version 2.0 (the \"License\");\n"
+      + " * you may not use this file except in compliance with the License.\n"
+      + " * You may obtain a copy of the License at\n"
+      + " *\n"
+      + " *      http://www.apache.org/licenses/LICENSE-2.0\n"
+      + " *\n"
+      + " * Unless required by applicable law or agreed to in writing, software\n"
+      + " * distributed under the License is distributed on an \"AS IS\" BASIS,\n"
+      + " * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n"
+      + " * See the License for the specific language governing permissions and\n"
+      + " * limitations under the License.\n"
+      + " */\n\n";
+
+  // Example entries from the IANA Root Zone Database 
+  // (whitespace/indentation added for clarity):
+  //
+  // <tr class="iana-group-23 iana-type-1">
+  //   <td><a href="/domains/root/db/ws.html">.WS</a></td>
+  //   <td>country-code</td>
+  //   <td>Samoa<br/>
+  //     <span class="tld-table-so">
+  //       <line>Government of Samoa</line><br/>
+  //       <line>Ministry of Foreign Affairs</line>
+  //     </span>
+  //   </td>
+  // </tr>
+  // <tr class="iana-group-24 iana-type-2 iana-type-3">
+  //   <td>
+  //     <a href="/domains/root/db/xn--0zwm56d.html">.&#27979;&#35797;<br/>
+  //       <span class="tld-table-so">test:zh-Hans</span>
+  //     </a>
+  //   </td>
+  //   <td>test</td>
+  //   <td>Reserved for testing internationalised domain names<br/>
+  //     <span class="tld-table-so">Internet Assigned Numbers Authority</span>
+  //   </td>
+  // </tr>
+  private static final Pattern TLD_PATTERN = Pattern.compile
+    ("<td[^<]*>\\s*<a\\s+href\\s*=\\s*\"/domains/root/db/([^.]+)\\.html\"\\s*>"
+     +"\\s*(.*?)</tr>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);
+  private static final Pattern XML_TAG_PATTERN = Pattern.compile("</?[^>]+>");
+  private static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
+  private static final Pattern DECIMAL_CHARACTER_REFERENCE_PATTERN
+    = Pattern.compile("&#(\\d+);");
+  private static final Pattern HEXADECIMAL_CHARACTER_REFERENCE_PATTERN
+    = Pattern.compile("&#x([\\dA-F]+);", Pattern.CASE_INSENSITIVE);
+  // Patterns above are immutable and shared; the fields below hold per-run state.
+  private final Calendar calendar
+    = GregorianCalendar.getInstance(TimeZone.getTimeZone("GMT"));
+  private final SortedMap<String,String> ASCIITLDs = new TreeMap<String,String>();
+  private String downloadDateTime;
+  
+  /**
+   * Location of the output directory for JFlex Macro definition files.
+   * @parameter expression="src/java/org/apache/lucene/analysis/standard"
+   */
+  private File outputDirectory;
+  
+
+
+  /** Runs the goal: download the root zone page, extract its TLDs, write the macro. */
+  public void execute() throws MojoExecutionException {
+    extractASCIITLDs(getIANARootZoneDatabase());
+    writeOutput();
+  }
+
+  /**
+   * Downloads the IANA Root Zone Database from {@link #IANA_ROOT_ZONE_DB_URL}
+   * and stores the formatted value of {@link #calendar} in {@link #downloadDateTime}.
+   *
+   * @return the text of the HTML page containing the IANA Root Zone Database.
+   * @throws MojoExecutionException if an {@link IOException} is thrown during
+   *  the process of downloading the IANA Root Zone Database.
+   */
+  private String getIANARootZoneDatabase() throws MojoExecutionException {
+    // Fixed locale: this text goes into the generated file and must not vary per machine.
+    downloadDateTime = String.format(java.util.Locale.ENGLISH, "%tc", calendar);
+    HttpGet httpget = new HttpGet(IANA_ROOT_ZONE_DB_URL);
+    httpget.addHeader("Cache-Control", "no-cache");  // request an uncached copy
+    HttpClient httpclient = new DefaultHttpClient();
+    try {
+      return httpclient.execute(httpget, new BasicResponseHandler());
+    } catch (IOException e) {
+      throw new MojoExecutionException("Error downloading IANA Root Zone DB", e);
+    } finally {
+      httpclient.getConnectionManager().shutdown();  // runs on success and on failure
+    }
+  }
+
+  /**
+   * Extracts ASCII TLDs and their descriptions from the passed-in HTML page
+   * containing the IANA Root Zone Database, populating {@link #ASCIITLDs}.
+   * TLDs are lowercased with a fixed locale so that the keys do not depend on
+   * the default locale (e.g. Turkish maps "I" to a dotless i).
+   *
+   * @param IANADB HTML page containing the IANA Root Zone Database
+   */
+  private void extractASCIITLDs(String IANADB) {
+    Matcher rows = TLD_PATTERN.matcher(IANADB);
+    while (rows.find()) {
+      String tld = rows.group(1).toLowerCase(java.util.Locale.ENGLISH);
+      ASCIITLDs.put(tld, normalizeDescription(rows.group(2)));
+    }
+  }
+
+  /**
+   * Strips XML tags, converts numeric character references into the
+   * corresponding characters, and collapses whitespace.
+   *
+   * @param description The description to normalize
+   * @return the normalized description
+   */
+  private String normalizeDescription(String description) {
+    String text = XML_TAG_PATTERN.matcher(description).replaceAll(" ");
+    text = decodeCharacterReferences(text, DECIMAL_CHARACTER_REFERENCE_PATTERN, 10);
+    text = decodeCharacterReferences(text, HEXADECIMAL_CHARACTER_REFERENCE_PATTERN, 16);
+    return WHITESPACE_PATTERN.matcher(text).replaceAll(" ").trim();
+  }
+
+  /**
+   * Replaces every numeric character reference matched by the given pattern
+   * with the character it denotes.
+   *
+   * @param text The text in which to decode character references
+   * @param pattern Pattern whose group 1 captures the digits of the code point
+   * @param radix Radix of the captured digits (10 or 16)
+   * @return the text with all matched character references decoded
+   */
+  private String decodeCharacterReferences(String text, Pattern pattern, int radix) {
+    Matcher matcher = pattern.matcher(text);
+    StringBuffer buf = new StringBuffer();
+    while (matcher.find()) {
+      int charRef = Integer.parseInt(matcher.group(1), radix);
+      String decoded = new String(new int[]{ charRef }, 0, 1);
+      // Quote the replacement: a decoded '$' or '\' would otherwise be
+      // interpreted by appendReplacement as a group reference or an escape.
+      matcher.appendReplacement(buf, Matcher.quoteReplacement(decoded));
+    }
+    matcher.appendTail(buf);
+    return buf.toString();
+  }
+  
+  /**
+   * Writes the output file containing the JFlex macro to accept all valid
+   * ASCII TLDs: license header, provenance comment, then one alternative per
+   * entry of {@link #ASCIITLDs} with its description as a trailing comment.
+   *
+   * @throws MojoExecutionException if no TLDs were extracted, or if an
+   *  {@link IOException} is thrown while writing or closing the output file.
+   */
+  private void writeOutput() throws MojoExecutionException {
+    File outputFile = new File(outputDirectory, OUTPUT_FILENAME);
+    if (ASCIITLDs.isEmpty()) {
+      // A macro without any TLD alternative is useless; keep any existing file.
+      throw new MojoExecutionException("No TLDs extracted from "
+        + IANA_ROOT_ZONE_DB_URL + "; not writing " + outputFile.getPath());
+    }
+    Writer writer = null;
+    try {
+      writer = new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8");
+      writer.write(APACHE_LICENSE);
+      writer.write("// Generated from IANA Root Zone Database <");
+      writer.write(IANA_ROOT_ZONE_DB_URL);
+      writer.write(">\n// on ");
+      writer.write(downloadDateTime);
+      writer.write("\n// by org.apache.lucene:lucene-buildhelper-maven-plugin"
+                   + ":generate-jflex-tld-macros\n\n");
+      // NOTE(review): this emits two backslashes and a dot; JFlex may read that as
+      // an escaped backslash followed by '.' (any char), not a literal dot - confirm.
+      writer.write("ASCIITLD = \\\\. (\n");
+      String separator = "  ";  // first alternative is indented, the rest get "| "
+      for (SortedMap.Entry<String,String> entry : ASCIITLDs.entrySet()) {
+        writer.write("\t" + separator + getCaseInsensitiveRegex(entry.getKey())
+                     + "   // " + entry.getValue() + "\n");
+        separator = "| ";
+      }
+      writer.write("\t) \\\\.?   // Accept trailing root (empty) domain\n\n");
+      // Close on the success path so a failed flush/close is reported below
+      // instead of silently leaving a truncated file behind.
+      writer.close();
+      writer = null;
+    } catch (IOException e) {
+      throw new MojoExecutionException
+        ("Error writing JFlex macro output file " + outputFile.getPath(), e);
+    } finally {
+      if (null != writer) {
+        try {
+          writer.close();
+        } catch (IOException ignored) {
+          // Already failing with the original write error; nothing to add.
+        }
+      }
+    }
+    getLog().info("Generated output file: " + outputFile.getPath());
+  }
+
+  /**
+   * Returns a regex accepting both lower- and uppercase forms of the given
+   * ASCII TLD, e.g. "us" -> "[uU][sS]"; digits and '-' are copied unchanged.
+   * @param ASCIITLD The TLD for which to provide the regex
+   * @return case-insensitive regex accepting the given ASCII TLD
+   */
+  private String getCaseInsensitiveRegex(String ASCIITLD) {
+    StringBuilder regex = new StringBuilder();
+    for (char ch : ASCIITLD.toCharArray()) {
+      if (Character.isDigit(ch) || ch == '-') {
+        regex.append(ch);
+      } else {
+        // Fixed locale: a Turkish default locale would uppercase "i" to U+0130.
+        String upper = String.valueOf(ch).toUpperCase(java.util.Locale.ENGLISH);
+        regex.append('[').append(ch).append(upper).append(']');
+      }
+    }
+    return regex.toString();
+  }
+}
Index: lucene-buildhelper-maven-plugin/pom.xml
===================================================================
--- lucene-buildhelper-maven-plugin/pom.xml	(revision 0)
+++ lucene-buildhelper-maven-plugin/pom.xml	(revision 0)
@@ -0,0 +1,44 @@
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>org.apache.lucene</groupId>
+  <artifactId>lucene-buildhelper-maven-plugin</artifactId>
+  <packaging>maven-plugin</packaging> <!-- packaged as a Maven plugin; contains GenerateJflexTLDMacrosMojo -->
+  <version>1.0-SNAPSHOT</version>
+  <name>lucene-buildhelper-maven-plugin</name>
+  <url>http://maven.apache.org</url>
+  <dependencies>
+    <dependency>
+      <groupId>org.apache.maven</groupId>
+      <artifactId>maven-plugin-api</artifactId> <!-- AbstractMojo, MojoExecutionException -->
+      <version>2.0</version>
+    </dependency>
+    <dependency>
+      <groupId>org.apache.httpcomponents</groupId>
+      <artifactId>httpclient</artifactId> <!-- used by the mojo to download the IANA root zone page -->
+      <version>4.0.1</version>
+    </dependency>
+    <dependency>
+      <groupId>junit</groupId>
+      <artifactId>junit</artifactId>
+      <version>3.8.1</version>
+      <scope>test</scope>
+    </dependency>
+  </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>2.3.1</version>
+        <configuration>
+          <source>1.5</source> <!-- the mojo uses generics, for-each and String.format -->
+          <target>1.5</target>
+        </configuration>
+      </plugin>
+    </plugins>
+  </build>
+  <properties>
+    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+  </properties>
+</project>
