Skip to content

Data Cleanser List

Cleanser Usage

Field Name Cleansers
RouteOrStreet street_name_cleanser, directionality_cleanser
SecondaryLocation street_name_cleanser, directionality_cleanser
MileMarker mile_marker_cleanser, white_space_cleanser
TREDSField block_cleanser, route_cleanser, address_cleanser, interstate_cleanser, private_road_cleanser, slash_bracket_cleanser, leading_trailing_space_cleanser

Cleanser List

Street Name Cleanser

"street_name_cleanser": [
    [/(?:INTERSTATE|I)[\s]*(?:-*)[\s]*(\d+[\s]*?(RAMP|HOV|REST AREA)*)/gm, "I-$1"],
    [/(.*)FORT ME?YER DR(.*)/gm, "FORT MYER DR"],
    [/(.)ACCESS$/gm, '$1'], // 'ACCESS' with leading character(s) exists
    [/(.*)HENRY G SHIRLEY MEMORIAL HW?Y(.*)/gm, "I-395"],
    [/(.*)CAPITAL BELTWAY(.*)/gm, "I-495"],
    [/(.*)WOODROW WILSON BR(.*)/gm, "I-495"],
    [/STREET/gm, "ST"],
    [/DRIVE$/gm, "DR"],
    [/ROAD$/gm, "RD"],
    [/BD$/gm, "BLVD"],
    [/BL$/gm, "BLVD"],
    [/BVD$/gm, "BLVD"],
    [/BOULEVARD$/gm, "BLVD"],
    [/BV$/gm, "BLVD"],
    [/HIGHWAY$/gm, "HWY"],
    [/H[YW]$/gm, "HWY"], //either HY or HW
    //[/HW$/gm, "HWY"],
    [/HWAY$/gm, "HWY"],
    [/PARKWAY$/gm, "PKWY"],
    [/PY$/gm, "PKWY"],
    [/PKY$/gm, "PKWY"],
    [/PKWAY$/gm, "PKWY"],
    [/PW$/gm, "PKWY"],
    [/PK$/gm, "PKWY"],
    [/PARK$/gm, "PKWY"],
    [/CIRCLE$/gm, "CIR"],
    [/CI$/gm, "CIR"],
    [/CR$/gm, "CIR"],
    [/CL$/gm, "CIR"],
    [/COURT$/gm, "CT"],
    [/CRT$/gm, "CT"],
    [/LANE$/gm, "LN"],
    [/LA$/gm, "LN"],
    [/AV$/gm, "AVE"],
    [/AVENUE$/gm, "AVE"],
    //[/WY$/gm, "WAY"],
    [/(?<!H)WY$/gm, "WAY"], // Adjusted regex to avoid replacing 'HWY'
    [/EXPRESS LN$/gm, "HOV"],
    [/TURNPIKE$/gm, "TPKE"],
    [/TNPK$/gm, "TPKE"],
    [/TN$/gm, "TPKE"],
    [/TP$/gm, "TPKE"],
    [/TL$/gm, "TRL"],
    [/LO$/gm, "LOOP"],
    [/BR$/gm, "BRANCH"],
    [/RE$/gm, "REACH"],
    [/XING$/gm, "CROSSING"],
    [/CG$/gm, "CROSSING"],
    [/CH$/gm, "CHASE"],
    [/CNCS$/gm, "CONCOURSE"],
    [/ALY$/gm, "ALLEY"],
    [/TR$/gm, "TRACE"],
    [/NONE$/gm, ""],
    //[/PKE$/gm, "PIKE"],
    [/(?<!T)PKE$/gm, "PIKE"], // Adjusted regex to avoid replacing 'TPKE'
    [/TERRACE$/gm, "TE"],
    [/BYPASS$/gm, "BYP"],
    [/BY$/gm, "BYP"],
    [/FRK$/gm, "FORK"],
    [/HOLW$/gm, "HOLLOW"],
    [/AR$/gm, "ARCH"],
    [/GR$/gm, "GRADE"],
    [/\./gm, ""],
    [/BLF$/gm, "BLUFF"],
    [/TRCE$/gm, "TRACE"],
    [/BRG$/gm, "BRIDGE"],
    [/(.*)RT 199(.*)/gm, "HUMELSINE PKWY"],
    [/(.*)RTE 199(.*)/gm, "HUMELSINE PKWY"],
    [/(.*)CHIPPENHAM RAMP(.*)/gm, "CHIPPENHAM PKWY RAMP"],
    [/^(264)$/gm, "I-264"],
    [/(.*)264 ROSEMONT RD(.*)/gm, "I-264"],
    [/(.*)CHIPPENHAM RP(.*)/gm, "CHIPPENHAM PKWY RAMP"],
    [/TUNL$/gm, "TUNNEL"],
    [/RP$/gm, "RAMP"],
    [/^(?:Us|Highway|Hwy|US|HIGHWAY|HWY)\\s*(\\d+)/gm, "US-%s"],
    [/^(81)$/gm, "I-81"],
    [/(.*)RT 288(.*)/gm, "VA-288"],
    [/KNL$/gm, "KNOLL"],
    [/CV$/gm, "COVE"],
    [/FRD$/gm, "FORD"],
    [/GRN$/gm, "GREEN"],
    [/(.*)264 WITCHDUCK RD(.*)/gm, "I-264"],
    [/\bMEM\b/gm, "MEMORIAL"],
    [/(.*)BLUE GRAY PKWY(.*)/gm, "BLUE AND GRAY PKWY"],
    [/\bMTN\b/gm, "MOUNTAIN"],
    [/TRC$/gm, "TRACE"],
    [/\bAVENUE\b/gm, "AVE"],
    [/(.*)JAMES RIVER BRIDGE BRID(.*)/gm, "JAMES RIVER BRIDGE"],
    [/^(64)$/gm, "I-64"],
    [/(.*)JEFF.* DAVIS [HWY][HIGHWAY](.*)/gm, "JEFFERSON DAVIS HWY"],
    [/X-ING$/gm, "CROSSING"],
    [/(.*)HIGHWAY FIFTEEN(.*)/gm, "US-15"],
    [/(.*)HIGHWAY FIFTY-EIGHT(.*)/gm, "US-58"],
    [/(.*)HIGHWAY FORTY-NINE(.*)/gm, "VA-49"],
    [/(.*)HIGHWAY FORTY-SEVEN(.*)/gm, "VA-47"],
    [/(.*)HIGHWAY NINE-O-THREE(.*)/gm, "VA-903"],
    [/(.*)HIGHWAY NINETY-TWO(.*)/gm, "VA-92"],
    [/(.*)HIGHWAY ONE(.*)/gm, "US-1"],
    [/(.*)HIGHWAY SIXTEEN(.*)/gm, "US-16"],
    [/(.*)VA-288 HWY(.*)/gm, "VA-288"],
    [/(.*)250\29(.*)/gm, "US-250"],
    [/(.*)29\250(.*)/gm, "US-250"],
    [/(.*)132 STHY(.*)/gm, "VA-132"],
    [/(.*)BELVIDERE\BRK(.*)/gm, "BELVIDERE ST RAMP"],
    [/(.*)BUSINESS FIFTY-EIGHT(.*)/gm, "US-58"],
    [/(.*)DOWNTOWN TUNNEL(.*)/gm, "I-264"],
    [/(.*)MEMORIALBRIDGE(.*)/gm, "MEMORIAL BRIDGE"],
    [/&\\s/gm, "AND"],
    [/EX$/gm, "EXT"],
    [/EXTENDED$/gm, "EXT"],
    [/BUS$/gm, ""],
    [/\bU HIGHWAY NO\b/gm, "US-"],
    [/\bUS HIGHWAY\b/gm, "US-"],
    [/\bUS HWY\b/gm, "US-"],
    [/RMP$/gm, "RAMP"],
    [/EXPRESSWAY$/gm, "EXPY"],
    [/ACCESS RAMP$/gm, "RAMP"],
    [/ON RAMP$/gm, "RAMP"],
    [/OFF RAMP$/gm, "RAMP"],
    [/ENTER RAMP$/gm, "RAMP"],
    [/EXIT RAMP$/gm, "RAMP"],
    [/(.*)SOUTH 71(.*)/gm, "VA-71"],
    [/(.*)NORTH 71(.*)/gm, "VA-71"],
    [/(.*)RT267(.*)/gm, "VA-267"],
    [/\bRTE \b/gm, "VA-"],
    [/\bROUTE \b/gm, "VA-"],
    [/(.*)HRBT RTE(.*)/gm, "I-64"],
    [/(.*)CROSSOVER(.*)/gm, "ACCESS"],
    [/(.*)CROSS OVER(.*)/gm, "ACCESS"],
    [/(.*)PARKING LOT(.*)/gm, "ACCESS"],        
    [/(US|VA|I)-\s+/gm, "$1-"], // Add regex to remove spaces after hyphens
]

Mile Marker Check

"mile_marker_cleanser": [
    [/[a-zA-Z]+/gm, ""]],

Directionality

"directionality_cleanser": [
    [/\bS\b/gm, ""],
    [/\bSOUTHBOUND\b/gm, ""],
    [/\bE\b/gm, ""],
    [/\bEASTBOUND\b/gm, ""],
    [/\bW\b/gm, ""],
    [/\bWESTBOUND\b/gm, ""],
    [/\bN\b/gm, ""],
    [/\bNORTHBOUND\b/gm, ""],
    [/\bNW\b/gm, ""],
    [/\bNE\b/gm, ""],
    [/\bSW\b/gm, ""],
    [/\bSE\b/gm, ""],
    [/\bSB\b/gm, ""],
    [/\bNB\b/gm, ""],
    [/\bEB\b/gm, ""],
    [/\bWB\b/gm, ""]
],

TREDS Cleansing

"block_cleanser": [
    [/^\d+\s*BLOCK\s*/gm, ""]],

"route_cleanser": [
    [/[SRB][ROTU][UES\s*TE\.\#]*([0-9]+)/gm, "$1"],
    [/(ROUTE|US|VA)*[\n\r\s\t-]*([0-9]+)/gm, "$2"]],

"address_cleanser": [
    [/^\d+-*\d*\s+(.+)/gm, "$1"]],

"interstate_cleanser": [
    [/(?:INTERSTATE|I)[\s]*(?:-*)[\s]*(\d+[\s]*(RAMP)*)/gm, "$1"],
    [/I-(\d+)\s*(WEST|EAST)/gm, "$1"]],

"private_road_cleanser":[
    [/\(*PRIVATE\)*\s*(.*)/gm, "$1"]],

"slash_bracket_cleanser": [
    [/[^a-zA-Z0-9 ]/gm,  ""]],

"leading_trailing_space_cleanser": [
        [/^\s+|\s+$/gm, ""]],  

Other Cleansers

"date_cleanser": [
    [/Z$/gm, ""]],

"white_space_cleanser": [
    [/[\n\r\s\t-]+/gm,  ""]],