options {
  STATIC = false;
}

PARSER_BEGIN(HaskellParser)
package tmp.generated_phaskell;

import java.io.*;
import java.util.*;
import cide.gast.*;
import cide.gparser.*;

public class HaskellParser{

    /**
     * Writes a token to a buffer, preceded by the text of every special
     * token that is attached in front of it.
     *
     * @param token
     *            the JavaCC {@link Token} to append
     * @param buffer
     *            the {@link StringBuffer} receiving the text
     */
    final private static void accumulate(Token token, StringBuffer buffer) {
        // firstSpecial hands back the token itself when no special token
        // precedes it; in that case nothing has to be emitted up front.
        Token head = firstSpecial(token);
        if (head != token) {
            // Follow the chain of special tokens through their next links
            // until it runs out.
            for (Token cursor = head; cursor != null; cursor = cursor.next) {
                buffer.append(cursor.toString());
            }
        }

        // The token's own text always goes last.
        buffer.append(token.toString());
    }

    /**
     * Accumulates {@link Token} objects from the token stream, respecting
     * nested {@code open} / {@code close} pairs, until an unmatched
     * {@code close} token is the next token in the stream. The method
     * assumes that an {@code open} token has just been read, so the initial
     * nesting level is 1. The unmatched {@code close} token is left in the
     * stream.
     *
     * @param open
     *            token kind that opens a nested region
     * @param close
     *            token kind that closes a nested region
     * @return a synthetic {@link Token} whose image is the concatenated text
     *         of the accumulated tokens and their special tokens, with
     *         position information spanning the accumulated region.
     *
     * @throws ParseException
     *             if end-of-file is reached before the unmatched
     *             {@code close} token.
*/
    final private Token accumulateNestedRegion(int open, int close)
            throws ParseException {
        StringBuffer text = new StringBuffer();
        Token current = getToken(1);

        // The synthetic result starts where the first accumulated token
        // (including its leading special tokens) starts.
        Token region = Token.newToken(UNANTICIPATED_SYMBOL);
        region.specialToken = null;
        Token first = firstSpecial(current);
        region.beginColumn = first.beginColumn;
        region.beginLine = first.beginLine;
        region.offset = first.offset;

        // One open token is taken as already read, hence depth 1. The depth
        // never drops below 1 inside the loop: the loop is left through the
        // break (unmatched close) or through the EOF exception.
        int depth = 1;
        while (true) {
            if (current.kind == EOF) {
                throw new ParseException("accumulating from line "
                        + region.beginLine + " at column " + region.beginColumn
                        + ": EOF reached before ending " + tokenImage[close]
                        + " found");
            }

            if (current.kind == open) {
                depth++;
            } else if (current.kind == close) {
                if (depth == 1) {
                    // Unmatched close: stop and leave it in the stream.
                    break;
                }
                depth--;
            }

            // Extend the result so that it ends with the current token.
            region.endColumn = current.endColumn;
            region.endLine = current.endLine;
            region.length = (current.offset - region.offset) + current.length;
            region.next = current.next;

            accumulate(current, text);
            getNextToken();
            current = getToken(1);
        }

        region.image = text.toString();
        return region;
    }

    /** Convenience overload of the array-based variant for one end token kind. */
    final private Token accumulateUntil(boolean considerInnerBlocks,
            int endTokenKind) throws ParseException {
        int[] endKinds = { endTokenKind };
        return accumulateUntil(considerInnerBlocks, endKinds);
    }

    /** Convenience overload of the array-based variant for two end token kinds. */
    final private Token accumulateUntil(boolean considerInnerBlocks,
            int endTokenKind1, int endTokenKind2) throws ParseException {
        int[] endKinds = { endTokenKind1, endTokenKind2 };
        return accumulateUntil(considerInnerBlocks, endKinds);
    }

    /** Convenience overload of the array-based variant for three end token kinds. */
    final private Token accumulateUntil(boolean considerInnerBlocks,
            int endTokenKind1, int endTokenKind2, int endTokenKind3)
            throws ParseException {
        int[] endKinds = { endTokenKind1, endTokenKind2, endTokenKind3 };
        return accumulateUntil(considerInnerBlocks, endKinds);
    }

    /** Convenience overload of the array-based variant for four end token kinds. */
    final private Token accumulateUntil(boolean considerInnerBlocks,
            int endTokenKind1, int endTokenKind2, int endTokenKind3,
            int endTokenKind4) throws ParseException {
        int[] endKinds = { endTokenKind1, endTokenKind2, endTokenKind3,
                endTokenKind4 };
        return accumulateUntil(considerInnerBlocks, endKinds);
    }

    /** Returns true if the next token in the stream is not of the given kind. */
    final private boolean isNot(int kind) {
        Token upcoming = getToken(1);
        return upcoming.kind != kind;
    }

    /** Returns true if the next token is of neither given kind. */
    final private boolean isNot(int k1, int k2) {
        if (!isNot(k1)) {
            return false;
        }
        return isNot(k2);
    }

    /** Returns true if the next token is of none of the three given kinds. */
    final private boolean isNot(int k1, int k2, int k3) {
        return isNot(k1) && isNot(k2) && isNot(k3);
    }

    /** Returns true if the next token is of none of the four given kinds. */
    final private boolean isNot(int k1, int k2, int k3, int k4) {
        return isNot(k1) && isNot(k2) && isNot(k3) && isNot(k4);
    }

    /**
     * Looks ahead in the token stream, without consuming anything, and
     * reports whether a CONTEXT_ARROW occurs before the next SEMICOLON.
     *
     * @return true if CONTEXT_ARROW is found first, false if SEMICOLON is.
     * @throws ParseException
     *             if end-of-file is found before either token.
     */
    final private boolean isContext() throws ParseException {
        for (int ahead = 1;; ahead++) {
            Token candidate = getToken(ahead);
            if (candidate.kind == EOF) {
                // Position reported is that of the parser's current token.
                throw new ParseException("EOF found before ; or => (line "
                        + token.beginLine + ", column " + token.beginColumn
                        + ")");
            }
            if (candidate.kind == SEMICOLON) {
                return false;
            }
            if (candidate.kind == CONTEXT_ARROW) {
                return true;
            }
        }
    }

    /**
     * Accumulates {@link Token} objects from the token stream until the next
     * token in the stream is of one of the kinds in {@code endTokenKinds}.
     * The terminating token is not part of the result and is left in the
     * stream. When {@code considerInnerBlocks} is set, LEFT_CURLY and
     * RIGHT_CURLY tokens are counted as nesting, and end tokens are ignored
     * while the nesting level is above zero.
     *
     * @param considerInnerBlocks
     *            whether curly-brace nesting suppresses end tokens
     * @param endTokenKinds
     *            the token kinds that terminate the accumulation
     * @return a synthetic {@link Token} formed by concatenating all
     *         intervening tokens and special tokens.
     * @throws ParseException
     *             if end-of-file is reached before an end token, or if no
     *             text was accumulated at all.
*/
    final private Token accumulateUntil(boolean considerInnerBlocks,
            int[] endTokenKinds) throws ParseException {
        StringBuffer text = new StringBuffer();
        Token current = getToken(1);

        // The synthetic result starts where the first accumulated token
        // (including its leading special tokens) starts.
        Token collected = Token.newToken(UNANTICIPATED_SYMBOL);
        collected.specialToken = null;
        Token first = firstSpecial(current);
        collected.beginColumn = first.beginColumn;
        collected.beginLine = first.beginLine;
        collected.offset = first.offset;

        // Keep going while inside a nested block, or while the upcoming
        // token is not one of the requested end tokens.
        int depth = 0;
        while (depth > 0 || !contains(endTokenKinds, current.kind)) {
            // Extend the result so that it ends with the current token.
            collected.endColumn = current.endColumn;
            collected.endLine = current.endLine;
            collected.next = current.next;
            collected.length =
                    (current.offset - collected.offset) + current.length;

            if (current.kind == EOF) {
                throw new ParseException("from line " + collected.beginLine
                        + " at column " + collected.beginColumn
                        + ": EOF reached before " + images(endTokenKinds)
                        + " found");
            }

            // Curly braces only affect nesting when the caller asked for it.
            if (considerInnerBlocks) {
                if (current.kind == LEFT_CURLY) {
                    depth++;
                } else if (current.kind == RIGHT_CURLY) {
                    depth--;
                }
            }

            accumulate(current, text);
            getNextToken();
            current = getToken(1);
        }

        // An end token right at the start means nothing was matched.
        if (text.length() == 0) {
            throw new ParseException(
                    "syntax error - empty pseudo-match (line "
                            + collected.beginLine + ", column "
                            + collected.beginColumn + ")");
        }

        collected.image = text.toString();
        return collected;
    }

    /**
     * Builds the text used in error messages for a set of token kinds: the
     * {@code tokenImage} entry of every kind, each followed by one space.
     */
    private String images(int[] endTokenKinds) {
        StringBuilder joined = new StringBuilder();
        for (int kind : endTokenKinds) {
            joined.append(tokenImage[kind]).append(" ");
        }
        return joined.toString();
    }

    /** Returns true if {@code kind} occurs in {@code endTokenKinds}. */
    private boolean contains(int[] endTokenKinds, int kind) {
        for (int index = 0; index < endTokenKinds.length; index++) {
            if (endTokenKinds[index] == kind) {
                return true;
            }
        }
        return false;
    }

    /**
     * Finds the first token, special or otherwise, in the list of special
     * tokens preceding the given {@link Token}. If that list is non-empty
     * the result is a special token; otherwise it is the given token itself.
     *
     * @param token the given {@link Token}.
     * @return the first special token preceding {@code token}, or
     *         {@code token} when it has no special tokens.
**/
    final private static Token firstSpecial (Token token) {
        // Step backwards through the specialToken links until none is left;
        // a token without special tokens is returned unchanged.
        while (token.specialToken != null) token = token.specialToken ;
        return token ;
    }

}

PARSER_END(HaskellParser)

// Pseudo-tokens: each JAVACODE production below scans raw tokens by hand
// and returns them as a single synthetic Token.

// Text up to (not including) the ")" that matches an already read "(".
JAVACODE
Token findListContent () {
    return accumulateNestedRegion(LEFT_PAREN, RIGHT_PAREN) ;
}

// Text up to (not including) the "}" that matches an already read "{".
JAVACODE
Token findBlockContent () {
    return accumulateNestedRegion (LEFT_CURLY, RIGHT_CURLY) ;
}

JAVACODE
Token findConRest () {
    // (block | ~(SEMICOLON|ALT|RIGHT_CURLY|DERIVING))+
    // Curly-brace nesting is respected; an empty match raises a ParseException.
    return accumulateUntil(true,SEMICOLON,ALT,RIGHT_CURLY,DERIVING) ;
}

JAVACODE
Token findNonstddeclRest () {
    // (block | ~(SEMICOLON|RIGHT_CURLY))+
    return accumulateUntil(true,SEMICOLON,RIGHT_CURLY) ;
}

JAVACODE
Token findUntilSemiOrCCB () {
    // (~(SEMICOLON|RIGHT_CURLY))+   -- curly-brace nesting is NOT tracked here
    return accumulateUntil(false,SEMICOLON,RIGHT_CURLY) ;
}

JAVACODE
Token findUntilSemiOrContextArrow () {
    // (~(CONTEXT_ARROW|SEMICOLON))+
    return accumulateUntil(false,CONTEXT_ARROW,SEMICOLON) ;
}

JAVACODE
Token findUntilSemiOrEquals () {
    // (~(EQUALS|SEMICOLON))+
    return accumulateUntil(false,SEMICOLON,EQUALS) ;
}

JAVACODE
Token findUntilEquals () {
    // (~(EQUALS))+
    return accumulateUntil(false,EQUALS) ;
}

//options {
// k = 9;
// // Allow any char but \uFFFF (16 bit -1)
// charVocabulary='\u0000'..'\uFFFE';
//}

// NOTE(review): most alternatives in the TOKEN sections below are empty and
// some quotes are unbalanced. The <NAME : ...> definitions (and the <NAME>
// references in the productions further down) appear to have been lost from
// this copy of the file and must be restored from the original grammar
// before it can be processed. Line breaks inside the damaged parts are a
// best-effort reconstruction.
TOKEN : { | | | | | | | | | | | | | | | | | | | " > | |
//|
//|
//|
}

// Whitespace and Haskell comments are special tokens: they are not seen by
// the productions but stay attached to the following token via specialToken.
SPECIAL_TOKEN : {
  " "
| "\t"
| "\n"
| "\r"
// line comment: "--" up to the end of the line
| <"--" (~["\n","\r"])* ("\n" | "\r" | "\r\n")>
// block comment: "{-" ... "-}"
| <"{-"(~["-"])*"-"("-" | ~["-", "}"](~["-"])*"-")*"}">
}

//PPDIRECTIVE
// : '#' (~('\n'))* NEWLINE { $setType(Token.SKIP); }
// ;

TOKEN : {
  ( | | "'" )* >
| ( | | "'" )*>
| | "0o" | "0O" | "0x" | "0X" )>
| <#DECIMAL : ()+ >
| <#HEXADECIMAL : ()+>
| <#OCTAL : ()+ >
//| ) "'" >
//
//| |)* "\"" >
| < CHARACTER_LITERAL: "'" ( (~["'","\\","\n","\r"]) | ("\\" ( ["n","t","b","r","f","\\","'","\""] | ["0"-"7"] ( ["0"-"7"] )? | ["0"-"3"] ["0"-"7"] ["0"-"7"] ) ) ) "'" >
| < STRING_LITERAL: "\"" ( (~["\"","\\","\n","\r"]) | ("\\" ( ["n","t","b","r","f","\\","'","\""] | ["0"-"7"] ( ["0"-"7"] )? | ["0"-"3"] ["0"-"7"] ["0"-"7"] ) ) )* "\"" >
| <#CHARACTER_ESCAPE : "\\" ( | | | "x" | "o" ) >
|
| <#STRING_ESCAPE : ("\\&" | )>
| <#ASCII : "NUL" >
//|
//| <#GAP : "\\" ( | )+ "\\" >
| <#LOWER_CASE: ["a"-"z","_"]>
| <#UPPER_CASE: ["A"-"Z"]>
| <#LETTER : | >
| <#DIGIT: ["0"-"9"]>
| <#HEXIT: ( | ["A"-"F"] | ["a"-"f"] )>
| <#OCTIT: ["0"-"7"]>
| | | | | | | | | | ( | ":" )* >
| | ":" )* >
| " | "?" | "@" | "\\" | "^" | "-" | "~" | "|" >
|
}

// NOTE(review): presumably this section declared UNANTICIPATED_SYMBOL, the
// kind used for the synthetic tokens built by the Java helpers - confirm.
TOKEN :{ }

GRAMMARSTART

// NOTE(review): LL("{...}") guards in front of a JAVATOKEN presumably act as
// lookahead conditions: the scan is only entered when the next token is not
// already a terminator, because accumulateUntil raises a ParseException on
// an empty match - confirm against the gCIDE grammar documentation.

module: "module" modid (exports)? @! "where" body ;

qconid : //TODO |
  ;

exports : "(" [exportsList] [","] ")" ;

exportsList: &LI export (LL(2) "," &LI export)*;

export :
    qvar
  | qtyconorcls [details] //-- type or class
  | "module" modid
  ;

details :
    LL(2) "(" /*TODO ".."*/ ")"
  | "(" [cnamelist] ")"
  ;

cnamelist: &LI cname ( "," &LI cname)*;

qtyconorcls: qconid;

cname: | ;

qvar: qvarid | "(" qvarsym ")" ;

qvarid: //TODO|
  ;

qvarsym://TODO |
  ;

modid:qconid;

conid:;

body :
    LL(2) "{" @+! impdecls [ ";"@! @! topdecls] @-! "}"
  | "{" @+! topdecls @-! "}"
  ;

impdecls : &LI impdecl ( LL(2) ";" @! &LI impdecl )* ;

impdecl:
    "import" ("qualified")? modid ("as" modid)? ( impspec )?
  | //empty declaration
  ;

impspec : ("hiding" )? "(" [imports] ")" ;

//-- should allow a trailing comma!
imports : imp (LL("\",\" imp()") "," imp)* ;

imp: var | tyconorcls [list];

tyconorcls: conid;

topdecls: &LI topdecl ( ";"@! @! &LI topdecl )* ;

topdecl :
    "type" simpletype declrhs :: typedecl
  | "data" optContext simpletype "=" constrs [deriving] :: datadecl
  | "newtype" optContext simpletype declrhs :: newtypedecl
  | "class" optContext conid tyvar ("where" cdecls)? :: classdecl
  | "instance" optContext qconid inst ("where" block)? :: instancedecl
  | "default" list :: defaultdecl
  | decl :: declaration
  ;

decl:
    LL("vars() \"::\"") signdecl :: typeSignature
  | fixdecl :: fixityDeclaration
  | LL("funlhs() \"=\"") valdef :: valueDeclaration
  | nonstddecl :: nonStandardDeclaration
  | //empty declaration
  ;

declrhs: "=" [LL("{isNot(SEMICOLON,RIGHT_CURLY)}") JAVATOKEN(findNonstddeclRest)];

// isContext() scans ahead for "=>" before the next ";" without consuming input.
optContext: [LL("{isContext()}") context "=>"];

deriving : "deriving" JAVATOKEN(findNonstddeclRest) ;

constrs : &LI constr ("|" &LI constr)* ;

constr: conP [LL("{isNot(SEMICOLON,ALT,RIGHT_CURLY,DERIVING)}") JAVATOKEN(findConRest)];

inst:
    gtycon
  | "(" gtycon (tyvar)* ")"
  | "[" conid "]"
  ;

gtycon: qtyconorcls;

cdecls: "{" ( cdeclsI )? "}" ;

cdeclsI: &LI cdecl (";"@! @! &LI cdecl)*;

cdecl: LL("vars() \"::\"") signdecl | nonstddecl;

context: [LL("{isNot(SEMICOLON,CONTEXT_ARROW)}") JAVATOKEN(findUntilSemiOrContextArrow)];//(~(CONTEXT_ARROW|SEMICOLON))*;

simpletype: [LL("{isNot(EQUALS)}") JAVATOKEN(findUntilEquals)];

//matches almost anything
//used to ignore non-standard declarations
nonstddecl : JAVATOKEN(findNonstddeclRest) ;

//the valdef rule doesn't come directly from the report spec
//it is inherited from the Language.Haskell.Parser impl
valdef: funlhs declrhs ;

//the fixdecl rule also doesn't come directly from the report spec
fixdecl: fixity [] ops;

fixity: "infixl" | "infixr" | "infix";

ops: &LI op ("," &LI op )* ;

signdecl: vars "::" [LL("{isNot(SEMICOLON,RIGHT_CURLY)}") JAVATOKEN(findUntilSemiOrCCB)];

vars: &LI var ("," &LI var)*;

var : | "(" ")" ;

conP: | "(" ")" ;

tyvar : ;

funlhs: funlhsL [LL("{isNot(SEMICOLON,EQUALS,LEFT_CURLY)}") funlhsR];

funlhsL: LL(" varop()") varop | var;

funlhsR: block | LL("{isNot(SEMICOLON,EQUALS)}") JAVATOKEN(findUntilSemiOrEquals);

varop: | "`" "`" ;

conop: | "`" "`";

op: LL(2) varop | conop;

// The braces / parentheses themselves are matched here; only the content in
// between is collected by the JAVACODE scanners, which stop before the
// matching closing token.
block : "{" [LL("{isNot(RIGHT_CURLY)}") JAVATOKEN(findBlockContent)] "}";

list : "(" [LL("{isNot(RIGHT_PAREN)}") JAVATOKEN(findListContent)] ")";