import java.io.*; import java.util.Collection; import java.util.ArrayList; import java.util.regex.*; /** * HTML parser for student transcripts made by Dowling College's Banner Database. Given a * user name and password, this class will retrieve and parse the transcript data on the fly, * subsequently allowing access to the various information obtained via its interface. */ public final class TranscriptParser { private static final int MATCH_NONE = 0; private static final int MATCH_TRANSFER = 1; private static final int MATCH_TAKEN = 2; private static final int MATCH_TAKING = 3; private static final int MATCH_UNDERGRAD_TOTALS = 4; private static final int SKIP_TRANSFER = 38; // 51; private static final int SKIP_TAKEN = 67; // 92; private static final int SKIP_TAKING = 49; // 62; private static final int SKIP_UNDERGRAD_TOTALS = 34; // 49; private static final int ON_CORRECT_PAGE = 0; private static final int ON_INCORRECT_PAGE = 1; private static final int ON_SYS_DOWN_PAGE = 2; private static final String TITLE_TAG = "title"; private static final String CORRECT_TITLE = "Academic Transcipt"; private static final String SYS_DOWN_TITLE = "Log In Failed"; private static final String IP_GRADE = "IP"; private static final String END_NBSP = " "; private static final String END_NOT_OFFIC = "***This is NOT an Official Transcript***"; private static final String END_TERM_TOTALS = "Term Totals (Undergraduate)"; private static final String STR_TRANSFER = "Transfer Credit Accepted By Institution"; private static final Pattern PAT_TAKEN = Pattern.compile("[a-zA-Z/]+( \\d)? (\\d){4}"); private static final String STR_TAKING = "Courses in Progress"; private static final String STR_UNDERGRAD_TOTALS = "Transcript Totals (Undergraduate)"; private HTMLParser parser; private ArrayList courseList; private boolean inTakingSection; private int transferCount, totalCount; /** * Constructs a TranscriptParser instance. */ public TranscriptParser() { inTakingSection = false; transferCount = 0; totalCount = 0; } /** * Checks a String to see if it piece of text relevant to the information * we need. This method is what determines what type of information to * parse. * @param str the String to check * @return an int indicating if a match occured (STR_TRANSFER, STR_TAKING */ private static int match(String str) { if(str.equals(STR_TRANSFER)) return MATCH_TRANSFER; else if(str.equals(STR_TAKING)) return MATCH_TAKING; else if(str.equals(STR_UNDERGRAD_TOTALS)) return MATCH_UNDERGRAD_TOTALS; else { Matcher matcher = PAT_TAKEN.matcher(str); if(matcher.matches()) return MATCH_TAKEN; } return MATCH_NONE; } /** */ private void skipTags(int numToSkip) throws IOException, TranscriptException { for(int i=0; i < numToSkip; ++i) { if(!parser.nextTag()) throw new TranscriptException("Transcript Incomplete"); } } /** */ private void parseTransfer() throws IOException, TranscriptException { skipTags(SKIP_TRANSFER); int currCol = 0; Course currCourse = null; while(parser.nextText()) { String currItem = parser.getCurrentItem(); if(currItem.equals(END_NBSP)) break; switch(currCol) { case 0: currCourse = new Course(); currCourse.setDiscipline(currItem); break; case 1: currCourse.setCourseNumber(currItem); break; case 3: currCourse.setGrade(currItem); break; case 4: currCourse.setCreditHours(currItem); break; } if(++currCol == 6) { courseList.add(currCourse); currCol = 0; } } } /** */ private void parseTaken() throws IOException, TranscriptException { skipTags(SKIP_TAKEN); int currCol = 0; Course currCourse = null; while(parser.nextText()) { String currItem = parser.getCurrentItem(); if(currCol == 0 && currItem.equals(END_TERM_TOTALS)) break; switch(currCol) { case 0: currCourse = new Course(); currCourse.setDiscipline(currItem); break; case 1: currCourse.setCourseNumber(currItem); break; case 5: currCourse.setGrade(currItem); break; case 6: currCourse.setCreditHours(currItem); break; } if(++currCol == 9) { courseList.add(currCourse); currCol = 0; } } } /** */ private void parseTaking() throws IOException, TranscriptException { skipTags(SKIP_TAKING); int currCol = 0; Course currCourse = null; while(parser.nextText()) { String currItem = parser.getCurrentItem(); if(currItem.equals(END_NOT_OFFIC)) break; switch(currCol) { case 0: currCourse = new Course(); currCourse.setDiscipline(currItem); break; case 1: currCourse.setCourseNumber(currItem); break; case 5: currCourse.setCreditHours(currItem); //totalCount += currCourse.getCreditHours(); break; } if(++currCol == 7) { currCourse.setGrade(IP_GRADE); courseList.add(currCourse); currCol = 0; } } } /** */ private void parseUnderGradTotals() throws IOException, TranscriptException { skipTags(SKIP_UNDERGRAD_TOTALS); int currCol = 0; int currRow = 0; while(parser.nextText()) { String currItem = parser.getCurrentItem(); if(currItem.equals(END_NOT_OFFIC)) break; if(currCol == 3) { switch(currRow) { case 1: transferCount += (int)Double.parseDouble(currItem); break; case 2: totalCount += (int)Double.parseDouble(currItem); break; } } if(++currCol == 7) { ++currRow; currCol = 0; } } } /** */ private int verifyIsTranscriptPage() throws IOException, TranscriptException { while(parser.nextTag()) { String currItem = parser.getCurrentItem(); if(currItem.equalsIgnoreCase(TITLE_TAG) && parser.nextText()) { currItem = parser.getCurrentItem(); if(currItem.equals(CORRECT_TITLE)) return ON_CORRECT_PAGE; else if(currItem.equals(SYS_DOWN_TITLE)) return ON_SYS_DOWN_PAGE; } } return ON_INCORRECT_PAGE; } /** * Parses a student's transcript. Given a username and password, this * method will parse all necessary information from a student's * transcript on-the-fly as it is downloaded. This method must * be called before the client should expect legitimate data * to be returned from this class' accessor methods. * @param userId The student's Banner user id. * @param password The student's Banner password. * @exception IOException If access to banner cannot be obtained. * @exception TranscriptException If the transcript obtained from banner was invalid. * @exception MalformedURLException If the URL's used internally to access banner are malformed. */ public void parse(String userId, String password) throws IOException, TranscriptException, java.net.MalformedURLException { TranscriptReciever transRec = new TranscriptReciever(); InputStream in = transRec.getTranscriptStream(userId, password); if(in == null) { throw new TranscriptException( "Unable to retrieve your transcripts at this time. The server " + "containing your transcript information is currently unavailable.", TranscriptException.ERROR_CONTINUE); } parser = new HTMLParser(in); courseList = new ArrayList(); inTakingSection = false; transferCount = 0; totalCount = 0; int currPage = verifyIsTranscriptPage(); if(currPage == ON_INCORRECT_PAGE) { throw new TranscriptException( "Unable to retrieve your transcripts. Please make ensure your " + "user id and password are correct and try again.", TranscriptException.ERROR_CONTINUE); } else if(currPage == ON_SYS_DOWN_PAGE) { throw new TranscriptException( "Unable to retrieve your transcripts at this time. The server " + "containing your transcript information is currently unavailable.", TranscriptException.ERROR_CONTINUE); } while(parser.nextText()) { String currItem = parser.getCurrentItem(); int matchType = match(currItem); switch(matchType) { case MATCH_TRANSFER: parseTransfer(); break; case MATCH_TAKING: inTakingSection = true; break; case MATCH_TAKEN: if(inTakingSection) parseTaking(); else parseTaken(); break; case MATCH_UNDERGRAD_TOTALS: parseUnderGradTotals(); break; } } in.close(); } /** */ public Collection getCourses() { return courseList; } /** */ public int getTransferCreditCount() { return transferCount; } /** */ public int getTotalCreditCount() { return totalCount; } } final class HTMLParser { private static final int TEMP_BUFF_SIZE = 128; private BufferedReader in; private char currChar; private char[] tempBuff; private char[] currItem; private int amtInBuffer; private int indxTemp; private int indxItem; /** */ HTMLParser(InputStream inStream) throws IOException { in = new BufferedReader( new InputStreamReader(inStream)); tempBuff = new char[TEMP_BUFF_SIZE]; currItem = new char[TEMP_BUFF_SIZE]; indxTemp = 0; indxItem = 0; nextBlock(); nextChar(); } /** */ private boolean nextChar() throws IOException { if(amtInBuffer == -1) return false; if(indxTemp == amtInBuffer && nextBlock() == -1) return false; currChar = tempBuff[indxTemp++]; return true; } /** */ private int nextBlock() throws IOException { indxTemp = 0; amtInBuffer = in.read(tempBuff, 0, TEMP_BUFF_SIZE); return amtInBuffer; } private boolean syncOnTag() throws IOException { if(amtInBuffer == -1) return false; if(currChar == '<') return true; while(nextChar()) if(currChar == '<') break; return amtInBuffer != -1; } /** */ private boolean syncOnText() throws IOException { if(amtInBuffer == -1) return false; int openCount = 0; if(currChar == '<') ++openCount; else if(!Character.isWhitespace(currChar)) return true; while(nextChar()) { if(currChar == '<') {++openCount;continue;} if(currChar == '>') {--openCount;continue;} if(!Character.isWhitespace(currChar) && openCount <= 0) break; } return amtInBuffer != -1; } /** */ private int stripWhiteSpace() { if(indxItem == 0) return 0; int start = 0; while(Character.isWhitespace(currItem[start])) ++start; while(Character.isWhitespace(currItem[indxItem-1])) --indxItem; return start; } private void resizeCurrItemBuffer() { char[] newArray = new char[currItem.length*2]; System.arraycopy(currItem, 0, newArray, 0, currItem.length); currItem = newArray; } /** */ public String getCurrentItem() { int start = stripWhiteSpace(); return new String(currItem, start, indxItem); } /** */ public boolean nextTag() throws IOException { indxItem = 0; if(!syncOnTag()) return false; while(nextChar()) { if(Character.getType(currChar) == Character.LINE_SEPARATOR) continue; else if(currChar == '>') break; if(indxItem == currItem.length) resizeCurrItemBuffer(); currItem[indxItem++] = currChar; } nextChar(); return indxItem > 0; } /** */ public boolean nextText() throws IOException { indxItem = 0; if(!syncOnText()) return false; currItem[indxItem++] = currChar; while(nextChar()) { if(currChar == '<') break; if(indxItem == currItem.length) resizeCurrItemBuffer(); currItem[indxItem++] = currChar; } // no nextChar() call because it should be aligned on a '<' return indxItem > 0; } }