Mypal/parser/xml/nsSAXXMLReader.cpp

720 lines
20 KiB
C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "nsIInputStream.h"
#include "nsNetCID.h"
#include "nsNetUtil.h"
#include "nsNullPrincipal.h"
#include "nsIParser.h"
#include "nsParserCIID.h"
#include "nsStreamUtils.h"
#include "nsStringStream.h"
#include "nsIScriptError.h"
#include "nsSAXAttributes.h"
#include "nsSAXLocator.h"
#include "nsSAXXMLReader.h"
#include "nsCharsetSource.h"
#include "mozilla/dom/EncodingUtils.h"
using mozilla::dom::EncodingUtils;
#define XMLNS_URI "http://www.w3.org/2000/xmlns/"
static NS_DEFINE_CID(kParserCID, NS_PARSER_CID);
NS_IMPL_CYCLE_COLLECTION(nsSAXXMLReader,
mContentHandler,
mDTDHandler,
mErrorHandler,
mLexicalHandler,
mDeclarationHandler,
mBaseURI,
mListener,
mParserObserver)
NS_IMPL_CYCLE_COLLECTING_ADDREF(nsSAXXMLReader)
NS_IMPL_CYCLE_COLLECTING_RELEASE(nsSAXXMLReader)
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(nsSAXXMLReader)
NS_INTERFACE_MAP_ENTRY(nsISAXXMLReader)
NS_INTERFACE_MAP_ENTRY(nsIExpatSink)
NS_INTERFACE_MAP_ENTRY(nsIExtendedExpatSink)
NS_INTERFACE_MAP_ENTRY(nsIContentSink)
NS_INTERFACE_MAP_ENTRY(nsIRequestObserver)
NS_INTERFACE_MAP_ENTRY(nsIStreamListener)
NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISAXXMLReader)
NS_INTERFACE_MAP_END
nsSAXXMLReader::nsSAXXMLReader() :
mIsAsyncParse(false),
mEnableNamespacePrefixes(false)
{
}
// nsIContentSink
NS_IMETHODIMP
nsSAXXMLReader::WillBuildModel(nsDTDMode)
{
if (mContentHandler)
return mContentHandler->StartDocument();
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::DidBuildModel(bool aTerminated)
{
if (mContentHandler)
return mContentHandler->EndDocument();
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::SetParser(nsParserBase *aParser)
{
return NS_OK;
}
// nsIExtendedExpatSink
NS_IMETHODIMP
nsSAXXMLReader::HandleStartElement(const char16_t *aName,
const char16_t **aAtts,
uint32_t aAttsCount,
uint32_t aLineNumber)
{
if (!mContentHandler)
return NS_OK;
RefPtr<nsSAXAttributes> atts = new nsSAXAttributes();
if (!atts)
return NS_ERROR_OUT_OF_MEMORY;
nsAutoString uri, localName, qName;
for (; *aAtts; aAtts += 2) {
SplitExpatName(aAtts[0], uri, localName, qName);
// XXX don't have attr type information
NS_NAMED_LITERAL_STRING(cdataType, "CDATA");
// could support xmlns reporting, it's a standard SAX feature
if (mEnableNamespacePrefixes || !uri.EqualsLiteral(XMLNS_URI)) {
NS_ASSERTION(aAtts[1], "null passed to handler");
atts->AddAttribute(uri, localName, qName, cdataType,
nsDependentString(aAtts[1]));
}
}
// Deal with the element name
SplitExpatName(aName, uri, localName, qName);
return mContentHandler->StartElement(uri, localName, qName, atts);
}
NS_IMETHODIMP
nsSAXXMLReader::HandleEndElement(const char16_t *aName)
{
if (mContentHandler) {
nsAutoString uri, localName, qName;
SplitExpatName(aName, uri, localName, qName);
return mContentHandler->EndElement(uri, localName, qName);
}
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::HandleComment(const char16_t *aName)
{
NS_ASSERTION(aName, "null passed to handler");
if (mLexicalHandler)
return mLexicalHandler->Comment(nsDependentString(aName));
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::HandleCDataSection(const char16_t *aData,
uint32_t aLength)
{
nsresult rv;
if (mLexicalHandler) {
rv = mLexicalHandler->StartCDATA();
NS_ENSURE_SUCCESS(rv, rv);
}
if (mContentHandler) {
rv = mContentHandler->Characters(Substring(aData, aData+aLength));
NS_ENSURE_SUCCESS(rv, rv);
}
if (mLexicalHandler) {
rv = mLexicalHandler->EndCDATA();
NS_ENSURE_SUCCESS(rv, rv);
}
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::HandleStartDTD(const char16_t *aName,
const char16_t *aSystemId,
const char16_t *aPublicId)
{
char16_t nullChar = char16_t(0);
if (!aName)
aName = &nullChar;
if (!aSystemId)
aSystemId = &nullChar;
if (!aPublicId)
aPublicId = &nullChar;
mSystemId = aSystemId;
mPublicId = aPublicId;
if (mLexicalHandler) {
return mLexicalHandler->StartDTD(nsDependentString(aName),
nsDependentString(aPublicId),
nsDependentString(aSystemId));
}
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::HandleDoctypeDecl(const nsAString & aSubset,
const nsAString & aName,
const nsAString & aSystemId,
const nsAString & aPublicId,
nsISupports* aCatalogData)
{
if (mLexicalHandler)
return mLexicalHandler->EndDTD();
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::HandleCharacterData(const char16_t *aData,
uint32_t aLength)
{
if (mContentHandler)
return mContentHandler->Characters(Substring(aData, aData+aLength));
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::HandleStartNamespaceDecl(const char16_t *aPrefix,
const char16_t *aUri)
{
if (!mContentHandler)
return NS_OK;
char16_t nullChar = char16_t(0);
if (!aPrefix)
aPrefix = &nullChar;
if (!aUri)
aUri = &nullChar;
return mContentHandler->StartPrefixMapping(nsDependentString(aPrefix),
nsDependentString(aUri));
}
NS_IMETHODIMP
nsSAXXMLReader::HandleEndNamespaceDecl(const char16_t *aPrefix)
{
if (!mContentHandler)
return NS_OK;
if (aPrefix)
return mContentHandler->EndPrefixMapping(nsDependentString(aPrefix));
return mContentHandler->EndPrefixMapping(EmptyString());
}
NS_IMETHODIMP
nsSAXXMLReader::HandleProcessingInstruction(const char16_t *aTarget,
const char16_t *aData)
{
NS_ASSERTION(aTarget && aData, "null passed to handler");
if (mContentHandler) {
return mContentHandler->ProcessingInstruction(nsDependentString(aTarget),
nsDependentString(aData));
}
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::HandleNotationDecl(const char16_t *aNotationName,
const char16_t *aSystemId,
const char16_t *aPublicId)
{
NS_ASSERTION(aNotationName, "null passed to handler");
if (mDTDHandler) {
char16_t nullChar = char16_t(0);
if (!aSystemId)
aSystemId = &nullChar;
if (!aPublicId)
aPublicId = &nullChar;
return mDTDHandler->NotationDecl(nsDependentString(aNotationName),
nsDependentString(aSystemId),
nsDependentString(aPublicId));
}
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::HandleUnparsedEntityDecl(const char16_t *aEntityName,
const char16_t *aSystemId,
const char16_t *aPublicId,
const char16_t *aNotationName)
{
NS_ASSERTION(aEntityName && aNotationName, "null passed to handler");
if (mDTDHandler) {
char16_t nullChar = char16_t(0);
if (!aSystemId)
aSystemId = &nullChar;
if (!aPublicId)
aPublicId = &nullChar;
return mDTDHandler->UnparsedEntityDecl(nsDependentString(aEntityName),
nsDependentString(aSystemId),
nsDependentString(aPublicId),
nsDependentString(aNotationName));
}
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::HandleXMLDeclaration(const char16_t *aVersion,
const char16_t *aEncoding,
int32_t aStandalone)
{
NS_ASSERTION(aVersion, "null passed to handler");
if (mDeclarationHandler) {
char16_t nullChar = char16_t(0);
if (!aEncoding)
aEncoding = &nullChar;
mDeclarationHandler->HandleXMLDeclaration(nsDependentString(aVersion),
nsDependentString(aEncoding),
aStandalone > 0);
}
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::ReportError(const char16_t* aErrorText,
const char16_t* aSourceText,
nsIScriptError *aError,
bool *_retval)
{
NS_PRECONDITION(aError && aSourceText && aErrorText, "Check arguments!!!");
// Normally, the expat driver should report the error.
*_retval = true;
if (mErrorHandler) {
uint32_t lineNumber;
nsresult rv = aError->GetLineNumber(&lineNumber);
NS_ENSURE_SUCCESS(rv, rv);
uint32_t columnNumber;
rv = aError->GetColumnNumber(&columnNumber);
NS_ENSURE_SUCCESS(rv, rv);
nsCOMPtr<nsISAXLocator> locator = new nsSAXLocator(mPublicId,
mSystemId,
lineNumber,
columnNumber);
if (!locator)
return NS_ERROR_OUT_OF_MEMORY;
rv = mErrorHandler->FatalError(locator, nsDependentString(aErrorText));
if (NS_SUCCEEDED(rv)) {
// The error handler has handled the script error. Don't log to console.
*_retval = false;
}
}
return NS_OK;
}
// nsISAXXMLReader
NS_IMETHODIMP
nsSAXXMLReader::GetBaseURI(nsIURI **aBaseURI)
{
NS_IF_ADDREF(*aBaseURI = mBaseURI);
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::SetBaseURI(nsIURI *aBaseURI)
{
mBaseURI = aBaseURI;
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::GetContentHandler(nsISAXContentHandler **aContentHandler)
{
NS_IF_ADDREF(*aContentHandler = mContentHandler);
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::SetContentHandler(nsISAXContentHandler *aContentHandler)
{
mContentHandler = aContentHandler;
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::GetDtdHandler(nsISAXDTDHandler **aDtdHandler)
{
NS_IF_ADDREF(*aDtdHandler = mDTDHandler);
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::SetDtdHandler(nsISAXDTDHandler *aDtdHandler)
{
mDTDHandler = aDtdHandler;
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::GetErrorHandler(nsISAXErrorHandler **aErrorHandler)
{
NS_IF_ADDREF(*aErrorHandler = mErrorHandler);
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::SetErrorHandler(nsISAXErrorHandler *aErrorHandler)
{
mErrorHandler = aErrorHandler;
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::SetFeature(const nsAString &aName, bool aValue)
{
if (aName.EqualsLiteral("http://xml.org/sax/features/namespace-prefixes")) {
mEnableNamespacePrefixes = aValue;
return NS_OK;
}
return NS_ERROR_NOT_IMPLEMENTED;
}
NS_IMETHODIMP
nsSAXXMLReader::GetFeature(const nsAString &aName, bool *aResult)
{
if (aName.EqualsLiteral("http://xml.org/sax/features/namespace-prefixes")) {
*aResult = mEnableNamespacePrefixes;
return NS_OK;
}
return NS_ERROR_NOT_IMPLEMENTED;
}
NS_IMETHODIMP
nsSAXXMLReader::GetDeclarationHandler(nsIMozSAXXMLDeclarationHandler **aDeclarationHandler) {
NS_IF_ADDREF(*aDeclarationHandler = mDeclarationHandler);
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::SetDeclarationHandler(nsIMozSAXXMLDeclarationHandler *aDeclarationHandler) {
mDeclarationHandler = aDeclarationHandler;
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::GetLexicalHandler(nsISAXLexicalHandler **aLexicalHandler)
{
NS_IF_ADDREF(*aLexicalHandler = mLexicalHandler);
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::SetLexicalHandler(nsISAXLexicalHandler *aLexicalHandler)
{
mLexicalHandler = aLexicalHandler;
return NS_OK;
}
NS_IMETHODIMP
nsSAXXMLReader::SetProperty(const nsAString &aName, nsISupports* aValue)
{
return NS_ERROR_NOT_IMPLEMENTED;
}
NS_IMETHODIMP
nsSAXXMLReader::GetProperty(const nsAString &aName, bool *aResult)
{
return NS_ERROR_NOT_IMPLEMENTED;
}
NS_IMETHODIMP
nsSAXXMLReader::ParseFromString(const nsAString &aStr,
const char *aContentType)
{
// Don't call this in the middle of an async parse
NS_ENSURE_TRUE(!mIsAsyncParse, NS_ERROR_FAILURE);
NS_ConvertUTF16toUTF8 data(aStr);
// The new stream holds a reference to the buffer
nsCOMPtr<nsIInputStream> stream;
nsresult rv = NS_NewByteInputStream(getter_AddRefs(stream),
data.get(), data.Length(),
NS_ASSIGNMENT_DEPEND);
NS_ENSURE_SUCCESS(rv, rv);
return ParseFromStream(stream, "UTF-8", aContentType);
}
NS_IMETHODIMP
nsSAXXMLReader::ParseFromStream(nsIInputStream *aStream,
const char *aCharset,
const char *aContentType)
{
// Don't call this in the middle of an async parse
NS_ENSURE_TRUE(!mIsAsyncParse, NS_ERROR_FAILURE);
NS_ENSURE_ARG(aStream);
NS_ENSURE_ARG(aContentType);
// Put the nsCOMPtr out here so we hold a ref to the stream as needed
nsresult rv;
nsCOMPtr<nsIInputStream> bufferedStream;
if (!NS_InputStreamIsBuffered(aStream)) {
rv = NS_NewBufferedInputStream(getter_AddRefs(bufferedStream),
aStream, 4096);
NS_ENSURE_SUCCESS(rv, rv);
aStream = bufferedStream;
}
rv = EnsureBaseURI();
NS_ENSURE_SUCCESS(rv, rv);
nsCOMPtr<nsIPrincipal> nullPrincipal = nsNullPrincipal::Create();
// The following channel is never openend, so it does not matter what
// securityFlags we pass; let's follow the principle of least privilege.
nsCOMPtr<nsIChannel> parserChannel;
rv = NS_NewInputStreamChannel(getter_AddRefs(parserChannel),
mBaseURI,
aStream,
nullPrincipal,
nsILoadInfo::SEC_REQUIRE_SAME_ORIGIN_DATA_IS_BLOCKED,
nsIContentPolicy::TYPE_OTHER,
nsDependentCString(aContentType));
if (!parserChannel || NS_FAILED(rv))
return NS_ERROR_FAILURE;
if (aCharset)
parserChannel->SetContentCharset(nsDependentCString(aCharset));
rv = InitParser(nullptr, parserChannel);
NS_ENSURE_SUCCESS(rv, rv);
rv = mListener->OnStartRequest(parserChannel, nullptr);
if (NS_FAILED(rv))
parserChannel->Cancel(rv);
/* When parsing a new document, we need to clear the XML identifiers.
HandleStartDTD will set these values from the DTD declaration tag.
We won't have them, of course, if there's a well-formedness error
before the DTD tag (such as a space before an XML declaration).
*/
mSystemId.Truncate();
mPublicId.Truncate();
nsresult status;
parserChannel->GetStatus(&status);
uint64_t offset = 0;
while (NS_SUCCEEDED(rv) && NS_SUCCEEDED(status)) {
uint64_t available;
rv = aStream->Available(&available);
if (rv == NS_BASE_STREAM_CLOSED) {
rv = NS_OK;
available = 0;
}
if (NS_FAILED(rv)) {
parserChannel->Cancel(rv);
break;
}
if (! available)
break; // blocking input stream has none available when done
if (available > UINT32_MAX)
available = UINT32_MAX;
rv = mListener->OnDataAvailable(parserChannel, nullptr,
aStream,
offset,
(uint32_t)available);
if (NS_SUCCEEDED(rv))
offset += available;
else
parserChannel->Cancel(rv);
parserChannel->GetStatus(&status);
}
rv = mListener->OnStopRequest(parserChannel, nullptr, status);
mListener = nullptr;
return rv;
}
NS_IMETHODIMP
nsSAXXMLReader::ParseAsync(nsIRequestObserver *aObserver)
{
mParserObserver = aObserver;
mIsAsyncParse = true;
return NS_OK;
}
// nsIRequestObserver
NS_IMETHODIMP
nsSAXXMLReader::OnStartRequest(nsIRequest *aRequest, nsISupports *aContext)
{
NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE);
nsresult rv;
rv = EnsureBaseURI();
NS_ENSURE_SUCCESS(rv, rv);
nsCOMPtr<nsIChannel> channel = do_QueryInterface(aRequest);
rv = InitParser(mParserObserver, channel);
NS_ENSURE_SUCCESS(rv, rv);
// we don't need or want this anymore
mParserObserver = nullptr;
return mListener->OnStartRequest(aRequest, aContext);
}
NS_IMETHODIMP
nsSAXXMLReader::OnStopRequest(nsIRequest *aRequest, nsISupports *aContext,
nsresult status)
{
NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE);
NS_ENSURE_STATE(mListener);
nsresult rv = mListener->OnStopRequest(aRequest, aContext, status);
mListener = nullptr;
mIsAsyncParse = false;
return rv;
}
// nsIStreamListener
NS_IMETHODIMP
nsSAXXMLReader::OnDataAvailable(nsIRequest *aRequest, nsISupports *aContext,
nsIInputStream *aInputStream, uint64_t offset,
uint32_t count)
{
NS_ENSURE_TRUE(mIsAsyncParse, NS_ERROR_FAILURE);
NS_ENSURE_STATE(mListener);
return mListener->OnDataAvailable(aRequest, aContext, aInputStream, offset,
count);
}
nsresult
nsSAXXMLReader::InitParser(nsIRequestObserver *aObserver, nsIChannel *aChannel)
{
nsresult rv;
// setup the parser
nsCOMPtr<nsIParser> parser = do_CreateInstance(kParserCID, &rv);
NS_ENSURE_SUCCESS(rv, rv);
parser->SetContentSink(this);
int32_t charsetSource = kCharsetFromDocTypeDefault;
nsAutoCString charset(NS_LITERAL_CSTRING("UTF-8"));
TryChannelCharset(aChannel, charsetSource, charset);
parser->SetDocumentCharset(charset, charsetSource);
rv = parser->Parse(mBaseURI, aObserver);
NS_ENSURE_SUCCESS(rv, rv);
mListener = do_QueryInterface(parser, &rv);
return rv;
}
// from nsDocument.cpp
bool
nsSAXXMLReader::TryChannelCharset(nsIChannel *aChannel,
int32_t& aCharsetSource,
nsACString& aCharset)
{
if (aCharsetSource >= kCharsetFromChannel)
return true;
if (aChannel) {
nsAutoCString charsetVal;
nsresult rv = aChannel->GetContentCharset(charsetVal);
if (NS_SUCCEEDED(rv)) {
nsAutoCString preferred;
if (!EncodingUtils::FindEncodingForLabel(charsetVal, preferred))
return false;
aCharset = preferred;
aCharsetSource = kCharsetFromChannel;
return true;
}
}
return false;
}
nsresult
nsSAXXMLReader::EnsureBaseURI()
{
if (mBaseURI)
return NS_OK;
return NS_NewURI(getter_AddRefs(mBaseURI), "about:blank");
}
nsresult
nsSAXXMLReader::SplitExpatName(const char16_t *aExpatName,
nsString &aURI,
nsString &aLocalName,
nsString &aQName)
{
/**
* Adapted from RDFContentSinkImpl
*
* Expat can send the following:
* localName
* namespaceURI<separator>localName
* namespaceURI<separator>localName<separator>prefix
*
* and we use 0xFFFF for the <separator>.
*
*/
NS_ASSERTION(aExpatName, "null passed to handler");
nsDependentString expatStr(aExpatName);
int32_t break1, break2 = kNotFound;
break1 = expatStr.FindChar(char16_t(0xFFFF));
if (break1 == kNotFound) {
aLocalName = expatStr; // no namespace
aURI.Truncate();
aQName = expatStr;
} else {
aURI = StringHead(expatStr, break1);
break2 = expatStr.FindChar(char16_t(0xFFFF), break1 + 1);
if (break2 == kNotFound) { // namespace, but no prefix
aLocalName = Substring(expatStr, break1 + 1);
aQName = aLocalName;
} else { // namespace with prefix
aLocalName = Substring(expatStr, break1 + 1, break2 - break1 - 1);
aQName = Substring(expatStr, break2 + 1) +
NS_LITERAL_STRING(":") + aLocalName;
}
}
return NS_OK;
}