我有以下方法来获取 XLSX 文件并将其转换为 XDocument:
public static XDocument ConvertXlsx2Xml(string fileName, string sheetName)
{
// Return the value of the specified cell.
const string documentRelationshipType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
const string worksheetSchema = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
const string sharedStringsRelationshipType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
const string sharedStringSchema = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
string cellValue = null;
var xsurvey = new XDocument(new XDeclaration("1.0", "UTF-8", "yes"));
var xroot = new XElement("Root"); //Create the root
using (Package xlPackage = Package.Open(fileName, FileMode.Open, FileAccess.Read))
{
PackagePart documentPart = null;
Uri documentUri = null;
// Get the main document part (workbook.xml).
foreach (System.IO.Packaging.PackageRelationship relationship in xlPackage.GetRelationshipsByType(documentRelationshipType))
{
// There should only be one document part in the package.
documentUri = PackUriHelper.ResolvePartUri(new Uri("/", UriKind.Relative), relationship.TargetUri);
documentPart = xlPackage.GetPart(documentUri);
// There should only be one instance, but get out no matter what.
break;
}
if (documentPart != null)
{
// Load the contents of the workbook.
var doc = new XmlDocument();
doc.Load(documentPart.GetStream());
/*
doc now contains the following important nodes:
<bookViews>
<workbookView xWindow="-15615" yWindow="2535" windowWidth="26835" windowHeight="13095" activeTab="2" />
<sheets>
<sheet name="Sheet1" sheetId="2" r:id="rId1" />
*/
// Create a namespace manager, so you can search.
// Add a prefix (d) for the default namespace.
var nt = new NameTable();
var nsManager = new XmlNamespaceManager(nt);
nsManager.AddNamespace("d", worksheetSchema);
nsManager.AddNamespace("s", sharedStringSchema);
//If value for sheetName isn't found, take the first sheet
string searchString = string.Format("//d:sheet[@name='{0}']", sheetName);
XmlNode sheetNode = doc.SelectSingleNode(searchString, nsManager) ??
doc.SelectSingleNode("//d:sheet", nsManager);
/*
* 11/15/12 DS Added to avoid pulling the data each time
* Create a dictionary of the shared strings from the associated string file
*/
#region Shared String Dictionary
var sharedStrings = new Dictionary<int, string>();
foreach (System.IO.Packaging.PackageRelationship stringRelationship in documentPart.GetRelationshipsByType(sharedStringsRelationshipType))
{
// There should only be one shared string reference, so you exit this loop immediately.
Uri sharedStringsUri = PackUriHelper.ResolvePartUri(documentUri, stringRelationship.TargetUri);
PackagePart stringPart = xlPackage.GetPart(sharedStringsUri);
{
// Load the contents of the shared strings.
var stringDoc = new XmlDocument(nt);
stringDoc.Load(stringPart.GetStream());
nsManager.AddNamespace("s", sharedStringSchema);
const string strSearch = "//s:sst";
XmlNode stringNode = stringDoc.SelectSingleNode(strSearch, nsManager);
int keyInt = 0;
if (stringNode != null)
foreach (XmlElement nd in stringNode)
{
//string test = nd.InnerText;
sharedStrings.Add(keyInt, nd.InnerText);
keyInt = keyInt + 1;
}
}
}
#endregion
var hrowList = new List<string>();
var hrowArray = new string[] {};
if (sheetNode != null && sheetNode.Attributes != null)
{
// Get the relId attribute:
XmlAttribute relationAttribute = sheetNode.Attributes["r:id"];
if (relationAttribute != null)
{
string relId = relationAttribute.Value;
// First, get the relation between the document and the sheet.
PackageRelationship sheetRelation = documentPart.GetRelationship(relId);
Uri sheetUri = PackUriHelper.ResolvePartUri(documentUri, sheetRelation.TargetUri);
PackagePart sheetPart = xlPackage.GetPart(sheetUri);
// Load the contents of the workbook.
var sheetDoc = new XmlDocument(nt);
sheetDoc.Load(sheetPart.GetStream());
/*
* sheetDoc now contains the following important nodes:
* <dimension ref="A1:V81" /> range of sheet data
* <sheetData>
* <row r="1" spans="1:22"> <row> r = row number, spans = columns containing the data
* <c r="A1" t="s"><v>0</v></c> <c> r = Cell address (A1,B4,etc), t = data type ("s"=string,"b"=bool, null=decimal)
* <v> contents are the index num if t="s", or value of t=null
*/
XmlNode sheetDataNode = sheetDoc.SelectSingleNode("//d:sheetData", nsManager);
int roNum = 0;
if (sheetDataNode != null)
{
var isSkip = false;
foreach (XmlElement row in sheetDataNode)
{
var xrow = new XElement("Row");
foreach (XmlElement cell in row)
{
XmlAttribute typeAttr = cell.Attributes["t"];
string cellType = typeAttr != null ? typeAttr.Value : string.Empty;
XmlNode valueNode = cell.SelectSingleNode("d:v", nsManager);
cellValue = valueNode != null ? valueNode.InnerText : cellValue;
// Check the cell type. At this point, this code only checks for booleans and strings individually.
switch (cellType)
{
case "b":
cellValue = cellValue == "1" ? "TRUE" : "FALSE";
break;
case "s":
cellValue = sharedStrings[Convert.ToInt32(cellValue)];
break;
}
if (cellValue == null) continue;
cellValue = cellValue.Replace("\r", "");
cellValue = cellValue.Replace("\n", " ");
cellValue = cellValue.Trim();
if (roNum == 0)
{
hrowList.Add(cellValue);
}
else
{
//XmlAttribute rowAttr = cell.Attributes["r"];
//int intStart = rowAttr.Value.IndexOfAny("0123456789".ToCharArray());
//colLet = rowAttr.Value.Substring(0, intStart);
//int colNum = NumberFromExcelColumn(colLet);
int colNum = GetColNum(cell);
/* 05/29/13 DS force column names to UPPER to remove case sensitivity */
var xvar = new XElement(hrowArray[colNum - 1].ToUpper());
xvar.SetValue(cellValue);
xrow.Add(xvar);
}
/* 6/18/2013 DS You must clear the cellValue so it is carried into the next cell value if it is empty. */
cellValue = "";
}
if (roNum == 0) hrowArray = hrowList.ToArray();
else xroot.Add(xrow);
roNum = roNum + 1;
}
}
}
}
}
}
xsurvey.Add(xroot);
return xsurvey;
}
在大多数情况下,它工作得很好。但是,我刚刚注意到,如果其中一个单元格值包含像 0.004 这样的数字,它将变为 4.0000000000000001E-3。
生成的 XML 被导入,该值作为字符串加载,但在最终传输到生产表之前,此特定字段将转换为数字。该格式不适用于数字。
如何防止负载变化?如果我不能,是否有更好的方法来防止系统错误,而不是专门清理该字段并将其作为传输过程的一部分更改回来?
更新
只有小于 0.01 的数字才有问题。1、1.004 和 .04 都可以,但 0.004 则不行。
更新 2
如果我在填充数据之前将单元格格式化为文本,我就没有这个问题。ManEx 如何存储阻止干净上传的数据。